NCBI C++ ToolKit
Classes | Macros | Functions | Variables
feature.cpp File Reference
#include <ncbi_pch.hpp>
#include <serial/objistr.hpp>
#include <serial/serial.hpp>
#include <serial/iterator.hpp>
#include <serial/enumvalues.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/seq_vector.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/impl/handle_range_map.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/SeqFeatXref.hpp>
#include <objects/seqfeat/Imp_feat.hpp>
#include <objects/seqfeat/Prot_ref.hpp>
#include <objects/seqfeat/Gene_ref.hpp>
#include <objects/seqfeat/RNA_ref.hpp>
#include <objects/seqfeat/RNA_gen.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/Rsite_ref.hpp>
#include <objects/seqfeat/Trna_ext.hpp>
#include <objects/seqfeat/Cdregion.hpp>
#include <objects/seqfeat/Gb_qual.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqfeat/SubSource.hpp>
#include <objects/seqfeat/Feat_id.hpp>
#include <objects/seqfeat/Variation_ref.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/seqport_util.hpp>
#include <objects/seq/IUPACaa.hpp>
#include <objects/seq/NCBIstdaa.hpp>
#include <objects/seq/NCBIeaa.hpp>
#include <objects/seq/NCBI8aa.hpp>
#include <objects/seq/Pubdesc.hpp>
#include <objects/seq/Heterogen.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_loc_mix.hpp>
#include <objects/seqloc/Giimport_id.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/general/User_object.hpp>
#include <objects/pub/Pub_equiv.hpp>
#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_set.hpp>
#include <objmgr/util/feature.hpp>
#include <objmgr/util/sequence.hpp>
#include <objmgr/annot_ci.hpp>
#include <algorithm>
+ Include dependency graph for feature.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.

Classes

struct  STypeLink
 
class  CFeatTreeIndex
 
struct  SBestInfoLess
 
class  CDisambiguator
 
struct  CDisambiguator::SParentInfo
 
struct  CDisambiguator::SCandidates
 
struct  SChildLess
 
struct  PByFeatInfoAddIndex
 

Macros

#define SUBTYPE(x)   CSeqFeatData::eSubtype_ ## x
 

Functions

 USING_SCOPE (sequence)
 
void s_GetTypeLabel (const CSeq_feat &feat, string *label, TFeatLabelFlags flags)
 
static void s_GetCdregionLabel (const CSeq_feat &feat, string *tlabel, CScope *scope)
 
static void s_GetRnaRefLabelFromComment (const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
 
static void s_GetRnaRefLabel (const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
 
static void s_GetVariationDbtagLabel (string *tlabel, TFeatLabelFlags, const CDbtag &dbtag)
 
static bool s_GetImpLabel (const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *type_label)
 
static void s_GetVariationLabel (const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *)
 
void s_GetContentLabel (const CSeq_feat &feat, string *label, const string *type_label, TFeatLabelFlags flags, CScope *scope)
 
void GetLabel (const CSeq_feat &feat, string *label, TFeatLabelFlags flags, CScope *scope)
 
void GetLabel (const CSeq_feat &feat, string *label, ELabelType label_type, CScope *scope)
 
CMappedFeat MapSeq_feat (const CSeq_feat_Handle &feat, const CBioseq_Handle &master_seq, const CRange< TSeqPos > &range)
 
CMappedFeat MapSeq_feat (const CSeq_feat_Handle &feat, const CSeq_id_Handle &master_id, const CRange< TSeqPos > &range)
 
CMappedFeat MapSeq_feat (const CSeq_feat_Handle &feat, const CBioseq_Handle &master_seq)
 
CMappedFeat MapSeq_feat (const CSeq_feat_Handle &feat, const CSeq_id_Handle &master_id)
 

Variables

static const bool kSplitCircular = true
 
static const bool kOptimizeTestOverlap = true
 

GetParentFeature

The algorithm is the following:

1. Feature types are organized in a tree of possible parent-child relationships: 1.1. operon, gap cannot have a parent, 1.2. gene can have operon as a parent, 1.3. mRNA, VDJ_segment, and C_region can have gene as a parent, 1.4. cdregion can have mRNA, VDJ_segment, or C_region as a parent, 1.5. prot can have cdregion as a parent (by its product location), 1.6. mat_peptide, sig_peptide can have prot as a parent, 1.x. all other feature types can have gene as a parent. 2. If a parent of the nearest feature type is not found then the next type in the tree is checked, except for prot, which will have no parent if no cdregion is found. 3. For each parent type candidate the search is done in several ways: 3.1. first we look for a parent by the Seq-feat.xref field, 3.2. then by Gene-ref if the current parent type is gene, 3.3. then parent candidates are searched by the best intersection of their locations (product in case of the prot -> cdregion link), 3.4. if no candidates are found, the next parent type is checked.

enum  EStrandMatchRule { eStrandMatch_all , eStrandMatch_at_least_one , eStrandMatch_any }
 
typedef pair< Int8, CMappedFeat > TMappedFeatScore
 
typedef vector< TMappedFeatScore > TMappedFeatScores
 
bool sFeatureGetChildrenOfSubtypeFaster (CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &, feature::CFeatTree &)
 
bool sFeatureGetChildrenOfSubtype (CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &)
 
bool sGetFeatureGeneBiotypeWrapper (feature::CFeatTree &, CMappedFeat, string &, bool)
 
CMappedFeat GetParentFeature (const CMappedFeat &feat)
 
static EStrandMatchRule s_GetStrandMatchRule (const STypeLink &link, const CFeatTree::CFeatInfo &info, const CFeatTree *tree)
 
static bool s_IsNotSubrange (const CRange< TSeqPos > &r1, const CRange< TSeqPos > &r2)
 
static void s_CollectBestOverlaps (CFeatTree::TFeatArray &features, TBestArray &bests, const STypeLink &link, TRangeArray &pp, CFeatTree *tree, TCanonicalIdsMap &ids_map)
 
static bool s_AllowedParentByOverlap (CSeqFeatData::ESubtype child, CSeqFeatData::ESubtype parent)
 
CMappedFeat GetBestGeneForMrna (const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
 
CMappedFeat GetBestGeneForCds (const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
 
CMappedFeat GetBestMrnaForCds (const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
CMappedFeat GetBestCdsForMrna (const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
void GetMrnasForGene (const CMappedFeat &gene_feat, list< CMappedFeat > &mrna_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
void GetCdssForGene (const CMappedFeat &gene_feat, list< CMappedFeat > &cds_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
CMappedFeat GetBestGeneForFeat (const CMappedFeat &feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
 
CMappedFeat GetBestParentForFeat (const CMappedFeat &feat, CSeqFeatData::ESubtype parent_type, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
static void GetOverlappingFeatures (CScope &scope, const CSeq_loc &loc, CSeqFeatData::E_Choice, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TMappedFeatScores &feats, const SAnnotSelector *base_sel)
 
static CMappedFeat GetBestOverlappingFeat (CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TBestFeatOpts opts, const SAnnotSelector *base_sel)
 
CMappedFeat GetBestOverlappingFeat (const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
 
CRef< CSeq_loc_Mapper > CreateSeqLocMapperFromFeat (const CSeq_feat &feat, CSeq_loc_Mapper::EFeatMapDirection dir, CScope *scope)
 Create CSeq_loc_Mapper from a feature, check for special cases like exceptions in CDS features. More...
 
void ClearFeatureIds (const CSeq_annot_EditHandle &annot)
 
void ClearFeatureIds (const CSeq_entry_EditHandle &entry)
 
static void s_SetFeatureId (CFeatTree &ft, const CMappedFeat &feat, int &last_id, const CMappedFeat &parent)
 
static void s_SetChildrenFeatureIds (CFeatTree &ft, const CMappedFeat &feat, int &feat_id)
 
void ReassignFeatureIds (const CSeq_entry_EditHandle &entry)
 
void ReassignFeatureIds (const CSeq_annot_EditHandle &annot)
 
static CRef< CSeq_loc > s_MakePointForLocationStop (const CSeq_loc &loc)
 
ELocationInFrame IsLocationInFrame (const CSeq_feat_Handle &cds, const CSeq_loc &loc)
 Determines whether location loc is in frame with coding region cds. More...
 
bool PromoteCDSToNucProtSet (objects::CSeq_feat_Handle &orig_feat)
 Promotes coding region from Seq-annot on nucleotide sequence to Seq-annot on nuc-prot-set if necessary and appropriate. More...
 
bool AdjustFeaturePartialFlagForLocation (CSeq_feat &new_feat)
 AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end of the feature is partial, and clear if neither end of the feature is partial. More...
 
bool CopyFeaturePartials (CSeq_feat &dst, const CSeq_feat &src)
 CopyFeaturePartials A function to copy the start and end partialness from one feature to another. More...
 
bool AdjustProteinMolInfoToMatchCDS (CMolInfo &molinfo, const CSeq_feat &cds)
 AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region. More...
 
bool AdjustForCDSPartials (const CSeq_feat &cds, CScope &scope)
 AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the partialness of a coding region has been changed. More...
 
bool AdjustForCDSPartials (const CSeq_feat &cds, CSeq_entry_Handle seh)
 AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the partialness of a coding region has been changed. More...
 
bool RetranslateCDS (const CSeq_feat &cds, CScope &scope)
 RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current translation of the coding region cds. More...
 
void AddFeatureToBioseq (const CBioseq &seq, const CSeq_feat &f, CScope &scope)
 AddFeatureToBioseq A function to add a feature to a Bioseq - will create a new feature table Seq-annot if necessary. More...
 
void AddProteinFeature (const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
 AddProteinFeature A function to create a protein feature with the specified protein name. More...
 
bool GetFeatureGeneBiotypeFaster (feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
 
bool GetFeatureGeneBiotype (feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
 

Macro Definition Documentation

◆ SUBTYPE

#define SUBTYPE (   x)    CSeqFeatData::eSubtype_ ## x

Typedef Documentation

◆ TMappedFeatScore

Definition at line 3480 of file feature.cpp.

◆ TMappedFeatScores

Definition at line 3481 of file feature.cpp.

Enumeration Type Documentation

◆ EStrandMatchRule

Enumerator
eStrandMatch_all 
eStrandMatch_at_least_one 
eStrandMatch_any 

Definition at line 2228 of file feature.cpp.

Function Documentation

◆ GetBestOverlappingFeat()

static CMappedFeat GetBestOverlappingFeat ( CScope &  scope,
const CSeq_loc &  loc,
CSeqFeatData::ESubtype  feat_subtype,
sequence::EOverlapType  overlap_type,
TBestFeatOpts  opts,
const SAnnotSelector *  base_sel 
)
static

◆ GetOverlappingFeatures()

static void GetOverlappingFeatures ( CScope &  scope,
const CSeq_loc &  loc,
CSeqFeatData::E_Choice  ,
CSeqFeatData::ESubtype  feat_subtype,
sequence::EOverlapType  overlap_type,
TMappedFeatScores &  feats,
const SAnnotSelector *  base_sel 
)
static

◆ s_AllowedParentByOverlap()

static bool s_AllowedParentByOverlap ( CSeqFeatData::ESubtype  child,
CSeqFeatData::ESubtype  parent 
)
static

◆ s_CollectBestOverlaps()

static void s_CollectBestOverlaps ( CFeatTree::TFeatArray &  features,
TBestArray &  bests,
const STypeLink &  link,
TRangeArray &  pp,
CFeatTree *  tree,
TCanonicalIdsMap &  ids_map 
)
static

◆ s_GetCdregionLabel()

static void s_GetCdregionLabel ( const CSeq_feat &  feat,
string *  tlabel,
CScope *  scope 
)
inline static

◆ s_GetContentLabel()

void s_GetContentLabel ( const CSeq_feat &  feat,
string *  label,
const string *  type_label,
TFeatLabelFlags  flags,
CScope *  scope 
)

Definition at line 587 of file feature.cpp.

References CSeqFeatData_Base::e_Biosrc, CSeqFeatData_Base::e_Bond, CSeqFeatData_Base::e_Cdregion, CSeqFeatData_Base::e_Comment, CRsite_ref_Base::e_Db, CSeqFeatData_Base::e_Gene, CSeqFeatData_Base::e_Het, CSeqFeatData_Base::e_Imp, CSeqFeatData_Base::e_Non_std_residue, CSeqFeatData_Base::e_Num, CSeqFeatData_Base::e_Org, CSeqFeatData_Base::e_Prot, CSeqFeatData_Base::e_Psec_str, CSeqFeatData_Base::e_Pub, CSeqFeatData_Base::e_Region, CSeqFeatData_Base::e_Rna, CSeqFeatData_Base::e_Rsite, CSeqFeatData_Base::e_Seq, CSeqFeatData_Base::e_Site, CRsite_ref_Base::e_Str, CSeqFeatData_Base::e_Txinit, CSeqFeatData_Base::e_User, CSeqFeatData_Base::e_Variation, fFGL_NoComments, fFGL_NoQualifiers, flags, CAliasBase< TPrim >::Get(), CSeqFeatData_Base::GetBiosrc(), CSeqFeatData_Base::GetBond(), CUser_object_Base::GetClass(), CSeq_feat_Base::GetComment(), CSeq_feat_Base::GetData(), CRsite_ref_Base::GetDb(), CSeqFeatData_Base::GetGene(), CSeqFeatData_Base::GetHet(), CGene_ref::GetLabel(), COrg_ref::GetLabel(), CProt_ref::GetLabel(), CPub_equiv::GetLabel(), CSeqFeatData_Base::GetNon_std_residue(), CBioSource_Base::GetOrg(), CSeqFeatData_Base::GetOrg(), CSeqFeatData_Base::GetProt(), CSeqFeatData_Base::GetPsec_str(), CPubdesc_Base::GetPub(), CSeqFeatData_Base::GetPub(), CSeq_feat_Base::GetQual(), CSeqFeatData_Base::GetRegion(), CSeqFeatData_Base::GetRsite(), CSeqFeatData_Base::GetSite(), CObject_id_Base::GetStr(), CRsite_ref_Base::GetStr(), CBioSource_Base::GetSubtype(), CDbtag_Base::GetTag(), CUser_object_Base::GetType(), CSeqFeatData_Base::GetUser(), CUser_object_Base::IsSetClass(), CSeq_feat_Base::IsSetComment(), CSeq_feat_Base::IsSetQual(), CBioSource_Base::IsSetSubtype(), CObject_id_Base::IsStr(), ITERATE, label, prefix, s_GetCdregionLabel(), s_GetImpLabel(), s_GetRnaRefLabel(), s_GetVariationLabel(), str(), string, CRsite_ref_Base::Which(), and CSeqFeatData_Base::Which().

Referenced by GetLabel().

◆ s_GetImpLabel()

static bool s_GetImpLabel ( const CSeq_feat &  feat,
string *  tlabel,
TFeatLabelFlags  flags,
const string *  type_label 
)
inline static

◆ s_GetRnaRefLabel()

static void s_GetRnaRefLabel ( const CSeq_feat &  feat,
string *  label,
TFeatLabelFlags  flags,
const string *  type_label 
)
inline static

◆ s_GetRnaRefLabelFromComment()

static void s_GetRnaRefLabelFromComment ( const CSeq_feat &  feat,
string *  label,
TFeatLabelFlags  flags,
const string *  type_label 
)
inline static

◆ s_GetStrandMatchRule()

static EStrandMatchRule s_GetStrandMatchRule ( const STypeLink &  link,
const CFeatTree::CFeatInfo &  info,
const CFeatTree *  tree 
)
static

◆ s_GetTypeLabel()

void s_GetTypeLabel ( const CSeq_feat &  feat,
string *  label,
TFeatLabelFlags  flags 
)

◆ s_GetVariationDbtagLabel()

static void s_GetVariationDbtagLabel ( string *  tlabel,
TFeatLabelFlags  ,
const CDbtag &  dbtag 
)
static

◆ s_GetVariationLabel()

static void s_GetVariationLabel ( const CSeq_feat &  feat,
string *  tlabel,
TFeatLabelFlags  flags,
const string *  
)
static

◆ s_IsNotSubrange()

static bool s_IsNotSubrange ( const CRange< TSeqPos > &  r1,
const CRange< TSeqPos > &  r2 
)
inline static

Definition at line 2488 of file feature.cpp.

References CRange_Base::GetFrom(), and COpenRange< Position >::GetToOpen().

Referenced by s_CollectBestOverlaps().

◆ s_MakePointForLocationStop()

static CRef<CSeq_loc> s_MakePointForLocationStop ( const CSeq_loc &  loc )
static

Definition at line 3807 of file feature.cpp.

References eExtreme_Biological, CSeq_loc::GetStop(), and CSeq_loc::SetPnt().

Referenced by IsLocationInFrame().

◆ s_SetChildrenFeatureIds()

static void s_SetChildrenFeatureIds ( CFeatTree &  ft,
const CMappedFeat &  feat,
int &  feat_id 
)
static

Definition at line 3776 of file feature.cpp.

References CFeatTree::GetChildren(), ITERATE, and s_SetFeatureId().

Referenced by ReassignFeatureIds(), and s_SetFeatureId().

◆ s_SetFeatureId()

static void s_SetFeatureId ( CFeatTree &  ft,
const CMappedFeat &  feat,
int &  last_id,
const CMappedFeat &  parent 
)
static

◆ sFeatureGetChildrenOfSubtype()

bool sFeatureGetChildrenOfSubtype ( CMappedFeat  mf,
CSeqFeatData::ESubtype  subtype,
vector< CMappedFeat > &  children 
)

◆ sFeatureGetChildrenOfSubtypeFaster()

bool sFeatureGetChildrenOfSubtypeFaster ( CMappedFeat  mf,
CSeqFeatData::ESubtype  subtype,
vector< CMappedFeat > &  children,
feature::CFeatTree &  featTree 
)

Definition at line 4210 of file feature.cpp.

References f().

Referenced by sFeatureGetChildrenOfSubtype(), and sGetFeatureGeneBiotypeWrapper().

◆ sGetFeatureGeneBiotypeWrapper()

bool sGetFeatureGeneBiotypeWrapper ( feature::CFeatTree &  ft,
CMappedFeat  mf,
string &  biotype,
bool  fast 
)

◆ USING_SCOPE()

USING_SCOPE ( sequence  )

Variable Documentation

◆ kOptimizeTestOverlap

const bool kOptimizeTestOverlap = true
static

Definition at line 1592 of file feature.cpp.

Referenced by s_CollectBestOverlaps().

◆ kSplitCircular

const bool kSplitCircular = true
static

Definition at line 1591 of file feature.cpp.

Modified on Tue Feb 27 05:56:04 2024 by modify_doxy.py rev. 669887