114 tlabel = feat.GetData().GetKey();
116 if (feat.GetData().IsImp()) {
117 if ( tlabel ==
"variation" ) {
118 tlabel =
"Variation";
120 else if ( tlabel !=
"CDS") {
121 tlabel =
"[" + tlabel +
"]";
124 && feat.GetData().GetRegion() ==
"Domain"
125 && feat.IsSetComment() ) {
128 }
else if (feat.GetData().IsImp()) {
129 tlabel =
"[" + feat.GetData().GetImp().GetKey() +
"]";
131 tlabel =
"Unknown=0";
190 feat_it; ++feat_it) {
191 feat_it->GetData().GetProt().GetLabel(tlabel);
196 ERR_POST(
Error <<
"cannot find sequence: " +
id.AsFastaString());
209 string str(
"open reading frame: ");
212 str +=
"frame not set; ";
227 str +=
"positive strand";
230 str +=
"negative strand";
233 str +=
"both strands";
236 str +=
"both strands (reverse)";
239 str +=
"strand unknown";
255 const string* type_label)
260 && feat.
GetComment().find(*type_label) == string::npos) {
265 }
else if (type_label) {
266 *
label += *type_label;
277 const string* type_label)
288 if (!
rna.IsSetExt()) {
296 switch (
rna.GetExt().Which()) {
301 tmp_label =
rna.GetExt().GetName();
303 (tmp_label ==
"ncRNA" || tmp_label ==
"tmRNA"
304 || tmp_label ==
"misc_RNA")) {
307 if ((*q)->GetQual() ==
"product") {
308 tmp_label = (*q)->GetVal();
313 if ((
flags &
fFGL_Type) == 0 && type_label != 0 && !tmp_label.empty() && tmp_label.find(*type_label) == string::npos) {
314 *
label += *type_label +
"-" + tmp_label;
315 }
else if (!tmp_label.empty()) {
317 }
else if (type_label) {
318 *
label += *type_label;
323 if ( !
rna.GetExt().GetTRNA().IsSetAa() ) {
329 rna.GetExt().GetTRNA().GetAa().Which();
333 switch (aa_code_type) {
336 aa_code =
rna.GetExt().GetTRNA().GetAa().GetIupacaa();
351 aa_code =
rna.GetExt().GetTRNA().GetAa().GetNcbieaa();
366 aa_code =
rna.GetExt().GetTRNA().GetAa().GetNcbi8aa();
371 aa_code =
rna.GetExt().GetTRNA().GetAa().GetNcbistdaa();
380 *
label += *type_label +
"-" + tmp_label;
381 }
else if (!tmp_label.empty()) {
383 }
else if (type_label) {
384 *
label += *type_label;
394 if (
rna.GetExt().GetGen().CanGetProduct()) {
395 *
label =
rna.GetExt().GetGen().GetProduct();
396 }
else if (
rna.GetExt().GetGen().CanGetClass()) {
397 *
label =
rna.GetExt().GetGen().GetClass();
410 if ( dbtag.
GetDb() ==
"dbSNP" ) {
411 if ( !tlabel->empty() ) {
420 *tlabel +=
tag.GetStr();
433 const string* type_label)
472 *tlabel += (*it)->GetVal();
481 *tlabel += type_label ? *type_label :
string(
"");
490 *tlabel = (*it)->GetVal();
501 if (pos == string::npos) {
507 *tlabel += type_label ? *type_label :
string(
"");
516 string std_name, func, num, other;
519 if (other.empty()) other = (*it)->GetVal();
521 std_name = (*it)->GetVal();
525 func = (*it)->GetVal();
529 num = (*it)->GetVal();
533 if (!std_name.empty()) {
545 if (!other.empty()) {
551 *tlabel += type_label ? *type_label :
string(
"");
578 if ( !tlabel->empty() ) {
590 const string* type_label,
639 tlabel += CSeqFeatData::GetTypeInfo_enum_EBond()
644 tlabel += CSeqFeatData::GetTypeInfo_enum_ESite()
672 tlabel += CSeqFeatData::GetTypeInfo_enum_EPsec_str()
687 if ( !
str.empty() ) {
690 (*iter)->GetLabel(&
str);
711 if (!tlabel.empty()) {
721 tlabel +=
prefix + (**it).GetQual();
723 if (!(**it).GetVal().empty()) {
724 tlabel +=
"=" + (**it).GetVal();
731 if (tlabel.empty()) {
760 *
label += type_label;
769 size_t label_len =
label->size();
774 *
label += type_label;
785 switch (label_type) {
820 if (
id.IsLocal() ) {
822 if (
local.IsId() ) {
823 int old_id =
local.GetId();
824 int new_id =
RemapId(old_id, tse);
825 if ( new_id != old_id ) {
838 if (
id.IsLocal() ) {
840 if (
local.IsId() ) {
841 int old_id =
local.GetId();
843 if ( new_id != old_id ) {
938 int oid1int = oid1.
GetId();
939 int oid2int = oid2.
GetId();
940 if ( oid1int != oid2int ) {
941 return oid1int < oid2int;
944 else if ( oid1.
IsStr() ) {
945 const string& oid1str = oid1.
GetStr();
946 const string& oid2str = oid2.
GetStr();
986 if ( it->GetSeq_feat_Handle() == feat ) {
992 "MapSeq_feat: feature not found");
1002 if ( !master_seq ) {
1004 "MapSeq_feat: master sequence not found");
1023 if ( !master_seq ) {
1025 "MapSeq_feat: master sequence not found");
1070 : m_StartType(start ==
CSeqFeatData::eSubtype_bad? subtype: start),
1071 m_CurrentType(subtype),
1075 switch ( subtype ) {
1153 return sm_SpecialVDJTypes;
1200 bool sx_IsIrregularLocation(
const CSeq_loc& loc,
1205 if ( !loc.
IsMix() ) {
1209 if ( !loc.
GetId() ) {
1227 if ( sx_IsIrregularLocation(loc1, circular_length) ) {
1235 if (
range.Empty() ) {
1241 if ( plus_strand ) {
1242 if (
range.GetFrom() < pos ) {
1245 pos =
range.GetTo()+1;
1249 if (
range.GetTo() > pos ) {
1252 pos =
range.GetFrom();
1322 static const char kQual_transcript_id[] =
"transcript_id";
1323 static const char kQual_orig_transcript_id[] =
"orig_transcript_id";
1324 static const char kQual_orig_protein_id[] =
"orig_protein_id";
1326 kQualPriority_transcript_id,
1327 kQualPriority_orig_transcript_id,
1328 kQualPriority_orig_protein_id,
1332 struct SMatchingQuals {
1347 if ( (*it)->IsSetVal() ) {
1348 const string& qual = (*it)->GetQual();
1349 if ( qual == kQual_orig_protein_id ||
1350 qual == kQual_orig_transcript_id ||
1351 qual == kQual_transcript_id ) {
1371 if ( (*it)->IsSetVal() ) {
1372 const string& qual = (*it)->GetQual();
1373 if ( qual == kQual_orig_protein_id ) {
1374 qq[kQualPriority_orig_protein_id] = *it;
1376 else if ( qual == kQual_orig_transcript_id ) {
1377 qq[kQualPriority_orig_transcript_id] = *it;
1379 else if ( qual == kQual_transcript_id ) {
1380 qq[kQualPriority_transcript_id] = *it;
1387 Uint1 GetMatch(
const SMatchingQuals& quals2)
const
1389 for (
int i = 0;
i < kQualPriority_count; ++
i ) {
1390 if ( qq[
i] && quals2.qq[
i] &&
1391 qq[
i]->GetVal() == quals2.qq[
i]->GetVal() ) {
1403 return SMatchingQuals::HasMatch(feat);
1412 if ( xrefs.size() == 1 ) {
1416 if (
data.IsGene() ) {
1434 SMatchingQuals quals1(feat1);
1435 SMatchingQuals quals2(feat2);
1436 return quals1.GetMatch(quals2);
1450 (
true || sx_IsIrregularLocation(loc, circular_length)) ) {
1455 return overlap_type;
1475 if ( feat_type != parent_type ) {
1476 for (
STypeLink link(feat_type); link; ++link ) {
1487 static const int kBetterTypeParentQuality= 1000;
1488 static const int kByLocusParentQuality = 750;
1489 static const int kSameTypeParentQuality = 500;
1490 static const int kWorseTypeParentQuality = kSameTypeParentQuality;
1496 int d_child = sx_GetRootDistance(child);
1497 int d_parent = sx_GetRootDistance(parent);
1498 if ( d_parent < d_child ) {
1501 return kBetterTypeParentQuality - (d_child - d_parent);
1506 return kWorseTypeParentQuality - (d_parent - d_child);
1525 if (
id.IsLocal() ) {
1543 if (
data.IsGene() ) {
1566 sx_GetOverlapType(link, c_loc, circular_length);
1572 sel.IncludeFeatSubtype(*type_ptr);
1582 if ( overlap >= 0 && overlap < best_overlap ) {
1584 best_overlap = overlap;
1621 best_parent = sx_GetParentByRef(feat, link);
1622 if ( best_parent ) {
1627 best_parent = sx_GetParentByOverlap(feat, link, circular_length);
1628 if ( best_parent ) {
1654 void CheckBest(
Int1 quality,
Int8 overlap, CFeatInfo*
info)
1657 if ( (quality > m_Quality ||
1658 (quality == m_Quality && overlap < m_Overlap)) ) {
1659 m_Quality = quality;
1660 m_Overlap = overlap;
1664 void CheckBest(
const SBestInfo&
b)
1666 CheckBest(
b.m_Quality,
b.m_Overlap,
b.m_Info);
1673 struct SFeatRangeInfo {
1687 void x_CanonizeId(TCanonicalIdsMap& ids_map)
1690 auto iter = ids_map.find(m_Id);
1691 if ( iter != ids_map.end() ) {
1692 m_Id = iter->second;
1696 m_Info->m_Feat.GetScope(),
1701 ids_map[m_Id] = new_id;
1706 SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1707 CFeatInfo&
info, SBestInfo* best,
1708 bool by_product =
false)
1710 m_SplitRange(
false),
1714 m_Id =
info.m_Feat.GetProductId();
1716 m_Range =
info.m_Feat.GetProductTotalRange();
1720 m_Id =
info.m_Feat.GetLocationId();
1722 m_Range =
info.m_Feat.GetLocationTotalRange();
1726 x_CanonizeId(ids_map);
1728 SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1729 CFeatInfo&
info, SBestInfo* best,
1732 m_Range(it->second.GetOverlappingRange()),
1734 m_SplitRange(
false),
1738 x_CanonizeId(ids_map);
1741 struct PLessByStart {
1743 bool operator()(
const SFeatRangeInfo&
a,
const SFeatRangeInfo&
b)
const
1745 return a.m_Id <
b.m_Id ||
1746 (
a.m_Id ==
b.m_Id &&
a.m_Range <
b.m_Range);
1751 bool operator()(
const SFeatRangeInfo&
a,
const SFeatRangeInfo&
b)
const
1753 return a.m_Id <
b.m_Id ||
1754 (
a.m_Id ==
b.m_Id &&
1755 (
a.m_Range.GetToOpen() <
b.m_Range.GetToOpen() ||
1756 (
a.m_Range.GetToOpen() ==
b.m_Range.GetToOpen() &&
1757 a.m_Range.GetFrom() <
b.m_Range.GetFrom())));
1762 bool s_AddCircularRanges(vector<SFeatRangeInfo>& rr,
1763 SFeatRangeInfo& range_info,
1764 bool by_product =
false)
1766 const bool kAllowOriginInGap =
true;
1770 if ( !kAllowOriginInGap && range_info.m_Range.GetFrom() != 0 ) {
1775 range_info.m_Info->m_Feat.GetProduct():
1776 range_info.m_Info->m_Feat.GetLocation();
1787 if ( start <= stop ) {
1791 TSeqPos circular_length = sx_GetCircularLength(range_info.m_Info->m_Feat.GetScope(), range_info.m_Id);
1795 if ( !kAllowOriginInGap && range_info.m_Range.GetToOpen() < circular_length ) {
1800 TSeqPos total_end_open = range_info.m_Range.GetToOpen();
1801 range_info.m_SplitRange =
true;
1802 range_info.m_Range.SetTo(stop);
1803 rr.push_back(range_info);
1804 range_info.m_Range.SetFrom(start);
1805 range_info.m_Range.SetToOpen(total_end_open);
1806 rr.push_back(range_info);
1810 void s_AddRanges(TCanonicalIdsMap& ids_map,
1811 vector<SFeatRangeInfo>& rr,
1816 info.m_MultiId =
true;
1820 SFeatRangeInfo range_info(ids_map,
info, best, it);
1821 rr.push_back(range_info);
1825 typedef vector<SBestInfo> TBestArray;
1826 typedef vector<SFeatRangeInfo> TRangeArray;
1827 typedef vector<CFeatTree::CFeatInfo*> TInfoArray;
1839 class CFeatTreeParentTypeIndex :
public CObject
1845 m_ByProduct(by_product),
1850 TRangeArray& GetIndex(TCanonicalIdsMap& ids_map,
1851 const TInfoArray& feats) {
1852 if ( m_IndexedParents == feats.size() ) {
1855 for (
size_t ind = m_IndexedParents; ind < feats.size(); ++ind ) {
1857 if ( feat_info.
m_AddIndex < m_IndexedParents ||
1862 SFeatRangeInfo range_info(ids_map, feat_info, 0, m_ByProduct);
1863 if ( range_info.m_Id ) {
1864 if ( !s_AddCircularRanges(m_Index, range_info, m_ByProduct) ) {
1865 m_Index.push_back(range_info);
1869 s_AddRanges(ids_map,
1870 m_Index, feat_info, 0,
1876 sort(m_Index.begin(), m_Index.end(), PLessByEnd());
1877 m_IndexedParents = feats.size();
1884 size_t m_IndexedParents;
1885 TRangeArray m_Index;
1898 const TInfoArray& feats) {
1902 index =
new CFeatTreeParentTypeIndex(
type, by_product);
1979 if (
this != &ft ) {
2038 for ( ; it; ++it ) {
2048 "CFeatTree: feature is null");
2053 if ( !
info.m_Feat ) {
2056 info.m_AddIndex = index;
2058 info.m_CanMatchByQual = sx_CanMatchByQual(feat);
2059 info.m_IsSetGene = sx_GeneSuppressed(feat);
2078 "CFeatTree: feature not found");
2089 "CFeatTree: feature not found");
2091 return it->second.m_Feat;
2105 pair<int, CFeatTree::CFeatInfo*>
2109 pair<int, CFeatInfo*> ret(0,
nullptr);
2110 if ( !
info.m_Feat.IsSetXref() ) {
2121 if ( !
id.IsLocal() ) {
2124 vector<CSeq_feat_Handle> ff =
2126 ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2132 sx_GetParentTypeQuality(parent->
GetSubtype(),
2134 if ( quality > ret.first ) {
2135 ret.first = quality;
2136 ret.second = parent;
2140 if ( ret.first > kByLocusParentQuality ) {
2146 info.GetSubtype()) ) {
2152 if (
data.IsGene() ) {
2153 vector<CSeq_feat_Handle> ff =
2155 ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2158 ret.first = kByLocusParentQuality;
2163 ret.first = kByLocusParentQuality;
2177 pair<int, CFeatInfo*> parent =
2179 if ( !parent.second ) {
2186 if ( parent.first <= kWorseTypeParentQuality ||
2187 parent.first == kSameTypeParentQuality ) {
2195 if ( parent.second->IsSetParent() &&
2196 parent.second->m_Parent == &
info ) {
2200 pair<int, CFeatInfo*> grand_parent =
2202 if ( grand_parent.second == &
info ) {
2204 if ( parent.first < grand_parent.first ) {
2210 if ( parent.second->IsGene() ) {
2218 if ( !
info.IsSetGene() ) {
2240 tree->GetSNPStrandMode() ==
tree->eSNPStrand_both ) {
2244 if (
info.m_Feat.IsSetExcept_text() &&
2245 info.m_Feat.GetExcept_text().find(
"trans-splicing") !=
NPOS ) {
2255 bool operator()(
const SBestInfo& info1,
const SBestInfo& info2)
const {
2256 if (info1.m_Info && info2.m_Info) {
2257 if (info1.m_Quality != info2.m_Quality) {
2258 return info1.m_Quality > info2.m_Quality;
2260 if (info1.m_Overlap != info2.m_Overlap) {
2261 return info1.m_Overlap < info2.m_Overlap;
2264 return info1.m_Info < info2.m_Info;
2276 for (
size_t i = 0;
i <
cnt; ++
i) {
2303 if ( c->GetSubtype() == subtype ) {
2314 info.CheckBest(quality, overlap, parent);
2353 if (cr1.first == cr2.first)
return false;
2355 if (cr1.second.parents.size() != cr2.second.parents.size()) {
2356 return cr1.second.parents.size() < cr2.second.parents.size();
2359 if (!cr1.second.parents.empty()) {
2360 const SBestInfo& p1 = *cr1.second.parents.begin();
2361 const SBestInfo& p2 = *cr2.second.parents.begin();
2362 if (p1.m_Quality != p2.m_Quality)
return p1.m_Quality > p2.m_Quality;
2363 if (p1.m_Overlap != p2.m_Overlap)
return p1.m_Overlap < p2.m_Overlap;
2370 if (
cmp != 0)
return cmp < 0;
2375 if (
cmp != 0)
return cmp < 0;
2381 if ( !f2.
IsSetId() )
return true;
2388 if (
cmp != 0)
return cmp < 0;
2403 if ( !giim2.
IsSetDb() )
return true;
2405 if (
cmp != 0)
return cmp < 0;
2407 else if ( giim2.
IsSetDb() )
return false;
2411 if (
cmp != 0)
return cmp < 0;
2420 if ( oid1.
IsId() ) {
2421 if ( !oid2.
IsId() )
return true;
2426 else if ( oid1.
IsStr() ) {
2427 if ( !oid2.
IsStr() )
return false;
2429 if (
cmp != 0)
return cmp < 0;
2437 else if ( f2.
IsSetId() )
return false;
2455 TOrderedChildren ordered_children;
2457 if (ci->second.parents.empty())
continue;
2458 ordered_children.insert(ci);
2460 ITERATE(TOrderedChildren, ci, ordered_children) {
2462 if (child.second.parents.empty())
continue;
2464 bests[(*ci)->second.index] = *child.second.parents.begin();
2465 CFeatInfo* parent = child.second.parents.begin()->m_Info;
2472 if (bi->m_Info == parent) {
2477 if (*pci == (*ci)->first)
continue;
2479 if (
info.m_Info == parent) {
2480 info.m_Info =
nullptr;
2499 TCanonicalIdsMap& ids_map)
2504 bool check_genes =
false;
2505 if (
tree->GetGeneCheckMode() ==
tree->eGeneCheck_match &&
2518 for (
size_t i = 0;
i <
cnt; ++
i ) {
2520 SBestInfo* best = &bests[
i];
2521 SFeatRangeInfo range_info(ids_map, feat_info, best);
2522 if ( range_info.m_Id ) {
2523 if ( !s_AddCircularRanges(cc, range_info) ) {
2524 cc.push_back(range_info);
2531 sort(cc.begin(), cc.end(), PLessByStart());
2533 typedef pair<CFeatTree::CFeatInfo*, CFeatTree::CFeatInfo*> TFeatPair;
2539 TRangeArray::iterator
pi = pp.begin();
2540 TRangeArray::iterator ci = cc.begin();
2541 for ( ; ci != cc.end(); ) {
2543 while (
pi != pp.end() &&
pi->m_Id < ci->m_Id ) {
2546 if (
pi == pp.end() ) {
2550 if ( ci->m_Id < cur_id || !ci->m_Id ) {
2554 }
while ( ci != cc.end() && (ci->m_Id < cur_id || !ci->m_Id) );
2559 TRangeArray::iterator pe =
pi;
2560 while ( pe != pp.end() && pe->m_Id == cur_id ) {
2565 sx_GetCircularLength(
pi->m_Info->m_Feat.GetScope(), cur_id);
2569 TRangeArray::iterator
i = pe;
2571 i->m_MinFrom = min_from;
2573 min_from =
min(min_from, (--
i)->m_Range.
GetFrom());
2574 i->m_MinFrom = min_from;
2579 for ( ; ci != cc.end() &&
pi != pe && ci->m_Id == cur_id; ++ci ) {
2586 sx_GetOverlapType(link, c_loc, circular_length);
2597 pi->m_Range.GetToOpen() < ci->m_Range.GetFrom() ) {
2602 for ( TRangeArray::iterator pc =
pi;
2603 pc != pe && pc->m_MinFrom < ci->m_Range.GetToOpen();
2605 if ( !pc->m_Range.IntersectingWith(ci->m_Range) ) {
2608 if ( check_genes &&
info.IsSetGene() ) {
2610 if (
info.m_Gene != pc->m_Info->GetChildrenGene() ) {
2614 if (
info.m_MultiId && pc->m_Info->m_MultiId &&
2615 !multi_id_tested.
insert(TFeatPair(&
info, pc->m_Info)).second ) {
2625 Int1 quality = s_GetParentQuality(
info, *pc->m_Info);
2629 ci->m_Id && pc->m_Id &&
2646 if ( overlap >= 0 ) {
2648 if ( !disambibuator.
Add(ci->m_Info, pc->m_Info, quality, overlap) ) {
2652 ci->m_Best->CheckBest(quality, overlap, pc->m_Info);
2659 if (
info.m_MultiId || pc->m_Info->m_MultiId ) {
2676 if ( !c_loc2 || c_loc2_strand != pstrand ) {
2683 c_loc2_strand = pstrand;
2695 if ( overlap >= 0 ) {
2697 disambibuator.
Add(ci->m_Info, pc->m_Info, quality, overlap);
2699 ci->m_Best->CheckBest((
Int1)(quality-1), overlap, pc->m_Info);
2704 for ( ; ci != cc.end() && ci->m_Id == cur_id; ++ci ) {
2739 bool unassigned =
false;
2743 if ( !
info.IsSetParent() ) {
2744 if (
info.IsSetGene() ) {
2745 if (
info.m_Gene ) {
2757 if ( !unassigned ) {
2770 if ( parents.empty() ) {
2775 if ( bests.empty() ) {
2776 swap(bests, bests1);
2779 for (
size_t i = 0;
i < bests1.size(); ++
i ) {
2780 bests[
i].CheckBest(bests1[
i]);
2784 if ( bests.empty() ) {
2790 if ( parents.empty() ) {
2799 TFeatArray::iterator dst =
features.begin();
2800 for (
size_t i = 0;
i <
cnt; ++
i ) {
2802 if ( !
info.IsSetParent() ) {
2826 TRangeArray& genes =
2828 if ( genes.empty() ) {
2837 for (
size_t i = 0;
i <
cnt; ++
i ) {
2839 if ( !
info.IsSetGene() ) {
2870 if (
info.IsSetGene() ) {
2882 bool has_genes =
false;
2893 else if ( !
info.IsSetGene() &&
STypeLink(feat_type).CanHaveGeneParent() ) {
2910 if ( !old_feats.empty() ) {
2911 old_feats.insert(old_feats.end(),
2912 new_feats.begin(), new_feats.end());
2913 swap(old_feats, new_feats);
2916 if ( has_genes && !new_feats.empty() ) {
2938 vector<TFeatArray> feats_by_type;
2940 size_t new_count = 0;
2943 if (
info.IsSetParent() ) {
2956 size_t index = feat_type;
2957 if ( index >= feats_by_type.size() ) {
2958 feats_by_type.resize(index+1);
2960 feats_by_type[feat_type].push_back(&
info);
2964 if ( new_count == 0 ) {
2968 for (
size_t type = 0;
type < feats_by_type.size(); ++
type ) {
2970 if ( feats.empty() ) {
2976 if ( feats.empty() ) {
3003 if (
info.m_IsLinkedToRoot ==
info.eIsLinkedToRoot_linking ) {
3005 <<
info.m_Feat.GetOriginalFeature()
3006 <<
info.m_Parent->m_Feat.GetOriginalFeature()
3009 "CFeatTree: cycle in xrefs to parent feature");
3011 if (
info.m_Parent ) {
3012 info.m_IsLinkedToRoot =
info.eIsLinkedToRoot_linking;
3014 info.m_IsLinkedToRoot =
info.eIsLinkedToRoot_linked;
3027 info.m_Parent = &parent;
3028 info.m_IsSetParent =
true;
3038 info.m_IsSetParent =
true;
3039 info.m_IsLinkedToRoot =
info.eIsLinkedToRoot_linked;
3047 info.m_IsSetGene =
true;
3053 if ( !
info.IsSetParent() ) {
3056 return info.m_Parent;
3063 return info.m_Children;
3102 vector<CMappedFeat> children;
3109 vector<CMappedFeat>& children)
3120 children.reserve(infos->size());
3122 children.push_back((*it)->m_Feat);
3148 m_CanMatchByQual(
false),
3149 m_IsSetParent(
false),
3151 m_IsSetChildren(
false),
3153 m_IsLinkedToRoot(eIsLinkedToRoot_unknown),
3167 return m_Feat.GetAnnot().GetTSE_Handle();
3184 if ( skip_bottom ) {
3190 if ( top_type != bottom_type ) {
3191 for (
STypeLink link(bottom_type); link; ++link ) {
3217 bottom_type, top_type, base_sel);
3309 "GetBestGeneForMrna: mrna_feat is not a mRNA");
3313 tree.AddGenesForMrna(mrna_feat, base_sel);
3314 return tree.GetBestGene(mrna_feat, lookup_type);
3316 return feat_tree->
GetBestGene(mrna_feat, lookup_type);
3329 "GetBestGeneForCds: cds_feat is not a cdregion");
3333 tree.AddGenesForCds(cds_feat, base_sel);
3334 return tree.GetBestGene(cds_feat, lookup_type);
3336 return feat_tree->
GetBestGene(cds_feat, lookup_type);
3348 "GetBestMrnaForCds: cds_feat is not a cdregion");
3352 tree.AddMrnasForCds(cds_feat, base_sel);
3367 "GetBestCdsForMrna: mrna_feat is not a mRNA");
3371 tree.AddCdsForMrna(mrna_feat, base_sel);
3374 const vector<CMappedFeat>& children = feat_tree->
GetChildren(mrna_feat);
3375 ITERATE ( vector<CMappedFeat>, it, children ) {
3385 list< CMappedFeat >& mrna_feats,
3392 "GetMrnasForGene: gene_feat is not a gene");
3396 tree.AddMrnasForGene(gene_feat, base_sel);
3400 const vector<CMappedFeat>& children = feat_tree->
GetChildren(gene_feat);
3401 ITERATE ( vector<CMappedFeat>, it, children ) {
3403 mrna_feats.push_back(*it);
3410 list< CMappedFeat >& cds_feats,
3417 "GetCdssForGene: gene_feat is not a gene");
3421 tree.AddCdsForGene(gene_feat, base_sel);
3425 const vector<CMappedFeat>& children = feat_tree->
GetChildren(gene_feat);
3426 ITERATE ( vector<CMappedFeat>, it, children ) {
3428 const vector<CMappedFeat>& children2 = feat_tree->
GetChildren(*it);
3429 ITERATE ( vector<CMappedFeat>, it2, children2 ) {
3431 cds_feats.push_back(*it2);
3436 cds_feats.push_back(*it);
3450 "GetBestGeneForFeat: feat is null");
3454 tree.AddGenesForFeat(feat, base_sel);
3455 return tree.GetBestGene(feat, lookup_type);
3469 "GetBestParentForFeat: feat is null");
3473 tree.AddFeaturesFor(feat, parent_type, base_sel);
3474 return tree.GetParent(feat, parent_type);
3476 return feat_tree->
GetParent(feat, parent_type);
3491 bool revert_locations =
false;
3493 switch (overlap_type) {
3505 revert_locations =
true;
3522 else if ( loc.
IsInt() ) {
3561 _TRACE(
"test for circularity failed: " << e.GetMsg());
3576 for ( ; feat_it; ++feat_it) {
3578 Int8 cur_diff = ( !revert_locations ) ?
3594 feats.push_back(sc);
3599 for ( ; feat_it; ++feat_it) {
3601 Int8 cur_diff = ( !revert_locations ) ?
3617 feats.push_back(sc);
3622 _TRACE(
"GetOverlappingFeatures(): error: feature iterator failed");
3638 overlap_type, scores, base_sel);
3640 if ( !scores.empty() ) {
3642 return max_element(scores.begin(), scores.end())->second;
3645 return min_element(scores.begin(), scores.end())->second;
3660 switch ( need_subtype ) {
3693 need_subtype, overlap_type, 0, base_sel);
3708 bool severe_feat_exception =
3712 if (severe_feat_exception ||
3729 for (
CFeat_CI feat_it(annot); feat_it; ++feat_it ) {
3739 for (
CFeat_CI feat_it(entry); feat_it; ++feat_it ) {
3780 vector<CMappedFeat> children = ft.
GetChildren(parent);
3781 ITERATE (vector<CMappedFeat>, it, children ) {
3812 stop->
SetPnt().SetId().Assign(citer.GetSeq_id());
3821 bool pos1_not_in =
false;
3827 bool pos2_not_in =
false;
3831 if (pos1_not_in && pos2_not_in) {
3841 unsigned int frame = 0;
3858 TSeqPos mod1 = (pos1 + 3 - frame) %3;
3864 }
else if (pos1 < frame) {
3875 TSeqPos mod2 = (pos2 + 3 - frame) %3;
3878 && pos2 == cds_len) {
3880 }
else if (pos2 <= frame) {
3884 if (pos2 > cds_len) {
3902 if ( (mod1 != 0) && (mod2 != 2) ) {
3904 }
else if (mod1 != 0) {
3906 }
else if (mod2 != 2) {
3917 if (!orig_feat.IsSetData() || !orig_feat.GetData().IsCdregion()) {
3921 if (orig_feat.IsSetPseudo() && orig_feat.GetPseudo()) {
3943 if (parent_entry.
IsSet()
3953 for (; annot_ci; ++annot_ci) {
3954 if ((*annot_ci).IsFtable()) {
3962 new_annot->
SetData().SetFtable();
3969 orig_feat = new_annot.
TakeFeat(feh);
3971 if (feat_list.empty())
3985 bool any_change =
false;
3988 bool should_be_partial = partial5 || partial3;
3989 bool is_partial =
false;
3993 if (should_be_partial && !is_partial) {
3997 else if (!should_be_partial && is_partial) {
4008 bool any_change =
false;
4013 if ((partial5 && !prot_5) || (!partial5 && prot_5)
4014 || (partial3 && !prot_3) || (!partial3 && prot_3)) {
4035 if (partial5 && partial3) {
4038 else if (partial5) {
4041 else if (partial3) {
4064 bool any_change =
false;
4084 new_feat->
Assign(*(
f->GetSeq_feat()));
4096 if ((*it)->IsMolinfo()) {
4104 beh.
SetDescr().Set().push_back(new_molinfo_desc);
4139 if (new_protein && new_protein->
IsSetInst()) {
4152 new_feat->
Assign(*(
f->GetSeq_feat()));
4174 if ((*it)->IsFtable()) {
4187 annot->
SetData().SetFtable().push_back(sf);
4202 new_prot->
SetData().SetProt().SetName().push_back(protein_name);
4213 vector<CMappedFeat>& children,
4214 feature::CFeatTree& featTree)
4219 vector<CMappedFeat> c = featTree.GetChildren(mf);
4220 for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4222 if (
f.GetFeatSubtype() == subtype) {
4223 children.push_back(
f);
4237 vector<CMappedFeat>& children)
4241 feature::CFeatTree myTree;
4244 vector<CMappedFeat> c = myTree.GetChildren(mf);
4245 for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4247 if (
f.GetFeatSubtype() == subtype) {
4248 children.push_back(
f);
4259 feature::CFeatTree& ft,
4265 #define SUBTYPE(x) CSeqFeatData::eSubtype_ ## x
4267 typedef vector<CMappedFeat> MFS;
4268 typedef MFS::const_iterator MFSit;
4270 const string strRearrange(
"rearrangement required for product");
4287 vector<CMappedFeat> vecCds;
4301 for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4302 if (it->IsSetPseudo() && it->GetPseudo()) {
4305 if (it->IsSetExcept_text() && (it->GetExcept_text() == strRearrange)) {
4308 biotype =
"protein_coding";
4313 vector<CMappedFeat> vecOthers;
4346 for (MFSit it = vecOthers.begin(); it != vecOthers.end(); it++) {
4348 if (!geneIsPseudo && (!it->IsSetPseudo() || !it->GetPseudo())) {
4351 if (singleSubtype ==
SUBTYPE(bad)) {
4352 singleSubtype = currentSubtype;
4354 else if (currentSubtype != singleSubtype) {
4366 vector<string> acceptedClasses = {
4368 "autocatalytically_spliced_intron",
4370 "hammerhead_ribozyme",
4390 if (singleSubtype ==
SUBTYPE(ncRNA) && nonPseudo) {
4392 if (!
rna.IsSetExt()) {
4403 if (rnaClass ==
"other") {
4407 if (std::find(acceptedClasses.begin(), acceptedClasses.end(), rnaClass) ==
4408 acceptedClasses.end()) {
4425 if (singleSubtype !=
SUBTYPE(bad) && nonPseudo) {
4433 if (singleSubtype ==
SUBTYPE(otherRNA)) {
4434 biotype =
"transcribed_pseudogene";
4441 if (singleSubtype !=
SUBTYPE(bad)) {
4457 for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4458 if (!it->IsSetExcept_text()) {
4461 if (it->GetExcept_text() != strRearrange) {
4464 if (it->IsSetPseudo() && it->GetPseudo()) {
4465 biotype =
"segment_pseudogene";
4468 biotype =
"segment";
4477 biotype =
"pseudogene";
4486 feature::CFeatTree& ft,
4496 feature::CFeatTree& ft,
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eBoth
Both preliminary and traceback stages.
CSeq_annot_Handle GetAnnot(void) const
TSeqPos GetLength(void) const
int Compare(const CDbtag &dbt2) const
set< SBestInfo, SBestInfoLess > TBestSet
void Disambiguate(TBestArray &bests)
CFeatTree::CFeatInfo CFeatInfo
map< CFeatInfo *, SCandidates > TChildren
bool Add(CFeatInfo *child, CFeatInfo *parent, Int1 quality, Int8 overlap)
list< CFeatInfo * > TChildList
map< CFeatInfo *, SParentInfo > TParents
CDisambiguator(CFeatTree::TFeatArray &features)
map< TParentKey, CRef< CFeatTreeParentTypeIndex > > TIndex
TRangeArray & GetIndex(CSeqFeatData::ESubtype type, bool by_product, const TInfoArray &feats)
pair< CSeqFeatData::ESubtype, bool > TParentKey
TRangeArray & GetIndex(const STypeLink &link, const TInfoArray &feats)
TCanonicalIdsMap m_CanonicalIds
void GetLabel(string *label) const
void AddLocation(const CSeq_loc &loc, ETransSplicing trans_splcing=eNoTransSplicing)
TLocMap::const_iterator const_iterator
Base class for all object manager exceptions.
Exceptions for objmgr/util library.
void GetLabel(string *label) const
void GetLabel(string *label) const
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Append a label to "label" based on content.
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Get a label that is the concatenation of the pub labels for the pubs in the set.
@RNA_ref.hpp User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
static E_Choice GetTypeFromSubtype(ESubtype subtype)
@ eSubtype_transit_peptide
@ eSubtype_misc_structure
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
@ eSubtype_mobile_element
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
static const string & GetIupacaa3(TIndex ncbistdaa)
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
CSeq_feat_Handle GetGeneByRef(const CGene_ref &ref) const
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
TSeq_feat_Handles GetGenesByRef(const CGene_ref &ref) const
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
static unsigned char depth[2 *(256+1+29)+1]
static vector< string > arr
static void s_GetRnaRefLabelFromComment(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
static CMappedFeat GetBestOverlappingFeat(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TBestFeatOpts opts, const SAnnotSelector *base_sel)
bool sFeatureGetChildrenOfSubtypeFaster(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &, feature::CFeatTree &)
bool sGetFeatureGeneBiotypeWrapper(feature::CFeatTree &, CMappedFeat, string &, bool)
pair< Int8, CMappedFeat > TMappedFeatScore
static const bool kOptimizeTestOverlap
void s_GetContentLabel(const CSeq_feat &feat, string *label, const string *type_label, TFeatLabelFlags flags, CScope *scope)
static void s_GetVariationLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *)
static EStrandMatchRule s_GetStrandMatchRule(const STypeLink &link, const CFeatTree::CFeatInfo &info, const CFeatTree *tree)
static const bool kSplitCircular
static void GetOverlappingFeatures(CScope &scope, const CSeq_loc &loc, CSeqFeatData::E_Choice, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TMappedFeatScores &feats, const SAnnotSelector *base_sel)
static void s_SetChildrenFeatureIds(CFeatTree &ft, const CMappedFeat &feat, int &feat_id)
void s_GetTypeLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags)
static void s_GetVariationDbtagLabel(string *tlabel, TFeatLabelFlags, const CDbtag &dbtag)
static bool s_IsNotSubrange(const CRange< TSeqPos > &r1, const CRange< TSeqPos > &r2)
static void s_GetRnaRefLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
static bool s_AllowedParentByOverlap(CSeqFeatData::ESubtype child, CSeqFeatData::ESubtype parent)
static bool s_GetImpLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *type_label)
static void s_GetCdregionLabel(const CSeq_feat &feat, string *tlabel, CScope *scope)
vector< TMappedFeatScore > TMappedFeatScores
static void s_CollectBestOverlaps(CFeatTree::TFeatArray &features, TBestArray &bests, const STypeLink &link, TRangeArray &pp, CFeatTree *tree, TCanonicalIdsMap &ids_map)
static void s_SetFeatureId(CFeatTree &ft, const CMappedFeat &feat, int &last_id, const CMappedFeat &parent)
static CRef< CSeq_loc > s_MakePointForLocationStop(const CSeq_loc &loc)
@ eStrandMatch_at_least_one
bool sFeatureGetChildrenOfSubtype(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
C * SerialClone(const C &src)
Create on heap a clone of the source object.
#define MSerial_AsnText
I/O stream manipulators.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
int Compare(const CSeq_loc &loc) const
bool CheckId(const CSeq_id *&id, bool may_throw=true) const
Check that the 'id' field in all parts of the location is the same as the specified id.
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
void GetMrnasForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &mrna_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
EGeneCheckMode m_GeneCheckMode
bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle &orig_feat)
Promotes coding region from Seq-annot on nucleotide sequence to Seq-annot on nuc-prot-set if necessar...
CFeatInfo & x_GetInfo(const CSeq_feat_Handle &feat)
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_type, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
bool m_IgnoreMissingGeneXref
CRef< CFeatTreeIndex > m_Index
void GetCdssForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &cds_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
vector< CMappedFeat > GetChildren(const CMappedFeat &feat)
Return all nearest children of a feature.
bool x_AssignParentByRef(CFeatInfo &info)
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
void SetFeatIdMode(EFeatIdMode mode)
const CTSE_Handle & GetTSE(void) const
bool GetFeatureGeneBiotype(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
void GetLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, CScope *scope)
void SetSNPStrandMode(ESNPStrandMode mode)
void AddGenesForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a cdregion feature.
void AddCdsForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a mRNA feature.
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
void SetIgnoreMissingGeneXref(bool ignore=true)
CFeatInfo * GetChildrenGene(void)
void x_AssignParents(void)
bool AdjustForCDSPartials(const CSeq_feat &cds, CScope &scope)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
void x_SetGene(CFeatInfo &info, CFeatInfo *gene)
pair< int, CTSE_Handle > TFullId
vector< CFeatInfo * > TChildren
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
ESNPStrandMode
Mode of processing SNP strands.
void AddCdsForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a gene feature.
const TChildren & x_GetChildren(CFeatInfo &info)
const CMappedFeat & GetMappedFeat(const CSeq_feat_Handle &feat) const
Find a corresponding CMappedFeat for a feature already added to a tree.
CMappedFeat GetBestGene(const CMappedFeat &feat, EBestGeneType lookup_type=eBestGene_TreeOnly)
Return parent gene if exists or best overlapping gene.
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
void AddMrnasForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a gene feature.
void AddProteinFeature(const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
AddProteinFeature A function to create a protein feature with the specified protein name.
void ClearFeatureIds(const CSeq_annot_EditHandle &annot)
void AddFeatureToBioseq(const CBioseq &seq, const CSeq_feat &f, CScope &scope)
AddFeatureToBioseq A function to add a feature to a Bioseq - will create a new feature table Seq-anno...
ELabelType
For compatibility with legacy code.
EBestGeneFeatIdMode m_BestGeneFeatIdMode
void x_AssignParentsByOverlap(TFeatArray &features, const STypeLink &link)
ESNPStrandMode m_SNPStrandMode
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
CMappedFeat GetParent(const CMappedFeat &feat)
Return nearest parent of a feature.
vector< CFeatInfo * > TFeatArray
EIsLinkedToRoot m_IsLinkedToRoot
void AddFeaturesFor(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype bottom_type, CSeqFeatData::ESubtype top_type, const SAnnotSelector *base_sel=0, bool skip_bottom=false)
Add all features from bottom_type to top_type for a feature.
CRef< CSeq_loc_Mapper > CreateSeqLocMapperFromFeat(const CSeq_feat &feat, CSeq_loc_Mapper::EFeatMapDirection dir, CScope *scope)
Create CSeq_loc_Mapper from a feature, check for special cases like exceptions in CDS features.
void AddFeature(const CMappedFeat &feat)
Add a single feature to the tree.
bool GetIgnoreMissingGeneXref(void) const
Mode for taking into account gene xrefs to missing genes.
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
bool GetFeatureGeneBiotypeFaster(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
CMappedFeat MapSeq_feat(const CSeq_feat_Handle &feat, const CBioseq_Handle &master_seq, const CRange< TSeqPos > &range)
void SetGeneCheckMode(EGeneCheckMode mode)
void x_AssignGenesByOverlap(TFeatArray &features)
void GetChildrenTo(const CMappedFeat &feat, vector< CMappedFeat > &children)
Store all nearest children of a feature into a vector.
vector< CFeatInfo * > TChildren
void x_SetGeneRecursive(CFeatInfo &info, CFeatInfo *gene)
CMappedFeat GetParentFeature(const CMappedFeat &feat)
void x_SetNoParent(CFeatInfo &info)
pair< int, CFeatInfo * > x_LookupParentByRef(CFeatInfo &info, CSeqFeatData::ESubtype parent_type)
EGeneCheckMode
Mode for taking into account best gene. eGeneCheck_match will try to match a parent feature only if th...
bool IsSetGene(void) const
size_t GetFeatIdsCount(void) const
~CFeatTree(void)
Destructor.
CSeqFeatData::ESubtype GetSubtype(void) const
virtual bool Less(const CSeq_feat &f1, const CSeq_feat &f2, CScope *scope)
void AddGenesForFeat(const CMappedFeat &feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for an arbitrary feature.
void x_VerifyLinkedToRoot(CFeatInfo &info)
vector< CFeatInfo * > TInfoArray
CFeatInfo * x_GetParent(CFeatInfo &info)
CFeatTree & operator=(const CFeatTree &)
int RemapId(int old_id, const CTSE_Handle &tse)
void AddMrnasForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a cdregion feature.
EGeneCheckMode GetGeneCheckMode(void) const
void AddFeatures(CFeat_CI it)
Add all features collected by a CFeat_CI to the tree.
void AddGenesForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a mRNA feature.
void x_SetParent(CFeatInfo &info, CFeatInfo &parent)
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
CFeatInfo * x_FindInfo(const CSeq_feat_Handle &feat)
bool RemapIds(CSeq_feat &feat, const CTSE_Handle &tse)
void ReassignFeatureIds(const CSeq_entry_EditHandle &entry)
EFeatIdMode
Mode of processing feature ids.
int TFeatLabelFlags
binary OR of FFeatLabelFlags
CMappedFeat GetBestCdsForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
bool GivesGeneToChildren(void) const
CFeatTree(void)
Construct empty tree.
@ eBestGene_AllowOverlapped
@ eLocationInFrame_InFrame
@ eLocationInFrame_BadStart
@ eLocationInFrame_BadStop
@ eLocationInFrame_BadStartAndStop
@ fFGL_NoComments
Leave out comments, even as fallbacks.
@ fFGL_NoQualifiers
Leave out qualifiers.
@ fFGL_Content
Include its content if there is any.
@ fFGL_Type
Always include the feature's type.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
Int8 TestForOverlap64(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
64-bit version of TestForOverlap(). Check if the two locations have an overlap of a given type.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
@ eGetId_Seq_id_BestRank
use CSeq_id::BestRank() as the scoring function
@ fBestFeat_FavorLonger
favor longer features over shorter features
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_annot_Handle GetSeq_annotHandle(const CSeq_annot &annot, EMissing action=eMissing_Default)
const CFeat_id & GetId(void) const
const CSeq_feat::TXref & GetXref(void) const
bool GetPseudo(void) const
TClass GetClass(void) const
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
void SetDescr(TDescr &v) const
void Remove(void) const
Remove current annot.
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetXref(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
void ClearFeatIds(void)
Clear feature ids.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
bool IsSetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
void SetInst(TInst &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Get parent bioseq-set handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void SetFeatId(int id)
Set single feature id.
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetClass(void) const
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
void ClearFeatXrefs(void)
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_feat_EditHandle TakeFeat(const CSeq_feat_EditHandle &handle) const
CSeq_annot_EditHandle GetAnnot(void) const
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
bool IsSetQual(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
bool IsSetPseudo(void) const
bool IsSetInst_Topology(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
CConstRef< CSeq_annot > GetSeq_annotCore(void) const
bool IsSetData(void) const
CSeq_entry_EditHandle GetParentEntry(void) const
Navigate object tree.
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetOverlapTotalRange(void)
Check overlapping only of total ranges.
SAnnotSelector & SetSourceLoc(const CSeq_loc &loc)
Set filter for source location of annotations.
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetOverlapType(EOverlapType overlap_type)
Set overlap type.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
EOverlapType
Flag to indicate location overlapping method.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
void Reset(void)
Reset reference object.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
int8_t Int1
1-byte (8-bit) signed integer
position_type GetToOpen(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
#define NCBI_XOBJUTIL_EXPORT
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
list< CRef< CSubSource > > TSubtype
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
const TDb & GetDb(void) const
Get the Db member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetClass(void) const
endeavor which designed this object Check if a value has been assigned to Class data member.
const TClass & GetClass(void) const
Get the Class member data.
const TStr & GetStr(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
TId GetId(void) const
Get the variant data.
bool IsGen(void) const
Check if variant Gen is selected.
const TGen & GetGen(void) const
Get the variant data.
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
const TClass & GetClass(void) const
Get the Class member data.
@ e_not_set
No variant selected.
@ e_Name
for naming "other" type
E_Choice Which(void) const
Which variant is currently selected.
const TDb & GetDb(void) const
Get the variant data.
const TStr & GetStr(void) const
Get the variant data.
@ e_Db
pointer to a restriction site database
TXref & SetXref(void)
Assign a value to Xref data member.
bool IsSetOrf(void) const
just an ORF ? Check if a value has been assigned to Orf data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
void ResetPartial(void)
Reset Partial data member.
const TData & GetData(void) const
Get the Data member data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
TPsec_str GetPsec_str(void) const
Get the variant data.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
TOrf GetOrf(void) const
Get the Orf member data.
bool IsImp(void) const
Check if variant Imp is selected.
const TRegion & GetRegion(void) const
Get the variant data.
const TCit & GetCit(void) const
Get the Cit member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
TBond GetBond(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
void SetId(TId &value)
Assign a value to Id data member.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
void SetData(TData &value)
Assign a value to Data data member.
const TGiim & GetGiim(void) const
Get the variant data.
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
const TCdregion & GetCdregion(void) const
Get the variant data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TOrg & GetOrg(void) const
Get the variant data.
const TRsite & GetRsite(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the Comment member data.
bool IsSetCit(void) const
citations for this feature Check if a value has been assigned to Cit data member.
bool IsVariation(void) const
Check if variant Variation is selected.
const TGene & GetGene(void) const
Get the variant data.
TSite GetSite(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
bool IsSetId(void) const
the feature copied Check if a value has been assigned to Id data member.
const TNon_std_residue & GetNon_std_residue(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
vector< CRef< CGb_qual > > TQual
const TRna & GetRna(void) const
Get the variant data.
TGibb GetGibb(void) const
Get the variant data.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
const TVariation & GetVariation(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
const THet & GetHet(void) const
Get the variant data.
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
@ e_General
for use by various databases
@ e_Local
for local software use
@ eFrame_not_set
not set, code uses one
@ eFrame_three
reading frame
const TRelease & GetRelease(void) const
Get the Release member data.
TId GetId(void) const
Get the Id member data.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsSetRelease(void) const
the release Check if a value has been assigned to Release data member.
ENa_strand
strand of nucleic acid
const TId & GetId(void) const
Get the Id member data.
const TWhole & GetWhole(void) const
Get the variant data.
TFrom GetFrom(void) const
Get the From member data.
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
list< CRef< CSeq_loc > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
const TMix & GetMix(void) const
Get the variant data.
bool IsSetDb(void) const
dbase used in Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
@ eNa_strand_both_rev
in reverse orientation
@ eNa_strand_both
in forward orientation
@ eClass_nuc_prot
nuc acid and coded proteins
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
const TInst & GetInst(void) const
Get the Inst member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TNcbieaa & SetNcbieaa(void)
Select the variant.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
const TFtable & GetFtable(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const TData & GetData(void) const
Get the Data member data.
list< CRef< CSeq_annot > > TAnnot
const TPub & GetPub(void) const
Get the Pub member data.
void SetDefaultCompleteness(void)
Assign default value to Completeness data member.
TIupacaa & SetIupacaa(void)
Select the variant.
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ e_not_set
No variant selected.
const TId & GetId(void) const
Get the Id member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
bool IsSetName(void) const
names and synonyms some variants have well-known canonical names and possible accepted synonyms Check...
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::KEY key
double f(double x_, const double &y_)
static const char * prefix[]
bool m_DoesNotNeedChildren
TChildList m_ChildrenCandidates
bool operator()(const CFeatTree::CFeatInfo *f1, const CFeatTree::CFeatInfo *f2) const
bool operator()(const SBestInfo &info1, const SBestInfo &info2) const
CDisambiguator::TChildren::const_iterator TChild
bool operator()(const TChild &c1, const TChild &c2) const
CSeqFeatData::ESubtype m_StartType
CSeqFeatData::ESubtype m_CurrentType
bool OverlapByIntervals() const
const CSeqFeatData::ESubtype * GetMultiParentTypes() const
STypeLink & operator++(void)
bool CanHaveCommonGene(void) const
CSeqFeatData::ESubtype m_ParentType
bool CanHaveGeneParent(void) const
bool operator!(void) const
STypeLink(CSeqFeatData::ESubtype subtype=CSeqFeatData::eSubtype_imp, CSeqFeatData::ESubtype start=CSeqFeatData::eSubtype_bad)
static const char *const features[]