NCBI C++ ToolKit
feature.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature.cpp 92832 2021-02-17 20:03:10Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Clifford Clausen
27 *
28 * File Description:
29 * Sequence utilities
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <serial/objistr.hpp>
34 #include <serial/serial.hpp>
35 #include <serial/iterator.hpp>
36 #include <serial/enumvalues.hpp>
37 
39 #include <objmgr/scope.hpp>
40 #include <objmgr/seq_vector.hpp>
41 #include <objmgr/feat_ci.hpp>
43 
60 
61 #include <objects/seq/Bioseq.hpp>
63 #include <objects/seq/IUPACaa.hpp>
65 #include <objects/seq/NCBIeaa.hpp>
66 #include <objects/seq/NCBI8aa.hpp>
67 #include <objects/seq/Pubdesc.hpp>
69 #include <objects/seq/Seqdesc.hpp>
71 
76 
80 
82 #include <objects/pub/Pub.hpp>
83 #include <objects/pub/Pub_set.hpp>
84 
85 #include <objmgr/util/feature.hpp>
86 #include <objmgr/util/sequence.hpp>
87 #include <objmgr/annot_ci.hpp>
88 
89 #include <algorithm>
90 
93 BEGIN_SCOPE(feature)
94 USING_SCOPE(sequence);
95 
96 // internal prototypes
98  vector<CMappedFeat>&, feature::CFeatTree&);
100  vector<CMappedFeat>&);
101 bool sGetFeatureGeneBiotypeWrapper(feature::CFeatTree&, CMappedFeat, string&, bool);
102 
103 // Appends a label onto "label" based on the type of feature
105 {
106  string tlabel;
107 
108  // Determine typelabel
109  CSeqFeatData::ESubtype idx = feat.GetData().GetSubtype();
110  if (idx != CSeqFeatData::eSubtype_bad) {
111  if (feat.GetData().IsProt() && idx != CSeqFeatData::eSubtype_prot) {
112  tlabel = feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
113  } else {
114  tlabel = feat.GetData().GetKey();
115  }
116  if (feat.GetData().IsImp()) {
117  if ( tlabel == "variation" ) {
118  tlabel = "Variation";
119  }
120  else if ( tlabel != "CDS") {
121  tlabel = "[" + tlabel + "]";
122  }
123  } else if ((flags & fFGL_NoComments) == 0 && feat.GetData().IsRegion()
124  && feat.GetData().GetRegion() == "Domain"
125  && feat.IsSetComment() ) {
126  tlabel = "Domain";
127  }
128  } else if (feat.GetData().IsImp()) {
129  tlabel = "[" + feat.GetData().GetImp().GetKey() + "]";
130  } else {
131  tlabel = "Unknown=0";
132  }
133  *label += tlabel;
134 }
135 
136 
137 // Appends a label onto tlabel for a CSeqFeatData::e_Cdregion
138 inline
140 (const CSeq_feat& feat,
141  string* tlabel,
142  CScope* scope)
143 {
144  // Check that tlabel exists and that the feature data is Cdregion
145  if (!tlabel || !feat.GetData().IsCdregion()) {
146  return;
147  }
148 
149  const CGene_ref* gref = 0;
150  const CProt_ref* pref = 0;
151 
152  // Look for CProt_ref object to create a label from
153  if (feat.IsSetXref()) {
154  ITERATE ( CSeq_feat::TXref, it, feat.GetXref()) {
155  const CSeqFeatXref& xref = **it;
156  if ( !xref.IsSetData() ) {
157  continue;
158  }
159 
160  switch (xref.GetData().Which()) {
162  pref = &xref.GetData().GetProt();
163  break;
165  gref = &xref.GetData().GetGene();
166  break;
167  default:
168  break;
169  }
170  }
171  }
172 
173  // Try and create a label from a CProt_ref in CSeqFeatXref in feature
174  if (pref) {
175  pref->GetLabel(tlabel);
176  return;
177  }
178 
179  // Try and create a label from a CProt_ref in the feat product and
180  // return if found
181  if (feat.IsSetProduct() && scope) {
182  try {
183  const CSeq_id& id = GetId(feat.GetProduct(), scope);
184  CBioseq_Handle hnd = scope->GetBioseqHandle(id);
185  if (hnd) {
186 
187  for (CFeat_CI feat_it(hnd,
189  .IncludeFeatType(CSeqFeatData::e_Prot));
190  feat_it; ++feat_it) {
191  feat_it->GetData().GetProt().GetLabel(tlabel);
192  return;
193  }
194  }
195  else {
196  ERR_POST(Error << "cannot find sequence: " + id.AsFastaString());
197  }
198  } catch (CObjmgrUtilException&) {}
199  }
200 
201  // Try and create a label from a CGene_ref in CSeqFeatXref in feature
202  if (gref) {
203  gref->GetLabel(tlabel);
204  }
205 
206  // check to see if the CDregion is just an open reading frame
207  if (feat.GetData().GetCdregion().IsSetOrf() &&
208  feat.GetData().GetCdregion().GetOrf()) {
209  string str("open reading frame: ");
210  switch (feat.GetData().GetCdregion().GetFrame()) {
212  str += "frame not set; ";
213  break;
215  str += "frame 1; ";
216  break;
218  str += "frame 2; ";
219  break;
221  str += "frame 3; ";
222  break;
223  }
224 
225  switch (sequence::GetStrand(feat.GetLocation(), scope)) {
226  case eNa_strand_plus:
227  str += "positive strand";
228  break;
229  case eNa_strand_minus:
230  str += "negative strand";
231  break;
232  case eNa_strand_both:
233  str += "both strands";
234  break;
235  case eNa_strand_both_rev:
236  str += "both strands (reverse)";
237  break;
238  default:
239  str += "strand unknown";
240  break;
241  }
242 
243  *tlabel += str;
244  }
245 
246 
247 }
248 
249 
// Appends an RNA label to *label built from the feature comment and/or the
// type label. Call sites in s_GetRnaRefLabel invoke this helper as
// s_GetRnaRefLabelFromComment(feat, label, flags, type_label).
//   feat       - feature whose comment may be used
//   label      - string appended to (dereferenced without a null check)
//   type_label - optional type label; may be NULL
250 inline
252 (const CSeq_feat& feat,
253  string* label,
255  const string* type_label)
256 {
 // Use the comment only when comments are not suppressed and it is non-empty
257  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()
258  && !feat.GetComment().empty()) {
 // Prefix the comment with "<type_label>-" when fFGL_Type is set and the
 // comment does not already contain the type label.
 // NOTE(review): s_GetRnaRefLabel applies the same "<type_label>-" prefix
 // when (flags & fFGL_Type) == 0, i.e. the opposite polarity of the test
 // below -- confirm which one is intended.
259  if ((flags & fFGL_Type) != 0 && type_label != NULL
260  && feat.GetComment().find(*type_label) == string::npos) {
261  *label += *type_label + "-" + feat.GetComment();
262  } else {
263  *label += feat.GetComment();
264  }
 // No usable comment: fall back to the bare type label, if one was given
265  } else if (type_label) {
266  *label += *type_label;
267  }
268 }
269 
270 
271 // Appends a label onto "label" for a CRNA_ref
272 inline
273 static void s_GetRnaRefLabel
274 (const CSeq_feat& feat,
275  string* label,
277  const string* type_label)
278 {
279  // Check that label exists and that feature data is type RNA-ref
280  if (!label || !feat.GetData().IsRna()) {
281  return;
282  }
283 
284  const CRNA_ref& rna = feat.GetData().GetRna();
285 
286  // Append the feature comment, the type label, or both and return
287  // if Ext is not set
288  if (!rna.IsSetExt()) {
289  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
290  return;
291  }
292 
293  // Append a label based on the type of the ext of the
294  // CRNA_ref
295  string tmp_label;
296  switch (rna.GetExt().Which()) {
298  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
299  break;
301  tmp_label = rna.GetExt().GetName();
302  if (feat.CanGetQual() &&
303  (tmp_label == "ncRNA" || tmp_label == "tmRNA"
304  || tmp_label == "misc_RNA")) {
305  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
306  ITERATE( CSeq_feat::TQual, q, qual ) {
307  if ((*q)->GetQual() == "product") {
308  tmp_label = (*q)->GetVal();
309  break;
310  }
311  }
312  }
313  if ((flags & fFGL_Type) == 0 && type_label != 0 && !tmp_label.empty() && tmp_label.find(*type_label) == string::npos) {
314  *label += *type_label + "-" + tmp_label;
315  } else if (!tmp_label.empty()) {
316  *label += tmp_label;
317  } else if (type_label) {
318  *label += *type_label;
319  }
320  break;
322  {
323  if ( !rna.GetExt().GetTRNA().IsSetAa() ) {
324  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
325  break;
326  }
327  try {
328  CTrna_ext::C_Aa::E_Choice aa_code_type =
329  rna.GetExt().GetTRNA().GetAa().Which();
330  int aa_code;
331  CSeq_data in_seq, out_seq;
332  string str_aa_code;
333  switch (aa_code_type) {
335  // Convert an e_Iupacaa code to an Iupacaa3 code for the label
336  aa_code = rna.GetExt().GetTRNA().GetAa().GetIupacaa();
338  aa_code);
339  in_seq.SetIupacaa().Set() = str_aa_code;
340  CSeqportUtil::Convert(in_seq, &out_seq,
342  if (out_seq.GetNcbistdaa().Get().size()) {
343  aa_code = out_seq.GetNcbistdaa().Get()[0];
344  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
345  } else {
346  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
347  }
348  break;
350  // Convert an e_Ncbieaa code to an Iupacaa3 code for the label
351  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbieaa();
353  aa_code);
354  in_seq.SetNcbieaa().Set() = str_aa_code;
355  CSeqportUtil::Convert(in_seq, &out_seq,
357  if (out_seq.GetNcbistdaa().Get().size()) {
358  aa_code = out_seq.GetNcbistdaa().Get()[0];
359  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
360  } else {
361  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
362  }
363  break;
365  // Convert an e_Ncbi8aa code to an Iupacaa3 code for the label
366  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbi8aa();
367  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
368  break;
370  // Convert an e_Ncbistdaa code to an Iupacaa3 code for the label
371  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbistdaa();
372  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
373  break;
374  default:
375  break;
376  }
377 
378  // Append to label, depending on flags
379  if ((flags & fFGL_Type) == 0 && type_label != 0) {
380  *label += *type_label + "-" + tmp_label;
381  } else if (!tmp_label.empty()) {
382  *label += tmp_label;
383  } else if (type_label) {
384  *label += *type_label;
385  }
386  } catch (CSeqportUtil::CBadIndex&) {
387  // fall back to comment (if any)
388  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
389  }
390 
391  break;
392  }
394  if (rna.GetExt().GetGen().CanGetProduct()) {
395  *label = rna.GetExt().GetGen().GetProduct();
396  } else if (rna.GetExt().GetGen().CanGetClass()) {
397  *label = rna.GetExt().GetGen().GetClass();
398  } else {
399  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
400  }
401  break;
402  }
403 }
404 
405 
406 static void s_GetVariationDbtagLabel(string* tlabel,
407  TFeatLabelFlags /*flags*/,
408  const CDbtag& dbtag)
409 {
410  if ( dbtag.GetDb() == "dbSNP" ) {
411  if ( !tlabel->empty() ) {
412  *tlabel += ", ";
413  }
414  const CObject_id& tag = dbtag.GetTag();
415  if ( tag.IsId() ) {
416  *tlabel += "rs";
417  *tlabel += NStr::NumericToString(tag.GetId());
418  }
419  else {
420  *tlabel += tag.GetStr();
421  }
422  }
423 }
424 
425 
426 // Appends a label to tlabel for a CImp_feat. A return value of true indicates
427 // that the label was created for a CImp_feat key = "Site-ref"
428 inline
429 static bool s_GetImpLabel
430 (const CSeq_feat& feat,
431  string* tlabel,
433  const string* type_label)
434 {
435  // Return if tlabel does not exist or feature data is not Imp-feat
436  if (!tlabel || !feat.GetData().IsImp()) {
437  return false;
438  }
439 
440  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
441  bool empty = true;
442 
443  // If the key is Site-ref
444  if (subtype == CSeqFeatData::eSubtype_site_ref) {
445  if (feat.IsSetCit()) {
446  // Create label based on Pub-set
447  feat.GetCit().GetLabel(tlabel);
448  return true;
449  }
450  }
451  else if (subtype == CSeqFeatData::eSubtype_variation) {
452  if ( feat.IsSetDbxref() ) {
453  ITERATE( CSeq_feat::TDbxref, it, feat.GetDbxref() ) {
454  s_GetVariationDbtagLabel(tlabel, flags, **it);
455  }
456  return false;
457  }
458  // else if the key is not Site-ref
459  } else if ((flags & fFGL_Type) == 0) {
460  // If the key is CDS
461  if (subtype == CSeqFeatData::eSubtype_Imp_CDS) {
462  *tlabel += "[CDS]";
463  // else if the key is repeat_unit or repeat_region
464  } else if (subtype == CSeqFeatData::eSubtype_repeat_unit ||
466  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
467  // Loop thru the feature qualifiers
468  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
469  ITERATE( CSeq_feat::TQual, it, qual ) {
470  // If qualifier qual is rpt_family append qualifier val
471  if (NStr::EqualNocase((*it)->GetQual(),"rpt_family")) {
472  *tlabel += (*it)->GetVal();
473  empty = false;
474  break;
475  }
476  }
477  }
478 
479  // If nothing has been appended yet
480  if (empty) {
481  *tlabel += type_label ? *type_label : string("");
482  }
483  // else if the key is STS
484  } else if (subtype == CSeqFeatData::eSubtype_STS) {
485  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
486  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
487  ITERATE( CSeq_feat::TQual, it, qual ) {
488  if (NStr::EqualNocase((*it)->GetQual(),"standard_name"))
489  {
490  *tlabel = (*it)->GetVal();
491  empty = false;
492  break;
493  }
494  }
495  }
496 
497  // If nothing has been appended yet
498  if (empty) {
499  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
500  size_t pos = feat.GetComment().find(";");
501  if (pos == string::npos) {
502  *tlabel += feat.GetComment();
503  } else {
504  *tlabel += feat.GetComment().substr(0, pos);
505  }
506  } else {
507  *tlabel += type_label ? *type_label : string("");
508  }
509  }
510  // else if the key is not misc_feature
511  } else if (subtype != CSeqFeatData::eSubtype_misc_feature) {
512  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
513  // Look for a single qualifier qual in order of preference
514  // "standard_name", "function", "number", any and
515  // append to tlabel and return if found
516  string std_name, func, num, other;
517  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
518  ITERATE( CSeq_feat::TQual, it, qual ) {
519  if (other.empty()) other = (*it)->GetVal();
520  if (std_name.empty() && NStr::EqualNocase((*it)->GetQual(),"standard_name")) {
521  std_name = (*it)->GetVal();
522  break; // no need to search further if found
523  }
524  if (func.empty() && NStr::EqualNocase((*it)->GetQual(), "function")) {
525  func = (*it)->GetVal();
526  continue;
527  }
528  if (num.empty() && NStr::EqualNocase((*it)->GetQual(), "number")) {
529  num = (*it)->GetVal();
530  continue;
531  }
532  }
533  if (!std_name.empty()) {
534  *tlabel += std_name;
535  return false;
536  }
537  if (!func.empty()) {
538  *tlabel += func;
539  return false;
540  }
541  if (!num.empty()) {
542  *tlabel += num;
543  return false;
544  }
545  if (!other.empty()) {
546  *tlabel += other;
547  return false;
548  }
549  // Append type_label if there is one
550  if (empty) {
551  *tlabel += type_label ? *type_label : string("");
552  return false;
553  }
554  }
555  }
556  }
557  return false;
558 }
559 
560 
561 // Appends a label to tlabel for a CVariation_ref feature: the dbSNP id (if any,
562 // via s_GetVariationDbtagLabel) followed by the variation name, joined by ", "
563 static void s_GetVariationLabel(const CSeq_feat& feat,
564  string* tlabel,
566  const string* /*type_label*/)
567 {
568  // Return if tlabel does not exist or feature data is not a Variation
569  if (!tlabel || !feat.GetData().IsVariation()) {
570  return;
571  }
572 
573  const CVariation_ref& var = feat.GetData().GetVariation();
 // The id contributes only when its database is "dbSNP" (see helper)
574  if ( var.IsSetId() ) {
575  s_GetVariationDbtagLabel(tlabel, flags, var.GetId());
576  }
 // Append the name, comma-separated from anything already in tlabel
577  if ( var.IsSetName() ) {
578  if ( !tlabel->empty() ) {
579  *tlabel += ", ";
580  }
581  *tlabel += var.GetName();
582  }
583 }
584 
585 
586 // Return a label based on the content of the feature
588 (const CSeq_feat& feat,
589  string* label,
590  const string* type_label,
592  CScope* scope)
593 {
594  string tlabel;
595 
596  // Get a content label dependent on the type of the feature data
597  switch (feat.GetData().Which()) {
599  feat.GetData().GetGene().GetLabel(&tlabel);
600  break;
601  case CSeqFeatData::e_Org:
602  feat.GetData().GetOrg().GetLabel(&tlabel);
603  break;
605  s_GetCdregionLabel(feat, &tlabel, scope);
606  break;
608  feat.GetData().GetProt().GetLabel(&tlabel);
609  break;
610  case CSeqFeatData::e_Rna:
611  s_GetRnaRefLabel(feat, &tlabel, flags, type_label);
612  break;
613  case CSeqFeatData::e_Pub:
614  feat.GetData().GetPub().GetPub().GetLabel(&tlabel);
615  break;
616  case CSeqFeatData::e_Seq:
617  break;
618  case CSeqFeatData::e_Imp:
619  if (s_GetImpLabel(feat, &tlabel, flags, type_label)) {
620  *label += tlabel;
621  return;
622  }
623  break;
625  if (feat.GetData().GetRegion().find("Domain") != string::npos &&
626  (flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
627  tlabel += feat.GetComment();
628  } else {
629  tlabel += feat.GetData().GetRegion();
630  }
631  break;
633  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
634  tlabel += feat.GetComment();
635  }
636  break;
638  // Get the ASN string name for the enumerated EBond type
639  tlabel += CSeqFeatData::GetTypeInfo_enum_EBond()
640  ->FindName(feat.GetData().GetBond(), true);
641  break;
643  // Get the ASN string name for the enumerated ESite type
644  tlabel += CSeqFeatData::GetTypeInfo_enum_ESite()
645  ->FindName(feat.GetData().GetSite(), true);
646  break;
648  switch (feat.GetData().GetRsite().Which()) {
649  case CRsite_ref::e_Str:
650  tlabel += feat.GetData().GetRsite().GetStr();
651  break;
652  case CRsite_ref::e_Db:
653  tlabel += feat.GetData().GetRsite().GetDb().GetTag().IsStr() ?
654  feat.GetData().GetRsite().GetDb().GetTag().GetStr() :
655  string("?");
656  break;
657  default:
658  break;
659  }
660  break;
662  if (feat.GetData().GetUser().IsSetClass()) {
663  tlabel += feat.GetData().GetUser().GetClass();
664  } else if (feat.GetData().GetUser().GetType().IsStr()) {
665  tlabel += feat.GetData().GetUser().GetType().GetStr();
666  }
668  break;
669  case CSeqFeatData::e_Num:
670  break;
672  tlabel += CSeqFeatData::GetTypeInfo_enum_EPsec_str()
673  ->FindName(feat.GetData().GetPsec_str(), true);
674  break;
676  tlabel += feat.GetData().GetNon_std_residue();
677  break;
678  case CSeqFeatData::e_Het:
679  tlabel += feat.GetData().GetHet().Get();
680  break;
682  {{
683  const CBioSource& biosrc = feat.GetData().GetBiosrc();
684  string str;
685  if (biosrc.IsSetSubtype()) {
686  ITERATE (CBioSource::TSubtype, iter, biosrc.GetSubtype()) {
687  if ( !str.empty() ) {
688  str += "; ";
689  }
690  (*iter)->GetLabel(&str);
691  }
692  }
693  if (str.empty()) {
694  feat.GetData().GetBiosrc().GetOrg().GetLabel(&str);
695  } else {
696  str += " (";
697  feat.GetData().GetBiosrc().GetOrg().GetLabel(&str);
698  str += ")";
699  }
700  tlabel += str;
701  }}
702  break;
704  s_GetVariationLabel(feat, &tlabel, flags, type_label);
705  break;
706  default:
707  break;
708  }
709 
710  // Return if a label has been calculated above
711  if (!tlabel.empty()) {
712  *label += tlabel;
713  return;
714  }
715 
716  // Put Seq-feat qual into label
717  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
718  string prefix("/");
719  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
720  ITERATE( CSeq_feat::TQual, it, qual ) {
721  tlabel += prefix + (**it).GetQual();
722  prefix = " /";
723  if (!(**it).GetVal().empty()) {
724  tlabel += "=" + (**it).GetVal();
725  }
726  }
727  }
728 
729  // Put Seq-feat comment into label
730  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
731  if (tlabel.empty()) {
732  tlabel = feat.GetComment();
733  } else {
734  tlabel += "; " + feat.GetComment();
735  }
736  }
737 
738  *label += tlabel;
739 }
740 
741 
743 (const CSeq_feat& feat,
744  string* label,
746  CScope* scope)
747 {
748 
749  // Ensure that label exists
750  if (!label) {
751  return;
752  }
753 
754  // Get the type label
755  string type_label;
756  s_GetTypeLabel(feat, &type_label, flags);
757 
758  // Append the type label and return if content label not required
759  if ((flags & fFGL_Type) != 0) {
760  *label += type_label;
761  if ((flags & fFGL_Content) != 0) {
762  *label += ": ";
763  } else {
764  return;
765  }
766  }
767 
768  // Append the content label
769  size_t label_len = label->size();
770  s_GetContentLabel(feat, label, &type_label, flags, scope);
771 
772  // If there is no content label, append the type label
773  if (label->size() == label_len && (flags & fFGL_Type) == 0) {
774  *label += type_label;
775  }
776 }
777 
778 
// Convenience overload of GetLabel taking an ELabelType.
// Translates label_type into the corresponding fFGL_* flag value
// (eType -> fFGL_Type, eContent -> fFGL_Content, eBoth -> fFGL_Both) and
// forwards to the flag-based GetLabel overload with the same feat, label
// and scope.
779 void GetLabel (const CSeq_feat& feat,
780  string* label,
781  ELabelType label_type,
782  CScope* scope)
783 {
 // NOTE(review): the switch has no default; for any other label_type value
 // "flags" keeps whatever it was initialized with (its declaration is not
 // visible here) -- confirm the initial value.
785  switch (label_type) {
786  case eType: flags = fFGL_Type; break;
787  case eContent: flags = fFGL_Content; break;
788  case eBoth: flags = fFGL_Both; break;
789  }
790  GetLabel(feat, label, flags, scope);
791 }
792 
793 
795 {
796  m_IdMap.clear();
797 }
798 
799 
801 {
802  return m_IdMap.size();
803 }
804 
805 
806 int CFeatIdRemapper::RemapId(int old_id, const CTSE_Handle& tse)
807 {
808  TFullId key(old_id, tse);
809  int& new_id = m_IdMap[key];
810  if ( !new_id ) {
811  new_id = int(m_IdMap.size());
812  }
813  return new_id;
814 }
815 
816 
818 {
819  bool mapped = false;
820  if ( id.IsLocal() ) {
821  CObject_id& local = id.SetLocal();
822  if ( local.IsId() ) {
823  int old_id = local.GetId();
824  int new_id = RemapId(old_id, tse);
825  if ( new_id != old_id ) {
826  mapped = true;
827  local.SetId(new_id);
828  }
829  }
830  }
831  return mapped;
832 }
833 
834 
835 bool CFeatIdRemapper::RemapId(CFeat_id& id, const CFeat_CI& feat_it)
836 {
837  bool mapped = false;
838  if ( id.IsLocal() ) {
839  CObject_id& local = id.SetLocal();
840  if ( local.IsId() ) {
841  int old_id = local.GetId();
842  int new_id = RemapId(old_id, feat_it.GetAnnot().GetTSE_Handle());
843  if ( new_id != old_id ) {
844  mapped = true;
845  local.SetId(new_id);
846  }
847  }
848  }
849  return mapped;
850 }
851 
852 
854 {
855  bool mapped = false;
856  if ( feat.IsSetId() ) {
857  if ( RemapId(feat.SetId(), tse) ) {
858  mapped = true;
859  }
860  }
861  if ( feat.IsSetXref() ) {
862  NON_CONST_ITERATE ( CSeq_feat::TXref, it, feat.SetXref() ) {
863  CSeqFeatXref& xref = **it;
864  if ( xref.IsSetId() && RemapId(xref.SetId(), tse) ) {
865  mapped = true;
866  }
867  }
868  }
869  return mapped;
870 }
871 
872 
874 {
875  CRef<CSeq_feat> feat(SerialClone(feat_it->GetMappedFeature()));
876  if ( feat->IsSetId() ) {
877  RemapId(feat->SetId(), feat_it);
878  }
879  if ( feat->IsSetXref() ) {
880  NON_CONST_ITERATE ( CSeq_feat::TXref, it, feat->SetXref() ) {
881  CSeqFeatXref& xref = **it;
882  if ( xref.IsSetId() ) {
883  RemapId(xref.SetId(), feat_it);
884  }
885  }
886  }
887  return feat;
888 }
889 
890 
892  const CSeq_feat& f2,
893  CScope* scope)
894 {
895  string l1, l2;
896  GetLabel(f1, &l1, fFGL_Both, scope);
897  GetLabel(f2, &l2, fFGL_Both, scope);
898 
899  int d = NStr::Compare(l1, l2);
900  if ( d != 0 ) {
901  return d < 0;
902  }
903 
904  // TODO: To make C and C++ match better, we stop comparing CDS's at this point.
905  // This can be removed once we have gone completely to C++.
906  if( f1.IsSetData() && f1.GetData().IsCdregion() &&
907  f2.IsSetData() && f2.GetData().IsCdregion() )
908  {
909  return false;
910  }
911 
912  if ( f1.IsSetComment() != f2.IsSetComment() ) {
913  return !f1.IsSetComment();
914  }
915  if ( f1.IsSetComment() ) {
916  d = NStr::Compare(f1.GetComment(), f2.GetComment());
917  if ( d != 0 ) {
918  return d < 0;
919  }
920  }
921 
922  if ( f1.IsSetId() != f2.IsSetId() ) {
923  return f1.IsSetId();
924  }
925  if ( f1.IsSetId() ) {
926  const CFeat_id& id1 = f1.GetId();
927  const CFeat_id& id2 = f2.GetId();
928  if ( id1.Which() != id2.Which() ) {
929  return id1.Which() < id2.Which();
930  }
931  if ( id1.IsLocal() ) {
932  const CObject_id& oid1 = id1.GetLocal();
933  const CObject_id& oid2 = id2.GetLocal();
934  if ( oid1.Which() != oid2.Which() ) {
935  return oid1.Which() < oid2.Which();
936  }
937  if ( oid1.IsId() ) {
938  int oid1int = oid1.GetId();
939  int oid2int = oid2.GetId();
940  if ( oid1int != oid2int ) {
941  return oid1int < oid2int;
942  }
943  }
944  else if ( oid1.IsStr() ) {
945  const string& oid1str = oid1.GetStr();
946  const string& oid2str = oid2.GetStr();
947  int diff = NStr::CompareNocase(oid1str, oid2str);
948  if ( diff != 0 ) {
949  return diff < 0;
950  }
951  }
952  }
953  }
954 
955  if ( f1.GetData().IsGene() && f2.GetData().IsGene() ) {
956  const CGene_ref& g1 = f1.GetData().GetGene();
957  const CGene_ref& g2 = f2.GetData().GetGene();
958  if ( g1.IsSetLocus_tag() != g2.IsSetLocus_tag() ) {
959  return !g1.IsSetLocus_tag();
960  }
961  if ( g1.IsSetLocus_tag() ) {
962  d = NStr::Compare(g1.GetLocus_tag(), g2.GetLocus_tag());
963  if ( d != 0 ) {
964  return d < 0;
965  }
966  }
967  }
968 
969  return false;
970 }
971 
972 
974  const CBioseq_Handle& master_seq,
975  const CRange<TSeqPos>& range)
976 {
977  SAnnotSelector sel(feat.GetFeatSubtype());
978  sel.SetExactDepth();
979  sel.SetResolveAll();
980  CSeq_annot_Handle annot = feat.GetAnnot();
981  sel.SetLimitSeqAnnot(annot);
983  for ( int depth = 0; depth < 10; ++depth ) {
984  sel.SetResolveDepth(depth);
985  for ( CFeat_CI it(master_seq, range, sel); it; ++it ) {
986  if ( it->GetSeq_feat_Handle() == feat ) {
987  return *it;
988  }
989  }
990  }
991  NCBI_THROW(CObjMgrException, eFindFailed,
992  "MapSeq_feat: feature not found");
993 }
994 
995 
998  const CSeq_id_Handle& master_id,
999  const CRange<TSeqPos>& range)
1000 {
1001  CBioseq_Handle master_seq = feat.GetScope().GetBioseqHandle(master_id);
1002  if ( !master_seq ) {
1003  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1004  "MapSeq_feat: master sequence not found");
1005  }
1006  return MapSeq_feat(feat, master_seq, range);
1007 }
1008 
1009 
1012  const CBioseq_Handle& master_seq)
1013 {
1014  return MapSeq_feat(feat, master_seq, CRange<TSeqPos>::GetWhole());
1015 }
1016 
1017 
1020  const CSeq_id_Handle& master_id)
1021 {
1022  CBioseq_Handle master_seq = feat.GetScope().GetBioseqHandle(master_id);
1023  if ( !master_seq ) {
1024  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1025  "MapSeq_feat: master sequence not found");
1026  }
1027  return MapSeq_feat(feat, master_seq);
1028 }
1029 
1030 
1032 {
1035 
1036  bool IsValid(void) const {
1038  }
1039  operator bool(void) const {
1040  return IsValid();
1041  }
1042  bool operator!(void) const {
1043  return !IsValid();
1044  }
1045 
1046  void Next(void);
1048  Next();
1049  return *this;
1050  }
1051 
1052  bool CanHaveGeneParent(void) const;
1053  bool CanHaveCommonGene(void) const;
1054 
1055  // special cdregion to mRNA/VDJ_segment/C_range link
1057 
1058  // check for overlap by intervals
1059  bool OverlapByIntervals() const;
1060 
1061  CSeqFeatData::ESubtype m_StartType; // initial feature type
1062  CSeqFeatData::ESubtype m_CurrentType; // current link child type
1063  CSeqFeatData::ESubtype m_ParentType; // current link parent type
1065 };
1066 
1067 
1069  CSeqFeatData::ESubtype start)
1070  : m_StartType(start == CSeqFeatData::eSubtype_bad? subtype: start),
1071  m_CurrentType(subtype),
1072  m_ParentType(CSeqFeatData::eSubtype_bad),
1073  m_ByProduct(false)
1074 {
1075  switch ( subtype ) {
1078  // artificial subtypes
1080  break;
1083  // operon and gap features do not inherit anything
1085  break;
1087  // Gene features can inherit operon by overlap (CONTAINED_WITHIN)
1089  break;
1094  break;
1097  break;
1100  break;
1102  m_ByProduct = true;
1104  break;
1107  break;
1118  break;
1119  default:
1121  break;
1122  }
1123 }
1124 
1125 
1126 inline bool STypeLink::CanHaveGeneParent(void) const
1127 {
1128  return *this && m_CurrentType != CSeqFeatData::eSubtype_gene;
1129 }
1130 
1131 
1132 inline bool STypeLink::CanHaveCommonGene(void) const
1133 {
1134  return CanHaveGeneParent();
1135 }
1136 
1137 
1139 {
1140  if ( !m_ByProduct &&
1144  // cdregion to mRNA can also link to C_region or VDJ_segment
1145  static const CSeqFeatData::ESubtype sm_SpecialVDJTypes[] = {
1152  };
1153  return sm_SpecialVDJTypes;
1154  }
1155  return 0;
1156 }
1157 
1158 
1160 {
1164 }
1165 
1166 
1168 {
1170  // allow linking proteins to cdregion by product and then location.
1172  m_ByProduct = false;
1173  return;
1174  }
1175  }
1176  switch ( m_ParentType ) {
1178  // no inherit of operons if no gene
1180  break;
1182  if ( m_ByProduct ) {
1183  m_ByProduct = false;
1185  }
1186  else {
1187  m_ByProduct = true;
1188  }
1189  break;
1190  default:
1192  break;
1193  }
1194 }
1195 
1196 
1197 namespace {
1198  // Checks if the location has mixed strands or wrong order of intervals
1199  static
1200  bool sx_IsIrregularLocation(const CSeq_loc& loc,
1201  TSeqPos circular_length)
1202  {
1203  try {
1204  // simple locations are regular
1205  if ( !loc.IsMix() ) {
1206  return false;
1207  }
1208 
1209  if ( !loc.GetId() ) {
1210  // multiple ids locations are irregular
1211  return true;
1212  }
1213 
1214  ENa_strand strand = loc.GetStrand();
1215  if ( strand == eNa_strand_other ) {
1216  // mixed strands
1217  return true;
1218  }
1219 
1220  bool plus_strand = !IsReverse(strand);
1221  TSeqPos pos = plus_strand? 0: kInvalidSeqPos;
1222  bool stop = false;
1223 
1224  const CSeq_loc_mix& mix = loc.GetMix();
1225  ITERATE ( CSeq_loc_mix::Tdata, it, mix.Get() ) {
1226  const CSeq_loc& loc1 = **it;
1227  if ( sx_IsIrregularLocation(loc1, circular_length) ) {
1228  return true;
1229  }
1230  if ( circular_length != kInvalidSeqPos ) {
1231  // cannot check interval order on circular sequences
1232  continue;
1233  }
1235  if ( range.Empty() ) {
1236  continue;
1237  }
1238  if ( stop ) {
1239  return true;
1240  }
1241  if ( plus_strand ) {
1242  if ( range.GetFrom() < pos ) {
1243  return true;
1244  }
1245  pos = range.GetTo()+1;
1246  stop = pos == 0;
1247  }
1248  else {
1249  if ( range.GetTo() > pos ) {
1250  return true;
1251  }
1252  pos = range.GetFrom();
1253  stop = pos == 0;
1254  --pos;
1255  }
1256  }
1257 
1258  return false;
1259  }
1260  catch ( CException& ) {
1261  // something's wrong -> irregular
1262  return true;
1263  }
1264  }
1265 
1266 
1267  static
1268  TSeqPos sx_GetCircularLength(CScope& scope,
1269  const CSeq_loc& loc)
1270  {
1271  try {
1272  const CSeq_id* single_id = 0;
1273  loc.CheckId(single_id);
1274  if ( !single_id ) {
1275  return kInvalidSeqPos;
1276  }
1277 
1278  CBioseq_Handle bh = scope.GetBioseqHandle(*single_id);
1279  if ( bh && bh.IsSetInst_Topology() &&
1281  return bh.GetBioseqLength();
1282  }
1283  }
1284  catch ( CException& /*ignored*/ ) {
1285  return kInvalidSeqPos;
1286  }
1287  return kInvalidSeqPos;
1288  }
1289 
1290 
1291  static
1292  TSeqPos sx_GetCircularLength(CScope& scope,
1293  const CSeq_id_Handle& id)
1294  {
1295  try {
1296  CBioseq_Handle bh = scope.GetBioseqHandle(id);
1297  if ( bh && bh.IsSetInst_Topology() &&
1299  return bh.GetBioseqLength();
1300  }
1301  }
1302  catch ( CException& /*ignored*/ ) {
1303  return kInvalidSeqPos;
1304  }
1305  return kInvalidSeqPos;
1306  }
1307 
1308 
1309  static inline
1310  bool sx_CanMatchByQual(CSeqFeatData::ESubtype type)
1311  {
1312  return
1319  }
1320 
1321 
 // Names of the Gb-qual qualifiers that SMatchingQuals looks for.
1322  static const char kQual_transcript_id[] = "transcript_id";
1323  static const char kQual_orig_transcript_id[] = "orig_transcript_id";
1324  static const char kQual_orig_protein_id[] = "orig_protein_id";
 // Slot indices into SMatchingQuals::qq, one per qualifier name above.
 // SMatchingQuals::GetMatch scans the slots in increasing index order, so
 // transcript_id is tested first. kQualPriority_count is the number of slots.
1325  enum {
1326  kQualPriority_transcript_id,
1327  kQualPriority_orig_transcript_id,
1328  kQualPriority_orig_protein_id,
1329  kQualPriority_count
1330  };
1331 
// Holds, for one feature, the qualifiers usable for matching it with another
// feature: one slot per kQualPriority_* index, each either null or
// referencing a qualifier that has a value set.
// NOTE(review): the declaration of the local 'f' used by HasMatch() and by
// the constructor (original lines 1344 and 1368) is missing from this
// listing; presumably it is the Seq-feat object of 'feat' -- confirm.
1332  struct SMatchingQuals {
      // qualifier found for each priority slot; empty reference if absent
1333  CConstRef<CGb_qual> qq[kQualPriority_count];
1334 
1335 
      // Cheap pre-check: true when 'feat' has qualifiers, its subtype passes
      // sx_CanMatchByQual(), and at least one qualifier with a value is
      // named by one of the three kQual_* constants.
1336  static bool HasMatch(const CMappedFeat& feat)
1337  {
1338  if ( !feat.IsSetQual() ) {
1339  return false;
1340  }
1341  if ( !sx_CanMatchByQual(feat.GetFeatSubtype()) ) {
1342  return false;
1343  }
1345  const CSeq_feat::TQual& quals = f->GetQual();
1346  ITERATE ( CSeq_feat::TQual, it, quals ) {
      // qualifiers without a value can never match anything
1347  if ( (*it)->IsSetVal() ) {
1348  const string& qual = (*it)->GetQual();
1349  if ( qual == kQual_orig_protein_id ||
1350  qual == kQual_orig_transcript_id ||
1351  qual == kQual_transcript_id ) {
1352  return true;
1353  }
1354  }
1355  }
1356  return false;
1357  }
1358 
1359 
      // Collects the matching qualifiers of 'feat' into qq[].  All slots
      // stay empty when the feature has no qualifiers or its subtype is
      // rejected by sx_CanMatchByQual().  When a name occurs more than once
      // the last occurrence with a value wins, because each hit overwrites
      // its slot.
1360  explicit SMatchingQuals(const CMappedFeat& feat)
1361  {
1362  if ( !feat.IsSetQual() ) {
1363  return;
1364  }
1365  if ( !sx_CanMatchByQual(feat.GetFeatSubtype()) ) {
1366  return;
1367  }
1369  const CSeq_feat::TQual& quals = f->GetQual();
1370  ITERATE ( CSeq_feat::TQual, it, quals ) {
1371  if ( (*it)->IsSetVal() ) {
1372  const string& qual = (*it)->GetQual();
1373  if ( qual == kQual_orig_protein_id ) {
1374  qq[kQualPriority_orig_protein_id] = *it;
1375  }
1376  else if ( qual == kQual_orig_transcript_id ) {
1377  qq[kQualPriority_orig_transcript_id] = *it;
1378  }
1379  else if ( qual == kQual_transcript_id ) {
1380  qq[kQualPriority_transcript_id] = *it;
1381  }
1382  }
1383  }
1384  }
1385 
1386 
      // Compares this set with 'quals2' slot by slot, in slot-index order.
      // Returns (index + 1) of the first slot that is filled on both sides
      // with equal values, or 0 when no slot matches.
1387  Uint1 GetMatch(const SMatchingQuals& quals2) const
1388  {
1389  for ( int i = 0; i < kQualPriority_count; ++i ) {
1390  if ( qq[i] && quals2.qq[i] &&
1391  qq[i]->GetVal() == quals2.qq[i]->GetVal() ) {
1392  return Uint1(i+1);
1393  }
1394  }
1395  return 0;
1396  }
1397  };
1398 
1399 
1400  static inline
1401  bool sx_CanMatchByQual(const CMappedFeat& feat)
1402  {
1403  return SMatchingQuals::HasMatch(feat);
1404  }
1405 
1406 
1407  static inline
1408  bool sx_GeneSuppressed(const CMappedFeat& feat)
1409  {
1410  if ( feat.IsSetXref() ) {
1411  const CSeq_feat::TXref& xrefs = feat.GetXref();
1412  if ( xrefs.size() == 1 ) {
1413  const CSeqFeatXref& xref = *xrefs[0];
1414  if ( xref.IsSetData() ) {
1415  const CSeqFeatData& data = xref.GetData();
1416  if ( data.IsGene() ) {
1417  const CGene_ref& gene = data.GetGene();
1418  if ( !gene.IsSetLocus() && !gene.IsSetLocus_tag() ) {
1419  // feature has single empty gene xref
1420  return true;
1421  }
1422  }
1423  }
1424  }
1425  }
1426  return false;
1427  }
1428 
1429 
1430  static inline
1431  Uint1 sx_GetQualMatch(const CMappedFeat& feat1,
1432  const CMappedFeat& feat2)
1433  {
1434  SMatchingQuals quals1(feat1);
1435  SMatchingQuals quals2(feat2);
1436  return quals1.GetMatch(quals2);
1437  }
1438 
1439 
// Chooses the overlap test used when looking for a parent of a feature
// located at 'loc' through the type link 'link'.
// Starts from eOverlap_Contained, switches to eOverlap_CheckIntervals when
// the link asks for interval-level overlap, and may then be changed to
// eOverlap_Subset by the second condition.
// NOTE(review): the first line of that second condition (original
// line 1449) is missing from this listing -- confirm against the real file.
1440  static inline
1441  EOverlapType sx_GetOverlapType(const STypeLink& link,
1442  const CSeq_loc& loc,
1443  TSeqPos circular_length)
1444  {
1445  EOverlapType overlap_type = eOverlap_Contained;
1446  if ( link.OverlapByIntervals() ) {
1447  overlap_type = eOverlap_CheckIntervals;
1448  }
      // the literal 'true ||' short-circuits this operand, so
      // sx_IsIrregularLocation() is never actually called from here
1450  (true || sx_IsIrregularLocation(loc, circular_length)) ) {
1451  // LOCATION_SUBSET if bad order or mixed strand
1452  // otherwise CONTAINED_WITHIN
1453  overlap_type = eOverlap_Subset;
1454  }
1455  return overlap_type;
1456  }
1457 
1458 
// Counts loop iterations until 'type' becomes CSeqFeatData::eSubtype_bad and
// returns that count (0 when 'type' is eSubtype_bad on entry).
// NOTE(review): the statement that advances 'type' (original line 1465) is
// missing from this listing; presumably it steps to the parent subtype of
// 'type', making the result the depth of the subtype in the parent-type
// tree -- confirm against the real file.
1459  static
1460  int sx_GetRootDistance(CSeqFeatData::ESubtype type)
1461  {
1462  int distance = 0;
1463  while ( type != CSeqFeatData::eSubtype_bad ) {
1464  ++distance;
1466  }
1467  return distance;
1468  }
1469 
1470 
1471  static
1472  bool sx_IsParentType(CSeqFeatData::ESubtype parent_type,
1473  CSeqFeatData::ESubtype feat_type)
1474  {
1475  if ( feat_type != parent_type ) {
1476  for ( STypeLink link(feat_type); link; ++link ) {
1477  // TODO: VDJ
1478  if ( link.m_ParentType == parent_type ) {
1479  return true;
1480  }
1481  }
1482  }
1483  return false;
1484  }
1485 
1486 
// Quality scores for parent candidates found by reference.
// sx_GetParentTypeQuality() returns values below kBetterTypeParentQuality
// for candidates higher in the type tree than the child, and values at or
// below kWorseTypeParentQuality for all other candidates.
1487  static const int kBetterTypeParentQuality= 1000;
// score given to a gene located through a Gene-ref xref
1488  static const int kByLocusParentQuality = 750;
1489  static const int kSameTypeParentQuality = 500;
// deliberately the same value as kSameTypeParentQuality
1490  static const int kWorseTypeParentQuality = kSameTypeParentQuality;
1491 
1492  static
1493  int sx_GetParentTypeQuality(CSeqFeatData::ESubtype parent,
1494  CSeqFeatData::ESubtype child)
1495  {
1496  int d_child = sx_GetRootDistance(child);
1497  int d_parent = sx_GetRootDistance(parent);
1498  if ( d_parent < d_child ) {
1499  // parent candidate is higher than child
1500  // return value <= kBetterTypeParentQuality
1501  return kBetterTypeParentQuality - (d_child - d_parent);
1502  }
1503  else {
1504  // parent candidate is not higher than child
1505  // return value <= kWorseTypeParentQuality
1506  return kWorseTypeParentQuality - (d_parent - d_child);
1507  }
1508  }
1509 
1510 
// Looks for a parent of 'feat' through its xrefs, restricted to the parent
// type(s) described by 'link'.  Xrefs are scanned in order and the first
// feature found is returned; an empty CMappedFeat means "not found".
// For every xref two lookups are tried:
//   1. a local feature id, resolved within the TSE of the feature's
//      annotation;
//   2. a gene xref resolved with GetGeneByRef().
// NOTE(review): the first line of the condition guarding lookup 2 (original
// line 1540) is missing from this listing; presumably it restricts the
// lookup to links whose parent type is gene -- confirm against the real file.
1511  static
1512  CMappedFeat sx_GetParentByRef(const CMappedFeat& feat,
1513  const STypeLink& link)
1514  {
1515  if ( !feat.IsSetXref() ) {
1516  return CMappedFeat();
1517  }
1518 
1519  CTSE_Handle tse = feat.GetAnnot().GetTSE_Handle();
1520  const CSeq_feat::TXref& xrefs = feat.GetXref();
1521  ITERATE ( CSeq_feat::TXref, it, xrefs ) {
1522  const CSeqFeatXref& xref = **it;
1523  if ( xref.IsSetId() ) {
1524  const CFeat_id& id = xref.GetId();
1525  if ( id.IsLocal() ) {
      // when the link lists several parent types, try each of them in
      // turn; the list is terminated by eSubtype_bad
1526  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
1527  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
1528  if ( CSeq_feat_Handle feat1 = tse.GetFeatureWithId(*type_ptr, id.GetLocal(), feat) ) {
1529  return feat1;
1530  }
1531  }
1532  }
1533  else {
      // single parent type for this link
1534  if ( CSeq_feat_Handle feat1 = tse.GetFeatureWithId(link.m_ParentType, id.GetLocal(), feat) ) {
1535  return feat1;
1536  }
1537  }
1538  }
1539  }
1541  xref.IsSetData() ) {
1542  const CSeqFeatData& data = xref.GetData();
1543  if ( data.IsGene() ) {
1544  CSeq_feat_Handle feat1 = tse.GetGeneByRef(data.GetGene(), feat);
1545  if ( feat1 ) {
1546  return feat1;
1547  }
1548  }
1549  }
1550  }
1551  return CMappedFeat();
1552  }
1553 
1554 
1555  static
1556  CMappedFeat sx_GetParentByOverlap(const CMappedFeat& feat,
1557  const STypeLink& link,
1558  TSeqPos circular_length)
1559  {
1560  CMappedFeat best_parent;
1561 
1562  const CSeq_loc& c_loc = feat.GetLocation();
1563 
1564  // find best suitable parent by overlap score
1565  EOverlapType overlap_type =
1566  sx_GetOverlapType(link, c_loc, circular_length);
1567 
1568  Int8 best_overlap = kMax_I8;
1569  SAnnotSelector sel(link.m_ParentType);
1570  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
1571  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
1572  sel.IncludeFeatSubtype(*type_ptr);
1573  }
1574  }
1575  sel.SetByProduct(link.m_ByProduct);
1576  for (CFeat_CI it(feat.GetScope(), c_loc, sel); it; ++it) {
1577  Int8 overlap = TestForOverlap64(it->GetLocation(),
1578  c_loc,
1579  overlap_type,
1580  circular_length,
1581  &feat.GetScope());
1582  if ( overlap >= 0 && overlap < best_overlap ) {
1583  best_parent = *it;
1584  best_overlap = overlap;
1585  }
1586  }
1587  return best_parent;
1588  }
1589 }
1590 
// Master switch for s_AddCircularRanges(): when false, that function never
// splits a feature range that wraps around the origin.
1591 static const bool kSplitCircular = true;
// NOTE(review): the use of this flag is not visible in this part of the
// file; judging by its name it presumably enables a cheaper range pre-check
// before the full overlap test -- confirm.
1592 static const bool kOptimizeTestOverlap = true;
1593 
1594 /// @name GetParentFeature
1595 /// The algorithm is the following:
1596 /// 1. Feature types are organized in a tree of possible
1597 /// parent-child relationship:
1598 /// 1.1. operon, gap cannot have a parent,
1599 /// 1.2. gene can have operon as a parent,
1600 /// 1.3. mRNA, VDJ_segment, and C_region can have gene as a parent,
1601 /// 1.4. cdregion can have mRNA, VDJ_segment, or C_region as a parent,
1602 /// 1.5. prot can have cdregion as a parent (by its product location),
1603 /// 1.6. mat_peptide, sig_peptide can have prot as a parent,
1604 /// 1.x. all other feature types can have gene as a parent.
1605 /// 2. If parent of a nearest feature type is not found then the next type
1606 /// in the tree is checked, except prot which will have no parent
1607 /// if no cdregion is found.
1608 /// 3. For each parent type candidate the search is done in several ways:
1609 /// 3.1. first we look for a parent by Seq-feat.xref field,
1610 /// 3.2. then by Gene-ref if current parent type is gene,
1611 /// 3.3. then parent candidates are searched by the best intersection
1612 /// of their locations (product in case of prot -> cdregion link),
1613 /// 3.4. if no candidates are found next parent type is checked.
1616 {
1617  CMappedFeat best_parent;
      // the circular length is computed once from the child's location and
      // reused for every parent type tried below
1618  TSeqPos circular_length =
1619  sx_GetCircularLength(feat.GetScope(), feat.GetLocation());
      // walk the chain of possible parent types; stop at the first type
      // for which any parent is found
1620  for( STypeLink link(feat.GetFeatSubtype()); link; ++link ) {
1621  best_parent = sx_GetParentByRef(feat, link);
1622  if ( best_parent ) {
1623  // found by Xref
1624  break;
1625  }
1626 
1627  best_parent = sx_GetParentByOverlap(feat, link, circular_length);
1628  if ( best_parent ) {
1629  // parent is found by overlap
1630  break;
1631  }
1632  }
      // empty when no parent type produced a match
1633  return best_parent;
1634 }
1635 
1636 
1637 /////////////////////////////////////////////////////////////////////////////
1638 // CFeatTreeIndex
1639 /////////////////////////////////////////////////////////////////////////////
1640 
1641 
1642 namespace {
      // Cache used by SFeatRangeInfo::x_CanonizeId(): maps a Seq-id handle as
      // found on a feature to the handle chosen to represent it.
1643  typedef map<CSeq_id_Handle, CSeq_id_Handle> TCanonicalIdsMap;
1644 
1645  struct SBestInfo {
1646  typedef CFeatTree::CFeatInfo CFeatInfo;
1647  SBestInfo(void)
1648  : m_Quality(kMin_I1),
1649  m_Overlap(kMax_I8),
1650  m_Info(0)
1651  {
1652  }
1653 
1654  void CheckBest(Int1 quality, Int8 overlap, CFeatInfo* info)
1655  {
1656  _ASSERT(overlap >= 0);
1657  if ( (quality > m_Quality ||
1658  (quality == m_Quality && overlap < m_Overlap)) ) {
1659  m_Quality = quality;
1660  m_Overlap = overlap;
1661  m_Info = info;
1662  }
1663  }
1664  void CheckBest(const SBestInfo& b)
1665  {
1666  CheckBest(b.m_Quality, b.m_Overlap, b.m_Info);
1667  }
1668 
1669  Int1 m_Quality;
1670  Int8 m_Overlap;
1671  CFeatInfo* m_Info;
1672  };
// One (Seq-id, range) entry of a feature, used as the unit of the sorted
// child and parent arrays.  A feature contributes several entries when it is
// split over the origin of a circular sequence or spans several Seq-ids.
// NOTE(review): two lines are missing from this listing -- the last argument
// of sequence::GetId() (original line 1697) and the iterator parameter 'it'
// of the second constructor (original line 1730).
1673  struct SFeatRangeInfo {
1674  typedef CFeatTree::CFeatInfo CFeatInfo;
1675 
      // Seq-id of this entry after canonization; may be empty
1676  CSeq_id_Handle m_Id;
      // range covered on m_Id
1677  CRange<TSeqPos> m_Range;
      // the feature this entry belongs to
1678  CFeatInfo* m_Info;
      // set by s_AddCircularRanges() on entries produced by splitting
1679  bool m_SplitRange;
1680 
1681  // min start coordinate for all entries after this
      // (not initialized by the constructors; assigned later by the scan
      // that walks the parent array)
1682  TSeqPos m_MinFrom;
1683 
1684  // results
1685  SBestInfo* m_Best;
1686 
      // Replaces m_Id with its canonical handle.  The result of every lookup
      // is cached in 'ids_map'; when sequence::GetId() yields no handle the
      // original id is kept and cached as its own canonical form.
1687  void x_CanonizeId(TCanonicalIdsMap& ids_map)
1688  {
1689  if ( m_Id ) {
1690  auto iter = ids_map.find(m_Id);
1691  if ( iter != ids_map.end() ) {
1692  m_Id = iter->second;
1693  }
1694  else {
1695  CSeq_id_Handle new_id = sequence::GetId(m_Id,
1696  m_Info->m_Feat.GetScope(),
1698  if ( !new_id ) {
1699  new_id = m_Id;
1700  }
1701  ids_map[m_Id] = new_id;
1702  m_Id = new_id;
1703  }
1704  }
1705  }
      // Builds the entry from the feature's total range: over its product
      // when 'by_product' is true, over its location otherwise.  m_Range is
      // assigned only when the corresponding id is non-empty.
1706  SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1707  CFeatInfo& info, SBestInfo* best,
1708  bool by_product = false)
1709  : m_Info(&info),
1710  m_SplitRange(false),
1711  m_Best(best)
1712  {
1713  if ( by_product ) {
1714  m_Id = info.m_Feat.GetProductId();
1715  if ( m_Id ) {
1716  m_Range = info.m_Feat.GetProductTotalRange();
1717  }
1718  }
1719  else {
1720  m_Id = info.m_Feat.GetLocationId();
1721  if ( m_Id ) {
1722  m_Range = info.m_Feat.GetLocationTotalRange();
1723  }
1724  }
1725  // id may be non-canonical
1726  x_CanonizeId(ids_map);
1727  }
      // Builds the entry from one element of a handle-range map: the key is
      // the id, the overlapping range of the value is the range.
1728  SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1729  CFeatInfo& info, SBestInfo* best,
1731  : m_Id(it->first),
1732  m_Range(it->second.GetOverlappingRange()),
1733  m_Info(&info),
1734  m_SplitRange(false),
1735  m_Best(best)
1736  {
1737  // id may be non-canonical
1738  x_CanonizeId(ids_map);
1739  }
1740  };
1741  struct PLessByStart {
1742  // sort first by start coordinate, then by end coordinate
1743  bool operator()(const SFeatRangeInfo& a, const SFeatRangeInfo& b) const
1744  {
1745  return a.m_Id < b.m_Id ||
1746  (a.m_Id == b.m_Id && a.m_Range < b.m_Range);
1747  }
1748  };
1749  struct PLessByEnd {
1750  // sort first by end coordinate, then by start coordinate
1751  bool operator()(const SFeatRangeInfo& a, const SFeatRangeInfo& b) const
1752  {
1753  return a.m_Id < b.m_Id ||
1754  (a.m_Id == b.m_Id &&
1755  (a.m_Range.GetToOpen() < b.m_Range.GetToOpen() ||
1756  (a.m_Range.GetToOpen() == b.m_Range.GetToOpen() &&
1757  a.m_Range.GetFrom() < b.m_Range.GetFrom())));
1758  }
1759  };
1760 
// Splits the range of a feature that wraps around the origin of a circular
// sequence into two entries and appends both to 'rr'.
// Returns true when the two entries were added; returns false, adding
// nothing and leaving 'range_info' unchanged, when splitting is disabled,
// the location has multiple strands, its biological start/stop order does
// not indicate wrapping, or no circular length is available for the
// sequence.  On success 'range_info' itself is modified (m_SplitRange and
// m_Range).
// 'by_product' selects the product location instead of the feature location.
1761  inline
1762  bool s_AddCircularRanges(vector<SFeatRangeInfo>& rr,
1763  SFeatRangeInfo& range_info,
1764  bool by_product = false)
1765  {
      // with this constant set to true both '!kAllowOriginInGap' checks
      // below are inactive
1766  const bool kAllowOriginInGap = true;
1767  if ( !kSplitCircular ) {
1768  return false;
1769  }
1770  if ( !kAllowOriginInGap && range_info.m_Range.GetFrom() != 0 ) {
1771  // not from the beginning of sequence
1772  return false;
1773  }
1774  const CSeq_loc& loc = by_product?
1775  range_info.m_Info->m_Feat.GetProduct():
1776  range_info.m_Info->m_Feat.GetLocation();
1777  ENa_strand strand = loc.GetStrand();
1778  if ( strand == eNa_strand_other ) {
1779  // multiple strands
1780  return false;
1781  }
1782  TSeqPos start = loc.GetStart(eExtreme_Biological);
1783  TSeqPos stop = loc.GetStop (eExtreme_Biological);
      // on the reverse strand the two biological extremes are exchanged
      // before the comparison below
1784  if ( IsReverse(strand) ) {
1785  swap(start, stop);
1786  }
1787  if ( start <= stop ) {
1788  // direction matches strand - non circular
1789  return false;
1790  }
1791  TSeqPos circular_length = sx_GetCircularLength(range_info.m_Info->m_Feat.GetScope(), range_info.m_Id);
1792  if ( circular_length == kInvalidSeqPos ) {
1793  return false;
1794  }
1795  if ( !kAllowOriginInGap && range_info.m_Range.GetToOpen() < circular_length ) {
1796  // not till the end of sequence
1797  return false;
1798  }
1799  // 0-stop, start-circular end
      // remember the original open end: it is overwritten by SetTo() and
      // restored for the second piece
1800  TSeqPos total_end_open = range_info.m_Range.GetToOpen();
1801  range_info.m_SplitRange = true;
      // first piece: original range start .. stop
1802  range_info.m_Range.SetTo(stop);
1803  rr.push_back(range_info);
      // second piece: start .. original range end
1804  range_info.m_Range.SetFrom(start);
1805  range_info.m_Range.SetToOpen(total_end_open);
1806  rr.push_back(range_info);
1807  return true;
1808  }
1809 
// Adds one range entry per element of the handle-range map built from 'loc'
// to 'rr', and marks the feature as multi-id.
// NOTE(review): the parameter line declaring 'info' (original line 1812) is
// missing from this listing; the body assigns info.m_MultiId and passes
// 'info' to the SFeatRangeInfo constructor, so it is presumably a
// CFeatTree::CFeatInfo reference -- confirm against the real file.
1810  void s_AddRanges(TCanonicalIdsMap& ids_map,
1811  vector<SFeatRangeInfo>& rr,
1813  SBestInfo* best,
1814  const CSeq_loc& loc)
1815  {
1816  info.m_MultiId = true;
1817  CHandleRangeMap hrmap;
1818  hrmap.AddLocation(loc);
1819  ITERATE ( CHandleRangeMap, it, hrmap ) {
      // ids are canonized by the SFeatRangeInfo constructor
1820  SFeatRangeInfo range_info(ids_map, info, best, it);
1821  rr.push_back(range_info);
1822  }
1823  }
1824 
      // best parent candidates
1825  typedef vector<SBestInfo> TBestArray;
      // (Seq-id, range) entries, kept sorted by PLessByStart or PLessByEnd
1826  typedef vector<SFeatRangeInfo> TRangeArray;
      // non-owning pointers to feature infos
1827  typedef vector<CFeatTree::CFeatInfo*> TInfoArray;
1828 
1829  inline
1830  Int1 s_GetParentQuality(const CFeatTree::CFeatInfo& feat,
1831  const CFeatTree::CFeatInfo& parent)
1832  {
1833  if ( feat.m_CanMatchByQual && parent.m_CanMatchByQual ) {
1834  return sx_GetQualMatch(feat.m_Feat, parent.m_Feat);
1835  }
1836  return 0;
1837  }
1838 
// Range index of all features of one subtype, built incrementally from the
// tree's feature array and kept sorted with PLessByEnd.  When constructed
// with by_product == true, ranges are taken from the features' products and
// features without a product are left out.
1839  class CFeatTreeParentTypeIndex : public CObject
1840  {
1841  public:
1842  CFeatTreeParentTypeIndex(CSeqFeatData::ESubtype type,
1843  bool by_product)
1844  : m_Type(type),
1845  m_ByProduct(by_product),
1846  m_IndexedParents(0)
1847  {
1848  }
1849 
      // Returns the index, first extending it with the elements of 'feats'
      // that were appended since the previous call.  The entire index is
      // re-sorted whenever 'feats' has grown.
1850  TRangeArray& GetIndex(TCanonicalIdsMap& ids_map,
1851  const TInfoArray& feats) {
1852  if ( m_IndexedParents == feats.size() ) {
      // nothing new since the last call
1853  return m_Index;
1854  }
1855  for ( size_t ind = m_IndexedParents; ind < feats.size(); ++ind ) {
1856  CFeatTree::CFeatInfo& feat_info = *feats[ind];
      // skip features already indexed, features of another subtype, and
      // (for a by-product index) features without a product
1857  if ( feat_info.m_AddIndex < m_IndexedParents ||
1858  feat_info.GetSubtype() != m_Type ||
1859  (m_ByProduct && !feat_info.m_Feat.IsSetProduct()) ) {
1860  continue;
1861  }
1862  SFeatRangeInfo range_info(ids_map, feat_info, 0, m_ByProduct);
1863  if ( range_info.m_Id ) {
      // single id: add two split entries if the feature wraps around
      // the origin, otherwise one plain entry
1864  if ( !s_AddCircularRanges(m_Index, range_info, m_ByProduct) ) {
1865  m_Index.push_back(range_info);
1866  }
1867  }
1868  else {
      // no single id: add one entry per id of the location
1869  s_AddRanges(ids_map,
1870  m_Index, feat_info, 0,
1871  m_ByProduct?
1872  feat_info.m_Feat.GetProduct():
1873  feat_info.m_Feat.GetLocation());
1874  }
1875  }
1876  sort(m_Index.begin(), m_Index.end(), PLessByEnd());
1877  m_IndexedParents = feats.size();
1878  return m_Index;
1879  }
1880 
1881  private:
      // subtype of the indexed features
1882  CSeqFeatData::ESubtype m_Type;
      // index product ranges instead of location ranges
1883  bool m_ByProduct;
      // size of the feature array at the end of the last GetIndex() call
1884  size_t m_IndexedParents;
      // the entries, sorted with PLessByEnd
1885  TRangeArray m_Index;
1886  };
1887 }
1888 
1889 
// Collection of per-parent-type range indexes, keyed by
// (subtype, by_product), together with the canonical-id cache shared by all
// of them.
// NOTE(review): several lines are missing from this listing -- the typedef
// at original line 1894, the first line of the three-argument GetIndex()
// signature (1896), the declaration of 'index' (1899) and the declaration of
// the member m_Index (1914).  Confirm the details against the real file.
1890 class CFeatTreeIndex : public CObject
1891 {
1892 public:
1893  typedef pair<CSeqFeatData::ESubtype, bool> TParentKey;
1895 
      // Returns the range index for the given key, creating the per-type
      // index object on first use and bringing it up to date with 'feats'.
1897  bool by_product,
1898  const TInfoArray& feats) {
1900  m_Index[TParentKey(type, by_product)];
1901  if ( !index ) {
1902  index = new CFeatTreeParentTypeIndex(type, by_product);
1903  }
1904  return index->GetIndex(m_CanonicalIds, feats);
1905  }
1906 
      // Convenience overload taking the key from a type link.
1907  TRangeArray& GetIndex(const STypeLink& link, const TInfoArray& feats) {
1908  return GetIndex(link.m_ParentType, link.m_ByProduct, feats);
1909  }
1910 
1911 private:
1912  friend class CFeatTree;
1913 
      // cache of canonical Seq-id handles, passed to every index
1915  TCanonicalIdsMap m_CanonicalIds;
1916 };
1917 
1918 
1919 /////////////////////////////////////////////////////////////////////////////
1920 // CFeatTree
1921 /////////////////////////////////////////////////////////////////////////////
1922 
1924 {
1925  x_Init();
1926 }
1927 
1928 
1930 {
1931  x_Init();
1932  AddFeatures(it);
1933 }
1934 
1935 
1937 {
1938  x_Init();
1939  CFeat_CI it(sah);
1940  AddFeatures(it);
1941 }
1942 
1944 {
1945  x_Init();
1946  CFeat_CI it(sah, sel);
1947  AddFeatures(it);
1948 }
1949 
1950 
1952 {
1953  x_Init();
1954  CFeat_CI it(seh);
1955  AddFeatures(it);
1956 }
1957 
1959 {
1960  x_Init();
1961  CFeat_CI it(seh, sel);
1962  AddFeatures(it);
1963 }
1964 
1965 
1967 {
1968 }
1969 
1970 
1972 {
1973  *this = ft;
1974 }
1975 
1976 
1978 {
1979  if ( this != &ft ) {
1980  m_AssignedParents = 0;
1981  m_AssignedGenes = 0;
1982  m_InfoMap.clear();
1983  m_InfoArray.clear();
1984  m_RootInfo = CFeatInfo();
1990  m_Index = null;
1991  m_InfoArray.reserve(ft.m_InfoArray.size());
1992  ITERATE ( TInfoArray, it, ft.m_InfoArray ) {
1993  AddFeature((*it)->m_Feat);
1994  }
1995  }
1996  return *this;
1997 }
1998 
1999 
2001 {
2002  m_AssignedParents = 0;
2003  m_AssignedGenes = 0;
2007  m_IgnoreMissingGeneXref = false;
2009 }
2010 
2011 
2013 {
2014  m_FeatIdMode = mode;
2015 }
2016 
2017 
2019 {
2021 }
2022 
2023 
2025 {
2026  m_IgnoreMissingGeneXref = ignore;
2027 }
2028 
2029 
2031 {
2033 }
2034 
2035 
2037 {
2038  for ( ; it; ++it ) {
2039  AddFeature(*it);
2040  }
2041 }
2042 
2043 
2045 {
2046  if ( !feat ) {
2047  NCBI_THROW(CObjMgrException, eInvalidHandle,
2048  "CFeatTree: feature is null");
2049  }
2050  _ASSERT(m_InfoMap.size() == m_InfoArray.size());
2051  size_t index = m_InfoMap.size();
2053  if ( !info.m_Feat ) {
2054  _ASSERT(m_InfoMap.size() == m_InfoArray.size()+1);
2055  m_InfoArray.push_back(&info);
2056  info.m_AddIndex = index;
2057  info.m_Feat = feat;
2058  info.m_CanMatchByQual = sx_CanMatchByQual(feat);
2059  info.m_IsSetGene = sx_GeneSuppressed(feat);
2060  }
2061  else {
2062  _ASSERT(m_InfoMap.size() == m_InfoArray.size());
2063  }
2064 }
2065 
2066 
2068 {
2069  return x_GetInfo(feat.GetSeq_feat_Handle());
2070 }
2071 
2072 
2074 {
2075  TInfoMap::iterator it = m_InfoMap.find(feat);
2076  if ( it == m_InfoMap.end() ) {
2077  NCBI_THROW(CObjMgrException, eFindFailed,
2078  "CFeatTree: feature not found");
2079  }
2080  return it->second;
2081 }
2082 
2083 
2085 {
2087  if ( it == m_InfoMap.end() ) {
2088  NCBI_THROW(CObjMgrException, eFindFailed,
2089  "CFeatTree: feature not found");
2090  }
2091  return it->second.m_Feat;
2092 }
2093 
2094 
2096 {
2097  TInfoMap::iterator it = m_InfoMap.find(feat);
2098  if ( it == m_InfoMap.end() ) {
2099  return 0;
2100  }
2101  return &it->second;
2102 }
2103 
2104 
2105 pair<int, CFeatTree::CFeatInfo*>
2107  CSeqFeatData::ESubtype parent_type)
2108 {
2109  pair<int, CFeatInfo*> ret(0, nullptr);
2110  if ( !info.m_Feat.IsSetXref() ) {
2111  return ret;
2112  }
2113  CTSE_Handle tse = info.GetTSE();
2114  const CSeq_feat::TXref& xrefs = info.m_Feat.GetXref();
2115  ITERATE ( CSeq_feat::TXref, xit, xrefs ) {
2116  const CSeqFeatXref& xref = **xit;
2117  if ( !xref.IsSetId() ) {
2118  continue;
2119  }
2120  const CFeat_id& id = xref.GetId();
2121  if ( !id.IsLocal() ) {
2122  continue;
2123  }
2124  vector<CSeq_feat_Handle> ff =
2125  tse.GetFeaturesWithId(parent_type, id.GetLocal(), info.m_Feat);
2126  ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2127  CFeatInfo* parent = x_FindInfo(*fit);
2128  if ( !parent ) {
2129  continue;
2130  }
2131  int quality =
2132  sx_GetParentTypeQuality(parent->GetSubtype(),
2133  info.GetSubtype());
2134  if ( quality > ret.first ) {
2135  ret.first = quality;
2136  ret.second = parent;
2137  }
2138  }
2139  }
2140  if ( ret.first > kByLocusParentQuality ) {
2141  return ret;
2142  }
2143  if ( (parent_type == CSeqFeatData::eSubtype_gene ||
2144  parent_type == CSeqFeatData::eSubtype_any) &&
2145  sx_IsParentType(CSeqFeatData::eSubtype_gene,
2146  info.GetSubtype()) ) {
2147  // assign non-genes to genes by Gene-ref
2148  ITERATE ( CSeq_feat::TXref, xit, xrefs ) {
2149  const CSeqFeatXref& xref = **xit;
2150  if ( xref.IsSetData() ) {
2151  const CSeqFeatData& data = xref.GetData();
2152  if ( data.IsGene() ) {
2153  vector<CSeq_feat_Handle> ff =
2154  tse.GetGenesByRef(data.GetGene(), info.m_Feat);
2155  ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2156  CFeatInfo* gene = x_FindInfo(*fit);
2157  if ( gene ) {
2158  ret.first = kByLocusParentQuality;
2159  ret.second = gene;
2160  return ret;
2161  }
2162  }
2163  ret.first = kByLocusParentQuality;
2164  ret.second = 0;
2165  return ret;
2166  }
2167  }
2168  }
2169  }
2170  return ret;
2171 }
2172 
2173 
2175 {
2177  pair<int, CFeatInfo*> parent =
2179  if ( !parent.second ) {
2180  if ( parent.first == kByLocusParentQuality && !GetIgnoreMissingGeneXref() ) {
2181  // explicit xref to a missing gene
2182  x_SetGene(info, 0);
2183  }
2184  return false;
2185  }
2186  if ( parent.first <= kWorseTypeParentQuality ||
2187  parent.first == kSameTypeParentQuality ) {
2188  // found reference is of the same or worse type
2189  if ( m_FeatIdMode == eFeatId_by_type ) {
2190  // eFeatId_by_type limits parents to regular tree order
2191  return false;
2192  }
2194  // otherwise check for circular references
2195  if ( parent.second->IsSetParent() &&
2196  parent.second->m_Parent == &info ) {
2197  // two features cycle, keep existing parent
2198  return false;
2199  }
2200  pair<int, CFeatInfo*> grand_parent =
2202  if ( grand_parent.second == &info ) {
2203  // new circular reference, choose by quality
2204  if ( parent.first < grand_parent.first ) {
2205  return false;
2206  }
2207  }
2208  }
2209  // check if gene is found over possible intermediate parents
2210  if ( parent.second->IsGene() ) {
2211  // the gene link may be turned off
2213  return false;
2214  }
2215  // if intermediate parents are possible
2216  if ( STypeLink(info.GetSubtype()).m_ParentType!=CSeqFeatData::eSubtype_gene ) {
2217  // then assign gene only
2218  if ( !info.IsSetGene() ) {
2219  x_SetGene(info, parent.second);
2220  }
2221  return false;
2222  }
2223  }
2224  x_SetParent(info, *parent.second);
2225  return true;
2226 }
2227 
2232 };
2233 // Check what strand match is required
2235  const CFeatTree::CFeatInfo& info,
2236  const CFeatTree* tree)
2237 {
2238  if ( link.m_ParentType == CSeqFeatData::eSubtype_gene ) {
2240  tree->GetSNPStrandMode() == tree->eSNPStrand_both ) {
2241  // try snp rev
2242  return eStrandMatch_any;
2243  }
2244  if ( info.m_Feat.IsSetExcept_text() &&
2245  info.m_Feat.GetExcept_text().find("trans-splicing") != NPOS ) {
2247  }
2248  }
2249  return eStrandMatch_all;
2250 }
2251 
2252 
2254 {
2255  bool operator()(const SBestInfo& info1, const SBestInfo& info2) const {
2256  if (info1.m_Info && info2.m_Info) {
2257  if (info1.m_Quality != info2.m_Quality) {
2258  return info1.m_Quality > info2.m_Quality;
2259  }
2260  if (info1.m_Overlap != info2.m_Overlap) {
2261  return info1.m_Overlap < info2.m_Overlap;
2262  }
2263  }
2264  return info1.m_Info < info2.m_Info;
2265  }
2266 };
2267 
2268 
2270 {
2271 public:
2273  {
2274  m_IsAmbiguous = false;
2275  size_t cnt = features.size();
2276  for (size_t i = 0; i < cnt; ++i) {
2277  m_Children.emplace(features[i], SCandidates(i));
2278  }
2279  }
2280 
2282  typedef list<CFeatInfo*> TChildList;
2283  struct SParentInfo {
2285  : m_NewParent(true),
2287  {
2288  }
2292  };
2293 
2294  bool Add(CFeatInfo* child, CFeatInfo* parent, Int1 quality, Int8 overlap)
2295  {
2296  // Store separate SBestInfo for each child/parent candidate.
2297  SParentInfo& parent_info = m_Parents[parent];
2298  if ( parent_info.m_NewParent ) {
2299  // new parent appeared
2300  // check if it already has children of this type
2301  auto subtype = child->GetSubtype();
2302  for ( auto& c : parent->m_Children ) {
2303  if ( c->GetSubtype() == subtype ) {
2304  parent_info.m_DoesNotNeedChildren = true;
2305  break;
2306  }
2307  }
2308  parent_info.m_NewParent = false;
2309  }
2310  if ( quality == 0 && parent_info.m_DoesNotNeedChildren ) {
2311  return false;
2312  }
2313  SBestInfo info;
2314  info.CheckBest(quality, overlap, parent);
2315  _ASSERT(m_Children.find(child) != m_Children.end());
2316  SCandidates& c = m_Children[child];
2317  if ( !c.parents.empty() ) {
2318  m_IsAmbiguous = true;
2319  }
2320  c.parents.insert(info);
2321  parent_info.m_ChildrenCandidates.push_back(child);
2322  return true;
2323  }
2324 
2325  void Disambiguate(TBestArray& bests);
2326 
2328 
2330  {
2331  SCandidates(void) : index(0) {}
2332  SCandidates(size_t i) : index(i) {}
2333  size_t index;
2335  };
2338 
2339 private:
2343 };
2344 
2345 
2347 {
2349 
2350  bool operator()(const TChild& c1, const TChild& c2) const {
2351  const TChild::value_type& cr1 = *c1;
2352  const TChild::value_type& cr2 = *c2;
2353  if (cr1.first == cr2.first) return false;
2354  // Children with fewer parents go first.
2355  if (cr1.second.parents.size() != cr2.second.parents.size()) {
2356  return cr1.second.parents.size() < cr2.second.parents.size();
2357  }
2358  // Check for better parent quality/overlap.
2359  if (!cr1.second.parents.empty()) {
2360  const SBestInfo& p1 = *cr1.second.parents.begin();
2361  const SBestInfo& p2 = *cr2.second.parents.begin();
2362  if (p1.m_Quality != p2.m_Quality) return p1.m_Quality > p2.m_Quality;
2363  if (p1.m_Overlap != p2.m_Overlap) return p1.m_Overlap < p2.m_Overlap;
2364  }
2365  // Sort children by other values.
2366  const CMappedFeat& f1 = cr1.first->m_Feat;
2367  const CMappedFeat& f2 = cr2.first->m_Feat;
2368  // Sort by location/product
2370  if (cmp != 0) return cmp < 0;
2371  if ( f1.IsSetProduct() ) {
2372  // Features with product go first.
2373  if ( !f2.IsSetProduct() ) return true;
2375  if (cmp != 0) return cmp < 0;
2376  }
2377  else if ( f2.IsSetProduct() ) return false;
2378 
2379  // Sort by feature id, if any
2380  if ( f1.IsSetId() ) {
2381  if ( !f2.IsSetId() ) return true; // Features with id go first.
2382  if (f1.GetId().Which() != f2.GetId().Which()) {
2383  return f1.GetId().Which() < f2.GetId().Which();
2384  }
2385  switch ( f1.GetId().Which() ) {
2386  case CFeat_id::e_General:
2387  cmp = f1.GetId().GetGeneral().Compare(f2.GetId().GetGeneral());
2388  if (cmp != 0) return cmp < 0;
2389  break;
2390  case CFeat_id::e_Gibb:
2391  if (f1.GetId().GetGibb() != f2.GetId().GetGibb()) {
2392  return f1.GetId().GetGibb() < f2.GetId().GetGibb();
2393  }
2394  break;
2395  case CFeat_id::e_Giim:
2396  {
2397  const CGiimport_id& giim1 = f1.GetId().GetGiim();
2398  const CGiimport_id& giim2 = f2.GetId().GetGiim();
2399  if (giim1.GetId() != giim2.GetId()) {
2400  return giim1.GetId() < giim2.GetId();
2401  }
2402  if ( giim1.IsSetDb() ) {
2403  if ( !giim2.IsSetDb() ) return true;
2404  cmp = NStr::Compare(giim1.GetDb(), giim2.GetDb());
2405  if (cmp != 0) return cmp < 0;
2406  }
2407  else if ( giim2.IsSetDb() ) return false;
2408  if ( giim1.IsSetRelease() ) {
2409  if ( !giim2.IsSetRelease() ) return true;
2410  cmp = NStr::Compare(giim1.GetRelease(), giim2.GetRelease());
2411  if (cmp != 0) return cmp < 0;
2412  }
2413  else if ( giim2.IsSetRelease() ) return false;
2414  break;
2415  }
2416  case CFeat_id::e_Local:
2417  {
2418  const CObject_id& oid1 = f1.GetId().GetLocal();
2419  const CObject_id& oid2 = f2.GetId().GetLocal();
2420  if ( oid1.IsId() ) {
2421  if ( !oid2.IsId() ) return true;
2422  if (oid1.GetId() != oid2.GetId()) {
2423  return oid1.GetId() < oid2.GetId();
2424  }
2425  }
2426  else if ( oid1.IsStr() ) {
2427  if ( !oid2.IsStr() ) return false;
2428  cmp = NStr::Compare(oid1.GetStr(), oid2.GetStr());
2429  if (cmp != 0) return cmp < 0;
2430  }
2431  break;
2432  }
2433  default:
2434  break;
2435  }
2436  }
2437  else if ( f2.IsSetId() ) return false;
2438 
2439  // Fallback - sort by ASN.1 string representation (can be slow)
2440  string asn1, asn2;
2441  asn1 << f1.GetMappedFeature();
2442  asn2 << f2.GetMappedFeature();
2443  return asn1 < asn2;
2444  }
2445 };
2446 
2447 
// Resolves ambiguous child/parent candidates collected by Add() and writes
// the chosen parent of each child into 'bests' at the child's index.
// Does nothing unless some child received more than one candidate.
// Children are visited in SChildLess order; each takes the first candidate
// still left in its set, and that parent is then withdrawn from every child
// that listed it as a candidate.
2448 void CDisambiguator::Disambiguate(TBestArray& bests)
2449 {
2450  if ( !m_IsAmbiguous || m_Parents.empty() ) return; // No ambiguous features.
2451 
2452  // Children must be sorted based on both key and value from TChildren map,
2453  // so we need to create a temporary set.
2454  typedef set<TChildren::const_iterator, SChildLess> TOrderedChildren;
2455  TOrderedChildren ordered_children;
2456  ITERATE(TChildren, ci, m_Children) {
2457  if (ci->second.parents.empty()) continue;
2458  ordered_children.insert(ci);
2459  }
2460  ITERATE(TOrderedChildren, ci, ordered_children) {
2461  const TChildren::value_type& child = **ci;
      // the candidate set may have been emptied by an earlier iteration
      // of this loop
2462  if (child.second.parents.empty()) continue;
2463  // Use the first (possibly the unique) parent.
2464  bests[(*ci)->second.index] = *child.second.parents.begin();
2465  CFeatInfo* parent = child.second.parents.begin()->m_Info;
2466  // Remove the parent candidate from all other children.
2467  TParents::iterator pi = m_Parents.find(parent);
2468  _ASSERT(pi != m_Parents.end());
2469  ITERATE(TChildList, pci, pi->second.m_ChildrenCandidates ) {
2470  SCandidates& ccand = m_Children[*pci];
      // erase the entry for this parent from the candidate set; this
      // also runs for the current child, whose set loses the parent it
      // was just given
2471  ERASE_ITERATE(TBestSet, bi, ccand.parents) {
2472  if (bi->m_Info == parent) {
2473  ccand.parents.erase(bi);
2474  break;
2475  }
2476  }
      // the current child keeps its assignment in 'bests'
2477  if (*pci == (*ci)->first) continue;
      // any other child that already had this parent recorded in
      // 'bests' loses it
2478  SBestInfo& info = bests[ccand.index];
2479  if (info.m_Info == parent) {
2480  info.m_Info = nullptr;
2481  }
2482  }
2483  }
2484 }
2485 
2486 
2487 static inline
2489 {
2490  return r1.GetFrom() < r2.GetFrom() || r1.GetToOpen() > r2.GetToOpen();
2491 }
2492 
2493 
2495  TBestArray& bests,
2496  const STypeLink& link,
2497  TRangeArray& pp,
2498  CFeatTree* tree,
2499  TCanonicalIdsMap& ids_map)
2500 {
      // For every feature in 'features' find the best overlapping parent among
      // the candidate ranges in 'pp' and record it in 'bests'.
      // 'bests' is resized to features.size() and indexed like 'features'.
      // Children ranges ('cc') are sorted with PLessByStart and then merged
      // against 'pp' in one pass, grouped by Seq-id.
      // NOTE(review): 'pp' is presumably already in the same order -- confirm
      // against the code that builds the index.
2501  _ASSERT(!features.empty());
2502  _ASSERT(!pp.empty());
2503 
2504  bool check_genes = false;
2505  if ( tree->GetGeneCheckMode() == tree->eGeneCheck_match &&
2507  link.CanHaveCommonGene() ) {
2508  // tree uses common gene information
2509  // the following public method effectively assigns genes by overlap
2510  tree->GetBestGene(features[0]->m_Feat, tree->eBestGene_OverlappedOnly);
2511  check_genes = true;
2512  }
2513 
2514  TRangeArray cc;
2515  // collect children parameters
2516  size_t cnt = features.size();
2517  bests.resize(cnt);
2518  for ( size_t i = 0; i < cnt; ++i ) {
2519  CFeatTree::CFeatInfo& feat_info = *features[i];
2520  SBestInfo* best = &bests[i];
2521  SFeatRangeInfo range_info(ids_map, feat_info, best);
2522  if ( range_info.m_Id ) {
2523  if ( !s_AddCircularRanges(cc, range_info) ) {
2524  cc.push_back(range_info);
2525  }
2526  }
2527  else {
2528  s_AddRanges(ids_map, cc, feat_info, best, feat_info.m_Feat.GetLocation());
2529  }
2530  }
2531  sort(cc.begin(), cc.end(), PLessByStart());
2532 
      // Remembers (child, parent) pairs already tested when both are multi-id,
      // so the same pair is not evaluated again for another of its ranges.
2533  typedef pair<CFeatTree::CFeatInfo*, CFeatTree::CFeatInfo*> TFeatPair;
2534  set<TFeatPair> multi_id_tested;
2535 
2536  // assign parents in single scan over both lists
2537  {{
2538  CDisambiguator disambibuator(features);
2539  TRangeArray::iterator pi = pp.begin();
2540  TRangeArray::iterator ci = cc.begin();
2541  for ( ; ci != cc.end(); ) {
2542  // skip all parents with Seq-ids smaller than first child
2543  while ( pi != pp.end() && pi->m_Id < ci->m_Id ) {
2544  ++pi;
2545  }
2546  if ( pi == pp.end() ) { // no more parents
2547  break;
2548  }
2549  const CSeq_id_Handle& cur_id = pi->m_Id;
2550  if ( ci->m_Id < cur_id || !ci->m_Id ) {
2551  // skip all children with Seq-ids smaller than first parent
2552  do {
2553  ++ci;
2554  } while ( ci != cc.end() && (ci->m_Id < cur_id || !ci->m_Id) );
2555  continue;
2556  }
2557 
2558  // find end of Seq-id parents
2559  TRangeArray::iterator pe = pi;
2560  while ( pe != pp.end() && pe->m_Id == cur_id ) {
2561  ++pe;
2562  }
2563 
2564  TSeqPos circular_length =
2565  sx_GetCircularLength(pi->m_Info->m_Feat.GetScope(), cur_id);
2566 
2567  {{
2568  // update parents' m_MinFrom on the Seq-id
      // (walks backwards from pe so that each entry holds the smallest
      // GetFrom() among itself and all later parents on this Seq-id)
2569  TRangeArray::iterator i = pe;
2570  TSeqPos min_from = (--i)->m_Range.GetFrom();
2571  i->m_MinFrom = min_from;
2572  while ( i != pi ) {
2573  min_from = min(min_from, (--i)->m_Range.GetFrom());
2574  i->m_MinFrom = min_from;
2575  }
2576  }}
2577 
2578  // scan all Seq-id children
2579  for ( ; ci != cc.end() && pi != pe && ci->m_Id == cur_id; ++ci ) {
2580  // child parameters
2581  CFeatTree::CFeatInfo& info = *ci->m_Info;
2582  const CSeq_loc& c_loc = info.m_Feat.GetLocation();
      // c_loc2 is a lazily created copy of c_loc whose strand is forced to
      // the parent's strand; c_loc2_strand caches the strand last applied.
2583  CRef<CSeq_loc> c_loc2;
2584  ENa_strand c_loc2_strand = eNa_strand_unknown;
2585  EOverlapType overlap_type =
2586  sx_GetOverlapType(link, c_loc, circular_length);
2587  EStrandMatchRule strand_match_rule =
2588  s_GetStrandMatchRule(link, info, tree);
2589  // Some CDS:mRNA/VDJ_segment/C_region relationships may be ambiguous. For these types
2590  // we need to collect all candidates before selecting the best ones.
2591  bool disambiguate =
2592  info.GetSubtype() == CSeqFeatData::eSubtype_cdregion &&
2594 
2595  // skip non-overlapping parents
2596  while ( pi != pe &&
2597  pi->m_Range.GetToOpen() < ci->m_Range.GetFrom() ) {
2598  ++pi;
2599  }
2600 
2601  // scan parent candidates
2602  for ( TRangeArray::iterator pc = pi;
2603  pc != pe && pc->m_MinFrom < ci->m_Range.GetToOpen();
2604  ++pc ) {
2605  if ( !pc->m_Range.IntersectingWith(ci->m_Range) ) {
2606  continue;
2607  }
2608  if ( check_genes && info.IsSetGene() ) {
2609  // check gene mismatch
2610  if ( info.m_Gene != pc->m_Info->GetChildrenGene() ) {
2611  continue;
2612  }
2613  }
2614  if ( info.m_MultiId && pc->m_Info->m_MultiId &&
2615  !multi_id_tested.insert(TFeatPair(&info, pc->m_Info)).second ) {
2616  // already tested this pair of child and parent
2617  continue;
2618  }
2619  const CMappedFeat& p_feat = pc->m_Info->m_Feat;
2620  const CSeq_loc& p_loc =
2621  link.m_ByProduct?
2622  p_feat.GetProduct():
2623  p_feat.GetLocation();
2624  CScope* scope = &p_feat.GetScope();
2625  Int1 quality = s_GetParentQuality(info, *pc->m_Info);
2626  Int8 overlap;
2627  try {
2628  if ( kOptimizeTestOverlap && overlap_type == eOverlap_Subset &&
2629  ci->m_Id && pc->m_Id &&
2630  s_IsNotSubrange(ci->m_Range, pc->m_Range) ) {
2631  // fast check with simple locations failed
2632  overlap = -1;
2633  }
2634  else {
2635  // full check
2636  overlap = TestForOverlap64(p_loc,
2637  c_loc,
2638  overlap_type,
2639  circular_length,
2640  scope);
2641  }
2642  }
2643  catch ( CException& /*ignored*/ ) {
      // a failed overlap test is treated the same as "no overlap"
2644  overlap = -1;
2645  }
2646  if ( overlap >= 0 ) {
      // direct match: candidate is offered with its full quality
2647  if (disambiguate) {
2648  if ( !disambibuator.Add(ci->m_Info, pc->m_Info, quality, overlap) ) {
2649  continue;
2650  }
2651  }
2652  ci->m_Best->CheckBest(quality, overlap, pc->m_Info);
2653  continue;
2654  }
2655  if ( strand_match_rule == eStrandMatch_all ) {
2656  // strands mismatch -> no overlap
2657  continue;
2658  }
2659  if ( info.m_MultiId || pc->m_Info->m_MultiId ) {
2660  // cannot compare strands on multi-id locations
2661  continue;
2662  }
2663  ENa_strand pstrand = GetStrand(p_loc, scope);
2664  if ( pstrand == eNa_strand_other ) {
2665  // parent has mixed strands -> no overlap
2666  continue;
2667  }
2668  if ( pstrand == eNa_strand_unknown ) {
2669  pstrand = eNa_strand_plus;
2670  }
2671  if ( strand_match_rule == eStrandMatch_at_least_one &&
2672  GetStrand(c_loc) != eNa_strand_other ) {
2673  // child's strand is single and doesn't match
2674  continue;
2675  }
2676  if ( !c_loc2 || c_loc2_strand != pstrand ) {
2677  // adjust strand to parent
2678  if ( !c_loc2 ) {
2679  c_loc2 = SerialClone(c_loc);
2680  }
2681  // force
2682  c_loc2->SetStrand(pstrand);
2683  c_loc2_strand = pstrand;
2684  }
2685  try {
2686  overlap = TestForOverlap64(p_loc,
2687  *c_loc2,
2688  overlap_type,
2689  circular_length,
2690  scope);
2691  }
2692  catch ( CException& /*ignored*/ ) {
2693  overlap = -1;
2694  }
2695  if ( overlap >= 0 ) {
      // match found only after forcing the strand: offered with
      // quality lowered by one relative to a direct match
2696  if (disambiguate) {
2697  disambibuator.Add(ci->m_Info, pc->m_Info, quality, overlap);
2698  }
2699  ci->m_Best->CheckBest((Int1)(quality-1), overlap, pc->m_Info);
2700  }
2701  }
2702  }
2703  // skip remaining Seq-id children
2704  for ( ; ci != cc.end() && ci->m_Id == cur_id; ++ci ) {
2705  }
2706  }
2707  disambibuator.Disambiguate(bests);
2708  }}
2709 }
2710 
2711 
2713  CSeqFeatData::ESubtype parent)
2714 {
      // Returns true by default; false only for a parent of subtype
      // 'region' when the rest of the condition below also holds.
      // NOTE(review): confirm which child subtypes the condition excludes.
2715  if (parent == CSeqFeatData::eSubtype_region &&
2725  return false;
2726  }
2727  return true;
2728 }
2729 
2730 
2732  const STypeLink& link)
2733 {
      // Tries to give each feature in 'features' a parent of the type(s)
      // described by 'link', chosen by location overlap.
      // On the normal path, features that end up with a parent are removed
      // from 'features'; the remaining ones are left for the caller to retry.
      // When no candidate parents exist the function returns early and
      // 'features' is left unchanged.
2734  if ( features.empty() ) {
2735  return;
2736  }
2737  if ( GetGeneCheckMode() == eGeneCheck_match &&
2739  bool unassigned = false;
2740  // assign already known genes as parents
2741  ITERATE ( TFeatArray, it, features ) {
2742  CFeatInfo& info = **it;
2743  if ( !info.IsSetParent() ) {
2744  if ( info.IsSetGene() ) {
2745  if ( info.m_Gene ) {
2746  x_SetParent(info, *info.m_Gene);
2747  }
2748  else {
2750  }
2751  }
2752  else {
2753  unassigned = true;
2754  }
2755  }
2756  }
2757  if ( !unassigned ) {
      // every feature had its gene state known: nothing left to match
2758  features.clear();
2759  return;
2760  }
2761  }
2762  if ( !m_Index ) {
2763  m_Index = new CFeatTreeIndex;
2764  }
2765  // TODO: multi-children/multi-parent assignment
2766  TBestArray bests;
2767  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
      // several parent subtypes (list terminated by eSubtype_bad): collect
      // candidates per subtype and merge them entry by entry via CheckBest
2768  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
2769  TRangeArray& parents = m_Index->GetIndex(*type_ptr, link.m_ByProduct, m_InfoArray);
2770  if ( parents.empty() ) {
2771  continue;
2772  }
2773  TBestArray bests1;
2774  s_CollectBestOverlaps(features, bests1, link, parents, this, m_Index->m_CanonicalIds);
2775  if ( bests.empty() ) {
2776  swap(bests, bests1);
2777  }
2778  else {
2779  for ( size_t i = 0; i < bests1.size(); ++i ) {
2780  bests[i].CheckBest(bests1[i]);
2781  }
2782  }
2783  }
2784  if ( bests.empty() ) {
2785  return;
2786  }
2787  }
2788  else {
2789  TRangeArray& parents = m_Index->GetIndex(link, m_InfoArray);
2790  if ( parents.empty() ) {
2791  return;
2792  }
2793  s_CollectBestOverlaps(features, bests, link, parents, this, m_Index->m_CanonicalIds);
2794  }
2795  size_t cnt = features.size();
2796  _ASSERT(bests.size() == cnt);
2797 
2798  // assign found parents
      // 'dst' compacts the array in place: only features still lacking a
      // parent are written back, then the tail is erased.
2799  TFeatArray::iterator dst = features.begin();
2800  for ( size_t i = 0; i < cnt; ++i ) {
2801  CFeatInfo& info = *features[i];
2802  if ( !info.IsSetParent() ) {
2803  CFeatInfo* best = bests[i].m_Info;
2804  if (best && s_AllowedParentByOverlap(info.GetSubtype(), best->GetSubtype())) {
2805  // assign best parent
2806  x_SetParent(info, *best);
2807  }
2808  else {
2809  // store for future processing
2810  *dst++ = &info;
2811  }
2812  }
2813  }
2814  features.erase(dst, features.end());
2815 }
2816 
2817 
2819 {
      // Assigns a gene to every feature in 'features' that has none yet,
      // taking the candidate stored in the matching entry of 'bests'.
      // Does nothing when 'features' or the gene range index is empty.
      // NOTE(review): 'bests' is presumably filled by an overlap-collection
      // call before the size assertion -- confirm.
2820  if ( features.empty() ) {
2821  return;
2822  }
2823  if ( !m_Index ) {
2824  m_Index = new CFeatTreeIndex;
2825  }
2826  TRangeArray& genes =
2828  if ( genes.empty() ) {
2829  return;
2830  }
2831  TBestArray bests;
2833  size_t cnt = features.size();
2834  _ASSERT(bests.size() == cnt);
2835 
2836  // assign found genes
2837  for ( size_t i = 0; i < cnt; ++i ) {
2838  CFeatInfo& info = *features[i];
2839  if ( !info.IsSetGene() ) {
2840  CFeatInfo* best = bests[i].m_Info;
2841  if ( best ) {
2842  // assign best gene
2843  x_SetGene(info, best);
2844  }
2845  }
2846  }
2847 }
2848 
2849 
2851 {
      // Sets 'gene' on 'info', then propagates it depth-first to every
      // descendant that does not have its gene set yet. A child whose gene is
      // already set is skipped together with its whole subtree.
2852  x_SetGene(info, gene);
2853  ITERATE ( CFeatInfo::TChildren, it, info.m_Children ) {
2854  CFeatInfo& child = **it;
2855  if ( !child.IsSetGene() ) {
2856  x_SetGeneRecursive(child, gene);
2857  }
2858  }
2859 }
2860 
2861 
2863 {
      // Incremental gene assignment for entries of m_InfoArray at index
      // m_AssignedGenes and above; a no-op when nothing new was added.
      // On completion m_AssignedGenes is advanced to m_InfoArray.size().
2864  if ( m_AssignedGenes >= m_InfoArray.size() ) {
2865  return;
2866  }
2867 
      // Pass 1: inherit the gene from the tree parent where the parent
      // passes its gene on to children.
2868  for ( size_t ind = m_AssignedGenes; ind < m_InfoArray.size(); ++ind ) {
2869  CFeatInfo& info = *m_InfoArray[ind];
2870  if ( info.IsSetGene() ) {
2871  continue;
2872  }
2873  if ( CFeatInfo* parent = info.m_Parent ) {
2874  if ( parent->GivesGeneToChildren() ) {
2875  if ( CFeatInfo* gene = parent->GetChildrenGene() ) {
2876  x_SetGeneRecursive(info, gene);
2877  }
2878  }
2879  }
2880  }
2881 
      // Pass 2: note whether any gene features are present and gather the
      // features that still need a gene, split by m_AddIndex into
      // old_feats / new_feats.
2882  bool has_genes = false;
2883  TFeatArray old_feats, new_feats;
2884  // collect genes and other features
2885  for ( size_t ind = m_AssignedGenes; ind < m_InfoArray.size(); ++ind ) {
2886  CFeatInfo& info = *m_InfoArray[ind];
2887  TFeatArray* arr = 0;
2888  CSeqFeatData::ESubtype feat_type = info.GetSubtype();
2889  if ( feat_type == CSeqFeatData::eSubtype_gene ) {
2890  has_genes = true;
2891  continue;
2892  }
2893  else if ( !info.IsSetGene() && STypeLink(feat_type).CanHaveGeneParent() ) {
      // NOTE(review): a direct gene lookup appears to be tried first here;
      // confirm where 'gene' comes from.
2895  CFeatInfo* gene =
2898  if ( gene ) {
2899  x_SetGene(info, gene);
2900  continue;
2901  }
2902  }
2903  arr = info.m_AddIndex >= m_AssignedGenes? &new_feats: &old_feats;
2904  }
2905  else {
2906  continue;
2907  }
2908  arr->push_back(&info);
2909  }
2910  if ( !old_feats.empty() ) {
      // merge so that new_feats ends up holding old entries followed by
      // new ones; old_feats is left empty
2911  old_feats.insert(old_feats.end(),
2912  new_feats.begin(), new_feats.end());
2913  swap(old_feats, new_feats);
2914  old_feats.clear();
2915  }
2916  if ( has_genes && !new_feats.empty() ) {
2917  x_AssignGenesByOverlap(new_feats);
2918  }
2919  m_AssignedGenes = m_InfoArray.size();
2920 }
2921 
2922 
// Strict-weak-ordering predicate: orders feature infos by ascending m_AddIndex.
2924  bool operator()(const CFeatTree::CFeatInfo* f1, const CFeatTree::CFeatInfo* f2) const
2925  {
2926  return f1->m_AddIndex < f2->m_AddIndex;
2927  }
2928 };
2929 
2930 
2932 {
2933  if ( m_AssignedParents >= m_InfoArray.size() ) {
2934  return;
2935  }
2936 
2937  // collect all features without assigned parent
2938  vector<TFeatArray> feats_by_type;
2939  feats_by_type.reserve(CSeqFeatData::eSubtype_max+1);
2940  size_t new_count = 0;
2941  for ( size_t ind = m_AssignedParents; ind < m_InfoArray.size(); ++ind ) {
2942  CFeatInfo& info = *m_InfoArray[ind];
2943  if ( info.IsSetParent() ) {
2944  continue;
2945  }
2947  continue;
2948  }
2949  CSeqFeatData::ESubtype feat_type = info.GetSubtype();
2950  STypeLink link(feat_type);
2951  if ( !link ) {
2952  // no parent
2954  }
2955  else {
2956  size_t index = feat_type;
2957  if ( index >= feats_by_type.size() ) {
2958  feats_by_type.resize(index+1);
2959  }
2960  feats_by_type[feat_type].push_back(&info);
2961  ++new_count;
2962  }
2963  }
2964  if ( new_count == 0 ) { // no work to do
2965  return;
2966  }
2967  // assign parents for each parent type
2968  for ( size_t type = 0; type < feats_by_type.size(); ++type ) {
2969  TFeatArray& feats = feats_by_type[type];
2970  if ( feats.empty() ) {
2971  // no work to do
2972  continue;
2973  }
2974  for ( STypeLink link((CSeqFeatData::ESubtype)type); link; ++link ) {
2975  x_AssignParentsByOverlap(feats, link);
2976  if ( feats.empty() ) {
2977  break;
2978  }
2979  }
2980  // all remaining features are without parent
2981  ITERATE ( TFeatArray, it, feats ) {
2982  x_SetNoParent(**it);
2983  }
2984  }
2985 
2986  if ( m_FeatIdMode == eFeatId_always ) {
2987  for ( size_t ind=m_AssignedParents; ind<m_InfoArray.size(); ++ind ) {
2988  CFeatInfo& info = *m_InfoArray[ind];
2990  }
2991  }
2992 
2993  for ( auto& s : m_InfoMap ) {
2994  sort(s.second.m_Children.begin(), s.second.m_Children.begin(), PByFeatInfoAddIndex());
2995  }
2996  m_AssignedParents = m_InfoArray.size();
2997 }
2998 
2999 
3001 {
      // Walks up the parent chain of 'info' to confirm it reaches a feature
      // already marked as linked to root, marking each visited node on the way.
      // m_IsLinkedToRoot is used as an "in progress" marker: meeting a node
      // that is still in the 'linking' state means the parent links form a
      // cycle, which is reported by throwing CObjMgrException(eFindConflict).
3002  _ASSERT(info.IsSetParent());
3003  if ( info.m_IsLinkedToRoot == info.eIsLinkedToRoot_linking ) {
3005  << info.m_Feat.GetOriginalFeature()
3006  << info.m_Parent->m_Feat.GetOriginalFeature()
3007  << NcbiEndl;
3008  NCBI_THROW(CObjMgrException, eFindConflict,
3009  "CFeatTree: cycle in xrefs to parent feature");
3010  }
3011  if ( info.m_Parent ) {
3012  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linking;
3013  x_VerifyLinkedToRoot(*info.m_Parent);
3014  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linked;
3015  }
      // a feature with no parent pointer is expected to be marked linked already
3016  _ASSERT(info.m_IsLinkedToRoot == info.eIsLinkedToRoot_linked);
3017 }
3018 
3019 
3021 {
      // Attaches 'info' as a child of 'parent': appends it to the parent's
      // child list, stores the parent pointer, flags the parent as set and
      // copies the parent's linked-to-root state.
      // Preconditions (debug-only asserts): 'info' has no parent yet and
      // 'parent' is not in the middle of a linked-to-root verification.
3022  _ASSERT(!info.IsSetParent());
3023  _ASSERT(!info.m_Parent);
3024  _ASSERT(!parent.m_IsSetChildren);
3025  _ASSERT(parent.m_IsLinkedToRoot != info.eIsLinkedToRoot_linking);
3026  parent.m_Children.push_back(&info);
3027  info.m_Parent = &parent;
3028  info.m_IsSetParent = true;
3029  info.m_IsLinkedToRoot = parent.m_IsLinkedToRoot;
3030 }
3031 
3032 
3034 {
      // Registers 'info' as a top-level feature: it is appended to the
      // children of m_RootInfo, its parent is flagged as resolved (m_Parent
      // stays null) and it is marked as linked to root.
3035  // _ASSERT(!info.IsSetParent());
3036  _ASSERT(!info.m_Parent);
3037  m_RootInfo.m_Children.push_back(&info);
3038  info.m_IsSetParent = true;
3039  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linked;
3040 }
3041 
3042 
3044 {
      // Records 'gene' as the gene of 'info' and flags the gene as set.
      // Re-setting is only expected with the same gene (debug-only assert).
3045  _ASSERT(!info.IsSetGene() || gene == info.m_Gene);
3046  info.m_Gene = gene;
3047  info.m_IsSetGene = true;
3048 }
3049 
3050 
3052 {
      // Returns the parent of 'info', running parent assignment first if the
      // parent has not been resolved yet.
3053  if ( !info.IsSetParent() ) {
3054  x_AssignParents();
3055  }
3056  return info.m_Parent;
3057 }
3058 
3059 
3061 {
      // Returns the child list of 'info'; parent assignment is always invoked
      // first so the list reflects every feature added so far.
3062  x_AssignParents();
3063  return info.m_Children;
3064 }
3065 
3066 
3068 {
      // Returns the parent feature of 'feat', or a default-constructed
      // CMappedFeat when the feature has no parent.
3069  CMappedFeat ret;
3070  CFeatInfo* info = x_GetParent(x_GetInfo(feat));
3071  if ( info ) {
3072  ret = info->m_Feat;
3073  }
3074  return ret;
3075 }
3076 
3077 
3080 {
      // Climbs the parent chain of 'feat' and returns the first ancestor whose
      // feature type equals 'type'; the result is empty if the chain ends
      // without a match.
3081  CMappedFeat parent = GetParent(feat);
3082  while ( parent && parent.GetFeatType() != type ) {
3083  parent = GetParent(parent);
3084  }
3085  return parent;
3086 }
3087 
3088 
3090  CSeqFeatData::ESubtype subtype)
3091 {
      // Climbs the parent chain of 'feat' and returns the first ancestor whose
      // feature subtype equals 'subtype'; the result is empty if the chain
      // ends without a match.
3092  CMappedFeat parent = GetParent(feat);
3093  while ( parent && parent.GetFeatSubtype() != subtype ) {
3094  parent = GetParent(parent);
3095  }
3096  return parent;
3097 }
3098 
3099 
3100 vector<CMappedFeat> CFeatTree::GetChildren(const CMappedFeat& feat)
3101 {
3102  vector<CMappedFeat> children;
3103  GetChildrenTo(feat, children);
3104  return children;
3105 }
3106 
3107 
3109  vector<CMappedFeat>& children)
3110 {
      // Replaces the contents of 'children' with the child features of 'feat'.
      // An empty 'feat' selects the top-level features (children of
      // m_RootInfo); parent assignment is run first in that case.
3111  children.clear();
3112  const TChildren* infos;
3113  if ( feat ) {
3114  infos = &x_GetChildren(x_GetInfo(feat));
3115  }
3116  else {
3117  x_AssignParents();
3118  infos = &m_RootInfo.m_Children;
3119  }
3120  children.reserve(infos->size());
3121  ITERATE ( TChildren, it, *infos ) {
3122  children.push_back((*it)->m_Feat);
3123  }
3124 }
3125 
3126 
3128  EBestGeneType lookup_type)
3129 {
      // Returns the best gene for 'feat', or an empty CMappedFeat if none.
      // For eBestGene_TreeOnly and eBestGene_AllowOverlapped the first branch
      // runs (NOTE(review): presumably a tree-based lookup that fills 'ret' --
      // confirm). If 'ret' is still empty and the mode is not
      // eBestGene_TreeOnly, genes are assigned via x_AssignGenes() and the
      // feature's m_Gene is used.
3130  CMappedFeat ret;
3131  if ( lookup_type == eBestGene_TreeOnly ||
3132  lookup_type == eBestGene_AllowOverlapped ) {
3134  }
3135  if ( !ret && lookup_type != eBestGene_TreeOnly ) {
3136  x_AssignGenes();
3137  CFeatInfo* gene = x_GetInfo(feat).m_Gene;
3138  if ( gene ) {
3139  ret = gene->m_Feat;
3140  }
3141  }
3142  return ret;
3143 }
3144 
3145 
3147  : m_AddIndex(0),
3148  m_CanMatchByQual(false),
3149  m_IsSetParent(false),
3150  m_IsSetGene(false),
3151  m_IsSetChildren(false),
3152  m_MultiId(false),
3153  m_IsLinkedToRoot(eIsLinkedToRoot_unknown),
3154  m_Parent(0),
3155  m_Gene(0)
3156 {
3157 }
3158 
3159 
3161 {
3162 }
3163 
3164 
3166 {
3167  return m_Feat.GetAnnot().GetTSE_Handle();
3168 }
3169 
3170 
3172  CSeqFeatData::ESubtype bottom_type,
3173  CSeqFeatData::ESubtype top_type,
3174  const SAnnotSelector* base_sel,
3175  bool skip_bottom)
3176 {
      // Loads into the tree the features found at 'loc' (via CFeat_CI on
      // 'scope') whose subtype lies on the parent chain from bottom_type up
      // to top_type.
      // The selector starts as a copy of *base_sel when one is given.
      // Unless skip_bottom is set, bottom_type itself is selected; then each
      // STypeLink from bottom_type contributes its parent subtype(s) until a
      // link whose m_ParentType equals top_type has been included.
3177  SAnnotSelector sel;
3178  if ( base_sel ) {
3179  sel = *base_sel;
3180  }
3181  else {
3183  }
3184  if ( skip_bottom ) {
3186  }
3187  else {
3188  sel.SetFeatSubtype(bottom_type);
3189  }
3190  if ( top_type != bottom_type ) {
3191  for ( STypeLink link(bottom_type); link; ++link ) {
3192  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
      // multi-parent link: the list is terminated by eSubtype_bad
3193  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
3194  sel.IncludeFeatSubtype(*type_ptr);
3195  }
3196  }
3197  else {
3198  sel.IncludeFeatSubtype(link.m_ParentType);
3199  }
3200  if ( link.m_ParentType == top_type ) {
3201  break;
3202  }
3203  }
3204  }
3205  CFeat_CI feat_it(scope, loc, sel);
3206  AddFeatures(feat_it);
3207 }
3208 
3209 
3211  CSeqFeatData::ESubtype bottom_type,
3212  CSeqFeatData::ESubtype top_type,
3213  const SAnnotSelector* base_sel)
3214 {
      // Adds 'feat' itself, then the features overlapping its location whose
      // subtypes lie between bottom_type and top_type (same scope as 'feat').
3215  AddFeature(feat);
3216  AddFeaturesFor(feat.GetScope(), feat.GetLocation(),
3217  bottom_type, top_type, base_sel);
3218 }
3219 
3220 
3222  CSeqFeatData::ESubtype top_type,
3223  const SAnnotSelector* base_sel)
3224 {
      // Adds 'feat' itself, then the candidate ancestors up to top_type found
      // at its location. The chain starts from the feature's own subtype and
      // the trailing 'true' is the skip_bottom flag of the scope/location
      // overload.
3225  AddFeature(feat);
3226  AddFeaturesFor(feat.GetScope(), feat.GetLocation(),
3227  feat.GetFeatSubtype(), top_type, base_sel, true);
3228 }
3229 
3230 
3232  const SAnnotSelector* base_sel)
3233 {
3234  AddFeaturesFor(mrna_feat,
3236  base_sel);
3237 }
3238 
3239 
3241  const SAnnotSelector* base_sel)
3242 {
3243  AddFeaturesFor(mrna_feat,
3246  base_sel);
3247 }
3248 
3249 
3251  const SAnnotSelector* base_sel)
3252 {
3253  AddFeaturesFor(cds_feat,
3255  base_sel);
3256 }
3257 
3258 
3260  const SAnnotSelector* base_sel)
3261 {
3262  AddFeaturesFor(cds_feat,
3264  base_sel);
3265 }
3266 
3267 
3269  const SAnnotSelector* base_sel)
3270 {
3271  AddFeaturesFor(gene_feat,
3274  base_sel);
3275 }
3276 
3277 
3279  const SAnnotSelector* base_sel)
3280 {
3281  AddFeaturesFor(gene_feat,
3284  base_sel);
3285 }
3286 
3287 
3289  const SAnnotSelector* base_sel)
3290 {
3291  AddFeaturesFor(feat,
3293  base_sel);
3294 }
3295 
3296 
3297 /////////////////////////////////////////////////////////////////////////////
3298 // New API for GetBestXxxForXxx()
3299 
3302  CFeatTree* feat_tree,
3303  const SAnnotSelector* base_sel,
3304  CFeatTree::EBestGeneType lookup_type)
3305 {
      // Returns the best gene for an mRNA feature.
      // Throws CObjmgrUtilException(eBadFeature) when mrna_feat is empty or
      // its subtype is not mRNA.
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddGenesForMrna(mrna_feat, base_sel) and queried; otherwise the
      // supplied tree is queried as it is and base_sel is not used.
3306  if ( !mrna_feat ||
3307  mrna_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_mRNA ) {
3308  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3309  "GetBestGeneForMrna: mrna_feat is not a mRNA");
3310  }
3311  if ( !feat_tree ) {
3312  CFeatTree tree;
3313  tree.AddGenesForMrna(mrna_feat, base_sel);
3314  return tree.GetBestGene(mrna_feat, lookup_type);
3315  }
3316  return feat_tree->GetBestGene(mrna_feat, lookup_type);
3317 }
3318 
3319 
3322  CFeatTree* feat_tree,
3323  const SAnnotSelector* base_sel,
3324  CFeatTree::EBestGeneType lookup_type)
3325 {
      // Returns the best gene for a coding-region feature.
      // Throws CObjmgrUtilException(eBadFeature) when cds_feat is empty or
      // fails the subtype check (reported as "not a cdregion").
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddGenesForCds(cds_feat, base_sel) and queried; otherwise the
      // supplied tree is queried as it is and base_sel is not used.
3326  if ( !cds_feat ||
3328  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3329  "GetBestGeneForCds: cds_feat is not a cdregion");
3330  }
3331  if ( !feat_tree ) {
3332  CFeatTree tree;
3333  tree.AddGenesForCds(cds_feat, base_sel);
3334  return tree.GetBestGene(cds_feat, lookup_type);
3335  }
3336  return feat_tree->GetBestGene(cds_feat, lookup_type);
3337 }
3338 
3339 
3342  CFeatTree* feat_tree,
3343  const SAnnotSelector* base_sel)
3344 {
      // Returns the nearest ancestor of subtype mRNA for a coding-region
      // feature, or an empty CMappedFeat if the tree has none.
      // Throws CObjmgrUtilException(eBadFeature) when cds_feat is empty or
      // fails the subtype check (reported as "not a cdregion").
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddMrnasForCds(cds_feat, base_sel); otherwise base_sel is not used.
3345  if ( !cds_feat ||
3347  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3348  "GetBestMrnaForCds: cds_feat is not a cdregion");
3349  }
3350  if ( !feat_tree ) {
3351  CFeatTree tree;
3352  tree.AddMrnasForCds(cds_feat, base_sel);
3353  return tree.GetParent(cds_feat, CSeqFeatData::eSubtype_mRNA);
3354  }
3355  return feat_tree->GetParent(cds_feat, CSeqFeatData::eSubtype_mRNA);
3356 }
3357 
3358 
3361  CFeatTree* feat_tree,
3362  const SAnnotSelector* base_sel)
3363 {
      // Returns the first direct child of 'mrna_feat' in the tree whose
      // subtype is cdregion, or an empty CMappedFeat if there is none.
      // Throws CObjmgrUtilException(eBadFeature) when mrna_feat is empty or
      // its subtype is not mRNA.
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddCdsForMrna(mrna_feat, base_sel) and the function re-enters itself
      // with that tree; otherwise base_sel is not used.
3364  if ( !mrna_feat ||
3365  mrna_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_mRNA ) {
3366  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3367  "GetBestCdsForMrna: mrna_feat is not a mRNA");
3368  }
3369  if ( !feat_tree ) {
3370  CFeatTree tree;
3371  tree.AddCdsForMrna(mrna_feat, base_sel);
3372  return GetBestCdsForMrna(mrna_feat, &tree);
3373  }
3374  const vector<CMappedFeat>& children = feat_tree->GetChildren(mrna_feat);
3375  ITERATE ( vector<CMappedFeat>, it, children ) {
3376  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion ) {
3377  return *it;
3378  }
3379  }
3380  return CMappedFeat();
3381 }
3382 
3383 
3384 void GetMrnasForGene(const CMappedFeat& gene_feat,
3385  list< CMappedFeat >& mrna_feats,
3386  CFeatTree* feat_tree,
3387  const SAnnotSelector* base_sel)
3388 {
3389  if ( !gene_feat ||
3390  gene_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_gene ) {
3391  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3392  "GetMrnasForGene: gene_feat is not a gene");
3393  }
3394  if ( !feat_tree ) {
3395  CFeatTree tree;
3396  tree.AddMrnasForGene(gene_feat, base_sel);
3397  GetMrnasForGene(gene_feat, mrna_feats, &tree);
3398  return;
3399  }
3400  const vector<CMappedFeat>& children = feat_tree->GetChildren(gene_feat);
3401  ITERATE ( vector<CMappedFeat>, it, children ) {
3402  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3403  mrna_feats.push_back(*it);
3404  }
3405  }
3406 }
3407 
3408 
3409 void GetCdssForGene(const CMappedFeat& gene_feat,
3410  list< CMappedFeat >& cds_feats,
3411  CFeatTree* feat_tree,
3412  const SAnnotSelector* base_sel)
3413 {
3414  if ( !gene_feat ||
3415  gene_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_gene ) {
3416  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3417  "GetCdssForGene: gene_feat is not a gene");
3418  }
3419  if ( !feat_tree ) {
3420  CFeatTree tree;
3421  tree.AddCdsForGene(gene_feat, base_sel);
3422  GetCdssForGene(gene_feat, cds_feats, &tree);
3423  return;
3424  }
3425  const vector<CMappedFeat>& children = feat_tree->GetChildren(gene_feat);
3426  ITERATE ( vector<CMappedFeat>, it, children ) {
3427  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3428  const vector<CMappedFeat>& children2 = feat_tree->GetChildren(*it);
3429  ITERATE ( vector<CMappedFeat>, it2, children2 ) {
3430  if ( it2->GetFeatSubtype()==CSeqFeatData::eSubtype_cdregion ) {
3431  cds_feats.push_back(*it2);
3432  }
3433  }
3434  }
3435  else if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion ) {
3436  cds_feats.push_back(*it);
3437  }
3438  }
3439 }
3440 
3441 
3444  CFeatTree* feat_tree,
3445  const SAnnotSelector* base_sel,
3446  CFeatTree::EBestGeneType lookup_type)
3447 {
      // Returns the best gene for an arbitrary feature.
      // Throws CObjmgrUtilException(eBadFeature) when 'feat' is empty.
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddGenesForFeat(feat, base_sel) and queried; otherwise the supplied
      // tree is queried as it is and base_sel is not used.
3448  if ( !feat ) {
3449  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3450  "GetBestGeneForFeat: feat is null");
3451  }
3452  if ( !feat_tree ) {
3453  CFeatTree tree;
3454  tree.AddGenesForFeat(feat, base_sel);
3455  return tree.GetBestGene(feat, lookup_type);
3456  }
3457  return feat_tree->GetBestGene(feat, lookup_type);
3458 }
3459 
3460 
3463  CSeqFeatData::ESubtype parent_type,
3464  CFeatTree* feat_tree,
3465  const SAnnotSelector* base_sel)
3466 {
      // Returns the nearest ancestor of 'feat' with subtype 'parent_type',
      // or an empty CMappedFeat if the tree has none.
      // Throws CObjmgrUtilException(eBadFeature) when 'feat' is empty.
      // With a null feat_tree a temporary CFeatTree is filled through
      // AddFeaturesFor(feat, parent_type, base_sel); otherwise base_sel is
      // not used.
3467  if ( !feat ) {
3468  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3469  "GetBestParentForFeat: feat is null");
3470  }
3471  if ( !feat_tree ) {
3472  CFeatTree tree;
3473  tree.AddFeaturesFor(feat, parent_type, base_sel);
3474  return tree.GetParent(feat, parent_type);
3475  }
3476  return feat_tree->GetParent(feat, parent_type);
3477 }
3478 
3479 
3480 typedef pair<Int8, CMappedFeat> TMappedFeatScore;
3481 typedef vector<TMappedFeatScore> TMappedFeatScores;
3482 
// Collects the features of subtype 'feat_subtype' that overlap 'loc' and
// appends them to 'feats' as (score, feature) pairs. The score is the
// TestForOverlap64() result; candidates scoring below zero are dropped.
// 'feats' is not cleared first.
// 'base_sel', when non-null, seeds the annotation selector; its feature
// subtype and overlap type are then overridden.
// The E_Choice parameter is unnamed and unused.
// A CException raised while iterating is swallowed (traced only), leaving in
// 'feats' whatever had been collected up to that point.
3483 static
3484 void GetOverlappingFeatures(CScope& scope, const CSeq_loc& loc,
3485  CSeqFeatData::E_Choice /*feat_type*/,
3486  CSeqFeatData::ESubtype feat_subtype,
3487  sequence::EOverlapType overlap_type,
3488  TMappedFeatScores& feats,
3489  const SAnnotSelector* base_sel)
3490 {
      // revert_locations swaps the argument order of the overlap test below:
      // when set, 'loc' is passed first and the feature location second.
3491  bool revert_locations = false;
3492  SAnnotSelector::EOverlapType annot_overlap_type;
3493  switch (overlap_type) {
3494  case eOverlap_Simple:
3495  case eOverlap_Contained:
3496  case eOverlap_Contains:
3497  // Require total range overlap
3498  annot_overlap_type = SAnnotSelector::eOverlap_TotalRange;
3499  break;
3500  case eOverlap_Subset:
3501  case eOverlap_SubsetRev:
3503  case eOverlap_Interval:
3504  case eOverlap_CheckIntRev:
3505  revert_locations = true;
3506  // there's no break here - proceed to "default"
3507  default:
3508  // Require intervals overlap
3509  annot_overlap_type = SAnnotSelector::eOverlap_Intervals;
3510  break;
3511  }
3512 
3513  CConstRef<CSeq_feat> feat_ref;
3514 
      // For whole-sequence and single-interval locations the bioseq handle
      // and range are resolved up front so the iterator can be built from
      // them; any other location kind leaves 'h' empty.
3515  CBioseq_Handle h;
3517  ENa_strand strand = eNa_strand_unknown;
3518  if ( loc.IsWhole() ) {
3519  h = scope.GetBioseqHandle(loc.GetWhole());
3520  range = range.GetWhole();
3521  }
3522  else if ( loc.IsInt() ) {
3523  const CSeq_interval& interval = loc.GetInt();
3524  h = scope.GetBioseqHandle(interval.GetId());
3525  range.SetFrom(interval.GetFrom());
3526  range.SetTo(interval.GetTo());
3527  if ( interval.IsSetStrand() ) {
3528  strand = interval.GetStrand();
3529  }
3530  }
3531  else {
3532  range = range.GetEmpty();
3533  }
3534 
3535  // Check if the sequence is circular
3536  TSeqPos circular_length = kInvalidSeqPos;
3537  if ( h ) {
3538  if ( h.IsSetInst_Topology() &&
3540  circular_length = h.GetBioseqLength();
3541  }
3542  }
3543  else {
      // no handle yet: try to resolve one from the location's single Seq-id;
      // any failure just leaves circular_length at kInvalidSeqPos
3544  try {
3545  const CSeq_id* single_id = 0;
3546  try {
3547  loc.CheckId(single_id);
3548  }
3549  catch (CException&) {
3550  single_id = 0;
3551  }
3552  if ( single_id ) {
3553  CBioseq_Handle h1 = scope.GetBioseqHandle(*single_id);
3554  if ( h1 && h1.IsSetInst_Topology() &&
3556  circular_length = h1.GetBioseqLength();
3557  }
3558  }
3559  }
3560  catch (CException& _DEBUG_ARG(e)) {
3561  _TRACE("test for circularity failed: " << e.GetMsg());
3562  }
3563  }
3564 
3565  try {
3566  SAnnotSelector sel;
3567  if ( base_sel ) {
3568  sel = *base_sel;
3569  }
3570  else {
3572  }
3573  sel.SetFeatSubtype(feat_subtype).SetOverlapType(annot_overlap_type);
      // The two branches differ only in how the iterator is constructed
      // (handle + range + strand versus scope + location); the scoring loop
      // is the same in both.
3574  if ( h ) {
3575  CFeat_CI feat_it(h, range, strand, sel);
3576  for ( ; feat_it; ++feat_it) {
3577  // treat subset as a special case
3578  Int8 cur_diff = ( !revert_locations ) ?
3579  TestForOverlap64(feat_it->GetLocation(),
3580  loc,
3581  overlap_type,
3582  circular_length,
3583  &scope) :
3584  TestForOverlap64(loc,
3585  feat_it->GetLocation(),
3586  overlap_type,
3587  circular_length,
3588  &scope);
3589  if (cur_diff < 0) {
3590  continue;
3591  }
3592 
3593  TMappedFeatScore sc(cur_diff, *feat_it);
3594  feats.push_back(sc);
3595  }
3596  }
3597  else {
3598  CFeat_CI feat_it(scope, loc, sel);
3599  for ( ; feat_it; ++feat_it) {
3600  // treat subset as a special case
3601  Int8 cur_diff = ( !revert_locations ) ?
3602  TestForOverlap64(feat_it->GetLocation(),
3603  loc,
3604  overlap_type,
3605  circular_length,
3606  &scope) :
3607  TestForOverlap64(loc,
3608  feat_it->GetLocation(),
3609  overlap_type,
3610  circular_length,
3611  &scope);
3612  if (cur_diff < 0) {
3613  continue;
3614  }
3615 
3616  TMappedFeatScore sc(cur_diff, *feat_it);
3617  feats.push_back(sc);
3618  }
3619  }
3620  }
3621  catch (CException&) {
3622  _TRACE("GetOverlappingFeatures(): error: feature iterator failed");
3623  }
3624 }
3625 
3626 
// Picks one feature of subtype 'feat_subtype' overlapping 'loc'.
// Candidates come from GetOverlappingFeatures() as (score, feature) pairs.
// With fBestFeat_FavorLonger set in 'opts' the pair that compares greatest is
// chosen, otherwise the one that compares least; pairs compare by score
// first. Returns an empty CMappedFeat when nothing overlaps.
3627 static
3629  const CSeq_loc& loc,
3630  CSeqFeatData::ESubtype feat_subtype,
3631  sequence::EOverlapType overlap_type,
3632  TBestFeatOpts opts,
3633  const SAnnotSelector* base_sel)
3634 {
3635  TMappedFeatScores scores;
3636  GetOverlappingFeatures(scope, loc,
3637  CSeqFeatData::GetTypeFromSubtype(feat_subtype), feat_subtype,
3638  overlap_type, scores, base_sel);
3639 
3640  if ( !scores.empty() ) {
3641  if (opts & fBestFeat_FavorLonger) {
3642  return max_element(scores.begin(), scores.end())->second;
3643  }
3644  else {
3645  return min_element(scores.begin(), scores.end())->second;
3646  }
3647  }
3648  return CMappedFeat();
3649 }
3650 
3651 
3654  CSeqFeatData::ESubtype need_subtype,
3655  sequence::EOverlapType overlap_type,
3656  CFeatTree* feat_tree,
3657  const SAnnotSelector* base_sel)
3658 {
3659  // special cases
3660  switch ( need_subtype ) {
3662  switch ( feat.GetFeatSubtype() ) {
3665  break;
3667  return GetBestGeneForMrna(feat, feat_tree, base_sel);
3669  return GetBestGeneForCds(feat, feat_tree, base_sel);
3670  default:
3671  return GetBestGeneForFeat(feat, feat_tree, base_sel);
3672  }
3673  break;
3676  return GetBestMrnaForCds(feat, feat_tree, base_sel);
3677  }
3678  break;
3680  if ( feat.GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3681  return GetBestCdsForMrna(feat, feat_tree, base_sel);
3682  }
3683  break;
3684  default:
3685  break;
3686  }
3687  // in-tree child -> parent lookup
3688  if ( sx_IsParentType(need_subtype, feat.GetFeatSubtype()) ) {
3689  return GetBestParentForFeat(feat, need_subtype, feat_tree, base_sel);
3690  }
3691  // non-tree overlap
3692  return GetBestOverlappingFeat(feat.GetScope(), feat.GetLocation(),
3693  need_subtype, overlap_type, 0, base_sel);
3694 }
3695 
3696 
3700  CScope* scope)
3701 {
3702  CRef<CSeq_loc_Mapper> mapper;
3703  if ( !feat.IsSetProduct() ) return mapper; // NULL
3704 
3705  bool benign_feat_exception = feat.IsSetExcept_text() &&
3706  (feat.GetExcept_text() == "mismatches in translation" ||
3707  feat.GetExcept_text() == "mismatches in transcription");
3708  bool severe_feat_exception =
3709  ((feat.IsSetExcept() && feat.GetExcept()) ||
3710  feat.IsSetExcept_text()) && !benign_feat_exception;
3711 
3712  if (severe_feat_exception ||
3715  return mapper; // NULL
3716  }
3717 
3718  mapper.Reset(new CSeq_loc_Mapper(feat, dir, scope));
3719  return mapper;
3720 }
3721 
3722 
3723 /////////////////////////////////////////////////////////////////////////////
3724 // Assigning feature ids
3725 /////////////////////////////////////////////////////////////////////////////
3726 
3728 {
      // Removes the feature ids and feature cross-references from every
      // feature reached by iterating 'annot' with CFeat_CI; each feature is
      // edited in place through a CSeq_feat_EditHandle.
3729  for ( CFeat_CI feat_it(annot); feat_it; ++feat_it ) {
3730  CSeq_feat_EditHandle feat(*feat_it);
3731  feat.ClearFeatIds();
3732  feat.ClearFeatXrefs();
3733  }
3734 }
3735 
3736 
3738 {
      // Removes the feature ids and feature cross-references from every
      // feature reached by iterating 'entry' with CFeat_CI; each feature is
      // edited in place through a CSeq_feat_EditHandle.
3739  for ( CFeat_CI feat_it(entry); feat_it; ++feat_it ) {
3740  CSeq_feat_EditHandle feat(*feat_it);
3741  feat.ClearFeatIds();
3742  feat.ClearFeatXrefs();
3743  }
3744 }
3745 
3746 
3747 static void s_SetFeatureId(CFeatTree& ft,
3748  const CMappedFeat& feat,
3749  int& last_id,
3750  const CMappedFeat& parent);
3751 static void s_SetChildrenFeatureIds(CFeatTree& ft,
3752  const CMappedFeat& feat,
3753  int& feat_id);
3754 
// Gives 'feat' the next sequential feature id (last_id is incremented first
// and then used), optionally cross-links it with 'parent', and then numbers
// the feature's own children recursively. 'last_id' is shared by reference
// across the whole traversal.
3755 static void s_SetFeatureId(CFeatTree& ft,
3756  const CMappedFeat& feat,
3757  int& last_id,
3758  const CMappedFeat& parent)
3759 {
3760  CSeq_feat_EditHandle efeat(feat);
3761  efeat.SetFeatId(++last_id);
3762 
3763  if ( parent &&
3764  parent.GetFeatType() == CSeqFeatData::e_Rna &&
3766  // conservative choice: link only between RNA and Cdregion features
      // xrefs are added in both directions: child -> parent's local id,
      // parent -> the id just assigned to the child
3767  efeat.AddFeatXref(parent.GetId().GetLocal());
3768  CSeq_feat_EditHandle parent_efeat(parent);
3769  parent_efeat.AddFeatXref(last_id);
3770  }
3771 
3772  s_SetChildrenFeatureIds(ft, feat, last_id);
3773 }
3774 
3775 
3777  const CMappedFeat& parent,
3778  int& last_id)
3779 {
      // Assigns ids to every child of 'parent' in the tree, in the order
      // returned by GetChildren(); each child in turn numbers its own
      // subtree through s_SetFeatureId().
3780  vector<CMappedFeat> children = ft.GetChildren(parent);
3781  ITERATE (vector<CMappedFeat>, it, children ) {
3782  s_SetFeatureId(ft, *it, last_id, parent);
3783  }
3784 }
3785 
3786 
3788 {
3789  ClearFeatureIds(entry);
3790  int feat_id = 0;
3791  CFeat_CI feat_it(entry);
3792  CFeatTree ft(feat_it);
3793  s_SetChildrenFeatureIds(ft, CMappedFeat(), feat_id);
3794 }
3795 
3796 
3798 {
3799  ClearFeatureIds(annot);
3800  int feat_id = 0;
3801  CFeat_CI feat_it(annot);
3802  CFeatTree ft(feat_it);
3803  s_SetChildrenFeatureIds(ft, CMappedFeat(), feat_id);
3804 }
3805 
3806 
3808 {
3809  CRef<CSeq_loc> stop(new CSeq_loc());
3810 
3811  for ( CSeq_loc_CI citer (loc); citer; ++citer ) {
3812  stop->SetPnt().SetId().Assign(citer.GetSeq_id());
3813  }
3814  stop->SetPnt().SetPoint(loc.GetStop(eExtreme_Biological));
3815  return stop;
3816 }
3817 
// Classifies how `loc` sits relative to the reading frame of the coding
// region `cds` and returns one of the eLocationInFrame_* values.
// NOTE(review): this listing is missing the signature and several interior
// lines (among them the computations of pos1, pos2 and cmp, one `case` label,
// two guarding `if` lines and two `return` statements). The comments added
// here describe only what the visible lines establish.
3819 {
// (TSeqPos)-1 is treated as "position is not inside the coding region".
3821  bool pos1_not_in = false;
3822  if (pos1 == ((TSeqPos)-1)) {
3823  pos1_not_in = true;
3824  }
3827  bool pos2_not_in = false;
3828  if (pos2 == ((TSeqPos)-1)) {
3829  pos2_not_in = true;
3830  }
3831  if (pos1_not_in && pos2_not_in) {
3832  return eLocationInFrame_NotIn;
3833  }
3834 
// Anything other than "contains" or "same" is also reported as NotIn.
3837  if (cmp != sequence::eContains && cmp != sequence::eSame) {
3838  return eLocationInFrame_NotIn;
3839  }
3840 
// Convert the Cdregion frame enum into a 0-based offset; stays 0 when `cds`
// carries no Cdregion data.
3841  unsigned int frame = 0;
3842  if (cds.IsSetData() && cds.GetData().IsCdregion()) {
3843  const CCdregion& cdr = cds.GetData().GetCdregion();
3844  switch (cdr.GetFrame()) {
3846  case CCdregion::eFrame_one:
3847  frame = 0;
3848  break;
3849  case CCdregion::eFrame_two:
3850  frame = 1;
3851  break;
3853  frame = 2;
3854  break;
3855  }
3856  }
3857  // note - have to add 3 to prevent negative result from subtraction
3858  TSeqPos mod1 = (pos1 + 3 - frame) %3;
3859 
// mod1 == 0 is the "start is in frame" state tested at the end.
3860  if ( mod1 != 0 && loc.IsPartialStart(eExtreme_Biological)
3862  && pos1 == 0) {
3863  mod1 = 0;
3864  } else if (pos1 < frame) {
3865  // start is out of frame - it's before the coding region begins
3866  mod1 = 1;
3867  }
3869  mod1 = 0;
3870  }
3871 
3872 
3873  TSeqPos cds_len = sequence::GetLength (cds.GetLocation(), &(cds.GetScope()));
3874 
// mod2 == 2 is the "stop is in frame" state tested at the end.
3875  TSeqPos mod2 = (pos2 + 3 - frame) %3;
3876  if ( mod2 != 0 && loc.IsPartialStop(eExtreme_Biological)
3878  && pos2 == cds_len) {
3879  mod2 = 0;
3880  } else if (pos2 <= frame) {
3881  // stop is out of frame - it's before the coding region begins
3882  mod2 = 1;
3883  }
3884  if (pos2 > cds_len) {
3885  // stop is out of frame - it's after the coding region ends
3886  mod2 = 1;
3887  }
3889  mod2 = 2;
3890  }
3891 /*
3892  // Would this work just as well?
3893  if (loc.IsPartialStop(eExtreme_Biological)) {
3894  mod2 = 2;
3895  }
3896  else if
3897  (pos2 <= frame || pos2 > cds_len) {
3898  mod2 = 1;
3899  }
3900 */
3901 
// Final classification: both ends bad, only the start bad, only the stop bad,
// or fully in frame.
3902  if ( (mod1 != 0) && (mod2 != 2) ) {
3904  } else if (mod1 != 0) {
3906  } else if (mod2 != 2) {
3907  return eLocationInFrame_BadStop;
3908  } else {
3909  return eLocationInFrame_InFrame;
3910  }
3911 }
3912 
3913 
// Moves a coding-region feature from its current annotation to a feature
// table on the enclosing nuc-prot set.
//   orig_feat - feature to move; on success it is replaced with the handle
//               returned by TakeFeat() on the destination annotation
// Returns true only when the feature was moved. Returns false when the
// feature is not a Cdregion, is pseudo, its location does not resolve to a
// Bioseq handle (or the lookup throws), it already sits on a nuc-prot set, or
// the parent of its entry is not a nuc-prot set.
// NOTE(review): the line declaring `ftable` (original line 3950) is absent
// from this listing.
3914 bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle& orig_feat)
3915 {
3916  // only move coding regions to nuc-prot set
3917  if (!orig_feat.IsSetData() || !orig_feat.GetData().IsCdregion()) {
3918  return false;
3919  }
3920  // don't move if pseudo
3921  if (orig_feat.IsSetPseudo() && orig_feat.GetPseudo()) {
3922  return false;
3923  }
3924  CBioseq_Handle nuc_bsh;
3925  try {
3926  nuc_bsh = orig_feat.GetScope().GetBioseqHandle(orig_feat.GetLocation());
3927  if (!nuc_bsh) {
3928  return false;
3929  }
3930  } catch (...) {
// Any failure while resolving the location is treated as "cannot promote".
3931  return false;
3932  }
3933 
3934  // This is necessary, to make sure that we are in "editing mode"
3935  const CSeq_annot_Handle& annot_handle = orig_feat.GetAnnot();
3936  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
3937 
3938  CSeq_feat_EditHandle feh(orig_feat);
3939  CSeq_entry_Handle parent_entry = feh.GetAnnot().GetParentEntry();
3940 
3941  bool rval = false;
3942 
3943  if (parent_entry.IsSet()
3944  && parent_entry.GetSet().IsSetClass()
3945  && parent_entry.GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
3946  // already on nuc-prot set, leave it alone
3947  } else {
3948  CBioseq_set_Handle nuc_parent = parent_entry.GetParentBioseq_set();
3949  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
// Look for the first feature-table annotation on the nuc-prot set's entry.
3951  CSeq_entry_Handle parent_seh = nuc_parent.GetParentEntry();
3952  CSeq_annot_CI annot_ci(parent_seh, CSeq_annot_CI::eSearch_entry);
3953  for (; annot_ci; ++annot_ci) {
3954  if ((*annot_ci).IsFtable()) {
3955  ftable = *annot_ci;
3956  break;
3957  }
3958  }
3959 
// None found: attach a new, empty feature table to the set's entry.
3960  if (!ftable) {
3961  CRef<CSeq_annot> new_annot(new CSeq_annot());
3962  new_annot->SetData().SetFtable();
3963  CSeq_entry_EditHandle h = parent_seh.GetEditHandle();
3964  ftable = h.AttachAnnot(*new_annot);
3965  }
3966 
3967  CSeq_annot_EditHandle old_annot = annot_handle.GetEditHandle();
3968  CSeq_annot_EditHandle new_annot = ftable.GetEditHandle();
3969  orig_feat = new_annot.TakeFeat(feh);
// Remove the source annotation if taking the feature left its table empty.
3970  const list< CRef< CSeq_feat > > &feat_list = old_annot.GetSeq_annotCore()->GetData().GetFtable();
3971  if (feat_list.empty())
3972  {
3973  old_annot.Remove();
3974  }
3975  rval = true;
3976  }
3977  }
3978  return rval;
3979 }
3980 
3981 // A function to ensure that Seq-feat.partial is set if either end of the
3982 // feature is partial, and clear if neither end of the feature is partial
// `new_feat` is modified in place: SetPartial(true) when the location is
// partial at either biological end but the flag is not set, ResetPartial()
// when the flag is set but neither end is partial.
// Returns true if the flag was changed, false if it was already consistent.
// NOTE(review): the signature line (original line 3983) is absent from this
// listing.
3984 {
3985  bool any_change = false;
3986  bool partial5 = new_feat.GetLocation().IsPartialStart(eExtreme_Biological);
3987  bool partial3 = new_feat.GetLocation().IsPartialStop(eExtreme_Biological);
3988  bool should_be_partial = partial5 || partial3;
3989  bool is_partial = false;
3990  if (new_feat.IsSetPartial() && new_feat.GetPartial()) {
3991  is_partial = true;
3992  }
3993  if (should_be_partial && !is_partial) {
3994  new_feat.SetPartial(true);
3995  any_change = true;
3996  }
3997  else if (!should_be_partial && is_partial) {
3998  new_feat.ResetPartial();
3999  any_change = true;
4000  }
4001  return any_change;
4002 }
4003 
4004 
4005 // A function to copy the 5'/3' partialness of one feature's location (src)
// onto another feature's location (dst).
// If either biological end of dst differs from src, both ends of dst's
// location are set to match src. dst's partial flag is then brought in line
// with its location via AdjustFeaturePartialFlagForLocation().
// Returns true if dst was changed in either step.
// NOTE(review): the signature line (original line 4006) is absent from this
// listing; callers in this file pass the destination first and the source
// second.
4007 {
4008  bool any_change = false;
4009  bool partial5 = src.GetLocation().IsPartialStart(eExtreme_Biological);
4010  bool partial3 = src.GetLocation().IsPartialStop(eExtreme_Biological);
4011  bool prot_5 = dst.GetLocation().IsPartialStart(eExtreme_Biological);
4012  bool prot_3 = dst.GetLocation().IsPartialStop(eExtreme_Biological);
// True when the 5' states differ or the 3' states differ.
4013  if ((partial5 && !prot_5) || (!partial5 && prot_5)
4014  || (partial3 && !prot_3) || (!partial3 && prot_3)) {
4015  dst.SetLocation().SetPartialStart(partial5, eExtreme_Biological);
4016  dst.SetLocation().SetPartialStop(partial3, eExtreme_Biological);
4017  any_change = true;
4018  }
4019  any_change |= AdjustFeaturePartialFlagForLocation(dst);
4020  return any_change;
4021 }
4022 
4023 // A function to change an existing MolInfo to match a coding region
// `molinfo` is updated in place from `cds`:
//   - the biomol is checked against eBiomol_peptide (the statement that
//     updates it is on a line absent from this listing);
//   - completeness is derived from the partialness of the CDS location:
//     both ends partial -> no_ends, 5' only -> no_left, 3' only -> no_right,
//     neither -> complete.
// Returns true if anything in `molinfo` was changed.
// NOTE(review): the signature (original line 4024), the body of the biomol
// branch (4028) and the declaration of `completeness` (4034) are absent from
// this listing.
4025 {
4026  bool rval = false;
4027  if (!molinfo.IsSetBiomol() || molinfo.GetBiomol() != CMolInfo::eBiomol_peptide) {
4029  rval = true;
4030  }
4031 
4032  bool partial5 = cds.GetLocation().IsPartialStart(eExtreme_Biological);
4033  bool partial3 = cds.GetLocation().IsPartialStop(eExtreme_Biological);
4035  if (partial5 && partial3) {
4036  completeness = CMolInfo::eCompleteness_no_ends;
4037  }
4038  else if (partial5) {
4039  completeness = CMolInfo::eCompleteness_no_left;
4040  }
4041  else if (partial3) {
4042  completeness = CMolInfo::eCompleteness_no_right;
4043  }
4044  else {
4045  completeness = CMolInfo::eCompleteness_complete;
4046  }
4047 
4048  if (!molinfo.IsSetCompleteness() || molinfo.GetCompleteness() != completeness)
4049  {
// "complete" is written through SetDefaultCompleteness() rather than
// SetCompleteness().
4050  if (completeness == CMolInfo::eCompleteness_complete)
4051  molinfo.SetDefaultCompleteness();
4052  else
4053  molinfo.SetCompleteness(completeness);
4054  rval = true;
4055  }
4056  return rval;
4057 }
4058 
4059 // A function to make all of the necessary related changes to
4060 // a Seq-entry after the partialness of a coding region has been
4061 // changed.
//   cds   - coding region whose partialness is propagated
//   scope - scope used to resolve the CDS product to a Bioseq
// Returns true if anything was changed; returns false right away when the CDS
// has no product or the product cannot be resolved in `scope`.
// NOTE(review): the line that creates the feature iterator `f` (original line
// 4077) and the loop header over the descriptors (4095) are absent from this
// listing.
4062 bool AdjustForCDSPartials(const CSeq_feat& cds, CScope& scope)
4063 {
4064  bool any_change = false;
4065 
4066  if (!cds.IsSetProduct()) {
4067  return any_change;
4068  }
4069 
4070  // find Bioseq for product
4071  CBioseq_Handle product = scope.GetBioseqHandle(cds.GetProduct());
4072  if (!product) {
4073  return any_change;
4074  }
4075 
4076  // adjust protein feature
4078  if (f) {
4079  // This is necessary, to make sure that we are in "editing mode"
4080  const CSeq_annot_Handle& annot_handle = f->GetAnnot();
4081  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
4082  CSeq_feat_EditHandle feh(*f);
// Work on a copy and replace the original only if the copy was changed.
4083  CRef<CSeq_feat> new_feat(new CSeq_feat());
4084  new_feat->Assign(*(f->GetSeq_feat()));
4085  if (CopyFeaturePartials(*new_feat, cds)) {
4086  feh.Replace(*new_feat);
4087  any_change = true;
4088  }
4089  }
4090 
4091  // change or create molinfo on protein bioseq
4092  bool found = false;
4093  CBioseq_EditHandle beh = product.GetEditHandle();
4094 
4096  if ((*it)->IsMolinfo()) {
4097  any_change |= AdjustProteinMolInfoToMatchCDS((*it)->SetMolinfo(), cds);
4098  found = true;
4099  }
4100  }
// No MolInfo descriptor yet: create one, fill it from the CDS and append it.
// This always counts as a change.
4101  if (!found) {
4102  CRef<objects::CSeqdesc> new_molinfo_desc(new CSeqdesc);
4103  AdjustProteinMolInfoToMatchCDS(new_molinfo_desc->SetMolinfo(), cds);
4104  beh.SetDescr().Set().push_back(new_molinfo_desc);
4105  any_change = true;
4106  }
4107 
4108  return any_change;
4109 }
4110 
4111 
4112 // A function to make all of the necessary related changes to
4113 // a Seq-entry after the partialness of a coding region has been
4114 // changed.
// Convenience form: forwards to AdjustForCDSPartials(cds, scope) using the
// scope of `seh`, and returns that call's result.
// NOTE(review): the signature line (original line 4115) is absent from this
// listing.
4116 {
4117  return AdjustForCDSPartials(cds, seh.GetScope());
4118 }
4119 
4120 
// Re-translates a coding region and stores the result as the sequence of its
// existing protein product.
//   cds   - coding-region feature that already has a product
//   scope - scope used to resolve the product and to translate
// Returns false when `cds` is not a Cdregion, has no product, or the product
// does not resolve to a protein Bioseq. Otherwise returns true - also when
// the translation produced no Seq-inst, in which case the protein is left
// untouched.
// NOTE(review): the line that creates the feature iterator `f` (original line
// 4145) and one statement just before the final return (4164) are absent from
// this listing.
4121 bool RetranslateCDS(const CSeq_feat& cds, CScope& scope)
4122 {
4123  // feature must be cds and already have product
4124  if (!cds.IsSetData() || !cds.GetData().IsCdregion() || !cds.IsSetProduct()) {
4125  return false;
4126  }
4127 
4128  // Use Cdregion.Product to get handle to protein bioseq
4129  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(cds.GetProduct());
4130 
4131  // Should be a protein!
4132  if (!prot_bsh || !prot_bsh.IsProtein())
4133  {
4134  return false;
4135  }
4136 
4137  CBioseq_EditHandle peh = prot_bsh.GetEditHandle();
4138  CRef<CBioseq> new_protein = CSeqTranslator::TranslateToProtein(cds, scope);
4139  if (new_protein && new_protein->IsSetInst()) {
// Install a copy of the freshly translated Seq-inst on the protein.
4140  CRef<CSeq_inst> new_inst(new CSeq_inst());
4141  new_inst->Assign(new_protein->GetInst());
4142  peh.SetInst(*new_inst);
4143 
4144  // If protein feature exists, update location
4146  if (f) {
4147  // This is necessary, to make sure that we are in "editing mode"
4148  const CSeq_annot_Handle& annot_handle = f->GetAnnot();
4149  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
4150  CSeq_feat_EditHandle feh(*f);
4151  CRef<CSeq_feat> new_feat(new CSeq_feat());
4152  new_feat->Assign(*(f->GetSeq_feat()));
// Only a simple interval location is adjusted; its end is moved to the last
// residue of the new protein.
4153  if (new_feat->CanGetLocation() &&
4154  new_feat->GetLocation().IsInt() &&
4155  new_feat->GetLocation().GetInt().CanGetTo())
4156  {
4157  new_feat->SetLocation().SetInt().SetTo(
4158  new_protein->GetLength() - 1);
4159  feh.Replace(*new_feat);
4160  }
4161  }
4162  }
4163 
4165  return true;
4166 }
4167 
4168 
4169 void AddFeatureToBioseq(const CBioseq& seq, const CSeq_feat& f, CScope& scope)
4170 {
4171  bool added = false;
4172  if (seq.IsSetAnnot()) {
4173  ITERATE(CBioseq::TAnnot, it, seq.GetAnnot()) {
4174  if ((*it)->IsFtable()) {
4175  CSeq_annot_Handle sah = scope.GetSeq_annotHandle(**it);
4176  CSeq_annot_EditHandle eh(sah);
4177  eh.AddFeat(f);
4178  added = true;
4179  break;
4180  }
4181  }
4182  }
4183  if (!added) {
4184  CRef<CSeq_annot> annot(new CSeq_annot());
4185  CRef<CSeq_feat> sf(new CSeq_feat());
4186  sf->Assign(f);
4187  annot->SetData().SetFtable().push_back(sf);
4188  CBioseq_Handle bh = scope.GetBioseqHandle(seq);
4189  CBioseq_EditHandle beh(bh);
4190  beh.AttachAnnot(*annot);
4191  }
4192 }
4193 
4194 
4195 void AddProteinFeature(const CBioseq& seq, const string& protein_name, const CSeq_feat& cds, CScope& scope)
4196 {
4197  // make new protein feature
4198  CRef<CSeq_feat> new_prot(new CSeq_feat());
4199  new_prot->SetLocation().SetInt().SetId().Assign(*(cds.GetProduct().GetId()));
4200  new_prot->SetLocation().SetInt().SetFrom(0);
4201  new_prot->SetLocation().SetInt().SetTo(seq.GetLength() - 1);
4202  new_prot->SetData().SetProt().SetName().push_back(protein_name);
4203  CopyFeaturePartials(*new_prot, cds);
4204 
4205  AddFeatureToBioseq(seq, *new_prot, scope);
4206 }
4207 
4208 
// Collects into `children` the descendants of `mf` (as seen by `featTree`)
// whose subtype equals `subtype`. A child that matches is appended and not
// descended into; a child that does not match is searched recursively.
// `children` is appended to, never cleared. Always returns true.
// NOTE(review): the opening line of this definition (original line 4210) is
// absent from this listing.
4209 // ----------------------------------------------------------------------------
4211  CMappedFeat mf,
4212  CSeqFeatData::ESubtype subtype,
4213  vector<CMappedFeat>& children,
4214  feature::CFeatTree& featTree)
4215 // ----------------------------------------------------------------------------
4216 {
4217  //const CSeq_feat& ff = mf.GetOriginalFeature();
4218 
4219  vector<CMappedFeat> c = featTree.GetChildren(mf);
4220  for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4221  CMappedFeat f = *it;
4222  if (f.GetFeatSubtype() == subtype) {
4223  children.push_back(f);
4224  }
4225  else {
4226  sFeatureGetChildrenOfSubtypeFaster(f, subtype, children, featTree);
4227  }
4228  }
4229  return true;
4230 }
4231 
4232 
// Same collection as sFeatureGetChildrenOfSubtypeFaster(), but instead of
// using a caller-supplied tree it builds a local CFeatTree for `mf` via
// AddFeaturesFor(mf, subtype, mf.GetFeatSubtype()) on every call.
// `children` is appended to, never cleared. Always returns true.
// NOTE(review): the opening line of this definition (original line 4234) is
// absent from this listing.
4233 // ----------------------------------------------------------------------------
4235  CMappedFeat mf,
4236  CSeqFeatData::ESubtype subtype,
4237  vector<CMappedFeat>& children)
4238 // ----------------------------------------------------------------------------
4239 {
4240  //const CSeq_feat& ff = mf.GetOriginalFeature();
4241  feature::CFeatTree myTree;
4242  myTree.AddFeaturesFor(mf, subtype, mf.GetFeatSubtype());
4243 
4244  vector<CMappedFeat> c = myTree.GetChildren(mf);
4245  for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4246  CMappedFeat f = *it;
4247  if (f.GetFeatSubtype() == subtype) {
4248  children.push_back(f);
4249  }
4250  else {
// Descend through non-matching children using the tree built above.
4251  sFeatureGetChildrenOfSubtypeFaster(f, subtype, children, myTree);
4252  }
4253  }
4254  return true;
4255 }
4256 
// Derives the gene_biotype string for a gene feature from its descendants.
//   ft      - feature tree; used only when `fast` is true
//   mf      - feature to classify; anything other than a gene is rejected
//   biotype - output; written only when the function returns true
//   fast    - true: look descendants up in `ft`;
//             false: build a fresh tree per lookup via
//             sFeatureGetChildrenOfSubtype()
// Returns false when `mf` is not a gene, true otherwise. The rules are applied
// in the order of the numbered comments below and the first match wins.
// NOTE(review): the opening line of this definition (original line 4258) is
// absent from this listing.
4257 // ----------------------------------------------------------------------------
4259  feature::CFeatTree& ft,
4260  CMappedFeat mf,
4261  string& biotype,
4262  bool fast)
4263 // ----------------------------------------------------------------------------
4264 {
4265 #define SUBTYPE(x) CSeqFeatData::eSubtype_ ## x
4266 
4267  typedef vector<CMappedFeat> MFS;
4268  typedef MFS::const_iterator MFSit;
4269 
4270  const string strRearrange("rearrangement required for product");
4271 
4272  //0a
4273  // Only genes ever get that new gene_biotype attribute, other feature types
4274  // control whether the parent gene gets it but they don't get the attribute
4275  // themselves.
4276  //
4277  if (mf.GetFeatSubtype() != SUBTYPE(gene)) {
4278  return false;
4279  }
4280 
4281  //for debugging specific genes:
4282  // size_t start = mf.GetLocation().GetInt().GetStart(objects::eExtreme_Positional);
4283  // if (start == 23365505-1) {
4284  // cerr << "";
4285  // }
4286 
4287  vector<CMappedFeat> vecCds;
4288  if (fast) {
4289  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(cdregion), vecCds, ft);
4290  }
4291  else {
4292  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(cdregion), vecCds);
4293  }
4294 
4295  //1a
4296  // If there is at least one non-pseudo CDS child without a
4297  // except-text="rearrangement required for product" qualifier then
4298  // gene_biotype qualifier is "protein_coding".
4299  //
// This rule is skipped entirely when the gene itself is pseudo.
4300  if (!mf.IsSetPseudo() || !mf.GetPseudo()) {
4301  for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4302  if (it->IsSetPseudo() && it->GetPseudo()) {
4303  continue;
4304  }
4305  if (it->IsSetExcept_text() && (it->GetExcept_text() == strRearrange)) {
4306  continue;
4307  }
4308  biotype = "protein_coding";
4309  return true;
4310  }
4311  }
4312 
// Gather every descendant of the region/segment/RNA subtypes listed below.
4313  vector<CMappedFeat> vecOthers;
4314  if (fast) {
4315  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(V_region), vecOthers, ft);
4316  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(C_region), vecOthers, ft);
4317  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(V_segment), vecOthers, ft);
4318  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(D_segment), vecOthers, ft);
4319  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(J_segment), vecOthers, ft);
4320  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(tRNA), vecOthers, ft);
4321  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(rRNA), vecOthers, ft);
4322  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(snRNA), vecOthers, ft);
4323  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(snoRNA), vecOthers, ft);
4324  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(tmRNA), vecOthers, ft);
4325  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(otherRNA), vecOthers, ft);
4326  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(ncRNA), vecOthers, ft);
4327  }
4328  else{
4329  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(V_region), vecOthers);
4330  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(C_region), vecOthers);
4331  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(V_segment), vecOthers);
4332  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(D_segment), vecOthers);
4333  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(J_segment), vecOthers);
4334  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(tRNA), vecOthers);
4335  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(rRNA), vecOthers);
4336  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(snRNA), vecOthers);
4337  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(snoRNA), vecOthers);
4338  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(tmRNA), vecOthers);
4339  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(otherRNA), vecOthers);
4340  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(ncRNA), vecOthers);
4341  }
// singleSubtype: the one subtype shared by all of vecOthers, or SUBTYPE(bad)
// when vecOthers is empty or mixes subtypes.
// nonPseudo: a non-pseudo member of vecOthers (the last one seen); never set
// when the gene itself is pseudo.
4342  CSeqFeatData::ESubtype singleSubtype = SUBTYPE(bad);
4343  CMappedFeat nonPseudo;
4344 
4345  bool geneIsPseudo = mf.IsSetPseudo() && mf.GetPseudo();
4346  for (MFSit it = vecOthers.begin(); it != vecOthers.end(); it++) {
4347  CSeqFeatData::ESubtype currentSubtype = it->GetFeatSubtype();
4348  if (!geneIsPseudo && (!it->IsSetPseudo() || !it->GetPseudo())) {
4349  nonPseudo = *it;
4350  }
4351  if (singleSubtype == SUBTYPE(bad)) {
4352  singleSubtype = currentSubtype;
4353  }
4354  else if (currentSubtype != singleSubtype) {
4355  singleSubtype = SUBTYPE(bad);
4356  break;
4357  }
4358  }
4359 
4360  //2a
4361  // If the only feature type present in vecOthers is ncRNA and at least one
4362  // of the members is non-pseudo then look at CLASS=RNA-ref.ext.gen.class.
4363  // If CLASS=="other", then gene_biotype="ncRNA".
4364  // If not, then gene_biotype=<CLASS>.
4365  //
// A class that is missing, or not in the list below, also yields "ncRNA".
4366  vector<string> acceptedClasses = {
4367  "antisense_RNA",
4368  "autocatalytically_spliced_intron",
4369  "guide_RNA",
4370  "hammerhead_ribozyme",
4371  "lncRNA",
4372  "miRNA",
4373  "ncRNA",
4374  "other",
4375  "piRNA",
4376  "rasiRNA",
4377  "ribozyme",
4378  "RNase_MRP_RNA",
4379  "RNase_P_RNA",
4380  "scRNA",
4381  "siRNA",
4382  "snoRNA",
4383  "snRNA",
4384  "SRP_RNA",
4385  "stRNA",
4386  "telomerase_RNA",
4387  "vault_RNA",
4388  "Y_RNA"};
4389 
4390  if (singleSubtype == SUBTYPE(ncRNA) && nonPseudo) {
4391  const CRNA_ref& rna = nonPseudo.GetData().GetRna();
4392  if (!rna.IsSetExt()) {
4393  biotype = "ncRNA";
4394  return true;
4395  }
4396  const CRNA_ref::TExt& ext = rna.GetExt();
4397  if (!ext.IsGen()) {
4398  biotype = "ncRNA";
4399  return true;
4400  }
4401  if (ext.IsGen() && ext.GetGen().IsSetClass()) {
4402  string rnaClass = ext.GetGen().GetClass();
4403  if (rnaClass == "other") {
4404  biotype = "ncRNA";
4405  return true;
4406  }
4407  if (std::find(acceptedClasses.begin(), acceptedClasses.end(), rnaClass) ==
4408  acceptedClasses.end()) {
4409  biotype = "ncRNA";
4410  return true;
4411  }
4412  biotype = rnaClass;
4413  return true;
4414  }
4415  else {
4416  biotype = "ncRNA";
4417  return true;
4418  }
4419  }
4420 
4421  //2b
4422  // If still here and all members of vecOthers are of the same feature type FTYPE and
4423  // at least one of the members is non-pseudo, then gene_biotype=<FTYPE>
4424  //
4425  if (singleSubtype != SUBTYPE(bad) && nonPseudo) {
4426  biotype = CSeqFeatData::SubtypeValueToName(singleSubtype);
4427  return true;
4428  }
4429 
4430  //2c
4431  // If all members of vecOthers are of type miscRNA (and also all pseudo or we would no
4432  // longer be here) then gene_biotype="transcribed_pseudogene".
4433  if (singleSubtype == SUBTYPE(otherRNA)) {
4434  biotype = "transcribed_pseudogene";
4435  return true;
4436  }
4437 
4438  //2d
4439  // If all members of vecOthers are of the same feature type FTYPE (and also all pseudo
4440  // or we would no longer be here) then gene_biotype=<FTYPE>"_pseudogene"
4441  if (singleSubtype != SUBTYPE(bad)) {
4442  biotype = CSeqFeatData::SubtypeValueToName(singleSubtype) + "_pseudogene";
4443  return true;
4444  }
4445 
4446  //3a
4447  // If vecCds is empty then gene_biotype="other", unless pseudo=TRUE
4448  if (vecCds.empty() && (!mf.IsSetPseudo() || !mf.GetPseudo())) {
4449  biotype = "other";
4450  return true;
4451  }
4452 
4453  //3b
4454  // If at least one member of vecCds with "except-text=rearrangement required for product"
4455  // then gene_biotype="segment" for pseudo=FALSE and gene_biotype="segment_pseudogene" for
4456  // pseudo=TRUE.
// The pseudo state tested here is that of the first such CDS, not the gene's.
4457  for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4458  if (!it->IsSetExcept_text()) {
4459  continue;
4460  }
4461  if (it->GetExcept_text() != strRearrange) {
4462  continue;
4463  }
4464  if (it->IsSetPseudo() && it->GetPseudo()) {
4465  biotype = "segment_pseudogene";
4466  }
4467  else {
4468  biotype = "segment";
4469  }
4470  return true;
4471  }
4472 
4473  //3c
4474  // If we made it to that point then all members of the non-empty vecCds are pseudo or
4475  // vecCds is empty and the gene itself is pseudo.
4476  // In this case, gene_biotype="pseudogene".
4477  biotype = "pseudogene";
4478 
4479  return true;
4480 #undef SUBTYPE
4481 }
4482 
4483 
// Entry point for the "fast" variant: forwards to
// sGetFeatureGeneBiotypeWrapper() with fast=true, so descendants are looked up
// in the caller-supplied tree `ft`. Returns the wrapper's result and leaves
// the answer in `biotype`.
// NOTE(review): the opening line of this definition (original line 4485) is
// absent from this listing.
4484 // ----------------------------------------------------------------------------
4486  feature::CFeatTree& ft,
4487  CMappedFeat mf,
4488  string& biotype)
4489 // ----------------------------------------------------------------------------
4490 {
4491  return sGetFeatureGeneBiotypeWrapper(ft, mf, biotype, true);
4492 }
4493 
// Entry point for the non-fast variant: forwards to
// sGetFeatureGeneBiotypeWrapper() with fast=false, in which case the wrapper
// does not consult `ft` but builds its own trees for the lookups. Returns the
// wrapper's result and leaves the answer in `biotype`.
// NOTE(review): the opening line of this definition (original line 4495) is
// absent from this listing.
4494 // ----------------------------------------------------------------------------
4496  feature::CFeatTree& ft,
4497  CMappedFeat mf,
4498  string& biotype)
4499 // ----------------------------------------------------------------------------
4500 {
4501  return sGetFeatureGeneBiotypeWrapper(ft, mf, biotype, false);
4502 }
4503 
4504 
4505 END_SCOPE(feature)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eBoth
Both preliminary and traceback stages.
Definition: blast_def.h:332
CSeq_annot_Handle GetAnnot(void) const
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
int Compare(const CDbtag &dbt2) const
Definition: Dbtag.cpp:176
set< SBestInfo, SBestInfoLess > TBestSet
Definition: feature.cpp:2327
void Disambiguate(TBestArray &bests)
Definition: feature.cpp:2448
bool m_IsAmbiguous
Definition: feature.cpp:2340
CFeatTree::CFeatInfo CFeatInfo
Definition: feature.cpp:2281
map< CFeatInfo *, SCandidates > TChildren
Definition: feature.cpp:2336
bool Add(CFeatInfo *child, CFeatInfo *parent, Int1 quality, Int8 overlap)
Definition: feature.cpp:2294
list< CFeatInfo * > TChildList
Definition: feature.cpp:2282
map< CFeatInfo *, SParentInfo > TParents
Definition: feature.cpp:2337
TChildren m_Children
Definition: feature.cpp:2341
CDisambiguator(CFeatTree::TFeatArray &features)
Definition: feature.cpp:2272
TParents m_Parents
Definition: feature.cpp:2342
TIndex m_Index
Definition: feature.cpp:1914
map< TParentKey, CRef< CFeatTreeParentTypeIndex > > TIndex
Definition: feature.cpp:1894
TRangeArray & GetIndex(CSeqFeatData::ESubtype type, bool by_product, const TInfoArray &feats)
Definition: feature.cpp:1896
pair< CSeqFeatData::ESubtype, bool > TParentKey
Definition: feature.cpp:1893
TRangeArray & GetIndex(const STypeLink &link, const TInfoArray &feats)
Definition: feature.cpp:1907
TCanonicalIdsMap m_CanonicalIds
Definition: feature.cpp:1915
CFeatTree.
Definition: feature.hpp:173
CFeat_CI –.
Definition: feat_ci.hpp:64
CFeat_id –.
Definition: Feat_id.hpp:66
void GetLabel(string *label) const
Definition: Gene_ref.cpp:57
CGiimport_id –.
Definition: Giimport_id.hpp:66
void AddLocation(const CSeq_loc &loc, ETransSplicing trans_splcing=eNoTransSplicing)
TLocMap::const_iterator const_iterator
CMappedFeat –.
Definition: mapped_feat.hpp:59
Base class for all object manager exceptions.
CObject –.
Definition: ncbiobj.hpp:180
Exceptions for objmgr/util library.
void GetLabel(string *label) const
Definition: Org_ref.cpp:57
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Append a label to "label" based on content.
Definition: Pub_equiv.cpp:56
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Get a label that is the concatenation of the pub labels for the pubs in the set.
Definition: Pub_set.cpp:74
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static E_Choice GetTypeFromSubtype(ESubtype subtype)
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_entry_Handle –.
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
static const string & GetIupacaa3(TIndex ncbistdaa)
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
CSeq_feat_Handle GetGeneByRef(const CGene_ref &ref) const
Definition: tse_handle.cpp:887
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:635
TSeq_feat_Handles GetGenesByRef(const CGene_ref &ref) const
Definition: tse_handle.cpp:901
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
void erase(iterator pos)
Definition: set.hpp:151
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
static void s_GetRnaRefLabelFromComment(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:252
static CMappedFeat GetBestOverlappingFeat(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TBestFeatOpts opts, const SAnnotSelector *base_sel)
Definition: feature.cpp:3628
bool sFeatureGetChildrenOfSubtypeFaster(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &, feature::CFeatTree &)
Definition: feature.cpp:4210
bool sGetFeatureGeneBiotypeWrapper(feature::CFeatTree &, CMappedFeat, string &, bool)
Definition: feature.cpp:4258
pair< Int8, CMappedFeat > TMappedFeatScore
Definition: feature.cpp:3480
static const bool kOptimizeTestOverlap
Definition: feature.cpp:1592
void s_GetContentLabel(const CSeq_feat &feat, string *label, const string *type_label, TFeatLabelFlags flags, CScope *scope)
Definition: feature.cpp:588
static void s_GetVariationLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *)
Definition: feature.cpp:563
static EStrandMatchRule s_GetStrandMatchRule(const STypeLink &link, const CFeatTree::CFeatInfo &info, const CFeatTree *tree)
Definition: feature.cpp:2234
static const bool kSplitCircular
Definition: feature.cpp:1591
static void GetOverlappingFeatures(CScope &scope, const CSeq_loc &loc, CSeqFeatData::E_Choice, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TMappedFeatScores &feats, const SAnnotSelector *base_sel)
Definition: feature.cpp:3484
static void s_SetChildrenFeatureIds(CFeatTree &ft, const CMappedFeat &feat, int &feat_id)
Definition: feature.cpp:3776
void s_GetTypeLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags)
Definition: feature.cpp:104
static void s_GetVariationDbtagLabel(string *tlabel, TFeatLabelFlags, const CDbtag &dbtag)
Definition: feature.cpp:406
static bool s_IsNotSubrange(const CRange< TSeqPos > &r1, const CRange< TSeqPos > &r2)
Definition: feature.cpp:2488
static void s_GetRnaRefLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:274
#define SUBTYPE(x)
static bool s_AllowedParentByOverlap(CSeqFeatData::ESubtype child, CSeqFeatData::ESubtype parent)
Definition: feature.cpp:2712
static bool s_GetImpLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:430
static void s_GetCdregionLabel(const CSeq_feat &feat, string *tlabel, CScope *scope)
Definition: feature.cpp:140
vector< TMappedFeatScore > TMappedFeatScores
Definition: feature.cpp:3481
static void s_CollectBestOverlaps(CFeatTree::TFeatArray &features, TBestArray &bests, const STypeLink &link, TRangeArray &pp, CFeatTree *tree, TCanonicalIdsMap &ids_map)
Definition: feature.cpp:2494
static void s_SetFeatureId(CFeatTree &ft, const CMappedFeat &feat, int &last_id, const CMappedFeat &parent)
Definition: feature.cpp:3755
USING_SCOPE(sequence)
static CRef< CSeq_loc > s_MakePointForLocationStop(const CSeq_loc &loc)
Definition: feature.cpp:3807
EStrandMatchRule
Definition: feature.cpp:2228
@ eStrandMatch_at_least_one
Definition: feature.cpp:2230
@ eStrandMatch_any
Definition: feature.cpp:2231
@ eStrandMatch_all
Definition: feature.cpp:2229
bool sFeatureGetChildrenOfSubtype(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &)
Definition: feature.cpp:4234
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
Definition: ncbimisc.hpp:843
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define _DEBUG_ARG(arg)
Definition: ncbidbg.hpp:134
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const float pi
Definition: math.hpp:54
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
int Compare(const CSeq_loc &loc) const
Definition: Seq_loc.cpp:590
bool CheckId(const CSeq_id *&id, bool may_throw=true) const
check that the 'id' field in all parts of the location is the same as the specified id.
Definition: Seq_loc.hpp:927
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
Definition: Seq_loc.cpp:3346
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fCompare_Default
Definition: Seq_loc.hpp:245
void x_AssignGenes(void)
Definition: feature.cpp:2862
void GetMrnasForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &mrna_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3384
EGeneCheckMode m_GeneCheckMode
Definition: feature.hpp:423
bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle &orig_feat)
Promotes coding region from Seq-annot on nucleotide sequence to Seq-annot on nuc-prot-set if necessar...
Definition: feature.cpp:3914
CFeatInfo & x_GetInfo(const CSeq_feat_Handle &feat)
Definition: feature.cpp:2073
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_type, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3462
bool m_IgnoreMissingGeneXref
Definition: feature.hpp:424
TInfoMap m_InfoMap
Definition: feature.hpp:418
CRef< CFeatTreeIndex > m_Index
Definition: feature.hpp:426
void GetCdssForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &cds_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3409
vector< CMappedFeat > GetChildren(const CMappedFeat &feat)
Return all nearest children of a feature.
Definition: feature.cpp:3100
bool x_AssignParentByRef(CFeatInfo &info)
Definition: feature.cpp:2174
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3301
void SetFeatIdMode(EFeatIdMode mode)
Definition: feature.cpp:2012
const CTSE_Handle & GetTSE(void) const
Definition: feature.cpp:3165
bool GetFeatureGeneBiotype(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
Definition: feature.cpp:4495
void GetLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, CScope *scope)
Definition: feature.cpp:743
void SetSNPStrandMode(ESNPStrandMode mode)
Definition: feature.cpp:2030
void AddGenesForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a cdregion feature.
Definition: feature.cpp:3250
CFeatInfo m_RootInfo
Definition: feature.hpp:420
void AddCdsForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a mRNA feature.
Definition: feature.cpp:3240
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
Definition: feature.cpp:4121
void SetIgnoreMissingGeneXref(bool ignore=true)
Definition: feature.cpp:2024
CFeatInfo * GetChildrenGene(void)
Definition: feature.hpp:362
void x_AssignParents(void)
Definition: feature.cpp:2931
bool AdjustForCDSPartials(const CSeq_feat &cds, CScope &scope)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
Definition: feature.cpp:4062
void Reset(void)
Definition: feature.cpp:794
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3341
void x_SetGene(CFeatInfo &info, CFeatInfo *gene)
Definition: feature.cpp:3043
pair< int, CTSE_Handle > TFullId
Definition: feature.hpp:115
vector< CFeatInfo * > TChildren
Definition: feature.hpp:366
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
Definition: feature.cpp:4006
CFeatInfo * m_Gene
Definition: feature.hpp:380
ESNPStrandMode
Mode of processing SNP strands.
Definition: feature.hpp:241
void AddCdsForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a gene feature.
Definition: feature.cpp:3278
const TChildren & x_GetChildren(CFeatInfo &info)
Definition: feature.cpp:3060
const CMappedFeat & GetMappedFeat(const CSeq_feat_Handle &feat) const
Find a corresponding CMappedFeat for a feature already added to a tree.
Definition: feature.cpp:2084
CMappedFeat GetBestGene(const CMappedFeat &feat, EBestGeneType lookup_type=eBestGene_TreeOnly)
Return parent gene if exists or best overlapping gene.
Definition: feature.cpp:3127
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3321
void AddMrnasForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a gene feature.
Definition: feature.cpp:3268
void AddProteinFeature(const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
AddProteinFeature A function to create a protein feature with the specified protein name.
Definition: feature.cpp:4195
CMappedFeat m_Feat
Definition: feature.hpp:369
void ClearFeatureIds(const CSeq_annot_EditHandle &annot)
Definition: feature.cpp:3727
void AddFeatureToBioseq(const CBioseq &seq, const CSeq_feat &f, CScope &scope)
AddFeatureToBioseq A function to add a feature to a Bioseq - will create a new feature table Seq-anno...
Definition: feature.cpp:4169
ELabelType
For compatibility with legacy code.
Definition: feature.hpp:85
EBestGeneFeatIdMode m_BestGeneFeatIdMode
Definition: feature.hpp:422
TChildren m_Children
Definition: feature.hpp:381
void x_AssignParentsByOverlap(TFeatArray &features, const STypeLink &link)
Definition: feature.cpp:2731
ESNPStrandMode m_SNPStrandMode
Definition: feature.hpp:425
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
Definition: feature.cpp:3818
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3443
ELocationInFrame
Definition: feature.hpp:531
size_t m_AssignedGenes
Definition: feature.hpp:417
CMappedFeat GetParent(const CMappedFeat &feat)
Return nearest parent of a feature.
Definition: feature.cpp:3067
vector< CFeatInfo * > TFeatArray
Definition: feature.hpp:383
void x_Init(void)
Definition: feature.cpp:2000
EIsLinkedToRoot m_IsLinkedToRoot
Definition: feature.hpp:378
void AddFeaturesFor(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype bottom_type, CSeqFeatData::ESubtype top_type, const SAnnotSelector *base_sel=0, bool skip_bottom=false)
Add all features from bottom_type to top_type for a feature.
Definition: feature.cpp:3171
CRef< CSeq_loc_Mapper > CreateSeqLocMapperFromFeat(const CSeq_feat &feat, CSeq_loc_Mapper::EFeatMapDirection dir, CScope *scope)
Create CSeq_loc_Mapper from a feature, check for special cases like exceptions in CDS features.
Definition: feature.cpp:3698
void AddFeature(const CMappedFeat &feat)
Add a single feature to the tree.
Definition: feature.cpp:2044
bool GetIgnoreMissingGeneXref(void) const
Mode for taking into account gene xrefs to missing genes.
Definition: feature.hpp:235
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
Definition: feature.cpp:3983
bool GetFeatureGeneBiotypeFaster(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
Definition: feature.cpp:4485
CMappedFeat MapSeq_feat(const CSeq_feat_Handle &feat, const CBioseq_Handle &master_seq, const CRange< TSeqPos > &range)
Definition: feature.cpp:973
void SetGeneCheckMode(EGeneCheckMode mode)
Definition: feature.cpp:2018
void x_AssignGenesByOverlap(TFeatArray &features)
Definition: feature.cpp:2818
void GetChildrenTo(const CMappedFeat &feat, vector< CMappedFeat > &children)
Store all nearest children of a feature into a vector.
Definition: feature.cpp:3108
vector< CFeatInfo * > TChildren
Definition: feature.hpp:386
TIdMap m_IdMap
Definition: feature.hpp:117
void x_SetGeneRecursive(CFeatInfo &info, CFeatInfo *gene)
Definition: feature.cpp:2850
CMappedFeat GetParentFeature(const CMappedFeat &feat)
Definition: feature.cpp:1615
void x_SetNoParent(CFeatInfo &info)
Definition: feature.cpp:3033
pair< int, CFeatInfo * > x_LookupParentByRef(CFeatInfo &info, CSeqFeatData::ESubtype parent_type)
Definition: feature.cpp:2106
EFeatIdMode m_FeatIdMode
Definition: feature.hpp:421
EGeneCheckMode
Mode for taking into account best gene eGeneCheck_match will try to match a parent feature only if th...
Definition: feature.hpp:224
bool IsSetGene(void) const
Definition: feature.hpp:350
size_t GetFeatIdsCount(void) const
Definition: feature.cpp:800
~CFeatTree(void)
Destructor.
Definition: feature.cpp:1966
CSeqFeatData::ESubtype GetSubtype(void) const
Definition: feature.hpp:353
virtual bool Less(const CSeq_feat &f1, const CSeq_feat &f2, CScope *scope)
Definition: feature.cpp:891
void AddGenesForFeat(const CMappedFeat &feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for an arbitrary feature.
Definition: feature.cpp:3288
void x_VerifyLinkedToRoot(CFeatInfo &info)
Definition: feature.cpp:3000
vector< CFeatInfo * > TInfoArray
Definition: feature.hpp:415
CFeatInfo * x_GetParent(CFeatInfo &info)
Definition: feature.cpp:3051
CFeatTree & operator=(const CFeatTree &)
Definition: feature.cpp:1977
TInfoArray m_InfoArray
Definition: feature.hpp:419
size_t m_AssignedParents
Definition: feature.hpp:417
int RemapId(int old_id, const CTSE_Handle &tse)
Definition: feature.cpp:806
void AddMrnasForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a cdregion feature.
Definition: feature.cpp:3259
EGeneCheckMode GetGeneCheckMode(void) const
Definition: feature.hpp:228
void AddFeatures(CFeat_CI it)
Add all features collected by a CFeat_CI to the tree.
Definition: feature.cpp:2036
void AddGenesForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a mRNA feature.
Definition: feature.cpp:3231
void x_SetParent(CFeatInfo &info, CFeatInfo &parent)
Definition: feature.cpp:3020
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
Definition: feature.cpp:4024
CFeatInfo * x_FindInfo(const CSeq_feat_Handle &feat)
Definition: feature.cpp:2095
bool RemapIds(CSeq_feat &feat, const CTSE_Handle &tse)
Definition: feature.cpp:853
void ReassignFeatureIds(const CSeq_entry_EditHandle &entry)
Definition: feature.cpp:3787
EFeatIdMode
Mode of processing feature ids.
Definition: feature.hpp:201
int TFeatLabelFlags
binary OR of FFeatLabelFlags
Definition: feature.hpp:78
CMappedFeat GetBestCdsForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3360
bool GivesGeneToChildren(void) const
Definition: feature.hpp:359
CFeatTree(void)
Construct empty tree.
Definition: feature.cpp:1923
@ eBestGene_AllowOverlapped
Definition: feature.hpp:332
@ eBestGene_TreeOnly
Definition: feature.hpp:331
@ eSNPStrand_both
Definition: feature.hpp:243
@ eContent
Definition: feature.hpp:87
@ eType
Definition: feature.hpp:86
@ eLocationInFrame_InFrame
Definition: feature.hpp:532
@ eLocationInFrame_BadStart
Definition: feature.hpp:533
@ eLocationInFrame_BadStop
Definition: feature.hpp:534
@ eLocationInFrame_BadStartAndStop
Definition: feature.hpp:535
@ eLocationInFrame_NotIn
Definition: feature.hpp:536
@ fFGL_NoComments
Leave out comments, even as fallbacks.
Definition: feature.hpp:75
@ fFGL_NoQualifiers
Leave out qualifiers.
Definition: feature.hpp:76
@ fFGL_Both
Definition: feature.hpp:74
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
@ fFGL_Type
Always include the feature's type.
Definition: feature.hpp:72
@ eGeneCheck_match
Definition: feature.hpp:226
@ eBestGeneFeatId_ignore
Definition: feature.hpp:213
@ eBestGeneFeatId_always
Definition: feature.hpp:214
@ eFeatId_always
Definition: feature.hpp:204
@ eFeatId_by_type
Definition: feature.hpp:203
@ eFeatId_ignore
Definition: feature.hpp:202
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
EOverlapType
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
Int8 TestForOverlap64(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
64-bit version of TestForOverlap(). Check if the two locations have overlap of a given type.
ECompare
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
int TBestFeatOpts
Definition: sequence.hpp:348
@ eGetId_Seq_id_BestRank
use CSeq_id::BestRank() as the scoring function
Definition: sequence.hpp:107
@ fBestFeat_FavorLonger
favor longer features over shorter features
Definition: sequence.hpp:339
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_annot_Handle GetSeq_annotHandle(const CSeq_annot &annot, EMissing action=eMissing_Default)
Definition: scope.cpp:192
const CFeat_id & GetId(void) const
const CSeq_feat::TXref & GetXref(void) const
bool IsSetId(void) const
bool GetPseudo(void) const
TClass GetClass(void) const
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
void SetDescr(TDescr &v) const
void Remove(void) const
Remove current annot.
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
TSet GetSet(void) const
bool IsSetXref(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
void ClearFeatIds(void)
Clear feature ids.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
bool IsSetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
void SetInst(TInst &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Get parent bioseq-set handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void SetFeatId(int id)
Set single feature id.
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetClass(void) const
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_feat_EditHandle TakeFeat(const CSeq_feat_EditHandle &handle) const
CSeq_annot_EditHandle GetAnnot(void) const
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
bool IsSet(void) const
bool IsSetQual(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
bool IsSetPseudo(void) const
bool IsSetInst_Topology(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
CConstRef< CSeq_annot > GetSeq_annotCore(void) const
bool IsSetData(void) const
CSeq_entry_EditHandle GetParentEntry(void) const
Navigate object tree.
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetOverlapTotalRange(void)
Check overlapping only of total ranges.
SAnnotSelector & SetSourceLoc(const CSeq_loc &loc)
Set filter for source location of annotations.
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetOverlapType(EOverlapType overlap_type)
Set overlap type.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetLimitSeqAnnot(const CSeq_annot_Handle &limit)
Limit annotations to those from the seq-annot only.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
EOverlapType
Flag to indicate location overlapping method.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
#define kMax_I8
Definition: ncbi_limits.h:221
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
int8_t Int1
1-byte (8-bit) signed integer
Definition: ncbitype.h:98
#define kMin_I1
Definition: ncbi_limits.h:211
position_type GetToOpen(void) const
Definition: range.hpp:138
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCout
Definition: ncbistre.hpp:543
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
#define NPOS
Definition: ncbistr.hpp:133
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
#define NCBI_XOBJUTIL_EXPORT
Definition: ncbi_export.h:1339
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
E_Choice Which(void) const
Which variant is currently selected.
Definition: Object_id_.hpp:235
bool IsSetClass(void) const
endeavor which designed this object Check if a value has been assigned to Class data member.
const TClass & GetClass(void) const
Get the Class member data.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
E_Choice
Choice variants.
Definition: Trna_ext_.hpp:109
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value h...
Definition: RNA_gen_.hpp:247
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
@ e_not_set
No variant selected.
Definition: RNA_ref_.hpp:133
@ e_Name
for naming "other" type
Definition: RNA_ref_.hpp:134
E_Choice Which(void) const
Which variant is currently selected.
Definition: Rsite_ref_.hpp:235
const TDb & GetDb(void) const
Get the variant data.
Definition: Rsite_ref_.cpp:109
const TStr & GetStr(void) const
Get the variant data.
Definition: Rsite_ref_.hpp:270
@ e_Db
pointer to a restriction site database
Definition: Rsite_ref_.hpp:93
@ e_Str
may be unparsable
Definition: Rsite_ref_.hpp:92
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
bool IsSetOrf(void) const
just an ORF ? Check if a value has been assigned to Orf data member.
Definition: Cdregion_.hpp:462
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
void ResetPartial(void)
Reset Partial data member.
Definition: Seq_feat_.hpp:955
const TData & GetData(void) const
Get the Data member data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
E_Choice Which(void) const
Which variant is currently selected.
TPsec_str GetPsec_str(void) const
Get the variant data.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
Definition: Seq_feat_.hpp:1141
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
TOrf GetOrf(void) const
Get the Orf member data.
Definition: Cdregion_.hpp:481
bool IsImp(void) const
Check if variant Imp is selected.
const TRegion & GetRegion(void) const
Get the variant data.
const TCit & GetCit(void) const
Get the Cit member data.
Definition: Seq_feat_.hpp:1240
E_Choice Which(void) const
Which variant is currently selected.
Definition: Feat_id_.hpp:291
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
TBond GetBond(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Feat_id_.cpp:156
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
const TUser & GetUser(void) const
Get the variant data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TGiim & GetGiim(void) const
Get the variant data.
Definition: Feat_id_.cpp:112
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
const TCdregion & GetCdregion(void) const
Get the variant data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TOrg & GetOrg(void) const
Get the variant data.
const TRsite & GetRsite(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
bool IsSetCit(void) const
citations for this feature Check if a value has been assigned to Cit data member.
Definition: Seq_feat_.hpp:1228
bool IsVariation(void) const
Check if variant Variation is selected.
const TGene & GetGene(void) const
Get the variant data.
TSite GetSite(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
bool IsSetId(void) const
the feature copied Check if a value has been assigned to Id data member.
const TNon_std_residue & GetNon_std_residue(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
TGibb GetGibb(void) const
Get the variant data.
Definition: Feat_id_.hpp:326
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
const TVariation & GetVariation(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
const THet & GetHet(void) const
Get the variant data.
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
@ e_Giim
geninfo import
Definition: Feat_id_.hpp:93
@ e_General
for use by various databases
Definition: Feat_id_.hpp:95
@ e_Gibb
geninfo backbone
Definition: Feat_id_.hpp:92
@ e_Local
for local software use
Definition: Feat_id_.hpp:94
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
const TRelease & GetRelease(void) const
Get the Release member data.
TId GetId(void) const
Get the Id member data.
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
bool IsSetRelease(void) const
the release Check if a value has been assigned to Release data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const TId & GetId(void) const
Get the Id member data.
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
TFrom GetFrom(void) const
Get the From member data.
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
list< CRef< CSeq_loc > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
bool IsSetDb(void) const
dbase used in Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both_rev
in reverse orientation
Definition: Na_strand_.hpp:69
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
TNcbieaa & SetNcbieaa(void)
Select the variant.
Definition: Seq_data_.hpp:657
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:690
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
void SetDefaultCompleteness(void)
Assign default value to Completeness data member.
Definition: MolInfo_.hpp:588
TIupacaa & SetIupacaa(void)
Select the variant.
Definition: Seq_data_.hpp:537
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
const TId & GetId(void) const
Get the Id member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
bool IsSetName(void) const
Names and synonyms: some variants have well-known canonical names and possible accepted synonyms. Check...
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::KEY key
unsigned int a
Definition: ncbi_localip.c:102
const char * tag
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
static unsigned cnt[256]
static const char * prefix[]
Definition: pcregrep.c:405
bool operator()(const CFeatTree::CFeatInfo *f1, const CFeatTree::CFeatInfo *f2) const
Definition: feature.cpp:2924
SAnnotSelector –.
bool operator()(const SBestInfo &info1, const SBestInfo &info2) const
Definition: feature.cpp:2255
CDisambiguator::TChildren::const_iterator TChild
Definition: feature.cpp:2348
bool operator()(const TChild &c1, const TChild &c2) const
Definition: feature.cpp:2350
Definition: type.c:6
#define _ASSERT
#define ftable
Definition: utilfeat.h:37
static const char *const features[]
#define const
Definition: zconf.h:232
#define local
Definition: zutil.h:33
Modified on Wed Apr 17 13:09:06 2024 by modify_doxy.py rev. 669887