NCBI C++ ToolKit
feature.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature.cpp 92832 2021-02-17 20:03:10Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Clifford Clausen
27 *
28 * File Description:
29 * Sequence utilities
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <serial/objistr.hpp>
34 #include <serial/serial.hpp>
35 #include <serial/iterator.hpp>
36 #include <serial/enumvalues.hpp>
37 
39 #include <objmgr/scope.hpp>
40 #include <objmgr/seq_vector.hpp>
41 #include <objmgr/feat_ci.hpp>
43 
60 
61 #include <objects/seq/Bioseq.hpp>
63 #include <objects/seq/IUPACaa.hpp>
65 #include <objects/seq/NCBIeaa.hpp>
66 #include <objects/seq/NCBI8aa.hpp>
67 #include <objects/seq/Pubdesc.hpp>
69 #include <objects/seq/Seqdesc.hpp>
71 
76 
80 
82 #include <objects/pub/Pub.hpp>
83 #include <objects/pub/Pub_set.hpp>
84 
85 #include <objmgr/util/feature.hpp>
86 #include <objmgr/util/sequence.hpp>
87 #include <objmgr/annot_ci.hpp>
88 
89 #include <algorithm>
90 
93 BEGIN_SCOPE(feature)
94 USING_SCOPE(sequence);
95 
96 // internal prototypes
98  vector<CMappedFeat>&, feature::CFeatTree&);
100  vector<CMappedFeat>&);
101 bool sGetFeatureGeneBiotypeWrapper(feature::CFeatTree&, CMappedFeat, string&, bool);
102 
103 // Appends a label onto "label" based on the type of feature
// (invoked from GetLabel as s_GetTypeLabel(feat, &type_label, flags)).
//
// The appended text is chosen as follows:
//  - subtype is not eSubtype_bad: the feature key. The GenBank vocabulary
//    is used for Prot data whose subtype is not eSubtype_prot.
//      * Imp features: "variation" becomes "Variation"; every key other
//        than "CDS" is wrapped in square brackets.
//      * Region features whose region is exactly "Domain" and that carry
//        a comment (with fFGL_NoComments clear): "Domain".
//  - subtype is eSubtype_bad and the data is Imp: "[<imp key>]".
//  - anything else: "Unknown=0".
//
// "label" is dereferenced without a null check.
105 {
106  string tlabel;
107 
108  // Determine typelabel
109  CSeqFeatData::ESubtype idx = feat.GetData().GetSubtype();
110  if (idx != CSeqFeatData::eSubtype_bad) {
111  if (feat.GetData().IsProt() && idx != CSeqFeatData::eSubtype_prot) {
112  tlabel = feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
113  } else {
114  tlabel = feat.GetData().GetKey();
115  }
116  if (feat.GetData().IsImp()) {
117  if ( tlabel == "variation" ) {
118  tlabel = "Variation";
119  }
120  else if ( tlabel != "CDS") {
121  tlabel = "[" + tlabel + "]";
122  }
123  } else if ((flags & fFGL_NoComments) == 0 && feat.GetData().IsRegion()
124  && feat.GetData().GetRegion() == "Domain"
125  && feat.IsSetComment() ) {
126  tlabel = "Domain";
127  }
128  } else if (feat.GetData().IsImp()) {
    // Subtype could not be determined: fall back to the raw Imp-feat key
129  tlabel = "[" + feat.GetData().GetImp().GetKey() + "]";
130  } else {
131  tlabel = "Unknown=0";
132  }
133  *label += tlabel;
134 }
135 
136 
137 // Appends a label onto tlabel for a CSeqFeatData::e_Cdregion
138 inline
140 (const CSeq_feat& feat,
141  string* tlabel,
142  CScope* scope)
143 {
144  // Check that tlabel exists and that the feature data is Cdregion
145  if (!tlabel || !feat.GetData().IsCdregion()) {
146  return;
147  }
148 
149  const CGene_ref* gref = 0;
150  const CProt_ref* pref = 0;
151 
152  // Look for CProt_ref object to create a label from
153  if (feat.IsSetXref()) {
154  ITERATE ( CSeq_feat::TXref, it, feat.GetXref()) {
155  const CSeqFeatXref& xref = **it;
156  if ( !xref.IsSetData() ) {
157  continue;
158  }
159 
160  switch (xref.GetData().Which()) {
162  pref = &xref.GetData().GetProt();
163  break;
165  gref = &xref.GetData().GetGene();
166  break;
167  default:
168  break;
169  }
170  }
171  }
172 
173  // Try and create a label from a CProt_ref in CSeqFeatXref in feature
174  if (pref) {
175  pref->GetLabel(tlabel);
176  return;
177  }
178 
179  // Try and create a label from a CProt_ref in the feat product and
180  // return if found
181  if (feat.IsSetProduct() && scope) {
182  try {
183  const CSeq_id& id = GetId(feat.GetProduct(), scope);
184  CBioseq_Handle hnd = scope->GetBioseqHandle(id);
185  if (hnd) {
186 
187  for (CFeat_CI feat_it(hnd,
189  .IncludeFeatType(CSeqFeatData::e_Prot));
190  feat_it; ++feat_it) {
191  feat_it->GetData().GetProt().GetLabel(tlabel);
192  return;
193  }
194  }
195  else {
196  ERR_POST(Error << "cannot find sequence: " + id.AsFastaString());
197  }
198  } catch (CObjmgrUtilException&) {}
199  }
200 
201  // Try and create a label from a CGene_ref in CSeqFeatXref in feature
202  if (gref) {
203  gref->GetLabel(tlabel);
204  }
205 
206  // check to see if the CDregion is just an open reading frame
207  if (feat.GetData().GetCdregion().IsSetOrf() &&
208  feat.GetData().GetCdregion().GetOrf()) {
209  string str("open reading frame: ");
210  switch (feat.GetData().GetCdregion().GetFrame()) {
212  str += "frame not set; ";
213  break;
215  str += "frame 1; ";
216  break;
218  str += "frame 2; ";
219  break;
221  str += "frame 3; ";
222  break;
223  }
224 
225  switch (sequence::GetStrand(feat.GetLocation(), scope)) {
226  case eNa_strand_plus:
227  str += "positive strand";
228  break;
229  case eNa_strand_minus:
230  str += "negative strand";
231  break;
232  case eNa_strand_both:
233  str += "both strands";
234  break;
235  case eNa_strand_both_rev:
236  str += "both strands (reverse)";
237  break;
238  default:
239  str += "strand unknown";
240  break;
241  }
242 
243  *tlabel += str;
244  }
245 
246 
247 }
248 
249 
// Fallback label for RNA features (invoked as
// s_GetRnaRefLabelFromComment(feat, label, flags, type_label)).
//
// If comments are allowed (fFGL_NoComments clear) and the feature has a
// non-empty comment, the comment is appended to "label". It is prefixed
// with "<type_label>-" when fFGL_Type is set, type_label is non-null and
// the comment does not already contain the type label.
// Otherwise *type_label is appended when type_label is non-null.
//
// "label" is dereferenced without a null check.
// NOTE(review): the prefix test here is (flags & fFGL_Type) != 0, while
// s_GetRnaRefLabel tests (flags & fFGL_Type) == 0 before adding the same
// kind of prefix -- confirm which polarity is intended.
250 inline
252 (const CSeq_feat& feat,
253  string* label,
255  const string* type_label)
256 {
257  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()
258  && !feat.GetComment().empty()) {
259  if ((flags & fFGL_Type) != 0 && type_label != NULL
260  && feat.GetComment().find(*type_label) == string::npos) {
261  *label += *type_label + "-" + feat.GetComment();
262  } else {
263  *label += feat.GetComment();
264  }
265  } else if (type_label) {
    // No usable comment: the type label alone becomes the label
266  *label += *type_label;
267  }
268 }
269 
270 
271 // Appends a label onto "label" for a CRNA_ref
272 inline
273 static void s_GetRnaRefLabel
274 (const CSeq_feat& feat,
275  string* label,
277  const string* type_label)
278 {
279  // Check that label exists and that feature data is type RNA-ref
280  if (!label || !feat.GetData().IsRna()) {
281  return;
282  }
283 
284  const CRNA_ref& rna = feat.GetData().GetRna();
285 
286  // Append the feature comment, the type label, or both and return
287  // if Ext is not set
288  if (!rna.IsSetExt()) {
289  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
290  return;
291  }
292 
293  // Append a label based on the type of the ext of the
294  // CRna_ref
295  string tmp_label;
296  switch (rna.GetExt().Which()) {
298  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
299  break;
301  tmp_label = rna.GetExt().GetName();
302  if (feat.CanGetQual() &&
303  (tmp_label == "ncRNA" || tmp_label == "tmRNA"
304  || tmp_label == "misc_RNA")) {
305  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
306  ITERATE( CSeq_feat::TQual, q, qual ) {
307  if ((*q)->GetQual() == "product") {
308  tmp_label = (*q)->GetVal();
309  break;
310  }
311  }
312  }
313  if ((flags & fFGL_Type) == 0 && type_label != 0 && !tmp_label.empty() && tmp_label.find(*type_label) == string::npos) {
314  *label += *type_label + "-" + tmp_label;
315  } else if (!tmp_label.empty()) {
316  *label += tmp_label;
317  } else if (type_label) {
318  *label += *type_label;
319  }
320  break;
322  {
323  if ( !rna.GetExt().GetTRNA().IsSetAa() ) {
324  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
325  break;
326  }
327  try {
328  CTrna_ext::C_Aa::E_Choice aa_code_type =
329  rna.GetExt().GetTRNA().GetAa().Which();
330  int aa_code;
331  CSeq_data in_seq, out_seq;
332  string str_aa_code;
333  switch (aa_code_type) {
335  // Convert an e_Iupacaa code to an Iupacaa3 code for the label
336  aa_code = rna.GetExt().GetTRNA().GetAa().GetIupacaa();
338  aa_code);
339  in_seq.SetIupacaa().Set() = str_aa_code;
340  CSeqportUtil::Convert(in_seq, &out_seq,
342  if (out_seq.GetNcbistdaa().Get().size()) {
343  aa_code = out_seq.GetNcbistdaa().Get()[0];
344  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
345  } else {
346  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
347  }
348  break;
350  // Convert an e_Ncbieaa code to an Iupacaa3 code for the label
351  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbieaa();
353  aa_code);
354  in_seq.SetNcbieaa().Set() = str_aa_code;
355  CSeqportUtil::Convert(in_seq, &out_seq,
357  if (out_seq.GetNcbistdaa().Get().size()) {
358  aa_code = out_seq.GetNcbistdaa().Get()[0];
359  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
360  } else {
361  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
362  }
363  break;
365  // Convert an e_Ncbi8aa code to an Iupacaa3 code for the label
366  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbi8aa();
367  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
368  break;
370  // Convert an e_Ncbistdaa code to an Iupacaa3 code for the label
371  aa_code = rna.GetExt().GetTRNA().GetAa().GetNcbistdaa();
372  tmp_label = CSeqportUtil::GetIupacaa3(aa_code);
373  break;
374  default:
375  break;
376  }
377 
378  // Append to label, depending on flags
379  if ((flags & fFGL_Type) == 0 && type_label != 0) {
380  *label += *type_label + "-" + tmp_label;
381  } else if (!tmp_label.empty()) {
382  *label += tmp_label;
383  } else if (type_label) {
384  *label += *type_label;
385  }
386  } catch (CSeqportUtil::CBadIndex&) {
387  // fall back to comment (if any)
388  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
389  }
390 
391  break;
392  }
394  if (rna.GetExt().GetGen().CanGetProduct()) {
395  *label = rna.GetExt().GetGen().GetProduct();
396  } else if (rna.GetExt().GetGen().CanGetClass()) {
397  *label = rna.GetExt().GetGen().GetClass();
398  } else {
399  s_GetRnaRefLabelFromComment(feat, label, flags, type_label);
400  }
401  break;
402  }
403 }
404 
405 
406 static void s_GetVariationDbtagLabel(string* tlabel,
407  TFeatLabelFlags /*flags*/,
408  const CDbtag& dbtag)
409 {
410  if ( dbtag.GetDb() == "dbSNP" ) {
411  if ( !tlabel->empty() ) {
412  *tlabel += ", ";
413  }
414  const CObject_id& tag = dbtag.GetTag();
415  if ( tag.IsId() ) {
416  *tlabel += "rs";
417  *tlabel += NStr::NumericToString(tag.GetId());
418  }
419  else {
420  *tlabel += tag.GetStr();
421  }
422  }
423 }
424 
425 
426 // Appends a label to tlabel for a CImp_feat. A return value of true indicates
427 // that the label was created for a CImp_feat key = "Site-ref"
428 inline
429 static bool s_GetImpLabel
430 (const CSeq_feat& feat,
431  string* tlabel,
433  const string* type_label)
434 {
435  // Return if tlabel does not exist or feature data is not Imp-feat
436  if (!tlabel || !feat.GetData().IsImp()) {
437  return false;
438  }
439 
440  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
441  bool empty = true;
442 
443  // If the key is Site-ref
444  if (subtype == CSeqFeatData::eSubtype_site_ref) {
445  if (feat.IsSetCit()) {
446  // Create label based on Pub-set
447  feat.GetCit().GetLabel(tlabel);
448  return true;
449  }
450  }
451  else if (subtype == CSeqFeatData::eSubtype_variation) {
452  if ( feat.IsSetDbxref() ) {
453  ITERATE( CSeq_feat::TDbxref, it, feat.GetDbxref() ) {
454  s_GetVariationDbtagLabel(tlabel, flags, **it);
455  }
456  return false;
457  }
458  // else if the key is neither Site-ref nor variation
459  } else if ((flags & fFGL_Type) == 0) {
460  // If the key is CDS
461  if (subtype == CSeqFeatData::eSubtype_Imp_CDS) {
462  *tlabel += "[CDS]";
463  // else if the key is repeat_unit or repeat_region
464  } else if (subtype == CSeqFeatData::eSubtype_repeat_unit ||
466  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
467  // Loop thru the feature qualifiers
468  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
469  ITERATE( CSeq_feat::TQual, it, qual ) {
470  // If qualifier qual is rpt_family append qualifier val
471  if (NStr::EqualNocase((*it)->GetQual(),"rpt_family")) {
472  *tlabel += (*it)->GetVal();
473  empty = false;
474  break;
475  }
476  }
477  }
478 
479  // If nothing has been appended yet
480  if (empty) {
481  *tlabel += type_label ? *type_label : string("");
482  }
483  // else if the key is STS
484  } else if (subtype == CSeqFeatData::eSubtype_STS) {
485  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
486  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
487  ITERATE( CSeq_feat::TQual, it, qual ) {
488  if (NStr::EqualNocase((*it)->GetQual(),"standard_name"))
489  {
490  *tlabel = (*it)->GetVal();
491  empty = false;
492  break;
493  }
494  }
495  }
496 
497  // If nothing has been appended yet
498  if (empty) {
499  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
500  size_t pos = feat.GetComment().find(";");
501  if (pos == string::npos) {
502  *tlabel += feat.GetComment();
503  } else {
504  *tlabel += feat.GetComment().substr(0, pos);
505  }
506  } else {
507  *tlabel += type_label ? *type_label : string("");
508  }
509  }
510  // else if the key is anything other than misc_feature
511  } else if (subtype != CSeqFeatData::eSubtype_misc_feature) {
512  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
513  // Look for a single qualifier qual in order of preference
514  // "standard_name", "function", "number", any and
515  // append to tlabel and return if found
516  string std_name, func, num, other;
517  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
518  ITERATE( CSeq_feat::TQual, it, qual ) {
519  if (other.empty()) other = (*it)->GetVal();
520  if (std_name.empty() && NStr::EqualNocase((*it)->GetQual(),"standard_name")) {
521  std_name = (*it)->GetVal();
522  break; // no need to search further if found
523  }
524  if (func.empty() && NStr::EqualNocase((*it)->GetQual(), "function")) {
525  func = (*it)->GetVal();
526  continue;
527  }
528  if (num.empty() && NStr::EqualNocase((*it)->GetQual(), "number")) {
529  num = (*it)->GetVal();
530  continue;
531  }
532  }
533  if (!std_name.empty()) {
534  *tlabel += std_name;
535  return false;
536  }
537  if (!func.empty()) {
538  *tlabel += func;
539  return false;
540  }
541  if (!num.empty()) {
542  *tlabel += num;
543  return false;
544  }
545  if (!other.empty()) {
546  *tlabel += other;
547  return false;
548  }
549  // Append type_label if there is one
550  if (empty) {
551  *tlabel += type_label ? *type_label : string("");
552  return false;
553  }
554  }
555  }
556  }
557  return false;
558 }
559 
560 
561 // Appends a label to tlabel for a feature whose data is a CVariation_ref:
562 // the variation id (only if it is a dbSNP tag), then the variation name.
// The two parts are separated by ", " when both are present. Returns nothing;
// tlabel is left unchanged when it is null or the data is not a Variation.
563 static void s_GetVariationLabel(const CSeq_feat& feat,
564  string* tlabel,
566  const string* /*type_label*/)
567 {
568  // Return if tlabel does not exist or feature data is not a Variation
569  if (!tlabel || !feat.GetData().IsVariation()) {
570  return;
571  }
572 
573  const CVariation_ref& var = feat.GetData().GetVariation();
574  if ( var.IsSetId() ) {
    // Only ids from the "dbSNP" database add text (see s_GetVariationDbtagLabel)
575  s_GetVariationDbtagLabel(tlabel, flags, var.GetId());
576  }
577  if ( var.IsSetName() ) {
578  if ( !tlabel->empty() ) {
579  *tlabel += ", ";
580  }
581  *tlabel += var.GetName();
582  }
583 }
584 
585 
586 // Return a label based on the content of the feature
588 (const CSeq_feat& feat,
589  string* label,
590  const string* type_label,
592  CScope* scope)
593 {
594  string tlabel;
595 
596  // Get a content label dependent on the type of the feature data
597  switch (feat.GetData().Which()) {
599  feat.GetData().GetGene().GetLabel(&tlabel);
600  break;
601  case CSeqFeatData::e_Org:
602  feat.GetData().GetOrg().GetLabel(&tlabel);
603  break;
605  s_GetCdregionLabel(feat, &tlabel, scope);
606  break;
608  feat.GetData().GetProt().GetLabel(&tlabel);
609  break;
610  case CSeqFeatData::e_Rna:
611  s_GetRnaRefLabel(feat, &tlabel, flags, type_label);
612  break;
613  case CSeqFeatData::e_Pub:
614  feat.GetData().GetPub().GetPub().GetLabel(&tlabel);
615  break;
616  case CSeqFeatData::e_Seq:
617  break;
618  case CSeqFeatData::e_Imp:
619  if (s_GetImpLabel(feat, &tlabel, flags, type_label)) {
620  *label += tlabel;
621  return;
622  }
623  break;
625  if (feat.GetData().GetRegion().find("Domain") != string::npos &&
626  (flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
627  tlabel += feat.GetComment();
628  } else {
629  tlabel += feat.GetData().GetRegion();
630  }
631  break;
633  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
634  tlabel += feat.GetComment();
635  }
636  break;
638  // Get the ASN string name for the enumerated EBond type
639  tlabel += CSeqFeatData::GetTypeInfo_enum_EBond()
640  ->FindName(feat.GetData().GetBond(), true);
641  break;
643  // Get the ASN string name for the enumerated ESite type
644  tlabel += CSeqFeatData::GetTypeInfo_enum_ESite()
645  ->FindName(feat.GetData().GetSite(), true);
646  break;
648  switch (feat.GetData().GetRsite().Which()) {
649  case CRsite_ref::e_Str:
650  tlabel += feat.GetData().GetRsite().GetStr();
651  break;
652  case CRsite_ref::e_Db:
653  tlabel += feat.GetData().GetRsite().GetDb().GetTag().IsStr() ?
654  feat.GetData().GetRsite().GetDb().GetTag().GetStr() :
655  string("?");
656  break;
657  default:
658  break;
659  }
660  break;
662  if (feat.GetData().GetUser().IsSetClass()) {
663  tlabel += feat.GetData().GetUser().GetClass();
664  } else if (feat.GetData().GetUser().GetType().IsStr()) {
665  tlabel += feat.GetData().GetUser().GetType().GetStr();
666  }
668  break;
669  case CSeqFeatData::e_Num:
670  break;
672  tlabel += CSeqFeatData::GetTypeInfo_enum_EPsec_str()
673  ->FindName(feat.GetData().GetPsec_str(), true);
674  break;
676  tlabel += feat.GetData().GetNon_std_residue();
677  break;
678  case CSeqFeatData::e_Het:
679  tlabel += feat.GetData().GetHet().Get();
680  break;
682  {{
683  const CBioSource& biosrc = feat.GetData().GetBiosrc();
684  string str;
685  if (biosrc.IsSetSubtype()) {
686  ITERATE (CBioSource::TSubtype, iter, biosrc.GetSubtype()) {
687  if ( !str.empty() ) {
688  str += "; ";
689  }
690  (*iter)->GetLabel(&str);
691  }
692  }
693  if (str.empty()) {
694  feat.GetData().GetBiosrc().GetOrg().GetLabel(&str);
695  } else {
696  str += " (";
697  feat.GetData().GetBiosrc().GetOrg().GetLabel(&str);
698  str += ")";
699  }
700  tlabel += str;
701  }}
702  break;
704  s_GetVariationLabel(feat, &tlabel, flags, type_label);
705  break;
706  default:
707  break;
708  }
709 
710  // Return if a label has been calculated above
711  if (!tlabel.empty()) {
712  *label += tlabel;
713  return;
714  }
715 
716  // Put Seq-feat qual into label
717  if (feat.IsSetQual() && (0 == (flags & fFGL_NoQualifiers))) {
718  string prefix("/");
719  const CSeq_feat_Base::TQual & qual = feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
720  ITERATE( CSeq_feat::TQual, it, qual ) {
721  tlabel += prefix + (**it).GetQual();
722  prefix = " /";
723  if (!(**it).GetVal().empty()) {
724  tlabel += "=" + (**it).GetVal();
725  }
726  }
727  }
728 
729  // Put Seq-feat comment into label
730  if ((flags & fFGL_NoComments) == 0 && feat.IsSetComment()) {
731  if (tlabel.empty()) {
732  tlabel = feat.GetComment();
733  } else {
734  tlabel += "; " + feat.GetComment();
735  }
736  }
737 
738  *label += tlabel;
739 }
740 
741 
743 (const CSeq_feat& feat,
744  string* label,
746  CScope* scope)
747 {
    // Builds a feature label and appends it to *label, as selected by "flags":
    //  - fFGL_Type only:        "<type>"
    //  - fFGL_Type and Content: "<type>: <content>"
    //  - fFGL_Type clear:       "<content>", or "<type>" when the content
    //                           label turns out to be empty.
    // Does nothing when label is null. "scope" is only forwarded to
    // s_GetContentLabel.
748 
749  // Ensure that label exists
750  if (!label) {
751  return;
752  }
753 
754  // Get the type label
755  string type_label;
756  s_GetTypeLabel(feat, &type_label, flags);
757 
758  // Append the type label and return if content label not required
759  if ((flags & fFGL_Type) != 0) {
760  *label += type_label;
761  if ((flags & fFGL_Content) != 0) {
762  *label += ": ";
763  } else {
764  return;
765  }
766  }
767 
768  // Append the content label
    // (remember the current length so an empty content label can be detected)
769  size_t label_len = label->size();
770  s_GetContentLabel(feat, label, &type_label, flags, scope);
771 
772  // If there is no content label, append the type label
773  if (label->size() == label_len && (flags & fFGL_Type) == 0) {
774  *label += type_label;
775  }
776 }
777 
778 
// Convenience overload: translates an ELabelType into the equivalent
// label flags and forwards to the flags-based GetLabel overload.
//   eType    -> fFGL_Type
//   eContent -> fFGL_Content
//   eBoth    -> fFGL_Both
// feat, label and scope are passed through unchanged.
779 void GetLabel (const CSeq_feat& feat,
780  string* label,
781  ELabelType label_type,
782  CScope* scope)
783 {
785  switch (label_type) {
786  case eType: flags = fFGL_Type; break;
787  case eContent: flags = fFGL_Content; break;
788  case eBoth: flags = fFGL_Both; break;
789  }
790  GetLabel(feat, label, flags, scope);
791 }
792 
793 
795 {
796  m_IdMap.clear();
797 }
798 
799 
801 {
802  return m_IdMap.size();
803 }
804 
805 
806 int CFeatIdRemapper::RemapId(int old_id, const CTSE_Handle& tse)
807 {
808  TFullId key(old_id, tse);
809  int& new_id = m_IdMap[key];
810  if ( !new_id ) {
811  new_id = int(m_IdMap.size());
812  }
813  return new_id;
814 }
815 
816 
818 {
819  bool mapped = false;
820  if ( id.IsLocal() ) {
821  CObject_id& local = id.SetLocal();
822  if ( local.IsId() ) {
823  int old_id = local.GetId();
824  int new_id = RemapId(old_id, tse);
825  if ( new_id != old_id ) {
826  mapped = true;
827  local.SetId(new_id);
828  }
829  }
830  }
831  return mapped;
832 }
833 
834 
835 bool CFeatIdRemapper::RemapId(CFeat_id& id, const CFeat_CI& feat_it)
836 {
837  bool mapped = false;
838  if ( id.IsLocal() ) {
839  CObject_id& local = id.SetLocal();
840  if ( local.IsId() ) {
841  int old_id = local.GetId();
842  int new_id = RemapId(old_id, feat_it.GetAnnot().GetTSE_Handle());
843  if ( new_id != old_id ) {
844  mapped = true;
845  local.SetId(new_id);
846  }
847  }
848  }
849  return mapped;
850 }
851 
852 
854 {
855  bool mapped = false;
856  if ( feat.IsSetId() ) {
857  if ( RemapId(feat.SetId(), tse) ) {
858  mapped = true;
859  }
860  }
861  if ( feat.IsSetXref() ) {
862  NON_CONST_ITERATE ( CSeq_feat::TXref, it, feat.SetXref() ) {
863  CSeqFeatXref& xref = **it;
864  if ( xref.IsSetId() && RemapId(xref.SetId(), tse) ) {
865  mapped = true;
866  }
867  }
868  }
869  return mapped;
870 }
871 
872 
874 {
875  CRef<CSeq_feat> feat(SerialClone(feat_it->GetMappedFeature()));
876  if ( feat->IsSetId() ) {
877  RemapId(feat->SetId(), feat_it);
878  }
879  if ( feat->IsSetXref() ) {
880  NON_CONST_ITERATE ( CSeq_feat::TXref, it, feat->SetXref() ) {
881  CSeqFeatXref& xref = **it;
882  if ( xref.IsSetId() ) {
883  RemapId(xref.SetId(), feat_it);
884  }
885  }
886  }
887  return feat;
888 }
889 
890 
892  const CSeq_feat& f2,
893  CScope* scope)
894 {
895  string l1, l2;
896  GetLabel(f1, &l1, fFGL_Both, scope);
897  GetLabel(f2, &l2, fFGL_Both, scope);
898 
899  int d = NStr::Compare(l1, l2);
900  if ( d != 0 ) {
901  return d < 0;
902  }
903 
904  // TODO: To make C and C++ match better, we stop comparing CDS's at this point.
905  // This can be removed once we have gone completely to C++.
906  if( f1.IsSetData() && f1.GetData().IsCdregion() &&
907  f2.IsSetData() && f2.GetData().IsCdregion() )
908  {
909  return false;
910  }
911 
912  if ( f1.IsSetComment() != f2.IsSetComment() ) {
913  return !f1.IsSetComment();
914  }
915  if ( f1.IsSetComment() ) {
916  d = NStr::Compare(f1.GetComment(), f2.GetComment());
917  if ( d != 0 ) {
918  return d < 0;
919  }
920  }
921 
922  if ( f1.IsSetId() != f2.IsSetId() ) {
923  return f1.IsSetId();
924  }
925  if ( f1.IsSetId() ) {
926  const CFeat_id& id1 = f1.GetId();
927  const CFeat_id& id2 = f2.GetId();
928  if ( id1.Which() != id2.Which() ) {
929  return id1.Which() < id2.Which();
930  }
931  if ( id1.IsLocal() ) {
932  const CObject_id& oid1 = id1.GetLocal();
933  const CObject_id& oid2 = id2.GetLocal();
934  if ( oid1.Which() != oid2.Which() ) {
935  return oid1.Which() < oid2.Which();
936  }
937  if ( oid1.IsId() ) {
938  int oid1int = oid1.GetId();
939  int oid2int = oid2.GetId();
940  if ( oid1int != oid2int ) {
941  return oid1int < oid2int;
942  }
943  }
944  else if ( oid1.IsStr() ) {
945  const string& oid1str = oid1.GetStr();
946  const string& oid2str = oid2.GetStr();
947  int diff = NStr::CompareNocase(oid1str, oid2str);
948  if ( diff != 0 ) {
949  return diff < 0;
950  }
951  }
952  }
953  }
954 
955  if ( f1.GetData().IsGene() && f2.GetData().IsGene() ) {
956  const CGene_ref& g1 = f1.GetData().GetGene();
957  const CGene_ref& g2 = f2.GetData().GetGene();
958  if ( g1.IsSetLocus_tag() != g2.IsSetLocus_tag() ) {
959  return !g1.IsSetLocus_tag();
960  }
961  if ( g1.IsSetLocus_tag() ) {
962  d = NStr::Compare(g1.GetLocus_tag(), g2.GetLocus_tag());
963  if ( d != 0 ) {
964  return d < 0;
965  }
966  }
967  }
968 
969  return false;
970 }
971 
972 
974  const CBioseq_Handle& master_seq,
975  const CRange<TSeqPos>& range)
976 {
977  SAnnotSelector sel(feat.GetFeatSubtype());
978  sel.SetExactDepth();
979  sel.SetResolveAll();
980  CSeq_annot_Handle annot = feat.GetAnnot();
981  sel.SetLimitSeqAnnot(annot);
983  for ( int depth = 0; depth < 10; ++depth ) {
984  sel.SetResolveDepth(depth);
985  for ( CFeat_CI it(master_seq, range, sel); it; ++it ) {
986  if ( it->GetSeq_feat_Handle() == feat ) {
987  return *it;
988  }
989  }
990  }
991  NCBI_THROW(CObjMgrException, eFindFailed,
992  "MapSeq_feat: feature not found");
993 }
994 
995 
998  const CSeq_id_Handle& master_id,
999  const CRange<TSeqPos>& range)
1000 {
1001  CBioseq_Handle master_seq = feat.GetScope().GetBioseqHandle(master_id);
1002  if ( !master_seq ) {
1003  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1004  "MapSeq_feat: master sequence not found");
1005  }
1006  return MapSeq_feat(feat, master_seq, range);
1007 }
1008 
1009 
1012  const CBioseq_Handle& master_seq)
1013 {
1014  return MapSeq_feat(feat, master_seq, CRange<TSeqPos>::GetWhole());
1015 }
1016 
1017 
1020  const CSeq_id_Handle& master_id)
1021 {
1022  CBioseq_Handle master_seq = feat.GetScope().GetBioseqHandle(master_id);
1023  if ( !master_seq ) {
1024  NCBI_THROW(CObjmgrUtilException, eBadLocation,
1025  "MapSeq_feat: master sequence not found");
1026  }
1027  return MapSeq_feat(feat, master_seq);
1028 }
1029 
1030 
1032 {
1035 
1036  bool IsValid(void) const {
1038  }
1039  operator bool(void) const {
1040  return IsValid();
1041  }
1042  bool operator!(void) const {
1043  return !IsValid();
1044  }
1045 
1046  void Next(void);
1048  Next();
1049  return *this;
1050  }
1051 
1052  bool CanHaveGeneParent(void) const;
1053  bool CanHaveCommonGene(void) const;
1054 
1055  // special cdregion to mRNA/VDJ_segment/C_range link
1057 
1058  // check for overlap by intervals
1059  bool OverlapByIntervals() const;
1060 
1061  CSeqFeatData::ESubtype m_StartType; // initial feature type
1062  CSeqFeatData::ESubtype m_CurrentType; // current link child type
1063  CSeqFeatData::ESubtype m_ParentType; // current link parent type
1065 };
1066 
1067 
1069  CSeqFeatData::ESubtype start)
1070  : m_StartType(start == CSeqFeatData::eSubtype_bad? subtype: start),
1071  m_CurrentType(subtype),
1072  m_ParentType(CSeqFeatData::eSubtype_bad),
1073  m_ByProduct(false)
1074 {
1075  switch ( subtype ) {
1078  // artificial subtypes
1080  break;
1083  // operon and gap features do not inherit anything
1085  break;
1087  // Gene features can inherit operon by overlap (CONTAINED_WITHIN)
1089  break;
1094  break;
1097  break;
1100  break;
1102  m_ByProduct = true;
1104  break;
1107  break;
1118  break;
1119  default:
1121  break;
1122  }
1123 }
1124 
1125 
1126 inline bool STypeLink::CanHaveGeneParent(void) const
1127 {
1128  return *this && m_CurrentType != CSeqFeatData::eSubtype_gene;
1129 }
1130 
1131 
1132 inline bool STypeLink::CanHaveCommonGene(void) const
1133 {
1134  return CanHaveGeneParent();
1135 }
1136 
1137 
1139 {
1140  if ( !m_ByProduct &&
1144  // cdregion to mRNA can also link to C_region or VDJ_segment
1145  static const CSeqFeatData::ESubtype sm_SpecialVDJTypes[] = {
1152  };
1153  return sm_SpecialVDJTypes;
1154  }
1155  return 0;
1156 }
1157 
1158 
1160 {
1164 }
1165 
1166 
1168 {
1170  // allow linking proteins to cdregion by product and then location.
1172  m_ByProduct = false;
1173  return;
1174  }
1175  }
1176  switch ( m_ParentType ) {
1178  // no inherit of operons if no gene
1180  break;
1182  if ( m_ByProduct ) {
1183  m_ByProduct = false;
1185  }
1186  else {
1187  m_ByProduct = true;
1188  }
1189  break;
1190  default:
1192  break;
1193  }
1194 }
1195 
1196 
1197 namespace {
1198  // Checks if the location has mixed strands or wrong order of intervals
1199  static
1200  bool sx_IsIrregularLocation(const CSeq_loc& loc,
1201  TSeqPos circular_length)
1202  {
1203  try {
1204  // simple locations are regular
1205  if ( !loc.IsMix() ) {
1206  return false;
1207  }
1208 
1209  if ( !loc.GetId() ) {
1210  // multiple ids locations are irregular
1211  return true;
1212  }
1213 
1214  ENa_strand strand = loc.GetStrand();
1215  if ( strand == eNa_strand_other ) {
1216  // mixed strands
1217  return true;
1218  }
1219 
1220  bool plus_strand = !IsReverse(strand);
1221  TSeqPos pos = plus_strand? 0: kInvalidSeqPos;
1222  bool stop = false;
1223 
1224  const CSeq_loc_mix& mix = loc.GetMix();
1225  ITERATE ( CSeq_loc_mix::Tdata, it, mix.Get() ) {
1226  const CSeq_loc& loc1 = **it;
1227  if ( sx_IsIrregularLocation(loc1, circular_length) ) {
1228  return true;
1229  }
1230  if ( circular_length != kInvalidSeqPos ) {
1231  // cannot check interval order on circular sequences
1232  continue;
1233  }
1235  if ( range.Empty() ) {
1236  continue;
1237  }
1238  if ( stop ) {
1239  return true;
1240  }
1241  if ( plus_strand ) {
1242  if ( range.GetFrom() < pos ) {
1243  return true;
1244  }
1245  pos = range.GetTo()+1;
1246  stop = pos == 0;
1247  }
1248  else {
1249  if ( range.GetTo() > pos ) {
1250  return true;
1251  }
1252  pos = range.GetFrom();
1253  stop = pos == 0;
1254  --pos;
1255  }
1256  }
1257 
1258  return false;
1259  }
1260  catch ( CException& ) {
1261  // something's wrong -> irregular
1262  return true;
1263  }
1264  }
1265 
1266 
1267  static
1268  TSeqPos sx_GetCircularLength(CScope& scope,
1269  const CSeq_loc& loc)
1270  {
1271  try {
1272  const CSeq_id* single_id = 0;
1273  loc.CheckId(single_id);
1274  if ( !single_id ) {
1275  return kInvalidSeqPos;
1276  }
1277 
1278  CBioseq_Handle bh = scope.GetBioseqHandle(*single_id);
1279  if ( bh && bh.IsSetInst_Topology() &&
1281  return bh.GetBioseqLength();
1282  }
1283  }
1284  catch ( CException& /*ignored*/ ) {
1285  return kInvalidSeqPos;
1286  }
1287  return kInvalidSeqPos;
1288  }
1289 
1290 
1291  static
1292  TSeqPos sx_GetCircularLength(CScope& scope,
1293  const CSeq_id_Handle& id)
1294  {
1295  try {
1296  CBioseq_Handle bh = scope.GetBioseqHandle(id);
1297  if ( bh && bh.IsSetInst_Topology() &&
1299  return bh.GetBioseqLength();
1300  }
1301  }
1302  catch ( CException& /*ignored*/ ) {
1303  return kInvalidSeqPos;
1304  }
1305  return kInvalidSeqPos;
1306  }
1307 
1308 
1309  static inline
1310  bool sx_CanMatchByQual(CSeqFeatData::ESubtype type)
1311  {
1312  return
1319  }
1320 
1321 
// Names of the GenBank qualifiers that SMatchingQuals looks for.
1322  static const char kQual_transcript_id[] = "transcript_id";
1323  static const char kQual_orig_transcript_id[] = "orig_transcript_id";
1324  static const char kQual_orig_protein_id[] = "orig_protein_id";
// Slot indices into SMatchingQuals::qq, one per qualifier name above.
// SMatchingQuals::GetMatch scans the slots in ascending order, so
// transcript_id is compared first and orig_protein_id last.
// kQualPriority_count is the number of slots, not a qualifier.
1325  enum {
1326  kQualPriority_transcript_id,
1327  kQualPriority_orig_transcript_id,
1328  kQualPriority_orig_protein_id,
1329  kQualPriority_count
1330  };
1331 
// Collects, for one feature, the qualifiers named by kQual_transcript_id,
// kQual_orig_transcript_id and kQual_orig_protein_id (only those that have
// a value set) so that two features can be compared by qualifier value.
// NOTE(review): listing lines 1344 and 1368, which declare the `f` used
// below, are absent from this extract.
1332  struct SMatchingQuals {
      // one slot per kQualPriority_* index; empty when the qualifier is absent
1333  CConstRef<CGb_qual> qq[kQualPriority_count];
1334 
1335 
      // Returns true when `feat` has qualifiers, its subtype passes
      // sx_CanMatchByQual(), and at least one qualifier with a value is
      // named by one of the three recognized names.
1336  static bool HasMatch(const CMappedFeat& feat)
1337  {
1338  if ( !feat.IsSetQual() ) {
1339  return false;
1340  }
1341  if ( !sx_CanMatchByQual(feat.GetFeatSubtype()) ) {
1342  return false;
1343  }
1345  const CSeq_feat::TQual& quals = f->GetQual();
1346  ITERATE ( CSeq_feat::TQual, it, quals ) {
      // qualifiers without a value are ignored
1347  if ( (*it)->IsSetVal() ) {
1348  const string& qual = (*it)->GetQual();
1349  if ( qual == kQual_orig_protein_id ||
1350  qual == kQual_orig_transcript_id ||
1351  qual == kQual_transcript_id ) {
1352  return true;
1353  }
1354  }
1355  }
1356  return false;
1357  }
1358 
1359 
      // Fills the qq slots from the qualifiers of `feat`. All slots stay
      // empty when the feature has no qualifiers or its subtype does not
      // pass sx_CanMatchByQual(). When a name occurs more than once the
      // last occurrence with a value is the one kept.
1360  explicit SMatchingQuals(const CMappedFeat& feat)
1361  {
1362  if ( !feat.IsSetQual() ) {
1363  return;
1364  }
1365  if ( !sx_CanMatchByQual(feat.GetFeatSubtype()) ) {
1366  return;
1367  }
1369  const CSeq_feat::TQual& quals = f->GetQual();
1370  ITERATE ( CSeq_feat::TQual, it, quals ) {
1371  if ( (*it)->IsSetVal() ) {
1372  const string& qual = (*it)->GetQual();
1373  if ( qual == kQual_orig_protein_id ) {
1374  qq[kQualPriority_orig_protein_id] = *it;
1375  }
1376  else if ( qual == kQual_orig_transcript_id ) {
1377  qq[kQualPriority_orig_transcript_id] = *it;
1378  }
1379  else if ( qual == kQual_transcript_id ) {
1380  qq[kQualPriority_transcript_id] = *it;
1381  }
1382  }
1383  }
1384  }
1385 
1386 
      // Compares the slots of *this and `quals2` in index order and returns
      // (index + 1) of the first slot that is filled on both sides with
      // equal values; returns 0 when no slot matches.
1387  Uint1 GetMatch(const SMatchingQuals& quals2) const
1388  {
1389  for ( int i = 0; i < kQualPriority_count; ++i ) {
1390  if ( qq[i] && quals2.qq[i] &&
1391  qq[i]->GetVal() == quals2.qq[i]->GetVal() ) {
1392  return Uint1(i+1);
1393  }
1394  }
1395  return 0;
1396  }
1397  };
1398 
1399 
1400  static inline
1401  bool sx_CanMatchByQual(const CMappedFeat& feat)
1402  {
1403  return SMatchingQuals::HasMatch(feat);
1404  }
1405 
1406 
1407  static inline
1408  bool sx_GeneSuppressed(const CMappedFeat& feat)
1409  {
1410  if ( feat.IsSetXref() ) {
1411  const CSeq_feat::TXref& xrefs = feat.GetXref();
1412  if ( xrefs.size() == 1 ) {
1413  const CSeqFeatXref& xref = *xrefs[0];
1414  if ( xref.IsSetData() ) {
1415  const CSeqFeatData& data = xref.GetData();
1416  if ( data.IsGene() ) {
1417  const CGene_ref& gene = data.GetGene();
1418  if ( !gene.IsSetLocus() && !gene.IsSetLocus_tag() ) {
1419  // feature has single empty gene xref
1420  return true;
1421  }
1422  }
1423  }
1424  }
1425  }
1426  return false;
1427  }
1428 
1429 
1430  static inline
1431  Uint1 sx_GetQualMatch(const CMappedFeat& feat1,
1432  const CMappedFeat& feat2)
1433  {
1434  SMatchingQuals quals1(feat1);
1435  SMatchingQuals quals2(feat2);
1436  return quals1.GetMatch(quals2);
1437  }
1438 
1439 
// Chooses the EOverlapType used when testing a child location against parent
// candidates for the given type link.
// The default is eOverlap_Contained; it becomes eOverlap_CheckIntervals when
// link.OverlapByIntervals() is true, and eOverlap_Subset when the second
// condition holds.
// NOTE(review): listing line 1449 (the opening of the second `if`) is absent
// from this extract, so the full condition cannot be stated here.
1440  static inline
1441  EOverlapType sx_GetOverlapType(const STypeLink& link,
1442  const CSeq_loc& loc,
1443  TSeqPos circular_length)
1444  {
1445  EOverlapType overlap_type = eOverlap_Contained;
1446  if ( link.OverlapByIntervals() ) {
1447  overlap_type = eOverlap_CheckIntervals;
1448  }
      // `true ||` short-circuits: sx_IsIrregularLocation() is never called
      // here, so this visible operand is always true
1450  (true || sx_IsIrregularLocation(loc, circular_length)) ) {
1451  // LOCATION_SUBSET if bad order or mixed strand
1452  // otherwise CONTAINED_WITHIN
1453  overlap_type = eOverlap_Subset;
1454  }
1455  return overlap_type;
1456  }
1457 
1458 
// Counts how many loop iterations it takes for `type` to become
// CSeqFeatData::eSubtype_bad and returns that count (0 when `type` is
// already eSubtype_bad).
// NOTE(review): listing line 1465, the statement that advances `type` inside
// the loop, is absent from this extract; presumably it steps to the parent
// type of the current type -- confirm against the original file.
1459  static
1460  int sx_GetRootDistance(CSeqFeatData::ESubtype type)
1461  {
1462  int distance = 0;
1463  while ( type != CSeqFeatData::eSubtype_bad ) {
1464  ++distance;
1466  }
1467  return distance;
1468  }
1469 
1470 
1471  static
1472  bool sx_IsParentType(CSeqFeatData::ESubtype parent_type,
1473  CSeqFeatData::ESubtype feat_type)
1474  {
1475  if ( feat_type != parent_type ) {
1476  for ( STypeLink link(feat_type); link; ++link ) {
1477  // TODO: VDJ
1478  if ( link.m_ParentType == parent_type ) {
1479  return true;
1480  }
1481  }
1482  }
1483  return false;
1484  }
1485 
1486 
// Base scores for parent candidates found by reference.
// sx_GetParentTypeQuality() returns values at or below
// kBetterTypeParentQuality for parents closer to the root than the child,
// and at or below kWorseTypeParentQuality otherwise; kByLocusParentQuality
// is the score assigned to genes found through a Gene-ref xref.
1487  static const int kBetterTypeParentQuality= 1000;
1488  static const int kByLocusParentQuality = 750;
1489  static const int kSameTypeParentQuality = 500;
// same numeric value as kSameTypeParentQuality
1490  static const int kWorseTypeParentQuality = kSameTypeParentQuality;
1491 
1492  static
1493  int sx_GetParentTypeQuality(CSeqFeatData::ESubtype parent,
1494  CSeqFeatData::ESubtype child)
1495  {
1496  int d_child = sx_GetRootDistance(child);
1497  int d_parent = sx_GetRootDistance(parent);
1498  if ( d_parent < d_child ) {
1499  // parent candidate is higher than child
1500  // return value <= kBetterTypeParentQuality
1501  return kBetterTypeParentQuality - (d_child - d_parent);
1502  }
1503  else {
1504  // parent candidate is not higher than child
1505  // return value <= kWorseTypeParentQuality
1506  return kWorseTypeParentQuality - (d_parent - d_child);
1507  }
1508  }
1509 
1510 
// Looks for a parent of `feat` through its Seq-feat xrefs and returns the
// first feature found, or an empty CMappedFeat when there are no xrefs or
// none of them resolves.
// For each xref, a local feature id is looked up in the feature's TSE, either
// for every type listed by link.GetMultiParentTypes() or, when that list is
// null, for link.m_ParentType. After that, gene data in the xref is resolved
// with GetGeneByRef().
// NOTE(review): listing line 1540 (the opening of the `if` that guards the
// gene lookup) is absent from this extract.
1511  static
1512  CMappedFeat sx_GetParentByRef(const CMappedFeat& feat,
1513  const STypeLink& link)
1514  {
1515  if ( !feat.IsSetXref() ) {
1516  return CMappedFeat();
1517  }
1518 
1519  CTSE_Handle tse = feat.GetAnnot().GetTSE_Handle();
1520  const CSeq_feat::TXref& xrefs = feat.GetXref();
1521  ITERATE ( CSeq_feat::TXref, it, xrefs ) {
1522  const CSeqFeatXref& xref = **it;
1523  if ( xref.IsSetId() ) {
1524  const CFeat_id& id = xref.GetId();
      // only local feature ids are looked up
1525  if ( id.IsLocal() ) {
1526  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
      // the type list is terminated by eSubtype_bad
1527  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
1528  if ( CSeq_feat_Handle feat1 = tse.GetFeatureWithId(*type_ptr, id.GetLocal(), feat) ) {
1529  return feat1;
1530  }
1531  }
1532  }
1533  else {
1534  if ( CSeq_feat_Handle feat1 = tse.GetFeatureWithId(link.m_ParentType, id.GetLocal(), feat) ) {
1535  return feat1;
1536  }
1537  }
1538  }
1539  }
1541  xref.IsSetData() ) {
1542  const CSeqFeatData& data = xref.GetData();
1543  if ( data.IsGene() ) {
1544  CSeq_feat_Handle feat1 = tse.GetGeneByRef(data.GetGene(), feat);
1545  if ( feat1 ) {
1546  return feat1;
1547  }
1548  }
1549  }
1550  }
1551  return CMappedFeat();
1552  }
1553 
1554 
1555  static
1556  CMappedFeat sx_GetParentByOverlap(const CMappedFeat& feat,
1557  const STypeLink& link,
1558  TSeqPos circular_length)
1559  {
1560  CMappedFeat best_parent;
1561 
1562  const CSeq_loc& c_loc = feat.GetLocation();
1563 
1564  // find best suitable parent by overlap score
1565  EOverlapType overlap_type =
1566  sx_GetOverlapType(link, c_loc, circular_length);
1567 
1568  Int8 best_overlap = kMax_I8;
1569  SAnnotSelector sel(link.m_ParentType);
1570  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
1571  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
1572  sel.IncludeFeatSubtype(*type_ptr);
1573  }
1574  }
1575  sel.SetByProduct(link.m_ByProduct);
1576  for (CFeat_CI it(feat.GetScope(), c_loc, sel); it; ++it) {
1577  Int8 overlap = TestForOverlap64(it->GetLocation(),
1578  c_loc,
1579  overlap_type,
1580  circular_length,
1581  &feat.GetScope());
1582  if ( overlap >= 0 && overlap < best_overlap ) {
1583  best_parent = *it;
1584  best_overlap = overlap;
1585  }
1586  }
1587  return best_parent;
1588  }
1589 }
1590 
// Compile-time switch: when false, s_AddCircularRanges() returns false
// immediately and never splits a range in two.
1591 static const bool kSplitCircular = true;
// NOTE(review): no use of this flag is visible in this part of the file;
// see the parent-assignment code further down for its effect.
1592 static const bool kOptimizeTestOverlap = true;
1593 
1594 /// @name GetParentFeature
1595 /// The algorithm is the following:
1596 /// 1. Feature types are organized in a tree of possible
1597 /// parent-child relationship:
1598 /// 1.1. operon, gap cannot have a parent,
1599 /// 1.2. gene can have operon as a parent,
1600 /// 1.3. mRNA, VDJ_segment, and C_region can have gene as a parent,
1601 /// 1.4. cdregion can have mRNA, VDJ_segment, or C_region as a parent,
1602 /// 1.5. prot can have cdregion as a parent (by its product location),
1603 /// 1.6. mat_peptide, sig_peptide can have prot as a parent,
1604 /// 1.x. all other feature types can have gene as a parent.
1605 /// 2. If parent of a nearest feature type is not found then the next type
1606 /// in the tree is checked, except prot which will have no parent
1607 /// if no cdregion is found.
1608 /// 3. For each parent type candidate the search is done in several ways:
1609 /// 3.1. first we look for a parent by Seq-feat.xref field,
1610 /// 3.2. then by Gene-ref if current parent type is gene,
1611 /// 3.3. then parent candidates are searched by the best intersection
1612 /// of their locations (product in case of prot -> cdregion link),
1613 /// 3.4. if no candidates are found next parent type is checked.
// NOTE(review): the function signature (listing lines 1614-1615) is absent
// from this extract; the body below reads a feature named `feat` and returns
// a CMappedFeat, which is empty when no parent is found.
1616 {
1617  CMappedFeat best_parent;
      // computed once from the feature location and reused for every link
1618  TSeqPos circular_length =
1619  sx_GetCircularLength(feat.GetScope(), feat.GetLocation());
      // walk the type links starting from the feature's own subtype and
      // stop at the first link that yields a parent
1620  for( STypeLink link(feat.GetFeatSubtype()); link; ++link ) {
1621  best_parent = sx_GetParentByRef(feat, link);
1622  if ( best_parent ) {
1623  // found by Xref
1624  break;
1625  }
1626 
1627  best_parent = sx_GetParentByOverlap(feat, link, circular_length);
1628  if ( best_parent ) {
1629  // parent is found by overlap
1630  break;
1631  }
1632  }
1633  return best_parent;
1634 }
1635 
1636 
1637 /////////////////////////////////////////////////////////////////////////////
1638 // CFeatTreeIndex
1639 /////////////////////////////////////////////////////////////////////////////
1640 
1641 
1642 namespace {
1643  typedef map<CSeq_id_Handle, CSeq_id_Handle> TCanonicalIdsMap;
1644 
1645  struct SBestInfo {
1646  typedef CFeatTree::CFeatInfo CFeatInfo;
1647  SBestInfo(void)
1648  : m_Quality(kMin_I1),
1649  m_Overlap(kMax_I8),
1650  m_Info(0)
1651  {
1652  }
1653 
1654  void CheckBest(Int1 quality, Int8 overlap, CFeatInfo* info)
1655  {
1656  _ASSERT(overlap >= 0);
1657  if ( (quality > m_Quality ||
1658  (quality == m_Quality && overlap < m_Overlap)) ) {
1659  m_Quality = quality;
1660  m_Overlap = overlap;
1661  m_Info = info;
1662  }
1663  }
1664  void CheckBest(const SBestInfo& b)
1665  {
1666  CheckBest(b.m_Quality, b.m_Overlap, b.m_Info);
1667  }
1668 
1669  Int1 m_Quality;
1670  Int8 m_Overlap;
1671  CFeatInfo* m_Info;
1672  };
// One (Seq-id, range) entry describing where a feature lies, used as the
// element type of the sorted parent and child arrays.
// NOTE(review): listing lines 1697 (last argument of sequence::GetId) and
// 1730 (last parameter of the second constructor, used below as `it`) are
// absent from this extract.
1673  struct SFeatRangeInfo {
1674  typedef CFeatTree::CFeatInfo CFeatInfo;
1675 
      // Seq-id of the range; replaced by x_CanonizeId() when a different
      // id is found for it
1676  CSeq_id_Handle m_Id;
1677  CRange<TSeqPos> m_Range;
      // feature this entry belongs to
1678  CFeatInfo* m_Info;
      // initialized to false by the constructors; set to true by
      // s_AddCircularRanges() when the entry is split in two
1679  bool m_SplitRange;
1680 
1681  // min start coordinate for all entries after this
      // not initialized by the constructors
1682  TSeqPos m_MinFrom;
1683 
1684  // results
1685  SBestInfo* m_Best;
1686 
      // Replaces m_Id with the id stored for it in `ids_map`. On a cache
      // miss the id is obtained from sequence::GetId() (falling back to
      // m_Id itself when that returns an empty handle) and the mapping is
      // stored in `ids_map`. Does nothing when m_Id is empty.
1687  void x_CanonizeId(TCanonicalIdsMap& ids_map)
1688  {
1689  if ( m_Id ) {
1690  auto iter = ids_map.find(m_Id);
1691  if ( iter != ids_map.end() ) {
1692  m_Id = iter->second;
1693  }
1694  else {
1695  CSeq_id_Handle new_id = sequence::GetId(m_Id,
1696  m_Info->m_Feat.GetScope(),
1698  if ( !new_id ) {
1699  new_id = m_Id;
1700  }
1701  ids_map[m_Id] = new_id;
1702  m_Id = new_id;
1703  }
1704  }
1705  }
      // Builds the entry from the feature's product (by_product == true) or
      // location. m_Range is filled only when the corresponding id is
      // non-empty.
1706  SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1707  CFeatInfo& info, SBestInfo* best,
1708  bool by_product = false)
1709  : m_Info(&info),
1710  m_SplitRange(false),
1711  m_Best(best)
1712  {
1713  if ( by_product ) {
1714  m_Id = info.m_Feat.GetProductId();
1715  if ( m_Id ) {
1716  m_Range = info.m_Feat.GetProductTotalRange();
1717  }
1718  }
1719  else {
1720  m_Id = info.m_Feat.GetLocationId();
1721  if ( m_Id ) {
1722  m_Range = info.m_Feat.GetLocationTotalRange();
1723  }
1724  }
1725  // id may be non-canonical
1726  x_CanonizeId(ids_map);
1727  }
      // Builds the entry from one element `it` of a handle-range map: the
      // element's key is the id, its overlapping range is the range.
1728  SFeatRangeInfo(TCanonicalIdsMap& ids_map,
1729  CFeatInfo& info, SBestInfo* best,
1731  : m_Id(it->first),
1732  m_Range(it->second.GetOverlappingRange()),
1733  m_Info(&info),
1734  m_SplitRange(false),
1735  m_Best(best)
1736  {
1737  // id may be non-canonical
1738  x_CanonizeId(ids_map);
1739  }
1740  };
1741  struct PLessByStart {
1742  // sort first by start coordinate, then by end coordinate
1743  bool operator()(const SFeatRangeInfo& a, const SFeatRangeInfo& b) const
1744  {
1745  return a.m_Id < b.m_Id ||
1746  (a.m_Id == b.m_Id && a.m_Range < b.m_Range);
1747  }
1748  };
1749  struct PLessByEnd {
1750  // sort first by end coordinate, then by start coordinate
1751  bool operator()(const SFeatRangeInfo& a, const SFeatRangeInfo& b) const
1752  {
1753  return a.m_Id < b.m_Id ||
1754  (a.m_Id == b.m_Id &&
1755  (a.m_Range.GetToOpen() < b.m_Range.GetToOpen() ||
1756  (a.m_Range.GetToOpen() == b.m_Range.GetToOpen() &&
1757  a.m_Range.GetFrom() < b.m_Range.GetFrom())));
1758  }
1759  };
1760 
1761  inline
1762  bool s_AddCircularRanges(vector<SFeatRangeInfo>& rr,
1763  SFeatRangeInfo& range_info,
1764  bool by_product = false)
1765  {
1766  const bool kAllowOriginInGap = true;
1767  if ( !kSplitCircular ) {
1768  return false;
1769  }
1770  if ( !kAllowOriginInGap && range_info.m_Range.GetFrom() != 0 ) {
1771  // not from the beginning of sequence
1772  return false;
1773  }
1774  const CSeq_loc& loc = by_product?
1775  range_info.m_Info->m_Feat.GetProduct():
1776  range_info.m_Info->m_Feat.GetLocation();
1777  ENa_strand strand = loc.GetStrand();
1778  if ( strand == eNa_strand_other ) {
1779  // multiple strands
1780  return false;
1781  }
1782  TSeqPos start = loc.GetStart(eExtreme_Biological);
1783  TSeqPos stop = loc.GetStop (eExtreme_Biological);
1784  if ( IsReverse(strand) ) {
1785  swap(start, stop);
1786  }
1787  if ( start <= stop ) {
1788  // direction matches strand - non circular
1789  return false;
1790  }
1791  TSeqPos circular_length = sx_GetCircularLength(range_info.m_Info->m_Feat.GetScope(), range_info.m_Id);
1792  if ( circular_length == kInvalidSeqPos ) {
1793  return false;
1794  }
1795  if ( !kAllowOriginInGap && range_info.m_Range.GetToOpen() < circular_length ) {
1796  // not till the end of sequence
1797  return false;
1798  }
1799  // 0-stop, start-circular end
1800  TSeqPos total_end_open = range_info.m_Range.GetToOpen();
1801  range_info.m_SplitRange = true;
1802  range_info.m_Range.SetTo(stop);
1803  rr.push_back(range_info);
1804  range_info.m_Range.SetFrom(start);
1805  range_info.m_Range.SetToOpen(total_end_open);
1806  rr.push_back(range_info);
1807  return true;
1808  }
1809 
// Adds one SFeatRangeInfo to `rr` for every entry of the handle-range map
// built from `loc`, and marks the feature info as multi-id.
// NOTE(review): listing line 1812, which declares the `info` parameter used
// below, is absent from this extract.
1810  void s_AddRanges(TCanonicalIdsMap& ids_map,
1811  vector<SFeatRangeInfo>& rr,
1813  SBestInfo* best,
1814  const CSeq_loc& loc)
1815  {
1816  info.m_MultiId = true;
      // one hrmap entry per Seq-id referenced by the location
1817  CHandleRangeMap hrmap;
1818  hrmap.AddLocation(loc);
1819  ITERATE ( CHandleRangeMap, it, hrmap ) {
1820  SFeatRangeInfo range_info(ids_map, info, best, it);
1821  rr.push_back(range_info);
1822  }
1823  }
1824 
1825  typedef vector<SBestInfo> TBestArray;
1826  typedef vector<SFeatRangeInfo> TRangeArray;
1827  typedef vector<CFeatTree::CFeatInfo*> TInfoArray;
1828 
1829  inline
1830  Int1 s_GetParentQuality(const CFeatTree::CFeatInfo& feat,
1831  const CFeatTree::CFeatInfo& parent)
1832  {
1833  if ( feat.m_CanMatchByQual && parent.m_CanMatchByQual ) {
1834  return sx_GetQualMatch(feat.m_Feat, parent.m_Feat);
1835  }
1836  return 0;
1837  }
1838 
1839  class CFeatTreeParentTypeIndex : public CObject
1840  {
1841  public:
1842  CFeatTreeParentTypeIndex(CSeqFeatData::ESubtype type,
1843  bool by_product)
1844  : m_Type(type),
1845  m_ByProduct(by_product),
1846  m_IndexedParents(0)
1847  {
1848  }
1849 
1850  TRangeArray& GetIndex(TCanonicalIdsMap& ids_map,
1851  const TInfoArray& feats) {
1852  if ( m_IndexedParents == feats.size() ) {
1853  return m_Index;
1854  }
1855  for ( size_t ind = m_IndexedParents; ind < feats.size(); ++ind ) {
1856  CFeatTree::CFeatInfo& feat_info = *feats[ind];
1857  if ( feat_info.m_AddIndex < m_IndexedParents ||
1858  feat_info.GetSubtype() != m_Type ||
1859  (m_ByProduct && !feat_info.m_Feat.IsSetProduct()) ) {
1860  continue;
1861  }
1862  SFeatRangeInfo range_info(ids_map, feat_info, 0, m_ByProduct);
1863  if ( range_info.m_Id ) {
1864  if ( !s_AddCircularRanges(m_Index, range_info, m_ByProduct) ) {
1865  m_Index.push_back(range_info);
1866  }
1867  }
1868  else {
1869  s_AddRanges(ids_map,
1870  m_Index, feat_info, 0,
1871  m_ByProduct?
1872  feat_info.m_Feat.GetProduct():
1873  feat_info.m_Feat.GetLocation());
1874  }
1875  }
1876  sort(m_Index.begin(), m_Index.end(), PLessByEnd());
1877  m_IndexedParents = feats.size();
1878  return m_Index;
1879  }
1880 
1881  private:
1882  CSeqFeatData::ESubtype m_Type;
1883  bool m_ByProduct;
1884  size_t m_IndexedParents;
1885  TRangeArray m_Index;
1886  };
1887 }
1888 
1889 
// Holds one CFeatTreeParentTypeIndex per (subtype, by_product) key together
// with the id map (m_CanonicalIds) passed to every per-type index.
// NOTE(review): listing lines 1894, 1896, 1899 and 1914 are absent from this
// extract; they include the first line of the three-argument GetIndex()
// signature and the declarations of `index` and `m_Index` used below.
1890 class CFeatTreeIndex : public CObject
1891 {
1892 public:
      // key of a per-type index: parent subtype and the by-product flag
1893  typedef pair<CSeqFeatData::ESubtype, bool> TParentKey;
1895 
      // Returns the up-to-date range array for the given key; the per-type
      // index object is created the first time the key is requested.
1897  bool by_product,
1898  const TInfoArray& feats) {
1900  m_Index[TParentKey(type, by_product)];
1901  if ( !index ) {
1902  index = new CFeatTreeParentTypeIndex(type, by_product);
1903  }
1904  return index->GetIndex(m_CanonicalIds, feats);
1905  }
1906 
      // Convenience overload taking the key from a type link.
1907  TRangeArray& GetIndex(const STypeLink& link, const TInfoArray& feats) {
1908  return GetIndex(link.m_ParentType, link.m_ByProduct, feats);
1909  }
1910 
1911 private:
1912  friend class CFeatTree;
1913 
1915  TCanonicalIdsMap m_CanonicalIds;
1916 };
1917 
1918 
1919 /////////////////////////////////////////////////////////////////////////////
1920 // CFeatTree
1921 /////////////////////////////////////////////////////////////////////////////
1922 
1924 {
1925  x_Init();
1926 }
1927 
1928 
1930 {
1931  x_Init();
1932  AddFeatures(it);
1933 }
1934 
1935 
1937 {
1938  x_Init();
1939  CFeat_CI it(sah);
1940  AddFeatures(it);
1941 }
1942 
1944 {
1945  x_Init();
1946  CFeat_CI it(sah, sel);
1947  AddFeatures(it);
1948 }
1949 
1950 
1952 {
1953  x_Init();
1954  CFeat_CI it(seh);
1955  AddFeatures(it);
1956 }
1957 
1959 {
1960  x_Init();
1961  CFeat_CI it(seh, sel);
1962  AddFeatures(it);
1963 }
1964 
1965 
1967 {
1968 }
1969 
1970 
1972 {
1973  *this = ft;
1974 }
1975 
1976 
1978 {
1979  if ( this != &ft ) {
1980  m_AssignedParents = 0;
1981  m_AssignedGenes = 0;
1982  m_InfoMap.clear();
1983  m_InfoArray.clear();
1984  m_RootInfo = CFeatInfo();
1990  m_Index = null;
1991  m_InfoArray.reserve(ft.m_InfoArray.size());
1992  ITERATE ( TInfoArray, it, ft.m_InfoArray ) {
1993  AddFeature((*it)->m_Feat);
1994  }
1995  }
1996  return *this;
1997 }
1998 
1999 
2001 {
2002  m_AssignedParents = 0;
2003  m_AssignedGenes = 0;
2007  m_IgnoreMissingGeneXref = false;
2009 }
2010 
2011 
2013 {
2014  m_FeatIdMode = mode;
2015 }
2016 
2017 
2019 {
2021 }
2022 
2023 
2025 {
2026  m_IgnoreMissingGeneXref = ignore;
2027 }
2028 
2029 
2031 {
2033 }
2034 
2035 
2037 {
2038  for ( ; it; ++it ) {
2039  AddFeature(*it);
2040  }
2041 }
2042 
2043 
2045 {
2046  if ( !feat ) {
2047  NCBI_THROW(CObjMgrException, eInvalidHandle,
2048  "CFeatTree: feature is null");
2049  }
2050  _ASSERT(m_InfoMap.size() == m_InfoArray.size());
2051  size_t index = m_InfoMap.size();
2053  if ( !info.m_Feat ) {
2054  _ASSERT(m_InfoMap.size() == m_InfoArray.size()+1);
2055  m_InfoArray.push_back(&info);
2056  info.m_AddIndex = index;
2057  info.m_Feat = feat;
2058  info.m_CanMatchByQual = sx_CanMatchByQual(feat);
2059  info.m_IsSetGene = sx_GeneSuppressed(feat);
2060  }
2061  else {
2062  _ASSERT(m_InfoMap.size() == m_InfoArray.size());
2063  }
2064 }
2065 
2066 
2068 {
2069  return x_GetInfo(feat.GetSeq_feat_Handle());
2070 }
2071 
2072 
2074 {
2075  TInfoMap::iterator it = m_InfoMap.find(feat);
2076  if ( it == m_InfoMap.end() ) {
2077  NCBI_THROW(CObjMgrException, eFindFailed,
2078  "CFeatTree: feature not found");
2079  }
2080  return it->second;
2081 }
2082 
2083 
2085 {
2087  if ( it == m_InfoMap.end() ) {
2088  NCBI_THROW(CObjMgrException, eFindFailed,
2089  "CFeatTree: feature not found");
2090  }
2091  return it->second.m_Feat;
2092 }
2093 
2094 
2096 {
2097  TInfoMap::iterator it = m_InfoMap.find(feat);
2098  if ( it == m_InfoMap.end() ) {
2099  return 0;
2100  }
2101  return &it->second;
2102 }
2103 
2104 
2105 pair<int, CFeatTree::CFeatInfo*>
2107  CSeqFeatData::ESubtype parent_type)
2108 {
2109  pair<int, CFeatInfo*> ret(0, nullptr);
2110  if ( !info.m_Feat.IsSetXref() ) {
2111  return ret;
2112  }
2113  CTSE_Handle tse = info.GetTSE();
2114  const CSeq_feat::TXref& xrefs = info.m_Feat.GetXref();
2115  ITERATE ( CSeq_feat::TXref, xit, xrefs ) {
2116  const CSeqFeatXref& xref = **xit;
2117  if ( !xref.IsSetId() ) {
2118  continue;
2119  }
2120  const CFeat_id& id = xref.GetId();
2121  if ( !id.IsLocal() ) {
2122  continue;
2123  }
2124  vector<CSeq_feat_Handle> ff =
2125  tse.GetFeaturesWithId(parent_type, id.GetLocal(), info.m_Feat);
2126  ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2127  CFeatInfo* parent = x_FindInfo(*fit);
2128  if ( !parent ) {
2129  continue;
2130  }
2131  int quality =
2132  sx_GetParentTypeQuality(parent->GetSubtype(),
2133  info.GetSubtype());
2134  if ( quality > ret.first ) {
2135  ret.first = quality;
2136  ret.second = parent;
2137  }
2138  }
2139  }
2140  if ( ret.first > kByLocusParentQuality ) {
2141  return ret;
2142  }
2143  if ( (parent_type == CSeqFeatData::eSubtype_gene ||
2144  parent_type == CSeqFeatData::eSubtype_any) &&
2145  sx_IsParentType(CSeqFeatData::eSubtype_gene,
2146  info.GetSubtype()) ) {
2147  // assign non-genes to genes by Gene-ref
2148  ITERATE ( CSeq_feat::TXref, xit, xrefs ) {
2149  const CSeqFeatXref& xref = **xit;
2150  if ( xref.IsSetData() ) {
2151  const CSeqFeatData& data = xref.GetData();
2152  if ( data.IsGene() ) {
2153  vector<CSeq_feat_Handle> ff =
2154  tse.GetGenesByRef(data.GetGene(), info.m_Feat);
2155  ITERATE ( vector<CSeq_feat_Handle>, fit, ff ) {
2156  CFeatInfo* gene = x_FindInfo(*fit);
2157  if ( gene ) {
2158  ret.first = kByLocusParentQuality;
2159  ret.second = gene;
2160  return ret;
2161  }
2162  }
2163  ret.first = kByLocusParentQuality;
2164  ret.second = 0;
2165  return ret;
2166  }
2167  }
2168  }
2169  }
2170  return ret;
2171 }
2172 
2173 
// Body of the CFeatTree member that tries to assign a parent to `info` from
// a (quality, parent info) pair obtained by reference lookup. It returns
// true only when x_SetParent() is called at the end.
// NOTE(review): the signature and listing lines 2176, 2178, 2193, 2201 and
// 2212 are absent from this extract, so the calls that produce `parent` and
// `grand_parent` and two of the conditions are not visible here.
2175 {
2177  pair<int, CFeatInfo*> parent =
2179  if ( !parent.second ) {
2180  if ( parent.first == kByLocusParentQuality && !GetIgnoreMissingGeneXref() ) {
2181  // explicit xref to a missing gene
2182  x_SetGene(info, 0);
2183  }
2184  return false;
2185  }
      // kWorseTypeParentQuality and kSameTypeParentQuality have the same
      // value, so the second comparison is covered by the first
2186  if ( parent.first <= kWorseTypeParentQuality ||
2187  parent.first == kSameTypeParentQuality ) {
2188  // found reference is of the same or worse type
2189  if ( m_FeatIdMode == eFeatId_by_type ) {
2190  // eFeatId_by_type limits parents to regular tree order
2191  return false;
2192  }
2194  // otherwise check for circular references
2195  if ( parent.second->IsSetParent() &&
2196  parent.second->m_Parent == &info ) {
2197  // two features cycle, keep existing parent
2198  return false;
2199  }
2200  pair<int, CFeatInfo*> grand_parent =
2202  if ( grand_parent.second == &info ) {
2203  // new circular reference, choose by quality
2204  if ( parent.first < grand_parent.first ) {
2205  return false;
2206  }
2207  }
2208  }
2209  // check if gene is found over possible intermediate parents
2210  if ( parent.second->IsGene() ) {
2211  // the gene link may be turned off
2213  return false;
2214  }
2215  // if intermediate parents are possible
2216  if ( STypeLink(info.GetSubtype()).m_ParentType!=CSeqFeatData::eSubtype_gene ) {
2217  // then assign gene only
2218  if ( !info.IsSetGene() ) {
2219  x_SetGene(info, parent.second);
2220  }
2221  return false;
2222  }
2223  }
2224  x_SetParent(info, *parent.second);
2225  return true;
2226 }
2227 
2232 };
2233 // Check what strand match is required
2235  const CFeatTree::CFeatInfo& info,
2236  const CFeatTree* tree)
2237 {
2238  if ( link.m_ParentType == CSeqFeatData::eSubtype_gene ) {
2240  tree->GetSNPStrandMode() == tree->eSNPStrand_both ) {
2241  // try snp rev
2242  return eStrandMatch_any;
2243  }
2244  if ( info.m_Feat.IsSetExcept_text() &&
2245  info.m_Feat.GetExcept_text().find("trans-splicing") != NPOS ) {
2247  }
2248  }
2249  return eStrandMatch_all;
2250 }
2251 
2252 
2254 {
2255  bool operator()(const SBestInfo& info1, const SBestInfo& info2) const {
2256  if (info1.m_Info && info2.m_Info) {
2257  if (info1.m_Quality != info2.m_Quality) {
2258  return info1.m_Quality > info2.m_Quality;
2259  }
2260  if (info1.m_Overlap != info2.m_Overlap) {
2261  return info1.m_Overlap < info2.m_Overlap;
2262  }
2263  }
2264  return info1.m_Info < info2.m_Info;
2265  }
2266 };
2267 
2268 
2270 {
2271 public:
2273  {
2274  m_IsAmbiguous = false;
2275  size_t cnt = features.size();
2276  for (size_t i = 0; i < cnt; ++i) {
2277  m_Children.emplace(features[i], SCandidates(i));
2278  }
2279  }
2280 
2282  typedef list<CFeatInfo*> TChildList;
2283  struct SParentInfo {
2285  : m_NewParent(true),
2287  {
2288  }
2292  };
2293 
2294  bool Add(CFeatInfo* child, CFeatInfo* parent, Int1 quality, Int8 overlap)
2295  {
2296  // Store separate SBestInfo for each child/parent candidate.
2297  SParentInfo& parent_info = m_Parents[parent];
2298  if ( parent_info.m_NewParent ) {
2299  // new parent appeared
2300  // check if it already has children of this type
2301  auto subtype = child->GetSubtype();
2302  for ( auto& c : parent->m_Children ) {
2303  if ( c->GetSubtype() == subtype ) {
2304  parent_info.m_DoesNotNeedChildren = true;
2305  break;
2306  }
2307  }
2308  parent_info.m_NewParent = false;
2309  }
2310  if ( quality == 0 && parent_info.m_DoesNotNeedChildren ) {
2311  return false;
2312  }
2313  SBestInfo info;
2314  info.CheckBest(quality, overlap, parent);
2315  _ASSERT(m_Children.find(child) != m_Children.end());
2316  SCandidates& c = m_Children[child];
2317  if ( !c.parents.empty() ) {
2318  m_IsAmbiguous = true;
2319  }
2320  c.parents.insert(info);
2321  parent_info.m_ChildrenCandidates.push_back(child);
2322  return true;
2323  }
2324 
2325  void Disambiguate(TBestArray& bests);
2326 
2328 
2330  {
2331  SCandidates(void) : index(0) {}
2332  SCandidates(size_t i) : index(i) {}
2333  size_t index;
2335  };
2338 
2339 private:
2343 };
2344 
2345 
2347 {
2349 
2350  bool operator()(const TChild& c1, const TChild& c2) const {
2351  const TChild::value_type& cr1 = *c1;
2352  const TChild::value_type& cr2 = *c2;
2353  if (cr1.first == cr2.first) return false;
2354  // Children with fewer parents go first.
2355  if (cr1.second.parents.size() != cr2.second.parents.size()) {
2356  return cr1.second.parents.size() < cr2.second.parents.size();
2357  }
2358  // Check for better parent quality/overlap.
2359  if (!cr1.second.parents.empty()) {
2360  const SBestInfo& p1 = *cr1.second.parents.begin();
2361  const SBestInfo& p2 = *cr2.second.parents.begin();
2362  if (p1.m_Quality != p2.m_Quality) return p1.m_Quality > p2.m_Quality;
2363  if (p1.m_Overlap != p2.m_Overlap) return p1.m_Overlap < p2.m_Overlap;
2364  }
2365  // Sort children by other values.
2366  const CMappedFeat& f1 = cr1.first->m_Feat;
2367  const CMappedFeat& f2 = cr2.first->m_Feat;
2368  // Sort by location/product
2370  if (cmp != 0) return cmp < 0;
2371  if ( f1.IsSetProduct() ) {
2372  // Features with product go first.
2373  if ( !f2.IsSetProduct() ) return true;
2375  if (cmp != 0) return cmp < 0;
2376  }
2377  else if ( f2.IsSetProduct() ) return false;
2378 
2379  // Sort by feature id, if any
2380  if ( f1.IsSetId() ) {
2381  if ( !f2.IsSetId() ) return true; // Features with id go first.
2382  if (f1.GetId().Which() != f2.GetId().Which()) {
2383  return f1.GetId().Which() < f2.GetId().Which();
2384  }
2385  switch ( f1.GetId().Which() ) {
2386  case CFeat_id::e_General:
2387  cmp = f1.GetId().GetGeneral().Compare(f2.GetId().GetGeneral());
2388  if (cmp != 0) return cmp < 0;
2389  break;
2390  case CFeat_id::e_Gibb:
2391  if (f1.GetId().GetGibb() != f2.GetId().GetGibb()) {
2392  return f1.GetId().GetGibb() < f2.GetId().GetGibb();
2393  }
2394  break;
2395  case CFeat_id::e_Giim:
2396  {
2397  const CGiimport_id& giim1 = f1.GetId().GetGiim();
2398  const CGiimport_id& giim2 = f2.GetId().GetGiim();
2399  if (giim1.GetId() != giim2.GetId()) {
2400  return giim1.GetId() < giim2.GetId();
2401  }
2402  if ( giim1.IsSetDb() ) {
2403  if ( !giim2.IsSetDb() ) return true;
2404  cmp = NStr::Compare(giim1.GetDb(), giim2.GetDb());
2405  if (cmp != 0) return cmp < 0;
2406  }
2407  else if ( giim2.IsSetDb() ) return false;
2408  if ( giim1.IsSetRelease() ) {
2409  if ( !giim2.IsSetRelease() ) return true;
2410  cmp = NStr::Compare(giim1.GetRelease(), giim2.GetRelease());
2411  if (cmp != 0) return cmp < 0;
2412  }
2413  else if ( giim2.IsSetRelease() ) return false;
2414  break;
2415  }
2416  case CFeat_id::e_Local:
2417  {
2418  const CObject_id& oid1 = f1.GetId().GetLocal();
2419  const CObject_id& oid2 = f2.GetId().GetLocal();
2420  if ( oid1.IsId() ) {
2421  if ( !oid2.IsId() ) return true;
2422  if (oid1.GetId() != oid2.GetId()) {
2423  return oid1.GetId() < oid2.GetId();
2424  }
2425  }
2426  else if ( oid1.IsStr() ) {
2427  if ( !oid2.IsStr() ) return false;
2428  cmp = NStr::Compare(oid1.GetStr(), oid2.GetStr());
2429  if (cmp != 0) return cmp < 0;
2430  }
2431  break;
2432  }
2433  default:
2434  break;
2435  }
2436  }
2437  else if ( f2.IsSetId() ) return false;
2438 
2439  // Fallback - sort by ASN.1 string representation (can be slow)
2440  string asn1, asn2;
2441  asn1 << f1.GetMappedFeature();
2442  asn2 << f2.GetMappedFeature();
2443  return asn1 < asn2;
2444  }
2445 };
2446 
2447 
2448 void CDisambiguator::Disambiguate(TBestArray& bests)
2449 {
2450  if ( !m_IsAmbiguous || m_Parents.empty() ) return; // No ambiguous features.
2451 
2452  // Children must be sorted based on both key and value from TChildren map,
2453  // so we need to create a temporary set.
2454  typedef set<TChildren::const_iterator, SChildLess> TOrderedChildren;
2455  TOrderedChildren ordered_children;
2456  ITERATE(TChildren, ci, m_Children) {
2457  if (ci->second.parents.empty()) continue;
2458  ordered_children.insert(ci);
2459  }
2460  ITERATE(TOrderedChildren, ci, ordered_children) {
2461  const TChildren::value_type& child = **ci;
2462  if (child.second.parents.empty()) continue;
2463  // Use the first (possibly the unique) parent.
2464  bests[(*ci)->second.index] = *child.second.parents.begin();
2465  CFeatInfo* parent = child.second.parents.begin()->m_Info;
2466  // Remove the parent candidate from all other children.
2467  TParents::iterator pi = m_Parents.find(parent);
2468  _ASSERT(pi != m_Parents.end());
2469  ITERATE(TChildList, pci, pi->second.m_ChildrenCandidates ) {
2470  SCandidates& ccand = m_Children[*pci];
2471  ERASE_ITERATE(TBestSet, bi, ccand.parents) {
2472  if (bi->m_Info == parent) {
2473  ccand.parents.erase(bi);
2474  break;
2475  }
2476  }
2477  if (*pci == (*ci)->first) continue;
2478  SBestInfo& info = bests[ccand.index];
2479  if (info.m_Info == parent) {
2480  info.m_Info = nullptr;
2481  }
2482  }
2483  }
2484 }
2485 
2486 
2487 static inline
2489 {
2490  return r1.GetFrom() < r2.GetFrom() || r1.GetToOpen() > r2.GetToOpen();
2491 }
2492 
2493 
2495  TBestArray& bests,
2496  const STypeLink& link,
2497  TRangeArray& pp,
2498  CFeatTree* tree,
2499  TCanonicalIdsMap& ids_map)
2500 {
2501  _ASSERT(!features.empty());
2502  _ASSERT(!pp.empty());
2503 
2504  bool check_genes = false;
2505  if ( tree->GetGeneCheckMode() == tree->eGeneCheck_match &&
2507  link.CanHaveCommonGene() ) {
2508  // tree uses common gene information
2509  // the following public method effectively assigns genes by overlap
2510  tree->GetBestGene(features[0]->m_Feat, tree->eBestGene_OverlappedOnly);
2511  check_genes = true;
2512  }
2513 
2514  TRangeArray cc;
2515  // collect children parameters
2516  size_t cnt = features.size();
2517  bests.resize(cnt);
2518  for ( size_t i = 0; i < cnt; ++i ) {
2519  CFeatTree::CFeatInfo& feat_info = *features[i];
2520  SBestInfo* best = &bests[i];
2521  SFeatRangeInfo range_info(ids_map, feat_info, best);
2522  if ( range_info.m_Id ) {
2523  if ( !s_AddCircularRanges(cc, range_info) ) {
2524  cc.push_back(range_info);
2525  }
2526  }
2527  else {
2528  s_AddRanges(ids_map, cc, feat_info, best, feat_info.m_Feat.GetLocation());
2529  }
2530  }
2531  sort(cc.begin(), cc.end(), PLessByStart());
2532 
2533  typedef pair<CFeatTree::CFeatInfo*, CFeatTree::CFeatInfo*> TFeatPair;
2534  set<TFeatPair> multi_id_tested;
2535 
2536  // assign parents in single scan over both lists
2537  {{
2538  CDisambiguator disambibuator(features);
2539  TRangeArray::iterator pi = pp.begin();
2540  TRangeArray::iterator ci = cc.begin();
2541  for ( ; ci != cc.end(); ) {
2542  // skip all parents with Seq-ids smaller than first child
2543  while ( pi != pp.end() && pi->m_Id < ci->m_Id ) {
2544  ++pi;
2545  }
2546  if ( pi == pp.end() ) { // no more parents
2547  break;
2548  }
2549  const CSeq_id_Handle& cur_id = pi->m_Id;
2550  if ( ci->m_Id < cur_id || !ci->m_Id ) {
2551  // skip all children with Seq-ids smaller than first parent
2552  do {
2553  ++ci;
2554  } while ( ci != cc.end() && (ci->m_Id < cur_id || !ci->m_Id) );
2555  continue;
2556  }
2557 
2558  // find end of Seq-id parents
2559  TRangeArray::iterator pe = pi;
2560  while ( pe != pp.end() && pe->m_Id == cur_id ) {
2561  ++pe;
2562  }
2563 
2564  TSeqPos circular_length =
2565  sx_GetCircularLength(pi->m_Info->m_Feat.GetScope(), cur_id);
2566 
2567  {{
2568  // update parents' m_MinFrom on the Seq-id
2569  TRangeArray::iterator i = pe;
2570  TSeqPos min_from = (--i)->m_Range.GetFrom();
2571  i->m_MinFrom = min_from;
2572  while ( i != pi ) {
2573  min_from = min(min_from, (--i)->m_Range.GetFrom());
2574  i->m_MinFrom = min_from;
2575  }
2576  }}
2577 
2578  // scan all Seq-id children
2579  for ( ; ci != cc.end() && pi != pe && ci->m_Id == cur_id; ++ci ) {
2580  // child parameters
2581  CFeatTree::CFeatInfo& info = *ci->m_Info;
2582  const CSeq_loc& c_loc = info.m_Feat.GetLocation();
2583  CRef<CSeq_loc> c_loc2;
2584  ENa_strand c_loc2_strand = eNa_strand_unknown;
2585  EOverlapType overlap_type =
2586  sx_GetOverlapType(link, c_loc, circular_length);
2587  EStrandMatchRule strand_match_rule =
2588  s_GetStrandMatchRule(link, info, tree);
2589  // Some CDS:mRNA/VDJ_segment/C_region relationships may be ambiguous. For these types
2590  // we need to collect all candidates before selecting the best ones.
2591  bool disambiguate =
2592  info.GetSubtype() == CSeqFeatData::eSubtype_cdregion &&
2594 
2595  // skip non-overlapping parents
2596  while ( pi != pe &&
2597  pi->m_Range.GetToOpen() < ci->m_Range.GetFrom() ) {
2598  ++pi;
2599  }
2600 
2601  // scan parent candidates
2602  for ( TRangeArray::iterator pc = pi;
2603  pc != pe && pc->m_MinFrom < ci->m_Range.GetToOpen();
2604  ++pc ) {
2605  if ( !pc->m_Range.IntersectingWith(ci->m_Range) ) {
2606  continue;
2607  }
2608  if ( check_genes && info.IsSetGene() ) {
2609  // check gene mismatch
2610  if ( info.m_Gene != pc->m_Info->GetChildrenGene() ) {
2611  continue;
2612  }
2613  }
2614  if ( info.m_MultiId && pc->m_Info->m_MultiId &&
2615  !multi_id_tested.insert(TFeatPair(&info, pc->m_Info)).second ) {
2616  // already tested this pair of child and parent
2617  continue;
2618  }
2619  const CMappedFeat& p_feat = pc->m_Info->m_Feat;
2620  const CSeq_loc& p_loc =
2621  link.m_ByProduct?
2622  p_feat.GetProduct():
2623  p_feat.GetLocation();
2624  CScope* scope = &p_feat.GetScope();
2625  Int1 quality = s_GetParentQuality(info, *pc->m_Info);
2626  Int8 overlap;
2627  try {
2628  if ( kOptimizeTestOverlap && overlap_type == eOverlap_Subset &&
2629  ci->m_Id && pc->m_Id &&
2630  s_IsNotSubrange(ci->m_Range, pc->m_Range) ) {
2631  // fast check with simple locations failed
2632  overlap = -1;
2633  }
2634  else {
2635  // full check
2636  overlap = TestForOverlap64(p_loc,
2637  c_loc,
2638  overlap_type,
2639  circular_length,
2640  scope);
2641  }
2642  }
2643  catch ( CException& /*ignored*/ ) {
2644  overlap = -1;
2645  }
2646  if ( overlap >= 0 ) {
2647  if (disambiguate) {
2648  if ( !disambibuator.Add(ci->m_Info, pc->m_Info, quality, overlap) ) {
2649  continue;
2650  }
2651  }
2652  ci->m_Best->CheckBest(quality, overlap, pc->m_Info);
2653  continue;
2654  }
2655  if ( strand_match_rule == eStrandMatch_all ) {
2656  // strands mismatch -> no overlap
2657  continue;
2658  }
2659  if ( info.m_MultiId || pc->m_Info->m_MultiId ) {
2660  // cannot compare strands on multi-id locations
2661  continue;
2662  }
2663  ENa_strand pstrand = GetStrand(p_loc, scope);
2664  if ( pstrand == eNa_strand_other ) {
2665  // parent has mixed strands -> no overlap
2666  continue;
2667  }
2668  if ( pstrand == eNa_strand_unknown ) {
2669  pstrand = eNa_strand_plus;
2670  }
2671  if ( strand_match_rule == eStrandMatch_at_least_one &&
2672  GetStrand(c_loc) != eNa_strand_other ) {
2673  // child's strand is single and doesn't match
2674  continue;
2675  }
2676  if ( !c_loc2 || c_loc2_strand != pstrand ) {
2677  // adjust strand to parent
2678  if ( !c_loc2 ) {
2679  c_loc2 = SerialClone(c_loc);
2680  }
2681  // force
2682  c_loc2->SetStrand(pstrand);
2683  c_loc2_strand = pstrand;
2684  }
2685  try {
2686  overlap = TestForOverlap64(p_loc,
2687  *c_loc2,
2688  overlap_type,
2689  circular_length,
2690  scope);
2691  }
2692  catch ( CException& /*ignored*/ ) {
2693  overlap = -1;
2694  }
2695  if ( overlap >= 0 ) {
2696  if (disambiguate) {
2697  disambibuator.Add(ci->m_Info, pc->m_Info, quality, overlap);
2698  }
2699  ci->m_Best->CheckBest((Int1)(quality-1), overlap, pc->m_Info);
2700  }
2701  }
2702  }
2703  // skip remaining Seq-id children
2704  for ( ; ci != cc.end() && ci->m_Id == cur_id; ++ci ) {
2705  }
2706  }
2707  disambibuator.Disambiguate(bests);
2708  }}
2709 }
2710 
2711 
2713  CSeqFeatData::ESubtype parent)
2714 {
2715  if (parent == CSeqFeatData::eSubtype_region &&
2725  return false;
2726  }
2727  return true;
2728 }
2729 
2730 
2732  const STypeLink& link)
2733 {
2734  if ( features.empty() ) {
2735  return;
2736  }
2737  if ( GetGeneCheckMode() == eGeneCheck_match &&
2739  bool unassigned = false;
2740  // assign already known genes as parents
2741  ITERATE ( TFeatArray, it, features ) {
2742  CFeatInfo& info = **it;
2743  if ( !info.IsSetParent() ) {
2744  if ( info.IsSetGene() ) {
2745  if ( info.m_Gene ) {
2746  x_SetParent(info, *info.m_Gene);
2747  }
2748  else {
2750  }
2751  }
2752  else {
2753  unassigned = true;
2754  }
2755  }
2756  }
2757  if ( !unassigned ) {
2758  features.clear();
2759  return;
2760  }
2761  }
2762  if ( !m_Index ) {
2763  m_Index = new CFeatTreeIndex;
2764  }
2765  // TODO: multi-children/multi-parent assignment
2766  TBestArray bests;
2767  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
2768  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
2769  TRangeArray& parents = m_Index->GetIndex(*type_ptr, link.m_ByProduct, m_InfoArray);
2770  if ( parents.empty() ) {
2771  continue;
2772  }
2773  TBestArray bests1;
2774  s_CollectBestOverlaps(features, bests1, link, parents, this, m_Index->m_CanonicalIds);
2775  if ( bests.empty() ) {
2776  swap(bests, bests1);
2777  }
2778  else {
2779  for ( size_t i = 0; i < bests1.size(); ++i ) {
2780  bests[i].CheckBest(bests1[i]);
2781  }
2782  }
2783  }
2784  if ( bests.empty() ) {
2785  return;
2786  }
2787  }
2788  else {
2789  TRangeArray& parents = m_Index->GetIndex(link, m_InfoArray);
2790  if ( parents.empty() ) {
2791  return;
2792  }
2793  s_CollectBestOverlaps(features, bests, link, parents, this, m_Index->m_CanonicalIds);
2794  }
2795  size_t cnt = features.size();
2796  _ASSERT(bests.size() == cnt);
2797 
2798  // assign found parents
2799  TFeatArray::iterator dst = features.begin();
2800  for ( size_t i = 0; i < cnt; ++i ) {
2801  CFeatInfo& info = *features[i];
2802  if ( !info.IsSetParent() ) {
2803  CFeatInfo* best = bests[i].m_Info;
2804  if (best && s_AllowedParentByOverlap(info.GetSubtype(), best->GetSubtype())) {
2805  // assign best parent
2806  x_SetParent(info, *best);
2807  }
2808  else {
2809  // store for future processing
2810  *dst++ = &info;
2811  }
2812  }
2813  }
2814  features.erase(dst, features.end());
2815 }
2816 
2817 
2819 {
2820  if ( features.empty() ) {
2821  return;
2822  }
2823  if ( !m_Index ) {
2824  m_Index = new CFeatTreeIndex;
2825  }
2826  TRangeArray& genes =
2828  if ( genes.empty() ) {
2829  return;
2830  }
2831  TBestArray bests;
2833  size_t cnt = features.size();
2834  _ASSERT(bests.size() == cnt);
2835 
2836  // assign found genes
2837  for ( size_t i = 0; i < cnt; ++i ) {
2838  CFeatInfo& info = *features[i];
2839  if ( !info.IsSetGene() ) {
2840  CFeatInfo* best = bests[i].m_Info;
2841  if ( best ) {
2842  // assign best gene
2843  x_SetGene(info, best);
2844  }
2845  }
2846  }
2847 }
2848 
2849 
2851 {
2852  x_SetGene(info, gene);
2853  ITERATE ( CFeatInfo::TChildren, it, info.m_Children ) {
2854  CFeatInfo& child = **it;
2855  if ( !child.IsSetGene() ) {
2856  x_SetGeneRecursive(child, gene);
2857  }
2858  }
2859 }
2860 
2861 
2863 {
2864  if ( m_AssignedGenes >= m_InfoArray.size() ) {
2865  return;
2866  }
2867 
2868  for ( size_t ind = m_AssignedGenes; ind < m_InfoArray.size(); ++ind ) {
2869  CFeatInfo& info = *m_InfoArray[ind];
2870  if ( info.IsSetGene() ) {
2871  continue;
2872  }
2873  if ( CFeatInfo* parent = info.m_Parent ) {
2874  if ( parent->GivesGeneToChildren() ) {
2875  if ( CFeatInfo* gene = parent->GetChildrenGene() ) {
2876  x_SetGeneRecursive(info, gene);
2877  }
2878  }
2879  }
2880  }
2881 
2882  bool has_genes = false;
2883  TFeatArray old_feats, new_feats;
2884  // collect genes and other features
2885  for ( size_t ind = m_AssignedGenes; ind < m_InfoArray.size(); ++ind ) {
2886  CFeatInfo& info = *m_InfoArray[ind];
2887  TFeatArray* arr = 0;
2888  CSeqFeatData::ESubtype feat_type = info.GetSubtype();
2889  if ( feat_type == CSeqFeatData::eSubtype_gene ) {
2890  has_genes = true;
2891  continue;
2892  }
2893  else if ( !info.IsSetGene() && STypeLink(feat_type).CanHaveGeneParent() ) {
2895  CFeatInfo* gene =
2898  if ( gene ) {
2899  x_SetGene(info, gene);
2900  continue;
2901  }
2902  }
2903  arr = info.m_AddIndex >= m_AssignedGenes? &new_feats: &old_feats;
2904  }
2905  else {
2906  continue;
2907  }
2908  arr->push_back(&info);
2909  }
2910  if ( !old_feats.empty() ) {
2911  old_feats.insert(old_feats.end(),
2912  new_feats.begin(), new_feats.end());
2913  swap(old_feats, new_feats);
2914  old_feats.clear();
2915  }
2916  if ( has_genes && !new_feats.empty() ) {
2917  x_AssignGenesByOverlap(new_feats);
2918  }
2919  m_AssignedGenes = m_InfoArray.size();
2920 }
2921 
2922 
2924  bool operator()(const CFeatTree::CFeatInfo* f1, const CFeatTree::CFeatInfo* f2) const
2925  {
2926  return f1->m_AddIndex < f2->m_AddIndex;
2927  }
2928 };
2929 
2930 
2932 {
2933  if ( m_AssignedParents >= m_InfoArray.size() ) {
2934  return;
2935  }
2936 
2937  // collect all features without assigned parent
2938  vector<TFeatArray> feats_by_type;
2939  feats_by_type.reserve(CSeqFeatData::eSubtype_max+1);
2940  size_t new_count = 0;
2941  for ( size_t ind = m_AssignedParents; ind < m_InfoArray.size(); ++ind ) {
2942  CFeatInfo& info = *m_InfoArray[ind];
2943  if ( info.IsSetParent() ) {
2944  continue;
2945  }
2947  continue;
2948  }
2949  CSeqFeatData::ESubtype feat_type = info.GetSubtype();
2950  STypeLink link(feat_type);
2951  if ( !link ) {
2952  // no parent
2954  }
2955  else {
2956  size_t index = feat_type;
2957  if ( index >= feats_by_type.size() ) {
2958  feats_by_type.resize(index+1);
2959  }
2960  feats_by_type[feat_type].push_back(&info);
2961  ++new_count;
2962  }
2963  }
2964  if ( new_count == 0 ) { // no work to do
2965  return;
2966  }
2967  // assign parents for each parent type
2968  for ( size_t type = 0; type < feats_by_type.size(); ++type ) {
2969  TFeatArray& feats = feats_by_type[type];
2970  if ( feats.empty() ) {
2971  // no work to do
2972  continue;
2973  }
2974  for ( STypeLink link((CSeqFeatData::ESubtype)type); link; ++link ) {
2975  x_AssignParentsByOverlap(feats, link);
2976  if ( feats.empty() ) {
2977  break;
2978  }
2979  }
2980  // all remaining features are without parent
2981  ITERATE ( TFeatArray, it, feats ) {
2982  x_SetNoParent(**it);
2983  }
2984  }
2985 
2986  if ( m_FeatIdMode == eFeatId_always ) {
2987  for ( size_t ind=m_AssignedParents; ind<m_InfoArray.size(); ++ind ) {
2988  CFeatInfo& info = *m_InfoArray[ind];
2990  }
2991  }
2992 
2993  for ( auto& s : m_InfoMap ) {
2994  sort(s.second.m_Children.begin(), s.second.m_Children.begin(), PByFeatInfoAddIndex());
2995  }
2996  m_AssignedParents = m_InfoArray.size();
2997 }
2998 
2999 
3001 {
3002  _ASSERT(info.IsSetParent());
3003  if ( info.m_IsLinkedToRoot == info.eIsLinkedToRoot_linking ) {
3005  << info.m_Feat.GetOriginalFeature()
3006  << info.m_Parent->m_Feat.GetOriginalFeature()
3007  << NcbiEndl;
3008  NCBI_THROW(CObjMgrException, eFindConflict,
3009  "CFeatTree: cycle in xrefs to parent feature");
3010  }
3011  if ( info.m_Parent ) {
3012  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linking;
3013  x_VerifyLinkedToRoot(*info.m_Parent);
3014  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linked;
3015  }
3016  _ASSERT(info.m_IsLinkedToRoot == info.eIsLinkedToRoot_linked);
3017 }
3018 
3019 
3021 {
3022  _ASSERT(!info.IsSetParent());
3023  _ASSERT(!info.m_Parent);
3024  _ASSERT(!parent.m_IsSetChildren);
3025  _ASSERT(parent.m_IsLinkedToRoot != info.eIsLinkedToRoot_linking);
3026  parent.m_Children.push_back(&info);
3027  info.m_Parent = &parent;
3028  info.m_IsSetParent = true;
3029  info.m_IsLinkedToRoot = parent.m_IsLinkedToRoot;
3030 }
3031 
3032 
3034 {
3035  // _ASSERT(!info.IsSetParent());
3036  _ASSERT(!info.m_Parent);
3037  m_RootInfo.m_Children.push_back(&info);
3038  info.m_IsSetParent = true;
3039  info.m_IsLinkedToRoot = info.eIsLinkedToRoot_linked;
3040 }
3041 
3042 
3044 {
3045  _ASSERT(!info.IsSetGene() || gene == info.m_Gene);
3046  info.m_Gene = gene;
3047  info.m_IsSetGene = true;
3048 }
3049 
3050 
3052 {
3053  if ( !info.IsSetParent() ) {
3054  x_AssignParents();
3055  }
3056  return info.m_Parent;
3057 }
3058 
3059 
3061 {
3062  x_AssignParents();
3063  return info.m_Children;
3064 }
3065 
3066 
3068 {
3069  CMappedFeat ret;
3070  CFeatInfo* info = x_GetParent(x_GetInfo(feat));
3071  if ( info ) {
3072  ret = info->m_Feat;
3073  }
3074  return ret;
3075 }
3076 
3077 
3080 {
3081  CMappedFeat parent = GetParent(feat);
3082  while ( parent && parent.GetFeatType() != type ) {
3083  parent = GetParent(parent);
3084  }
3085  return parent;
3086 }
3087 
3088 
3090  CSeqFeatData::ESubtype subtype)
3091 {
3092  CMappedFeat parent = GetParent(feat);
3093  while ( parent && parent.GetFeatSubtype() != subtype ) {
3094  parent = GetParent(parent);
3095  }
3096  return parent;
3097 }
3098 
3099 
3100 vector<CMappedFeat> CFeatTree::GetChildren(const CMappedFeat& feat)
3101 {
3102  vector<CMappedFeat> children;
3103  GetChildrenTo(feat, children);
3104  return children;
3105 }
3106 
3107 
3109  vector<CMappedFeat>& children)
3110 {
3111  children.clear();
3112  const TChildren* infos;
3113  if ( feat ) {
3114  infos = &x_GetChildren(x_GetInfo(feat));
3115  }
3116  else {
3117  x_AssignParents();
3118  infos = &m_RootInfo.m_Children;
3119  }
3120  children.reserve(infos->size());
3121  ITERATE ( TChildren, it, *infos ) {
3122  children.push_back((*it)->m_Feat);
3123  }
3124 }
3125 
3126 
3128  EBestGeneType lookup_type)
3129 {
3130  CMappedFeat ret;
3131  if ( lookup_type == eBestGene_TreeOnly ||
3132  lookup_type == eBestGene_AllowOverlapped ) {
3134  }
3135  if ( !ret && lookup_type != eBestGene_TreeOnly ) {
3136  x_AssignGenes();
3137  CFeatInfo* gene = x_GetInfo(feat).m_Gene;
3138  if ( gene ) {
3139  ret = gene->m_Feat;
3140  }
3141  }
3142  return ret;
3143 }
3144 
3145 
3147  : m_AddIndex(0),
3148  m_CanMatchByQual(false),
3149  m_IsSetParent(false),
3150  m_IsSetGene(false),
3151  m_IsSetChildren(false),
3152  m_MultiId(false),
3153  m_IsLinkedToRoot(eIsLinkedToRoot_unknown),
3154  m_Parent(0),
3155  m_Gene(0)
3156 {
3157 }
3158 
3159 
3161 {
3162 }
3163 
3164 
3166 {
3167  return m_Feat.GetAnnot().GetTSE_Handle();
3168 }
3169 
3170 
3172  CSeqFeatData::ESubtype bottom_type,
3173  CSeqFeatData::ESubtype top_type,
3174  const SAnnotSelector* base_sel,
3175  bool skip_bottom)
3176 {
3177  SAnnotSelector sel;
3178  if ( base_sel ) {
3179  sel = *base_sel;
3180  }
3181  else {
3183  }
3184  if ( skip_bottom ) {
3186  }
3187  else {
3188  sel.SetFeatSubtype(bottom_type);
3189  }
3190  if ( top_type != bottom_type ) {
3191  for ( STypeLink link(bottom_type); link; ++link ) {
3192  if ( const CSeqFeatData::ESubtype* type_ptr = link.GetMultiParentTypes() ) {
3193  for ( ; *type_ptr != CSeqFeatData::eSubtype_bad; ++type_ptr ) {
3194  sel.IncludeFeatSubtype(*type_ptr);
3195  }
3196  }
3197  else {
3198  sel.IncludeFeatSubtype(link.m_ParentType);
3199  }
3200  if ( link.m_ParentType == top_type ) {
3201  break;
3202  }
3203  }
3204  }
3205  CFeat_CI feat_it(scope, loc, sel);
3206  AddFeatures(feat_it);
3207 }
3208 
3209 
3211  CSeqFeatData::ESubtype bottom_type,
3212  CSeqFeatData::ESubtype top_type,
3213  const SAnnotSelector* base_sel)
3214 {
3215  AddFeature(feat);
3216  AddFeaturesFor(feat.GetScope(), feat.GetLocation(),
3217  bottom_type, top_type, base_sel);
3218 }
3219 
3220 
3222  CSeqFeatData::ESubtype top_type,
3223  const SAnnotSelector* base_sel)
3224 {
3225  AddFeature(feat);
3226  AddFeaturesFor(feat.GetScope(), feat.GetLocation(),
3227  feat.GetFeatSubtype(), top_type, base_sel, true);
3228 }
3229 
3230 
3232  const SAnnotSelector* base_sel)
3233 {
3234  AddFeaturesFor(mrna_feat,
3236  base_sel);
3237 }
3238 
3239 
3241  const SAnnotSelector* base_sel)
3242 {
3243  AddFeaturesFor(mrna_feat,
3246  base_sel);
3247 }
3248 
3249 
3251  const SAnnotSelector* base_sel)
3252 {
3253  AddFeaturesFor(cds_feat,
3255  base_sel);
3256 }
3257 
3258 
3260  const SAnnotSelector* base_sel)
3261 {
3262  AddFeaturesFor(cds_feat,
3264  base_sel);
3265 }
3266 
3267 
3269  const SAnnotSelector* base_sel)
3270 {
3271  AddFeaturesFor(gene_feat,
3274  base_sel);
3275 }
3276 
3277 
3279  const SAnnotSelector* base_sel)
3280 {
3281  AddFeaturesFor(gene_feat,
3284  base_sel);
3285 }
3286 
3287 
3289  const SAnnotSelector* base_sel)
3290 {
3291  AddFeaturesFor(feat,
3293  base_sel);
3294 }
3295 
3296 
3297 /////////////////////////////////////////////////////////////////////////////
3298 // New API for GetBestXxxForXxx()
3299 
3302  CFeatTree* feat_tree,
3303  const SAnnotSelector* base_sel,
3304  CFeatTree::EBestGeneType lookup_type)
3305 {
3306  if ( !mrna_feat ||
3307  mrna_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_mRNA ) {
3308  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3309  "GetBestGeneForMrna: mrna_feat is not a mRNA");
3310  }
3311  if ( !feat_tree ) {
3312  CFeatTree tree;
3313  tree.AddGenesForMrna(mrna_feat, base_sel);
3314  return tree.GetBestGene(mrna_feat, lookup_type);
3315  }
3316  return feat_tree->GetBestGene(mrna_feat, lookup_type);
3317 }
3318 
3319 
3322  CFeatTree* feat_tree,
3323  const SAnnotSelector* base_sel,
3324  CFeatTree::EBestGeneType lookup_type)
3325 {
3326  if ( !cds_feat ||
3328  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3329  "GetBestGeneForCds: cds_feat is not a cdregion");
3330  }
3331  if ( !feat_tree ) {
3332  CFeatTree tree;
3333  tree.AddGenesForCds(cds_feat, base_sel);
3334  return tree.GetBestGene(cds_feat, lookup_type);
3335  }
3336  return feat_tree->GetBestGene(cds_feat, lookup_type);
3337 }
3338 
3339 
3342  CFeatTree* feat_tree,
3343  const SAnnotSelector* base_sel)
3344 {
3345  if ( !cds_feat ||
3347  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3348  "GetBestMrnaForCds: cds_feat is not a cdregion");
3349  }
3350  if ( !feat_tree ) {
3351  CFeatTree tree;
3352  tree.AddMrnasForCds(cds_feat, base_sel);
3353  return tree.GetParent(cds_feat, CSeqFeatData::eSubtype_mRNA);
3354  }
3355  return feat_tree->GetParent(cds_feat, CSeqFeatData::eSubtype_mRNA);
3356 }
3357 
3358 
3361  CFeatTree* feat_tree,
3362  const SAnnotSelector* base_sel)
3363 {
3364  if ( !mrna_feat ||
3365  mrna_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_mRNA ) {
3366  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3367  "GetBestCdsForMrna: mrna_feat is not a mRNA");
3368  }
3369  if ( !feat_tree ) {
3370  CFeatTree tree;
3371  tree.AddCdsForMrna(mrna_feat, base_sel);
3372  return GetBestCdsForMrna(mrna_feat, &tree);
3373  }
3374  const vector<CMappedFeat>& children = feat_tree->GetChildren(mrna_feat);
3375  ITERATE ( vector<CMappedFeat>, it, children ) {
3376  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion ) {
3377  return *it;
3378  }
3379  }
3380  return CMappedFeat();
3381 }
3382 
3383 
3384 void GetMrnasForGene(const CMappedFeat& gene_feat,
3385  list< CMappedFeat >& mrna_feats,
3386  CFeatTree* feat_tree,
3387  const SAnnotSelector* base_sel)
3388 {
3389  if ( !gene_feat ||
3390  gene_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_gene ) {
3391  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3392  "GetMrnasForGene: gene_feat is not a gene");
3393  }
3394  if ( !feat_tree ) {
3395  CFeatTree tree;
3396  tree.AddMrnasForGene(gene_feat, base_sel);
3397  GetMrnasForGene(gene_feat, mrna_feats, &tree);
3398  return;
3399  }
3400  const vector<CMappedFeat>& children = feat_tree->GetChildren(gene_feat);
3401  ITERATE ( vector<CMappedFeat>, it, children ) {
3402  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3403  mrna_feats.push_back(*it);
3404  }
3405  }
3406 }
3407 
3408 
3409 void GetCdssForGene(const CMappedFeat& gene_feat,
3410  list< CMappedFeat >& cds_feats,
3411  CFeatTree* feat_tree,
3412  const SAnnotSelector* base_sel)
3413 {
3414  if ( !gene_feat ||
3415  gene_feat.GetFeatSubtype() != CSeqFeatData::eSubtype_gene ) {
3416  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3417  "GetCdssForGene: gene_feat is not a gene");
3418  }
3419  if ( !feat_tree ) {
3420  CFeatTree tree;
3421  tree.AddCdsForGene(gene_feat, base_sel);
3422  GetCdssForGene(gene_feat, cds_feats, &tree);
3423  return;
3424  }
3425  const vector<CMappedFeat>& children = feat_tree->GetChildren(gene_feat);
3426  ITERATE ( vector<CMappedFeat>, it, children ) {
3427  if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3428  const vector<CMappedFeat>& children2 = feat_tree->GetChildren(*it);
3429  ITERATE ( vector<CMappedFeat>, it2, children2 ) {
3430  if ( it2->GetFeatSubtype()==CSeqFeatData::eSubtype_cdregion ) {
3431  cds_feats.push_back(*it2);
3432  }
3433  }
3434  }
3435  else if ( it->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion ) {
3436  cds_feats.push_back(*it);
3437  }
3438  }
3439 }
3440 
3441 
3444  CFeatTree* feat_tree,
3445  const SAnnotSelector* base_sel,
3446  CFeatTree::EBestGeneType lookup_type)
3447 {
3448  if ( !feat ) {
3449  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3450  "GetBestGeneForFeat: feat is null");
3451  }
3452  if ( !feat_tree ) {
3453  CFeatTree tree;
3454  tree.AddGenesForFeat(feat, base_sel);
3455  return tree.GetBestGene(feat, lookup_type);
3456  }
3457  return feat_tree->GetBestGene(feat, lookup_type);
3458 }
3459 
3460 
3463  CSeqFeatData::ESubtype parent_type,
3464  CFeatTree* feat_tree,
3465  const SAnnotSelector* base_sel)
3466 {
3467  if ( !feat ) {
3468  NCBI_THROW(CObjmgrUtilException, eBadFeature,
3469  "GetBestParentForFeat: feat is null");
3470  }
3471  if ( !feat_tree ) {
3472  CFeatTree tree;
3473  tree.AddFeaturesFor(feat, parent_type, base_sel);
3474  return tree.GetParent(feat, parent_type);
3475  }
3476  return feat_tree->GetParent(feat, parent_type);
3477 }
3478 
3479 
3480 typedef pair<Int8, CMappedFeat> TMappedFeatScore;
3481 typedef vector<TMappedFeatScore> TMappedFeatScores;
3482 
/// Collect features of subtype @a feat_subtype that overlap @a loc and
/// append a (score, feature) pair to @a feats for each one whose
/// TestForOverlap64() score is non-negative.
///
/// @param scope         Scope used for bioseq lookup, iteration and scoring.
/// @param loc           Location to test candidate features against.
/// @param feat_subtype  Feature subtype set on the annotation selector.
/// @param overlap_type  Overlap rule passed to TestForOverlap64(); it also
///                      picks the selector overlap mode and the argument order.
/// @param feats         Output; results are appended, never cleared here.
/// @param base_sel      Optional selector to start from (copied when non-null).
///
/// The unnamed CSeqFeatData::E_Choice parameter is not used.
/// Exceptions thrown while iterating are caught and traced, so the caller
/// gets whatever was collected up to that point.
/// NOTE(review): original lines 3502, 3516, 3539, 3555 and 3571 are absent
/// from this copy of the file; comments below only describe what is visible.
3483 static
3484 void GetOverlappingFeatures(CScope& scope, const CSeq_loc& loc,
3485  CSeqFeatData::E_Choice /*feat_type*/,
3486  CSeqFeatData::ESubtype feat_subtype,
3487  sequence::EOverlapType overlap_type,
3488  TMappedFeatScores& feats,
3489  const SAnnotSelector* base_sel)
3490 {
      // When true, TestForOverlap64() is called with (loc, feature location)
      // instead of (feature location, loc).
3491  bool revert_locations = false;
3492  SAnnotSelector::EOverlapType annot_overlap_type;
3493  switch (overlap_type) {
3494  case eOverlap_Simple:
3495  case eOverlap_Contained:
3496  case eOverlap_Contains:
3497  // Require total range overlap
3498  annot_overlap_type = SAnnotSelector::eOverlap_TotalRange;
3499  break;
      // Every label in the group below swaps the argument order and then
      // falls through to the interval-based selector mode.
3500  case eOverlap_Subset:
3501  case eOverlap_SubsetRev:
3503  case eOverlap_Interval:
3504  case eOverlap_CheckIntRev:
3505  revert_locations = true;
3506  // there's no break here - proceed to "default"
3507  default:
3508  // Require intervals overlap
3509  annot_overlap_type = SAnnotSelector::eOverlap_Intervals;
3510  break;
3511  }
3512 
      // NOTE(review): feat_ref is not referenced anywhere in the visible body.
3513  CConstRef<CSeq_feat> feat_ref;
3514 
      // Reduce loc to (bioseq handle, range, strand) when it is a whole
      // sequence or a single interval; any other kind of location leaves
      // the handle null and the range empty.
      // NOTE(review): the declaration of `range` is on a line missing here.
3515  CBioseq_Handle h;
3517  ENa_strand strand = eNa_strand_unknown;
3518  if ( loc.IsWhole() ) {
3519  h = scope.GetBioseqHandle(loc.GetWhole());
3520  range = range.GetWhole();
3521  }
3522  else if ( loc.IsInt() ) {
3523  const CSeq_interval& interval = loc.GetInt();
3524  h = scope.GetBioseqHandle(interval.GetId());
3525  range.SetFrom(interval.GetFrom());
3526  range.SetTo(interval.GetTo());
3527  if ( interval.IsSetStrand() ) {
3528  strand = interval.GetStrand();
3529  }
3530  }
3531  else {
3532  range = range.GetEmpty();
3533  }
3534 
3535  // Check if the sequence is circular
      // circular_length stays kInvalidSeqPos unless a bioseq is found and
      // the topology test passes (second half of that test is on a missing
      // line); it is then set to the bioseq length.
3536  TSeqPos circular_length = kInvalidSeqPos;
3537  if ( h ) {
3538  if ( h.IsSetInst_Topology() &&
3540  circular_length = h.GetBioseqLength();
3541  }
3542  }
3543  else {
      // No handle yet: ask the location for a single Seq-id and, if it has
      // one, look the bioseq up through the scope.
3544  try {
3545  const CSeq_id* single_id = 0;
3546  try {
3547  loc.CheckId(single_id);
3548  }
3549  catch (CException&) {
      // CheckId() failed: continue without an id.
3550  single_id = 0;
3551  }
3552  if ( single_id ) {
3553  CBioseq_Handle h1 = scope.GetBioseqHandle(*single_id);
3554  if ( h1 && h1.IsSetInst_Topology() &&
3556  circular_length = h1.GetBioseqLength();
3557  }
3558  }
3559  }
3560  catch (CException& _DEBUG_ARG(e)) {
      // Best effort only: a failed lookup is traced and otherwise ignored.
3561  _TRACE("test for circularity failed: " << e.GetMsg());
3562  }
3563  }
3564 
3565  try {
      // Start from the caller's selector when given; the else-branch body
      // is on a line missing from this copy.
3566  SAnnotSelector sel;
3567  if ( base_sel ) {
3568  sel = *base_sel;
3569  }
3570  else {
3572  }
3573  sel.SetFeatSubtype(feat_subtype).SetOverlapType(annot_overlap_type);
      // Iterate by handle/range/strand when a bioseq handle is available,
      // otherwise by scope/location. Both branches score and store
      // candidates in the same way.
3574  if ( h ) {
3575  CFeat_CI feat_it(h, range, strand, sel);
3576  for ( ; feat_it; ++feat_it) {
3577  // treat subset as a special case
3578  Int8 cur_diff = ( !revert_locations ) ?
3579  TestForOverlap64(feat_it->GetLocation(),
3580  loc,
3581  overlap_type,
3582  circular_length,
3583  &scope) :
3584  TestForOverlap64(loc,
3585  feat_it->GetLocation(),
3586  overlap_type,
3587  circular_length,
3588  &scope);
      // A negative score means the feature is rejected.
3589  if (cur_diff < 0) {
3590  continue;
3591  }
3592 
3593  TMappedFeatScore sc(cur_diff, *feat_it);
3594  feats.push_back(sc);
3595  }
3596  }
3597  else {
3598  CFeat_CI feat_it(scope, loc, sel);
3599  for ( ; feat_it; ++feat_it) {
3600  // treat subset as a special case
3601  Int8 cur_diff = ( !revert_locations ) ?
3602  TestForOverlap64(feat_it->GetLocation(),
3603  loc,
3604  overlap_type,
3605  circular_length,
3606  &scope) :
3607  TestForOverlap64(loc,
3608  feat_it->GetLocation(),
3609  overlap_type,
3610  circular_length,
3611  &scope);
      // A negative score means the feature is rejected.
3612  if (cur_diff < 0) {
3613  continue;
3614  }
3615 
3616  TMappedFeatScore sc(cur_diff, *feat_it);
3617  feats.push_back(sc);
3618  }
3619  }
3620  }
3621  catch (CException&) {
      // Iteration or scoring threw: keep what was collected so far.
3622  _TRACE("GetOverlappingFeatures(): error: feature iterator failed");
3623  }
3624 }
3625 
3626 
3627 static
3629  const CSeq_loc& loc,
3630  CSeqFeatData::ESubtype feat_subtype,
3631  sequence::EOverlapType overlap_type,
3632  TBestFeatOpts opts,
3633  const SAnnotSelector* base_sel)
3634 {
3635  TMappedFeatScores scores;
3636  GetOverlappingFeatures(scope, loc,
3637  CSeqFeatData::GetTypeFromSubtype(feat_subtype), feat_subtype,
3638  overlap_type, scores, base_sel);
3639 
3640  if ( !scores.empty() ) {
3641  if (opts & fBestFeat_FavorLonger) {
3642  return max_element(scores.begin(), scores.end())->second;
3643  }
3644  else {
3645  return min_element(scores.begin(), scores.end())->second;
3646  }
3647  }
3648  return CMappedFeat();
3649 }
3650 
3651 
3654  CSeqFeatData::ESubtype need_subtype,
3655  sequence::EOverlapType overlap_type,
3656  CFeatTree* feat_tree,
3657  const SAnnotSelector* base_sel)
3658 {
3659  // special cases
3660  switch ( need_subtype ) {
3662  switch ( feat.GetFeatSubtype() ) {
3665  break;
3667  return GetBestGeneForMrna(feat, feat_tree, base_sel);
3669  return GetBestGeneForCds(feat, feat_tree, base_sel);
3670  default:
3671  return GetBestGeneForFeat(feat, feat_tree, base_sel);
3672  }
3673  break;
3676  return GetBestMrnaForCds(feat, feat_tree, base_sel);
3677  }
3678  break;
3680  if ( feat.GetFeatSubtype() == CSeqFeatData::eSubtype_mRNA ) {
3681  return GetBestCdsForMrna(feat, feat_tree, base_sel);
3682  }
3683  break;
3684  default:
3685  break;
3686  }
3687  // in-tree child -> parent lookup
3688  if ( sx_IsParentType(need_subtype, feat.GetFeatSubtype()) ) {
3689  return GetBestParentForFeat(feat, need_subtype, feat_tree, base_sel);
3690  }
3691  // non-tree overlap
3692  return GetBestOverlappingFeat(feat.GetScope(), feat.GetLocation(),
3693  need_subtype, overlap_type, 0, base_sel);
3694 }
3695 
3696 
3700  CScope* scope)
3701 {
3702  CRef<CSeq_loc_Mapper> mapper;
3703  if ( !feat.IsSetProduct() ) return mapper; // NULL
3704 
3705  bool benign_feat_exception = feat.IsSetExcept_text() &&
3706  (feat.GetExcept_text() == "mismatches in translation" ||
3707  feat.GetExcept_text() == "mismatches in transcription");
3708  bool severe_feat_exception =
3709  ((feat.IsSetExcept() && feat.GetExcept()) ||
3710  feat.IsSetExcept_text()) && !benign_feat_exception;
3711 
3712  if (severe_feat_exception ||
3715  return mapper; // NULL
3716  }
3717 
3718  mapper.Reset(new CSeq_loc_Mapper(feat, dir, scope));
3719  return mapper;
3720 }
3721 
3722 
3723 /////////////////////////////////////////////////////////////////////////////
3724 // Assigning feature ids
3725 /////////////////////////////////////////////////////////////////////////////
3726 
3728 {
3729  for ( CFeat_CI feat_it(annot); feat_it; ++feat_it ) {
3730  CSeq_feat_EditHandle feat(*feat_it);
3731  feat.ClearFeatIds();
3732  feat.ClearFeatXrefs();
3733  }
3734 }
3735 
3736 
3738 {
3739  for ( CFeat_CI feat_it(entry); feat_it; ++feat_it ) {
3740  CSeq_feat_EditHandle feat(*feat_it);
3741  feat.ClearFeatIds();
3742  feat.ClearFeatXrefs();
3743  }
3744 }
3745 
3746 
// Forward declarations. s_SetFeatureId() calls s_SetChildrenFeatureIds(),
// so the latter has to be declared before the former is defined.
// Both take the running id counter by reference and advance it in place.
3747 static void s_SetFeatureId(CFeatTree& ft,
3748  const CMappedFeat& feat,
3749  int& last_id,
3750  const CMappedFeat& parent);
// NOTE(review): the second and third parameters are named `feat` and
// `feat_id` here; the definition that appears to belong to this declaration
// (its first signature line is missing in this copy) names them `parent`
// and `last_id`.
3751 static void s_SetChildrenFeatureIds(CFeatTree& ft,
3752  const CMappedFeat& feat,
3753  int& feat_id);
3754 
/// Give @a feat the next local feature id and then number its subtree.
///
/// @param ft       Feature tree, passed on when descending to the children.
/// @param feat     Feature that receives the new id.
/// @param last_id  Running counter; incremented before use, so the id
///                 assigned to @a feat is the new value of @a last_id.
/// @param parent   Parent of @a feat in the tree; may be a null feature.
3755 static void s_SetFeatureId(CFeatTree& ft,
3756  const CMappedFeat& feat,
3757  int& last_id,
3758  const CMappedFeat& parent)
3759 {
3760  CSeq_feat_EditHandle efeat(feat);
3761  efeat.SetFeatId(++last_id);
3762 
      // Cross-link child and parent only when a parent exists and it is an
      // RNA feature. NOTE(review): the last operand of this condition is on
      // a line missing from this copy; going by the comment below it
      // presumably checks that feat is a Cdregion -- confirm.
3763  if ( parent &&
3764  parent.GetFeatType() == CSeqFeatData::e_Rna &&
3766  // conservative choice: link only between RNA and Cdregion features
3767  efeat.AddFeatXref(parent.GetId().GetLocal());
3768  CSeq_feat_EditHandle parent_efeat(parent);
      // last_id still holds the id just given to feat.
3769  parent_efeat.AddFeatXref(last_id);
3770  }
3771 
      // Continue numbering with the children of feat.
3772  s_SetChildrenFeatureIds(ft, feat, last_id);
3773 }
3774 
3775 
3777  const CMappedFeat& parent,
3778  int& last_id)
3779 {
3780  vector<CMappedFeat> children = ft.GetChildren(parent);
3781  ITERATE (vector<CMappedFeat>, it, children ) {
3782  s_SetFeatureId(ft, *it, last_id, parent);
3783  }
3784 }
3785 
3786 
3788 {
3789  ClearFeatureIds(entry);
3790  int feat_id = 0;
3791  CFeat_CI feat_it(entry);
3792  CFeatTree ft(feat_it);
3793  s_SetChildrenFeatureIds(ft, CMappedFeat(), feat_id);
3794 }
3795 
3796 
3798 {
3799  ClearFeatureIds(annot);
3800  int feat_id = 0;
3801  CFeat_CI feat_it(annot);
3802  CFeatTree ft(feat_it);
3803  s_SetChildrenFeatureIds(ft, CMappedFeat(), feat_id);
3804 }
3805 
3806 
3808 {
3809  CRef<CSeq_loc> stop(new CSeq_loc());
3810 
3811  for ( CSeq_loc_CI citer (loc); citer; ++citer ) {
3812  stop->SetPnt().SetId().Assign(citer.GetSeq_id());
3813  }
3814  stop->SetPnt().SetPoint(loc.GetStop(eExtreme_Biological));
3815  return stop;
3816 }
3817 
3819 {
3821  bool pos1_not_in = false;
3822  if (pos1 == ((TSeqPos)-1)) {
3823  pos1_not_in = true;
3824  }
3827  bool pos2_not_in = false;
3828  if (pos2 == ((TSeqPos)-1)) {
3829  pos2_not_in = true;
3830  }
3831  if (pos1_not_in && pos2_not_in) {
3832  return eLocationInFrame_NotIn;
3833  }
3834 
3837  if (cmp != sequence::eContains && cmp != sequence::eSame) {
3838  return eLocationInFrame_NotIn;
3839  }
3840 
3841  unsigned int frame = 0;
3842  if (cds.IsSetData() && cds.GetData().IsCdregion()) {
3843  const CCdregion& cdr = cds.GetData().GetCdregion();
3844  switch (cdr.GetFrame()) {
3846  case CCdregion::eFrame_one:
3847  frame = 0;
3848  break;
3849  case CCdregion::eFrame_two:
3850  frame = 1;
3851  break;
3853  frame = 2;
3854  break;
3855  }
3856  }
3857  // note - have to add 3 to prevent negative result from subtraction
3858  TSeqPos mod1 = (pos1 + 3 - frame) %3;
3859 
3860  if ( mod1 != 0 && loc.IsPartialStart(eExtreme_Biological)
3862  && pos1 == 0) {
3863  mod1 = 0;
3864  } else if (pos1 < frame) {
3865  // start is out of frame - it's before the coding region begins
3866  mod1 = 1;
3867  }
3869  mod1 = 0;
3870  }
3871 
3872 
3873  TSeqPos cds_len = sequence::GetLength (cds.GetLocation(), &(cds.GetScope()));
3874 
3875  TSeqPos mod2 = (pos2 + 3 - frame) %3;
3876  if ( mod2 != 0 && loc.IsPartialStop(eExtreme_Biological)
3878  && pos2 == cds_len) {
3879  mod2 = 0;
3880  } else if (pos2 <= frame) {
3881  // stop is out of frame - it's before the coding region begins
3882  mod2 = 1;
3883  }
3884  if (pos2 > cds_len) {
3885  // stop is out of frame - it's after the coding region ends
3886  mod2 = 1;
3887  }
3889  mod2 = 2;
3890  }
3891 /*
3892  // Would this work just as well?
3893  if (loc.IsPartialStop(eExtreme_Biological)) {
3894  mod2 = 2;
3895  }
3896  else if
3897  (pos2 <= frame || pos2 > cds_len) {
3898  mod2 = 1;
3899  }
3900 */
3901 
3902  if ( (mod1 != 0) && (mod2 != 2) ) {
3904  } else if (mod1 != 0) {
3906  } else if (mod2 != 2) {
3907  return eLocationInFrame_BadStop;
3908  } else {
3909  return eLocationInFrame_InFrame;
3910  }
3911 }
3912 
3913 
3914 bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle& orig_feat)
3915 {
3916  // only move coding regions to nuc-prot set
3917  if (!orig_feat.IsSetData() || !orig_feat.GetData().IsCdregion()) {
3918  return false;
3919  }
3920  // don't move if pseudo
3921  if (orig_feat.IsSetPseudo() && orig_feat.GetPseudo()) {
3922  return false;
3923  }
3924  CBioseq_Handle nuc_bsh;
3925  try {
3926  nuc_bsh = orig_feat.GetScope().GetBioseqHandle(orig_feat.GetLocation());
3927  if (!nuc_bsh) {
3928  return false;
3929  }
3930  } catch (...) {
3931  return false;
3932  }
3933 
3934  // This is necessary, to make sure that we are in "editing mode"
3935  const CSeq_annot_Handle& annot_handle = orig_feat.GetAnnot();
3936  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
3937 
3938  CSeq_feat_EditHandle feh(orig_feat);
3939  CSeq_entry_Handle parent_entry = feh.GetAnnot().GetParentEntry();
3940 
3941  bool rval = false;
3942 
3943  if (parent_entry.IsSet()
3944  && parent_entry.GetSet().IsSetClass()
3945  && parent_entry.GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
3946  // already on nuc-prot set, leave it alone
3947  } else {
3948  CBioseq_set_Handle nuc_parent = parent_entry.GetParentBioseq_set();
3949  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
3951  CSeq_entry_Handle parent_seh = nuc_parent.GetParentEntry();
3952  CSeq_annot_CI annot_ci(parent_seh, CSeq_annot_CI::eSearch_entry);
3953  for (; annot_ci; ++annot_ci) {
3954  if ((*annot_ci).IsFtable()) {
3955  ftable = *annot_ci;
3956  break;
3957  }
3958  }
3959 
3960  if (!ftable) {
3961  CRef<CSeq_annot> new_annot(new CSeq_annot());
3962  new_annot->SetData().SetFtable();
3963  CSeq_entry_EditHandle h = parent_seh.GetEditHandle();
3964  ftable = h.AttachAnnot(*new_annot);
3965  }
3966 
3967  CSeq_annot_EditHandle old_annot = annot_handle.GetEditHandle();
3968  CSeq_annot_EditHandle new_annot = ftable.GetEditHandle();
3969  orig_feat = new_annot.TakeFeat(feh);
3970  const list< CRef< CSeq_feat > > &feat_list = old_annot.GetSeq_annotCore()->GetData().GetFtable();
3971  if (feat_list.empty())
3972  {
3973  old_annot.Remove();
3974  }
3975  rval = true;
3976  }
3977  }
3978  return rval;
3979 }
3980 
3981 // A function to ensure that Seq-feat.partial is set if either end of the
3982 // feature is partial, and clear if neither end of the feature is partial
3984 {
3985  bool any_change = false;
3986  bool partial5 = new_feat.GetLocation().IsPartialStart(eExtreme_Biological);
3987  bool partial3 = new_feat.GetLocation().IsPartialStop(eExtreme_Biological);
3988  bool should_be_partial = partial5 || partial3;
3989  bool is_partial = false;
3990  if (new_feat.IsSetPartial() && new_feat.GetPartial()) {
3991  is_partial = true;
3992  }
3993  if (should_be_partial && !is_partial) {
3994  new_feat.SetPartial(true);
3995  any_change = true;
3996  }
3997  else if (!should_be_partial && is_partial) {
3998  new_feat.ResetPartial();
3999  any_change = true;
4000  }
4001  return any_change;
4002 }
4003 
4004 
4005 // A function to make the location partialness (and the partial flag) of dst match that of src
4007 {
4008  bool any_change = false;
4009  bool partial5 = src.GetLocation().IsPartialStart(eExtreme_Biological);
4010  bool partial3 = src.GetLocation().IsPartialStop(eExtreme_Biological);
4011  bool prot_5 = dst.GetLocation().IsPartialStart(eExtreme_Biological);
4012  bool prot_3 = dst.GetLocation().IsPartialStop(eExtreme_Biological);
4013  if ((partial5 && !prot_5) || (!partial5 && prot_5)
4014  || (partial3 && !prot_3) || (!partial3 && prot_3)) {
4015  dst.SetLocation().SetPartialStart(partial5, eExtreme_Biological);
4016  dst.SetLocation().SetPartialStop(partial3, eExtreme_Biological);
4017  any_change = true;
4018  }
4019  any_change |= AdjustFeaturePartialFlagForLocation(dst);
4020  return any_change;
4021 }
4022 
4023 // A function to change an existing MolInfo to match a coding region
4025 {
4026  bool rval = false;
4027  if (!molinfo.IsSetBiomol() || molinfo.GetBiomol() != CMolInfo::eBiomol_peptide) {
4029  rval = true;
4030  }
4031 
4032  bool partial5 = cds.GetLocation().IsPartialStart(eExtreme_Biological);
4033  bool partial3 = cds.GetLocation().IsPartialStop(eExtreme_Biological);
4035  if (partial5 && partial3) {
4036  completeness = CMolInfo::eCompleteness_no_ends;
4037  }
4038  else if (partial5) {
4039  completeness = CMolInfo::eCompleteness_no_left;
4040  }
4041  else if (partial3) {
4042  completeness = CMolInfo::eCompleteness_no_right;
4043  }
4044  else {
4045  completeness = CMolInfo::eCompleteness_complete;
4046  }
4047 
4048  if (!molinfo.IsSetCompleteness() || molinfo.GetCompleteness() != completeness)
4049  {
4050  if (completeness == CMolInfo::eCompleteness_complete)
4051  molinfo.SetDefaultCompleteness();
4052  else
4053  molinfo.SetCompleteness(completeness);
4054  rval = true;
4055  }
4056  return rval;
4057 }
4058 
4059 // A function to make all of the necessary related changes to
4060 // a Seq-entry after the partialness of a coding region has been
4061 // changed.
4062 bool AdjustForCDSPartials(const CSeq_feat& cds, CScope& scope)
4063 {
4064  bool any_change = false;
4065 
4066  if (!cds.IsSetProduct()) {
4067  return any_change;
4068  }
4069 
4070  // find Bioseq for product
4071  CBioseq_Handle product = scope.GetBioseqHandle(cds.GetProduct());
4072  if (!product) {
4073  return any_change;
4074  }
4075 
4076  // adjust protein feature
4078  if (f) {
4079  // This is necessary, to make sure that we are in "editing mode"
4080  const CSeq_annot_Handle& annot_handle = f->GetAnnot();
4081  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
4082  CSeq_feat_EditHandle feh(*f);
4083  CRef<CSeq_feat> new_feat(new CSeq_feat());
4084  new_feat->Assign(*(f->GetSeq_feat()));
4085  if (CopyFeaturePartials(*new_feat, cds)) {
4086  feh.Replace(*new_feat);
4087  any_change = true;
4088  }
4089  }
4090 
4091  // change or create molinfo on protein bioseq
4092  bool found = false;
4093  CBioseq_EditHandle beh = product.GetEditHandle();
4094 
4096  if ((*it)->IsMolinfo()) {
4097  any_change |= AdjustProteinMolInfoToMatchCDS((*it)->SetMolinfo(), cds);
4098  found = true;
4099  }
4100  }
4101  if (!found) {
4102  CRef<objects::CSeqdesc> new_molinfo_desc(new CSeqdesc);
4103  AdjustProteinMolInfoToMatchCDS(new_molinfo_desc->SetMolinfo(), cds);
4104  beh.SetDescr().Set().push_back(new_molinfo_desc);
4105  any_change = true;
4106  }
4107 
4108  return any_change;
4109 }
4110 
4111 
4112 // A function to make all of the necessary related changes to
4113 // a Seq-entry after the partialness of a coding region has been
4114 // changed.
4116 {
4117  return AdjustForCDSPartials(cds, seh.GetScope());
4118 }
4119 
4120 
4121 bool RetranslateCDS(const CSeq_feat& cds, CScope& scope)
4122 {
4123  // feature must be cds and already have product
4124  if (!cds.IsSetData() || !cds.GetData().IsCdregion() || !cds.IsSetProduct()) {
4125  return false;
4126  }
4127 
4128  // Use Cdregion.Product to get handle to protein bioseq
4129  CBioseq_Handle prot_bsh = scope.GetBioseqHandle(cds.GetProduct());
4130 
4131  // Should be a protein!
4132  if (!prot_bsh || !prot_bsh.IsProtein())
4133  {
4134  return false;
4135  }
4136 
4137  CBioseq_EditHandle peh = prot_bsh.GetEditHandle();
4138  CRef<CBioseq> new_protein = CSeqTranslator::TranslateToProtein(cds, scope);
4139  if (new_protein && new_protein->IsSetInst()) {
4140  CRef<CSeq_inst> new_inst(new CSeq_inst());
4141  new_inst->Assign(new_protein->GetInst());
4142  peh.SetInst(*new_inst);
4143 
4144  // If protein feature exists, update location
4146  if (f) {
4147  // This is necessary, to make sure that we are in "editing mode"
4148  const CSeq_annot_Handle& annot_handle = f->GetAnnot();
4149  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
4150  CSeq_feat_EditHandle feh(*f);
4151  CRef<CSeq_feat> new_feat(new CSeq_feat());
4152  new_feat->Assign(*(f->GetSeq_feat()));
4153  if (new_feat->CanGetLocation() &&
4154  new_feat->GetLocation().IsInt() &&
4155  new_feat->GetLocation().GetInt().CanGetTo())
4156  {
4157  new_feat->SetLocation().SetInt().SetTo(
4158  new_protein->GetLength() - 1);
4159  feh.Replace(*new_feat);
4160  }
4161  }
4162  }
4163 
4165  return true;
4166 }
4167 
4168 
4169 void AddFeatureToBioseq(const CBioseq& seq, const CSeq_feat& f, CScope& scope)
4170 {
4171  bool added = false;
4172  if (seq.IsSetAnnot()) {
4173  ITERATE(CBioseq::TAnnot, it, seq.GetAnnot()) {
4174  if ((*it)->IsFtable()) {
4175  CSeq_annot_Handle sah = scope.GetSeq_annotHandle(**it);
4176  CSeq_annot_EditHandle eh(sah);
4177  eh.AddFeat(f);
4178  added = true;
4179  break;
4180  }
4181  }
4182  }
4183  if (!added) {
4184  CRef<CSeq_annot> annot(new CSeq_annot());
4185  CRef<CSeq_feat> sf(new CSeq_feat());
4186  sf->Assign(f);
4187  annot->SetData().SetFtable().push_back(sf);
4188  CBioseq_Handle bh = scope.GetBioseqHandle(seq);
4189  CBioseq_EditHandle beh(bh);
4190  beh.AttachAnnot(*annot);
4191  }
4192 }
4193 
4194 
4195 void AddProteinFeature(const CBioseq& seq, const string& protein_name, const CSeq_feat& cds, CScope& scope)
4196 {
4197  // make new protein feature
4198  CRef<CSeq_feat> new_prot(new CSeq_feat());
4199  new_prot->SetLocation().SetInt().SetId().Assign(*(cds.GetProduct().GetId()));
4200  new_prot->SetLocation().SetInt().SetFrom(0);
4201  new_prot->SetLocation().SetInt().SetTo(seq.GetLength() - 1);
4202  new_prot->SetData().SetProt().SetName().push_back(protein_name);
4203  CopyFeaturePartials(*new_prot, cds);
4204 
4205  AddFeatureToBioseq(seq, *new_prot, scope);
4206 }
4207 
4208 
4209 // ----------------------------------------------------------------------------
4211  CMappedFeat mf,
4212  CSeqFeatData::ESubtype subtype,
4213  vector<CMappedFeat>& children,
4214  feature::CFeatTree& featTree)
4215 // ----------------------------------------------------------------------------
4216 {
4217  //const CSeq_feat& ff = mf.GetOriginalFeature();
4218 
4219  vector<CMappedFeat> c = featTree.GetChildren(mf);
4220  for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4221  CMappedFeat f = *it;
4222  if (f.GetFeatSubtype() == subtype) {
4223  children.push_back(f);
4224  }
4225  else {
4226  sFeatureGetChildrenOfSubtypeFaster(f, subtype, children, featTree);
4227  }
4228  }
4229  return true;
4230 }
4231 
4232 
4233 // ----------------------------------------------------------------------------
4235  CMappedFeat mf,
4236  CSeqFeatData::ESubtype subtype,
4237  vector<CMappedFeat>& children)
4238 // ----------------------------------------------------------------------------
4239 {
4240  //const CSeq_feat& ff = mf.GetOriginalFeature();
4241  feature::CFeatTree myTree;
4242  myTree.AddFeaturesFor(mf, subtype, mf.GetFeatSubtype());
4243 
4244  vector<CMappedFeat> c = myTree.GetChildren(mf);
4245  for (vector<CMappedFeat>::iterator it = c.begin(); it != c.end(); it++) {
4246  CMappedFeat f = *it;
4247  if (f.GetFeatSubtype() == subtype) {
4248  children.push_back(f);
4249  }
4250  else {
4251  sFeatureGetChildrenOfSubtypeFaster(f, subtype, children, myTree);
4252  }
4253  }
4254  return true;
4255 }
4256 
4257 // ----------------------------------------------------------------------------
4259  feature::CFeatTree& ft,
4260  CMappedFeat mf,
4261  string& biotype,
4262  bool fast)
4263 // ----------------------------------------------------------------------------
4264 {
4265 #define SUBTYPE(x) CSeqFeatData::eSubtype_ ## x
4266 
4267  typedef vector<CMappedFeat> MFS;
4268  typedef MFS::const_iterator MFSit;
4269 
4270  const string strRearrange("rearrangement required for product");
4271 
4272  //0a
4273  // Only genes ever get that new gene_biotype attribute, other feature types
4274  // control whether the parent gene gets it but they don't get the attribute
4275  // themselves.
4276  //
4277  if (mf.GetFeatSubtype() != SUBTYPE(gene)) {
4278  return false;
4279  }
4280 
4281  //for debugging specific genes:
4282  // size_t start = mf.GetLocation().GetInt().GetStart(objects::eExtreme_Positional);
4283  // if (start == 23365505-1) {
4284  // cerr << "";
4285  // }
4286 
4287  vector<CMappedFeat> vecCds;
4288  if (fast) {
4289  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(cdregion), vecCds, ft);
4290  }
4291  else {
4292  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(cdregion), vecCds);
4293  }
4294 
4295  //1a
4296  // If there is at least one non-pseudo CDS child without a
4297  // except-text="rearrangement required for product" qualifier then
4298  // gene_biotype qualifier is "protein_coding".
4299  //
4300  if (!mf.IsSetPseudo() || !mf.GetPseudo()) {
4301  for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4302  if (it->IsSetPseudo() && it->GetPseudo()) {
4303  continue;
4304  }
4305  if (it->IsSetExcept_text() && (it->GetExcept_text() == strRearrange)) {
4306  continue;
4307  }
4308  biotype = "protein_coding";
4309  return true;
4310  }
4311  }
4312 
4313  vector<CMappedFeat> vecOthers;
4314  if (fast) {
4315  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(V_region), vecOthers, ft);
4316  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(C_region), vecOthers, ft);
4317  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(V_segment), vecOthers, ft);
4318  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(D_segment), vecOthers, ft);
4319  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(J_segment), vecOthers, ft);
4320  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(tRNA), vecOthers, ft);
4321  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(rRNA), vecOthers, ft);
4322  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(snRNA), vecOthers, ft);
4323  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(snoRNA), vecOthers, ft);
4324  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(tmRNA), vecOthers, ft);
4325  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(otherRNA), vecOthers, ft);
4326  sFeatureGetChildrenOfSubtypeFaster(mf, SUBTYPE(ncRNA), vecOthers, ft);
4327  }
4328  else{
4329  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(V_region), vecOthers);
4330  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(C_region), vecOthers);
4331  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(V_segment), vecOthers);
4332  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(D_segment), vecOthers);
4333  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(J_segment), vecOthers);
4334  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(tRNA), vecOthers);
4335  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(rRNA), vecOthers);
4336  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(snRNA), vecOthers);
4337  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(snoRNA), vecOthers);
4338  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(tmRNA), vecOthers);
4339  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(otherRNA), vecOthers);
4340  sFeatureGetChildrenOfSubtype(mf, SUBTYPE(ncRNA), vecOthers);
4341  }
4342  CSeqFeatData::ESubtype singleSubtype = SUBTYPE(bad);
4343  CMappedFeat nonPseudo;
4344 
4345  bool geneIsPseudo = mf.IsSetPseudo() && mf.GetPseudo();
4346  for (MFSit it = vecOthers.begin(); it != vecOthers.end(); it++) {
4347  CSeqFeatData::ESubtype currentSubtype = it->GetFeatSubtype();
4348  if (!geneIsPseudo && (!it->IsSetPseudo() || !it->GetPseudo())) {
4349  nonPseudo = *it;
4350  }
4351  if (singleSubtype == SUBTYPE(bad)) {
4352  singleSubtype = currentSubtype;
4353  }
4354  else if (currentSubtype != singleSubtype) {
4355  singleSubtype = SUBTYPE(bad);
4356  break;
4357  }
4358  }
4359 
4360  //2a
4361  // If the only feature type present in vecOthers is ncRNA and at least one
4362  // of the members is non-pseudo then look at CLASS=RNA-ref.ext.gen.class.
4363  // If CLASS=="other", then gene_biotype="ncRNA".
4364  // If not, then gene_biotype=<CLASS>.
4365  //
4366  vector<string> acceptedClasses = {
4367  "antisense_RNA",
4368  "autocatalytically_spliced_intron",
4369  "guide_RNA",
4370  "hammerhead_ribozyme",
4371  "lncRNA",
4372  "miRNA",
4373  "ncRNA",
4374  "other",
4375  "piRNA",
4376  "rasiRNA",
4377  "ribozyme",
4378  "RNase_MRP_RNA",
4379  "RNase_P_RNA",
4380  "scRNA",
4381  "siRNA",
4382  "snoRNA",
4383  "snRNA",
4384  "SRP_RNA",
4385  "stRNA",
4386  "telomerase_RNA",
4387  "vault_RNA",
4388  "Y_RNA"};
4389 
4390  if (singleSubtype == SUBTYPE(ncRNA) && nonPseudo) {
4391  const CRNA_ref& rna = nonPseudo.GetData().GetRna();
4392  if (!rna.IsSetExt()) {
4393  biotype = "ncRNA";
4394  return true;
4395  }
4396  const CRNA_ref::TExt& ext = rna.GetExt();
4397  if (!ext.IsGen()) {
4398  biotype = "ncRNA";
4399  return true;
4400  }
4401  if (ext.IsGen() && ext.GetGen().IsSetClass()) {
4402  string rnaClass = ext.GetGen().GetClass();
4403  if (rnaClass == "other") {
4404  biotype = "ncRNA";
4405  return true;
4406  }
4407  if (std::find(acceptedClasses.begin(), acceptedClasses.end(), rnaClass) ==
4408  acceptedClasses.end()) {
4409  biotype = "ncRNA";
4410  return true;
4411  }
4412  biotype = rnaClass;
4413  return true;
4414  }
4415  else {
4416  biotype = "ncRNA";
4417  return true;
4418  }
4419  }
4420 
4421  //2b
4422  // If still here and all members of vecOthers are of the same feature type FTYPE and
4423  // at least one of the members is non-pseudo, then gene_biotype=<FTYPE>
4424  //
4425  if (singleSubtype != SUBTYPE(bad) && nonPseudo) {
4426  biotype = CSeqFeatData::SubtypeValueToName(singleSubtype);
4427  return true;
4428  }
4429 
4430  //2c
4431  // If all members of vecOthers are of type miscRNA (and also all pseudo or we would no
4432  // longer be here) then gene_biotype="transcribed_pseudogene".
4433  if (singleSubtype == SUBTYPE(otherRNA)) {
4434  biotype = "transcribed_pseudogene";
4435  return true;
4436  }
4437 
4438  //2d
4439  // If all members of vecOthers are of the same feature type FTYPE (and also all pseudo
4440  // or we would no longer be here) then gene_biotype=<FTYPE>"-pseudogene"
4441  if (singleSubtype != SUBTYPE(bad)) {
4442  biotype = CSeqFeatData::SubtypeValueToName(singleSubtype) + "_pseudogene";
4443  return true;
4444  }
4445 
4446  //3a
4447  // If vecCds is empty then gene_biotype="other", unless pseudo=TRUE
4448  if (vecCds.empty() && (!mf.IsSetPseudo() || !mf.GetPseudo())) {
4449  biotype = "other";
4450  return true;
4451  }
4452 
4453  //3b
4454  // If at least one member of vecCds with "except-text=rearrangement required for product"
4455  // then gene_biotype="segment" for pseudo=FALSE and gene_biotype="segment_pseudogene" for
4456  // pseudo=TRUE.
4457  for (MFSit it = vecCds.begin(); it != vecCds.end(); it++) {
4458  if (!it->IsSetExcept_text()) {
4459  continue;
4460  }
4461  if (it->GetExcept_text() != strRearrange) {
4462  continue;
4463  }
4464  if (it->IsSetPseudo() && it->GetPseudo()) {
4465  biotype = "segment_pseudogene";
4466  }
4467  else {
4468  biotype = "segment";
4469  }
4470  return true;
4471  }
4472 
4473  //3c
4474  // If we made it to that point then all members of the non-empty vecCds are pseudo or
4475  // vecCds is empty and the gene itself is pseudo.
4476  // In this case, gene_biotype="pseudogene".
4477  biotype = "pseudogene";
4478 
4479  return true;
4480 #undef SUBTYPE
4481 }
4482 
4483 
4484 // ----------------------------------------------------------------------------
4486  feature::CFeatTree& ft,
4487  CMappedFeat mf,
4488  string& biotype)
4489 // ----------------------------------------------------------------------------
4490 {
4491  return sGetFeatureGeneBiotypeWrapper(ft, mf, biotype, true);
4492 }
4493 
4494 // ----------------------------------------------------------------------------
4496  feature::CFeatTree& ft,
4497  CMappedFeat mf,
4498  string& biotype)
4499 // ----------------------------------------------------------------------------
4500 {
4501  return sGetFeatureGeneBiotypeWrapper(ft, mf, biotype, false);
4502 }
4503 
4504 
4505 END_SCOPE(feature)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eBoth
Both preliminary and traceback stages.
Definition: blast_def.h:332
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
CSeq_annot_Handle GetAnnot(void) const
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
int Compare(const CDbtag &dbt2) const
Definition: Dbtag.cpp:176
set< SBestInfo, SBestInfoLess > TBestSet
Definition: feature.cpp:2327
void Disambiguate(TBestArray &bests)
Definition: feature.cpp:2448
bool m_IsAmbiguous
Definition: feature.cpp:2340
CFeatTree::CFeatInfo CFeatInfo
Definition: feature.cpp:2281
map< CFeatInfo *, SCandidates > TChildren
Definition: feature.cpp:2336
bool Add(CFeatInfo *child, CFeatInfo *parent, Int1 quality, Int8 overlap)
Definition: feature.cpp:2294
list< CFeatInfo * > TChildList
Definition: feature.cpp:2282
map< CFeatInfo *, SParentInfo > TParents
Definition: feature.cpp:2337
TChildren m_Children
Definition: feature.cpp:2341
CDisambiguator(CFeatTree::TFeatArray &features)
Definition: feature.cpp:2272
TParents m_Parents
Definition: feature.cpp:2342
TIndex m_Index
Definition: feature.cpp:1914
map< TParentKey, CRef< CFeatTreeParentTypeIndex > > TIndex
Definition: feature.cpp:1894
TRangeArray & GetIndex(CSeqFeatData::ESubtype type, bool by_product, const TInfoArray &feats)
Definition: feature.cpp:1896
pair< CSeqFeatData::ESubtype, bool > TParentKey
Definition: feature.cpp:1893
TRangeArray & GetIndex(const STypeLink &link, const TInfoArray &feats)
Definition: feature.cpp:1907
TCanonicalIdsMap m_CanonicalIds
Definition: feature.cpp:1915
CFeatTree.
Definition: feature.hpp:173
CFeat_CI –.
Definition: feat_ci.hpp:64
CFeat_id –.
Definition: Feat_id.hpp:66
void GetLabel(string *label) const
Definition: Gene_ref.cpp:57
CGiimport_id –.
Definition: Giimport_id.hpp:66
void AddLocation(const CSeq_loc &loc, ETransSplicing trans_splcing=eNoTransSplicing)
TLocMap::const_iterator const_iterator
CMappedFeat –.
Definition: mapped_feat.hpp:59
Base class for all object manager exceptions.
CObject –.
Definition: ncbiobj.hpp:180
Exceptions for objmgr/util library.
void GetLabel(string *label) const
Definition: Org_ref.cpp:57
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Append a label to "label" based on content.
Definition: Pub_equiv.cpp:56
bool GetLabel(string *label, TLabelFlags flags=0, ELabelVersion version=eLabel_DefaultVersion) const override
Get a label that is the concatenation of the pub labels for the pubs in the set.
Definition: Pub_set.cpp:74
C_Ext –.
Definition: RNA_ref_.hpp:119
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static E_Choice GetTypeFromSubtype(ESubtype subtype)
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_entry_Handle –.
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
static const string & GetIupacaa3(TIndex ncbistdaa)
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
CSeq_feat_Handle GetGeneByRef(const CGene_ref &ref) const
Definition: tse_handle.cpp:887
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:635
TSeq_feat_Handles GetGenesByRef(const CGene_ref &ref) const
Definition: tse_handle.cpp:901
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
void erase(iterator pos)
Definition: set.hpp:151
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static void s_GetRnaRefLabelFromComment(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:252
static CMappedFeat GetBestOverlappingFeat(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TBestFeatOpts opts, const SAnnotSelector *base_sel)
Definition: feature.cpp:3628
bool sFeatureGetChildrenOfSubtypeFaster(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &, feature::CFeatTree &)
Definition: feature.cpp:4210
bool sGetFeatureGeneBiotypeWrapper(feature::CFeatTree &, CMappedFeat, string &, bool)
Definition: feature.cpp:4258
pair< Int8, CMappedFeat > TMappedFeatScore
Definition: feature.cpp:3480
static const bool kOptimizeTestOverlap
Definition: feature.cpp:1592
void s_GetContentLabel(const CSeq_feat &feat, string *label, const string *type_label, TFeatLabelFlags flags, CScope *scope)
Definition: feature.cpp:588
static void s_GetVariationLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *)
Definition: feature.cpp:563
static EStrandMatchRule s_GetStrandMatchRule(const STypeLink &link, const CFeatTree::CFeatInfo &info, const CFeatTree *tree)
Definition: feature.cpp:2234
static const bool kSplitCircular
Definition: feature.cpp:1591
static void GetOverlappingFeatures(CScope &scope, const CSeq_loc &loc, CSeqFeatData::E_Choice, CSeqFeatData::ESubtype feat_subtype, sequence::EOverlapType overlap_type, TMappedFeatScores &feats, const SAnnotSelector *base_sel)
Definition: feature.cpp:3484
static void s_SetChildrenFeatureIds(CFeatTree &ft, const CMappedFeat &feat, int &feat_id)
Definition: feature.cpp:3776
void s_GetTypeLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags)
Definition: feature.cpp:104
static void s_GetVariationDbtagLabel(string *tlabel, TFeatLabelFlags, const CDbtag &dbtag)
Definition: feature.cpp:406
static bool s_IsNotSubrange(const CRange< TSeqPos > &r1, const CRange< TSeqPos > &r2)
Definition: feature.cpp:2488
static void s_GetRnaRefLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:274
#define SUBTYPE(x)
static bool s_AllowedParentByOverlap(CSeqFeatData::ESubtype child, CSeqFeatData::ESubtype parent)
Definition: feature.cpp:2712
static bool s_GetImpLabel(const CSeq_feat &feat, string *tlabel, TFeatLabelFlags flags, const string *type_label)
Definition: feature.cpp:430
static void s_GetCdregionLabel(const CSeq_feat &feat, string *tlabel, CScope *scope)
Definition: feature.cpp:140
vector< TMappedFeatScore > TMappedFeatScores
Definition: feature.cpp:3481
static void s_CollectBestOverlaps(CFeatTree::TFeatArray &features, TBestArray &bests, const STypeLink &link, TRangeArray &pp, CFeatTree *tree, TCanonicalIdsMap &ids_map)
Definition: feature.cpp:2494
static void s_SetFeatureId(CFeatTree &ft, const CMappedFeat &feat, int &last_id, const CMappedFeat &parent)
Definition: feature.cpp:3755
USING_SCOPE(sequence)
static CRef< CSeq_loc > s_MakePointForLocationStop(const CSeq_loc &loc)
Definition: feature.cpp:3807
EStrandMatchRule
Definition: feature.cpp:2228
@ eStrandMatch_at_least_one
Definition: feature.cpp:2230
@ eStrandMatch_any
Definition: feature.cpp:2231
@ eStrandMatch_all
Definition: feature.cpp:2229
bool sFeatureGetChildrenOfSubtype(CMappedFeat, CSeqFeatData::ESubtype, vector< CMappedFeat > &)
Definition: feature.cpp:4234
static int type
Definition: getdata.c:31
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
Definition: ncbimisc.hpp:843
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define _DEBUG_ARG(arg)
Definition: ncbidbg.hpp:134
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const float pi
Definition: math.hpp:54
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
int Compare(const CSeq_loc &loc) const
Definition: Seq_loc.cpp:590
bool CheckId(const CSeq_id *&id, bool may_throw=true) const
Check that the 'id' field in all parts of the location is the same as the specified id.
Definition: Seq_loc.hpp:927
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
Definition: Seq_loc.cpp:3346
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fCompare_Default
Definition: Seq_loc.hpp:245
void x_AssignGenes(void)
Definition: feature.cpp:2862
void GetMrnasForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &mrna_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3384
EGeneCheckMode m_GeneCheckMode
Definition: feature.hpp:423
bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle &orig_feat)
Promotes coding region from Seq-annot on nucleotide sequence to Seq-annot on nuc-prot-set if necessar...
Definition: feature.cpp:3914
CFeatInfo & x_GetInfo(const CSeq_feat_Handle &feat)
Definition: feature.cpp:2073
CMappedFeat GetBestParentForFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype parent_type, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3462
bool m_IgnoreMissingGeneXref
Definition: feature.hpp:424
TInfoMap m_InfoMap
Definition: feature.hpp:418
CRef< CFeatTreeIndex > m_Index
Definition: feature.hpp:426
void GetCdssForGene(const CMappedFeat &gene_feat, list< CMappedFeat > &cds_feats, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3409
vector< CMappedFeat > GetChildren(const CMappedFeat &feat)
Return all nearest children of a feature.
Definition: feature.cpp:3100
bool x_AssignParentByRef(CFeatInfo &info)
Definition: feature.cpp:2174
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3301
void SetFeatIdMode(EFeatIdMode mode)
Definition: feature.cpp:2012
const CTSE_Handle & GetTSE(void) const
Definition: feature.cpp:3165
bool GetFeatureGeneBiotype(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
Definition: feature.cpp:4495
void GetLabel(const CSeq_feat &feat, string *label, TFeatLabelFlags flags, CScope *scope)
Definition: feature.cpp:743
void SetSNPStrandMode(ESNPStrandMode mode)
Definition: feature.cpp:2030
void AddGenesForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a cdregion feature.
Definition: feature.cpp:3250
CFeatInfo m_RootInfo
Definition: feature.hpp:420
void AddCdsForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a mRNA feature.
Definition: feature.cpp:3240
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
Definition: feature.cpp:4121
void SetIgnoreMissingGeneXref(bool ignore=true)
Definition: feature.cpp:2024
CFeatInfo * GetChildrenGene(void)
Definition: feature.hpp:362
void x_AssignParents(void)
Definition: feature.cpp:2931
bool AdjustForCDSPartials(const CSeq_feat &cds, CScope &scope)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
Definition: feature.cpp:4062
void Reset(void)
Definition: feature.cpp:794
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3341
void x_SetGene(CFeatInfo &info, CFeatInfo *gene)
Definition: feature.cpp:3043
pair< int, CTSE_Handle > TFullId
Definition: feature.hpp:115
vector< CFeatInfo * > TChildren
Definition: feature.hpp:366
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another.
Definition: feature.cpp:4006
CFeatInfo * m_Gene
Definition: feature.hpp:380
ESNPStrandMode
Mode of processing SNP strands.
Definition: feature.hpp:241
void AddCdsForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get cdregions for a gene feature.
Definition: feature.cpp:3278
const TChildren & x_GetChildren(CFeatInfo &info)
Definition: feature.cpp:3060
const CMappedFeat & GetMappedFeat(const CSeq_feat_Handle &feat) const
Find a corresponding CMappedFeat for a feature already added to a tree.
Definition: feature.cpp:2084
CMappedFeat GetBestGene(const CMappedFeat &feat, EBestGeneType lookup_type=eBestGene_TreeOnly)
Return parent gene if exists or best overlapping gene.
Definition: feature.cpp:3127
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3321
void AddMrnasForGene(const CMappedFeat &gene_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a gene feature.
Definition: feature.cpp:3268
void AddProteinFeature(const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
AddProteinFeature A function to create a protein feature with the specified protein name.
Definition: feature.cpp:4195
CMappedFeat m_Feat
Definition: feature.hpp:369
void ClearFeatureIds(const CSeq_annot_EditHandle &annot)
Definition: feature.cpp:3727
void AddFeatureToBioseq(const CBioseq &seq, const CSeq_feat &f, CScope &scope)
AddFeatureToBioseq A function to add a feature to a Bioseq - will create a new feature table Seq-anno...
Definition: feature.cpp:4169
ELabelType
For compatibility with legacy code.
Definition: feature.hpp:85
EBestGeneFeatIdMode m_BestGeneFeatIdMode
Definition: feature.hpp:422
TChildren m_Children
Definition: feature.hpp:381
void x_AssignParentsByOverlap(TFeatArray &features, const STypeLink &link)
Definition: feature.cpp:2731
ESNPStrandMode m_SNPStrandMode
Definition: feature.hpp:425
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
Definition: feature.cpp:3818
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel, CFeatTree::EBestGeneType lookup_type)
Definition: feature.cpp:3443
ELocationInFrame
Definition: feature.hpp:531
size_t m_AssignedGenes
Definition: feature.hpp:417
CMappedFeat GetParent(const CMappedFeat &feat)
Return nearest parent of a feature.
Definition: feature.cpp:3067
vector< CFeatInfo * > TFeatArray
Definition: feature.hpp:383
void x_Init(void)
Definition: feature.cpp:2000
EIsLinkedToRoot m_IsLinkedToRoot
Definition: feature.hpp:378
void AddFeaturesFor(CScope &scope, const CSeq_loc &loc, CSeqFeatData::ESubtype bottom_type, CSeqFeatData::ESubtype top_type, const SAnnotSelector *base_sel=0, bool skip_bottom=false)
Add all features from bottom_type to top_type for a feature.
Definition: feature.cpp:3171
CRef< CSeq_loc_Mapper > CreateSeqLocMapperFromFeat(const CSeq_feat &feat, CSeq_loc_Mapper::EFeatMapDirection dir, CScope *scope)
Create CSeq_loc_Mapper from a feature, check for special cases like exceptions in CDS features.
Definition: feature.cpp:3698
void AddFeature(const CMappedFeat &feat)
Add a single feature to the tree.
Definition: feature.cpp:2044
bool GetIgnoreMissingGeneXref(void) const
Mode for taking into account gene xrefs to missing genes.
Definition: feature.hpp:235
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
Definition: feature.cpp:3983
bool GetFeatureGeneBiotypeFaster(feature::CFeatTree &ft, CMappedFeat mf, string &biotype)
Definition: feature.cpp:4485
CMappedFeat MapSeq_feat(const CSeq_feat_Handle &feat, const CBioseq_Handle &master_seq, const CRange< TSeqPos > &range)
Definition: feature.cpp:973
void SetGeneCheckMode(EGeneCheckMode mode)
Definition: feature.cpp:2018
void x_AssignGenesByOverlap(TFeatArray &features)
Definition: feature.cpp:2818
void GetChildrenTo(const CMappedFeat &feat, vector< CMappedFeat > &children)
Store all nearest children of a feature into a vector.
Definition: feature.cpp:3108
vector< CFeatInfo * > TChildren
Definition: feature.hpp:386
TIdMap m_IdMap
Definition: feature.hpp:117
void x_SetGeneRecursive(CFeatInfo &info, CFeatInfo *gene)
Definition: feature.cpp:2850
CMappedFeat GetParentFeature(const CMappedFeat &feat)
Definition: feature.cpp:1615
void x_SetNoParent(CFeatInfo &info)
Definition: feature.cpp:3033
pair< int, CFeatInfo * > x_LookupParentByRef(CFeatInfo &info, CSeqFeatData::ESubtype parent_type)
Definition: feature.cpp:2106
EFeatIdMode m_FeatIdMode
Definition: feature.hpp:421
EGeneCheckMode
Mode for taking into account best gene. eGeneCheck_match will try to match a parent feature only if th...
Definition: feature.hpp:224
bool IsSetGene(void) const
Definition: feature.hpp:350
size_t GetFeatIdsCount(void) const
Definition: feature.cpp:800
~CFeatTree(void)
Destructor.
Definition: feature.cpp:1966
CSeqFeatData::ESubtype GetSubtype(void) const
Definition: feature.hpp:353
virtual bool Less(const CSeq_feat &f1, const CSeq_feat &f2, CScope *scope)
Definition: feature.cpp:891
void AddGenesForFeat(const CMappedFeat &feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for an arbitrary feature.
Definition: feature.cpp:3288
void x_VerifyLinkedToRoot(CFeatInfo &info)
Definition: feature.cpp:3000
vector< CFeatInfo * > TInfoArray
Definition: feature.hpp:415
CFeatInfo * x_GetParent(CFeatInfo &info)
Definition: feature.cpp:3051
CFeatTree & operator=(const CFeatTree &)
Definition: feature.cpp:1977
TInfoArray m_InfoArray
Definition: feature.hpp:419
size_t m_AssignedParents
Definition: feature.hpp:417
int RemapId(int old_id, const CTSE_Handle &tse)
Definition: feature.cpp:806
void AddMrnasForCds(const CMappedFeat &cds_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get mRNAs for a cdregion feature.
Definition: feature.cpp:3259
EGeneCheckMode GetGeneCheckMode(void) const
Definition: feature.hpp:228
void AddFeatures(CFeat_CI it)
Add all features collected by a CFeat_CI to the tree.
Definition: feature.cpp:2036
void AddGenesForMrna(const CMappedFeat &mrna_feat, const SAnnotSelector *base_sel=0)
Add all necessary features to get genes for a mRNA feature.
Definition: feature.cpp:3231
void x_SetParent(CFeatInfo &info, CFeatInfo &parent)
Definition: feature.cpp:3020
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
Definition: feature.cpp:4024
CFeatInfo * x_FindInfo(const CSeq_feat_Handle &feat)
Definition: feature.cpp:2095
bool RemapIds(CSeq_feat &feat, const CTSE_Handle &tse)
Definition: feature.cpp:853
void ReassignFeatureIds(const CSeq_entry_EditHandle &entry)
Definition: feature.cpp:3787
EFeatIdMode
Mode of processing feature ids.
Definition: feature.hpp:201
int TFeatLabelFlags
binary OR of FFeatLabelFlags
Definition: feature.hpp:78
CMappedFeat GetBestCdsForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree, const SAnnotSelector *base_sel)
Definition: feature.cpp:3360
bool GivesGeneToChildren(void) const
Definition: feature.hpp:359
CFeatTree(void)
Construct empty tree.
Definition: feature.cpp:1923
@ eBestGene_AllowOverlapped
Definition: feature.hpp:332
@ eBestGene_TreeOnly
Definition: feature.hpp:331
@ eSNPStrand_both
Definition: feature.hpp:243
@ eContent
Definition: feature.hpp:87
@ eType
Definition: feature.hpp:86
@ eLocationInFrame_InFrame
Definition: feature.hpp:532
@ eLocationInFrame_BadStart
Definition: feature.hpp:533
@ eLocationInFrame_BadStop
Definition: feature.hpp:534
@ eLocationInFrame_BadStartAndStop
Definition: feature.hpp:535
@ eLocationInFrame_NotIn
Definition: feature.hpp:536
@ fFGL_NoComments
Leave out comments, even as fallbacks.
Definition: feature.hpp:75
@ fFGL_NoQualifiers
Leave out qualifiers.
Definition: feature.hpp:76
@ fFGL_Both
Definition: feature.hpp:74
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
@ fFGL_Type
Always include the feature's type.
Definition: feature.hpp:72
@ eGeneCheck_match
Definition: feature.hpp:226
@ eBestGeneFeatId_ignore
Definition: feature.hpp:213
@ eBestGeneFeatId_always
Definition: feature.hpp:214
@ eFeatId_always
Definition: feature.hpp:204
@ eFeatId_by_type
Definition: feature.hpp:203
@ eFeatId_ignore
Definition: feature.hpp:202
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
EOverlapType
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
Int8 TestForOverlap64(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
64-bit version of TestForOverlap(). Check if the two locations have overlap of a given type.
ECompare
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
int TBestFeatOpts
Definition: sequence.hpp:348
@ eGetId_Seq_id_BestRank
use CSeq_id::BestRank() as the scoring function
Definition: sequence.hpp:107
@ fBestFeat_FavorLonger
favor longer features over shorter features
Definition: sequence.hpp:339
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_annot_Handle GetSeq_annotHandle(const CSeq_annot &annot, EMissing action=eMissing_Default)
Definition: scope.cpp:192
const CFeat_id & GetId(void) const
const CSeq_feat::TXref & GetXref(void) const
bool IsSetId(void) const
bool GetPseudo(void) const
TClass GetClass(void) const
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
void SetDescr(TDescr &v) const
void Remove(void) const
Remove current annot.
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
TSet GetSet(void) const
bool IsSetXref(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
void ClearFeatIds(void)
Clear feature ids.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
bool IsSetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
void SetInst(TInst &v) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Get parent bioseq-set handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void SetFeatId(int id)
Set single feature id.
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.