NCBI C++ ToolKit
autodef.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: autodef.cpp 100082 2023-06-13 21:40:12Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin
27 *
28 * File Description:
29 * Generate unique definition lines for a set of sequences using organism
30 * descriptions and feature clauses.
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <objmgr/util/autodef.hpp>
35 #include <corelib/ncbimisc.hpp>
36 #include <objmgr/annot_ci.hpp>
37 #include <objmgr/seqdesc_ci.hpp>
38 #include <objmgr/bioseq_ci.hpp>
39 #include <objmgr/util/feature.hpp>
40 #include <objmgr/util/sequence.hpp>
42 
45 #include <objects/seq/Seqdesc.hpp>
46 #include <objects/seq/Bioseq.hpp>
53 
54 #include <serial/iterator.hpp>
55 
58 
59 
61  : m_Cancelled(false)
62 {
63 }
64 
65 
67 {
68 }
69 
70 
72 {
73  if (!b.IsSetAnnot()) {
74  return true;
75  }
76  size_t num_features = 0;
77 
78  ITERATE(CBioseq::TAnnot, a, b.GetAnnot()) {
79  if ((*a)->IsFtable()) {
80  num_features += (*a)->GetData().GetFtable().size();
81  if (num_features > 100) {
82  break;
83  }
84  }
85  }
86  if (num_features < 100) {
87  return true;
88  } else {
89  return false;
90  }
91 }
92 
94 {
95  // add sources to modifier combination groups
96  CBioseq_CI seq_iter(se, CSeq_inst::eMol_na);
97  for ( ; seq_iter; ++seq_iter ) {
98  CSeqdesc_CI dit((*seq_iter), CSeqdesc::e_Source);
99  if (dit) {
100  string feature_clauses = s_NeedFeatureClause(*(seq_iter->GetCompleteBioseq())) ? x_GetFeatureClauses(*seq_iter) : kEmptyStr;
101  const CBioSource& bsrc = dit->GetSource();
102  m_OrigModCombo.AddSource(bsrc, feature_clauses);
103  }
104  }
105 
106  // set default exclude_sp values
108 }
109 
// Walks every entry of `sources`.
// NOTE(review): the statement executed per source and the statement that
// follows the "set default exclude_sp values" comment are not visible in
// this listing - confirm against the full file before relying on this.
110 void CAutoDef::AddDescriptors(const TSources& sources)
111 {
112  for (const auto& it : sources) {
113  // Feature clauses are not necessary at this point. They are formed later on anyway.
115  }
116 
117  // set default exclude_sp values
119 }
120 
122 {
123  unsigned int k, j, tmp;
124  if (index_list.size() < 2) {
125  return;
126  }
127  for (k = 0; k < index_list.size() - 1; k++) {
128  for (j = k + 1; j < index_list.size(); j++) {
129  if (modifier_list[index_list[k]].GetRank() > modifier_list[index_list[j]].GetRank()) {
130  tmp = index_list[k];
131  index_list[k] = index_list[j];
132  index_list[j] = tmp;
133  }
134  }
135  }
136 }
137 
138 
140 {
141  unsigned int k;
142  TModifierIndexVector remaining_list;
143 
144  index_list.clear();
145  remaining_list.clear();
146 
147  // note - required modifiers should be removed from the list
148 
149  // first, look for all_present and all_unique modifiers
150  for (k = 0; k < modifier_list.size(); k++) {
151  if (modifier_list[k].AllPresent() && modifier_list[k].AllUnique()) {
152  index_list.push_back(k);
153  } else if (modifier_list[k].AnyPresent()) {
154  remaining_list.push_back(k);
155  }
156  }
157  x_SortModifierListByRank(index_list, modifier_list);
158  x_SortModifierListByRank(remaining_list, modifier_list);
159 
160  for (k = 0; k < remaining_list.size(); k++) {
161  index_list.push_back(remaining_list[k]);
162  }
163 }
164 
165 
166 bool CAutoDef::x_IsOrgModRequired(unsigned int mod_type)
167 {
168  return false;
169 }
170 
171 
// Reports whether a SubSource subtype is a required modifier.
// Returns true for eSubtype_plasmid_name and eSubtype_transgenic, false
// otherwise.
// NOTE(review): the opening line of the condition is not visible in this
// listing, so at least one more subtype is probably accepted - confirm.
172 bool CAutoDef::x_IsSubSrcRequired(unsigned int mod_type)
173 {
175  || mod_type == CSubSource::eSubtype_plasmid_name
176  || mod_type == CSubSource::eSubtype_transgenic) {
177  return true;
178  } else {
179  return false;
180  }
181 }
182 
183 
185 {
187  modifier_list.clear();
188  m_OrigModCombo.GetAvailableModifiers (modifier_list);
189 
190  unsigned int num_present = 0;
191  for (unsigned int k = 0; k < modifier_list.size(); k++) {
192  if (modifier_list[k].AnyPresent()) {
193  num_present++;
194  }
195  }
196  return num_present;
197 }
198 
199 
202  const CRef<CAutoDefModifierCombo>& s2) const
203  {
204  return (*s1 < *s2);
205  }
206 };
207 
208 
209 
211 {
212  TModifierComboVector combo_list;
213 
214  combo_list.clear();
215  combo_list.emplace_back (new CAutoDefModifierCombo(&m_OrigModCombo));
216 
217 
218  TModifierComboVector tmp, add_list;
219  TModifierComboVector::iterator it;
221  bool stop = false;
222  unsigned int k;
223 
224  mod_list.clear();
225 
226  if (combo_list[0]->GetMaxInGroup() == 1) {
227  stop = true;
228  }
229 
230  while (!stop) {
231  stop = true;
232  it = combo_list.begin();
233  add_list.clear();
234  while (it != combo_list.end()) {
235  tmp = (*it)->ExpandByAnyPresent ();
236  if (!tmp.empty()) {
237  stop = false;
238  for (k = 0; k < tmp.size(); k++) {
239  add_list.emplace_back (new CAutoDefModifierCombo(tmp[k]));
240  }
241  it = combo_list.erase (it);
242  } else {
243  ++it;
244  }
245  tmp.clear();
246  }
247  for (k = 0; k < add_list.size(); k++) {
248  combo_list.emplace_back (new CAutoDefModifierCombo(add_list[k]));
249  }
250  add_list.clear();
251  std::sort (combo_list.begin(), combo_list.end(), SAutoDefModifierComboSort());
252  if (combo_list[0]->GetMaxInGroup() == 1) {
253  stop = true;
254  }
255  }
256 
257  ITERATE (CAutoDefSourceDescription::TModifierVector, it, combo_list[0]->GetModifiers()) {
258  mod_list.push_back (CAutoDefSourceModifierInfo(*it));
259  }
260 
261  return combo_list[0];
262 }
263 
264 
266 {
268 
269  // set all modifiers in combo
271 
272  // first, get the list of modifiers that are available
273  modifier_list.clear();
274  newm->GetAvailableModifiers (modifier_list);
275 
276  // add any modifier not already in the combo to the combo
277  for (unsigned int k = 0; k < modifier_list.size(); k++) {
278  if (modifier_list[k].AnyPresent()) {
279  if (modifier_list[k].IsOrgMod()) {
280  COrgMod::ESubtype subtype = modifier_list[k].GetOrgModType();
281  if (!newm->HasOrgMod(subtype)) {
282  newm->AddOrgMod(subtype);
283  }
284  } else {
285  CSubSource::ESubtype subtype = modifier_list[k].GetSubSourceType();
286  if (!newm->HasSubSource(subtype)) {
287  newm->AddSubsource(subtype);
288  }
289  }
290  }
291  }
292  return newm;
293 }
294 
295 
297 {
299 
300  return newm;
301 }
302 
303 
305 {
307  if (best == NULL) {
308  return "";
309  }
310 
311  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
312  const CBioSource& bsrc = dit->GetSource();
313  return best->GetSourceDescriptionString(bsrc);
314  }
315  return "";
316 }
317 
318 
320 {
321  // remove optional features that have not been requested
322  if (main_clause == NULL) {
323  return;
324  }
325 
326  // keep 5' UTRs only if lonely or requested
329  }
330 
331  // keep 3' UTRs only if lonely or requested
334  }
335 
336  // keep LTRs only if requested or lonely and not in parent
340  }
341 
342  // keep promoters only if requested or lonely and not in mRNA
345  // promoters are requested, remove all regulatory features except promoters
347  } else {
348  bool lonely = main_clause->IsFeatureTypeLonely(CSeqFeatData::eSubtype_regulatory);
349  if (lonely) {
350  // remove regulatory features, including promoters, only in mRNA sequences
352  // remove regulatory features other than promoters everywhere else
354  } else {
355  // remove all regulatory features
357  }
358  }
359  }
360 
361  // keep introns only if requested or lonely and not in mRNA
362  if (!m_Options.GetKeepIntrons()) {
365  } else {
367  }
368  }
369 
370  // keep exons only if requested or lonely or in mRNA or in partial CDS or on segment
371  if (!m_Options.GetKeepExons() && !IsSegment(bh)) {
372  if (main_clause->GetMainFeatureSubtype() != CSeqFeatData::eSubtype_exon) {
373  main_clause->RemoveUnwantedExons();
374  }
375  }
376 
377  // only keep bioseq precursor RNAs if lonely or requested
378  if (!main_clause->IsBioseqPrecursorRNA() && !m_Options.GetKeepPrecursorRNA()) {
379  main_clause->RemoveBioseqPrecursorRNAs();
380  }
381 
382  // keep uORFs if lonely or requested
383  if (!m_Options.GetKeepuORFs() && main_clause->GetNumSubclauses() > 1) {
384  main_clause->RemoveuORFs();
385  }
386 
387  // remove "optional" mobile element features unless lonely or requested
388  if (!m_Options.GetKeepMobileElements() && main_clause->GetNumSubclauses() > 1) {
389  main_clause->RemoveOptionalMobileElements();
390  }
391 
392  // keep misc_recombs only if requested
393  if (!m_Options.GetKeepMiscRecomb()) {
395  }
396 
397  // delete subclauses at end, so that loneliness calculations will be correct
398  main_clause->RemoveDeletedSubclauses();
399 }
400 
401 
403 {
404  return m_Options.IsFeatureSuppressed(subtype);
405 }
406 
407 
// Suppresses the feature described by a feature-list item.
// When the item has a concrete type, its subtype is cast to
// CSeqFeatData::ESubtype and handed to m_Options.SuppressFeature().
// NOTE(review): the statement taken when the type is e_not_set is not
// visible in this listing - confirm what that branch does.
408 void CAutoDef::SuppressFeature(const objects::CFeatListItem& feat)
409 {
410  if (feat.GetType() == CSeqFeatData::e_not_set) {
412  } else {
413  m_Options.SuppressFeature((CSeqFeatData::ESubtype)(feat.GetSubtype()));
414  }
415 }
416 
417 
418 void CAutoDef::SuppressFeature(objects::CSeqFeatData::ESubtype subtype)
419 {
420  m_Options.SuppressFeature(subtype);
421 }
422 
423 
425 {
427 
428  seh = seh.GetParentEntry();
429 
430  if (seh && seh.IsSet()) {
431  CBioseq_set_Handle bsh = seh.GetSet();
432  if (bsh.CanGetClass() && bsh.GetClass() == CBioseq_set::eClass_parts) {
433  return true;
434  }
435  }
436  return false;
437 }
438 
439 
441 {
443  CBioseq_Handle master = bh;
444  unsigned int start = 0, stop = bh.GetBioseqLength() - 1;
445  unsigned int offset = 0;
446 
447  seh = seh.GetParentEntry();
448 
449  if (seh && seh.IsSet()) {
450  CBioseq_set_Handle bsh = seh.GetSet();
451  if (bsh.CanGetClass() && bsh.GetClass() == CBioseq_set::eClass_parts) {
452  seh = seh.GetParentEntry();
453  if (seh.IsSet()) {
454  bsh = seh.GetSet();
455  if (bsh.CanGetClass() && bsh.GetClass() == CBioseq_set::eClass_segset) {
456  CBioseq_CI seq_iter(seh);
457  for ( ; seq_iter; ++seq_iter ) {
458  if (seq_iter->CanGetInst_Repr()) {
459  if (seq_iter->GetInst_Repr() == CSeq_inst::eRepr_seg) {
460  master = *seq_iter;
461  } else if (seq_iter->GetInst_Repr() == CSeq_inst::eRepr_raw) {
462  if (*seq_iter == bh) {
463  start = offset;
464  stop = offset + bh.GetBioseqLength() - 1;
465  } else {
466  offset += seq_iter->GetBioseqLength();
467  }
468  }
469  }
470  }
471  }
472  }
473  }
474  }
475  bh = master;
476  range.SetFrom(start);
477  range.SetTo(stop);
478 }
479 
480 
482 {
483  bool is_list = true;
484  bool is_single = true;
485  bool found_single = false;
486 
487  if (!feat_ci) {
488  return false;
489  }
490  ++feat_ci;
491  if (feat_ci) {
492  is_single = false;
493  }
494  feat_ci.Rewind();
495 
496  while (feat_ci && is_list) {
497  if (feat_ci->GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA) {
498  if (!feat_ci->GetData().GetRna().IsSetExt()
499  || !feat_ci->GetData().GetRna().GetExt().IsName()
500  || !NStr::Equal(feat_ci->GetData().GetRna().GetExt().GetName(), "5S ribosomal RNA")) {
501  is_list = false;
502  }
503  } else if (feat_ci->GetData().GetSubtype() == CSeqFeatData::eSubtype_misc_feature) {
504  if (!feat_ci->IsSetComment()) {
505  is_list = false;
506  } else if (NStr::Equal(feat_ci->GetComment(), "contains 5S ribosomal RNA and nontranscribed spacer")) {
507  found_single = true;
508  } else if (!NStr::Equal(feat_ci->GetComment(), "nontranscribed spacer")) {
509  is_list = false;
510  }
511  } else {
512  is_list = false;
513  }
514  ++feat_ci;
515  }
516  if (is_single && !found_single) {
517  is_list = false;
518  }
519  feat_ci.Rewind();
520  return is_list;
521 }
522 
523 
525 {
526  if (!feat_ci ||
528  !feat_ci->IsSetComment()) {
529  return false;
530  }
531  bool is_single = true;
532  ++feat_ci;
533  if (feat_ci) {
534  is_single = false;
535  }
536  feat_ci.Rewind();
537  return is_single;
538 }
539 
540 
542 {
543  bool has_promoter = false;
545  CFeat_CI f_ci (bh, sel);
546  while (f_ci && !has_promoter) {
547  has_promoter = CAutoDefFeatureClause::IsPromoter(*(f_ci->GetSeq_feat()));
548  ++f_ci;
549  }
550  return has_promoter;
551 }
552 
553 
555 {
556  const string& custom = m_Options.GetCustomFeatureClause();
557  if (!NStr::IsBlank(custom)) {
558  return custom;
559  }
560 
562  while (d) {
563  const auto& uo = d->GetUser();
564  if (x_IsHumanSTR(uo)) {
565  return (
566  uo.HasField("Bracketed record seq.", "") ?
569  );
570  }
571  ++d;
572  }
573 
574 
577  CBioseq_Handle master_bh = bh;
578 
579  GetMasterLocation(master_bh, range);
580 
581  // if no promoter, and fake promoters are requested, create one
583  CRef<CSeq_feat> fake_promoter(new CSeq_feat());
584  CRef<CSeq_loc> fake_promoter_loc(new CSeq_loc());
586  CRef <CSeq_id> new_id(new CSeq_id);
587  new_id->Assign(*id);
588  fake_promoter_loc->SetInt().SetId(*new_id);
589  fake_promoter_loc->SetInt().SetFrom(0);
590  fake_promoter_loc->SetInt().SetTo(bh.GetInst_Length() - 1);
591 
592  fake_promoter->SetLocation(*fake_promoter_loc);
593 
595  *fake_promoter,
596  *fake_promoter_loc,
597  m_Options)));
598  }
599 
600  // now create clauses for real features
601  CFeat_CI feat_ci(master_bh);
602 
603  if (x_Is5SList(feat_ci)) {
604  return "5S ribosomal RNA gene region";
605  }
606 
607  bool is_single_misc_feat = x_IsSingleMiscFeat(feat_ci);
608 
609  while (feat_ci)
610  {
611  vector<CRef<CAutoDefFeatureClause > > fclause = FeatureClauseFactory(bh, feat_ci->GetOriginalFeature(), feat_ci->GetMappedFeature().GetLocation(), m_Options, is_single_misc_feat);
612  for (auto it : fclause) {
613  if (it &&
614  (it->IsRecognizedFeature() ||
616  (it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_repeat_region ||
617  it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_LTR)))) {
618  if (it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_exon ||
619  it->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_intron) {
620  it->Label(m_Options.GetSuppressAlleles());
621  }
622  main_clause.AddSubclause(it);
623  }
624  }
625 
626  ++feat_ci;
627  }
628 
629  // optionally remove misc_feature subfeatures
632  }
633 
634  // Group alt-spliced exons first, so that they will be associated with the correct genes and mRNAs
635  main_clause.GroupAltSplicedExons(bh);
636  main_clause.RemoveDeletedSubclauses();
637 
638  // Add mRNAs to other clauses
639  main_clause.GroupmRNAs(m_Options.GetSuppressAlleles());
640  main_clause.RemoveDeletedSubclauses();
641 
642  // Add genes to clauses that need them for descriptions/products
643  main_clause.GroupGenes(m_Options.GetSuppressAlleles());
644 
646  main_clause.RemoveDeletedSubclauses();
647 
648  // Group all features
650  main_clause.RemoveDeletedSubclauses();
651 
652  // now that features have been grouped, can expand lists of spliced exons
653  main_clause.ExpandExonLists();
654 
655  // assign product names for features associated with genes that have products
656  main_clause.AssignGeneProductNames(&main_clause, m_Options.GetSuppressAlleles());
657 
658  // reverse the order of clauses for minus-strand CDS features
659  main_clause.ReverseCDSClauseLists();
660 
661  main_clause.Label(m_Options.GetSuppressAlleles());
662  main_clause.CountUnknownGenes();
663  main_clause.RemoveDeletedSubclauses();
664 
665  x_RemoveOptionalFeatures(&main_clause, bh);
666 
667  // if a gene is listed as part of another clause, it does not need
668  // to be listed as its own clause
669  main_clause.RemoveGenesMentionedElsewhere();
670  main_clause.RemoveDeletedSubclauses();
671 
674  }
675 
676  main_clause.Label(m_Options.GetSuppressAlleles());
677 
679  // GB-8927
680  // no alternate splice calculations for viruses
681  bool is_virus = false;
683  if (src && src->GetSource().IsSetOrg() && src->GetSource().GetOrg().IsSetDivision()
684  && NStr::EqualNocase(src->GetSource().GetOrg().GetDivision(), "VRL")) {
685  is_virus = true;
686  }
687 
688  if (!is_virus) {
690  main_clause.RemoveDeletedSubclauses();
691  }
692  }
693 
695  main_clause.RemoveDeletedSubclauses();
696 
697  main_clause.GroupConsecutiveExons(bh);
698  main_clause.RemoveDeletedSubclauses();
699 
700  main_clause.Label(m_Options.GetSuppressAlleles());
701 
702  return main_clause.ListClauses(true, false, m_Options.GetSuppressAlleles());
703 }
704 
705 
// Maps a genome value to the organelle name used in definition lines
// (for example "mitochondrion", "chloroplast", "plastid").
// The result starts out empty and is only assigned inside the switch, so a
// value that matches no case yields an empty string.
// NOTE(review): the case labels are not visible in this listing; the
// pairing of each name with a specific genome value must be confirmed
// against the full file.
706 string OrganelleByGenome(unsigned int genome_val)
707 {
708  string organelle;
709  switch (genome_val) {
711  organelle = "macronuclear";
712  break;
714  organelle = "nucleomorph";
715  break;
717  organelle = "mitochondrion";
718  break;
720  organelle = "apicoplast";
721  break;
723  organelle = "chloroplast";
724  break;
726  organelle = "chromoplast";
727  break;
729  organelle = "kinetoplast";
730  break;
732  organelle = "plastid";
733  break;
735  organelle = "cyanelle";
736  break;
738  organelle = "leucoplast";
739  break;
741  organelle = "proplastid";
742  break;
744  organelle = "hydrogenosome";
745  break;
746  }
747  return organelle;
748 }
749 
750 
752 {
753  unsigned int product_flag = CBioSource::eGenome_unknown;
754  string::size_type pos;
755 
757  CFeat_CI feat_ci(bh, sel);
758  while (feat_ci && product_flag == CBioSource::eGenome_unknown) {
759  if (feat_ci->IsSetProduct()) {
760  string label;
765  bh.GetScope());
766  if (prot) {
768  if (NStr::Find(label, "mitochondrion") != NCBI_NS_STD::string::npos
769  || NStr::Find(label, "mitochondrial") != NCBI_NS_STD::string::npos) {
770  product_flag = CBioSource::eGenome_mitochondrion;
771  } else if (NStr::Find(label, "apicoplast") != NCBI_NS_STD::string::npos) {
772  product_flag = CBioSource::eGenome_apicoplast;
773  } else if (NStr::Find(label, "chloroplast") != NCBI_NS_STD::string::npos) {
774  product_flag = CBioSource::eGenome_chloroplast;
775  } else if (NStr::Find(label, "chromoplast") != NCBI_NS_STD::string::npos) {
776  product_flag = CBioSource::eGenome_chromoplast;
777  } else if (NStr::Find(label, "kinetoplast") != NCBI_NS_STD::string::npos) {
778  product_flag = CBioSource::eGenome_kinetoplast;
779  } else if (NStr::Find(label, "proplastid") != NCBI_NS_STD::string::npos) {
780  product_flag = CBioSource::eGenome_proplastid;
781  } else if ((pos = NStr::Find(label, "plastid")) != NCBI_NS_STD::string::npos
782  && (pos == 0 || isspace(label.c_str()[pos]))) {
783  product_flag = CBioSource::eGenome_plastid;
784  } else if (NStr::Find(label, "cyanelle") != NCBI_NS_STD::string::npos) {
785  product_flag = CBioSource::eGenome_cyanelle;
786  } else if (NStr::Find(label, "leucoplast") != NCBI_NS_STD::string::npos) {
787  product_flag = CBioSource::eGenome_leucoplast;
788  }
789  }
790  }
791  ++feat_ci;
792  }
793  return product_flag;
794 }
795 
796 
// Builds the "; ..." suffix appended after the feature clauses.
//
// feature_clauses  text of the clauses; only scanned for the word "gene"
//                  to decide between singular and plural wording.
// bh               sequence whose first source descriptor supplies the
//                  genome value and the macronuclear/micronuclear note.
//
// Returns one of: "; <organelle>", "; nuclear gene(s) for <organelle>
// product(s)", "; nuclear copy of <organelle> gene",
// "; macronuclear"/"; micronuclear", or an empty string.
// "mitochondrion" is always rewritten as "mitochondrial".
797 string CAutoDef::x_GetFeatureClauseProductEnding(const string& feature_clauses,
798  CBioseq_Handle bh)
799 {
800  bool pluralize = false;
801  unsigned int product_flag_to_use;
802  unsigned int nuclear_copy_flag = CBioSource::eGenome_unknown;
803 
// NOTE(review): the condition selecting between these two branches is not
// visible in this listing. One branch derives the product flag from the CDS
// product names, the other takes both flags from m_Options.
805  product_flag_to_use = s_GetProductFlagFromCDSProductNames (bh);
806  } else {
807  product_flag_to_use = m_Options.GetProductFlag();
808  nuclear_copy_flag = m_Options.GetNuclearCopyFlag();
809  }
// Plural wording is used when the text contains "genes", or contains
// "gene" at least twice.
810  if (NStr::Find(feature_clauses, "genes") != NCBI_NS_STD::string::npos) {
811  pluralize = true;
812  } else {
813  string::size_type pos = NStr::Find(feature_clauses, "gene");
814  if (pos != NCBI_NS_STD::string::npos
815  && NStr::Find (feature_clauses, "gene", pos + 4) != NCBI_NS_STD::string::npos) {
816  pluralize = true;
817  }
818  }
819 
820  unsigned int genome_val = CBioSource::eGenome_unknown;
821  string genome_from_mods;
822 
// Only the first source descriptor is examined: the loop body ends with an
// unconditional break.
823  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
824  const CBioSource& bsrc = dit->GetSource();
825  if (bsrc.CanGetGenome()) {
826  genome_val = bsrc.GetGenome();
827  }
828  if (bsrc.CanGetSubtype()) {
829  ITERATE (CBioSource::TSubtype, subSrcI, bsrc.GetSubtype()) {
830  if ((*subSrcI)->GetSubtype() == CSubSource::eSubtype_other) {
831  string note = (*subSrcI)->GetName();
832  if (NStr::Equal(note, "macronuclear") || NStr::Equal(note, "micronuclear")) {
833  genome_from_mods = note;
834  }
835  }
836  }
837  }
838  break;
839  }
840 
// The genome value of the source wins; the product flag and then the
// nuclear-copy flag are consulted only when it yields no organelle name.
841  string ending = OrganelleByGenome(genome_val);
842  if (NStr::Equal(ending, "mitochondrion")) {
843  ending = "mitochondrial";
844  }
845  if (!NStr::IsBlank(ending)) {
846  ending = "; " + ending;
847  } else {
848  if (product_flag_to_use != CBioSource::eGenome_unknown) {
849  ending = OrganelleByGenome(product_flag_to_use);
850  if (NStr::IsBlank(ending)) {
// Product flag has no organelle name: fall back to the
// macronuclear/micronuclear note, if one was found.
851  if (!NStr::IsBlank(genome_from_mods)) {
852  ending = "; " + genome_from_mods;
853  }
854  } else {
855  if (NStr::Equal(ending, "mitochondrion")) {
856  ending = "mitochondrial";
857  }
858  if (pluralize) {
859  ending = "; nuclear genes for " + ending + " products";
860  } else {
861  ending = "; nuclear gene for " + ending + " product";
862  }
863  }
864  } else if (nuclear_copy_flag != CBioSource::eGenome_unknown) {
865  ending = OrganelleByGenome(nuclear_copy_flag);
866  if (!NStr::IsBlank(ending)) {
867  if (NStr::Equal(ending, "mitochondrion")) {
868  ending = "mitochondrial";
869  }
870  ending = "; nuclear copy of " + ending + " gene";
871  }
872  }
873  }
874  return ending;
875 }
876 
877 
879 {
880  string end;
881  switch (m_Options.GetFeatureListType())
882  {
884  end = ", complete sequence.";
885  break;
887  end = ", complete genome.";
888  break;
890  end = ", partial sequence.";
891  break;
893  end = ", partial genome.";
894  break;
897  end = " sequence.";
898  break;
900  end = ", whole genome shotgun sequence.";
901  break;
902  default:
903  break;
904  }
905  return end;
906 }
907 
908 
910 {
911  bool is_mRNA = false;
912  for (CSeqdesc_CI desc(bsh, CSeqdesc::e_Molinfo); desc && !is_mRNA; ++desc) {
913  if (desc->GetMolinfo().CanGetBiomol()
914  && desc->GetMolinfo().GetBiomol() == CMolInfo::eBiomol_mRNA) {
915  is_mRNA = true;
916  }
917  }
918  return is_mRNA;
919 }
920 
921 
923 {
925  while (parent) {
926  if (parent.IsSetClass() && parent.GetClass() == CBioseq_set::eClass_gen_prod_set) {
927  return true;
928  }
929  parent = parent.GetParentBioseq_set();
930  }
931  return false;
932 }
933 
934 
// Builds the definition-line tail used when features are not listed:
// " <organelle>" or " <biomol>" (or nothing), followed by the text from
// x_GetNonFeatureListEnding().
// An organelle name is looked up only for the genome values tested in the
// first condition; a non-blank organelle name is written as the prefix.
// NOTE(review): several lines of this function are not visible in this
// listing (the start of the first condition, the branch that introduces
// `biomol`, the declaration of `mi`, and the test that selects "mRNA").
// The biomol handling appears to apply when no organelle name was found -
// confirm against the full file.
935 string CAutoDef::x_GetOneNonFeatureClause(CBioseq_Handle bh, unsigned int genome_val)
936 {
937  string feature_clauses;
938  string organelle;
939 
941  || genome_val == CBioSource::eGenome_apicoplast
942  || genome_val == CBioSource::eGenome_chloroplast
943  || genome_val == CBioSource::eGenome_kinetoplast
944  || genome_val == CBioSource::eGenome_leucoplast
945  || genome_val == CBioSource::eGenome_mitochondrion
946  || genome_val == CBioSource::eGenome_plastid) {
947  organelle = OrganelleByGenome(genome_val);
948  }
949  if (!NStr::IsBlank(organelle)) {
950  feature_clauses = " " + organelle;
952  string biomol;
954  if (mi && mi->GetMolinfo().IsSetBiomol()) {
956  biomol = "mRNA";
957  } else {
958  biomol = CMolInfo::GetBiomolName(mi->GetMolinfo().GetBiomol());
959  }
960  }
961  if (!NStr::IsBlank(biomol)) {
962  feature_clauses = " " + biomol;
963  }
964  }
965 
966  feature_clauses += x_GetNonFeatureListEnding();
967  return feature_clauses;
968 }
969 
970 
// Produces the feature-clause part of a definition line for one sequence.
//
// bh          sequence to describe.
// genome_val  genome value passed through to x_GetOneNonFeatureClause().
//
// Feature clauses are generated when the options ask for all features to be
// listed, or when the sequence is an mRNA inside a gen-prod set. In every
// other case, and when the generated clauses are blank, the result comes
// from x_GetOneNonFeatureClause().
971 string CAutoDef::GetOneFeatureClauseList(CBioseq_Handle bh, unsigned int genome_val)
972 {
973  string feature_clauses;
974  bool listAllFeatures = (m_Options.GetFeatureListType() == CAutoDefOptions::eListAllFeatures);
975  if (listAllFeatures) {
// Count gene and CDS features on the sequence's annotations so that
// heavily annotated records can skip clause generation.
976  int numGenes = 0;
977  int numCDSs = 0;
978  CSeq_annot_CI annot_ci(bh);
979  for (; annot_ci; ++annot_ci) {
980  const CSeq_annot_Handle& annt = *annot_ci;
// NOTE(review): the declaration of pAnnot is not visible in this listing;
// presumably it is obtained from annt - confirm.
982  const CSeq_annot& antx = *pAnnot;
983  FOR_EACH_SEQFEAT_ON_SEQANNOT (feat_it, antx) {
984  const CSeq_feat& sft = **feat_it;
985  const CSeqFeatData& data = sft.GetData();
986  CSeqFeatData::ESubtype subtype = data.GetSubtype();
987  if (subtype == CSeqFeatData::eSubtype_gene) {
988  numGenes++;
989  } else if (subtype == CSeqFeatData::eSubtype_cdregion) {
990  numCDSs++;
991  }
992  }
993  }
994  if (numGenes + numCDSs > 100) {
995  // too many features will drastically slow down performance, bypass (RW-1578)
// A non-blank custom clause from the options takes precedence over the
// non-feature clause.
996  const string& custom = m_Options.GetCustomFeatureClause();
997  if (!NStr::IsBlank(custom)) {
998  return custom;
999  }
1000  feature_clauses = x_GetOneNonFeatureClause(bh, genome_val);
1001  return feature_clauses;
1002  }
1003  }
1004 
1005  if (listAllFeatures || (IsBioseqmRNA(bh) && IsInGenProdSet(bh))) {
1006  feature_clauses = x_GetFeatureClauses(bh);
1007  if (NStr::IsBlank(feature_clauses)) {
1008  feature_clauses = x_GetOneNonFeatureClause(bh, genome_val);
1009  } else {
// Result layout: " " + clauses + product ending + optional
// "alternatively spliced" note + ".".
1010  feature_clauses = " " + feature_clauses;
1011  string ending = x_GetFeatureClauseProductEnding(feature_clauses, bh);
1012  if (m_Options.GetAltSpliceFlag()) {
1013  if (NStr::IsBlank(ending)) {
1014  ending = "; alternatively spliced";
1015  } else {
1016  ending += ", alternatively spliced";
1017  }
1018  }
1019  feature_clauses += ending;
1020  if (NStr::IsBlank(feature_clauses)) {
1021  feature_clauses = ".";
1022  } else {
1023  feature_clauses += ".";
1024  }
1025  }
1026  } else {
1027  feature_clauses = x_GetOneNonFeatureClause(bh, genome_val);
1028  }
1029  return feature_clauses;
1030 }
1031 
1032 
1034 {
1035  string keyword;
1036 
1038  if (gb) {
1039  if (gb->GetGenbank().IsSetKeywords()) {
1041  if (NStr::EqualNocase(*it, "TPA:inferential")) {
1042  keyword = "TPA_inf: ";
1043  break;
1044  } else if (NStr::EqualNocase(*it, "TPA:experimental")) {
1045  keyword = "TPA_exp: ";
1046  break;
1047  }
1048  }
1049  }
1050  } else {
1052  if (mi && mi->GetMolinfo().IsSetTech() && mi->GetMolinfo().GetTech() == CMolInfo::eTech_tsa) {
1053  keyword = "TSA: ";
1054  }
1055  }
1056  return keyword;
1057 }
1058 
1059 
1061 {
1062  auto start = s.begin();
1063  while (start != s.end() && std::isspace(*start)) {
1064  start++;
1065  }
1066 
1067  auto end = s.end();
1068  do {
1069  end--;
1070  } while (std::distance(start, end) > 0 && std::isspace(*end));
1071 
1072  return std::string(start, end + 1);
1073 }
1074 
1076 {
1077  for (CSeqdesc_CI desc(bsh, CSeqdesc::e_Molinfo); desc; ++desc) {
1078  if (desc->GetMolinfo().CanGetBiomol() && desc->GetMolinfo().GetBiomol() == CMolInfo::eBiomol_genomic) {
1079  return true;
1080  }
1081  }
1082  return false;
1083 }
1084 
1085 static bool s_IsRefSeq(CBioseq_Handle bsh)
1086 {
1087  for (CSeq_id_Handle sid : bsh.GetId()) {
1088  if (sid.Which() == NCBI_SEQID(Other)) {
1089  return true;
1090  }
1091  }
1092  return false;
1093 }
1094 
1096 {
1097  m_Feat_Tree = featTree;
1098 
1099  // for protein sequences, use sequence::GetTitle
1100  if (bh.CanGetInst() && bh.GetInst().CanGetMol() && bh.GetInst().GetMol() == CSeq_inst::eMol_aa) {
1101  return sequence::CDeflineGenerator()
1102  .GenerateDefline(bh,
1103  sequence::CDeflineGenerator::fIgnoreExisting |
1104  sequence::CDeflineGenerator::fAllProteinNames);
1105  }
1106  string org_desc = "Unknown organism";
1107  unsigned int genome_val = CBioSource::eGenome_unknown;
1108  mod_combo->InitOptions(m_Options);
1109 
1110  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
1111  const CBioSource& bsrc = dit->GetSource();
1112  org_desc = mod_combo->GetSourceDescriptionString(bsrc);
1113  if (bsrc.CanGetGenome()) {
1114  genome_val = bsrc.GetGenome();
1115  }
1116  break;
1117  }
1118  string feature_clauses = GetOneFeatureClauseList(bh, genome_val);
1119 
1120  if (org_desc.length() > 0 && isalpha(org_desc.c_str()[0])) {
1121  string first_letter = org_desc.substr(0, 1);
1122  string remainder = org_desc.substr(1);
1123  NStr::ToUpper(first_letter);
1124  org_desc = first_letter + remainder;
1125  }
1126 
1127  string keyword = GetKeywordPrefix(bh);
1128 
1129  if (!NStr::IsBlank(feature_clauses)) {
1130  string key_org = x_trim(keyword + org_desc);
1131  feature_clauses = x_trim(feature_clauses);
1132  if (NStr::StartsWith(feature_clauses, ",")) {
1133  return keyword + org_desc + feature_clauses;
1134  }
1135  if (genome_val == CBioSource::eGenome_chromosome &&
1136  s_IsBioseqGenomic(bh) && s_IsRefSeq(bh) &&
1138  return keyword + org_desc + ", " + feature_clauses;
1139  }
1140  return keyword + org_desc + " " + feature_clauses;
1141  }
1142  return keyword + org_desc;
1143 }
1144 
1145 
1146 // use internal settings to create mod combo
1148 {
1149  // for protein sequences, use sequence::GetTitle
1150  if (bh.CanGetInst() && bh.GetInst().CanGetMol() && bh.GetInst().GetMol() == CSeq_inst::eMol_aa) {
1151  return sequence::CDeflineGenerator()
1152  .GenerateDefline(bh,
1153  sequence::CDeflineGenerator::fIgnoreExisting);
1154  }
1155  string org_desc = "Unknown organism";
1156  unsigned int genome_val = CBioSource::eGenome_unknown;
1157 
1159  mod_combo->InitFromOptions(m_Options);
1160 
1161  for (CSeqdesc_CI dit(bh, CSeqdesc::e_Source); dit; ++dit) {
1162  const CBioSource& bsrc = dit->GetSource();
1163  org_desc = mod_combo->GetSourceDescriptionString(bsrc);
1164  if (bsrc.CanGetGenome()) {
1165  genome_val = bsrc.GetGenome();
1166  }
1167  break;
1168  }
1169  string feature_clauses = GetOneFeatureClauseList(bh, genome_val);
1170 
1171  if (org_desc.length() > 0 && isalpha(org_desc.c_str()[0])) {
1172  string first_letter = org_desc.substr(0, 1);
1173  string remainder = org_desc.substr(1);
1174  NStr::ToUpper(first_letter);
1175  org_desc = first_letter + remainder;
1176  }
1177 
1178  string keyword = GetKeywordPrefix(bh);
1179 
1180  return keyword + org_desc + feature_clauses;
1181 }
1182 
1183 
1185 {
1186  mod_set.clear();
1188  modifier_list.clear();
1189  m_OrigModCombo.GetAvailableModifiers (modifier_list);
1190  for (unsigned int k = 0; k < modifier_list.size(); k++) {
1191  mod_set.insert(CAutoDefAvailableModifier(modifier_list[k]));
1192  }
1193 }
1194 
1195 
1197 {
1199 }
1200 
1202 {
1203  mod_combo.InitOptions(m_Options);
1204 }
1205 
1206 
1207 //starting here, remove when separating autodef from taxonomy options
1209 {
1210  CConstRef<CUser_object> options(NULL);
1212  while (b && !options) {
1213  CSeqdesc_CI desc(*b, CSeqdesc::e_User);
1214  while (desc && desc->GetUser().GetObjectType() != CUser_object::eObjectType_AutodefOptions) {
1215  ++desc;
1216  }
1217  if (desc) {
1218  options.Reset(&(desc->GetUser()));
1219  }
1220  }
1221  return options;
1222 }
1223 
1224 
1226 {
1227  string defline;
1228  if (bh.IsAa()) {
1229  return kEmptyStr;
1230  }
1231  CSeqdesc_CI desc(bh, CSeqdesc::e_User);
1232  while (desc && desc->GetUser().GetObjectType() != CUser_object::eObjectType_AutodefOptions) {
1233  ++desc;
1234  }
1235  if (desc) {
1236  CAutoDef autodef;
1237  autodef.SetOptionsObject(desc->GetUser());
1238  CAutoDefModifierCombo mod_combo;
1239  CAutoDefOptions options;
1240  options.InitFromUserObject(desc->GetUser());
1241  mod_combo.SetOptions(options);
1242  defline = autodef.GetOneDefLine(&mod_combo, bh);
1243  }
1244  return defline;
1245 }
1246 
1247 
1249 {
    // Body of RegenerateSequenceDefLines: for every non-protein Bioseq under
    // se that has an AutodefOptions user descriptor, regenerates the
    // definition line and stores it as the Bioseq's title descriptor.
    // Returns true when at least one title was changed or added.
    // NOTE(review): the signature (original line 1248) and the header of the
    // descriptor loop (original line 1265) are absent from this listing.
1250  bool any = false;
1251  CBioseq_CI b_iter(se);
1252  for (; b_iter; ++b_iter) {
    // Proteins are skipped.
1253  if (b_iter->IsAa()) {
1254  continue;
1255  }
    // Look for a user descriptor of type AutodefOptions on this Bioseq.
1256  CSeqdesc_CI desc(*b_iter, CSeqdesc::e_User);
1257  while (desc && desc->GetUser().GetObjectType() != CUser_object::eObjectType_AutodefOptions) {
1258  ++desc;
1259  }
1260  if (desc) {
1261  string defline = RegenerateDefLine(*b_iter);
1262 
1263  bool found_existing = false;
1264  CBioseq_EditHandle beh(*b_iter);
    // Within the (missing) loop over `it`: the first title descriptor is
    // rewritten only when its text differs from the new defline, then the
    // search stops.
1266  if ((*it)->IsTitle()) {
1267  if (!NStr::Equal((*it)->GetTitle(), defline)) {
1268  (*it)->SetTitle(defline);
1269  any = true;
1270  }
1271  found_existing = true;
1272  break;
1273  }
1274  }
    // No title descriptor was found: append a new one.
1275  if (!found_existing) {
1276  CRef<CSeqdesc> new_desc(new CSeqdesc());
1277  new_desc->SetTitle(defline);
1278  beh.SetDescr().Set().push_back(new_desc);
1279  any = true;
1280  }
1281  }
1282  }
1283  return any;
1284 }
1285 
1286 
1288 {
    // Body of x_IsHumanSTR: decides whether obj is a HumanSTR structured
    // comment by inspecting its "StructuredCommentPrefix" field.
    // NOTE(review): the signature (original line 1287), the condition of the
    // first early return (original line 1289) and the header of the loop over
    // `f` (original line 1295) are absent from this listing.
1290  return false;
1291  }
    // An object without data fields cannot match.
1292  if (!obj.IsSetData()) {
1293  return false;
1294  }
    // A field counts when its label is the string "StructuredCommentPrefix"
    // (compared case-insensitively) and its data is a string.
1296  if ((*f)->IsSetLabel() && (*f)->GetLabel().IsStr() &&
1297  NStr::EqualNocase((*f)->GetLabel().GetStr(), "StructuredCommentPrefix") &&
1298  (*f)->IsSetData() && (*f)->GetData().IsStr()) {
    // The first such field decides the result either way; later fields are
    // not examined.
1299  if (NStr::EqualNocase((*f)->GetData().GetStr(), "##HumanSTR-START##")) {
1300  return true;
1301  } else {
1302  return false;
1303  }
1304  }
1305  }
    // No prefix field was encountered.
1306  return false;
1307 }
1308 
1309 
1311 {
    // Body of x_GetHumanSTRFeatureClauses: builds the feature clause
    //   "microsatellite <locus> <allele> <repeat>[ <dbSNP tag>...]
    //    [ <flanking>][ <assay>] sequence"
    // from the string fields of the structured comment and from dbSNP
    // cross-references on the features visited by `f`.
    // NOTE(review): the signature (original line 1310) and the declaration of
    // the feature iterator `f` (original line 1339) are absent from this
    // listing.
1312  string locus_name;
1313  string allele;
1314  string repeat;
1315  string flanking;
1316  string assay;
1317 
    // Collect the known fields; only string data under string labels is
    // read, and labels are compared case-insensitively.
1318  if (comment.IsSetData()) {
1319  ITERATE(CUser_object::TData, it, comment.GetData()) {
1320  if ((*it)->IsSetData() && (*it)->GetData().IsStr() &&
1321  (*it)->IsSetLabel() && (*it)->GetLabel().IsStr()) {
1322  const string& label = (*it)->GetLabel().GetStr();
1323  if (NStr::EqualNocase(label, "STR locus name")) {
1324  locus_name = (*it)->GetData().GetStr();
1325  } else if (NStr::EqualNocase(label, "Length-based allele")) {
1326  allele = (*it)->GetData().GetStr();
1327  } else if (NStr::EqualNocase(label, "Bracketed repeat")) {
1328  repeat = (*it)->GetData().GetStr();
1329  } else if (NStr::EqualNocase(label, "Flanking region")) {
1330  flanking = (*it)->GetData().GetStr();
1331  } else if (NStr::EqualNocase(label, "Sequencing assay code")) {
1332  assay = (*it)->GetData().GetStr();
1333  }
1334  }
1335  }
1336  }
1337 
    // The first three parts are joined unconditionally, so a missing field
    // still leaves its separating space in the clause.
1338  string clause = "microsatellite " + locus_name + " " + allele + " " + repeat;
    // Append the tag of every dbSNP cross-reference (string tag or numeric id)
    // found on each visited feature.
1340  while (f) {
1341  if (f->IsSetDbxref()) {
1342  ITERATE(CSeq_feat::TDbxref, db, f->GetDbxref()) {
1343  if ((*db)->IsSetDb() && NStr::Equal((*db)->GetDb(), "dbSNP") &&
1344  (*db)->IsSetTag()) {
1345  if ((*db)->GetTag().IsStr()) {
1346  clause += " " + (*db)->GetTag().GetStr();
1347  } else if ((*db)->GetTag().IsId()) {
1348  clause += " " + NStr::NumericToString((*db)->GetTag().GetId());
1349  }
1350  }
1351  }
1352  }
1353  ++f;
1354  }
    // Flanking region and assay code are appended only when non-empty.
1355  if (flanking != "") {
1356  clause += " " + flanking;
1357  }
1358  if (assay != "") {
1359  clause += " " + assay;
1360  }
1361  clause += " sequence";
1362  return clause;
1363 }
1364 
1365 
1367 {
1368  string locusName;
1369  string lengthBasedAllele;
1370  string bracketedRecordSeq;
1371  string assayCode;
1372 
1373  if (comment.IsSetData()) {
1374  ITERATE(CUser_object::TData, it, comment.GetData()) {
1375  if ((*it)->IsSetData() && (*it)->GetData().IsStr() &&
1376  (*it)->IsSetLabel() && (*it)->GetLabel().IsStr()) {
1377 
1378  const string& label = (*it)->GetLabel().GetStr();
1379  if (NStr::EqualNocase(label, "STR locus name")) {
1380  locusName = (*it)->GetData().GetStr();
1381  }
1382  else if (NStr::EqualNocase(label, "Length-based allele")) {
1383  lengthBasedAllele = (*it)->GetData().GetStr();
1384  }
1385  else if (NStr::EqualNocase(label, "Bracketed record seq.")) {
1386  bracketedRecordSeq = (*it)->GetData().GetStr();
1387  }
1388  else if (NStr::EqualNocase(label, "Sequencing assay code")) {
1389  assayCode = (*it)->GetData().GetStr();
1390  }
1391  }
1392  }
1393  }
1394 
1395  string clause = "microsatellite " + locusName + " " + lengthBasedAllele +
1396  " " + bracketedRecordSeq;
1397  if (assayCode != "") {
1398  clause += " " + assayCode;
1399  }
1400  clause += " sequence";
1401  return clause;
1402 }
1403 
1404 
1405 bool s_ChooseModInModList(bool is_org_mod, int subtype, bool require_all, CAutoDefSourceDescription::TAvailableModifierVector& modifiers)
1406 {
1407  bool rval = false;
1408  for (auto & modifier : modifiers) {
1409  if (modifier.IsOrgMod() && is_org_mod) {
1410  if (modifier.GetOrgModType() == subtype) {
1411  if (modifier.AllPresent()) {
1412  rval = true;
1413  }
1414  else if (modifier.AnyPresent() && !require_all) {
1415  rval = true;
1416  }
1417  if (rval) {
1418  modifier.SetRequested(true);
1419  }
1420  break;
1421  }
1422  }
1423  else if (!modifier.IsOrgMod() && !is_org_mod) {
1424  if (modifier.GetSubSourceType() == subtype) {
1425  if (modifier.AllPresent()) {
1426  rval = true;
1427  }
1428  else if (modifier.AnyPresent() && !require_all) {
1429  rval = true;
1430  }
1431  if (rval) {
1432  modifier.SetRequested(true);
1433  }
1434  break;
1435  }
1436  }
1437  }
1438  return rval;
1439 }
1440 
1441 
1443 {
    // Body of CreateIDOptions: chooses identifying source modifiers for the
    // sources under seh and returns a user object with those modifiers added
    // to the options.
    // NOTE(review): several lines are absent from this listing - the signature
    // (1442), the declarations of src_combo and modifiers (1447-1448), all but
    // the first entry of the subtypes table (1452-1460) and the declaration of
    // user (1492). Restore them from the upstream file.
1444  CAutoDef ad;
1445  ad.AddSources(seh);
1446 
1449  src_combo->GetAvailableModifiers(modifiers);
1450 
    // subtypes lists candidate identifiers in order of preference; is_orgmod
    // says, entry by entry, whether the subtype is an org-mod (true) or a
    // subsource (false).
    // NOTE(review): is_orgmod has 10 entries; confirm subtypes has the same
    // number, since num_subtypes bounds the index into both arrays.
1451  static int subtypes[] = { COrgMod::eSubtype_strain,
1461  static bool is_orgmod[] = { true, false, true, false, true, true, true, true, true, true };
1462  static int num_subtypes = sizeof(subtypes) / sizeof(int);
1463 
1464 
1465  bool found = false;
1466  // first look for best identifier found in all
1467  for (int i = 0; i < num_subtypes && !found; i++) {
1468  found = s_ChooseModInModList(is_orgmod[i], subtypes[i], true, modifiers);
1469  }
1470  if (!found) {
1471  // if not found in all, use best identifier found in some
1472  for (int i = 0; i < num_subtypes && !found; i++) {
1473  found = s_ChooseModInModList(is_orgmod[i], subtypes[i], false, modifiers);
1474  }
1475  }
    // When the combo's feature clauses are not unique, also request every
    // modifier that is present on some source and that src_combo has.
1476  if (!src_combo->AreFeatureClausesUnique()) {
1477  // use best
1478  for (auto &modifier : modifiers) {
1479  if (modifier.AnyPresent()) {
1480  if (modifier.IsOrgMod()) {
1481  if (src_combo->HasOrgMod(modifier.GetOrgModType())) {
1482  modifier.SetRequested(true);
1483  }
1484  }
1485  else if (src_combo->HasSubSource(modifier.GetSubSourceType())) {
1486  modifier.SetRequested(true);
1487  }
1488  }
1489  }
1490  }
1491 
    // Start from the options in the existing user object and add every
    // requested modifier, then build the user object that is returned.
1493  CAutoDefOptions options;
1494  options.InitFromUserObject(*user);
1495  for(const auto &it : modifiers) {
1496  if (it.IsRequested()) {
1497  if (it.IsOrgMod()) {
1498  options.AddOrgMod(it.GetOrgModType());
1499  } else {
1500  options.AddSubSource(it.GetSubSourceType());
1501  }
1502  }
1503  }
1504  user = options.MakeUserObject();
1505  return user;
1506 }
1507 
1508 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
std::string x_trim(const std::string &s)
Definition: autodef.cpp:1060
static bool s_IsBioseqGenomic(CBioseq_Handle bsh)
Definition: autodef.cpp:1075
static unsigned int s_GetProductFlagFromCDSProductNames(CBioseq_Handle bh)
Definition: autodef.cpp:751
bool IsBioseqmRNA(CBioseq_Handle bsh)
Definition: autodef.cpp:909
bool IsInGenProdSet(CBioseq_Handle bh)
Definition: autodef.cpp:922
static bool s_IsRefSeq(CBioseq_Handle bsh)
Definition: autodef.cpp:1085
string OrganelleByGenome(unsigned int genome_val)
Definition: autodef.cpp:706
CConstRef< CUser_object > s_GetOptionsForSet(CBioseq_set_Handle set)
Definition: autodef.cpp:1208
bool s_HasPromoter(CBioseq_Handle bh)
Definition: autodef.cpp:541
bool s_NeedFeatureClause(const CBioseq &b)
Definition: autodef.cpp:71
bool s_ChooseModInModList(bool is_org_mod, int subtype, bool require_all, CAutoDefSourceDescription::TAvailableModifierVector &modifiers)
Definition: autodef.cpp:1405
vector< CRef< CAutoDefFeatureClause > > FeatureClauseFactory(CBioseq_Handle bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts, bool is_single_misc_feat)
void RemoveFeaturesInmRNAsByType(unsigned int feature_type, bool except_promoter=false)
void GroupConsecutiveExons(CBioseq_Handle bh)
virtual CSeqFeatData::ESubtype GetMainFeatureSubtype() const
void RemoveFeaturesByType(unsigned int feature_type, bool except_promoter=false)
void ConsolidateRepeatedClauses(bool suppress_allele)
string ListClauses(bool allow_semicolons, bool suppress_final_and, bool suppress_allele)
void GroupGenes(bool suppress_allele)
void RemoveFeaturesUnderType(unsigned int feature_type)
bool IsFeatureTypeLonely(unsigned int feature_type) const
void GroupClauses(bool gene_cluster_opp_strand)
virtual void AddSubclause(CRef< CAutoDefFeatureClause_Base > subclause)
void GroupmRNAs(bool suppress_allele)
void GroupSegmentedCDSs(bool suppress_allele)
void AssignGeneProductNames(CAutoDefFeatureClause_Base *main_clause, bool suppress_allele)
virtual void SuppressMobileElementAndInsertionSequenceSubfeatures()
void GroupAltSplicedExons(CBioseq_Handle bh)
void FindAltSplices(bool suppress_allele)
virtual void Label(bool suppress_allele)
virtual bool IsPromoter() const
bool HasSubSource(CSubSource::ESubtype st)
void AddSource(const CBioSource &bs, const string &feature_clauses="")
void GetAvailableModifiers(CAutoDefSourceDescription::TAvailableModifierVector &modifier_list)
bool HasOrgMod(COrgMod::ESubtype st)
void AddOrgMod(COrgMod::ESubtype st, bool even_if_not_uniquifying=false)
string GetSourceDescriptionString(const CBioSource &bsrc)
void AddSubsource(CSubSource::ESubtype st, bool even_if_not_uniquifying=false)
void SetOptions(const CAutoDefOptions &options)
void SetExcludeSpOrgs(bool exclude)
void InitOptions(CAutoDefOptions &options) const
bool GetAltSpliceFlag() const
bool GetKeepuORFs() const
bool GetKeepRegulatoryFeatures() const
bool GetKeepExons() const
void SuppressFeature(CSeqFeatData::ESubtype subtype)
void AddOrgMod(COrgMod::TSubtype subtype)
bool GetSuppressMiscFeatureSubfeatures() const
CRef< CUser_object > MakeUserObject() const
void InitFromUserObject(const CUser_object &obj)
bool GetGeneClusterOppStrand() const
bool GetKeepIntrons() const
string GetCustomFeatureClause() const
bool GetSuppressMobileElementSubfeatures() const
bool GetKeepPrecursorRNA() const
TFeatureListType GetFeatureListType() const
bool GetKeepLTRs() const
bool GetKeep3UTRs() const
bool GetKeep5UTRs() const
bool GetSuppressAlleles() const
bool GetSuppressFeatureAltSplice() const
CBioSource::TGenome GetNuclearCopyFlag() const
bool GetKeepRepeatRegion() const
CBioSource::TGenome GetProductFlag() const
bool GetKeepMiscRecomb() const
bool GetSpecifyNuclearProduct() const
void AddSubSource(CSubSource::TSubtype subtype)
bool IsFeatureSuppressed(CSeqFeatData::ESubtype subtype) const
bool GetUseFakePromoters() const
bool GetKeepMobileElements() const
vector< CAutoDefSourceModifierInfo > TModifierVector
vector< CAutoDefAvailableModifier > TAvailableModifierVector
void GetAvailableModifiers(TAvailableModifierSet &mod_set)
Definition: autodef.cpp:1184
bool x_IsSingleMiscFeat(CFeat_CI feat_ci)
Definition: autodef.cpp:524
vector< CConstRef< objects::CBioSource > > TSources
Definition: autodef.hpp:71
string GetOneSourceDescription(const CBioseq_Handle &bh)
Definition: autodef.cpp:304
string x_GetNonFeatureListEnding()
Definition: autodef.cpp:878
CAutoDefModifierCombo m_OrigModCombo
Definition: autodef.hpp:135
static string GetKeywordPrefix(CBioseq_Handle bh)
Definition: autodef.cpp:1033
bool x_IsFeatureSuppressed(CSeqFeatData::ESubtype subtype)
Definition: autodef.cpp:402
bool x_Is5SList(CFeat_CI feat_ci)
Definition: autodef.cpp:481
bool x_IsSubSrcRequired(unsigned int mod_type)
Definition: autodef.cpp:172
static CRef< CUser_object > CreateIDOptions(CSeq_entry_Handle seh)
Definition: autodef.cpp:1442
void GetMasterLocation(CBioseq_Handle &bh, CRange< TSeqPos > &range)
Definition: autodef.cpp:440
void SetOptionsObject(const CUser_object &user)
Definition: autodef.cpp:1196
bool x_IsOrgModRequired(unsigned int mod_type)
Definition: autodef.cpp:166
void AddDescriptors(const TSources &sources)
Definition: autodef.cpp:110
~CAutoDef()
Definition: autodef.cpp:66
void AddSources(CSeq_entry_Handle se)
Definition: autodef.cpp:93
string x_GetHumanSTRFeatureClauses(CBioseq_Handle bh, const CUser_object &comment)
Definition: autodef.cpp:1310
unsigned int GetNumAvailableModifiers()
Definition: autodef.cpp:184
bool x_IsHumanSTR(const CUser_object &obj)
Definition: autodef.cpp:1287
CRef< CUser_object > GetOptionsObject() const
Definition: autodef.hpp:84
string x_GetFeatureClauseProductEnding(const string &feature_clauses, CBioseq_Handle bh)
Definition: autodef.cpp:797
CRef< feature::CFeatTree > m_Feat_Tree
Definition: autodef.hpp:173
void SetOptions(const CAutoDefModifierCombo &mod_combo)
Definition: autodef.cpp:1201
string x_GetOneNonFeatureClause(CBioseq_Handle bh, unsigned int genome_val)
Definition: autodef.cpp:935
CAutoDefOptions m_Options
Definition: autodef.hpp:137
void x_SortModifierListByRank(TModifierIndexVector &index_list, CAutoDefSourceDescription::TAvailableModifierVector &modifier_list)
Definition: autodef.cpp:121
string GetOneDefLine(CAutoDefModifierCombo *mod_combo, const CBioseq_Handle &bh, CRef< feature::CFeatTree > featTree=null)
Definition: autodef.cpp:1095
string x_GetHumanSTRv2FeatureClauses(CBioseq_Handle bh, const CUser_object &comment)
Definition: autodef.cpp:1366
void x_GetModifierIndexList(TModifierIndexVector &index_list, CAutoDefSourceDescription::TAvailableModifierVector &modifier_list)
Definition: autodef.cpp:139
CRef< CAutoDefModifierCombo > FindBestModifierCombo()
Definition: autodef.cpp:210
static bool RegenerateSequenceDefLines(CSeq_entry_Handle se)
Definition: autodef.cpp:1248
string x_GetFeatureClauses(const CBioseq_Handle &bh)
Definition: autodef.cpp:554
void x_RemoveOptionalFeatures(CAutoDefFeatureClause_Base *main_clause, const CBioseq_Handle &bh)
Definition: autodef.cpp:319
void SuppressFeature(const objects::CFeatListItem &feat)
Definition: autodef.cpp:408
bool IsSegment(const CBioseq_Handle &bh)
Definition: autodef.cpp:424
CAutoDefModifierCombo * GetEmptyCombo()
Definition: autodef.cpp:296
CAutoDefModifierCombo * GetAllModifierCombo()
Definition: autodef.cpp:265
vector< unsigned int > TModifierIndexVector
Definition: autodef.hpp:132
string GetOneFeatureClauseList(CBioseq_Handle bh, unsigned int genome_val)
Definition: autodef.cpp:971
static string RegenerateDefLine(CBioseq_Handle bh)
Definition: autodef.cpp:1225
vector< CRef< CAutoDefModifierCombo > > TModifierComboVector
Definition: autodef.hpp:117
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
static string GetBiomolName(CMolInfo::TBiomol biomol)
Definition: MolInfo.cpp:116
const string & GetDivision(void) const
Definition: Org_ref.cpp:164
bool IsSetDivision(void) const
Definition: Org_ref.cpp:159
CRef –.
Definition: ncbiobj.hpp:618
ESubtype GetSubtype(void) const
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
@ eObjectType_StructuredComment
@ eObjectType_AutodefOptions
EObjectType GetObjectType() const
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
#define false
Definition: bool.h:36
static char tmp[3200]
Definition: utf8.c:42
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
string GetLabel(const CSeq_id &id)
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3653
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
@ eOverlap_Simple
any overlap of extremes
bool IsSetComment(void) const
bool CanGetInst_Repr(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void SetDescr(TDescr &v) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool CanGetClass(void) const
const CSeqFeatData & GetData(void) const
bool CanGetInst(void) const
TSeqPos GetBioseqLength(void) const
TSet GetSet(void) const
bool IsAa(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
bool IsSetProduct(void) const
const string & GetComment(void) const
TInst_Length GetInst_Length(void) const
bool IsSetClass(void) const
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSet(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const TId & GetId(void) const
const TInst & GetInst(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void Rewind(void)
Definition: feat_ci.hpp:239
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
list< string > TKeywords
Definition: GB_block_.hpp:93
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
Definition: GB_block_.hpp:526
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
Definition: GB_block_.hpp:514
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
Definition: BioSource_.hpp:533
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool CanGetGenome(void) const
Check if it is safe to call GetGenome method.
Definition: BioSource_.hpp:403
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
const TData & GetData(void) const
Get the Data member data.
vector< CRef< CUser_field > > TData
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TRna & GetRna(void) const
Get the variant data.
@ e_not_set
No variant selected.
@ eClass_parts
parts for 2 or 3
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
bool CanGetMol(void) const
Check if it is safe to call GetMol method.
Definition: Seq_inst_.hpp:599
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
const TGenbank & GetGenbank(void) const
Get the variant data.
Definition: Seqdesc_.cpp:334
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:472
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ eRepr_seg
segmented sequence
Definition: Seq_inst_.hpp:95
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
int i
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
unsigned int a
Definition: ncbi_localip.c:102
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
Miscellaneous common-use basic types and functionality.
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define FOR_EACH_SEQFEAT_ON_SEQANNOT(Itr, Var)
FOR_EACH_SEQFEAT_ON_SEQANNOT EDIT_EACH_SEQFEAT_ON_SEQANNOT.
Definition: seq_macros.hpp:410
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
SAnnotSelector –.
bool operator()(const CRef< CAutoDefModifierCombo > &s1, const CRef< CAutoDefModifierCombo > &s2) const
Definition: autodef.cpp:201
Modified on Wed Apr 17 13:09:47 2024 by modify_doxy.py rev. 669887