NCBI C++ ToolKit
autodef_feature_clause.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: autodef_feature_clause.cpp 93820 2021-05-25 16:00:18Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin
27 *
28 * File Description:
29 * Generate unique definition lines for a set of sequences using organism
30 * descriptions and feature clauses.
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <algorithm>
35 #include <objmgr/util/autodef.hpp>
36 #include <corelib/ncbimisc.hpp>
37 #include <objmgr/seqdesc_ci.hpp>
38 #include <objmgr/bioseq_ci.hpp>
39 #include <objmgr/feat_ci.hpp>
40 #include <objmgr/util/feature.hpp>
41 #include <objmgr/util/sequence.hpp>
42 
44 #include <objects/seq/Seqdesc.hpp>
45 #include <objects/seq/Bioseq.hpp>
48 
49 #include <serial/iterator.hpp>
50 
53 
54 using namespace sequence;
55 
56 CAutoDefFeatureClause::CAutoDefFeatureClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts)
58  m_pMainFeat(&main_feat),
59  m_BH(bh)
60 {
61  x_SetBiomol();
62  m_ClauseList.clear();
63  m_GeneName = "";
64  m_AlleleName = "";
65  m_Interval = "";
66  m_IsAltSpliced = false;
67  m_Pluralizable = false;
70  m_Description = "";
71  m_DescriptionChosen = false;
72  m_ProductName = "";
73  m_ProductNameChosen = false;
74 
76 
77  m_ClauseLocation = new CSeq_loc();
78  m_ClauseLocation->Add(mapped_loc);
79 
80  if (subtype == CSeqFeatData::eSubtype_operon || IsGeneCluster()) {
81  m_SuppressSubfeatures = true;
82  }
83 
84  if (m_pMainFeat->CanGetComment() && NStr::Find(m_pMainFeat->GetComment(), "alternatively spliced") != NCBI_NS_STD::string::npos
85  && (subtype == CSeqFeatData::eSubtype_cdregion
86  || subtype == CSeqFeatData::eSubtype_exon
87  || IsNoncodingProductFeat())) {
88  m_IsAltSpliced = true;
89  }
90 }
91 
92 
94 {
95 }
96 
97 
99 {
100  if (IsLTR(*m_pMainFeat)) {
102  }
103  return m_pMainFeat->GetData().GetSubtype();
104 }
105 
106 
108 {
110  return false;
111  } else {
112  return true;
113  }
114 }
115 
116 
118 {
120  || NStr::IsBlank(m_pMainFeat->GetNamedQual("insertion_seq"))) {
121  return false;
122  } else {
123  return true;
124  }
125 }
126 
127 
129 {
131  && feat.CanGetComment()
132  && NStr::StartsWith(feat.GetComment(), "control region")) {
133  return true;
134  } else {
135  return false;
136  }
137 }
138 
139 
141 {
142  return IsControlRegion(*m_pMainFeat);
143 }
144 
145 
147 {
150  return false;
151  }
153  if ((*subSrcI)->GetSubtype() == CSubSource::eSubtype_endogenous_virus_name) {
154  return true;
155  }
156  }
157  return false;
158 }
159 
160 
162 {
163  return IsGeneCluster (*m_pMainFeat);
164 }
165 
166 
168 {
170  || !feat.CanGetComment()) {
171  return false;
172  }
173 
174  string comment = feat.GetComment();
175  if (NStr::Find(comment, "gene cluster") != string::npos
176  || NStr::Find(comment, "gene locus") != string::npos) {
177  return true;
178  } else {
179  return false;
180  }
181 }
182 
183 
185 {
187  if (subtype == CSeqFeatData::eSubtype_3UTR
188  || subtype == CSeqFeatData::eSubtype_5UTR
189  || IsLTR(*m_pMainFeat)
190  || subtype == CSeqFeatData::eSubtype_cdregion
191  || subtype == CSeqFeatData::eSubtype_gene
192  || subtype == CSeqFeatData::eSubtype_mRNA
193  || subtype == CSeqFeatData::eSubtype_operon
194  || subtype == CSeqFeatData::eSubtype_exon
195  || subtype == CSeqFeatData::eSubtype_intron
196  || subtype == CSeqFeatData::eSubtype_rRNA
197  || subtype == CSeqFeatData::eSubtype_tRNA
198  || subtype == CSeqFeatData::eSubtype_otherRNA
199  || subtype == CSeqFeatData::eSubtype_misc_RNA
200  || subtype == CSeqFeatData::eSubtype_ncRNA
201  || subtype == CSeqFeatData::eSubtype_preRNA
202  || subtype == CSeqFeatData::eSubtype_tmRNA
203  || subtype == CSeqFeatData::eSubtype_D_loop
207  || IsMobileElement()
209  || IsControlRegion()
211  || IsSatelliteClause()
212  || IsPromoter()
213  || IsGeneCluster()
214  || GetClauseType() != eDefault) {
215  return true;
216  } else {
217  return false;
218  }
219 }
220 
221 
223 {
226  for ( ; desc_iter; ++desc_iter) {
227  if (desc_iter->GetMolinfo().IsSetBiomol()) {
228  m_Biomol = desc_iter->GetMolinfo().GetBiomol();
229  }
230  }
231 }
232 
233 
235 {
236  bool is_pseudo = false;
237  if (f.CanGetPseudo() && f.IsSetPseudo()) {
238  is_pseudo = true;
239  } else if (f.IsSetQual()) {
240  for (auto& it : f.GetQual()) {
241  if (it->IsSetQual() && NStr::EqualNocase(it->GetQual(), "pseudogene")) {
242  is_pseudo = true;
243  break;
244  }
245  }
246  }
247  return is_pseudo;
248 }
249 
250 
252 {
253  return (m_GeneIsPseudo || IsPseudo(*m_pMainFeat));
254 }
255 
256 
258 {
260  m_Typeword = "genomic sequence";
261  } else if (m_Biomol == CMolInfo::eBiomol_mRNA) {
262  m_Typeword = "mRNA sequence";
263  } else {
264  m_Typeword = "sequence";
265  }
266  m_TypewordChosen = true;
267 }
268 
269 
271 {
272  string qual, comment;
273 
274  if (IsLTR(*m_pMainFeat)) {
275  typeword = "LTR repeat region";
276  return true;
277  }
278 
280  switch (subtype) {
282  typeword = "exon";
283  return true;
284  break;
286  typeword = "intron";
287  return true;
288  break;
290  typeword = "D-loop";
291  return true;
292  break;
294  typeword = "3' UTR";
295  return true;
296  break;
298  typeword = "5' UTR";
299  return true;
300  break;
302  typeword = "operon";
303  return true;
304  break;
306  //if has insertion_seq gbqual
307  if (IsInsertionSequence()) {
308  typeword = "insertion sequence";
309  return true;
310  }
311  qual = m_pMainFeat->GetNamedQual("endogenous_virus");
312  if (!NStr::IsBlank(qual)) {
313  typeword = "endogenous virus";
314  return true;
315  }
316  if (IsMobileElement()) {
317  typeword = "transposon";
318  return true;
319  }
320  typeword = "repeat region";
321  return true;
322  break;
324  if (m_pMainFeat->CanGetComment()) {
325  comment = m_pMainFeat->GetComment();
326  if (NStr::StartsWith(comment, "control region", NStr::eNocase)) {
327  typeword = "control region";
328  return true;
329  }
330  }
331  break;
334  return true;
335  break;
338  typeword = "endogenous virus";
339  return true;
340  }
341  break;
343  if (m_pMainFeat->IsSetQual()) {
345  if ((*q)->IsSetQual() &&
346  NStr::Equal((*q)->GetQual(), "regulatory_class") &&
347  (*q)->IsSetVal() && !NStr::IsBlank((*q)->GetVal())) {
348  typeword = (*q)->GetVal();
349  return true;
350  }
351  }
352  }
353  break;
354  default:
355  break;
356  }
357 
359  if (x_IsPseudo()) {
360  typeword = "pseudogene";
361  return true;
362  } else {
363  typeword = "gene";
364  return true;
365  }
366  } else if (subtype == CSeqFeatData::eSubtype_rRNA
367  || subtype == CSeqFeatData::eSubtype_snoRNA
368  || subtype == CSeqFeatData::eSubtype_snRNA
369  || subtype == CSeqFeatData::eSubtype_ncRNA) {
370  return false;
371  } else if (subtype == CSeqFeatData::eSubtype_precursor_RNA) {
372  typeword = "precursor RNA";
373  return true;
374  } else if (m_Biomol == CMolInfo::eBiomol_mRNA) {
375  if (x_IsPseudo()) {
376  typeword = "pseudogene mRNA";
377  } else {
378  typeword = "mRNA";
379  }
380  return true;
381  } else if (m_Biomol == CMolInfo::eBiomol_pre_RNA) {
382  if (x_IsPseudo()) {
383  typeword = "pseudogene precursor RNA";
384  } else {
385  typeword = "precursor RNA";
386  }
387  return true;
389  typeword = "gene";
390  return true;
391  }
392  typeword = "";
393  return true;
394 }
395 
396 
398 {
399  if (NStr::Equal(typeword, "")) {
400  return false;
401  } else if (NStr::EqualNocase(typeword, "exon")
402  || NStr::EqualNocase(typeword, "intron")
403  || NStr::EqualNocase(typeword, "transposon")
404  || NStr::EqualNocase(typeword, "insertion sequence")
405  || NStr::EqualNocase(typeword, "endogenous virus")
406  || NStr::EqualNocase(typeword, "retrotransposon")
407  || NStr::EqualNocase(typeword, "P-element")
408  || NStr::EqualNocase(typeword, "transposable element")
409  || NStr::EqualNocase(typeword, "integron")
410  || NStr::EqualNocase(typeword, "superintegron")
411  || NStr::EqualNocase(typeword, "MITE")) {
412  return true;
413  } else {
414  return false;
415  }
416 }
417 
418 
/* Derives a "-like" product name from the text that follows a keyword in a
 * feature comment.
 *
 * comment       text to search (taken by value)
 * keyword       marker whose trailing text is the candidate product
 * product_name  receives the derived name; it is only assigned on the
 *               path that returns true
 *
 * Returns false when either input is blank or when every occurrence of the
 * keyword is followed by "GenBank Accession Number"; returns true as soon
 * as one occurrence yields a candidate.
 */
419 bool CAutoDefFeatureClause::x_FindNoncodingFeatureKeywordProduct (string comment, string keyword, string &product_name) const
420 {
421  if (NStr::IsBlank(comment) || NStr::IsBlank(keyword)) {
422  return false;
423  }
424  string::size_type start_pos = 0;
425 
 // Visit each occurrence of the keyword in turn; the loop ends when Find
 // reports npos.
426  while (start_pos != NCBI_NS_STD::string::npos) {
427  start_pos = NStr::Find(comment, keyword, start_pos);
428  if (start_pos != NCBI_NS_STD::string::npos) {
 // Candidate is everything after this occurrence of the keyword.
429  string possible = comment.substr(start_pos + keyword.length());
 // Text beginning with "GenBank Accession Number" is not accepted as a product.
431  if (!NStr::StartsWith(possible, "GenBank Accession Number")) {
432  product_name = possible;
433  // truncate at first semicolon
434  string::size_type end = NStr::Find(product_name, ";");
435  if (end != NCBI_NS_STD::string::npos) {
436  product_name = product_name.substr(0, end);
437  }
438  // remove sequence from end of product name if found
 // (9 is the length of " sequence")
439  if (NStr::EndsWith(product_name, " sequence")) {
440  product_name = product_name.substr(0, product_name.length() - 9);
441  }
442  // add "-like" if not present
443  if (!NStr::EndsWith(product_name, "-like")) {
444  product_name += "-like";
445  }
446  return true;
447  } else {
 // Rejected occurrence: resume the search just past this keyword.
448  start_pos += keyword.length();
449  }
450  }
451  }
452  return false;
453 }
454 
455 
457 {
459  || !m_pMainFeat->CanGetComment()) {
460  return false;
461  }
462  string comment = m_pMainFeat->GetComment();
463  string::size_type start_pos = NStr::Find(comment, "nonfunctional ");
464  if (start_pos != NCBI_NS_STD::string::npos) {
465  string::size_type sep_pos = NStr::Find (comment, " due to ", start_pos);
466  if (sep_pos != NCBI_NS_STD::string::npos) {
467  product_name = comment.substr(start_pos, sep_pos - start_pos);
468  return true;
469  }
470  }
471  if (x_FindNoncodingFeatureKeywordProduct (comment, "similar to ", product_name)) {
472  return true;
473  } else if (x_FindNoncodingFeatureKeywordProduct (comment, "contains ", product_name)) {
474  return true;
475  } else {
476  return false;
477  }
478 }
479 
481 {
482  string product_name;
483  return x_GetNoncodingProductFeatProduct(product_name);
484 }
485 
486 CAutoDefGeneClause::CAutoDefGeneClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
487  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
488 {
493  if (!NStr::StartsWith(m_AlleleName, "-")) {
494  m_AlleleName = "-" + m_AlleleName;
495  }
497  }
498  }
500  m_HasGene = true;
501 }
502 
503 
505 {
507  return true;
508  }
509  const CGene_ref& gene = m_pMainFeat->GetData().GetGene();
510  if (gene.CanGetPseudo() && gene.IsSetPseudo()) {
511  return true;
512  }
513  return false;
514 }
515 
516 /*
517 * If the feature is a gene and has different strings in the description than
518 * in the locus or locus tag, the description will be used as the product for
519 * the gene.
520 */
521 bool CAutoDefGeneClause::x_GetProductName(string &product_name)
522 {
525  m_GeneName)) {
526  product_name = m_pMainFeat->GetData().GetGene().GetDesc();
527  return true;
528  } else {
529  return false;
530  }
531 }
532 
533 
/* Splits a note of the form "<product>(<gene>)" into a product name and a
 * gene name.
 *
 * comment       text to parse (taken by value and modified locally)
 * gene_name     receives the text between "(" and the next ")", trimmed;
 *               left empty when the comment has no "("
 * product_name  receives the text before "(" (or the whole comment when it
 *               has no "(" and starts with "tRNA-"), trimmed
 *
 * Returns false when:
 *   - there is no "(" and the comment does not start with "tRNA-";
 *   - there is a "(" but no ")" after it;
 *   - the product starts with "tRNA-" but fails the format checks below;
 *   - the product name ends up blank.
 * Both outputs are cleared on entry, but they may hold partial results when
 * false is returned.
 *
 * NOTE(review): isalpha/isupper/islower are called with plain char values;
 * the C library leaves that undefined for negative values, so a cast to
 * unsigned char may be warranted - confirm whether non-ASCII input can
 * reach this function.
 */
534 bool CAutoDefParsedtRNAClause::ParseString(string comment, string& gene_name, string& product_name)
535 {
536  product_name = "";
537  gene_name = "";
538 
 // Drop a trailing " gene" (5 chars) or " genes" (6 chars).
540  if (NStr::EndsWith (comment, " gene")) {
541  comment = comment.substr (0, comment.length() - 5);
542  } else if (NStr::EndsWith (comment, " genes")) {
543  comment = comment.substr (0, comment.length() - 6);
544  }
545 
546  string::size_type pos = NStr::Find(comment, "(");
547  if (pos == NCBI_NS_STD::string::npos) {
 // No parenthesised gene name: only a bare "tRNA-..." product is accepted.
548  if (NStr::StartsWith (comment, "tRNA-")) {
549  product_name = comment;
550  } else {
551  /* if not tRNA, gene name is required */
552  return false;
553  }
554  } else {
 // Product is the text before "("; gene is the text up to the next ")".
555  product_name = comment.substr(0, pos);
556  comment = comment.substr (pos + 1);
557  pos = NStr::Find(comment, ")");
558  if (pos == NCBI_NS_STD::string::npos) {
559  return false;
560  }
561  gene_name = comment.substr (0, pos);
562  NStr::TruncateSpacesInPlace(gene_name);
563  }
564  NStr::TruncateSpacesInPlace(product_name);
565 
566  if (NStr::StartsWith (product_name, "tRNA-")) {
567  /* tRNA name must start with "tRNA-" and be followed by one uppercase letter and
568  * two lowercase letters.
569  */
 // Indices 5..7 are the three characters right after "tRNA-"; anything
 // beyond index 7 is not inspected.
570  if (product_name.length() < 8
571  || !isalpha(product_name.c_str()[5]) || !isupper(product_name.c_str()[5])
572  || !isalpha(product_name.c_str()[6]) || !islower(product_name.c_str()[6])
573  || !isalpha(product_name.c_str()[7]) || !islower(product_name.c_str()[7])) {
574  return false;
575  }
576 
577  /* if present, gene name must start with letters "trn",
578  * and end with one uppercase letter.
579  */
 // Only the character at index 3 is tested for being an uppercase letter;
 // the check does not verify that the gene name ends there.
580  if (!NStr::IsBlank (gene_name)
581  && (gene_name.length() < 4
582  || !NStr::StartsWith(gene_name, "trn" )
583  || !isalpha(gene_name.c_str()[3])
584  || !isupper(gene_name.c_str()[3]))) {
585  return false;
586  }
587  }
 // A blank product is rejected whether or not it had the tRNA- prefix.
588  if (NStr::IsBlank (product_name)) {
589  return false;
590  }
591  return true;
592 }
593 
594 
595 CAutoDefParsedtRNAClause *s_tRNAClauseFromNote(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, string comment, bool is_first, bool is_last, const CAutoDefOptions& opts)
596 {
597  string product_name;
598  string gene_name;
599  if (!CAutoDefParsedtRNAClause::ParseString(comment, gene_name, product_name)) {
600  return NULL;
601  }
602 
603  return new CAutoDefParsedtRNAClause(bh, cf, mapped_loc, gene_name, product_name, is_first, is_last, opts);
604 }
605 
606 
607 string CAutoDefFeatureClause::x_GetGeneName(const CGene_ref& gref, bool suppress_locus_tag) const
608 {
609  if (gref.IsSuppressed()) {
610  return "";
611  } else if (gref.CanGetLocus() && !NStr::IsBlank(gref.GetLocus())) {
612  return gref.GetLocus();
613  } else if (!suppress_locus_tag && gref.IsSetLocus_tag() && !NStr::IsBlank(gref.GetLocus_tag())) {
614  return gref.GetLocus_tag();
615  } else if (gref.IsSetDesc() && !NStr::IsBlank(gref.GetDesc())) {
616  return gref.GetDesc();
617  } else {
618  return "";
619  }
620 }
621 
622 
623 void s_UseCommentBeforeSemicolon(const CSeq_feat& feat, string& label)
624 {
625  if (feat.IsSetComment()) {
626  label = feat.GetComment();
627  string::size_type pos = NStr::Find(label, ";");
628  if (pos != NCBI_NS_STD::string::npos) {
629  label = label.substr(0, pos);
630  }
631  }
632 }
633 
634 
635 /* Frequently the product associated with a feature is listed as part of the
636  * description of the feature in the definition line. This function determines
637  * the name of the product associated with this specific feature. Some
638  * features will be listed with the product of a feature that is associated
639  * with the feature being described - this function does not look at other
640  * features to determine a product name.
641  * If the feature is a misc_feat with particular keywords in the comment,
642  * the product will be determined based on the contents of the comment.
643  * If the feature is a CDS and is marked as pseudo, the product will be
644  * determined based on the contents of the comment.
645  * If the feature is a gene and has different strings in the description than
646  * in the locus or locus tag, the description will be used as the product for
647  * the gene.
648  * If none of the above conditions apply, the sequence indexing context label
649  * will be used to obtain the product name for the feature.
650  */
651 bool CAutoDefFeatureClause::x_GetProductName(string &product_name)
652 {
654 
656  return true;
657  } else if (subtype == CSeqFeatData::eSubtype_cdregion
660  && m_pMainFeat->CanGetComment()) {
661  string comment = m_pMainFeat->GetComment();
662  if (!NStr::IsBlank(comment)) {
663  string::size_type pos = NStr::Find(comment, ";");
664  if (pos != NCBI_NS_STD::string::npos) {
665  comment = comment.substr(0, pos);
666  }
667  product_name = comment;
668  return true;
669  }
670  } else if (subtype == CSeqFeatData::eSubtype_tmRNA) {
671  product_name = "tmRNA";
672  return true;
673  } else if (m_pMainFeat->GetData().Which() == CSeqFeatData::e_Rna) {
674  product_name = m_pMainFeat->GetData().GetRna().GetRnaProductName();
675  if (NStr::IsBlank(product_name) && m_pMainFeat->IsSetComment()) {
676  product_name = m_pMainFeat->GetComment();
677  }
678  return true;
679  } else if (subtype == CSeqFeatData::eSubtype_regulatory) {
680  return true;
681  } else if (subtype == CSeqFeatData::eSubtype_misc_recomb) {
682  if (m_pMainFeat->IsSetQual()) {
684  if ((*q)->IsSetQual() && NStr::Equal((*q)->GetQual(), "recombination_class") &&
685  (*q)->IsSetVal() && !NStr::IsBlank((*q)->GetVal())) {
686  product_name = (*q)->GetVal();
687  return true;
688  }
689  }
690  }
692  return true;
693  } else if (subtype == CSeqFeatData::eSubtype_exon || subtype == CSeqFeatData::eSubtype_intron) {
694  return x_GetExonDescription(product_name);
695  } else {
696  string label;
697 
699  const CSeq_loc& product_loc = m_pMainFeat->GetProduct();
700  CBioseq_Handle prot_h = m_BH.GetScope().GetBioseqHandle(product_loc);
701  if (prot_h) {
702  CFeat_CI prot_f(prot_h, CSeqFeatData::eSubtype_prot);
703  if (prot_f) {
706  // RW-1216 suppress mat-peptide region phrase if sig-peptide also present
708  if (!sig_pi) {
710  if (mat_pi && mat_pi->GetData().GetProt().IsSetName()) {
711  const string& m_name = mat_pi->GetData().GetProt().GetName().front();
712  ++mat_pi;
713  if (!mat_pi && !m_name.empty()) {
714  if (label.empty()) {
715  label = m_name;
716  }
717  else {
718  label += ", " + m_name + " region,";
719  }
720  }
721  }
722  }
723  }
724  }
725  }
726  }
727 
728  if (NStr::IsBlank(label)) {
730  }
731  if ((subtype == CSeqFeatData::eSubtype_cdregion && !NStr::Equal(label, "CDS"))
732  || (subtype == CSeqFeatData::eSubtype_mRNA && !NStr::Equal(label, "mRNA"))
733  || (subtype != CSeqFeatData::eSubtype_cdregion && subtype != CSeqFeatData::eSubtype_mRNA)) {
734  } else {
735  label = "";
736  }
737 
738  // remove unwanted "mRNA-" tacked onto label for mRNA features
739  if (subtype == CSeqFeatData::eSubtype_mRNA && NStr::StartsWith(label, "mRNA-")) {
740  label = label.substr(5);
741  } else if (subtype == CSeqFeatData::eSubtype_rRNA && NStr::StartsWith(label, "rRNA-")) {
742  label = label.substr(5);
743  }
744 
745  if (!NStr::IsBlank(label)) {
746  product_name = label;
747  return true;
748  } else {
749  product_name = "";
750  return false;
751  }
752  }
753  return false;
754 }
755 
756 
758 {
759  if (m_pMainFeat->IsSetQual()) {
761  if ((*it)->IsSetQual() && (*it)->IsSetVal()
762  && NStr::EqualNocase((*it)->GetQual(), "number")) {
763  description = (*it)->GetVal();
764  return true;
765  }
766  }
767  }
768  description = kEmptyStr;
769  return false;
770 }
771 
772 
774 {
776 
777  description = "";
778  if (subtype == CSeqFeatData::eSubtype_exon || subtype == CSeqFeatData::eSubtype_intron) {
779  return x_GetExonDescription(description);
780  } else if (NStr::Equal(m_Typeword, "insertion sequence")) {
781  description = m_pMainFeat->GetNamedQual("insertion_seq");
782  if (NStr::Equal(description, "unnamed")
783  || NStr::IsBlank(description)) {
784  description = "";
785  return false;
786  } else {
787  return true;
788  }
789  } else if (subtype == CSeqFeatData::eSubtype_repeat_region) {
790  if (NStr::Equal(m_Typeword, "endogenous virus")) {
791  description = m_pMainFeat->GetNamedQual("endogenous_virus");
792  if (NStr::Equal(description, "unnamed")
793  || NStr::IsBlank(description)) {
794  description = "";
795  return false;
796  } else {
797  return true;
798  }
799  } else {
800  description = m_pMainFeat->GetNamedQual("rpt_family");
801  if (NStr::IsBlank(description) && m_pMainFeat->IsSetComment()) {
802  description = m_pMainFeat->GetComment();
803  if (IsLTR() && NStr::EndsWith(description, " LTR")) {
804  description = description.substr(0, description.length() - 4);
805  }
806  }
807  return true;
808  }
809  } else if (subtype == CSeqFeatData::eSubtype_biosrc
810  && NStr::Equal(m_Typeword, "endogenous virus")) {
813  if ((*subSrcI)->GetSubtype() == CSubSource::eSubtype_endogenous_virus_name) {
814  description = (*subSrcI)->GetName();
815  if (NStr::Equal(description, "unnamed")
816  || NStr::IsBlank(description)) {
817  description = "";
818  } else {
819  return true;
820  }
821  }
822  }
823  }
824  return false;
825  } else if (NStr::Equal(m_Typeword, "control region")
826  || NStr::Equal(m_Typeword, "D-loop")
827  || subtype == CSeqFeatData::eSubtype_3UTR
828  || subtype == CSeqFeatData::eSubtype_5UTR) {
829  return false;
830  } else if (IsLTR(*m_pMainFeat)) {
831  if (m_pMainFeat->CanGetComment()) {
832  string comment = m_pMainFeat->GetComment();
833  if (NStr::StartsWith(comment, "LTR ")) {
834  comment = comment.substr(4);
835  } else if (NStr::EndsWith(comment, " LTR")) {
836  comment = comment.substr(0, comment.length() - 4);
837  }
838  description = comment;
839  }
840  if (NStr::IsBlank(description)) {
841  return false;
842  } else {
843  return true;
844  }
845  } else if (subtype == CSeqFeatData::eSubtype_operon) {
846  description = m_pMainFeat->GetNamedQual("operon");
847  return true;
848  } else {
849  if (!m_ProductNameChosen) {
851  }
852 
854  description = m_ProductName + " (" + m_GeneName + ")";
855  } else if (!NStr::IsBlank(m_GeneName)) {
856  description = m_GeneName;
857  } else if (!NStr::IsBlank(m_ProductName)) {
858  description = m_ProductName;
859  }
860  if (NStr::IsBlank(description)) {
861  return false;
862  } else {
863  return true;
864  }
865  }
866 }
867 
868 
870 {
871  return IsSatellite(*m_pMainFeat);
872 }
873 
874 
876 {
878  && !NStr::IsBlank (feat.GetNamedQual("satellite"))) {
879  return true;
880  }
881  return false;
882 }
883 
884 
886 {
887  return IsPromoter(*m_pMainFeat);
888 }
889 
890 
892 {
893  return IsLTR(*m_pMainFeat);
894 }
895 
896 
898 {
900  return true;
901  } else if (feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_regulatory &&
902  NStr::Equal(feat.GetNamedQual("regulatory_class"), "promoter")) {
903  return true;
904  } else {
905  return false;
906  }
907 }
908 
909 
911 {
913  return true;
915  !feat.IsSetQual()) {
916  return false;
917  }
918  ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
919  if ((*it)->IsSetQual() && (*it)->IsSetVal() &&
920  NStr::EqualNocase((*it)->GetQual(), "rpt_type") &&
921  NStr::FindNoCase((*it)->GetVal(), "long_terminal_repeat") != string::npos) {
922  return true;
923  }
924  }
925  return false;
926 }
927 
928 /* operons suppress all subfeatures except promoters (see GB-5635) */
930 {
931  bool has_promoter = false;
932 
933  for (auto it : m_ClauseList) {
934  if (it->IsPromoter()) {
935  has_promoter = true;
936  break;
937  }
938  }
939  if (has_promoter) {
940  interval += ", promoter region, ";
941  }
942 }
943 
944 
945 /* This function calculates the "interval" for a clause in the definition
946  * line. The interval could be an empty string, it could indicate whether
947  * the location of the feature is partial or complete and whether or not
948  * the feature is a CDS, the interval could be a description of the
949  * subfeatures of the clause, or the interval could be a combination of the
950  * last two items if the feature is a CDS.
951  */
952 bool CAutoDefFeatureClause::x_GetGenericInterval (string &interval, bool suppress_allele)
953 {
954  interval = "";
955  if (m_IsUnknown) {
956  return false;
957  }
958 
960  if (subtype == CSeqFeatData::eSubtype_exon && m_IsAltSpliced) {
961  interval = "alternatively spliced";
962  return true;
963  }
964 
965  if (IsSatelliteClause()
966  || IsPromoter()
968  || subtype == CSeqFeatData::eSubtype_exon
969  || subtype == CSeqFeatData::eSubtype_intron
970  || subtype == CSeqFeatData::eSubtype_5UTR
971  || subtype == CSeqFeatData::eSubtype_3UTR
972  || (subtype == CSeqFeatData::eSubtype_repeat_region && !NStr::Equal(m_Typeword, "endogenous virus"))
974  || IsLTR()) {
975  return false;
976  }
977 
979 
980  if (subtype == CSeqFeatData::eSubtype_operon) {
981  // suppress subclauses except promoters
982  x_GetOperonSubfeatures(interval);
983  } else if (!m_SuppressSubfeatures) {
984  // label subclauses
985  // check to see if 3'UTR is present, and whether there are any other features
986  auto it = m_ClauseList.begin();
987  while (it != m_ClauseList.end()) {
988  if (*it) {
989  (*it)->Label(suppress_allele);
990  if ((*it)->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_3UTR && subtype == CSeqFeatData::eSubtype_cdregion) {
991  utr3 = *it;
992  it = m_ClauseList.erase(it);
993  }
994  else {
995  ++it;
996  }
997  } else {
998  it = m_ClauseList.erase(it);
999  }
1000  }
1001 
1002  // label any subclauses
1003  if (m_ClauseList.size() > 0) {
1004  bool suppress_final_and = false;
1006  suppress_final_and = true;
1007  }
1008 
1009  // create subclause list for interval
1010  interval += ListClauses(false, suppress_final_and, suppress_allele);
1011 
1013  if (utr3 != NULL) {
1014  interval += ", ";
1015  } else if (m_ClauseList.size() == 1) {
1016  interval += " and ";
1017  } else {
1018  interval += ", and ";
1019  }
1020  } else {
1021  return true;
1022  }
1023  }
1024  }
1025 
1026  if (IsPartial()) {
1027  interval += "partial ";
1028  } else {
1029  interval += "complete ";
1030  }
1031 
1032  if (subtype == CSeqFeatData::eSubtype_cdregion
1033  && (!x_IsPseudo())) {
1034  interval += "cds";
1035  if (m_IsAltSpliced) {
1036  interval += ", alternatively spliced";
1037  }
1038  } else {
1039  interval += "sequence";
1040  string product_name;
1041  if (m_IsAltSpliced && x_GetNoncodingProductFeatProduct (product_name)) {
1042  interval += ", alternatively spliced";
1043  }
1044  }
1045 
1046  if (utr3 != NULL) {
1047  /* tack UTR3 on at end of clause */
1048  if (m_ClauseList.size() == 0) {
1049  interval += " and 3' UTR";
1050  } else {
1051  interval += ", and 3' UTR";
1052  }
1053  m_ClauseList.push_back(utr3);
1054  }
1055 
1056  return true;
1057 }
1058 
1059 
1060 void CAutoDefFeatureClause::Label(bool suppress_allele)
1061 {
1062  if (!m_TypewordChosen) {
1065  m_Pluralizable = true;
1066  }
1067  if (!m_ProductNameChosen) {
1069  }
1070  if (!m_DescriptionChosen) {
1072  }
1073 
1074  x_GetGenericInterval (m_Interval, suppress_allele);
1075 
1076 }
1077 
1078 
1080 {
1081  return sequence::Compare(loc, *m_ClauseLocation, &(m_BH.GetScope()),
1083 }
1084 
1085 
1086 bool CAutoDefFeatureClause::SameStrand(const CSeq_loc& loc) const
1087 {
1088  ENa_strand loc_strand = loc.GetStrand();
1089  ENa_strand this_strand = m_ClauseLocation->GetStrand();
1090 
1091  if ((loc_strand == eNa_strand_minus && this_strand != eNa_strand_minus)
1092  || (loc_strand != eNa_strand_minus && this_strand == eNa_strand_minus)) {
1093  return false;
1094  } else {
1095  return true;
1096  }
1097 
1098 }
1099 
1101 {
1104  return true;
1105  } else {
1106  return false;
1107  }
1108 }
1109 
1110 
1112 {
1113  return m_ClauseLocation;
1114 }
1115 
1116 
1117 void CAutoDefFeatureClause::AddToLocation(CRef<CSeq_loc> loc, bool also_set_partials)
1118 {
1121 
1122  if (also_set_partials) {
1123  partial5 |= loc->IsPartialStart(eExtreme_Biological);
1124  }
1125  if (also_set_partials) {
1126  partial3 |= loc->IsPartialStop(eExtreme_Biological);
1127  }
1130  &(m_BH.GetScope()));
1131 
1132 
1135 }
1136 
1137 
1138 // Match for identical strings or for match at the beginning followed by mat-peptide region
1139 bool CAutoDefFeatureClause::DoesmRNAProductNameMatch(const string& mrna_product) const
1140 {
1141  if (!m_ProductNameChosen) {
1142  return false;
1143  }
1144  if (NStr::Equal(m_ProductName, mrna_product)) {
1145  return true;
1146  }
1147  if (NStr::StartsWith(m_ProductName, mrna_product) && m_ProductName[mrna_product.length()] == ',' && NStr::EndsWith(m_ProductName, " region,")) {
1148  return true;
1149  }
1150  return false;
1151 }
1152 
1153 
1154 /* This function searches this list for clauses to which this mRNA should
1155  * apply. This is not taken care of by the GroupAllClauses function
1156  * because when an mRNA is added to a CDS, the product for the clause is
1157  * replaced and the location for the clause is expanded, rather than simply
1158  * adding the mRNA as an additional feature in the list, and because an
1159  * mRNA can apply to more than one clause, while other features should
1160  * really only belong to one clause.
1161  */
1163 {
1164  bool used_mRNA = false;
1165  string clause_product, mRNA_product;
1166  bool adjust_partials = true;
1167 
1168  if (mRNAClause == NULL || ! mRNAClause->SameStrand(*m_ClauseLocation)) {
1169  return false;
1170  }
1171 
1173  sequence::ECompare loc_compare = mRNAClause->CompareLocation(*m_ClauseLocation);
1174  if (subtype == CSeqFeatData::eSubtype_cdregion) {
1175  adjust_partials = false;
1176  }
1177 
1178  if (subtype == CSeqFeatData::eSubtype_cdregion
1179  && DoesmRNAProductNameMatch(mRNAClause->GetProductName())
1180  && (loc_compare == sequence::eContained || loc_compare == sequence::eSame)) {
1181  m_HasmRNA = true;
1182  // when expanding "location" to include mRNA, leave partials for CDS as they were
1183  AddToLocation(mRNAClause->GetLocation(), adjust_partials);
1184  used_mRNA = true;
1185  } else if ((subtype == CSeqFeatData::eSubtype_cdregion || subtype == CSeqFeatData::eSubtype_gene)
1187  && (loc_compare == sequence::eContained
1188  || loc_compare == sequence::eContains
1189  || loc_compare == sequence::eSame)) {
1190  m_HasmRNA = true;
1191  AddToLocation(mRNAClause->GetLocation(), adjust_partials);
1192  used_mRNA = true;
1193  m_ProductName = mRNAClause->GetProductName();
1194  m_ProductNameChosen = true;
1195  }
1196 
1197  if (used_mRNA && mRNAClause->IsAltSpliced()) {
1198  m_IsAltSpliced = true;
1199  }
1200 
1201  return used_mRNA;
1202 }
1203 
1204 
1205 /* This function searches this list for clauses to which this gene should
1206  * apply. This is not taken care of by the GroupAllClauses function
1207  * because genes are added to clauses as a GeneRefPtr instead of as an
1208  * additional feature in the list, and because a gene can apply to more
1209  * than one clause, while other features should really only belong to
1210  * one clause.
1211  */
// Tries to attach the gene described by gene_clause to this clause.
// gene_clause must be non-NULL and have main feature subtype "gene".
// suppress_allele is forwarded to Label().
// Returns true only when the gene was actually applied to this clause.
1212 bool CAutoDefFeatureClause::AddGene (CAutoDefFeatureClause_Base *gene_clause, bool suppress_allele)
1213 {
1214  bool used_gene = false;
1215 
1216  if (gene_clause == NULL || gene_clause->GetMainFeatureSubtype() != CSeqFeatData::eSubtype_gene) {
1217  return false;
1218  }
1219 
 // NOTE(review): listing line 1220 is absent from this copy; `subtype`,
 // which is tested below, is presumably declared there.
1221 
1222  string noncoding_product_name;
1223 
 // NOTE(review): listing line 1232 (inside the condition below) is also
 // absent, so one more accepted subtype may not be shown here.
1224  // only add gene to certain other types of clauses
1225  if (subtype != CSeqFeatData::eSubtype_cdregion
1226  && subtype != CSeqFeatData::eSubtype_mRNA
1227  && subtype != CSeqFeatData::eSubtype_rRNA
1228  && subtype != CSeqFeatData::eSubtype_tRNA
1229  && subtype != CSeqFeatData::eSubtype_misc_RNA
1230  && subtype != CSeqFeatData::eSubtype_otherRNA
1231  && subtype != CSeqFeatData::eSubtype_ncRNA
1233  && subtype != CSeqFeatData::eSubtype_preRNA
1234  && subtype != CSeqFeatData::eSubtype_tmRNA
1235  && subtype != CSeqFeatData::eSubtype_intron
1236  && subtype != CSeqFeatData::eSubtype_exon
1237  && !x_GetNoncodingProductFeatProduct(noncoding_product_name)) {
1238  return false;
1239  }
1240 
1241  if (m_HasGene) {
1242  // already assigned
1243  } else {
1244  // find overlapping gene for this feature
1245  CAutoDefGeneClause *gene = dynamic_cast<CAutoDefGeneClause *>(gene_clause);
 // If gene_clause is not a CAutoDefGeneClause the cast yields NULL and the
 // locus tag is not suppressed.
1246  bool suppress_locus_tag = gene ? gene->GetSuppressLocusTag() : false;
 // NOTE(review): listing line 1247 is absent; `gene_for_feat` is presumably
 // obtained there. The gene is used only when its name equals the name
 // reported by gene_clause.
1248  if (gene_for_feat && NStr::Equal(x_GetGeneName(gene_for_feat->GetData().GetGene(), suppress_locus_tag), gene_clause->GetGeneName())) {
1249  used_gene = true;
1250  m_HasGene = true;
1251  m_GeneName = gene_clause->GetGeneName();
1252  m_AlleleName = gene_clause->GetAlleleName();
1253  m_GeneIsPseudo = gene_clause->GetGeneIsPseudo();
 // NOTE(review): listing line 1254 is absent from this copy.
1255  }
1256  }
1257 
 // Give Label() a chance to choose a product name; if it still has not,
 // fall back to the product name reported by the gene clause.
1258  if (used_gene && ! m_ProductNameChosen) {
1259  Label(suppress_allele);
1260  if (!m_ProductNameChosen) {
1261  m_ProductNameChosen = true;
1262  m_ProductName = gene_clause->GetProductName();
1263  }
1264  }
 // Clear the description flag and label again now that the gene fields are set.
1265  if (used_gene) {
1266  m_DescriptionChosen = false;
1267  Label(suppress_allele);
1268  }
1269 
1270  return used_gene;
1271 }
1272 
1273 
1275 {
1276  bool ok_to_group = false;
1277 
1278  if (parent_clause == NULL) {
1279  return false;
1280  }
1282  CSeqFeatData::ESubtype parent_subtype = parent_clause->GetMainFeatureSubtype();
1283 
1284  if (parent_subtype == CSeqFeatData::eSubtype_mobile_element) {
1285  return true;
1286  }
1287 
1288  if (subtype == CSeqFeatData::eSubtype_exon || subtype == CSeqFeatData::eSubtype_intron) {
1289  if (parent_subtype == CSeqFeatData::eSubtype_cdregion
1290  || parent_subtype == CSeqFeatData::eSubtype_D_loop
1291  || parent_subtype == CSeqFeatData::eSubtype_mRNA
1292  || parent_subtype == CSeqFeatData::eSubtype_gene
1293  || parent_subtype == CSeqFeatData::eSubtype_operon
1294  || parent_clause->IsNoncodingProductFeat()
1295  || parent_clause->IsEndogenousVirusSourceFeature()
1296  || parent_clause->IsGeneCluster()) {
1297  ok_to_group = true;
1298  }
1299  } else if (IsPromoter() || subtype == CSeqFeatData::eSubtype_regulatory) {
1300  if (parent_subtype == CSeqFeatData::eSubtype_cdregion
1301  || parent_subtype == CSeqFeatData::eSubtype_mRNA
1302  || parent_subtype == CSeqFeatData::eSubtype_gene
1303  || parent_subtype == CSeqFeatData::eSubtype_operon
1304  || parent_clause->IsEndogenousVirusSourceFeature()
1305  || parent_clause->IsGeneCluster()) {
1306  ok_to_group = true;
1307  }
1308  } else if (subtype == CSeqFeatData::eSubtype_cdregion) {
1309  if (parent_subtype == CSeqFeatData::eSubtype_mRNA
1310  || parent_clause->IsInsertionSequence()
1311  || parent_clause->IsMobileElement()
1312  || parent_clause->IsEndogenousVirusSourceFeature()
1313  || parent_subtype == CSeqFeatData::eSubtype_operon
1314  || parent_clause->IsGeneCluster()) {
1315  ok_to_group = true;
1316  }
1317  } else if (IsInsertionSequence()
1318  || subtype == CSeqFeatData::eSubtype_gene
1319  || IsMobileElement()
1321  || subtype == CSeqFeatData::eSubtype_operon
1322  || IsGeneCluster()) {
1323  if (parent_clause->IsMobileElement()
1324  || parent_clause->IsInsertionSequence()
1325  || parent_clause->IsEndogenousVirusSourceFeature()
1326  || parent_subtype == CSeqFeatData::eSubtype_operon
1327  || parent_clause->IsGeneCluster()) {
1328  ok_to_group = true;
1329  }
1330  } else if (subtype == CSeqFeatData::eSubtype_3UTR
1331  || subtype == CSeqFeatData::eSubtype_5UTR
1332  || IsLTR(*m_pMainFeat)) {
1333  if (parent_subtype == CSeqFeatData::eSubtype_cdregion
1334  || parent_subtype == CSeqFeatData::eSubtype_mRNA
1335  || parent_subtype == CSeqFeatData::eSubtype_gene
1336  || parent_clause->IsEndogenousVirusSourceFeature()
1337  || parent_subtype == CSeqFeatData::eSubtype_operon
1338  || parent_clause->IsGeneCluster()) {
1339  ok_to_group = true;
1340  }
1341  }
1342 
1343  return ok_to_group;
1344 }
1345 
1346 
1347 // Transposons, insertion sequences, and endogenous viruses
1348 // take subfeatures regardless of whether the subfeature is
1349 // on the same strand.
1350 // Gene Clusters can optionally take subfeatures on either
1351 // strand (gene_cluster_opp_strand is flag).
1352 // Promoters will match up to features that are adjacent.
1353 // Introns will match up to coding regions if the intron
1354 // location is the space between two coding region intervals.
1355 // Any feature on an mRNA sequence groups locationally.
1356 // All other feature matches must be that the feature to
1357 // go into the clause must fit inside the location of the
1358 // other clause.
// Returns true when, judged by location alone, this clause may be grouped
// under parent_clause. gene_cluster_opp_strand allows a gene-cluster parent
// to accept a subfeature on the other strand.
1359 bool CAutoDefFeatureClause::OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *parent_clause, bool gene_cluster_opp_strand) const
1360 {
1361  if (parent_clause == NULL) {
1362  return false;
1363  }
1364 
1365  if (m_HasGene && parent_clause->GetMainFeatureSubtype() == CSeqFeatData::eSubtype_gene) {
1366  // genes must match to be parents
1367  if (!NStr::Equal(m_GeneName, parent_clause->GetGeneName())) {
1368  return false;
1369  }
1370  }
1371 
 // NOTE(review): listing line 1372, which holds the condition guarding this
 // early "return true", is absent from this copy.
1373  return true;
1374  }
1375 
1376  sequence::ECompare loc_compare = parent_clause->CompareLocation(*m_ClauseLocation);
1377 
 // Contained/same location: accept on the same strand, or on either strand
 // for mobile elements, insertion sequences, endogenous virus source
 // features, and (when the flag is set) gene clusters.
1378  if (loc_compare == sequence::eContained || loc_compare == sequence::eSame) {
1379  if (parent_clause->SameStrand(*m_ClauseLocation)) {
1380  return true;
1381  } else if (parent_clause->IsMobileElement()
1382  || parent_clause->IsInsertionSequence()
1383  || parent_clause->IsEndogenousVirusSourceFeature()
1384  || (parent_clause->IsGeneCluster() && gene_cluster_opp_strand)) {
1385  return true;
1386  }
1387  } else if (IsPromoter()
1388  && parent_clause->SameStrand(*m_ClauseLocation)) {
 // Promoter on the same strand: accept when the promoter's biological stop
 // is directly next to the parent's biological start.
1389  unsigned int promoter_stop = sequence::GetStop(*m_ClauseLocation, &(m_BH.GetScope()), eExtreme_Biological);
1390  unsigned int parent_start = sequence::GetStart(*(parent_clause->GetLocation()), &(m_BH.GetScope()), eExtreme_Biological);
 // NOTE(review): listing line 1391 is absent; it presumably opens the
 // branch that selects between the two adjacency tests below.
1392  if (promoter_stop == parent_start + 1) {
1393  return true;
1394  }
1395  } else if (promoter_stop + 1 == parent_start) {
1396  return true;
1397  }
 // NOTE(review): listing lines 1398-1399 are absent; they presumably start
 // the intron branch whose condition continues on the next line.
1400  && parent_clause->SameStrand(*m_ClauseLocation)) {
1401  CSeq_loc_CI seq_loc_it(*(parent_clause->GetLocation()));
1402  if (seq_loc_it) {
 // NOTE(review): listing lines 1403-1404 are absent; `intron_start` and
 // `intron_stop` are presumably declared there.
1405  int prev_start = seq_loc_it.GetRange().GetFrom();
1406  int prev_stop = seq_loc_it.GetRange().GetTo();
1407  ++seq_loc_it;
 // Walk consecutive parent intervals and accept when the intron exactly
 // fills the gap between two of them (in either coordinate order).
1408  while (seq_loc_it) {
1409  int cds_start = seq_loc_it.GetRange().GetFrom();
1410  int cds_stop = seq_loc_it.GetRange().GetTo();
1411  if ((intron_start == prev_stop + 1 && intron_stop == cds_start - 1)
1412  || (intron_start == cds_stop + 1 && intron_stop == prev_start - 1)) {
1413  return true;
1414  }
1415  prev_start = cds_start;
1416  prev_stop = cds_stop;
1417  ++seq_loc_it;
1418  }
1419  // intron could also group with coding region if coding region is adjacent
1420  if (intron_start > prev_stop && intron_start - 1 == prev_stop) {
1421  return true;
1422  } else if (prev_start > intron_stop && prev_start - 1 == intron_stop) {
1423  return true;
1424  }
1425  }
1426  }
1427 
1428  return false;
1429 }
1430 
1431 
1433 {
1434  CAutoDefFeatureClause_Base *best_parent;
1435 
1436  if (subclause == NULL || subclause == this) {
1437  return NULL;
1438  }
1439 
1440  if (!NStr::IsBlank(subclause->GetGeneName()) &&
1441  !NStr::IsBlank(this->GetGeneName()) &&
1442  !NStr::Equal(subclause->GetGeneName(), this->GetGeneName())) {
1443  return NULL;
1444  }
1445 
1446  best_parent = CAutoDefFeatureClause_Base::FindBestParentClause(subclause, gene_cluster_opp_strand);
1447 
1448  if (subclause->OkToGroupUnderByLocation(this, gene_cluster_opp_strand)
1449  && subclause->OkToGroupUnderByType(this)) {
1450  if (best_parent == NULL || best_parent->CompareLocation(*m_ClauseLocation) == sequence::eContained) {
1451  best_parent = this;
1452  }
1453  }
1454  return best_parent;
1455 }
1456 
1458 {
1459  ENa_strand this_strand = m_ClauseLocation->GetStrand();
1460  if (this_strand == eNa_strand_minus
1462  std::reverse(m_ClauseList.begin(), m_ClauseList.end());
1463  }
1464 
1465  for (unsigned int k = 0; k < m_ClauseList.size(); k++) {
1466  m_ClauseList[k]->ReverseCDSClauseLists();
1467  }
1468 }
1469 
1470 
1471 
1473 {
1474  unsigned int subtype = GetMainFeatureSubtype();
1475 
1476  if (subtype == CSeqFeatData::eSubtype_mRNA) {
1477  return false;
1478  } else if (subtype == CSeqFeatData::eSubtype_cdregion) {
1479  if (IsPartial()) {
1480  // keep only if exons have numbers
1481  for (size_t k = 0; k < m_ClauseList.size(); k++) {
1482  if (m_ClauseList[k]->IsExonWithNumber()) {
1483  return false;
1484  }
1485  }
1486  return true;
1487  } else {
1488  return true;
1489  }
1490  } else {
1491  return true;
1492  }
1493 }
1494 
1495 
1497 {
1498  if (m_pMainFeat->IsSetData() &&
1500  m_pMainFeat->IsSetQual()) {
1502  if ((*it)->IsSetQual() &&
1503  NStr::Equal((*it)->GetQual(), "number") &&
1504  (*it)->IsSetVal() &&
1505  !NStr::IsBlank((*it)->GetVal())) {
1506  return true;
1507  }
1508  }
1509  }
1510  return false;
1511 }
1512 
1513 
1515 {
1517  return true;
1518  } else {
1519  return false;
1520  }
1521 }
1522 
1523 
1524 CAutoDefNcRNAClause::CAutoDefNcRNAClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
1525  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts),
1526  m_UseComment (m_Opts.GetUseNcRNAComment())
1527 {
1528 }
1529 
1530 
1532 {
1533 }
1534 
1535 
// Composes the product name for an ncRNA feature into product_name.
// Preference order: product (followed by the class, if any), the class alone,
// the feature comment up to the first ';' (only when m_UseComment is set),
// and finally the literal "non-coding RNA". Always returns true.
1536 bool CAutoDefNcRNAClause::x_GetProductName(string &product_name)
1537 {
1538  string ncrna_product;
1539  string ncrna_class;
 // NOTE(review): listing line 1540, the start of the `if` whose condition
 // continues below, is absent from this copy.
1541  && m_pMainFeat->GetData().GetRna().IsSetExt()) {
1542  const CRNA_ref::TExt& ext = m_pMainFeat->GetData().GetRna().GetExt();
1543  if (ext.IsName()) {
1544  ncrna_product = ext.GetName();
 // A name that is just "ncRNA" (any case) is treated as no product at all.
1545  if (NStr::EqualNocase(ncrna_product, "ncRNA")) {
1546  ncrna_product = "";
1547  }
1548  } else if (ext.IsGen()) {
1549  if (ext.GetGen().IsSetProduct()) {
1550  ncrna_product = ext.GetGen().GetProduct();
1551  }
1552  if (ext.GetGen().IsSetClass()) {
1553  ncrna_class = ext.GetGen().GetClass();
1554  }
1555  }
1556  }
 // Fall back to the "product" / "ncRNA_class" qualifiers when the RNA
 // extension did not supply a value.
1557  if (NStr::IsBlank(ncrna_product)) {
1558  ncrna_product = m_pMainFeat->GetNamedQual("product");
1559  }
1560  if (NStr::IsBlank(ncrna_class)) {
1561  ncrna_class = m_pMainFeat->GetNamedQual("ncRNA_class");
1562  }
 // The class "other" is dropped; underscores in the class become spaces.
1563  if (NStr::EqualNocase(ncrna_class, "other")) {
1564  ncrna_class = "";
1565  }
1566  NStr::ReplaceInPlace(ncrna_class, "_", " ");
1567 
 // Keep only the part of the comment that precedes the first semicolon.
1568  string ncrna_comment;
1569  if (m_pMainFeat->IsSetComment()) {
1570  ncrna_comment = m_pMainFeat->GetComment();
1571  if (!NStr::IsBlank(ncrna_comment)) {
1572  string::size_type pos = NStr::Find(ncrna_comment, ";");
1573  if (pos != NCBI_NS_STD::string::npos) {
1574  ncrna_comment = ncrna_comment.substr(0, pos);
1575  }
1576  }
1577  }
1578 
1579  if (!NStr::IsBlank (ncrna_product)) {
1580  product_name = ncrna_product;
1581  if (!NStr::IsBlank (ncrna_class)) {
1582  product_name += " " + ncrna_class;
1583  }
1584  } else if (!NStr::IsBlank(ncrna_class)) {
1585  product_name = ncrna_class;
1586  } else if (m_UseComment && !NStr::IsBlank (ncrna_comment)) {
1587  product_name = ncrna_comment;
1588  } else {
1589  product_name = "non-coding RNA";
1590  }
1591  return true;
1592 
1593 }
1594 
1595 
// Keywords recognized inside a "mobile_element_type" qualifier value.
// The CAutoDefMobileElementClause constructor scans this table from first to
// last entry and stops at the first keyword found at the start or the end of
// the value, so the order of the entries is significant.
// The table is only ever read, hence it is declared const.
static const string mobile_element_keywords [] = {
    "insertion sequence",
    "retrotransposon",
    "non-LTR retrotransposon",
    "transposon",
    "P-element",
    "transposable element",
    "integron",
    "superintegron",
    "SINE",
    "MITE",
    "LINE"
};
1609 
1610 
// Builds the clause for a mobile element from the feature's
// "mobile_element_type" qualifier. A leading "other:" is removed first.
// A blank value yields the typeword "mobile element" with an empty
// description; otherwise the keyword table is scanned to split the value
// into typeword and description.
1611 CAutoDefMobileElementClause::CAutoDefMobileElementClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts)
1612  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
1613 {
1614  string mobile_element_name = m_pMainFeat->GetNamedQual("mobile_element_type");
1615  if (NStr::StartsWith(mobile_element_name, "other:")) {
 // 6 == length of "other:"
1616  mobile_element_name = mobile_element_name.substr(6);
1617  }
1618  bool found_keyword = false;
1619 
1620  m_Pluralizable = true;
1621 
1622  if (NStr::IsBlank(mobile_element_name)) {
1623  m_Description = "";
1624  m_ShowTypewordFirst = false;
1625  m_Typeword = "mobile element";
1626  } else {
 // The loop stops at the first keyword that sets found_keyword.
1627  for (unsigned int k = 0; k < sizeof (mobile_element_keywords) / sizeof (string) && !found_keyword; k++) {
1628  size_t pos;
1629  if (NStr::StartsWith(mobile_element_name, mobile_element_keywords[k])) {
1630  // keyword at the beginning
 // NOTE(review): listing line 1631 is absent; presumably it assigns m_Typeword.
1632  if (NStr::Equal(mobile_element_name, mobile_element_keywords[k])) {
1633  m_ShowTypewordFirst = false;
1634  m_Description = "";
1635  } else {
1636  m_ShowTypewordFirst = true;
1637  m_Description = mobile_element_name.substr(mobile_element_keywords[k].length());
 // NOTE(review): listing line 1638 is absent from this copy.
1639  }
1640  if (mobile_element_name.c_str()[mobile_element_keywords[k].length()] == '-') {
1641  // if keyword is hyphenated portion of name, no pluralization
1642  m_Pluralizable = false;
1643  }
1644  found_keyword = true;
1645  } else if (NStr::EndsWith(mobile_element_name, mobile_element_keywords[k])) {
1646  // keyword at the end
 // NOTE(review): listing line 1647 is absent; presumably it assigns m_Typeword.
1648  m_ShowTypewordFirst = false;
1649  m_Description = mobile_element_name.substr(0, mobile_element_name.length() - mobile_element_keywords[k].length());
 // NOTE(review): listing line 1650 is absent from this copy.
1651  found_keyword = true;
 // NOTE(review): `pos` is where the keyword starts, so the character tested
 // by isspace() below is the keyword's own first character. Every keyword in
 // the table starts with a letter, so this branch looks unreachable;
 // presumably `pos - 1` was intended -- confirm. The branch also leaves
 // found_keyword false, so the "keyword not in description" fallback after
 // the loop would overwrite m_Typeword and m_Description.
1652  } else if ((pos = NStr::Find(mobile_element_name, mobile_element_keywords[k])) != string::npos
1653  && isspace(mobile_element_name.c_str()[pos])) {
1654  // keyword in the middle
1655  m_Typeword = "";
1656  m_ShowTypewordFirst = false;
1657  m_Description = mobile_element_name.substr(pos);
1658  m_Pluralizable = false;
1659  }
1660  }
1661  if (!found_keyword) {
1662  // keyword not in description
1663  m_Typeword = "mobile element";
1664  m_Description = mobile_element_name;
1665  }
1666  }
 // An "integron" typeword is never shown first.
1667  if (NStr::EqualNocase(m_Typeword, "integron")) {
1668  m_ShowTypewordFirst = false;
1669  }
1670 
1671  m_DescriptionChosen = true;
1672  m_TypewordChosen = true;
1673  m_ProductName = "";
1674  m_ProductNameChosen = true;
 // NOTE(review): listing line 1675 is absent from this copy.
 // Drop a leading ':' from the description.
1676  if (NStr::StartsWith(m_Description, ":")) {
1677  m_Description = m_Description.substr(1);
 // NOTE(review): listing line 1678 is absent from this copy.
1679  }
 // The placeholder description "unnamed" is not shown.
1680  if (NStr::Equal(m_Description, "unnamed")) {
1681  m_Description = "";
1682  }
1683 }
1684 
1685 
1687 {
1688 }
1689 
1690 
1691 void CAutoDefMobileElementClause::Label(bool suppress_allele)
1692 {
1693  m_DescriptionChosen = true;
1694  x_GetGenericInterval (m_Interval, suppress_allele);
1695 }
1696 
1697 
1699 {
1700  if (NStr::Equal(m_Typeword, "SINE") ||
1701  NStr::Equal(m_Typeword, "LINE") ||
1702  NStr::Equal(m_Typeword, "MITE")) {
1703  return true;
1704  } else {
1705  return false;
1706  }
1707 
1708 }
1709 
1710 
// Prefixes that the CAutoDefSatelliteClause constructor looks for at the
// start of a "satellite" qualifier value; kSatellite is also the default
// label it prepends when none of them is present.
1711 const char *kMinisatellite = "minisatellite";
1712 const char *kMicrosatellite = "microsatellite";
1713 const char *kSatellite = "satellite";
1714 
1715 CAutoDefSatelliteClause::CAutoDefSatelliteClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
1716  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
1717 {
1718  string comment = m_pMainFeat->GetNamedQual("satellite");
1719  string::size_type pos = NStr::Find(comment, ";");
1720  if (pos != NCBI_NS_STD::string::npos) {
1721  comment = comment.substr(0, pos);
1722  }
1723 
1724  size_t len = 0;
1725 
1726  if (NStr::StartsWith(comment, kMinisatellite)) {
1727  len = strlen (kMinisatellite);
1728  } else if (NStr::StartsWith (comment, kMicrosatellite)) {
1729  len = strlen (kMicrosatellite);
1730  } else if (NStr::StartsWith (comment, kSatellite)) {
1731  len = strlen (kSatellite);
1732  } else {
1733  // use default label satellite
1734  string prefix = kSatellite;
1735  comment = prefix + " " + comment;
1736  }
1737  if (len > 0 && NStr::Equal(comment.substr(len, 1), ":")) {
1738  comment = comment.substr (0, len) + " " + comment.substr (len + 1);
1739  }
1740 
1741  m_Description = comment;
1742  m_DescriptionChosen = true;
1743  m_Typeword = "sequence";
1744  m_TypewordChosen = true;
1745 }
1746 
1747 
1749 {
1750 }
1751 
1752 
1753 void CAutoDefSatelliteClause::Label(bool suppress_allele)
1754 {
1755  m_DescriptionChosen = true;
1756  x_GetGenericInterval(m_Interval, suppress_allele);
1757 }
1758 
1759 
1760 CAutoDefPromoterClause::CAutoDefPromoterClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
1761  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
1762 {
1763  m_Description = "";
1764  m_DescriptionChosen = true;
1765  m_Typeword = "promoter region";
1766  m_TypewordChosen = true;
1767  m_Interval = "";
1768 }
1769 
1770 
1772 {
1773 }
1774 
1775 
1776 void CAutoDefPromoterClause::Label(bool suppress_allele)
1777 {
1778  m_DescriptionChosen = true;
1779 }
1780 
1781 
1782 /* This class produces the default definition line label for a misc_feature
1783  * that has the word "intergenic spacer" in the comment. If the comment starts
1784  * with the word "contains", "contains" is ignored. If "intergenic spacer"
1785  * appears first in the comment (or first after the word "contains"), the text
1786  * after the words "intergenic spacer" but before the first semicolon (if any)
1787  * appears after the words "intergenic spacer" in the definition line. If there
1788  * are words after "contains" or at the beginning of the comment before the words
1789  * "intergenic spacer", this text will appear in the definition line before the words
1790  * "intergenic spacer".
1791  */
1792 
// Initializes typeword, description and interval from an intergenic-spacer
// comment. A comment starting with "may contain " becomes a "region" clause
// with an empty typeword; otherwise an optional leading "contains " is
// dropped and the text around "intergenic spacer" becomes the description.
// suppress_allele is forwarded to x_GetGenericInterval().
1793 void CAutoDefIntergenicSpacerClause::InitWithString (string comment, bool suppress_allele)
1794 {
1795  m_Typeword = "intergenic spacer";
1796  m_TypewordChosen = true;
1797  m_ShowTypewordFirst = false;
1798  m_Pluralizable = false;
1799 
1800 
1801  if (NStr::StartsWith(comment, "may contain ")) {
 // 12 == length of "may contain "; this branch does not call
 // x_GetGenericInterval and uses the fixed interval text "region".
1802  m_Description = comment.substr(12);
1803  m_DescriptionChosen = true;
1804  m_Typeword = "";
1805  m_TypewordChosen = true;
1806  m_Interval = "region";
1807  } else {
1808  if (NStr::StartsWith(comment, "contains ")) {
 // 9 == length of "contains "
1809  comment = comment.substr(9);
1810  }
1811 
1812  if (NStr::StartsWith(comment, "intergenic spacer")) {
 // 17 == length of "intergenic spacer"; what follows it is the candidate description.
1813  comment = comment.substr(17);
1814  if (NStr::IsBlank(comment)) {
1815  m_ShowTypewordFirst = false;
1816  m_Description = "";
1817  m_DescriptionChosen = true;
1818  } else {
1819  NStr::TruncateSpacesInPlace(comment);
 // Trailing text that starts with "and " is not used as a description.
1820  if (NStr::StartsWith(comment, "and ")) {
1821  m_Description = "";
1822  m_DescriptionChosen = true;
1823  m_ShowTypewordFirst = false;
1824  } else {
1825  m_Description = comment;
1826  m_DescriptionChosen = true;
1827  m_ShowTypewordFirst = true;
1828  }
1829  }
1830  } else {
 // Text in front of "intergenic spacer" becomes the description. When the
 // phrase is not found, the visible code leaves m_Description untouched.
1831  string::size_type pos = NStr::Find(comment, "intergenic spacer");
1832  if (pos != NCBI_NS_STD::string::npos) {
1833  m_Description = comment.substr(0, pos);
 // NOTE(review): listing line 1834 is absent from this copy.
1835  m_DescriptionChosen = true;
1836  m_ShowTypewordFirst = false;
1837  }
1838  }
1839  x_GetGenericInterval(m_Interval, suppress_allele);
1840  }
1841 }
1842 
1843 
1844 CAutoDefIntergenicSpacerClause::CAutoDefIntergenicSpacerClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc &mapped_loc, string comment, const CAutoDefOptions& opts)
1845  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
1846 {
1847  InitWithString (comment, true);
1848 }
1849 
1850 
1852  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
1853 {
1854 
1855  string comment;
1856  if (m_pMainFeat->IsSetComment()) {
1857  comment = m_pMainFeat->GetComment();
1858  }
1859 
1860  /* truncate at first semicolon */
1861  string::size_type pos = NStr::Find(comment, ";");
1862  if (pos != NCBI_NS_STD::string::npos) {
1863  comment = comment.substr(0, pos);
1864  }
1865 
1866  InitWithString (comment, true);
1867 }
1868 
1869 
1871 {
1872 }
1873 
1874 
1875 void CAutoDefIntergenicSpacerClause::Label(bool suppress_allele)
1876 {
1877  m_DescriptionChosen = true;
1878 }
1879 
1880 
1882  const string& description, bool is_first, bool is_last, const CAutoDefOptions& opts)
1883  : CAutoDefIntergenicSpacerClause(bh, main_feat, mapped_loc, opts)
1884 {
1885  if (!NStr::IsBlank(description)) {
1886  m_Description = description;
1887  size_t pos = NStr::Find(m_Description, "intergenic spacer");
1888  if (pos != string::npos) {
1889  m_Description = m_Description.substr(0, pos);
1891  }
1892  m_DescriptionChosen = true;
1893  }
1894  m_Typeword = "intergenic spacer";
1895  m_TypewordChosen = true;
1896 
1897  // adjust partialness of location
1898  bool partial5 = m_ClauseLocation->IsPartialStart(eExtreme_Biological) && is_first;
1899  bool partial3 = m_ClauseLocation->IsPartialStop(eExtreme_Biological) && is_last;
1903  if (NStr::EndsWith(description, " region")) {
1904  MakeRegion();
1905  }
1906 }
1907 
1908 
1910 {
1911 }
1912 
1913 
1915 {
1916 }
1917 
1918 
// Constructs a clause for one element parsed out of a feature's text.
// is_first / is_last say whether this element is the first / last one; the
// 5' partial flag is kept only for the first element and the 3' partial flag
// only for the last.
1919 CAutoDefParsedClause::CAutoDefParsedClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, bool is_first, bool is_last, const CAutoDefOptions& opts)
1920  : CAutoDefFeatureClause (bh, main_feat, mapped_loc, opts)
1921 {
1922  // adjust partialness of location
1923  bool partial5 = m_ClauseLocation->IsPartialStart(eExtreme_Biological) && is_first;
1924  bool partial3 = m_ClauseLocation->IsPartialStop(eExtreme_Biological) && is_last;
 // NOTE(review): listing lines 1925-1926 are absent from this copy;
 // presumably they apply partial5 / partial3 to m_ClauseLocation.
1927 }
1928 
1930 {
1931 }
1932 
// Sets typeword, description and related fields of this clause from one
// phrase of a misc_RNA description. How the phrase is handled depends on the
// word type reported by x_GetRnaMiscWordType(): spacer, RNA, or tRNA.
1933 void CAutoDefParsedClause::SetMiscRNAWord(const string& phrase)
1934 {
1935  ERnaMiscWord word_type = x_GetRnaMiscWordType(phrase);
1936  if (word_type == eMiscRnaWordType_InternalSpacer ||
1937  word_type == eMiscRnaWordType_ExternalSpacer ||
1938  word_type == eMiscRnaWordType_RNAIntergenicSpacer ||
1939  word_type == eMiscRnaWordType_IntergenicSpacer) {
 // Spacer phrases: the spacer word becomes the typeword and the rest of the
 // phrase (after or before the word) becomes the description.
1940  const string& item_name = x_GetRnaMiscWord(word_type);
1941  if (NStr::StartsWith(phrase, item_name)) {
1942  SetTypewordFirst(true);
1943  m_Description = phrase.substr(item_name.length());
1944  } else {
1945  SetTypewordFirst(false);
 // If item_name is not found, Find() returns npos and substr(0, npos)
 // keeps the whole phrase.
1946  m_Description = phrase.substr(0, NStr::Find(phrase, item_name));
1947  }
 // Keep " region" as part of the typeword unless the typeword comes first
 // and " region" is the entire description.
1948  if (NStr::EndsWith(phrase, " region") &&
1949  (!m_ShowTypewordFirst || m_Description != " region")) {
1950  SetTypeword(item_name + " region");
1951  } else {
1952  SetTypeword(item_name);
1953  }
1954  } else if (word_type == eMiscRnaWordType_RNA) {
1955  m_Description = phrase;
1956  if (NStr::EndsWith(m_Description, " gene")) {
 // 5 == length of " gene"; the word is re-added as the typeword below.
1957  m_Description = m_Description.substr(0, m_Description.length() - 5);
1958  }
1959  SetTypeword("gene");
1960  SetTypewordFirst(false);
1961  } else if (word_type == eMiscRnaWordType_tRNA) {
1962  string gene_name;
1963  string product_name;
1964  if (CAutoDefParsedtRNAClause::ParseString(phrase, gene_name, product_name)) {
1965  m_TypewordChosen = true;
1966  m_GeneName = gene_name;
1967  if (!NStr::IsBlank(m_GeneName)) {
1968  m_HasGene = true;
1969  }
1970  m_ProductName = product_name;
1971  m_ProductNameChosen = true;
 // NOTE(review): listing line 1972 is absent from this copy.
1973  } else {
 // Phrase could not be parsed as a tRNA: use it verbatim as the description.
1974  m_Description = phrase;
1975  }
1976  SetTypeword("gene");
1977  SetTypewordFirst(false);
1978  }
 // NOTE(review): listing line 1979 is absent from this copy.
1980  m_DescriptionChosen = true;
1981 }
1982 
1983 
1984 CAutoDefParsedtRNAClause::CAutoDefParsedtRNAClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc,
1985  string gene_name, string product_name,
1986  bool is_first, bool is_last, const CAutoDefOptions& opts)
1987  : CAutoDefParsedClause (bh, main_feat, mapped_loc, is_first, is_last, opts)
1988 {
1989  m_Typeword = "gene";
1990  m_TypewordChosen = true;
1991  m_GeneName = gene_name;
1992  if (!NStr::IsBlank (m_GeneName)) {
1993  m_HasGene = true;
1994  }
1995  m_ProductName = product_name;
1996  m_ProductNameChosen = true;
1997 }
1998 
1999 
2000 CAutoDefGeneClusterClause::CAutoDefGeneClusterClause(CBioseq_Handle bh, const CSeq_feat& main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
2001  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
2002 {
2003  m_Pluralizable = false;
2004  m_ShowTypewordFirst = false;
2005  string comment = m_pMainFeat->GetComment();
2006 
2007  string::size_type pos = NStr::Find(comment, "gene cluster");
2008  if (pos == NCBI_NS_STD::string::npos) {
2009  pos = NStr::Find(comment, "gene locus");
2010  m_Typeword = "gene locus";
2011  m_TypewordChosen = true;
2012  } else {
2013  m_Typeword = "gene cluster";
2014  m_TypewordChosen = true;
2015  }
2016 
2017  if (pos != NCBI_NS_STD::string::npos) {
2018  comment = comment.substr(0, pos);
2019  }
2020  NStr::TruncateSpacesInPlace(comment);
2021  m_Description = comment;
2022  m_DescriptionChosen = true;
2023  m_SuppressSubfeatures = true;
2024 }
2025 
2026 
2028 {
2029 }
2030 
2031 
2032 void CAutoDefGeneClusterClause::Label(bool suppress_allele)
2033 {
2034  x_GetGenericInterval(m_Interval, suppress_allele);
2035  m_DescriptionChosen = true;
2036 }
2037 
2038 
// Builds a clause whose description comes from the feature comment, cut at
// the first ';'. A trailing " sequence" is moved out of the description and
// becomes the typeword. The interval text is left empty.
2039 CAutoDefMiscCommentClause::CAutoDefMiscCommentClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
2040  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
2041 {
2042  if (m_pMainFeat->CanGetComment()) {
 // NOTE(review): listing line 2043 is absent from this copy; presumably it
 // copies the feature comment into m_Description.
2044  string::size_type pos = NStr::Find(m_Description, ";");
2045  if (pos != NCBI_NS_STD::string::npos) {
2046  m_Description = m_Description.substr(0, pos);
2047  }
2048  m_DescriptionChosen = true;
2049  }
2050  if (NStr::EndsWith(m_Description, " sequence")) {
 // 9 == length of " sequence"
2051  m_Description = m_Description.substr(0, m_Description.length() - 9);
2052  m_Typeword = "sequence";
2053  m_TypewordChosen = true;
2054  } else {
 // NOTE(review): listing line 2055, the body of this else branch, is absent
 // from this copy.
2056  }
2057  m_Interval = "";
2058 }
2059 
2060 
2062 {
2063 }
2064 
2065 
2066 void CAutoDefMiscCommentClause::Label(bool suppress_allele)
2067 {
2068  m_DescriptionChosen = true;
2069 }
2070 
2071 
2073 (CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, string product, const CAutoDefOptions& opts)
2074 : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
2075 {
2076  vector<string> elements = GetMiscRNAElements(product);
2077  if (elements.empty()) {
2078  m_Description = product;
2079  } else {
2080  ITERATE(vector<string>, it, elements) {
2081  if (!NStr::IsBlank(m_Description)) {
2082  m_Description += ", ";
2083  if (*it == elements.back()) {
2084  m_Description += "and ";
2085  }
2086  }
2087  m_Description += *it;
2088  if (NStr::Find(*it, "RNA") != string::npos && !NStr::EndsWith(*it, "gene") && !NStr::EndsWith(*it, "genes")) {
2089  m_Description += " gene";
2090  }
2091  }
2092  }
2093  m_DescriptionChosen = true;
2094 
2095  m_Typeword = "";
2096  m_TypewordChosen = true;
2097  m_Interval = "region";
2098 }
2099 
2100 
2102 {
2103 }
2104 
2105 
// No-op: the body is empty, so labelling changes nothing on this clause and
// suppress_allele is unused.
2106 void CAutoDefParsedRegionClause::Label(bool suppress_allele)
2107 {
2108 }
2109 
// Builds a "promoter region" clause with empty description and interval
// text, and replaces the clause location with a single interval that spans
// the whole bioseq.
2110 CAutoDefFakePromoterClause::CAutoDefFakePromoterClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions& opts)
2111  : CAutoDefFeatureClause (bh, main_feat, mapped_loc, opts)
2112 {
2113  m_Description = "";
2114  m_DescriptionChosen = true;
2115  m_Typeword = "promoter region";
2116  m_TypewordChosen = true;
2117  m_ShowTypewordFirst = false;
2118  m_Interval = "";
2119 
2120 
 // The location set up by the base constructor is discarded in favour of a
 // fresh CSeq_loc covering positions 0 .. length-1 of bh.
2121  m_ClauseLocation = new CSeq_loc();
 // NOTE(review): listing line 2122 is absent from this copy; `id`, copied
 // into new_id below, is presumably obtained there.
2123  CRef <CSeq_id> new_id(new CSeq_id);
2124  new_id->Assign(*id);
2125  m_ClauseLocation->SetInt().SetId(*new_id);
2126  m_ClauseLocation->SetInt().SetFrom(0);
2127  m_ClauseLocation->SetInt().SetTo(bh.GetInst_Length() - 1);
2128 
2129 }
2130 
2131 
2133 {
2134 }
2135 
2136 
// No-op: the body is empty, so labelling changes nothing on this clause and
// suppress_allele is unused.
2137 void CAutoDefFakePromoterClause::Label(bool suppress_allele)
2138 {
2139 }
2140 
2141 
2142 bool CAutoDefFakePromoterClause::OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *parent_clause, bool gene_cluster_opp_strand) const
2143 {
2144  if (parent_clause == NULL) {
2145  return false;
2146  } else {
2147  return true;
2148  }
2149 }
2150 
2151 
2153 {
2154  bool ok_to_group = false;
2155 
2156  if (parent_clause == NULL) {
2157  return false;
2158  }
2159  CSeqFeatData::ESubtype parent_subtype = parent_clause->GetMainFeatureSubtype();
2160 
2161  if (parent_subtype == CSeqFeatData::eSubtype_cdregion
2162  || parent_subtype == CSeqFeatData::eSubtype_mRNA
2163  || parent_subtype == CSeqFeatData::eSubtype_gene
2164  || parent_subtype == CSeqFeatData::eSubtype_operon
2165  || parent_clause->IsEndogenousVirusSourceFeature()
2166  || parent_clause->IsGeneCluster()) {
2167  ok_to_group = true;
2168  }
2169 
2170  return ok_to_group;
2171 }
2172 
2173 
2175  : CAutoDefFeatureClause(bh, main_feat, mapped_loc, opts)
2176 {
2177  m_Description = "promoter region and 5' UTR";
2178  m_DescriptionChosen = true;
2179  m_Typeword = "";
2180  m_TypewordChosen = true;
2181  m_ShowTypewordFirst = false;
2182  m_Interval = "genomic sequence";
2183 
2184 
2185  m_ClauseLocation = new CSeq_loc();
2187  CRef <CSeq_id> new_id(new CSeq_id);
2188  new_id->Assign(*id);
2189  m_ClauseLocation->SetInt().SetId(*new_id);
2190  m_ClauseLocation->SetInt().SetFrom(0);
2191  m_ClauseLocation->SetInt().SetTo(bh.GetInst_Length() - 1);
2192 
2193 }
2194 
2195 
2197 {
2198  return (feat.IsSetData() &&
2200  feat.IsSetComment() &&
2201  NStr::Equal(feat.GetComment(), "contains promoter and 5' UTR"));
2202 }
2203 
2204 
// No-op: the body is empty, so labelling changes nothing on this clause and
// suppress_allele is unused.
2205 void CAutoDefPromoterAnd5UTRClause::Label(bool suppress_allele)
2206 {
2207 
2208 }
2209 
2210 
2212 {
2214  if (subtype == CSeqFeatData::eSubtype_repeat_region) {
2215  if (!NStr::IsBlank(m_pMainFeat->GetNamedQual("endogenous_virus"))) {
2217  }
2218  }
2219  return eDefault;
2220 }
2221 
2222 
2223 // Some misc_RNA clauses have a comment that actually lists multiple
2224 // features. These functions create a clause for each element in the
2225 // comment.
2226 
// Creates the clauses described by the text of a misc_RNA-style feature.
// The text is taken from the "product" qualifier or the RNA extension, and
// from the feature comment when that is blank or just "misc_RNA". An empty
// vector is returned when the text is blank or does not contain "spacer".
2227 vector<CRef<CAutoDefFeatureClause > > AddMiscRNAFeatures(const CBioseq_Handle& bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts)
2228 {
2229  vector<CRef<CAutoDefFeatureClause > > rval;
2230  string comment;
2231  string::size_type pos;
2232 
 // NOTE(review): cf.GetData() is used here before the cf.IsSetData() test
 // in the nested condition, so that later test cannot protect this call.
2233  if (cf.GetData().Which() == CSeqFeatData::e_Rna) {
2234  comment = cf.GetNamedQual("product");
2235  if (NStr::IsBlank(comment)
2236  && cf.IsSetData()
2237  && cf.GetData().IsRna()
2238  && cf.GetData().GetRna().IsSetExt()) {
2239  if (cf.GetData().GetRna().GetExt().IsName()) {
2240  comment = cf.GetData().GetRna().GetExt().GetName();
2241  }
2242  else if (cf.GetData().GetRna().GetExt().IsGen()
2243  && cf.GetData().GetRna().GetExt().GetGen().IsSetProduct()) {
2244  comment = cf.GetData().GetRna().GetExt().GetGen().GetProduct();
2245  }
2246  }
2247  }
2248 
2249  if ((NStr::Equal(comment, "misc_RNA") || NStr::IsBlank(comment)) && cf.CanGetComment()) {
2250  comment = cf.GetComment();
2251  }
2252  if (NStr::IsBlank(comment)) {
2253  return rval;
2254  }
2255 
 // Only texts that mention a spacer are handled by this function.
2256  pos = NStr::Find(comment, "spacer");
2257  if (pos == NPOS) {
2258  return rval;
2259  }
2260 
2261  bool is_region = false;
2262 
 // "contains " is simply dropped; "may contain " additionally switches to
 // producing a single region clause.
2263  NStr::TrimPrefixInPlace(comment, "contains ");
2264  if (NStr::StartsWith(comment, "may contain ")) {
2265  NStr::TrimPrefixInPlace(comment, "may contain ");
2266  is_region = true;
2267  }
2268 
 // Keep only the text before the first semicolon.
2269  pos = NStr::Find(comment, ";");
2270  if (pos != string::npos) {
2271  comment = comment.substr(0, pos);
2272  }
2273 
2274  if (is_region) {
2275  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefParsedRegionClause(bh, cf, mapped_loc, comment, opts)));
2276  } else {
2277  vector<string> elements = CAutoDefFeatureClause::GetMiscRNAElements(comment);
2278  if (!elements.empty()) {
 // NOTE(review): first/last are detected by comparing string values with
 // elements.front()/back(), so an element whose text repeats the first or
 // last one is flagged as well; `auto s` also copies each string.
2279  for (auto s : elements) {
2280  CRef<CAutoDefParsedClause> new_clause(new CAutoDefParsedClause(bh, cf, mapped_loc,
2281  (s == elements.front()), (s == elements.back()), opts));
2282  new_clause->SetMiscRNAWord(s);
2283  rval.push_back(new_clause);
2284  }
2285  } else {
 // NOTE(review): listing line 2286 is absent from this copy; presumably it
 // re-fills `elements` by a different parse of the comment.
2287  if (!elements.empty()) {
2288  for (auto s : elements) {
 // This inner `pos` shadows the function-level `pos`.
2289  size_t pos = NStr::Find(s, "intergenic spacer");
2290  if (pos != string::npos) {
 // NOTE(review): listing line 2291, the start of this statement, is absent
 // from this copy.
2292  cf,
2293  mapped_loc,
2294  (s),
2295  (s == elements.front()),
2296  (s == elements.back()), opts)));
2297  } else {
2298  rval.push_back(CRef<CAutoDefFeatureClause>(s_tRNAClauseFromNote(bh, cf, mapped_loc, s, (s == elements.front()), (s == elements.back()), opts)));
2299  }
2300  }
2301  } else {
 // NOTE(review): listing line 2302, the start of this statement, is absent
 // from this copy. The whole comment is used as one element that is both
 // first and last.
2303  cf,
2304  mapped_loc,
2305  comment,
2306  true,
2307  true,
2308  opts)));
2309  }
2310  }
2311  }
2312  return rval;
2313 }
2314 
2315 
// Builds one clause per phrase found in the feature comment.
//
// The comment is split with
// CAutoDefFeatureClause_Base::GetFeatureClausePhrases(); each phrase is turned
// into a clause by CAutoDefFeatureClause_Base::ClauseFromPhrase().
//
// Parameters:
//   bh         - bioseq handle passed through to ClauseFromPhrase
//   cf         - feature whose comment is parsed
//   mapped_loc - location passed through to ClauseFromPhrase
//   opts       - autodef options passed through to ClauseFromPhrase
//
// Returns an empty vector when the guard below rejects the feature (it requires
// at least that the comment is set) or when fewer than two phrases are found.
//
// NOTE(review): source line 2319 is missing from this listing (the embedded
// numbering skips it); it holds the opening of the guard condition, so only the
// `!cf.IsSetComment()` part of that test is visible here.
2316 vector<CRef<CAutoDefFeatureClause > > AddtRNAAndOther(const CBioseq_Handle& bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts)
2317 {
2318  vector<CRef<CAutoDefFeatureClause> > rval;
2320  !cf.IsSetComment()) {
2321  return rval;
2322  }
2323 
2324  vector<string> phrases = CAutoDefFeatureClause_Base::GetFeatureClausePhrases(cf.GetComment());
     // A single phrase (or none) is not handled here; the caller gets an empty
     // result.
2325  if (phrases.size() < 2) {
2326  return rval;
2327  }
2328 
     // Detach the final phrase so that it can be emitted with last == true; all
     // earlier phrases are emitted with last == false.
2329  bool first = true;
2330  string last = phrases.back();
2331  phrases.pop_back();
2332  ITERATE(vector<string>, it, phrases) {
2333  rval.push_back(CRef<CAutoDefFeatureClause>(CAutoDefFeatureClause_Base::ClauseFromPhrase(*it, bh, cf, mapped_loc, first, false, opts)));
2334  first = false;
2335  }
     // At least one phrase was processed by the loop above, so `first` is always
     // false by the time the final phrase is added.
2336  rval.push_back(CRef<CAutoDefFeatureClause>(CAutoDefFeatureClause_Base::ClauseFromPhrase(last, bh, cf, mapped_loc, first, true, opts)));
2337 
2338  return rval;
2339 }
2340 
2341 
// Creates the clause object(s) that describe a single feature.
//
// The feature subtype (and a few content-based predicates) select the clause
// class. The tests form one if / else-if chain, so the first matching test wins
// and their order is significant.
//
// Parameters:
//   bh                  - bioseq handle passed to the clause constructors
//   cf                  - feature to build clauses for
//   mapped_loc          - location passed to the clause constructors
//   opts                - autodef options; also consulted for suppressed
//                         subtypes and for the misc-feature rule
//   is_single_misc_feat - affects misc_feature handling only: enables the
//                         promoter-and-5'UTR clause and disables the
//                         "do not create a clause" rule check
//
// Returns the clauses created for the feature; empty when the subtype is
// suppressed by opts, or when a misc_feature yields no clause under the
// configured misc-feature rule.
//
// NOTE(review): source line 2405 is missing from this listing (the embedded
// numbering skips it); it holds part of the misc-feature rule condition.
2342 vector<CRef<CAutoDefFeatureClause > > FeatureClauseFactory(CBioseq_Handle bh, const CSeq_feat& cf, const CSeq_loc& mapped_loc, const CAutoDefOptions& opts, bool is_single_misc_feat)
2343 {
2344  vector<CRef<CAutoDefFeatureClause> > rval;
2345 
2346  auto subtype = cf.GetData().GetSubtype();
2347 
     // Suppressed subtypes produce no clause at all.
2348  if (opts.IsFeatureSuppressed(subtype)) {
2349  return rval;
2350  }
2351 
2352  if (subtype == CSeqFeatData::eSubtype_gene) {
2353  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefGeneClause(bh, cf, mapped_loc, opts)));
2354  } else if (subtype == CSeqFeatData::eSubtype_ncRNA) {
2355  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefNcRNAClause(bh, cf, mapped_loc, opts)));
2356  } else if (subtype == CSeqFeatData::eSubtype_mobile_element) {
2357  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefMobileElementClause(bh, cf, mapped_loc, opts)));
2358  } else if (CAutoDefFeatureClause::IsSatellite(cf)) {
2359  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefSatelliteClause(bh, cf, mapped_loc, opts)));
2360  } else if (subtype == CSeqFeatData::eSubtype_otherRNA
2361  || subtype == CSeqFeatData::eSubtype_misc_RNA
2362  || subtype == CSeqFeatData::eSubtype_rRNA) {
     // otherRNA / misc_RNA / rRNA: use the spacer-derived clauses when there
     // are any, otherwise a plain feature clause.
2363  auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc, opts);
2364  if (misc_rna.empty()) {
2365  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefFeatureClause(bh, cf, mapped_loc, opts)));
2366  } else {
2367  for (auto it : misc_rna) {
2368  rval.push_back(it);
2369  }
2370  }
2371  } else if (CAutoDefFeatureClause::IsPromoter(cf)) {
2372  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefPromoterClause(bh, cf, mapped_loc, opts)));
2373  } else if (CAutoDefFeatureClause::IsGeneCluster(cf)) {
2374  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefGeneClusterClause(bh, cf, mapped_loc, opts)));
2375  } else if (CAutoDefFeatureClause::IsControlRegion(cf)) {
2376  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefFeatureClause(bh, cf, mapped_loc, opts)));
2377  }
     // NOTE(review): this branch is unreachable as written. eSubtype_otherRNA is
     // already consumed by the otherRNA / misc_RNA / rRNA test earlier in the
     // chain, so the AddtRNAAndOther fallback below is never tried for otherRNA
     // features. Decide whether the earlier test should drop otherRNA or this
     // branch should be removed.
2378  else if (subtype == CSeqFeatData::eSubtype_otherRNA) {
2379  auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc, opts);
2380  if (misc_rna.empty()) {
2381  // try to make trna clauses
2382  misc_rna = AddtRNAAndOther(bh, cf, mapped_loc, opts);
2383  }
2384  if (misc_rna.empty()) {
2385  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefFeatureClause(bh, cf, mapped_loc, opts)));
2386  }
2387  else {
2388  for (auto it : misc_rna) {
2389  rval.push_back(it);
2390  }
2391  }
     // A misc_feature that is the only one and matches the promoter-and-5'UTR
     // test gets its own clause type; all other misc_features use the next
     // branch.
2392  } else if (subtype == CSeqFeatData::eSubtype_misc_feature &&
2393  is_single_misc_feat && CAutoDefPromoterAnd5UTRClause::IsPromoterAnd5UTR(cf)) {
2394  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefPromoterAnd5UTRClause(bh, cf, mapped_loc, opts)));
2395  } else if (subtype == CSeqFeatData::eSubtype_misc_feature) {
     // Try spacer-derived clauses first, then comment-phrase clauses.
2396  auto misc_rna = AddMiscRNAFeatures(bh, cf, mapped_loc, opts);
2397  if (misc_rna.empty()) {
2398  // try to make trna clauses
2399  misc_rna = AddtRNAAndOther(bh, cf, mapped_loc, opts);
2400  }
2401  if (misc_rna.empty()) {
2402  // some misc-features may require more parsing
2403  CRef<CAutoDefFeatureClause> new_clause(new CAutoDefFeatureClause(bh, cf, mapped_loc, opts));
     // The misc-feature rule from opts decides what, if anything, is kept.
     // The rule test in the next condition is skipped entirely when
     // is_single_misc_feat is true. (Part of that condition, source line
     // 2405, is missing from this listing.)
2404  if (!is_single_misc_feat &&
2406  || (opts.GetMiscFeatRule() == CAutoDefOptions::eNoncodingProductFeat && !new_clause->IsNoncodingProductFeat()))) {
2407  // do not create a clause at all
2408  new_clause.Reset(NULL);
2409  } else if (opts.GetMiscFeatRule() == CAutoDefOptions::eCommentFeat) {
     // Comment rule: drop the generic clause and emit a comment-based
     // clause instead, but only when the comment is non-blank.
2410  new_clause.Reset(NULL);
2411  if (cf.CanGetComment() && !NStr::IsBlank(cf.GetComment())) {
2412  misc_rna.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefMiscCommentClause(bh, cf, mapped_loc, opts)));
2413  }
2414  } else {
2415  misc_rna.push_back(new_clause);
2416  }
2417  }
2418  if (!misc_rna.empty()) {
2419  for (auto it : misc_rna) {
2420  rval.push_back(it);
2421  }
2422  }
2423 
2424  } else {
     // Every other subtype gets a generic feature clause.
2425  rval.push_back(CRef<CAutoDefFeatureClause>(new CAutoDefFeatureClause(bh, cf, mapped_loc, opts)));
2426  }
2427  return rval;
2428 }
2429 
2430 
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
vector< CRef< CAutoDefFeatureClause > > AddMiscRNAFeatures(const CBioseq_Handle &bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
CAutoDefParsedtRNAClause * s_tRNAClauseFromNote(CBioseq_Handle bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, string comment, bool is_first, bool is_last, const CAutoDefOptions &opts)
vector< CRef< CAutoDefFeatureClause > > AddtRNAAndOther(const CBioseq_Handle &bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
const char * kMicrosatellite
const char * kSatellite
void s_UseCommentBeforeSemicolon(const CSeq_feat &feat, string &label)
static string mobile_element_keywords[]
vector< CRef< CAutoDefFeatureClause > > FeatureClauseFactory(CBioseq_Handle bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts, bool is_single_misc_feat)
const char * kMinisatellite
virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *parent_clause, bool gene_cluster_opp_strand) const
virtual void Label(bool suppress_allele)
virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base *parent_clause) const
CAutoDefFakePromoterClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual bool SameStrand(const CSeq_loc &loc) const
virtual CSeqFeatData::ESubtype GetMainFeatureSubtype() const
static vector< string > GetFeatureClausePhrases(string comment)
virtual CRef< CSeq_loc > GetLocation() const
virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base *) const
string ListClauses(bool allow_semicolons, bool suppress_final_and, bool suppress_allele)
static ERnaMiscWord x_GetRnaMiscWordType(const string &phrase)
virtual CAutoDefFeatureClause_Base * FindBestParentClause(CAutoDefFeatureClause_Base *subclause, bool gene_cluster_opp_strand)
static CRef< CAutoDefFeatureClause > ClauseFromPhrase(const string &phrase, CBioseq_Handle bh, const CSeq_feat &cf, const CSeq_loc &mapped_loc, bool first, bool last, const CAutoDefOptions &opts)
static vector< string > GetMiscRNAElements(const string &product)
virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *, bool) const
virtual sequence::ECompare CompareLocation(const CSeq_loc &loc) const
virtual bool IsEndogenousVirusSourceFeature() const
static vector< string > GetTrnaIntergenicSpacerClausePhrases(const string &comment)
static const string & x_GetRnaMiscWord(ERnaMiscWord word_type)
string x_GetGeneName(const CGene_ref &gref, bool suppress_locus_tag) const
bool x_GetFeatureTypeWord(string &typeword)
bool DoesmRNAProductNameMatch(const string &mrna_product) const
virtual bool OkToGroupUnderByType(const CAutoDefFeatureClause_Base *parent_clause) const
virtual bool IsExonWithNumber() const
virtual bool IsRecognizedFeature() const
virtual bool IsPartial() const
bool x_GetNoncodingProductFeatProduct(string &product) const
virtual bool IsPromoter() const
void x_GetOperonSubfeatures(string &interval)
virtual CAutoDefFeatureClause_Base * FindBestParentClause(CAutoDefFeatureClause_Base *subclause, bool gene_cluster_opp_strand)
virtual bool IsMobileElement() const
virtual bool IsInsertionSequence() const
virtual bool IsGeneCluster() const
static bool IsPseudo(const CSeq_feat &f)
virtual bool ShouldRemoveExons() const
virtual void Label(bool suppress_allele)
virtual CSeqFeatData::ESubtype GetMainFeatureSubtype() const
virtual bool IsSatelliteClause() const
CConstRef< CSeq_feat > m_pMainFeat
virtual bool OkToGroupUnderByLocation(const CAutoDefFeatureClause_Base *parent_clause, bool gene_cluster_opp_strand) const
virtual bool IsBioseqPrecursorRNA() const
virtual bool SameStrand(const CSeq_loc &loc) const
virtual void AddToLocation(CRef< CSeq_loc > loc, bool also_set_partials=true)
bool x_FindNoncodingFeatureKeywordProduct(string comment, string keyword, string &product_name) const
virtual sequence::ECompare CompareLocation(const CSeq_loc &loc) const
static bool IsSatellite(const CSeq_feat &feat)
virtual bool IsNoncodingProductFeat() const
virtual bool x_GetProductName(string &product_name)
virtual bool IsControlRegion() const
virtual bool AddmRNA(CAutoDefFeatureClause_Base *mRNAClause)
virtual bool AddGene(CAutoDefFeatureClause_Base *gene_clause, bool suppress_allele)
virtual CRef< CSeq_loc > GetLocation() const
bool x_GetExonDescription(string &description)
bool x_GetGenericInterval(string &interval, bool suppress_allele)
virtual bool IsEndogenousVirusSourceFeature() const
bool x_ShowTypewordFirst(string typeword)
bool x_GetDescription(string &description)
virtual EClauseType GetClauseType() const
CAutoDefGeneClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual bool x_GetProductName(string &product_name)
virtual void Label(bool suppress_allele)
CAutoDefGeneClusterClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
void InitWithString(string comment, bool suppress_allele)
virtual void Label(bool suppress_allele)
CAutoDefIntergenicSpacerClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual void Label(bool suppress_allele)
CAutoDefMiscCommentClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual void Label(bool suppress_allele)
CAutoDefMobileElementClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual bool x_GetProductName(string &product_name)
CAutoDefNcRNAClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
TMiscFeatRule GetMiscFeatRule() const
bool IsFeatureSuppressed(CSeqFeatData::ESubtype subtype) const
void SetTypewordFirst(bool typeword_first)
CAutoDefParsedClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, bool is_first, bool is_last, const CAutoDefOptions &opts)
void SetTypeword(string typeword)
void SetMiscRNAWord(const string &phrase)
CAutoDefParsedIntergenicSpacerClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const string &description, bool is_first, bool is_last, const CAutoDefOptions &opts)
CAutoDefParsedRegionClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, string product, const CAutoDefOptions &opts)
virtual void Label(bool suppress_allele)
CAutoDefParsedtRNAClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, string gene_name, string product_name, bool is_first, bool is_last, const CAutoDefOptions &opts)
static bool ParseString(string comment, string &gene_name, string &product_name)
virtual void Label(bool suppress_allele)
static bool IsPromoterAnd5UTR(const CSeq_feat &feat)
CAutoDefPromoterAnd5UTRClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
CAutoDefPromoterClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual void Label(bool suppress_allele)
CAutoDefSatelliteClause(CBioseq_Handle bh, const CSeq_feat &main_feat, const CSeq_loc &mapped_loc, const CAutoDefOptions &opts)
virtual void Label(bool suppress_allele)
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
bool IsSuppressed(void) const
Definition: Gene_ref.cpp:75
string GetRnaProductName(void) const
Definition: RNA_ref.cpp:145
CRef –.
Definition: ncbiobj.hpp:618
ESubtype GetSubtype(void) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:742
string GetLabel(const CSeq_id &id)
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
@ fMerge_Overlapping
Definition: Seq_loc.hpp:330
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
ECompare
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
Definition: sequence.cpp:1529
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
const CSeqFeatData & GetData(void) const
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
Definition: ncbistr.cpp:3238
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
Definition: BioSource_.hpp:533
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Gene_ref_.hpp:599
bool IsSetPseudo(void) const
pseudogene Check if a value has been assigned to Pseudo data member.
Definition: Gene_ref_.hpp:681
bool CanGetLocus(void) const
Check if it is safe to call GetLocus method.
Definition: Gene_ref_.hpp:499
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data member.
Definition: Gene_ref_.hpp:781
bool IsSetDesc(void) const
descriptive name Check if a value has been assigned to Desc data member.
Definition: Gene_ref_.hpp:587
bool CanGetAllele(void) const
Check if it is safe to call GetAllele method.
Definition: Gene_ref_.hpp:546
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
Definition: Gene_ref_.hpp:593
bool CanGetPseudo(void) const
Check if it is safe to call GetPseudo method.
Definition: Gene_ref_.hpp:687
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
const TAllele & GetAllele(void) const
Get the Allele member data.
Definition: Gene_ref_.hpp:552
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
bool IsSetProduct(void) const
Check if a value has been assigned to Product data member.
Definition: RNA_gen_.hpp:294
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
bool IsSetClass(void) const
for ncRNAs, the class of non-coding RNA: examples: antisense_RNA, guide_RNA, snRNA Check if a value has been assigned to Class data member.
Definition: RNA_gen_.hpp:247
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: RNA_gen_.hpp:306
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
E_Choice Which(void) const
Which variant is currently selected.
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TBiosrc & GetBiosrc(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
const TProt & GetProt(void) const
Get the variant data.
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsRna(void) const
Check if variant Rna is selected.
bool CanGetPseudo(void) const
Check if it is safe to call GetPseudo method.
Definition: Seq_feat_.hpp:1352
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
Definition: Seq_feat_.hpp:1043
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ eBiomol_pre_RNA
precursor RNA of any sort really
Definition: MolInfo_.hpp:102
@ eBiomol_cRNA
viral RNA genome copy intermediate
Definition: MolInfo_.hpp:111
@ eBiomol_other_genetic
other genetic material
Definition: MolInfo_.hpp:109
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
use n only the LMDB cache</td > n</tr > n< tr > n< td ></td > n< td > use the LMDB cache if at all possible
int len
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int islower(Uchar c)
Definition: ncbictype.hpp:66
int isupper(Uchar c)
Definition: ncbictype.hpp:70
Miscellaneous common-use basic types and functionality.
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static const char * prefix[]
Definition: pcregrep.c:405
Modified on Thu Mar 28 17:05:59 2024 by modify_doxy.py rev. 669887