NCBI C++ ToolKit
readfeat.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1  /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Michael Kornbluh
27  *
28  * File Description:
29  * Feature table reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbithr.hpp>
36 
37 #include <util/static_map.hpp>
38 
39 #include <serial/iterator.hpp>
40 #include <serial/objistrasn.hpp>
41 
42 // Objects includes
48 
53 
57 #include <objects/pub/Pub.hpp>
59 #include <objects/seq/Pubdesc.hpp>
62 
81 
83 
86 
89 #include <objtools/error_codes.hpp>
90 
91 #include <algorithm>
92 #include <unordered_set>
93 
96 #include "best_feat_finder.hpp"
97 
98 #define NCBI_USE_ERRCODE_X Objtools_Rd_Feature
99 
100 
102 
103 BEGIN_objects_SCOPE // namespace ncbi::objects::
104 
105 
106 
107 namespace {
108  static const char * const kCdsFeatName = "CDS";
109  // priorities, inherited from C toolkit
110  static Uchar std_order[CSeq_id::e_MaxChoice] = {
111  83, /* 0 = not set */
112  80, /* 1 = local Object-id */
113  70, /* 2 = gibbsq */
114  70, /* 3 = gibbmt */
115  70, /* 4 = giim Giimport-id */
116  60, /* 5 = genbank */
117  60, /* 6 = embl */
118  60, /* 7 = pir */
119  60, /* 8 = swissprot */
120  81, /* 9 = patent */
121  65, /* 10 = other TextSeqId */
122  80, /* 11 = general Dbtag */
123  82, /* 12 = gi */
124  60, /* 13 = ddbj */
125  60, /* 14 = prf */
126  60, /* 15 = pdb */
127  60, /* 16 = tpg */
128  60, /* 17 = tpe */
129  60, /* 18 = tpd */
130  68, /* 19 = gpp */
131  69 /* 20 = nat */
132  };
133 
135 {
136  if (ids.size() == 1)
137  return ids.front();
138 
139  CRef<CSeq_id> id;
140  if (!ids.empty())
141  {
142  Uchar best_weight = UCHAR_MAX;
143  ITERATE(CBioseq::TId, it, ids)
144  {
145  Uchar new_weight = std_order[(*it)->Which()];
146  if (new_weight < best_weight)
147  {
148  id = *it;
149  best_weight = new_weight;
150  }
151  };
152  }
153 
154  return id;
155 }
156 
157 
// Maps each IUPAC nucleotide code to the list of concrete bases it can
// stand for. Single-base codes map to themselves.
map<char, list<char>> s_IUPACmap
{
    // unambiguous codes
    {'A', {'A'}},
    {'G', {'G'}},
    {'C', {'C'}},
    {'T', {'T'}},
    {'U', {'U'}},

    // two-base ambiguity codes
    {'M', {'A', 'C'}},
    {'R', {'A', 'G'}},
    {'W', {'A', 'T'}},
    {'S', {'C', 'G'}},
    {'Y', {'C', 'T'}},
    {'K', {'G', 'T'}},

    // three-base ambiguity codes
    {'V', {'A', 'C', 'G'}},
    {'H', {'A', 'C', 'T'}},
    {'D', {'A', 'G', 'T'}},
    {'B', {'C', 'G', 'T'}},

    // any base
    {'N', {'A', 'C', 'G', 'T'}}
};
177 
178 }
179 
180 
181 class /* NCBI_XOBJREAD_EXPORT */ CFeatureTableReader_Imp
182 {
183 public:
184  enum EQual {
285  };
286 
287  enum EOrgRef {
294  };
295 
298 
299  // constructor
300  CFeatureTableReader_Imp(ILineReader* reader, unsigned int line_num, ILineErrorListener* pMessageListener);
301  // destructor
303 
304  // read 5-column feature table and return Seq-annot
306  const CTempString& annotname,
307  const TFlags flags,
308  ITableFilter *filter);
309 
310  // create single feature from key
311  CRef<CSeq_feat> CreateSeqFeat (const string& feat,
313  const TFlags flags,
314  const string &seq_id,
315  ITableFilter *filter);
316 
317  // add single qualifier to feature
318  void AddFeatQual (CRef<CSeq_feat> sfp,
319  const string& feat_name,
320  const string& qual,
321  const string& val,
322  const TFlags flags,
323  const string &seq_id );
324 
325  static bool ParseInitialFeatureLine (
326  const CTempString& line_arg,
327  CTempStringEx& out_seqid,
328  CTempStringEx& out_annotname );
329 
330  static void PutProgress(const CTempString& seq_id,
331  const unsigned int line_number,
332  ILineErrorListener* pListener);
333 
335  return m_reader;
336  }
337 
339  return m_pMessageListener;
340  }
341 
342 private:
343 
344  unsigned int x_GetLineNumber() const;
345 
346  // Prohibit copy constructor and assignment operator
349 
350  void x_InitId(const CTempString& seq_id, const TFlags flags);
351  // returns true if parsed (otherwise, out_offset is left unchanged)
352  bool x_TryToParseOffset(const CTempString & sLine, Int4 & out_offset );
353 
354 
355  struct SFeatLocInfo {
360  bool is_point;
362  };
363 
364 
366  const CTempString& line,
367  SFeatLocInfo& loc_info,
368  string& feat,
369  string& qual,
370  string& val,
371  Int4 offset);
372 
373 
374  bool x_IsWebComment(CTempString line);
375 
377  CTempString strFeatureName,
378  CRef<CSeq_feat>& sfp,
379  const SFeatLocInfo& loc_info);
380 
382  const string &feat_name,
383  const string& qual, const string& val,
384  const TFlags flags);
385 
386  void x_ProcessQualifier(const string& qual_name,
387  const string& qual_val,
388  const string& feat_name,
389  CRef<CSeq_feat> feat,
390  TFlags flags);
391 
392  bool x_AddQualifierToGene (CSeqFeatData& sfdata,
393  EQual qtype, const string& val);
395  EQual qtype, const string& val);
397  EQual qtype, const string& val);
399  EQual qtype, const string& qual, const string& val);
400  bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
401  const string &feat_name,
402  EOrgRef rtype, const string& val);
403  bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
404  CSubSource::ESubtype stype, const string& val);
405  bool x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
406  COrgMod::ESubtype mtype, const string& val);
407 
408  bool x_AddNoteToFeature(CRef<CSeq_feat> sfp, const string& note);
409 
411  const string& feat_name,
412  const string& qual,
413  const string& val);
414 
416  const string& qual, const string& val);
417 
418  bool x_AddCodons(const string& val, CTrna_ext& trna_ext) const;
419 
423  TFeatConstRef pFeat,
424  TSeqPos uLineNum ) :
425  m_pFeat(pFeat), m_uLineNum(uLineNum) {
426  _ASSERT(pFeat);
427  }
428 
429  bool operator==(const SFeatAndLineNum & rhs) const {
430  return Compare(rhs) == 0; }
431  bool operator!=(const SFeatAndLineNum & rhs) const {
432  return Compare(rhs) != 0; }
433  bool operator<(const SFeatAndLineNum & rhs) const {
434  return Compare(rhs) < 0; }
435 
436  int Compare(const SFeatAndLineNum & rhs) const {
437  if( m_uLineNum != rhs.m_uLineNum ) {
438  return ( m_uLineNum < rhs.m_uLineNum ? -1 : 1 );
439  }
440  return (m_pFeat.GetPointerOrNull() < rhs.m_pFeat.GetPointerOrNull() ? -1 : 1 );
441  }
442 
443  TFeatConstRef m_pFeat; // must be non-NULL
444  TSeqPos m_uLineNum; // the line where this feature was created (or zero if programmatically created)
445  };
448  CRef<CSeq_annot> sap,
449  TChoiceToFeatMap & choiceToFeatMap, // an input param, but might get more items added
450  const TFlags flags);
451 
452  bool x_StringIsJustQuotes (const string& str);
453 
454  string x_TrnaToAaString(const string& val);
455 
456  bool x_ParseTrnaExtString(CTrna_ext & ext_trna, const string & str);
457  SIZE_TYPE x_MatchingParenPos( const string &str, SIZE_TYPE open_paren_pos );
458 
459  long x_StringToLongNoThrow (
460  CTempString strToConvert,
461  CTempString strFeatureName,
462  CTempString strQualifierName,
463  // user can override the default problem types that are set on error
465  );
466 
467  bool x_SetupSeqFeat (CRef<CSeq_feat> sfp, const string& feat,
468  const TFlags flags,
469  ITableFilter *filter);
470 
472  ILineError::EProblem eProblem,
473  EDiagSev eSeverity,
474  const std::string & strFeatureName = kEmptyStr,
475  const std::string & strQualifierName = kEmptyStr,
476  const std::string & strQualifierValue = kEmptyStr,
477  const std::string & strErrorMessage = kEmptyStr,
478  const ILineError::TVecOfLines & vecOfOtherLines =
480 
482  int line_num,
483  ILineError::EProblem eProblem,
484  EDiagSev eSeverity,
485  const std::string & strFeatureName = kEmptyStr,
486  const std::string & strQualifierName = kEmptyStr,
487  const std::string & strQualifierValue = kEmptyStr,
488  const std::string & strErrorMessage = kEmptyStr,
489  const ILineError::TVecOfLines & vecOfOtherLines =
491 
492  void x_TokenizeStrict( const CTempString &line, vector<string> &out_tokens );
493  void x_TokenizeLenient( const CTempString &line, vector<string> &out_tokens );
495  void x_ResetFeat(CRef<CSeq_feat>& feat, bool & curr_feat_intervals_done);
496  void x_UpdatePointStrand(CSeq_feat& feat, CSeq_interval::TStrand strand) const;
497  void x_GetPointStrand(const CSeq_feat& feat, CSeq_interval::TStrand& strand) const;
498 
501  string m_real_seqid;
504  unsigned int m_LineNumber;
506  unordered_set<string> m_ProcessedTranscriptIds;
507  unordered_set<string> m_ProcessedProteinIds;
509 };
510 
511 
513 
514 static const TQualKey qual_key_to_subtype [] = {
516  { "PCR_conditions", CFeatureTableReader_Imp::eQual_PCR_conditions },
528  { "codon_recognized", CFeatureTableReader_Imp::eQual_codon_recognized },
530  { "codons_recognized", CFeatureTableReader_Imp::eQual_codon_recognized },
537  { "estimated_length", CFeatureTableReader_Imp::eQual_estimated_length },
548  { "gene_synonym", CFeatureTableReader_Imp::eQual_gene_syn },
556  { "linkage_evidence", CFeatureTableReader_Imp::eQual_linkage_evidence },
562  { "mobile_element_type", CFeatureTableReader_Imp::eQual_mobile_element_type },
581  { "regulatory_class", CFeatureTableReader_Imp::eQual_regulatory_class },
583  { "ribosomal_slippage", CFeatureTableReader_Imp::eQual_ribosomal_slippage },
587  { "rpt_unit_range", CFeatureTableReader_Imp::eQual_rpt_unit_range },
591  { "secondary_accession", CFeatureTableReader_Imp::eQual_secondary_accession },
592  { "secondary_accessions", CFeatureTableReader_Imp::eQual_secondary_accession },
606  { "trans_splicing", CFeatureTableReader_Imp::eQual_trans_splicing },
615 };
616 
619 
620 
622 
630 };
631 
634 
635 
637 
639  { "apicoplast", CBioSource::eGenome_apicoplast },
640  { "chloroplast", CBioSource::eGenome_chloroplast },
641  { "chromatophore", CBioSource::eGenome_chromatophore },
642  { "chromoplast", CBioSource::eGenome_chromoplast },
643  { "chromosome", CBioSource::eGenome_chromosome },
644  { "cyanelle", CBioSource::eGenome_cyanelle },
645  { "endogenous_virus", CBioSource::eGenome_endogenous_virus },
646  { "extrachrom", CBioSource::eGenome_extrachrom },
647  { "genomic", CBioSource::eGenome_genomic },
648  { "hydrogenosome", CBioSource::eGenome_hydrogenosome },
649  { "insertion_seq", CBioSource::eGenome_insertion_seq },
650  { "kinetoplast", CBioSource::eGenome_kinetoplast },
651  { "leucoplast", CBioSource::eGenome_leucoplast },
652  { "macronuclear", CBioSource::eGenome_macronuclear },
653  { "mitochondrion", CBioSource::eGenome_mitochondrion },
654  { "mitochondrion:kinetoplast", CBioSource::eGenome_kinetoplast },
655  { "nucleomorph", CBioSource::eGenome_nucleomorph },
656  { "plasmid", CBioSource::eGenome_plasmid },
657  { "plastid", CBioSource::eGenome_plastid },
658  { "plastid:apicoplast", CBioSource::eGenome_apicoplast },
659  { "plastid:chloroplast", CBioSource::eGenome_chloroplast },
660  { "plastid:chromoplast", CBioSource::eGenome_chromoplast },
661  { "plastid:cyanelle", CBioSource::eGenome_cyanelle },
662  { "plastid:leucoplast", CBioSource::eGenome_leucoplast },
663  { "plastid:proplastid", CBioSource::eGenome_proplastid },
664  { "proplastid", CBioSource::eGenome_proplastid },
665  { "proviral", CBioSource::eGenome_proviral },
666  { "transposon", CBioSource::eGenome_transposon },
667  { "unknown", CBioSource::eGenome_unknown },
668  { "virion", CBioSource::eGenome_virion }
669 };
670 
673 
674 
676 
678  { "altitude", CSubSource::eSubtype_altitude },
679  { "cell_line", CSubSource::eSubtype_cell_line },
680  { "cell_type", CSubSource::eSubtype_cell_type },
681  { "chromosome", CSubSource::eSubtype_chromosome },
682  { "clone", CSubSource::eSubtype_clone },
683  { "clone_lib", CSubSource::eSubtype_clone_lib },
684  { "collected_by", CSubSource::eSubtype_collected_by },
685  { "collection_date", CSubSource::eSubtype_collection_date },
686  { "country", CSubSource::eSubtype_country },
687  { "dev_stage", CSubSource::eSubtype_dev_stage },
688  { "endogenous_virus", CSubSource::eSubtype_endogenous_virus_name },
689  { "environmental_sample", CSubSource::eSubtype_environmental_sample },
690  { "frequency", CSubSource::eSubtype_frequency },
691  { "fwd_primer_name", CSubSource::eSubtype_fwd_primer_name },
692  { "fwd_primer_seq", CSubSource::eSubtype_fwd_primer_seq },
693  { "genotype", CSubSource::eSubtype_genotype },
694  { "geo_loc_name", CSubSource::eSubtype_country },
695  { "germline", CSubSource::eSubtype_germline },
696  { "haplotype", CSubSource::eSubtype_haplotype },
697  { "identified_by", CSubSource::eSubtype_identified_by },
698  { "insertion_seq", CSubSource::eSubtype_insertion_seq_name },
699  { "isolation_source", CSubSource::eSubtype_isolation_source },
700  { "lab_host", CSubSource::eSubtype_lab_host },
701  { "lat_lon", CSubSource::eSubtype_lat_lon },
702  { "map", CSubSource::eSubtype_map },
703  { "metagenomic", CSubSource::eSubtype_metagenomic },
704  { "plasmid", CSubSource::eSubtype_plasmid_name },
705  { "plastid", CSubSource::eSubtype_plastid_name },
706  { "pop_variant", CSubSource::eSubtype_pop_variant },
707  { "rearranged", CSubSource::eSubtype_rearranged },
708  { "rev_primer_name", CSubSource::eSubtype_rev_primer_name },
709  { "rev_primer_seq", CSubSource::eSubtype_rev_primer_seq },
710  { "segment", CSubSource::eSubtype_segment },
711  { "sex", CSubSource::eSubtype_sex },
712  { "subclone", CSubSource::eSubtype_subclone },
713  { "tissue_lib ", CSubSource::eSubtype_tissue_lib },
714  { "tissue_type", CSubSource::eSubtype_tissue_type },
715  { "transgenic", CSubSource::eSubtype_transgenic },
716  { "transposon", CSubSource::eSubtype_transposon_name }
717 };
718 
721 
722 // case-insensitive version of sm_SubSrcKeys
725  TSubSrcNoCaseMap, sm_SubSrcNoCaseKeys, subsrc_key_to_subtype);
726 
728 
730  { "acronym", COrgMod::eSubtype_acronym },
731  { "anamorph", COrgMod::eSubtype_anamorph },
732  { "authority", COrgMod::eSubtype_authority },
733  { "bio_material", COrgMod::eSubtype_bio_material },
734  { "biotype", COrgMod::eSubtype_biotype },
735  { "biovar", COrgMod::eSubtype_biovar },
736  { "breed", COrgMod::eSubtype_breed },
737  { "chemovar", COrgMod::eSubtype_chemovar },
738  { "common", COrgMod::eSubtype_common },
739  { "cultivar", COrgMod::eSubtype_cultivar },
740  { "culture_collection", COrgMod::eSubtype_culture_collection },
741  { "dosage", COrgMod::eSubtype_dosage },
742  { "ecotype", COrgMod::eSubtype_ecotype },
743  { "forma", COrgMod::eSubtype_forma },
744  { "forma_specialis", COrgMod::eSubtype_forma_specialis },
745  { "gb_acronym", COrgMod::eSubtype_gb_acronym },
746  { "gb_anamorph", COrgMod::eSubtype_gb_anamorph },
747  { "gb_synonym", COrgMod::eSubtype_gb_synonym },
748  { "group", COrgMod::eSubtype_group },
749  { "isolate", COrgMod::eSubtype_isolate },
750  { "metagenome_source", COrgMod::eSubtype_metagenome_source },
751  { "nat_host", COrgMod::eSubtype_nat_host },
752  { "natural_host", COrgMod::eSubtype_nat_host },
753  { "old_lineage", COrgMod::eSubtype_old_lineage },
754  { "old_name", COrgMod::eSubtype_old_name },
755  { "pathovar", COrgMod::eSubtype_pathovar },
756  { "serogroup", COrgMod::eSubtype_serogroup },
757  { "serotype", COrgMod::eSubtype_serotype },
758  { "serovar", COrgMod::eSubtype_serovar },
759  { "spec_host", COrgMod::eSubtype_nat_host },
760  { "specific_host", COrgMod::eSubtype_nat_host },
761  { "specimen_voucher", COrgMod::eSubtype_specimen_voucher },
762  { "strain", COrgMod::eSubtype_strain },
763  { "sub_species", COrgMod::eSubtype_sub_species },
764  { "subgroup", COrgMod::eSubtype_subgroup },
765  { "substrain", COrgMod::eSubtype_substrain },
766  { "subtype", COrgMod::eSubtype_subtype },
767  { "synonym", COrgMod::eSubtype_synonym },
768  { "teleomorph", COrgMod::eSubtype_teleomorph },
769  { "type", COrgMod::eSubtype_type },
770  { "type_material", COrgMod::eSubtype_type_material },
771  { "variety", COrgMod::eSubtype_variety }
772 };
773 
776 
778 {
779  { "Ala", 'A' },
780  { "Alanine", 'A' },
781  { "Arg", 'R' },
782  { "Arginine", 'R' },
783  { "Asn", 'N' },
784  { "Asp", 'D' },
785  { "Asp or Asn", 'B' },
786  { "Asparagine", 'N' },
787  { "Aspartate", 'D' },
788  { "Aspartic Acid", 'D' },
789  { "Asx", 'B' },
790  { "Cys", 'C' },
791  { "Cysteine", 'C' },
792  { "Gln", 'Q' },
793  { "Glu", 'E' },
794  { "Glu or Gln", 'Z' },
795  { "Glutamate", 'E' },
796  { "Glutamic Acid", 'E' },
797  { "Glutamine", 'Q' },
798  { "Glx", 'Z' },
799  { "Gly", 'G' },
800  { "Glycine", 'G' },
801  { "His", 'H' },
802  { "Histidine", 'H' },
803  { "Ile", 'I' },
804  { "Ile2", 'I' },
805  { "Isoleucine", 'I' },
806  { "Leu", 'L' },
807  { "Leu or Ile", 'J' },
808  { "Leucine", 'L' },
809  { "Lys", 'K' },
810  { "Lysine", 'K' },
811  { "Met", 'M' },
812  { "Methionine", 'M' },
813  { "OTHER", 'X' },
814  { "Phe", 'F' },
815  { "Phenylalanine", 'F' },
816  { "Pro", 'P' },
817  { "Proline", 'P' },
818  { "Pyl", 'O' },
819  { "Pyrrolysine", 'O' },
820  { "Sec", 'U' },
821  { "Selenocysteine", 'U' },
822  { "Ser", 'S' },
823  { "Serine", 'S' },
824  { "TERM", '*' },
825  { "Ter", '*' },
826  { "Termination", '*' },
827  { "Thr", 'T' },
828  { "Threonine", 'T' },
829  { "Trp", 'W' },
830  { "Tryptophan", 'W' },
831  { "Tyr", 'Y' },
832  { "Tyrosine", 'Y' },
833  { "Val", 'V' },
834  { "Valine", 'V' },
835  { "Xle", 'J' },
836  { "Xxx", 'X' },
837  { "Undet", 'X' },
838  { "fMet", 'M' },
839  { "iMet", 'M' }
840 };
841 
842 
843 static
846  "environmental_sample",
847  "germline",
848  "metagenomic",
849  "partial",
850  "pseudo",
851  "rearranged",
852  "ribosomal_slippage",
853  "trans_splicing",
854  "transgenic",
855  "replace" // RW-882
856 };
857 
858 // constructor
860  : m_reader(reader), m_LineNumber(line_num), m_pMessageListener(pMessageListener)
861 {
862 }
863 
864 // destructor
866 {
867 }
868 
869 
871  const CTempString & sLine, Int4 & out_offset )
872 {
873  // offset strings are of the form [offset=SOME_NUMBER], but here we try
874  // to be as forgiving of whitespace as possible.
875 
876  CTempString sKey;
877  CTempString sValue;
878  if( ! NStr::SplitInTwo(sLine, "=", sKey, sValue) ) {
879  // "=" not found
880  return false;
881  }
882 
883  // check key
885  if( NStr::StartsWith(sKey, "[") ) {
886  sKey = sKey.substr(1); // remove initial "["
887  }
889  if( ! NStr::EqualNocase(sKey, "offset") ) {
890  // key is not offset
891  return false;
892  }
893 
894  // check value
896  if( ! NStr::EndsWith(sValue, "]") ) {
897  // no closing bracket
898  return false;
899  }
900  // remove closing bracket
901  sValue = sValue.substr(0, (sValue.length() - 1) );
903  // is it a number?
904  try {
905  Int4 new_offset = NStr::StringToInt(sValue);
906  // if( new_offset < 0 ) {
907  // return false;
908  // }
909  out_offset = new_offset;
910  return true;
911  } catch ( CStringException & ) {
912  return false;
913  }
914 }
915 
917  const CTempString& line,
918  SFeatLocInfo& loc_info,
919  string& featP,
920  string& qualP,
921  string& valP,
922  Int4 offset
923 )
924 
925 {
926  SIZE_TYPE numtkns;
927  bool isminus = false;
928  bool ispoint = false;
929  size_t len;
930  bool partial5 = false;
931  bool partial3 = false;
932  Int4 startv = -1;
933  Int4 stopv = -1;
934  Int4 swp;
935  string start, stop, feat, qual, val, stnd;
936  vector<string> tkns;
937 
938 
939  if (line.empty ()) return false;
940 
941  /* offset and other instructions encoded in brackets */
942  if (NStr::StartsWith (line, '[')) return false;
943 
944  tkns.clear ();
945  x_TokenizeLenient(line, tkns);
946  numtkns = tkns.size ();
947 
948  if (numtkns > 0) {
949  start = NStr::TruncateSpaces(tkns[0]);
950  }
951  if (numtkns > 1) {
952  stop = NStr::TruncateSpaces(tkns[1]);
953  }
954  if (numtkns > 2) {
955  feat = NStr::TruncateSpaces(tkns[2]);
956  }
957  if (numtkns > 3) {
958  qual = NStr::TruncateSpaces(tkns[3]);
959  }
960  if (numtkns > 4) {
961  val = NStr::TruncateSpaces(tkns[4]);
962  // trim enclosing double-quotes
963  if( val.length() >= 2 && val[0] == '"' && val[val.length()-1] == '"' ) {
964  val = val.substr(1, val.length() - 2);
965  }
966  }
967  if (numtkns > 5) {
968  stnd = NStr::TruncateSpaces(tkns[5]);
969  }
970 
971  bool has_start = false;
972  if (! start.empty ()) {
973  if (start [0] == '<') {
974  partial5 = true;
975  start.erase (0, 1);
976  }
977  len = start.length ();
978  if (len > 1 && start [len - 1] == '^') {
979  ispoint = true;
980  start [len - 1] = '\0';
981  }
982  startv = x_StringToLongNoThrow(start, feat, qual,
984  has_start = true;
985  }
986 
987  bool has_stop = false;
988  if (! stop.empty ()) {
989  if (stop [0] == '>') {
990  partial3 = true;
991  stop.erase (0, 1);
992  }
993  stopv = x_StringToLongNoThrow (stop, feat, qual,
995  has_stop = true;
996  }
997 
998  if ( startv <= 0 || stopv <= 0 ) {
999  startv = -1;
1000  stopv = -1;
1001  } else {
1002  startv--;
1003  stopv--;
1004  if (! stnd.empty ()) {
1005  if (stnd == "minus" || stnd == "-" || stnd == "complement") {
1006  if (start < stop) {
1007  swp = startv;
1008  startv = stopv;
1009  stopv = swp;
1010  }
1011  isminus = true;
1012  }
1013  }
1014  }
1015 
1016  if (startv >= 0) {
1017  startv += offset;
1018  }
1019  if (stopv >= 0) {
1020  stopv += offset;
1021  }
1022 
1023  if ((has_start && startv < 0) || (has_stop && stopv < 0)) {
1024  x_ProcessMsg(
1026  eDiag_Error,
1027  feat);
1028  }
1029 
1030  loc_info.start_pos = ( startv < 0 ? -1 : startv);
1031  loc_info.stop_pos = ( stopv < 0 ? -1 : stopv);
1032 
1033  loc_info.is_5p_partial = partial5;
1034  loc_info.is_3p_partial = partial3;
1035  loc_info.is_point = ispoint;
1036  loc_info.is_minus_strand = isminus;
1037  featP = feat;
1038  qualP = qual;
1039  valP = val;
1040 
1041  return true;
1042 }
1043 
1044 
1046  const CTempString &line,
1047  vector<string> &out_tokens )
1048 {
1049  out_tokens.clear();
1050 
1051  // each token has spaces before it and a tab or end-of-line after it
1052  string::size_type startPosOfNextRoundOfTokenization = 0;
1053  while ( startPosOfNextRoundOfTokenization < line.size() ) {
1054  auto posAfterSpaces = line.find_first_not_of( " ", startPosOfNextRoundOfTokenization );
1055  if( posAfterSpaces == string::npos ) {
1056  return;
1057  }
1058 
1059  string::size_type posOfTab = line.find( '\t', posAfterSpaces );
1060  if( posOfTab == string::npos ) {
1061  posOfTab = line.length();
1062  }
1063 
1064  // The next token is between the spaces and the tab (or end of string)
1065  out_tokens.push_back(kEmptyStr);
1066  string &new_token = out_tokens.back();
1067  copy( line.begin() + posAfterSpaces, line.begin() + posOfTab, back_inserter(new_token) );
1068  NStr::TruncateSpacesInPlace( new_token );
1069 
1070  startPosOfNextRoundOfTokenization = ( posOfTab + 1 );
1071  }
1072 }
1073 
// Predicate functor reporting whether a character is whitespace.
// A functor is used since some compilers won't accept isspace directly as
// the predicate argument of find_if.
class CIsSpace {
public:
    // Returns true if c is a whitespace character according to isspace().
    bool operator()( char c ) const {
        // isspace() is only defined for EOF and for values representable as
        // unsigned char, so a plain char (which may be negative for bytes
        // >= 0x80) has to be converted before the call.
        return isspace( static_cast<unsigned char>(c) ) != 0;
    }
};
1079 
1081 public:
1082  bool operator()( char c ) { return ! isspace(c); }
1083 };
1084 
1086  const CTempString &line,
1087  vector<string> &out_tokens )
1088 {
1089  out_tokens.clear();
1090 
1091  if( line.empty() ) {
1092  return;
1093  }
1094 
1095  // if it starts with whitespace, it must be a qual line, else it's a feature line
1096  if( isspace(line[0]) ) {
1097  // In regex form, we're doing something like this:
1098  // \s+(\S+)(\s+(\S.*))?
1099  // Where the first is the qual, and the rest is the val
1100  auto start_of_qual = find_if( line.begin(), line.end(), CIsNotSpace() );
1101  if( start_of_qual == line.end() ) {
1102  return;
1103  }
1104  auto start_of_whitespace_after_qual = find_if( start_of_qual, line.end(), CIsSpace() );
1105  auto start_of_val = find_if( start_of_whitespace_after_qual, line.end(), CIsNotSpace() );
1106 
1107  // first 3 are empty
1108  out_tokens.push_back(kEmptyStr);
1109  out_tokens.push_back(kEmptyStr);
1110  out_tokens.push_back(kEmptyStr);
1111 
1112  // then qual
1113  out_tokens.push_back(kEmptyStr);
1114  string &qual = out_tokens.back();
1115  copy( start_of_qual, start_of_whitespace_after_qual, back_inserter(qual) );
1116 
1117  // then val
1118  if( start_of_val != line.end() ) {
1119  out_tokens.push_back(kEmptyStr);
1120  string &val = out_tokens.back();
1121  copy( start_of_val, line.end(), back_inserter(val) );
1123  }
1124 
1125  } else {
1126  // parse a feature line
1127 
1128  // Since we're being lenient, we consider it to be 3 ( or 6 ) parts separated by whitespace
1129  auto first_column_start = line.begin();
1130  auto first_whitespace = find_if( first_column_start, line.end(), CIsSpace() );
1131  auto second_column_start = find_if( first_whitespace, line.end(), CIsNotSpace() );
1132  auto second_whitespace = find_if( second_column_start, line.end(), CIsSpace() );
1133  auto third_column_start = find_if( second_whitespace, line.end(), CIsNotSpace() );
1134  auto third_whitespace = find_if( third_column_start, line.end(), CIsSpace() );
1135  // columns 4 and 5 are unused on feature lines
1136  auto sixth_column_start = find_if( third_whitespace, line.end(), CIsNotSpace() );
1137  auto sixth_whitespace = find_if( sixth_column_start, line.end(), CIsSpace() );
1138 
1139  out_tokens.push_back(kEmptyStr);
1140  string &first = out_tokens.back();
1141  copy( first_column_start, first_whitespace, back_inserter(first) );
1142 
1143  out_tokens.push_back(kEmptyStr);
1144  string &second = out_tokens.back();
1145  copy( second_column_start, second_whitespace, back_inserter(second) );
1146 
1147  out_tokens.push_back(kEmptyStr);
1148  string &third = out_tokens.back();
1149  copy( third_column_start, third_whitespace, back_inserter(third) );
1150 
1151  if( sixth_column_start != line.end() ) {
1152  // columns 4 and 5 are unused
1153  out_tokens.push_back(kEmptyStr);
1154  out_tokens.push_back(kEmptyStr);
1155 
1156  out_tokens.push_back(kEmptyStr);
1157  string &sixth = out_tokens.back();
1158  copy( sixth_column_start, sixth_whitespace, back_inserter(sixth) );
1159  }
1160  }
1161 }
1162 
1163 
1165  CSeqFeatData& sfdata,
1166  EQual qtype,
1167  const string& val
1168 )
1169 
1170 {
1171  CGene_ref& grp = sfdata.SetGene ();
1172  switch (qtype) {
1173  case eQual_gene:
1174  grp.SetLocus (val);
1175  return true;
1176  case eQual_allele:
1177  grp.SetAllele (val);
1178  return true;
1179  case eQual_gene_desc:
1180  grp.SetDesc (val);
1181  return true;
1182  case eQual_gene_syn:
1183  {
1184  CGene_ref::TSyn& syn = grp.SetSyn ();
1185  syn.push_back (val);
1186  return true;
1187  }
1188  case eQual_map:
1189  grp.SetMaploc (val);
1190  return true;
1191  case eQual_locus_tag:
1192  grp.SetLocus_tag (val);
1193  return true;
1194  case eQual_nomenclature:
1195  /* !!! need to implement !!! */
1196  return true;
1197  default:
1198  break;
1199  }
1200  return false;
1201 }
1202 
1203 
1205  CRef<CSeq_feat> sfp,
1206  CSeqFeatData& sfdata,
1207  EQual qtype, const string& val
1208 )
1209 
1210 {
1211  CCdregion& crp = sfdata.SetCdregion ();
1212  switch (qtype) {
1213  case eQual_codon_start:
1214  {
1215  int frame = x_StringToLongNoThrow (val, kCdsFeatName, "codon_start");
1216  switch (frame) {
1217  case 0:
1219  break;
1220  case 1:
1222  break;
1223  case 2:
1225  break;
1226  case 3:
1228  break;
1229  default:
1230  break;
1231  }
1232  return true;
1233  }
1234  case eQual_EC_number:
1235  {
1236  CProt_ref& prp = sfp->SetProtXref ();
1237  CProt_ref::TEc& ec = prp.SetEc ();
1238  ec.push_back (val);
1239  return true;
1240  }
1241  case eQual_function:
1242  {
1243  CProt_ref& prp = sfp->SetProtXref ();
1244  CProt_ref::TActivity& fun = prp.SetActivity ();
1245  fun.push_back (val);
1246  return true;
1247  }
1248  case eQual_product:
1249  {
1250  CProt_ref& prp = sfp->SetProtXref ();
1251  CProt_ref::TName& prod = prp.SetName ();
1252  prod.push_back (val);
1253  return true;
1254  }
1255  case eQual_prot_desc:
1256  {
1257  CProt_ref& prp = sfp->SetProtXref ();
1258  prp.SetDesc (val);
1259  return true;
1260  }
1261  case eQual_prot_note:
1262  return x_AddGBQualToFeature(sfp, "prot_note", val);
1263  case eQual_transl_except:
1264  // add as GBQual, let cleanup convert to code_break
1265  return x_AddGBQualToFeature(sfp, "transl_except", val);
1266  case eQual_translation:
1267  // we should accept, but ignore this qual on CDSs.
1268  // so, do nothing but return success
1269  return true;
1270  case eQual_transl_table:
1271  // set genetic code directly, or add qualifier and let cleanup convert?
1272  try {
1273  int num = NStr::StringToLong(val);
1274  CGen_code_table::GetTransTable(num); // throws if bad num
1276  code->SetId(num);
1277  crp.SetCode().Set().push_back(code);
1278  return true;
1279  } catch( CStringException ) {
1280  // if val is not a number, add qualifier directly and
1281  // let cleanup convert?
1282  return x_AddGBQualToFeature(sfp, "transl_table", val);
1283  } catch( ... ) {
1284  // invalid genome code table so don't even try to make
1285  // the transl_table qual
1286  x_ProcessMsg(
1288  kCdsFeatName, "transl_table", val);
1289  return true;
1290  }
1291  break;
1292 
1293  default:
1294  break;
1295  }
1296  return false;
1297 }
1298 
1299 
1301  const string& str
1302 )
1303 
1304 {
1305  ITERATE (string, it, str) {
1306  char ch = *it;
1307  if (ch > ' ' && ch != '"' && ch != '\'') return false;
1308  }
1309 
1310  return true;
1311 }
1312 
1313 static bool
1315 {
1316  // basically, this is true if the line starts with "order" (whitespaces disregarded)
1317 
1318  const static char* kOrder = "ORDER";
1319 
1320  // find first non-whitespace character
1321  string::size_type pos = 0;
1322  for( ; pos < line.length() && isspace(line[pos]); ++pos) {
1323  // nothing to do here
1324  }
1325 
1326  // line is all whitespace
1327  if( pos >= line.length() ) {
1328  return false;
1329  }
1330 
1331  // check if starts with "order" after whitespace
1332  return ( 0 == NStr::CompareNocase( line, pos, strlen(kOrder), kOrder ) );
1333 }
1334 
1335 // Turns a "join" location into an "order" by inserting a NULL location between its pieces.
1336 // Returns an unset CRef if the loc doesn't need nulls (e.g. if it's just an interval)
// NOTE(review): listing lines 1338 (function name and the `loc` parameter) and
// 1341 (presumably the declaration of `result`) are absent from this extract.
1337 static CRef<CSeq_loc>
1339 {
1340  // create result we're returning
1342  CSeq_loc_mix::Tdata & mix_pieces = result->SetMix().Set();
1343 
1344  // keep this around for whenever we need a "null" piece
// (the same CRef is pushed each time, so all separators share one object)
1345  CRef<CSeq_loc> loc_piece_null( new CSeq_loc );
1346  loc_piece_null->SetNull();
1347 
1348  // push pieces of source, with NULLs between
1349  CSeq_loc_CI loc_iter( loc );
1350  for( ; loc_iter; ++loc_iter ) {
// a separator goes in front of every piece except the first
1351  if( ! mix_pieces.empty() ) {
1352  mix_pieces.push_back( loc_piece_null );
1353  }
// each piece is a fresh CSeq_loc copied from the iterator's embedding loc
1354  CRef<CSeq_loc> new_piece( new CSeq_loc );
1355  new_piece->Assign( loc_iter.GetEmbeddingSeq_loc() );
1356  mix_pieces.push_back( new_piece );
1357  }
1358 
1359  // Only wrap in "mix" if there was more than one piece
1360  if( mix_pieces.size() > 1 ) {
1361  return result;
1362  } else {
1363  return CRef<CSeq_loc>();
1364  }
1365 }
1366 
1367 
// Reduces a tRNA product string to the token that the tRNA product handling
// looks up in sm_TrnaKeys: a leading "tRNA-" is dropped and the text is cut
// at the first of the characters  - , ; : ( ) = ' _ ~  .
// NOTE(review): listing lines 1368 (return type and function name), 1372
// (presumably the declaration of `value` from `val`) and 1381 (a statement
// inside the `pos` branch) are absent from this extract.
1369  const string& val
1370 )
1371 {
1373 
// strip the "tRNA-" prefix, keeping everything after it
1374  if (NStr::StartsWith(value, "tRNA-")) {
1375  value.assign(value, strlen("tRNA-"), CTempString::npos);
1376  }
1377 
// cut at the first delimiter, if any
1378  CTempString::size_type pos = value.find_first_of("-,;:()=\'_~");
1379  if (pos != CTempString::npos) {
1380  value.erase(pos);
1382  }
1383 
1384  return string(value);
1385 }
1386 
1387 
// Parses an anticodon qualifier value that starts with "(pos:" into ext_trna.
// All whitespace is removed first. The text between "(pos:" and its matching
// ')' is taken as the location; an optional "aa:<abbrev>" part (optionally
// followed by ",seq:...") is looked up in sm_TrnaKeys and stored as the amino
// acid. Returns true only when the location parses and its strand is
// unknown, plus or minus; in every other case returns false, and the amino
// acid is reset again when the location step is what failed.
// NOTE(review): listing lines 1389 (function name and parameters), 1424
// (presumably the declaration of `aa`) and 1433 (presumably the declaration
// of `helper`) are absent from this extract.
1388 bool
1390 {
1391  if (NStr::IsBlank (str)) return false;
1392 
// work on a copy with every isspace() character removed
// NOTE(review): isspace() receives a plain char here; a cast to
// unsigned char would be the safe form for non-ASCII input.
1393  string normalized_string = str;
1394  normalized_string.erase(
1395  remove_if(begin(normalized_string),
1396  end(normalized_string),
1397  [](char c) { return isspace(c);}),
1398  end(normalized_string));
1399 
1400  if ( NStr::StartsWith(normalized_string, "(pos:") ) {
1401  // find position of closing paren
1402  string::size_type pos_end = x_MatchingParenPos( normalized_string, 0 );
1403  if (pos_end != string::npos) {
// 5 == length of "(pos:"; pos_str is everything up to the closing paren
1404  string pos_str = normalized_string.substr (5, pos_end - 5);
1405  string::size_type aa_start = NStr::FindNoCase(pos_str, "aa:");
1406  if (aa_start != string::npos) {
1407  auto seq_start = NStr::FindNoCase(pos_str, ",seq:");
// ",seq:" must not begin before the end of "aa:" (3 characters)
1408  if (seq_start != string::npos &&
1409  seq_start < aa_start+3) {
1410  return false;
1411  }
1412 
// the abbreviation runs from after "aa:" to ",seq:" or to the end
1413  size_t aa_length = (seq_start == NPOS) ?
1414  pos_str.size() - (aa_start+3) :
1415  seq_start - (aa_start+3);
1416 
1417  string abbrev = pos_str.substr (aa_start + 3, aa_length);
1418  //TTrnaMap::const_iterator
1419  auto t_iter = sm_TrnaKeys.find (abbrev.c_str ());
1420  if (t_iter == sm_TrnaKeys.end ()) {
1421  // unable to parse
1422  return false;
1423  }
1425  aa->SetNcbieaa (t_iter->second);
1426  ext_trna.SetAa(*aa);
// keep only the location text in front of "aa:" and drop a trailing comma
// (whitespace was already removed above, so the truncate call is expected
// to leave pos_str unchanged)
1427  pos_str = pos_str.substr (0, aa_start);
1428  NStr::TruncateSpacesInPlace (pos_str);
1429  if (NStr::EndsWith (pos_str, ",")) {
1430  pos_str = pos_str.substr (0, pos_str.length() - 1);
1431  }
1432  }
1434  CRef<CSeq_loc> anticodon = GetSeqLocFromString (pos_str, m_seq_id, & helper);
1435  if (! anticodon) {
// location did not parse: undo the amino acid set above
1436  ext_trna.ResetAa();
1437  return false;
1438  } else {
// only the three strand values listed below are accepted
1439  switch( anticodon->GetStrand() ) {
1440  case eNa_strand_unknown:
1441  case eNa_strand_plus:
1442  case eNa_strand_minus:
1443  ext_trna.SetAnticodon(*anticodon);
1444  return true;
1445  default:
1446  ext_trna.ResetAa();
1447  return false;
1448  }
1449  }
1450  }
1451  }
1452 
1453  return false;
1454 }
1455 
1456 
// Scans forward from the '(' at open_paren_pos and returns the index of the
// ')' that closes it, counting nested parentheses on the way. Returns NPOS
// when the string ends before the nesting level drops back to zero.
// NOTE(review): the line with the return type and function name (listing
// line 1457) is absent from this extract.
1458  const string &str, SIZE_TYPE open_paren_pos )
1459 {
// NOTE(review): the first assertion indexes str before the second one checks
// that the index is in range; swapping them would test the bound first.
1460  _ASSERT( str[open_paren_pos] == '(' );
1461  _ASSERT( open_paren_pos < str.length() );
1462 
1463  // nesting level. start at 1 since we know there's an open paren
1464  int level = 1;
1465 
1466  SIZE_TYPE pos = open_paren_pos + 1;
1467  for( ; pos < str.length(); ++pos ) {
1468  switch( str[pos] ) {
1469  case '(':
1470  // nesting deeper
1471  ++level;
1472  break;
1473  case ')':
1474  // closed a level of nesting
1475  --level;
1476  if( 0 == level ) {
1477  // reached the top: we're closing the initial paren,
1478  // so we return our position
1479  return pos;
1480  }
1481  break;
1482  default:
1483  // ignore other characters.
1484  // maybe in the future we'll handle ignoring parens in quotes or
1485  // things like that.
1486  break;
1487  }
1488  }
// ran off the end without closing the initial paren
1489  return NPOS;
1490 }
1491 
// Converts strToConvert to a number without letting a conversion exception
// escape. A clean NStr::StringToLong() result is returned as is. Otherwise a
// warning is reported through x_ProcessMsg(), naming the feature and
// qualifier, and the function returns either the value recovered by a second
// attempt (only tried when the text starts with a digit) or 0.
// A caller-supplied eProblem other than eProblem_Unset replaces the default
// problem code in the reported message.
// NOTE(review): listing lines 1492 (return type and function name), 1505
// (presumably the retry that defines `result`) and 1508 / 1522 (the default
// values assigned to `problem`) are absent from this extract.
1493  CTempString strToConvert,
1494  CTempString strFeatureName,
1495  CTempString strQualifierName,
1496  ILineError::EProblem eProblem
1497 )
1498 {
1499  try {
1500  return NStr::StringToLong(strToConvert);
1501  } catch( ... ) {
1502  // See if we start with a number, but there's extra junk after it, try again
// NOTE(review): isdigit() receives a plain char here; a cast to
// unsigned char would be the safe form for non-ASCII input.
1503  if( ! strToConvert.empty() && isdigit(strToConvert[0]) ) {
1504  try {
1506 
1507  ILineError::EProblem problem =
1509  if( eProblem != ILineError::eProblem_Unset ) {
1510  problem = eProblem;
1511  }
1512 
1513  x_ProcessMsg(
1514  problem,
1515  eDiag_Warning,
1516  strFeatureName, strQualifierName, strToConvert );
1517  return result;
1518  } catch( ... ) { } // fall-thru to usual handling
1519  }
1520 
1521  ILineError::EProblem problem =
1523  if( eProblem != ILineError::eProblem_Unset ) {
1524  problem = eProblem;
1525  }
1526 
1527  x_ProcessMsg(
1528  problem,
1529  eDiag_Warning,
1530  strFeatureName, strQualifierName, strToConvert );
1531  // we have no idea, so just return zero
1532  return 0;
1533  }
1534 }
1535 
1536 
// Applies one qualifier to an RNA feature. The feature data is obtained with
// SetRna() and the work is dispatched first on the RNA type, then on the
// qualifier. Returns true when the qualifier was consumed here (this includes
// cases where only a message was reported) and false when it was not, in
// which case x_AddQualifierToFeature goes on to its generic handling.
// NOTE(review): listing lines 1537 (return type and function name), 1547 and
// 1597 (presumably further case labels), 1622 and 1634 (leading arguments of
// the x_ProcessMsg calls) and 1640 (presumably the case label of the codon
// branch) are absent from this extract.
1538  CRef<CSeq_feat> sfp,
1539  EQual qtype,
1540  const string& val
1541 )
1542 {
1543  CSeqFeatData& sfdata = sfp->SetData();
1544  CRNA_ref& rrp = sfdata.SetRna ();
1545  CRNA_ref::EType rnatyp = rrp.GetType ();
1546  switch (rnatyp) {
1548  case CRNA_ref::eType_mRNA:
1549  case CRNA_ref::eType_rRNA:
1550  switch (qtype) {
1551  case eQual_product:
1552  {
// product goes into the ext name, unless the ext is already a tRNA ext
1553  CRNA_ref::TExt& tex = rrp.SetExt ();
1554  CRNA_ref::C_Ext::E_Choice exttype = tex.Which ();
1555  if (exttype == CRNA_ref::C_Ext::e_TRNA) return false;
1556  tex.SetName (val);
1557  return true;
1558  }
1559  default:
1560  break;
1561  }
1562  break;
1563  case CRNA_ref::eType_ncRNA:
// ncRNA: product and class are stored in the Gen ext
1564  switch (qtype) {
1565  case eQual_product:
1566  rrp.SetExt().SetGen().SetProduct(val);
1567  return true;
1568  break;
1569  case eQual_ncRNA_class:
1570  rrp.SetExt().SetGen().SetClass(val);
1571  return true;
1572  break;
1573  default:
1574  break;
1575  }
1576  break;
1577  case CRNA_ref::eType_tmRNA:
// tmRNA: product is stored in the Gen ext, tag_peptide as an RNA qual on it
1578  switch (qtype) {
1579  case eQual_product:
1580  rrp.SetExt().SetGen().SetProduct(val);
1581  return true;
1582  case eQual_tag_peptide:
1583  {
1584  CRef<CRNA_qual> q(new CRNA_qual());
1585  q->SetQual("tag_peptide");
1586  q->SetVal(val);
1587  rrp.SetExt().SetGen().SetQuals().Set().push_back(q);
1588  return true;
1589  }
1590  break;
1591  default:
1592  break;
1593  }
1594  break;
// these RNA types take no qualifier in this function
1595  case CRNA_ref::eType_snRNA:
1596  case CRNA_ref::eType_scRNA:
1598  case CRNA_ref::eType_other:
1599  return false;
1600  case CRNA_ref::eType_tRNA:
1601  switch (qtype) {
1602  case eQual_product: {
// an ext that is already a plain name is left alone
1603  if (rrp.IsSetExt() && rrp.GetExt().Which() == CRNA_ref::C_Ext::e_Name)
1604  return false;
1605 
// map the product text to an amino acid code through sm_TrnaKeys
1606  const string& aa_string = x_TrnaToAaString(val);
1607  const auto aaval_it = sm_TrnaKeys.find(aa_string.c_str());
1608 
1609  if (aaval_it != sm_TrnaKeys.end()) {
1610  CRNA_ref::TExt& tex = rrp.SetExt ();
1611  CTrna_ext& trx = tex.SetTRNA();
1612  CTrna_ext::TAa& taa = trx.SetAa();
1613  taa.SetNcbieaa(aaval_it->second);
// for these three tokens the original product text is additionally kept
// as a "product" GB qualifier
1614  if (aa_string == "fMet" ||
1615  aa_string == "iMet" ||
1616  aa_string == "Ile2") {
1617  x_AddGBQualToFeature(sfp, "product", val);
1618  }
1619  }
1620  else {
// unknown amino acid token: report it; the qualifier still counts as handled
1621  x_ProcessMsg(
1623  "tRNA", "product", val);
1624  }
1625  return true;
1626  }
1627  break;
1628  case eQual_anticodon:
1629  {
1630  CRNA_ref::TExt& tex = rrp.SetExt ();
1631  CRNA_ref::C_Ext::TTRNA & ext_trna = tex.SetTRNA();
// a parse failure is reported, but the qualifier still counts as handled
1632  if( ! x_ParseTrnaExtString(ext_trna, val) ) {
1633  x_ProcessMsg(
1635  "tRNA", "anticodon", val );
1636  }
1637  return true;
1638  }
1639  break;
1641  {
1642  //const auto codon_index = CGen_code_table::CodonToIndex(val);
1643  //if (codon_index >= 0) {
1644  CRNA_ref::TExt& tex = rrp.SetExt ();
1645  CRNA_ref::C_Ext::TTRNA & ext_trna = tex.SetTRNA();
1646  if (!x_AddCodons(val, ext_trna)) {
1647  return false;
1648  }
1649  //}
1650  return true;
1651  }
1652  break;
1653  default:
1654  break;
1655  }
1656  break;
1657  default:
1658  break;
1659  }
1660  return false;
1661 }
1662 
1663 
// Expands a three-character codon through s_IUPACmap and records every
// resulting codon index on trna_ext. Returns false when val is not exactly
// three characters long or when anything inside the try block throws;
// otherwise returns true.
// NOTE(review): the line with the return type and function name (listing
// line 1664) is absent from this extract.
1665  const string& val,
1666  CTrna_ext& trna_ext
1667  ) const
1668 {
1669  if (val.size() != 3) {
1670  return false;
1671  }
1672 
// a set, so each codon index is stored once and iterated in ascending order
1673  set<int> codons;
1674  try {
// every combination of the characters that s_IUPACmap lists for the three
// positions; presumably at() throws for a character with no entry, which
// ends up in the catch below
1675  for (char char1 : s_IUPACmap.at(val[0])) {
1676  for (char char2 : s_IUPACmap.at(val[1])) {
1677  for (char char3 : s_IUPACmap.at(val[2])) {
// NOTE(review): the index is stored without being range-checked - confirm
// CodonToIndex cannot report failure for characters coming from s_IUPACmap.
1678  const auto codon_index = CGen_code_table::CodonToIndex(char1, char2, char3);
1679  codons.insert(codon_index);
1680  }
1681  }
1682  }
1683 
1684  if (!codons.empty()) {
// SetNcbieaa() is called without a value here
1685  trna_ext.SetAa().SetNcbieaa();
1686  for (const auto codon_index : codons) {
1687  trna_ext.SetCodon().push_back(codon_index);
1688  }
1689  }
1690  return true;
1691  }
// any exception is swallowed and turned into a false return
1692  catch(...) {}
1693 
1694  return false;
1695 }
1696 
1697 
// Applies one qualifier to an import-style feature. Two things are handled:
//  - regulatory_class on a regulatory subtype: the value must be "other" or
//    appear in allowed_values, and is then stored as a GB qualifier;
//  - a fixed set of qualifiers on three subtypes, for which a user-object
//    type name (str) is selected and the feature's ext type is touched.
// Returns true when the qualifier was consumed, false otherwise (including a
// regulatory_class value that is not allowed).
// NOTE(review): listing lines 1698 (return type and function name), 1720 (the
// initializer of allowed_values), 1745 / 1766 / 1779 (the subtype case
// labels) and 1805 (a statement before the final `return true`) are absent
// from this extract.
1699  CRef<CSeq_feat> sfp,
1700  CSeqFeatData& sfdata,
1701  EQual qtype,
1702  const string& qual,
1703  const string& val
1704 )
1705 
1706 {
// stays null unless one of the subtype/qualifier pairs below matches
1707  const char* str = nullptr;
1708 
1709  CSeqFeatData::ESubtype subtype = sfdata.GetSubtype ();
1710 
1711  // used if-statement because CSeqFeatData::IsRegulatory won't work in a
1712  // switch statement.
1713  if( (subtype == CSeqFeatData::eSubtype_regulatory) ||
1714  CSeqFeatData::IsRegulatory(subtype) )
1715  {
1716  if (qtype == eQual_regulatory_class) {
1717  if (val != "other") { // RW-374 "other" is a special case
1718 
1719  const vector<string>& allowed_values =
1721  if (find(allowed_values.cbegin(), allowed_values.cend(), val)
1722  == allowed_values.cend()) {
1723  return false;
1724  }
1725 
1726 /*
1727  const CSeqFeatData::ESubtype regulatory_class_subtype =
1728  CSeqFeatData::GetRegulatoryClass(val);
1729  if( regulatory_class_subtype == CSeqFeatData::eSubtype_bad ) {
1730  // msg will be sent in caller x_AddQualifierToFeature
1731  return false;
1732  }
1733  */
1734  }
1735  // okay
1736  // (Note that at this time we don't validate
1737  // if the regulatory_class actually matches the
1738  // subtype)
1739  x_AddGBQualToFeature(sfp, qual, val);
1740  return true;
1741  }
1742  }
1743 
// pick the user-object type name for the subtype/qualifier combination
1744  switch (subtype) {
1746  {
1747  switch (qtype) {
1748  case eQual_chrcnt:
1749  case eQual_ctgcnt:
1750  case eQual_loccnt:
1751  case eQual_snp_class:
1752  case eQual_snp_gtype:
1753  case eQual_snp_het:
1754  case eQual_snp_het_se:
1755  case eQual_snp_linkout:
1756  case eQual_snp_maxrate:
1757  case eQual_snp_valid:
1758  case eQual_weight:
1759  str = "dbSnpSynonymyData";
1760  break;
1761  default:
1762  break;
1763  }
1764  }
1765  break;
1767  {
1768  switch (qtype) {
1769  case eQual_sts_aliases:
1770  case eQual_sts_dsegs:
1771  case eQual_weight:
1772  str = "stsUserObject";
1773  break;
1774  default:
1775  break;
1776  }
1777  }
1778  break;
1780  {
1781  switch (qtype) {
1782  case eQual_bac_ends:
1783  case eQual_clone_id:
1784  case eQual_method:
1785  case eQual_sequence:
1786  case eQual_STS:
1787  case eQual_weight:
1788  str = "cloneUserObject";
1789  break;
1790  default:
1791  break;
1792  }
1793  }
1794  break;
1795  default:
1796  break;
1797  }
1798 
1799  if (str) {
1800  CSeq_feat::TExt& ext = sfp->SetExt ();
1801  CObject_id& obj = ext.SetType ();
// make the type a string id when it is not already a non-empty string
// NOTE(review): `str` itself is not used in the lines visible here; the
// missing listing line 1805 presumably consumes it.
1802  if ((! obj.IsStr ()) || obj.GetStr ().empty ()) {
1803  obj.SetStr ();
1804  }
1806  return true;
1807  }
1808 
1809  return false;
1810 }
1811 
1812 
// Org-ref overload: stores one organism-level qualifier on the feature's
// BioSource (obtained with SetBiosrc()). Every recognised rtype returns true,
// also when an unknown organelle name or a non-numeric genetic code only
// produced a message; an unrecognised rtype returns false.
// NOTE(review): listing lines 1813 (return type and function name) and 1837
// (leading arguments of the x_ProcessMsg call) are absent from this extract.
1814  CSeqFeatData& sfdata,
1815  const string &feat_name,
1816  EOrgRef rtype,
1817  const string& val
1818 )
1819 {
1820  CBioSource& bsp = sfdata.SetBiosrc ();
1821 
1822  switch (rtype) {
1823  case eOrgRef_organism:
1824  {
1825  CBioSource::TOrg& orp = bsp.SetOrg ();
1826  orp.SetTaxname (val);
1827  return true;
1828  }
1829  case eOrgRef_organelle:
1830  {
// organelle names are translated to a genome enum through sm_GenomeKeys
1831  TGenomeMap::const_iterator g_iter = sm_GenomeKeys.find (val.c_str ());
1832  if (g_iter != sm_GenomeKeys.end ()) {
1833  CBioSource::EGenome gtype = g_iter->second;
1834  bsp.SetGenome (gtype);
1835  } else {
1836  x_ProcessMsg(
1838  feat_name, "organelle", val );
1839  }
1840  return true;
1841  }
1842  case eOrgRef_div:
1843  {
1844  CBioSource::TOrg& orp = bsp.SetOrg ();
1845  COrg_ref::TOrgname& onp = orp.SetOrgname ();
1846  onp.SetDiv (val);
1847  return true;
1848  }
1849  case eOrgRef_lineage:
1850  {
1851  CBioSource::TOrg& orp = bsp.SetOrg ();
1852  COrg_ref::TOrgname& onp = orp.SetOrgname ();
1853  onp.SetLineage (val);
1854  return true;
1855  }
1856  case eOrgRef_gcode:
1857  {
1858  CBioSource::TOrg& orp = bsp.SetOrg ();
1859  COrg_ref::TOrgname& onp = orp.SetOrgname ();
// conversion problems are reported inside x_StringToLongNoThrow, which
// then yields 0 or a partially parsed value
// NOTE(review): the result is stored in an int; if the helper returns a
// long (its declarator is not visible here) this narrows - confirm.
1860  int code = x_StringToLongNoThrow (val, feat_name, "gcode");
1861  onp.SetGcode (code);
1862  return true;
1863  }
1864  case eOrgRef_mgcode:
1865  {
1866  CBioSource::TOrg& orp = bsp.SetOrg ();
1867  COrg_ref::TOrgname& onp = orp.SetOrgname ();
1868  int code = x_StringToLongNoThrow (val, feat_name, "mgcode");
1869  onp.SetMgcode (code);
1870  return true;
1871  }
1872  default:
1873  break;
1874  }
1875  return false;
1876 }
1877 
1878 
// SubSource overload: appends a new CSubSource with the given subtype and
// `val` as its name to the feature's BioSource subtype list.
// Always returns true.
// NOTE(review): the line with the return type and function name (listing
// line 1879) is absent from this extract.
1880  CSeqFeatData& sfdata,
1881  CSubSource::ESubtype stype,
1882  const string& val
1883 )
1884 
1885 {
1886  CBioSource& bsp = sfdata.SetBiosrc ();
1887  CBioSource::TSubtype& slist = bsp.SetSubtype ();
1888  CRef<CSubSource> ssp (new CSubSource);
1889  ssp->SetSubtype (stype);
1890  ssp->SetName (val);
1891  slist.push_back (ssp);
1892  return true;
1893 }
1894 
1895 
// OrgMod overload: appends a new COrgMod with the given subtype and `val` as
// its subname to the modifier list of the BioSource's org name.
// Always returns true.
// NOTE(review): the line with the return type and function name (listing
// line 1896) is absent from this extract.
1897  CSeqFeatData& sfdata,
1898  COrgMod::ESubtype mtype,
1899  const string& val
1900 )
1901 
1902 {
1903  CBioSource& bsp = sfdata.SetBiosrc ();
1904  CBioSource::TOrg& orp = bsp.SetOrg ();
1905  COrg_ref::TOrgname& onp = orp.SetOrgname ();
1906  COrgName::TMod& mlist = onp.SetMod ();
1907  CRef<COrgMod> omp (new COrgMod);
1908  omp->SetSubtype (mtype);
1909  omp->SetSubname (val);
1910  mlist.push_back (omp);
1911  return true;
1912 }
1913 
1914 
// Appends a GenBank qualifier (CGb_qual) to the feature. The qualifier name
// is replaced by the spelling CSeqFeatData reports for it when one is known;
// a value made up only of quotes/whitespace is stored as the empty string.
// Returns false only for an empty qualifier name, true otherwise.
// NOTE(review): the line with the return type and function name (listing
// line 1915) is absent from this extract.
1916  CRef<CSeq_feat> sfp,
1917  const string& qual,
1918  const string& val
1919 )
1920 
1921 {
1922  if (qual.empty ()) return false;
1923 
1924  // view of the qualifier name; re-pointed below when a normalized spelling exists
1925  CTempString normalized_qual = qual;
1926 
1927  // normalize qual if needed, especially regarding case, and
1928  // use as-is if no normalization applies
1929  auto qual_type = CSeqFeatData::GetQualifierType(qual);
1930  if( qual_type != CSeqFeatData::eQual_bad ) {
1931  // only take the library's spelling when it is non-empty
1932  CTempString potential_normalized_qual = CSeqFeatData::GetQualifierAsString(qual_type);
1933  if( ! potential_normalized_qual.empty() ) {
1934  normalized_qual = potential_normalized_qual;
1935  }
1936  }
1937 
1938  auto& qlist = sfp->SetQual ();
1939  CRef<CGb_qual> gbq (new CGb_qual);
1940  gbq->SetQual() = normalized_qual;
1941  if (x_StringIsJustQuotes (val)) {
1942  gbq->SetVal() = kEmptyStr;
1943  } else {
1944  gbq->SetVal() = val;
1945  }
1946  qlist.push_back (gbq);
1947 
1948  return true;
1949 }
1950 
1951 
// Walks every CDS in choiceToFeatMap that carries a gene xref and looks up
// the gene features already present that match the xref's locus and/or
// locus_tag. For the matches, a CDS that is not completely covered by the
// gene's location is reported through x_ProcessMsg; when nothing matched, a
// new gene feature built from the xref and the CDS location is appended to
// the annot's feature table and registered in the lookup maps so that later
// CDSs can find it. CDSs without a gene xref are skipped silently.
// NOTE(review): several listing lines are absent from this extract: 1952
// (return type and function name), 1982 / 1987 / 2134 / 2138 / 2143 (the
// opening of the value expressions passed to insert()), 2091 and 2117 (the
// conditions guarding the two blocks, presumably tests on `flags`), and
// 2107 / 2109 (arguments of the x_ProcessMsg call).
1953  CRef<CSeq_annot> sap,
1954  TChoiceToFeatMap & choiceToFeatMap,
1955  const TFlags flags)
1956 {
1957  // load cds_equal_range to hold the CDSs
1958  typedef TChoiceToFeatMap::iterator TChoiceCI;
1959  typedef pair<TChoiceCI, TChoiceCI> TChoiceEqualRange;
1960  TChoiceEqualRange cds_equal_range =
1961  choiceToFeatMap.equal_range(CSeqFeatData::e_Cdregion);
1962  if( cds_equal_range.first == cds_equal_range.second )
1963  {
1964  // nothing to do if there are no CDSs
1965  return;
1966  }
1967 
1968  // load mappings from locus or locus-tag to gene
1969  typedef multimap<string, SFeatAndLineNum> TStringToGeneAndLineMap;
1970  TStringToGeneAndLineMap locusToGeneAndLineMap;
1971  TStringToGeneAndLineMap locusTagToGeneAndLineMap;
1972  const TChoiceEqualRange gene_equal_range =
1973  choiceToFeatMap.equal_range(CSeqFeatData::e_Gene);
1974  for( TChoiceCI gene_choice_ci = gene_equal_range.first;
1975  gene_choice_ci != gene_equal_range.second;
1976  ++gene_choice_ci )
1977  {
1978  SFeatAndLineNum gene_feat_ref_and_line = gene_choice_ci->second;
1979  const CGene_ref & gene_ref = gene_feat_ref_and_line.m_pFeat->GetData().GetGene();
// a gene is indexed under each of locus and locus_tag that is non-empty
1980  if( ! RAW_FIELD_IS_EMPTY_OR_UNSET(gene_ref, Locus) ) {
1981  locusToGeneAndLineMap.insert(
1983  gene_ref.GetLocus(), gene_feat_ref_and_line));
1984  }
1985  if( ! RAW_FIELD_IS_EMPTY_OR_UNSET(gene_ref, Locus_tag) ) {
1986  locusTagToGeneAndLineMap.insert(
1988  gene_ref.GetLocus_tag(), gene_feat_ref_and_line));
1989  }
1990  }
1991 
1992  // for each CDS, check for gene conflicts or create genes,
1993  // depending on various flags
1994  for( TChoiceCI cds_choice_ci = cds_equal_range.first;
1995  cds_choice_ci != cds_equal_range.second ; ++cds_choice_ci)
1996  {
1997  TFeatConstRef cds_feat_ref = cds_choice_ci->second.m_pFeat;
1998  const TSeqPos cds_line_num = cds_choice_ci->second.m_uLineNum;
1999 
2000  const CSeq_loc & cds_loc = cds_feat_ref->GetLocation();
2001 
2002  const CGene_ref * pGeneXrefOnCDS = cds_feat_ref->GetGeneXref();
2003  if( ! pGeneXrefOnCDS ) {
2004  // no xref, so can't do anything for this CDS
2005  // (this is NOT an error)
2006  continue;
2007  }
2008 
2009  // get all the already-existing genes that
2010  // this CDS xrefs. It should be somewhat uncommon for there
2011  // to be more than one matching gene.
2012  set<SFeatAndLineNum> matchingGenes;
2013 
// an unset locus / locus_tag on the xref is treated as the empty string
2014  const string locus =
2015  pGeneXrefOnCDS->IsSetLocus() ?
2016  pGeneXrefOnCDS->GetLocus() :
2017  "";
2018 
2019  const string locus_tag =
2020  pGeneXrefOnCDS->IsSetLocus_tag() ?
2021  pGeneXrefOnCDS->GetLocus_tag() :
2022  "";
2023 
2024 
2025  {{
2026  // all the code in this scope is all just for setting up matchingGenes
2027 
2028  typedef TStringToGeneAndLineMap::iterator TStrToGeneCI;
2029  typedef pair<TStrToGeneCI, TStrToGeneCI> TStrToGeneEqualRange;
2030  set<SFeatAndLineNum> locusGeneMatches;
2031  // collect the genes whose locus equals the xref's locus (if any)
2032  if( !NStr::IsBlank(locus) ) {
2033  TStrToGeneEqualRange locus_equal_range =
2034  locusToGeneAndLineMap.equal_range(locus);
2035  for( TStrToGeneCI locus_gene_ci = locus_equal_range.first;
2036  locus_gene_ci != locus_equal_range.second;
2037  ++locus_gene_ci )
2038  {
// when the xref also has a locus_tag, drop genes whose own locus_tag is
// set and different; genes without a locus_tag still qualify
2039  if (!NStr::IsBlank(locus_tag)) {
2040  auto gene_feat = locus_gene_ci->second.m_pFeat;
2041  if (gene_feat->GetData().GetGene().IsSetLocus_tag() &&
2042  gene_feat->GetData().GetGene().GetLocus_tag() != locus_tag) {
2043  continue;
2044  }
2045  }
2046  locusGeneMatches.insert(locus_gene_ci->second);
2047  }
2048  }
2049  // collect the genes whose locus_tag equals the xref's locus_tag (if any)
2050  set<SFeatAndLineNum> locusTagGeneMatches;
2051  if( !NStr::IsBlank(locus_tag) ) {
2052  TStrToGeneEqualRange locus_tag_equal_range =
2053  locusTagToGeneAndLineMap.equal_range(locus_tag);
2054  for( TStrToGeneCI locus_tag_gene_ci = locus_tag_equal_range.first;
2055  locus_tag_gene_ci != locus_tag_equal_range.second;
2056  ++locus_tag_gene_ci )
2057  {
// mirror of the check above: drop genes whose own locus is set and different
2058  if (!NStr::IsBlank(locus)) {
2059  auto gene_feat = locus_tag_gene_ci->second.m_pFeat;
2060  if (gene_feat->GetData().GetGene().IsSetLocus() &&
2061  gene_feat->GetData().GetGene().GetLocus() != locus) {
2062  continue;
2063  }
2064  }
2065  locusTagGeneMatches.insert(locus_tag_gene_ci->second);
2066  }
2067  }
2068  // analyze locusGeneMatches and locusTagGeneMatches to find matchingGenes.
2069  if( locusGeneMatches.empty() ) {
2070  // swap is faster than assignment
2071  matchingGenes.swap(locusTagGeneMatches);
2072  } else if( locusTagGeneMatches.empty() ) {
2073  // swap is faster than assignment
2074  matchingGenes.swap(locusGeneMatches);
2075  } else {
2076  // get only the genes that match both (that is, the intersection)
2077  set_intersection(
2078  locusGeneMatches.begin(), locusGeneMatches.end(),
2079  locusTagGeneMatches.begin(), locusTagGeneMatches.end(),
2080  inserter(matchingGenes, matchingGenes.begin()));
2081  }
2082  }}
2083 
2084  // if requested, check that the genes really do contain the CDS
2085  // (also check if we're trying to create a gene that already exists)
2086 
2087  ITERATE(set<SFeatAndLineNum>, gene_feat_and_line_ci, matchingGenes) {
2088  const CSeq_loc & gene_loc = gene_feat_and_line_ci->m_pFeat->GetLocation();
2089  const TSeqPos gene_line_num = gene_feat_and_line_ci->m_uLineNum;
2090 
2092 
2093  // CDS's loc minus gene's loc should be an empty location
2094  // because the CDS should be entirely on the gene
2095  CRef<CSeq_loc> pCdsMinusGeneLoc = cds_loc.Subtract(
2096  gene_loc, CSeq_loc::fSortAndMerge_All, nullptr, nullptr);
2097  if( pCdsMinusGeneLoc &&
2098  ! pCdsMinusGeneLoc->IsNull() &&
2099  ! pCdsMinusGeneLoc->IsEmpty() )
2100  {
// genes created further down are registered with line number 0, so they
// contribute no line to the message
2101  ILineError::TVecOfLines gene_lines;
2102  if( gene_line_num > 0 ) {
2103  gene_lines.push_back(gene_line_num);
2104  }
2105  x_ProcessMsg(
2106  cds_line_num,
2108  kCdsFeatName,
2110  gene_lines );
2111  }
2112  }
2113  }
2114 
2115  // if requested, create genes for the CDS if there isn't already one
2116  // (it is NOT an error if the gene is already created)
2118  matchingGenes.empty() )
2119  {
2120  // create the gene
// the gene copies the xref's gene data and the CDS location, and is marked
// partial when the CDS is
2121  CRef<CSeq_feat> pNewGene( new CSeq_feat );
2122  pNewGene->SetData().SetGene().Assign( *pGeneXrefOnCDS );
2123  if( FIELD_EQUALS(*cds_feat_ref, Partial, true) ) pNewGene->SetPartial(true);
2124  pNewGene->SetLocation().Assign( cds_feat_ref->GetLocation() );
2125 
2126  // add the gene to the annot
2127  _ASSERT( sap->IsFtable() );
2128  TFtable & the_ftable = sap->SetData().SetFtable();
2129  the_ftable.push_back(pNewGene);
2130 
2131  // add it to our local information for later CDSs
// NOTE(review): this inserts into choiceToFeatMap while the enclosing loop
// iterates over a range of it - safe only if TChoiceToFeatMap keeps
// iterators valid on insert (true for std::multimap); confirm its type.
2132  SFeatAndLineNum gene_feat_and_line(pNewGene, 0);
2133  choiceToFeatMap.insert(
2135  pNewGene->GetData().Which(), gene_feat_and_line ) );
2136  if( ! RAW_FIELD_IS_EMPTY_OR_UNSET(*pGeneXrefOnCDS, Locus) ) {
2137  locusToGeneAndLineMap.insert(
2139  pGeneXrefOnCDS->GetLocus(), gene_feat_and_line));
2140  }
2141  if( ! RAW_FIELD_IS_EMPTY_OR_UNSET(*pGeneXrefOnCDS, Locus_tag) ) {
2142  locusTagToGeneAndLineMap.insert(
2144  pGeneXrefOnCDS->GetLocus_tag(), gene_feat_and_line));
2145  }
2146  }
2147  } // end of iteration through the CDS's
2148 }
2149 
2150 static const string s_QualsWithCaps[] = {
2151  "EC_number",
2152  "PCR_conditions",
2153  "PubMed",
2154  "STS",
2155  "ncRNA_class"
2156 };
2157 
2158 static const int s_NumQualsWithCaps = sizeof (s_QualsWithCaps) / sizeof (string);
2159 
2160 static string s_FixQualCapitalization (const string& qual)
2161 {
2162  string lqual = qual;
2163  lqual = NStr::ToLower(lqual);
2164  for (int j = 0; j < s_NumQualsWithCaps; j++) {
2165  if (NStr::EqualNocase(lqual, s_QualsWithCaps[j])) {
2166  lqual = s_QualsWithCaps[j];
2167  break;
2168  }
2169  }
2170  return lqual;
2171 }
2172 
2173 
// Adds `note` to the feature's comment. When CanGetComment() reports a
// comment, the note is appended after "; "; otherwise the note becomes the
// comment. Returns false only when sfp is null; a blank note changes nothing
// and still returns true.
// NOTE(review): the line with the return type and function name (listing
// line 2174) is absent from this extract.
2175  CRef<CSeq_feat> sfp,
2176  const string& note)
2177 {
2178  if (sfp.IsNull()) {
2179  return false;
2180  }
2181 
2182  if (NStr::IsBlank(note)) { // Nothing to do
2183  return true;
2184  }
2185 
// NOTE(review): the branch depends on CanGetComment(), not on the comment
// being non-empty, so an existing empty comment would yield "; <note>" -
// confirm whether that can occur.
2186  string comment = (sfp->CanGetComment()) ?
2187  sfp->GetComment() + "; " + note :
2188  note;
2189  sfp->SetComment(comment);
2190  return true;
2191 }
2192 
2193 
// Four-argument overload: stores `val` as a note on the feature through the
// two-argument overload and, when the qualifier was not literally "note",
// reports that it was converted. Returns false when the note could not be
// added, true otherwise.
// NOTE(review): listing lines 2194 (return type and function name) and 2208
// (leading arguments of the x_ProcessMsg call) are absent from this extract.
2195  CRef<CSeq_feat> sfp,
2196  const string& feat_name,
2197  const string& qual,
2198  const string& val) {
2199 
2200  if (!x_AddNoteToFeature(sfp, val)) {
2201  return false;
2202  }
2203  // Else convert qualifier to note and issue warning
2204  if (qual != "note") {
2205  string error_message =
2206  qual + " is not a valid qualifier for this feature. Converting to note.";
// the value slot of the message is left empty; the explanation travels in
// error_message
2207  x_ProcessMsg(
2209  feat_name, qual, kEmptyStr, error_message);
2210  }
2211  return true;
2212 }
2213 
// Central dispatcher that applies one qualifier/value pair to a feature.
//  1. BioSource features: the qualifier is looked up in sm_OrgRefKeys, then
//     sm_SubSrcKeys, then sm_OrgModKeys and handed to the matching
//     x_AddQualifierToBioSrc overload; nothing else is tried for them.
//  2. Other features: the qualifier (after s_FixQualCapitalization) is looked
//     up in sm_QualKeys. A handler specific to the feature type gets the
//     first chance; if it does not consume the qualifier, the generic switch
//     on the qualifier type runs.
// Returns true when the qualifier was consumed (which includes several cases
// where it was only reported or deliberately ignored) and false when it was
// not recognised or not applicable.
// NOTE(review): many listing lines are absent from this extract, among them
// 2214 (return type and function name), 2227-2228 (initializer of qual_type
// and the `if` opening the discouraged-qualifier check), the leading
// arguments of most x_ProcessMsg calls, several case labels (2273, 2282,
// 2344, 2390, 2397, 2421, 2597), the declarations of btyp / styp (2290,
// 2299), the statements of the evidence branches (2354, 2357), the call
// that fills `prefix` (2365), the trailing arguments of both ParseIDs calls
// (2520-2521, 2559-2560) and the end of the protein_id condition (2554).
2215  CRef<CSeq_feat> sfp,
2216  const string &feat_name,
2217  const string& qual,
2218  const string& val,
2219  const TFlags flags
2220 )
2221 
2222 {
2223  CSeqFeatData& sfdata = sfp->SetData ();
2224  CSeqFeatData::E_Choice featType = sfdata.Which ();
2225 
2226  const CSeqFeatData::EQualifier qual_type =
2229  if( CSeqFeatData::IsDiscouragedQual(qual_type) ) {
2230  x_ProcessMsg(
2232  eDiag_Warning, feat_name, qual);
2233  }
2234  }
2235 
2236  if (featType == CSeqFeatData::e_Biosrc) {
2237 
// the three key tables are tried in order; only the first hit is used
2238  TOrgRefMap::const_iterator o_iter = sm_OrgRefKeys.find (qual.c_str ());
2239  if (o_iter != sm_OrgRefKeys.end ()) {
2240  EOrgRef rtype = o_iter->second;
2241  if (x_AddQualifierToBioSrc (sfdata, feat_name, rtype, val)) return true;
2242  } else {
2243 
2244  TSubSrcMap::const_iterator s_iter = sm_SubSrcKeys.find (qual.c_str ());
2245  if (s_iter != sm_SubSrcKeys.end ()) {
2246 
2247  CSubSource::ESubtype stype = s_iter->second;
2248  if (x_AddQualifierToBioSrc (sfdata, stype, val)) return true;
2249 
2250  } else {
2251 
2252  TOrgModMap::const_iterator m_iter = sm_OrgModKeys.find (qual.c_str ());
2253  if (m_iter != sm_OrgModKeys.end ()) {
2254 
2255  COrgMod::ESubtype mtype = m_iter->second;
2256  if (x_AddQualifierToBioSrc (sfdata, mtype, val)) return true;
2257  }
2258  }
2259  }
2260  return false;
2261  }
2262 
2263 
2264  // else type != CSeqFeatData::e_Biosrc
2265  string lqual = s_FixQualCapitalization(qual);
2266  TQualMap::const_iterator q_iter = sm_QualKeys.find (lqual.c_str ());
2267  if (q_iter != sm_QualKeys.end ()) {
2268  EQual qtype = q_iter->second;
// first pass: handlers specific to the feature type; a `break` here means
// "not consumed", and the generic switch further down gets its turn
2269  switch (featType) {
2270  case CSeqFeatData::e_Gene:
2271  if (x_AddQualifierToGene (sfdata, qtype, val)) return true;
2272  break;
2274  if (x_AddQualifierToCdregion (sfp, sfdata, qtype, val)) return true;
2275  break;
2276  case CSeqFeatData::e_Rna:
2277  if (x_AddQualifierToRna (sfp, qtype, val)) return true;
2278  break;
2279  case CSeqFeatData::e_Imp:
2280  if (x_AddQualifierToImp (sfp, sfdata, qtype, qual, val)) return true;
2281  break;
2283  if (qtype == eQual_region_name) {
2284  sfdata.SetRegion (val);
2285  return true;
2286  }
2287  break;
2288  case CSeqFeatData::e_Bond:
2289  if (qtype == eQual_bond_type) {
2291  if (CSeqFeatData::GetBondList()->IsBondName(val.c_str(), btyp)) {
2292  sfdata.SetBond (btyp);
2293  return true;
2294  }
2295  }
2296  break;
2297  case CSeqFeatData::e_Site:
2298  if (qtype == eQual_site_type) {
2300  if (CSeqFeatData::GetSiteList()->IsSiteName( val.c_str(), styp)) {
2301  sfdata.SetSite (styp);
2302  return true;
2303  }
2304  }
2305  break;
2306  case CSeqFeatData::e_Pub:
2307  if( qtype == eQual_PubMed ) {
// a value that is not a clean number is reported by x_StringToLongNoThrow
// and whatever it returns is still stored as the PMID
2308  CRef<CPub> new_pub( new CPub );
2309  new_pub->SetPmid( CPubMedId( ENTREZ_ID_FROM(long, x_StringToLongNoThrow(val, feat_name, qual)) ) );
2310  sfdata.SetPub().SetPub().Set().push_back( new_pub );
2311  return true;
2312  }
2313  break;
2314  case CSeqFeatData::e_Prot:
2315  switch( qtype ) {
2316  case eQual_product:
2317  sfdata.SetProt().SetName().push_back( val );
2318  return true;
2319  case eQual_function:
2320  sfdata.SetProt().SetActivity().push_back( val );
2321  return true;
2322  case eQual_EC_number:
2323  sfdata.SetProt().SetEc().push_back( val );
2324  return true;
2325  default:
2326  break;
2327  }
2328  break;
2329  default:
2330  break;
2331  }
2332 
// second pass: handling that depends only on the qualifier type
2333  switch (qtype) {
2334  case eQual_pseudo:
2335  sfp->SetPseudo (true);
2336  return true;
2337  case eQual_partial:
2338  sfp->SetPartial (true);
2339  return true;
2340  case eQual_exception:
2341  sfp->SetExcept (true);
2342  sfp->SetExcept_text (val);
2343  return true;
// in the next two branches the qualifier name itself (not val) becomes
// the exception text
2345  sfp->SetExcept (true);
2346  sfp->SetExcept_text (qual);
2347  return true;
2348  case eQual_trans_splicing:
2349  sfp->SetExcept (true);
2350  sfp->SetExcept_text (qual);
2351  return true;
2352  case eQual_evidence:
// any other evidence value is accepted and ignored
2353  if (val == "experimental") {
2355  } else if (val == "not_experimental" || val == "non_experimental" ||
2356  val == "not-experimental" || val == "non-experimental") {
2358  }
2359  return true;
2360  case eQual_note:
2361  return x_AddNoteToFeature(sfp, val);
2362  case eQual_inference:
2363  {
// the qualifier is kept only when a prefix was found; otherwise it is
// reported and dropped
2364  string prefix, remainder;
2366  if (!NStr::IsBlank(prefix)) {
2367  x_AddGBQualToFeature(sfp, qual, val);
2368  }
2369  else {
2370  x_ProcessMsg(
2372  feat_name, qual, val);
2373  }
2374  return true;
2375  }
2376  case eQual_replace:
2377  {
// the replacement text is stored in lower case
2378  string val_copy = val;
2379  NStr::ToLower( val_copy );
2380  x_AddGBQualToFeature (sfp, qual, val_copy );
2381  return true;
2382  }
// all of the following are stored unchanged as GB qualifiers
2383  case eQual_allele:
2384  case eQual_bound_moiety:
2385  case eQual_clone:
2386  case eQual_compare:
2387  case eQual_cons_splice:
2388  case eQual_direction:
2389  case eQual_EC_number:
2391  case eQual_experiment:
2392  case eQual_frequency:
2393  case eQual_function:
2394  case eQual_gap_type:
2395  case eQual_insertion_seq:
2396  case eQual_label:
2398  case eQual_map:
2399  case eQual_ncRNA_class:
2400  case eQual_number:
2401  case eQual_old_locus_tag:
2402  case eQual_operon:
2403  case eQual_organism:
2404  case eQual_PCR_conditions:
2405  case eQual_phenotype:
2406  case eQual_product:
2407  case eQual_pseudogene:
2408  case eQual_satellite:
2409  case eQual_rpt_family:
2410  case eQual_rpt_type:
2411  case eQual_rpt_unit:
2412  case eQual_rpt_unit_range:
2413  case eQual_rpt_unit_seq:
2414  case eQual_standard_name:
2415  case eQual_tag_peptide:
2416  case eQual_transposon:
2417  case eQual_usedin:
2418  case eQual_cyt_map:
2419  case eQual_gen_map:
2420  case eQual_rad_map:
2422  {
2423  x_AddGBQualToFeature (sfp, qual, val);
2424  return true;
2425  }
// the four gene-related qualifiers below go into the feature's gene xref
// when the subtype can have a gene, and are turned into a note otherwise
2426  case eQual_gene:
2427  {
2428  if (CSeqFeatData::CanHaveGene(sfdata.GetSubtype())) {
2429  CGene_ref& grp = sfp->SetGeneXref ();
// "-" still creates the gene xref, but leaves its locus unset
2430  if (val != "-") {
2431  grp.SetLocus (val);
2432  }
2433  return true;
2434  }
2435  // else:
2436  return x_AddNoteToFeature(sfp, feat_name, qual, val);
2437  }
2438  case eQual_gene_desc:
2439  {
2440  if (CSeqFeatData::CanHaveGene(sfdata.GetSubtype())) {
2441  CGene_ref& grp = sfp->SetGeneXref ();
2442  grp.SetDesc (val);
2443  return true;
2444  }
2445  // else:
2446  return x_AddNoteToFeature(sfp, feat_name, qual, val);
2447  }
2448  case eQual_gene_syn:
2449  {
2450  if (CSeqFeatData::CanHaveGene(sfdata.GetSubtype())) {
2451  CGene_ref& grp = sfp->SetGeneXref ();
2452  CGene_ref::TSyn& syn = grp.SetSyn ();
2453  syn.push_back (val);
2454  return true;
2455  }
2456  // else:
2457  return x_AddNoteToFeature(sfp, feat_name, qual, val);
2458  }
2459  case eQual_locus_tag:
2460  {
2461  if (CSeqFeatData::CanHaveGene(sfdata.GetSubtype())) {
2462  CGene_ref& grp = sfp->SetGeneXref ();
2463  grp.SetLocus_tag (val);
2464  return true;
2465  }
2466  // else:
2467  return x_AddNoteToFeature(sfp, feat_name, qual, val);
2468  }
2469  case eQual_db_xref:
2470  {
// "db:tag" is split at the first ':'; an all-digit tag is stored as an
// integer id, anything else as a string. A value without ':' is dropped
// silently (true is returned either way).
2471  CTempString db, tag;
2472  if (NStr::SplitInTwo (val, ":", db, tag)) {
2473  CSeq_feat::TDbxref& dblist = sfp->SetDbxref ();
2474  CRef<CDbtag> dbt (new CDbtag);
2475  dbt->SetDb (db);
2476  CRef<CObject_id> oid (new CObject_id);
2477  static const char* digits = "0123456789";
// NOTE(review): StringToLong is not guarded here; presumably an all-digit
// tag too large for a long would throw out of this function - confirm.
2478  if (tag.find_first_not_of(digits) == string::npos && !NStr::IsBlank(tag))
2479  oid->SetId(NStr::StringToLong(tag));
2480  else
2481  oid->SetStr(tag);
2482  dbt->SetTag (*oid);
2483  dblist.push_back (dbt);
2484  return true;
2485  }
2486  return true;
2487  }
2488  case eQual_nomenclature:
2489  {
// accepted but not stored anywhere
2490  /* !!! need to implement !!! */
2491  return true;
2492  }
2493  case eQual_go_component:
2494  case eQual_go_function:
2495  case eQual_go_process:
// GO terms are only accepted on gene, CDS and RNA features
2496  if (featType == CSeqFeatData::e_Gene ||
2497  featType == CSeqFeatData::e_Cdregion ||
2498  featType == CSeqFeatData::e_Rna) {
2499  try {
2500  CReadUtil::AddGeneOntologyTerm(*sfp, qual, val);
2501  }
2502  catch( ILineError& err) {
2503  x_ProcessMsg(
2504  err.Problem(),
2505  err.Severity(),
2506  feat_name, qual, val,
2507  err.ErrorMessage());
2508  }
2509  //rw-621: throw out the faulty qualifier but retain the rest of the feature.
2510  return true;
2511  }
2512  return false;
2513  case eQual_transcript_id:
2514  {
// for mRNA features the ids are parsed so that invalid and repeated
// transcript ids can be reported; the qualifier itself is stored as a GB
// qualifier for every feature type unless parsing failed
2515  if (featType == CSeqFeatData::e_Rna &&
2516  sfdata.GetRna().GetType() == CRNA_ref::eType_mRNA) {
2517  CBioseq::TId ids;
2518  try {
2519  CSeq_id::ParseIDs(ids, val,
2522  }
2523  catch (CSeqIdException&)
2524  {
2525  x_ProcessMsg(
2527  feat_name, qual, val,
2528  "Invalid transcript_id : " + val);
2529  return true;
2530  }
2531 
2532  for (const auto& id : ids) {
2533  auto id_string = id->GetSeqIdString(true);
2534  auto res = m_ProcessedTranscriptIds.insert(id_string);
2535  if (res.second == false) { // Insertion failed because Seq-id already encountered
2536  x_ProcessMsg(
2538  feat_name, qual, val,
2539  "Transcript ID " + id_string + " appears on multiple mRNA features"
2540  );
2541  }
2542  }
2543  }
2544  x_AddGBQualToFeature(sfp, qual, val);
2545  return true;
2546  }
2547  case eQual_protein_id:
2548  // see SQD-1535 and SQD-3496
2549  if (featType == CSeqFeatData::e_Cdregion ||
2550  (featType == CSeqFeatData::e_Rna &&
2551  sfdata.GetRna().GetType() == CRNA_ref::eType_mRNA) ||
2552  (featType == CSeqFeatData::e_Prot &&
2553  sfdata.GetProt().IsSetProcessed() &&
2555  {
2556  CBioseq::TId ids;
2557  try {
2558  CSeq_id::ParseIDs(ids, val,
2561  }
2562  catch (CSeqIdException&)
2563  {
2564  x_ProcessMsg(
2566  feat_name, qual, val,
2567  "Invalid protein_id : " + val);
2568  return true;
2569  }
2570 
// repeated protein ids are only tracked for CDS features
2571  if (featType == CSeqFeatData::e_Cdregion) {
2572  for (const auto& id : ids) {
2573  auto id_string = id->GetSeqIdString(true);
2574  auto res = m_ProcessedProteinIds.insert(id_string);
2575  if (res.second == false) { // Insertion failed because Seq-id already encountered
2576  x_ProcessMsg(
2578  feat_name, qual, val,
2579  "Protein ID " + id_string + " appears on multiple CDS features"
2580  );
2581  }
2582  }
2583  }
2584 
2585  if (featType != CSeqFeatData::e_Rna) { // mRNA only has a protein_id qualifier
2586  auto pBestId = GetBestId(ids);
2587  if (pBestId) {
2588  sfp->SetProduct().SetWhole(*pBestId);
2589  }
2590  }
2591  }
2592 
2593  if (featType != CSeqFeatData::e_Prot) { // Mat-peptide has an instantiated product, but no qualifier
2594  x_AddGBQualToFeature(sfp, qual, val);
2595  }
2596  return true;
2598  // This should've been handled up in x_AddQualifierToImp
2599  // so it's always a bad value to be here
2600  x_ProcessMsg(
2602  feat_name, qual, val );
2603  return true;
2604  default:
2605  break;
2606  }
2607  }
2608  return false;
2609 }
2610 
2612  {
      // Returns true when `line` starts with one of four fixed prefixes
      // (a long run of '=' characters, " INFO:", " WARNING:" or " ERROR:"),
      // and false for everything else.
2613  // This function is testing for a match against the following regular
2614  // expression, but we avoid actual regexps for max speed:
2615  // "^(===================================================================| INFO:| WARNING:| ERROR:).*"
2616 
2617  // (that magic number is the size of the smallest possible match)
      // The shortest accepted prefix is " INFO:" (6 characters); the length
      // check also makes the line[0] / line[1] accesses below safe.
2618  if( line.length() < 6 ) {
2619  return false;
2620  }
2621 
      // The first character selects the family of prefixes to test, so at
      // most one StartsWith comparison is performed per line.
2622  if( line[0] == '=' ) {
2623  static const CTempString kAllEqualsMatch =
2624  "===================================================================";
2625  if( NStr::StartsWith(line, kAllEqualsMatch) ) {
2626  return true;
2627  }
2628  } else if( line[0] == ' ') {
      // All keyword prefixes begin with a single space; the second
      // character tells them apart.
2629  switch(line[1]) {
2630  case 'I':
2631  {
2632  static const CTempString kInfo = " INFO:";
2633  if( NStr::StartsWith(line, kInfo) ) {
2634  return true;
2635  }
2636  }
2637  break;
2638  case 'W':
2639  {
2640  static const CTempString kWarning = " WARNING:";
2641  if( NStr::StartsWith(line, kWarning) ) {
2642  return true;
2643  }
2644  }
2645  break;
2646  case 'E':
2647  {
2648  static const CTempString kError = " ERROR:";
2649  if( NStr::StartsWith(line, kError) ) {
2650  return true;
2651  }
2652  }
2653  break;
2654  default:
2655  // no match
2656  break;
2657  }
2658  }
2659 
2660  // no match
2661  return false;
2662  }
2663 
2665  CTempString strFeatureName,
2666  CRef<CSeq_feat>& sfp,
2667  const SFeatLocInfo& loc_info
2668  )
2669 
2670  {
      // Appends one location part (a point or an interval), described by
      // loc_info, to the Seq-loc-mix of *sfp. strFeatureName is only used
      // as context for the messages reported from here.
      // Every return statement visible in this function returns true.
2671 
2672  auto start = loc_info.start_pos;
2673  auto stop = loc_info.stop_pos;
2674 
      // Keep the start exactly as given in the input; `start` may be
      // swapped with `stop` below.
2675  const Int4 orig_start = start;
      // NOTE(review): the declaration of `strand` is on a line that is not
      // visible in this listing; its initial value cannot be confirmed here.
2677 
      // Coordinates given in descending order imply the minus strand.
2678  if (start > stop) {
2679  swap(start, stop);
2680  strand = eNa_strand_minus;
2681  }
2682  if (loc_info.is_minus_strand) {
2683  strand = eNa_strand_minus;
2684  }
2685 
2686  // construct loc, which will be added to the mix
2687  CSeq_loc_mix::Tdata & mix_set = sfp->SetLocation().SetMix();
2688  CRef<CSeq_loc> loc(new CSeq_loc);
2689  if (loc_info.is_point || start == stop ) {
2690  // a point of some kind
      // For the first part of the location the strand check is deferred
      // (m_need_check_strand); otherwise x_GetPointStrand is asked to
      // supply the strand from the parts already in the mix.
2691  if (mix_set.empty())
2692  m_need_check_strand = true;
2693  else
2694  x_GetPointStrand(*sfp, strand);
2695 
2696  // note usage of orig_start instead of start
2697  // because we want the first part of the point
2698  // specified in the file, not the smallest because SetRightOf
2699  // works differently for plus vs. minus strand
2700  CRef<CSeq_point> pPoint(
2701  new CSeq_point(*m_seq_id, orig_start, strand) );
2702  if( loc_info.is_point ) {
2703  // between two bases
2704  pPoint->SetRightOf (true);
2705  // warning if stop is not start plus one
2706  if( stop != (start+1) ) {
2707  x_ProcessMsg(
      // NOTE(review): the problem code / severity argument line of this
      // call is not visible in this listing.
2709  strFeatureName );
2710  }
2711  } else {
2712  // just a point. do nothing
2713  }
2714 
2715  if (loc_info.is_5p_partial) {
2716  pPoint->SetPartialStart (true, eExtreme_Biological);
2717  }
2718  if (loc_info.is_3p_partial) {
2719  pPoint->SetPartialStop (true, eExtreme_Biological);
2720  }
2721 
2722  loc->SetPnt( *pPoint );
2723  } else {
2724  // interval
2725  CRef<CSeq_interval> pIval( new CSeq_interval(*m_seq_id, start, stop, strand) );
2726  if (loc_info.is_5p_partial) {
2727  pIval->SetPartialStart (true, eExtreme_Biological);
2728  }
2729  if (loc_info.is_3p_partial) {
2730  pIval->SetPartialStop (true, eExtreme_Biological);
2731  }
2732  loc->SetInt(*pIval);
      // A strand check was deferred by an earlier point: now that an
      // interval strand is known, pass it to x_UpdatePointStrand and
      // clear the pending flag.
2733  if (m_need_check_strand)
2734  {
2735  x_UpdatePointStrand(*sfp, strand);
2736  m_need_check_strand = false;
2737  }
2738  }
2739 
2740  // check for internal partials
      // Compares the previously added part with the new one; the second
      // half of the condition and the message call are on lines that are
      // not visible in this listing. The visible part of the call shows
      // the report is made with eDiag_Warning severity.
2741  if( ! mix_set.empty() ) {
2742  const CSeq_loc & last_loc = *mix_set.back();
2743  if( last_loc.IsPartialStop(eExtreme_Biological) ||
2745  {
2746  // internal partials
2748  eDiag_Warning, strFeatureName );
2749  }
2750  }
2751 
2752  mix_set.push_back(loc);
2753 
2754 
      // Any partial end on this part marks the whole feature as partial.
2755  if (loc_info.is_5p_partial || loc_info.is_3p_partial) {
2756  sfp->SetPartial (true);
2757  }
2758 
2759  return true;
2760  }
2761 
2762 
2763 
2765  CRef<CSeq_feat> sfp,
2766  const string& feat,
2767  const TFlags flags,
2768  ITableFilter *filter
2769 )
2770 
2771 {
2772  if (feat.empty ()) return false;
2773 
2774  // check filter, if any
2775  if (filter) {
2776  ITableFilter::EAction action = filter->GetFeatAction(feat);
2777  if( action != ITableFilter::eAction_Okay ) {
2778  x_ProcessMsg(
2780  eDiag_Warning, feat );
2781  if( action == ITableFilter::eAction_Disallowed ) {
2782  return false;
2783  }
2784  }
2785  }
2786 
2788  if (sbtyp != CSeqFeatData::eSubtype_bad) {
2789 
2790  // populate *sfp here...
2791 
2793  sfp->SetData ().Select (typ);
2794  CSeqFeatData& sfdata = sfp->SetData ();
2795 
2796  if (typ == CSeqFeatData::e_Rna) {
2797  CRNA_ref& rrp = sfdata.SetRna ();
2799  switch (sbtyp) {
2801  rnatyp = CRNA_ref::eType_premsg;
2802  break;
2804  rnatyp = CRNA_ref::eType_mRNA;
2805  break;
2807  rnatyp = CRNA_ref::eType_tRNA;
2808  break;
2810  rnatyp = CRNA_ref::eType_rRNA;
2811  break;
2813  rnatyp = CRNA_ref::eType_ncRNA;
2814  rrp.SetExt().SetGen().SetClass("snRNA");
2815  break;
2817  rnatyp = CRNA_ref::eType_ncRNA;
2818  rrp.SetExt().SetGen().SetClass("scRNA");
2819  break;
2821  rnatyp = CRNA_ref::eType_ncRNA;
2822  rrp.SetExt().SetGen().SetClass("snoRNA");
2823  break;
2825  rnatyp = CRNA_ref::eType_ncRNA;
2826  rrp.SetExt().SetGen();
2827  break;
2829  rnatyp = CRNA_ref::eType_tmRNA;
2830  rrp.SetExt().SetGen();
2831  break;
2833  rrp.SetExt().SetName("misc_RNA");
2834  rnatyp = CRNA_ref::eType_other;
2835  break;
2836  default :
2837  break;
2838  }
2839  rrp.SetType (rnatyp);
2840 
2841  } else if (typ == CSeqFeatData::e_Imp) {
2842  CImp_feat_Base& imp = sfdata.SetImp ();
2843  imp.SetKey (feat);
2844 
2845  } else if (typ == CSeqFeatData::e_Bond) {
2847 
2848  } else if (typ == CSeqFeatData::e_Site) {
2850  } else if (typ == CSeqFeatData::e_Prot ) {
2851  CProt_ref &prot_ref = sfdata.SetProt();
2852  switch (sbtyp) {
2853  default:
2854  break;
2857  break;
2860  break;
2863  break;
2866  break;
2869  break;
2870  }
2871  }
2872 
2873  // check for discouraged feature name
2875  if( CSeqFeatData::IsDiscouragedSubtype(sbtyp) ) {
2876  x_ProcessMsg(
2878  eDiag_Warning, feat);
2879  }
2880  }
2881 
2882  return true;
2883  }
2884 
2885  // unrecognized feature key
2886 
2889  }
2890 
2892 
2893  sfp->SetData ().Select (CSeqFeatData::e_Imp);
2894  CSeqFeatData& sfdata = sfp->SetData ();
2895  CImp_feat_Base& imp = sfdata.SetImp ();
2896  imp.SetKey ("misc_feature");
2897  x_AddQualifierToFeature (sfp, kEmptyStr, "standard_name", feat, flags);
2898 
2899  return true;
2900 
2901  } else if ((flags & CFeature_table_reader::fKeepBadKey) != 0) {
2902 
2903  sfp->SetData ().Select (CSeqFeatData::e_Imp);
2904  CSeqFeatData& sfdata = sfp->SetData ();
2905  CImp_feat_Base& imp = sfdata.SetImp ();
2906  imp.SetKey (feat);
2907 
2908  return true;
2909  }
2910 
2911  return false;
2912 }
2913 
2915  ILineError::EProblem eProblem,
2916  EDiagSev eSeverity,
2917  const string& strFeatureName,
2918  const string& strQualifierName,
2919  const string& strQualifierValue,
2920  const string& strErrorMessage,
2921  const ILineError::TVecOfLines & vecOfOtherLines)
2922 {
2924  eProblem,
2925  eSeverity,
2926  strFeatureName,
2927  strQualifierName,
2928  strQualifierValue,
2929  strErrorMessage,
2930  vecOfOtherLines);
2931 }
2932 
2933 
2935  int line_num,
2936  ILineError::EProblem eProblem,
2937  EDiagSev eSeverity,
2938  const string & strFeatureName,
2939  const string & strQualifierName,
2940  const string & strQualifierValue,
2941  const string& strErrorMessage,
2942  const ILineError::TVecOfLines & vecOfOtherLines )
2943  {
      // Builds a CObjReaderLineException from the arguments and hands it to
      // m_pMessageListener. If the listener's PutError() returns false the
      // exception object is thrown via Throw().
2944 
      // Without a listener the message is silently dropped.
2945  if (!m_pMessageListener) {
2946  return;
2947  }
2948 
      // NOTE(review): the expression that creates the exception object is on
      // a line that is not visible in this listing; only its argument list is.
2949  unique_ptr<CObjReaderLineException> pErr (
2951  eSeverity, line_num, strErrorMessage, eProblem, m_real_seqid, strFeatureName,
2952  strQualifierName, strQualifierValue));
2953 
      // The current feature is attached to the message under a condition
      // whose line is not visible in this listing.
2955  pErr->SetObject(m_pCurrentFeat);
2956  }
2957 
      // Record any additional line numbers related to this message.
2958  for (auto line : vecOfOtherLines) {
2959  pErr->AddOtherLine(line);
2960  }
2961 
2962  if (!m_pMessageListener->PutError(*pErr)) {
2963  pErr->Throw();
2964  }
2965  }
2966 
2967 
2969  const CTempString& seq_id,
2970  const unsigned int line_number,
2971  ILineErrorListener* pListener)
2972  {
      // Sends a progress message of the form "Seq-id <seq_id>, line <N>"
      // to pListener. Does nothing when pListener is null.
2973  if (!pListener) {
2974  return;
2975  }
2976 
2977  string msg = "Seq-id " + seq_id + ", line " + NStr::IntToString(line_number);
2978  pListener->PutProgress(msg);
2979  }
2980 
2981 
2982 // helper for CFeatureTableReader_Imp::ReadSequinFeatureTable,
2983 // just so we don't forget a step when we reset the feature
2984 //
2985  void CFeatureTableReader_Imp::x_ResetFeat(CRef<CSeq_feat> & sfp, bool & curr_feat_intervals_done)
2986  {
      // Puts all per-feature state back to its initial value in one place:
      //  - no deferred strand check is pending,
      //  - sfp refers to a brand-new, empty CSeq_feat,
      //  - the caller's "intervals done" flag is cleared.
2987  m_need_check_strand = false;
2988  sfp.Reset(new CSeq_feat());
2989  curr_feat_intervals_done = false;
2990  }
2991 
2993  {
      // If the feature's location is a mix, copies the strand of the LAST
      // element of that mix into `strand`, provided that element is an
      // interval or a point with its strand set. In every other case
      // `strand` is left unchanged.
      // NOTE(review): back() is called without checking that the mix is
      // non-empty. The call site visible in the interval-adding code only
      // calls this when the mix already has elements - confirm that no
      // other caller can pass an empty mix.
2994  if (feat.IsSetLocation() && feat.GetLocation().IsMix())
2995  {
2996  const CSeq_loc& last = *feat.GetLocation().GetMix().Get().back();
2997  if (last.IsInt() && last.GetInt().IsSetStrand())
2998  {
2999  strand = last.GetInt().GetStrand();
3000  }
3001  else
3002  if (last.IsPnt() && last.GetPnt().IsSetStrand())
3003  {
3004  strand = last.GetPnt().GetStrand();
3005  }
3006  }
3007  }
3008 
3010  {
      // For a feature whose location is a mix, sets the strand of every
      // point element to `strand`. Non-point elements are not touched.
3011  if (feat.IsSetLocation() && feat.GetLocation().IsMix())
3012  {
3013 
      // NOTE(review): `auto pSeqLoc` takes each element by value; if the
      // element type is a CRef this copies the smart pointer on every
      // iteration - `auto&` would avoid that. Confirm the element type.
3014  for (auto pSeqLoc : feat.SetLocation().SetMix().Set()) {
3015  if (pSeqLoc->IsPnt()) {
3016  auto& seq_point = pSeqLoc->SetPnt();
      // Remember the strand the point had before the update; the value
      // used when no strand was set is on a line not visible here.
3017  const auto old_strand =
3018  seq_point.IsSetStrand() ?
3019  seq_point.GetStrand() :
3021 
3022  seq_point.SetStrand(strand);
      // When the strand actually changed, the partial-start and
      // partial-stop flags are exchanged: the value read from
      // IsPartialStop is written with SetPartialStart and vice versa.
      // Both reads happen after SetStrand() above.
3023  if (old_strand != strand) {
3024  const bool is_5p_partial = seq_point.IsPartialStop(eExtreme_Biological);
3025  const bool is_3p_partial = seq_point.IsPartialStart(eExtreme_Biological);
3026  seq_point.SetPartialStart(is_5p_partial, eExtreme_Biological);
3027  seq_point.SetPartialStop(is_3p_partial, eExtreme_Biological);
3028  }
3029  }
3030  }
3031  }
3032  }
3033 
3034 
3036  TFtable& ftable)
3037  {
      // Finalizes `feat` and appends it to `ftable`.
      // Nothing is appended when the feature is null, has no data, or its
      // data choice is e_not_set, or when it is a publication feature
      // without any publication (a warning is reported in that case).
      // Before appending, a mix location is simplified: an empty mix
      // becomes a null location and a one-element mix is replaced by that
      // element.
3038  if ( !feat ||
3039  !feat->IsSetData() ||
3040  (feat->GetData().Which() == CSeqFeatData::e_not_set) )
3041  {
3042  return;
3043  }
3044 
3045  // Check for missing publication - RW-626
3046  const auto& featData = feat->GetData();
3047  if (featData.GetSubtype() == CSeqFeatData::eSubtype_pub &&
3048  (!featData.GetPub().IsSetPub() ||
3049  !featData.GetPub().GetPub().IsSet() ||
3050  featData.GetPub().GetPub().Get().empty())) {
3051 
      // Report the current line number at end of input, otherwise the
      // line before it.
      // NOTE(review): m_reader is dereferenced here without a null check,
      // while x_GetLineNumber() is written to tolerate a null m_reader.
      // Confirm this function is never reached when the reader is null.
3052  const int line_number = m_reader->AtEOF() ?
3053  x_GetLineNumber() :
3054  x_GetLineNumber()-1;
3055 
3056  string msg = "Reference feature is empty. Skipping feature.";
3057 
      // The problem-code argument of this call is on a line that is not
      // visible in this listing.
3058  x_ProcessMsg(line_number,
3060  eDiag_Warning,
3061  "Reference",
3062  kEmptyStr,
3063  kEmptyStr,
3064  msg);
3065  return;
3066  }
3067 
3068  if (feat->IsSetLocation() && feat->GetLocation().IsMix())
3069  {
3070  if (feat->GetLocation().GetMix().Get().empty()) {
3071  // turn empty seqlocmix into a null seq-loc
3072  feat->SetLocation().SetNull();
3073  }
3074  else
3075  if (feat->GetLocation().GetMix().Get().size() == 1) {
3076  // demote 1-part seqlocmixes to seq-loc with just that part
      // keep_loc holds its own reference to the single part so that it
      // stays alive while the enclosing mix location is being replaced.
3077  CRef<CSeq_loc> keep_loc = *feat->SetLocation().SetMix().Set().begin();
3078  feat->SetLocation(*keep_loc);
3079  }
3080  }
3081  ftable.push_back(feat);
3082  }
3083 
3084 
3085 
3086  void CFeatureTableReader_Imp::x_ProcessQualifier(const string& qual_name,
3087  const string& qual_val,
3088  const string& feat_name,
3089  CRef<CSeq_feat> feat,
3090  TFlags flags)
3091  {
      // Applies one qualifier (qual_name / qual_val) read from the table to
      // `feat`. feat_name is passed along as context for messages.
      // Several lines of this function are not visible in this listing;
      // the affected spots are marked below.

      // A blank qualifier name is ignored entirely.
3092  if (NStr::IsBlank(qual_name)) {
3093  return;
3094  }
3095 
      // No feature to attach the qualifier to: a warning may be reported
      // (the guarding condition and the call itself are not visible here),
      // then the qualifier is dropped.
3096  if (!feat) {
3099  eDiag_Warning, kEmptyStr, qual_name, qual_val);
3100  }
3101  return;
3102  }
3103 
      // Blank value: only qualifiers listed in sc_SingleKeys are accepted
      // without a value; any other qualifier leads to a warning.
3104  if (NStr::IsBlank(qual_val)) {
3105  if (sc_SingleKeys.find(qual_name.c_str()) != sc_SingleKeys.end()) {
3106  x_AddQualifierToFeature(feat, feat_name, qual_name, qual_val, flags);
3107  }
3108  else {
3110  eDiag_Warning, feat_name, qual_name);
3111  }
3112  return;
3113  }
3114 
3115  // else qual_name and qual_val are not blank
      // When x_AddQualifierToFeature() rejects the qualifier, a warning is
      // reported and, under a condition not visible in this listing, the
      // pair is stored on the feature through x_AddGBQualToFeature().
3116  if (!x_AddQualifierToFeature(feat, feat_name, qual_name, qual_val, flags)) {
3119  eDiag_Warning, feat_name, qual_name, qual_val);
3120  }
3121 
3123  x_AddGBQualToFeature(feat, qual_name, qual_val);
3124  }
3125  }
3126  }
3127 
3128 
3129 
3131  const CTempString& in_seqid,
3132  const CTempString& in_annotname,
3133  const TFlags flags,
3134  ITableFilter *filter
3135  )
3136  {
      // Reads lines from m_reader until end of input or until the header
      // line of the next feature table is met, and returns a new Seq-annot
      // whose feature table holds the features that were read.
      // in_seqid is handed to x_InitId(); a non-empty in_annotname is
      // stored as a name descriptor on the annotation.
      // A number of lines of this function are not visible in this
      // listing; the affected spots are marked below.
3137  m_Flags = flags;
3138  string feat, qual, qual_value;
3139  string curr_feat_name;
3140  // Int4 start, stop;
3141  //bool partial5, partial3, ispoint, isminus,
3142 
3143  bool ignore_until_next_feature_key = false;
3144  Int4 offset = 0;
3145  SFeatLocInfo loc_info;
3146 
3147  CRef<CSeq_annot> sap(new CSeq_annot);
3148 
3149  TFtable& ftable = sap->SetData().SetFtable();
      // The initializer of bIgnoreWebComments is on a line not visible here.
3150  const bool bIgnoreWebComments =
3152 
3153  // if sequence ID is a list, use just one sequence ID string
3154  x_InitId(in_seqid, flags);
3155 
3156  // Use this to efficiently find the best CDS for a prot feature
3157  // (only add CDS's for it to work right)
3158  CBestFeatFinder best_CDS_finder;
3159 
3160  // map feature types to features
3161  TChoiceToFeatMap choiceToFeatMap;
3162 
3164  // This is true once this feature should not
3165  // have any more intervals.
3166  // This allows us to catch errors like the following:
3167  //
3168  //
3169  //>Feature lcl|Seq1
3170  //1 1008 CDS
3171  // gene THE_GENE_NAME
3172  //50 200
3173  // product THE_GENE_PRODUCT
3174  bool curr_feat_intervals_done = false;
3175 
3176  if (! in_annotname.empty ()) {
3177  CAnnot_descr& descr = sap->SetDesc ();
3178  CRef<CAnnotdesc> annot(new CAnnotdesc);
3179  annot->SetName (in_annotname);
3180  descr.Set().push_back (annot);
3181  }
3182 
3183  while ( !m_reader->AtEOF() ) {
3184 
3185  CTempString line = *++(*m_reader);
3186 
      // Every 10000 lines something is done whose call is on a line not
      // visible in this listing (presumably a progress report - confirm).
3187  if( m_reader->GetLineNumber() % 10000 == 0 &&
3188  m_reader->GetLineNumber() > 0 )
3189  {
3191  }
3192 
3193  // skip empty lines.
3194  // if requested, also skip webcomment lines
3195  if( line.empty () || (bIgnoreWebComments && x_IsWebComment(line) ) ) {
3196  continue;
3197  }
3198 
3199  // if next line is a new feature table, return current sap
3200  CTempStringEx dummy1, dummy2;
3201  if( ParseInitialFeatureLine(line, dummy1, dummy2) ) {
3202  m_reader->UngetLine(); // we'll get this feature line the next time around
3203  break;
3204  }
3205 
3206  if (line [0] == '[') {
3207 
3208  // try to parse it as an offset
3209  if( x_TryToParseOffset(line, offset) ) {
3210  // okay, known command
3211  } else {
3212  // warn for unknown square-bracket commands
3213  x_ProcessMsg(
3215  eDiag_Warning);
3216  }
3217 
3218  } else if ( s_LineIndicatesOrder(line) ) {
3219 
3221 
3222  // put nulls between feature intervals
      // The statement that computes loc_with_nulls is not visible here.
3224  // loc_with_nulls is unset if no change was needed
3225  if( loc_with_nulls ) {
3226  m_pCurrentFeat->SetLocation( *loc_with_nulls );
3227  }
3228 
3229  } else if (x_ParseFeatureTableLine (line, loc_info, feat, qual, qual_value, offset)) {
3230  // process line in feature table
3231 
      // Double quotes inside the qualifier value are turned into single
      // quotes.
3232  replace( qual_value.begin(), qual_value.end(), '\"', '\'' );
3233 
3234  if ((! feat.empty ()) && loc_info.start_pos >= 0 && loc_info.stop_pos >= 0) {
3235 
3236  // process start - stop - feature line
3237 
      // A line not visible here precedes the reset (presumably the
      // previous feature is finished first - confirm).
3239  x_ResetFeat(m_pCurrentFeat, curr_feat_intervals_done );
3240 
3241  if (x_SetupSeqFeat (m_pCurrentFeat, feat, flags, filter)) {
3242 
3243  // figure out type of feat, and store in map for later use
3245  if( m_pCurrentFeat->CanGetData() ) {
3246  eChoice = m_pCurrentFeat->GetData().Which();
3247  }
3248  choiceToFeatMap.insert(
3250  eChoice,
3252 
3253  // if new feature is a CDS, remember it for later lookups
3254  if( eChoice == CSeqFeatData::e_Cdregion ) {
3255  best_CDS_finder.AddFeat( *m_pCurrentFeat );
3256  }
3257 
3258  // and add first interval
      // NOTE(review): curr_feat_name is only assigned to `feat` a few
      // lines further down, so at this call it still holds the name of
      // the PREVIOUS feature (or is empty for the first feature). Any
      // message reported while adding the first interval would then name
      // the wrong feature. Passing `feat` here, or assigning
      // curr_feat_name before the call, looks like the intent - confirm.
3259  x_AddIntervalToFeature (curr_feat_name, m_pCurrentFeat, loc_info);
3260 
3261  ignore_until_next_feature_key = false;
3262 
3263  curr_feat_name = feat;
3264 
3265  } else {
3266 
3267  // bad feature, set ignore flag
3268 
3269  ignore_until_next_feature_key = true;
3270  }
3271 
3272  } else if (ignore_until_next_feature_key) {
3273 
3274  // bad feature was found before, so ignore
3275  // qualifiers until next feature key
3276 
3277  }
3278  else
      // A line that has coordinates but no feature key, qualifier or value
      // is a continuation interval of the current feature.
3279  if (loc_info.start_pos >= 0 &&
3280  loc_info.stop_pos >= 0 &&
3281  feat.empty () &&
3282  qual.empty () &&
3283  qual_value.empty ()) {
3284 
3285  if( curr_feat_intervals_done ) {
3286  // the feat intervals were done, so it's an error for there to be more intervals
3288  // this feature is in bad shape, so we ignore the rest of it
3289  ignore_until_next_feature_key = true;
3290  x_ResetFeat(m_pCurrentFeat, curr_feat_intervals_done);
      // The branch header that separates the error case above from the
      // normal case below is on a line not visible in this listing.
3292  // process start - stop multiple interval line
3293  x_AddIntervalToFeature (curr_feat_name, m_pCurrentFeat, loc_info);
3294  // start, stop, partial5, partial3, ispoint, isminus);
3295  } else {
3298  eDiag_Warning);
3299  }
3300  }
3301 
3302  } else if (!NStr::IsBlank(qual)) {
      // The first qualifier line closes the interval list of the feature.
3303  curr_feat_intervals_done = true;
3304  x_ProcessQualifier(qual, qual_value, curr_feat_name, m_pCurrentFeat, flags);
3305  }
3306  else if (!feat.empty()) {
3307 
3308  // unrecognized location
3309 
3310  // there should no more ranges for this feature
3311  // (although there still can be ranges for quals, of course).
3312  curr_feat_intervals_done = true;
3313 
3315  x_ProcessMsg(
3317  feat );
3318  }
3319  }
3320  }
3321  }
3322 
3323  // make sure last feature is finished
3325  x_ResetFeat(m_pCurrentFeat, curr_feat_intervals_done );
3326 
      // The condition guarding gene creation is on lines not visible here.
3329  {
3330  x_CreateGenesFromCDSs(sap, choiceToFeatMap, flags);
3331  }
3332  return sap;
3333  }
3334 
3335 
3337  const string& feat,
3338  CSeq_loc& location,
3339  const TFlags flags,
3340  const string &seq_id,
3341  ITableFilter *filter
3342 )
3343 
3344 {
3345  CRef<CSeq_feat> sfp (new CSeq_feat);
3346 
3347  sfp->ResetLocation ();
3348 
3349  if ( ! x_SetupSeqFeat (sfp, feat, flags, filter) ) {
3350 
3351  // bad feature, make dummy
3352  sfp->SetData ().Select (CSeqFeatData::e_not_set);
3353  }
3354  sfp->SetLocation (location);
3355 
3356  return sfp;
3357 }
3358 
3360  {
      // Chooses the Seq-id stored in m_seq_id from the textual `seq_id`,
      // which may hold several ids. A blank seq_id leaves the current
      // state untouched.
3361  if (!NStr::IsBlank(seq_id)) {
3362  CBioseq::TId ids;
      // The parse-flags argument of this call is on a line not visible here.
3363  CSeq_id::ParseIDs(ids, seq_id,
3365 
3366  m_seq_id.Reset();
      // Under a condition not visible in this listing, a GenBank id is
      // preferred. The loop has no break, so when several GenBank ids are
      // present the last one is kept.
3368  {
3369  for (auto id : ids)
3370  {
3371  if (id->IsGenbank())
3372  m_seq_id = id;
3373  }
3374  };
3375 
      // Otherwise fall back to the first parsed id.
      // NOTE(review): front() on an empty list is undefined behaviour;
      // whether ParseIDs can return without adding any id for a non-blank
      // input cannot be told from here - confirm.
3376  if (m_seq_id.Empty())
3377  m_seq_id = ids.front();
3378 
      // m_real_seqid is cleared here; the statement that refills it is on
      // a line not visible in this listing.
3379  m_real_seqid.clear();
3381  }
3382  }
3383 
3385  CRef<CSeq_feat> sfp,
3386  const string& feat_name,
3387  const string& qual,
3388  const string& val,
3389  const TFlags flags,
3390  const string &seq_id1 )
3391 
3392  {
      // Adds a single qualifier to an existing feature.
      // seq_id1 is first passed to x_InitId(). A blank qualifier name is
      // ignored. A qualifier with an empty value is only applied when its
      // name is listed in sc_SingleKeys.
3393  x_InitId(seq_id1, flags);
3394 
3395  if (NStr::IsBlank(qual)) {
3396  return;
3397  }
3398 
3399  if (!val.empty ()) { // Should probably use NStr::IsBlank()
3400  if (! x_AddQualifierToFeature (sfp, feat_name, qual, val, flags)) {
3401  // unrecognized qualifier key
      // The conditions guarding the two statements below are on lines not
      // visible in this listing: one posts a warning through ERR_POST_X,
      // the other stores the pair via x_AddGBQualToFeature().
3403  ERR_POST_X (5, Warning << "Unrecognized qualifier '" << qual << "'");
3404  }
3406  x_AddGBQualToFeature (sfp, qual, val);
3407  }
3408  }
3409  }
3410  else { // empty val
3411  // check for the few qualifiers that do not need a value
3412  auto s_iter = sc_SingleKeys.find (qual.c_str ());
3413  if (s_iter != sc_SingleKeys.end ()) {
3414  x_AddQualifierToFeature (sfp, feat_name, qual, val, flags);
3415  }
3416  }
3417  }
3418 
3419 // static
3421  const CTempString& line_arg,
3422  CTempStringEx& out_seqid,
3423  CTempStringEx& out_annotname )
3424  {
      // Recognizes the header line of a feature table: '>' followed by
      // "Feature" (compared case-insensitively), then the sequence id and
      // an optional annotation name.
      // Returns false, with both outputs cleared, when the line is not
      // such a header; returns true otherwise. The outputs are filled by
      // splitting the rest of the line in two at space / tab.
3425  out_seqid.clear();
3426  out_annotname.clear();
3427 
3428  // copy the line_arg because we can't edit line_arg itself
3429  CTempString line = line_arg;
3430 
3431  // handle ">"
      // A statement on a line not visible in this listing precedes the
      // check (presumably whitespace trimming - confirm).
3433  if( ! NStr::StartsWith(line, ">") ) {
3434  return false;
3435  }
3436  line = line.substr(1); // remove '>'
3437 
3438  // handle "Feature"
3440  const CTempString kFeatureStr("Feature");
3441  if( ! NStr::StartsWith(line, kFeatureStr, NStr::eNocase) ) {
3442  return false;
3443  }
3444  line = line.substr( kFeatureStr.length() ); // remove "Feature"
3445 
3446  // throw out any non-space characters at the beginning,
3447  // so we can, for example, handle ">Features" (note the "s")
      // NOTE(review): isspace() is called with a plain char; for bytes
      // outside the ASCII range this is undefined where char is signed.
      // Casting to unsigned char first would be the safe form - confirm
      // whether non-ASCII input can reach this point.
3448  while( !line.empty() && !isspace(line[0]) ) {
3449  line = line.substr(1);
3450  }
3451 
3452  // extract seqid and annotname
3454  NStr::SplitInTwo(line, " \t", out_seqid, out_annotname, NStr::fSplit_Tokenize);
3455 
3456  return true;
3457  }
3458 
3459 
3461  {
      // Current line number of the underlying line reader, narrowed to
      // unsigned int; 0 when no reader is attached (m_reader is null).
3462  return m_reader ?
3463  static_cast<unsigned int>(m_reader->GetLineNumber()) :
3464  0;
3465  }
3466 
3467 
3468 // public access functions
3469 
3471  TReaderFlags fReaderFlags)
3472  : CReaderBase(fReaderFlags)
3473 {
3474 }
3475 
3477  ILineReader& lr,
3478  ILineErrorListener* pErrors) :
3479  CReaderBase(0),
3480  m_pImpl(new CFeatureTableReader_Imp(&lr, 0, pErrors))
3481  {}
3482 
3485  ILineReader &lr, ILineErrorListener *pMessageListener)
3486 {
3487  CRef<CSerialObject> object(
3488  ReadSeqAnnot( lr, pMessageListener ).ReleaseOrNull() );
3489  return object;
3490 }
3491 
3492 
3495  ILineReader &lr, ILineErrorListener *pMessageListener)
3496 {
3497  return ReadSequinFeatureTable(lr, m_iFlags, pMessageListener);
3498 }
3499 
3500 
3502  CNcbiIstream& ifs,
3503  const string& seqid,
3504  const string& annotname,
3505  const TFlags flags,
3506  ILineErrorListener* pMessageListener,
3507  ITableFilter *filter
3508 )
3509 {
3510  CStreamLineReader reader(ifs);
3511  return ReadSequinFeatureTable(reader, seqid, annotname, flags, pMessageListener, filter);
3512 }
3513 
3515  ILineReader& reader,
3516  const string& seqid,
3517  const string& annotname,
3518  const TFlags flags,
3519  ILineErrorListener* pMessageListener,
3520  ITableFilter *filter
3521 )
3522 {
3523  // just read features from 5-column table
3524  CFeatureTableReader_Imp impl(&reader, 0, pMessageListener);
3525  return impl.ReadSequinFeatureTable(seqid, annotname, flags, filter);
3526 }
3527 
3529  CFeatureTableReader_Imp& reader,
3530  const CTempString& seqid,
3531  const CTempString& annot_name,
3532  TFlags flags,
3533  ITableFilter* filter) {
3534  return reader.ReadSequinFeatureTable(seqid, annot_name, flags, filter);
3535 }
3536 
3537 
3539  CNcbiIstream& ifs,
3540  const TFlags flags,
3541  ILineErrorListener* pMessageListener,
3542  ITableFilter *filter
3543 )
3544 {
3545  CStreamLineReader reader(ifs);
3546  return ReadSequinFeatureTable(reader, flags, pMessageListener, filter);
3547 }
3548 
3549 
3551  CFeatureTableReader_Imp& reader,
3552  const TFlags flags,
3553  ITableFilter* filter,
3554  const string& seqid_prefix)
3555  {
      // Scans forward for the next feature-table header line, takes the
      // sequence id and annotation name from it, optionally rewrites the
      // id using seqid_prefix, and then delegates to the overload that
      // takes the id and annotation name explicitly.
      // Returns an empty CRef when `reader` has no line reader.
3556  auto pLineReader = reader.GetLineReaderPtr();
3557  if (!pLineReader) {
3558  return CRef<CSeq_annot>();
3559  }
3560 
3561 
3562  CTempStringEx orig_seqid, annotname;
3563  // first look for >Feature line, extract seqid and optional annotname
      // The loop also ends at end of input; in that case orig_seqid is
      // still empty when the table is read below.
3564  while (orig_seqid.empty () && !pLineReader->AtEOF() ) {
3565  CTempString line = *++(*pLineReader);
3566  if( ParseInitialFeatureLine(line, orig_seqid, annotname) ) {
      // The callee of the call below is on a line not visible in this
      // listing; its visible arguments are the line number and the
      // reader's error listener.
3568  static_cast<unsigned>(pLineReader->GetLineNumber()),
3569  reader.GetErrorListenerPtr());
3570  }
3571  }
3572 
      // temp_seqid owns the rewritten id text; orig_seqid is re-pointed at
      // it below, and temp_seqid stays alive until this function returns.
3573  string temp_seqid;
3574  if (seqid_prefix.empty()) {
3575  //seqid = orig_seqid;
3576  } else {
      // With a prefix: an id without '|' gets the prefix prepended; an id
      // starting with "lcl|" has that tag replaced by the prefix.
      // NOTE(review): an id that contains '|' but does not start with
      // "lcl|" matches neither branch, so temp_seqid stays empty and the
      // assignment below replaces the original id with an empty string.
      // If such ids are meant to be passed through unchanged, the
      // assignment needs to be limited to the two handled cases - confirm
      // the intended behaviour.
3577  if (orig_seqid.find('|') == string::npos)
3578  temp_seqid = seqid_prefix + orig_seqid;
3579  else
3580  if (NStr::StartsWith(orig_seqid, "lcl|"))
3581  {
3582  temp_seqid = seqid_prefix + orig_seqid.substr(4);
3583  }
3584  orig_seqid = temp_seqid;
3585  }
3586  return x_ReadFeatureTable(reader, orig_seqid, annotname, flags, filter);
3587  }
3588 
3589 
3591  ILineReader& reader,
3592  const TFlags flags,
3593  ILineErrorListener* pMessageListener,
3594  ITableFilter* pFilter,
3595  const string& seqid_prefix
3596 )
3597 {
3598  CFeatureTableReader_Imp ftable_reader(&reader, 0, pMessageListener);
3599  return x_ReadFeatureTable(ftable_reader, flags, pFilter, seqid_prefix);
3600 }
3601 
3602 
3604  const TFlags flags,
3605  ITableFilter* pFilter,
3606  const string& seqid_prefix
3607 )
3608 {
3609  return x_ReadFeatureTable(*m_pImpl, flags, pFilter, seqid_prefix);
3610 }
3611 
3612 
3614  CNcbiIstream& ifs,
3615  CSeq_entry& entry,
3616  const TFlags flags,
3617  ILineErrorListener* pMessageListener,
3618  ITableFilter *filter
3619 )
3620 {
3621  CStreamLineReader reader(ifs);
3622  return ReadSequinFeatureTables(reader, entry, flags, pMessageListener, filter);
3623 }
3624 
3625 void
3627  const list<string>& stringFlags,
3628  TFlags& baseFlags)
3629 {
3630  static const map<string, CFeature_table_reader::TReaderFlags> flagsMap = {
3631  { "KeepBadKey", CFeature_table_reader::fKeepBadKey},
3632  { "TranslateBadKey", CFeature_table_reader::fTranslateBadKey},
3633  { "IgnoreWebComments", CFeature_table_reader::fIgnoreWebComments},
3634  { "CreateGenesFromCDSs", CFeature_table_reader::fCreateGenesFromCDSs},
3635  { "CDSsMustBeInTheirGenes", CFeature_table_reader::fCDSsMustBeInTheirGenes},
3636  { "ReportDiscouragedKey", CFeature_table_reader::fReportDiscouragedKey},
3637  { "LeaveProteinIds", CFeature_table_reader::fLeaveProteinIds},
3638  { "AllIdsAsLocal", CFeature_table_reader::fAllIdsAsLocal},
3639  { "PreferGenbankId", CFeature_table_reader::fPreferGenbankId},
3640  { "SuppressBadKeyWarning", CFeature_table_reader::fSuppressBadKeyWarning},
3641  };
3642 
3643  return CReaderBase::xAddStringFlagsWithMap(stringFlags, flagsMap, baseFlags);
3644 };
3645 
3646 
3648 {
3649  inline
3650  bool operator()(const CSeq_id* left, const CSeq_id* right) const
3651  {
3652  return *left < *right;
3653  };
3654 };
3655 
3657  ILineReader& reader,
3658  CSeq_entry& entry,
3659  const TFlags flags,
3660  ILineErrorListener* pMessageListener,
3661  ITableFilter *filter
3662  )
3663  {
      // Reads feature tables from `reader` until end of input and attaches
      // each resulting annotation to `entry`: directly to the sequence when
      // the entry is a single sequence, otherwise to the Bioseq whose id
      // matches the id of the table's first feature, or to the set itself
      // when no Bioseq matches (a warning is posted in that case).
3664  // let's use map to speedup matching on very large files, see SQD-1847
      // The declaration of seq_map is on a line not visible in this listing.
3666 
      // Index every Bioseq in the entry under each of its ids.
3667  for (CTypeIterator<CBioseq> seqit(entry); seqit; ++seqit) {
3668  ITERATE (CBioseq::TId, seq_id, seqit->GetId()) {
3669  seq_map[seq_id->GetPointer()].Reset(&*seqit);
3670  }
3671  }
3672 
3673  CFeatureTableReader_Imp ftable_reader(&reader, 0, pMessageListener);
3674  while ( !reader.AtEOF() ) {
3675  auto annot = x_ReadFeatureTable(ftable_reader, flags, filter);
3676  if (entry.IsSeq()) { // only one place to go
3677  entry.SetSeq().SetAnnot().push_back(annot);
3678  continue;
3679  }
3680  _ASSERT(annot->GetData().IsFtable());
      // Tables without features are dropped when the entry is not a
      // single sequence.
3681  if (annot->GetData().GetFtable().empty()) {
3682  continue;
3683  }
3684  // otherwise, take the first feature, which should be representative
3685  const CSeq_feat& feat = *annot->GetData().GetFtable().front();
3686  const CSeq_id* feat_id = feat.GetLocation().GetId();
3687  CBioseq* seq = nullptr;
      // NOTE(review): feat_id is only guarded by _ASSERT. If that macro is
      // compiled out in release builds, a null feat_id would be
      // dereferenced in the warning below - confirm, or add a real check.
3688  _ASSERT(feat_id); // we expect a uniform sequence ID
      // NOTE(review): the lookup uses operator[]; if seq_map is a std::map
      // this inserts an empty entry for every id that is not found - a
      // find() would avoid that. Confirm the container type.
3689  seq = seq_map[feat_id].GetPointer();
3690  if (seq) { // found a match
3691  seq->SetAnnot().push_back(annot);
3692  } else { // just package on the set
3693  ERR_POST_X(6, Warning
3694  << "ReadSequinFeatureTables: unable to find match for "
3695  << feat_id->AsFastaString());
3696  entry.SetSet().SetAnnot().push_back(annot);
3697  }
3698  }
3699  }
3700 
3701 
3703  const string& feat,
3704  CSeq_loc& location,
3705  const TFlags flags,
3706  ILineErrorListener* pMessageListener,
3707  unsigned int line_number,
3708  string *seq_id,
3709  ITableFilter *filter
3710 )
3711 {
3712  CFeatureTableReader_Imp impl(nullptr, line_number, pMessageListener);
3713  return impl.CreateSeqFeat (feat, location, flags, (seq_id ? *seq_id : string() ), filter);
3714 }
3715 
3716 
3718  CRef<CSeq_feat> sfp,
3719  const string& feat_name,
3720  const string& qual,
3721  const string& val,
3723  ILineErrorListener* pMessageListener,
3724  int line_number,
3725  const string &seq_id
3726 )
3727 
3728 {
3729  CFeatureTableReader_Imp impl(nullptr, line_number, pMessageListener);
3730  impl.AddFeatQual (sfp, feat_name, qual, val, flags, seq_id) ;
3731 }
3732 
3733 bool
3735  const CTempString& line_arg,
3736  CTempStringEx& out_seqid,
3737  CTempStringEx& out_annotname )
3738 {
3739  return CFeatureTableReader_Imp::ParseInitialFeatureLine(line_arg, out_seqid, out_annotname);
3740 }
3741 
3742 
3744 
3745 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
bool AddFeat(const CSeq_feat &new_cds)
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
bool x_AddQualifierToFeature(CRef< CSeq_feat > sfp, const string &feat_name, const string &qual, const string &val, const TFlags flags)
Definition: readfeat.cpp:2214
static void PutProgress(const CTempString &seq_id, const unsigned int line_number, ILineErrorListener *pListener)
Definition: readfeat.cpp:2968
CSeq_annot::C_Data::TFtable TFtable
Definition: readfeat.cpp:297
bool x_TryToParseOffset(const CTempString &sLine, Int4 &out_offset)
Definition: readfeat.cpp:870
void AddFeatQual(CRef< CSeq_feat > sfp, const string &feat_name, const string &qual, const string &val, const TFlags flags, const string &seq_id)
Definition: readfeat.cpp:3384
bool x_AddQualifierToImp(CRef< CSeq_feat > sfp, CSeqFeatData &sfdata, EQual qtype, const string &qual, const string &val)
Definition: readfeat.cpp:1698
static bool ParseInitialFeatureLine(const CTempString &line_arg, CTempStringEx &out_seqid, CTempStringEx &out_annotname)
Definition: readfeat.cpp:3420
bool x_AddNoteToFeature(CRef< CSeq_feat > sfp, const string &note)
Definition: readfeat.cpp:2174
CConstRef< CSeq_feat > TFeatConstRef
Definition: readfeat.cpp:420
void x_TokenizeStrict(const CTempString &line, vector< string > &out_tokens)
Definition: readfeat.cpp:1045
bool x_AddQualifierToGene(CSeqFeatData &sfdata, EQual qtype, const string &val)
Definition: readfeat.cpp:1164
ILineReader *const GetLineReaderPtr(void)
Definition: readfeat.cpp:334
string x_TrnaToAaString(const string &val)
Definition: readfeat.cpp:1368
bool x_AddIntervalToFeature(CTempString strFeatureName, CRef< CSeq_feat > &sfp, const SFeatLocInfo &loc_info)
Definition: readfeat.cpp:2664
unsigned int m_LineNumber
Definition: readfeat.cpp:504
void x_ProcessQualifier(const string &qual_name, const string &qual_val, const string &feat_name, CRef< CSeq_feat > feat, TFlags flags)
Definition: readfeat.cpp:3086
void x_InitId(const CTempString &seq_id, const TFlags flags)
Definition: readfeat.cpp:3359
multimap< CSeqFeatData::E_Choice, SFeatAndLineNum > TChoiceToFeatMap
Definition: readfeat.cpp:446
ILineReader * m_reader
Definition: readfeat.cpp:503
bool x_AddQualifierToRna(CRef< CSeq_feat > sfp, EQual qtype, const string &val)
Definition: readfeat.cpp:1537
void x_ResetFeat(CRef< CSeq_feat > &feat, bool &curr_feat_intervals_done)
Definition: readfeat.cpp:2985
CRef< CSeq_annot > ReadSequinFeatureTable(const CTempString &seqid, const CTempString &annotname, const TFlags flags, ITableFilter *filter)
Definition: readfeat.cpp:3130
bool x_ParseTrnaExtString(CTrna_ext &ext_trna, const string &str)
Definition: readfeat.cpp:1389
void x_CreateGenesFromCDSs(CRef< CSeq_annot > sap, TChoiceToFeatMap &choiceToFeatMap, const TFlags flags)
Definition: readfeat.cpp:1952
long x_StringToLongNoThrow(CTempString strToConvert, CTempString strFeatureName, CTempString strQualifierName, ILineError::EProblem eProblem=ILineError::eProblem_Unset)
Definition: readfeat.cpp:1492
ILineErrorListener * m_pMessageListener
Definition: readfeat.cpp:505
void x_FinishFeature(CRef< CSeq_feat > &feat, TFtable &ftable)
Definition: readfeat.cpp:3035
CFeatureTableReader_Imp(ILineReader *reader, unsigned int line_num, ILineErrorListener *pMessageListener)
Definition: readfeat.cpp:859
bool x_AddGBQualToFeature(CRef< CSeq_feat > sfp, const string &qual, const string &val)
Definition: readfeat.cpp:1915
bool x_IsWebComment(CTempString line)
Definition: readfeat.cpp:2611
CRef< CSeq_id > m_seq_id
Definition: readfeat.cpp:502
CFeature_table_reader::TFlags TFlags
Definition: readfeat.cpp:296
ILineErrorListener *const GetErrorListenerPtr(void)
Definition: readfeat.cpp:338
bool x_StringIsJustQuotes(const string &str)
Definition: readfeat.cpp:1300
void x_GetPointStrand(const CSeq_feat &feat, CSeq_interval::TStrand &strand) const
Definition: readfeat.cpp:2992
CRef< CSeq_feat > CreateSeqFeat(const string &feat, CSeq_loc &location, const TFlags flags, const string &seq_id, ITableFilter *filter)
Definition: readfeat.cpp:3336
SIZE_TYPE x_MatchingParenPos(const string &str, SIZE_TYPE open_paren_pos)
Definition: readfeat.cpp:1457
bool x_AddQualifierToCdregion(CRef< CSeq_feat > sfp, CSeqFeatData &sfdata, EQual qtype, const string &val)
Definition: readfeat.cpp:1204
void x_TokenizeLenient(const CTempString &line, vector< string > &out_tokens)
Definition: readfeat.cpp:1085
void x_UpdatePointStrand(CSeq_feat &feat, CSeq_interval::TStrand strand) const
Definition: readfeat.cpp:3009
unordered_set< string > m_ProcessedProteinIds
Definition: readfeat.cpp:507
bool x_ParseFeatureTableLine(const CTempString &line, SFeatLocInfo &loc_info, string &feat, string &qual, string &val, Int4 offset)
Definition: readfeat.cpp:916
CRef< CSeq_feat > m_pCurrentFeat
Definition: readfeat.cpp:499
bool x_AddQualifierToBioSrc(CSeqFeatData &sfdata, const string &feat_name, EOrgRef rtype, const string &val)
Definition: readfeat.cpp:1813
unsigned int x_GetLineNumber() const
Definition: readfeat.cpp:3460
CFeatureTableReader_Imp & operator=(const CFeatureTableReader_Imp &value)
bool x_AddCodons(const string &val, CTrna_ext &trna_ext) const
Definition: readfeat.cpp:1664
bool x_SetupSeqFeat(CRef< CSeq_feat > sfp, const string &feat, const TFlags flags, ITableFilter *filter)
Definition: readfeat.cpp:2764
unordered_set< string > m_ProcessedTranscriptIds
Definition: readfeat.cpp:506
void x_ProcessMsg(int line_num, ILineError::EProblem eProblem, EDiagSev eSeverity, const std::string &strFeatureName=kEmptyStr, const std::string &strQualifierName=kEmptyStr, const std::string &strQualifierValue=kEmptyStr, const std::string &strErrorMessage=kEmptyStr, const ILineError::TVecOfLines &vecOfOtherLines=ILineError::TVecOfLines())
void x_ProcessMsg(ILineError::EProblem eProblem, EDiagSev eSeverity, const std::string &strFeatureName=kEmptyStr, const std::string &strQualifierName=kEmptyStr, const std::string &strQualifierValue=kEmptyStr, const std::string &strErrorMessage=kEmptyStr, const ILineError::TVecOfLines &vecOfOtherLines=ILineError::TVecOfLines())
CFeatureTableReader_Imp(const CFeatureTableReader_Imp &value)
CFeature_table_reader(TReaderFlags fReaderFlags=0)
Definition: readfeat.cpp:3470
CRef< CSerialObject > ReadObject(ILineReader &lr, ILineErrorListener *pErrors) override
Read an object from a given line reader, render it as the most appropriate Genbank object.
Definition: readfeat.cpp:3484
long TFlags
binary OR of EFlags
Definition: readfeat.hpp:80
static bool ParseInitialFeatureLine(const CTempString &line_arg, CTempStringEx &out_seqid, CTempStringEx &out_annotname)
If line_arg is a feature line (e.g.
Definition: readfeat.cpp:3734
static void ReadSequinFeatureTables(ILineReader &reader, CSeq_entry &entry, const TFlags flags=0, ILineErrorListener *pMessageListener=nullptr, ITableFilter *filter=nullptr)
Definition: readfeat.cpp:3656
@ fSuppressBadKeyWarning
= 0x400 (Suppress 'bad key' errors; Not recommended.)
Definition: readfeat.hpp:77
@ fReportDiscouragedKey
= 0x40 (Report discouraged keys into the error container)
Definition: readfeat.hpp:73
@ fKeepBadKey
= 0x02 (As much as possible, try to use bad keys as if they were acceptable)
Definition: readfeat.hpp:68
@ fIgnoreWebComments
= 0x08 (ignore web comment lines such as lines that start with " INFO:", or consist of many equals si...
Definition: readfeat.hpp:70
@ fIncludeObjectInMsg
= 0x800 (Include reference to feature object in message).
Definition: readfeat.hpp:78
@ fAllIdsAsLocal
= 0x100 (Do not attempt to parse accessions)
Definition: readfeat.hpp:75
@ fLeaveProteinIds
= 0x80 (Leave all protein_id as a qualifiers)
Definition: readfeat.hpp:74
@ fCreateGenesFromCDSs
= 0x10 (If a CDS has a gene xref, create a gene with the same intervals if one doesn't already exist....
Definition: readfeat.hpp:71
@ fPreferGenbankId
= 0x200 (Prefer Genbank accession ids)
Definition: readfeat.hpp:76
@ fTranslateBadKey
= 0x04 (yields misc_feature /standard_name="...")
Definition: readfeat.hpp:69
@ fCDSsMustBeInTheirGenes
= 0x20 (If a CDS has a gene xref, it *must* be inside of that gene)
Definition: readfeat.hpp:72
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: readfeat.cpp:3494
unique_ptr< CFeatureTableReader_Imp > m_pImpl
Definition: readfeat.hpp:192
static CRef< CSeq_annot > x_ReadFeatureTable(CFeatureTableReader_Imp &reader, const CTempString &seqid, const CTempString &annot_name, const TFlags flags, ITableFilter *filter)
Definition: readfeat.cpp:3528
static CRef< CSeq_feat > CreateSeqFeat(const string &feat, CSeq_loc &location, const TFlags flags=0, ILineErrorListener *pMessageListener=nullptr, unsigned int line=0, std::string *seq_id=nullptr, ITableFilter *filter=nullptr)
Definition: readfeat.cpp:3702
static void AddFeatQual(CRef< CSeq_feat > sfp, const string &feat_name, const string &qual, const string &val, const TFlags flags=0, ILineErrorListener *pMessageListener=nullptr, int line=0, const string &seq_id=std::string())
Definition: readfeat.cpp:3717
static void AddStringFlags(const list< string > &stringFlags, TFlags &baseFlags)
Definition: readfeat.cpp:3626
CRef< CSeq_annot > ReadSequinFeatureTable(const TFlags flags=0, ITableFilter *filter=nullptr, const string &seqid_prefix=kEmptyStr)
Definition: readfeat.cpp:3603
Gb_qual.hpp: User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static const CTrans_table & GetTransTable(int id)
static int CodonToIndex(char base1, char base2, char base3)
Import: features imported from other databases...
Definition: Imp_feat_.hpp:77
static void GetPrefixAndRemainder(const string &inference, string &prefix, string &remainder)
Definition: Gb_qual.cpp:381
bool operator()(char c)
Definition: readfeat.cpp:1082
bool operator()(char c)
Definition: readfeat.cpp:1077
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
Definition: line_error.cpp:417
OrgMod.hpp: User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
Definition: Pub.hpp:56
CRNA_qual –.
Definition: RNA_qual.hpp:66
RNA_ref.hpp: User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
static void AddGeneOntologyTerm(CSeq_feat &feature, const CTempString &qual, const CTempString &val)
Definition: read_util.cpp:296
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
long TReaderFlags
Definition: reader_base.hpp:84
TReaderFlags m_iFlags
static void xAddStringFlagsWithMap(const list< string > &stringFlags, const map< string, TReaderFlags > flagMap, TReaderFlags &baseFlags)
void SetProt(TProt &v)
void SetRegion(const TRegion &v)
void SetBiosrc(TBiosrc &v)
static bool IsDiscouragedQual(EQualifier qual)
EQualifier
List of available qualifiers for feature keys.
void SetBond(const TBond &v)
static bool CanHaveGene(ESubtype subtype)
void SetSite(const TSite &v)
static const CSiteList * GetSiteList()
void SetPub(TPub &v)
ESubtype GetSubtype(void) const
void SetImp(TImp &v)
static bool IsDiscouragedSubtype(ESubtype subtype)
static E_Choice GetTypeFromSubtype(ESubtype subtype)
void SetRna(TRna &v)
void SetCdregion(TCdregion &v)
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
@ eSubtype_transit_peptide_aa
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static const CBondList * GetBondList()
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static ESubtype SubtypeNameToValue(CTempString sName)
Turn a string into its ESubtype which is NOT necessarily related to the identifier of the enum.
static bool IsRegulatory(ESubtype subtype)
void SetGene(TGene &v)
static const vector< string > & GetRegulatoryClassList()
CSeqIdException –.
Definition: Seq_id.hpp:1001
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
Definition: Seq_feat.cpp:181
void SetGeneXref(CGene_ref &value)
Definition: Seq_feat.cpp:192
void SetProtXref(CProt_ref &value)
Definition: Seq_feat.cpp:233
void SetPartialStart(bool val, ESeqLocExtremes ext)
void SetPartialStop(bool val, ESeqLocExtremes ext)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
void SetRightOf(bool val)
Definition: Seq_point.cpp:193
void SetPartialStart(bool val, ESeqLocExtremes ext)
Definition: Seq_point.cpp:100
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_point.cpp:116
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
Simple implementation of ILineReader for i(o)streams.
CStringException –.
Definition: ncbistr.hpp:4500
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
The amino acid (aa) that this tRNA carries.
Definition: Trna_ext_.hpp:96
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
@ eParse_Number
Parse a real or integer number, otherwise string.
Definition: User_object.hpp:62
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
virtual void PutProgress(const string &sMessage, const Uint8 iNumDone=0, const Uint8 iNumTotal=0)=0
This is used for processing progress messages.
virtual EDiagSev Severity(void) const
Definition: line_error.hpp:161
@ eProblem_InvalidQualifier
Definition: line_error.hpp:91
@ eProblem_QualifierBadValue
Definition: line_error.hpp:67
@ eProblem_NumericQualifierValueIsNotANumber
Definition: line_error.hpp:60
@ eProblem_InternalPartialsInFeatLocation
Definition: line_error.hpp:71
@ eProblem_FeatMustBeInXrefdGene
Definition: line_error.hpp:72
@ eProblem_UnrecognizedFeatureName
Definition: line_error.hpp:57
@ eProblem_FeatureNameNotAllowed
Definition: line_error.hpp:61
@ eProblem_DuplicateIDs
Definition: line_error.hpp:93
@ eProblem_IncompleteFeature
Definition: line_error.hpp:64
@ eProblem_QualifierWithoutFeature
Definition: line_error.hpp:63
@ eProblem_FeatureBadStartAndOrStop
Definition: line_error.hpp:65
@ eProblem_NumericQualifierValueHasExtraTrailingCharacters
Definition: line_error.hpp:59
@ eProblem_UnrecognizedSquareBracketCommand
Definition: line_error.hpp:74
@ eProblem_UnrecognizedQualifierName
Definition: line_error.hpp:58
@ eProblem_BadFeatureInterval
Definition: line_error.hpp:66
@ eProblem_DiscouragedFeatureName
Definition: line_error.hpp:89
@ eProblem_NoFeatureProvidedOnIntervals
Definition: line_error.hpp:62
@ eProblem_DiscouragedQualifierName
Definition: line_error.hpp:90
vector< unsigned int > TVecOfLines
Definition: line_error.hpp:121
virtual EProblem Problem(void) const =0
virtual const string & ErrorMessage() const
Definition: line_error.cpp:140
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Use to give a feature filter to CFeature_table_reader.
EAction
How a given feature name should be handled.
@ eAction_Okay
Just accept the feat.
@ eAction_Disallowed
Do not accept the feat and give message eProblem_FeatureNameNotAllowed.
virtual EAction GetFeatAction(const string &feature_name) const =0
Returns how we should treat the given feature name.
Definition: map.hpp:338
const_iterator_pair equal_range(const key_type &key) const
Definition: map.hpp:296
iterator insert(const value_type &val)
Definition: map.hpp:305
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
const_iterator_pair equal_range(const key_type &key) const
Definition: set.hpp:140
bool empty() const
Definition: set.hpp:133
const_iterator end() const
Definition: set.hpp:136
void swap(this_type &m)
Definition: set.hpp:102
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static unsigned int line_num
Definition: attributes.c:11
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
static const char location[]
Definition: config.c:97
const TResidue codons[4][4]
Definition: gnomon_seq.cpp:76
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ENTREZ_ID_FROM(T, value)
Definition: ncbimisc.hpp:1098
string
Definition: cgiapp.hpp:690
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
virtual void UngetLine(void)=0
Unget current line, which must be valid.
virtual Uint8 GetLineNumber(void) const =0
Returns the current line number (counting from 1, not 0).
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
Definition: Seq_id.hpp:80
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
Definition: Seq_id.hpp:90
@ fParse_Default
By default in ParseIDs and IsValid, allow raw parsable non-numeric accessions and plausible local acc...
Definition: Seq_id.hpp:102
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
Definition: Seq_id.hpp:87
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
Definition: Seq_id.hpp:607
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5087
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
Definition: Seq_loc.cpp:2573
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetNull(void)
Override all setters to incorporate cache invalidation.
Definition: Seq_loc.hpp:960
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
@ fSortAndMerge_All
Definition: Seq_loc.hpp:334
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
Definition: tempstr.hpp:306
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
CTempStringEx substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:1010
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
void clear(void)
Clear value to an empty string.
Definition: tempstr.hpp:1003
static long StringToLong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to long.
Definition: ncbistr.cpp:653
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
size_t size_type
Definition: tempstr.hpp:70
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3545
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
Definition: tempstr.hpp:553
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
static const size_type npos
Definition: tempstr.hpp:72
const_iterator begin() const
Return an iterator to the string's starting position.
Definition: tempstr.hpp:299
@ fAllowTrailingSymbols
Ignore trailing non-numeric characters.
Definition: ncbistr.hpp:298
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2510
@ eTrunc_End
Truncate trailing whitespace only.
Definition: ncbistr.hpp:2241
@ eTrunc_Begin
Truncate leading whitespace only.
Definition: ncbistr.hpp:2240
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: SubSource_.hpp:319
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
void SetGenome(TGenome value)
Assign a value to Genome data member.
Definition: BioSource_.hpp:428
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
void SetName(const TName &value)
Assign a value to Name data member.
Definition: SubSource_.hpp:359
EGenome
biological context
Definition: BioSource_.hpp:97
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
@ eSubtype_collection_date
DD-MMM-YYYY format.
Definition: SubSource_.hpp:114
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:117
@ eSubtype_lat_lon
+/- decimal degrees
Definition: SubSource_.hpp:113
@ eSubtype_collected_by
name of person who collected the sample
Definition: SubSource_.hpp:115
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:118
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
@ eSubtype_identified_by
name of person who identified the sample
Definition: SubSource_.hpp:116
TSyn & SetSyn(void)
Assign a value to Syn data member.
Definition: Gene_ref_.hpp:774
void SetAllele(const TAllele &value)
Assign a value to Allele data member.
Definition: Gene_ref_.hpp:561
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:781
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
Definition: Gene_ref_.hpp:514
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
Definition: Gene_ref_.hpp:802
void SetMaploc(const TMaploc &value)
Assign a value to Maploc data member.
Definition: Gene_ref_.hpp:655
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
list< string > TSyn
Definition: Gene_ref_.hpp:102
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
void SetDesc(const TDesc &value)
Assign a value to Desc data member.
Definition: Gene_ref_.hpp:608
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
void SetDiv(const TDiv &value)
Assign a value to Div data member.
Definition: OrgName_.hpp:1014
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: OrgMod_.hpp:316
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
void SetGcode(TGcode value)
Assign a value to Gcode data member.
Definition: OrgName_.hpp:927
void SetMgcode(TMgcode value)
Assign a value to Mgcode data member.
Definition: OrgName_.hpp:974
TMod & SetMod(void)
Assign a value to Mod data member.
Definition: OrgName_.hpp:845
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
Definition: Org_ref_.cpp:87
void SetSubname(const TSubname &value)
Assign a value to Subname data member.
Definition: OrgMod_.hpp:356
void SetLineage(const TLineage &value)
Assign a value to Lineage data member.
Definition: OrgName_.hpp:873
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_subgroup
Definition: OrgMod_.hpp:99
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_anamorph
Definition: OrgMod_.hpp:112
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_authority
Definition: OrgMod_.hpp:107
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_subtype
Definition: OrgMod_.hpp:88
@ eSubtype_teleomorph
Definition: OrgMod_.hpp:113
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_synonym
Definition: OrgMod_.hpp:111
@ eSubtype_group
Definition: OrgMod_.hpp:98
@ eSubtype_type_material
Definition: OrgMod_.hpp:121