NCBI C++ ToolKit
gff2_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff2_data.cpp 100303 2023-07-18 23:12:04Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GFF file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
37 #include <objects/seq/so_map.hpp>
42 #include <objects/seq/Annot_id.hpp>
58 
64 
67 
68 BEGIN_objects_SCOPE // namespace ncbi::objects::
69 
70 // ----------------------------------------------------------------------------
// Parses one code-break description of the form
//   (pos:<from>..<to>,aa:<name>)  or  (pos:complement(<from>..<to>),aa:<name>)
// and builds a CCode_break whose location is an interval on `id`.
//
// str: the text to parse.
// id:  sequence id stored in the resulting interval.
// Returns a null CRef when `str` is not wrapped in "(pos:" ... ")", has no
// ",aa:" part, has no ".." range separator, or its coordinates are not
// integers. Both coordinates are decremented by one before being stored.
// A "complement(...)" wrapper selects the minus strand, otherwise plus.
// NOTE(review): the line(s) carrying the function name, the return type and
// any further parameter are not part of this listing.
72  const string& str,
73  CSeq_id& id,
75 // ----------------------------------------------------------------------------
76 {
77  const string cdstr_start = "(pos:";
78  const string cdstr_div = ",aa:";
79  const string cdstr_end = ")";
80 
81  CRef<CCode_break> pCodeBreak;
82  if (!NStr::StartsWith(str, cdstr_start) || !NStr::EndsWith(str, cdstr_end)) {
83  return pCodeBreak;
84  }
85  size_t pos_start = cdstr_start.length();
86  size_t pos_stop = str.find(cdstr_div);
    // Without this check an absent ",aa:" leaves pos_stop == npos, and the
    // substr() arithmetic below silently wraps around instead of failing.
    if (pos_stop == string::npos) {
        return pCodeBreak;
    }
87  string posstr = str.substr(pos_start, pos_stop-pos_start);
    // The amino acid text is extracted but not interpreted yet (see aacode).
88  string aaa = str.substr(pos_stop+cdstr_div.length());
89  aaa = aaa.substr(0, aaa.length()-cdstr_end.length());
90 
91  const string posstr_compl = "complement(";
92  ENa_strand strand = eNa_strand_plus;
93  if (NStr::StartsWith(posstr, posstr_compl)) {
    // drop the "complement(" prefix and the last character of the remainder
94  posstr = posstr.substr(posstr_compl.length());
95  posstr = posstr.substr(0, posstr.length()-1);
96  strand = eNa_strand_minus;
97  }
98  const string posstr_div = "..";
99  size_t pos_div = posstr.find(posstr_div);
100  if (pos_div == string::npos) {
101  return pCodeBreak;
102  }
103 
104  int from, to;
105  try {
106  from = NStr::StringToInt(posstr.substr(0, pos_div))-1;
107  to = NStr::StringToInt(posstr.substr(pos_div + posstr_div.length()))-1;
108  }
109  catch(...) {
    // any conversion failure means the position text is malformed
110  return pCodeBreak;
111  }
112 
113  int aacode = 'U'; //for now
114 
115  pCodeBreak.Reset(new CCode_break);
116  pCodeBreak->SetLoc().SetInt().SetId(id);
117  pCodeBreak->SetLoc().SetInt().SetFrom(from);
118  pCodeBreak->SetLoc().SetInt().SetTo(to);
119  pCodeBreak->SetLoc().SetInt().SetStrand(strand);
120  pCodeBreak->SetAa().SetNcbieaa(aacode);
121  return pCodeBreak;
122 }
123 
124 // ----------------------------------------------------------------------------
// Translates a genome name (e.g. "chloroplast", "plasmid") into the matching
// CBioSource::EGenome value using a function-local static lookup table.
//
// genome: the name to look up; the lookup is an exact, case-sensitive
//         map::find on the keys listed below.
// Returns the mapped enum value when the name is known.
// NOTE(review): the statement that supplies the result for unknown names is
// not part of this listing, nor is the signature line with the function name
// and any further parameter.
126  const string& genome,
128 // ----------------------------------------------------------------------------
129 {
130  typedef map<string, CBioSource::EGenome> GENOME_MAP;
131  static CSafeStatic<GENOME_MAP> s_GenomeMap;
132  GENOME_MAP& sGenomeMap = *s_GenomeMap;
    // The table is filled the first time it is found empty.
    // NOTE(review): no lock is visible around this fill; confirm that
    // concurrent first calls cannot happen.
133  if (sGenomeMap.empty()) {
134  sGenomeMap["apicoplast"] = CBioSource::eGenome_apicoplast;
135  sGenomeMap["chloroplast"] = CBioSource::eGenome_chloroplast;
136  sGenomeMap["chromatophore"] = CBioSource::eGenome_chromatophore;
137  sGenomeMap["chromoplast"] = CBioSource::eGenome_chromoplast;
138  sGenomeMap["chromosome"] = CBioSource::eGenome_chromosome;
139  sGenomeMap["cyanelle"] = CBioSource::eGenome_cyanelle;
140  sGenomeMap["endogenous_virus"] = CBioSource::eGenome_endogenous_virus;
141  sGenomeMap["extrachrom"] = CBioSource::eGenome_extrachrom;
142  sGenomeMap["genomic"] = CBioSource::eGenome_genomic;
143  sGenomeMap["hydrogenosome"] = CBioSource::eGenome_hydrogenosome;
144  sGenomeMap["insertion_seq"] = CBioSource::eGenome_insertion_seq;
145  sGenomeMap["kinetoplast"] = CBioSource::eGenome_kinetoplast;
146  sGenomeMap["leucoplast"] = CBioSource::eGenome_leucoplast;
147  sGenomeMap["macronuclear"] = CBioSource::eGenome_macronuclear;
148  sGenomeMap["mitochondrion"] = CBioSource::eGenome_mitochondrion;
149  sGenomeMap["nucleomorph"] = CBioSource::eGenome_nucleomorph;
150  sGenomeMap["plasmid"] = CBioSource::eGenome_plasmid;
151  sGenomeMap["plastid"] = CBioSource::eGenome_plastid;
152  sGenomeMap["proplastid"] = CBioSource::eGenome_proplastid;
153  sGenomeMap["proviral"] = CBioSource::eGenome_proviral;
154  sGenomeMap["transposon"] = CBioSource::eGenome_transposon;
155  sGenomeMap["virion"] = CBioSource::eGenome_virion;
156  }
157  GENOME_MAP::const_iterator cit = sGenomeMap.find(genome);
158  if (cit != sGenomeMap.end()) {
159  return cit->second;
160  }
162 }
163 
164 // -----------------------------------------------------------------------------
// Splits one GFF data line into at most nine columns.
//
// columns: output; cleared first. The elements are CTempStringEx values
//          obtained from in_line.substr(), not std::string copies.
// in_line: the raw line to split.
//
// A tab-only split is tried first and accepted when it yields exactly nine
// columns. Otherwise the line is re-split on tabs and spaces: the first eight
// columns are delimiter-separated tokens and whatever remains becomes the
// final column.
// NOTE(review): the signature line and the statement performing the tab-only
// split are not part of this listing.
166  vector<CTempStringEx>& columns,
167  const CTempStringEx& in_line)
168 // -----------------------------------------------------------------------------
169 {
170  columns.clear();
171  columns.reserve(9);
172  size_t index;
173  // first try to split just using tabs
175  if (columns.size() == 9)
176  return;
177  columns.clear();
178 
179  // better to be thread-safe static
180  const CTempString space_tab_delim("\t ");
181  const CTempString digits("0123456789");
182 
183  size_t current = 0;
    // Each pass takes the token [current, index) and then skips the whole
    // run of delimiters that follows it.
184  while (current != CTempStringEx::npos && columns.size()<8 &&
185  (index = in_line.find_first_of(space_tab_delim, current)) != CTempStringEx::npos) {
186  CTempStringEx next = in_line.substr(current, index-current);
187  current = in_line.find_first_not_of(space_tab_delim, index);
188  if (columns.size() == 5) {
189  // reminder:
190  // columns [3] and [4] are positions and must always be numeric
191  // column [5] is a score (floating point or "." if absent)
192  bool isNumericCol3 = columns[3].find_first_not_of(digits) == CTempStringEx::npos;
193  bool isNumericCol4 = columns[4].find_first_not_of(digits) == CTempStringEx::npos;
194  bool isNumericNext = next.find_first_not_of(digits) == CTempStringEx::npos;
195 
    // A non-numeric [3] followed by two numeric tokens means an earlier
    // field contained a delimiter and was split in two.
196  if (!isNumericCol3 && isNumericCol4 && isNumericNext) {
197  // merge col2 with col3 and shift subsequent columns
198  // operations with iterators are legal since all belong to the same memory space
    // In 0-based terms: columns[1] is widened to run from its own start to
    // the end of columns[2]; [3], [4] and `next` each move one slot left.
199  size_t sizeof_col1 = (columns[2].begin() + columns[2].size() - columns[1].begin());
200  size_t startof_col1 = columns[1].begin() - in_line.begin();
201  columns[1] = in_line.substr(startof_col1, sizeof_col1);
202  columns[2] = columns[3];
203  columns[3] = columns[4];
204  columns[4] = next;
205  continue;
206  }
207  }
208  columns.push_back(next);
209  }
    // Whatever is left (possibly containing delimiters) is the last column.
210  if (current != CTempStringEx::npos)
211  columns.push_back(in_line.substr(current));
212 }
213 // ----------------------------------------------------------------------------
// Populates this record from one raw GFF data line.
//
// strRawInput: the complete line; it is split with TokenizeGFF().
// Raises an error when fewer than nine columns are found, when the
// start/stop conversion throws a CException, or when stop < start.
// NOTE(review): several statements of this function (the signature, the
// construction of the error objects, the assignments for columns 1, 3, 4, 5
// and 8, and the return) are not part of this listing, so only the visible
// steps are described here.
215  const string& strRawInput )
216 // ----------------------------------------------------------------------------
217 {
218  vector< CTempStringEx > columns;
219 
220  TokenizeGFF(columns, strRawInput);
221  if ( columns.size() < 9 ) {
224  eDiag_Error,
225  0,
226  "Bad data line: not enough columns",
228  pErr->Throw();
229  }
230  // to do: more sanity checks
231 
    // column 0 -> sequence id, column 2 -> feature type
232  columns[0].Copy(mSeqId, 0, CTempString::npos);
234  columns[2].Copy(m_strType, 0, CTempString::npos);
237 
238  try {
241  }
242  catch (const CException&) {
243  string message = "Bad data line: Both \"start\" and \"stop\" must be positive integers.";
245  eDiag_Error,
246  0,
247  message);
248  throw error;
249  }
250  if (m_uSeqStop < m_uSeqStart) {
251  string message = "Bad data line: location start is greater than location stop (start="
252  + string(columns[3]) + ", stop=" + string(columns[4]) + ").";
254  eDiag_Error,
255  0,
256  message);
257  throw error;
258  }
259 
    // column 5 is only processed when it is neither "." nor "NA"
260  if ( columns[5] != "." && columns[5] != "NA" ) {
262  }
263 
    // column 6: strand, decided by its first character
264  enum ENa_strand strand;
265  switch (columns[6][0]) {
266  default:
268  break;
269  case '+':
270  strand = objects::eNa_strand_plus;
271  break;
272  case '-':
273  strand = objects::eNa_strand_minus;
274  break;
275  case '.':
276  strand = objects::eNa_strand_both;
277  break;
278  }
    // NOTE(review): allocated with plain new; where m_peStrand is released
    // is not visible here.
279  m_peStrand = new ENa_strand(strand);
280 
281 
283 
    // column 7: phase "0"/"1"/"2" maps to frame one/two/three
284  if ( columns[7] == "0" ) {
285  frame = CCdregion::eFrame_one;
286  }
287  else
288  if ( columns[7] == "1" ) {
289  frame = CCdregion::eFrame_two;
290  }
291  else
292  if ( columns[7] == "2" ) {
293  frame = CCdregion::eFrame_three;
294  }
295 
    // m_pePhase is only allocated when a frame was recognised
296  if (frame != CCdregion::eFrame_not_set)
297  m_pePhase = new TFrame(frame);
298 
300 
302 }
303 
304 // ----------------------------------------------------------------------------
// Looks up a single attribute by key.
//
// strKey:   attribute name to find in m_Attributes.
// strValue: receives the stored value; cleared when the key is absent.
// Returns true when the key exists, false otherwise.
306  const string& strKey,
307  string& strValue ) const
308 // ----------------------------------------------------------------------------
309 {
310  TAttrCit it = m_Attributes.find( strKey );
311  if ( it == m_Attributes.end() ) {
312  strValue.clear();
313  return false;
314  }
315  strValue = it->second;
316  return true;
317 }
318 
319 // ----------------------------------------------------------------------------
// Looks up an attribute by key and returns its comma-separated parts.
//
// strKey: attribute name to find in m_Attributes.
// values: cleared first, then filled by splitting the stored value on ",".
// Returns false when the key is absent or the split produced no entries.
321  const string& strKey,
322  list<string>& values ) const
323 // ----------------------------------------------------------------------------
324 {
325  values.clear();
326  TAttrCit it = m_Attributes.find(strKey);
327  if (it == m_Attributes.end()) {
328  return false;
329  }
    // split flags are 0, i.e. no optional split behaviour is requested
330  NStr::Split(it->second, ",", values, 0);
331  return !values.empty();
332 }
333 
334 // ----------------------------------------------------------------------------
// Normalizes a raw attribute key by passing it through
// NStr::TruncateSpaces_Unsafe and returning the result.
// NOTE(review): the "_Unsafe" variant presumably returns a view into
// strRawKey rather than a copy - confirm before storing the result beyond
// the lifetime of the input.
336  const CTempString& strRawKey )
337 // ----------------------------------------------------------------------------
338 {
339  return NStr::TruncateSpaces_Unsafe(strRawKey);
340 }
341 
342 // ----------------------------------------------------------------------------
// Normalizes a raw attribute value: passes it through
// NStr::TruncateSpaces_Unsafe, drops one leading and one trailing double
// quote when present (each checked independently), and returns the result of
// NStr::URLDecode with the eUrlDec_Percent flag.
344  const CTempString& strRawValue )
345 // ----------------------------------------------------------------------------
346 {
347  CTempString strValue = NStr::TruncateSpaces_Unsafe(strRawValue);
348  if ( NStr::StartsWith( strValue, "\"" ) ) {
349  strValue = strValue.substr( 1, string::npos );
350  }
351  if ( NStr::EndsWith( strValue, "\"" ) ) {
352  strValue = strValue.substr( 0, strValue.length() - 1 );
353  }
354  return NStr::URLDecode(strValue, NStr::eUrlDec_Percent);
355 }
356 
357 
358 // ----------------------------------------------------------------------------
// Scans `input` for the next ';' that is not enclosed in double quotes.
// When such a ';' is found and `result` is non-empty, `input` is advanced to
// just past that ';' and `result` is returned. If the scan reaches the end,
// `input` is cleared and `result` is returned.
// NOTE(review): the signature, the declaration of `result` and the
// statements that assign it are not part of this listing.
360 {
362  bool inQuotes = false;
363  size_t i = 0;
364  for (; i < input.length(); i++)
365  {
366  if (inQuotes) {
    // inside quotes only a closing quote is significant
367  if (input[i] == '\"') {
368  inQuotes = false;
369  }
370  }
371  else { // not in quotes
372  if (input[i] == ';') {
    // an empty `result` does not end the scan; the loop keeps going
374  if (!result.empty())
375  {
376  input = input.substr(i+1);
377  return result;
378  }
379  }
380  else {
381  if (input[i] == '\"') {
382  inQuotes = true;
383  }
384  }
385  }
386  }
388  input.clear();
389  return result;
390 }
391 
// Extracts the next "key=value" (or "key value") attribute from `input`.
//
// input: remaining attribute text; on return it holds whatever follows the
//        consumed attribute, with surrounding spaces trimmed.
// key:   receives the attribute name; empty when none was found.
// value: receives the attribute value; empty for a bare flag attribute.
// Returns true when a non-empty key was extracted.
//
// Leading ';' characters are skipped. The attribute ends at the first ';'
// that is not inside double quotes. The key/value separator is the first
// '=' outside quotes or, when there is none, the first space outside quotes.
// NOTE(review): the signature line is not part of this listing; the
// parameter names above are taken from their uses in the body.
393 {
394  size_t semicolon = CTempString::npos;
395  size_t space = CTempString::npos;
396  size_t equal = CTempString::npos;
397 
398  key.clear();
399  value.clear();
400 
401  bool inQuotes = false;
402  size_t i = 0;
403  while (input[i] == ';') {
404  ++i;
405  }
406  input = input.substr(i);
407  for (i=0; i < input.length(); i++)
408  {
409  if (inQuotes) {
410  if (input[i] == '\"') {
411  inQuotes = false;
412  }
413  }
414  else { // not in quotes
415  switch (input[i])
416  {
417  case ';':
418  semicolon = i;
419  break;
420  case ' ':
    // only a space seen before any '=' can act as the separator
421  if (space == CTempString::npos && equal == CTempString::npos)
422  space = i;
423  continue;
424  case '"':
425  inQuotes = true;
426  continue;
427  case '=':
428  if (equal == CTempString::npos)
429  equal = i;
430  continue;
431  default:
432  continue;
433  }
    // reached only from the ';' case: the attribute is complete
434  break;
435  }
436  }
437 
438  if (semicolon == CTempString::npos)
439  semicolon = input.length();
440 
441  if (equal == CTempString::npos)
442  equal = min(space, semicolon);
443 
444  key = NStr::TruncateSpaces_Unsafe(input.substr(0, equal));
    // A bare flag such as "pseudo;" has no separator, so equal == semicolon.
    // Computing "semicolon - equal - 1" would then wrap to npos and hand the
    // rest of the attribute string to this key, so the value is left empty.
    if (equal < semicolon) {
445  value = NStr::TruncateSpaces_Unsafe(input.substr(equal + 1, semicolon - equal - 1));
    }
446 
447  input = NStr::TruncateSpaces_Unsafe(input.substr(semicolon+1));
448 
449  return !key.empty();
450 }
451 
// Parses the raw attribute column and stores each key/value pair in
// m_Attributes.
//
// strType:          feature type of the record (not used by the visible
//                   statements).
// strRawAttributes: text of the attributes column.
// Always returns true. A key that occurs more than once keeps the value of
// its last occurrence, because entries are assigned through operator[].
// NOTE(review): the signature line and two statements of the body are not
// part of this listing.
453  const string& strType,
454  const string& strRawAttributes )
455 // ----------------------------------------------------------------------------
456 {
458  CTempString input(strRawAttributes);
460  CTempString strKey;
461  CTempString strValue;
462 
    // x_GetNextAttribute consumes `input` piece by piece
463  while (!input.empty() && x_GetNextAttribute(input, strKey, strValue))
464  {
465  m_Attributes[strKey] = strValue;
466  }
467  return true;
468 }
469 
470 // ----------------------------------------------------------------------------
// Splits the raw attribute column into individual attribute strings.
//
// strRawAttributes: text of the attributes column.
// attributes:       receives one entry per attribute; entries are appended
//                   (the vector is not cleared here), trimmed of surrounding
//                   spaces, and empty pieces are dropped.
// Always returns true. A ';' inside a double-quoted section does not split.
472  const string& strRawAttributes,
473  vector< string >& attributes) const
474 // ----------------------------------------------------------------------------
475 {
476  string strCurrAttrib;
477  bool inQuotes = false;
478 
479  ITERATE (string, iterChar, strRawAttributes) {
480  if (inQuotes) {
    // quoted text is copied verbatim, including the closing quote
481  if (*iterChar == '\"') {
482  inQuotes = false;
483  }
484  strCurrAttrib += *iterChar;
485  } else { // not in quotes
486  if (*iterChar == ';') {
487  NStr::TruncateSpacesInPlace( strCurrAttrib );
488  if(!strCurrAttrib.empty())
489  attributes.push_back(strCurrAttrib);
490  strCurrAttrib.clear();
491  } else {
492  if(*iterChar == '\"') {
493  inQuotes = true;
494  }
495  strCurrAttrib += *iterChar;
496  }
497  }
498  }
499 
    // flush the final attribute, which has no terminating ';'
500  NStr::TruncateSpacesInPlace( strCurrAttrib );
501  if (!strCurrAttrib.empty())
502  attributes.push_back(strCurrAttrib);
503 
504  return true;
505 }
506 
507 // ----------------------------------------------------------------------------
// Initializes pFeature from this record: first delegates to
// CGffBaseColumns::InitializeFeature, then applies this record's attributes
// through xMigrateAttributes.
// Returns false as soon as either step returns false.
// NOTE(review): the signature line (function name and the `flags` parameter)
// is not part of this listing.
510  CRef<CSeq_feat> pFeature,
511  SeqIdResolver seqidresolve ) const
512  // ----------------------------------------------------------------------------
513 {
514  if (!CGffBaseColumns::InitializeFeature(flags, pFeature, seqidresolve)) {
515  return false;
516  }
517  return xMigrateAttributes(flags, pFeature);
518 }
519 
520 // ----------------------------------------------------------------------------
// Extends an existing feature with this record's location.
//
// The location from GetSeqLoc() is appended to the feature's location mix,
// then xUpdateFeatureData() is called. For a cdregion feature whose record
// type normalizes to "cds", a non-empty "ID" attribute is stored as the "ID"
// qualifier.
// Returns false only when xUpdateFeatureData() returns false.
// NOTE(review): the signature line (function name and the `flags` parameter)
// is not part of this listing.
523  CRef<CSeq_feat> pFeature,
524  SeqIdResolver seqidresolve ) const
525  // ----------------------------------------------------------------------------
526 {
    // subtype is read before the location is modified below
527  auto subtype = pFeature->GetData().GetSubtype();
528  auto recType = NormalizedType();
529  CRef<CSeq_loc> pAddLoc = GetSeqLoc(flags, seqidresolve);
530 
531  pFeature->SetLocation().SetMix().AddSeqLoc(*pAddLoc);
532  if (!xUpdateFeatureData(flags, pFeature)) {
533  return false;
534  }
535  if (subtype == CSeqFeatData::eSubtype_cdregion && recType == "cds") {
536  string cdsId;
537  GetAttribute("ID", cdsId);
538  if (!cdsId.empty()) {
539  pFeature->AddOrReplaceQualifier("ID", cdsId);
540  }
541  }
542  return true;
543 }
544 
545 // ----------------------------------------------------------------------------
// Updates feature data that depends on which piece of a multi-interval
// feature this record represents. Always returns true.
//
// For the subtype handled below, the coding-region frame is set from this
// record's Phase() when the record's interval coincides with the feature's
// current start (plus strand) or current stop (minus strand).
// NOTE(review): the signature line and the case label that selects the
// handled subtype are not part of this listing.
548  CRef<CSeq_feat> pFeature,
549  SeqIdResolver seqidresolve) const
550  // ----------------------------------------------------------------------------
551 {
552  const CSeq_loc& target = pFeature->GetLocation();
553  CSeqFeatData::ESubtype subtype = pFeature->GetData().GetSubtype();
554  CRef<CSeq_loc> pAddLoc = GetSeqLoc(flags, seqidresolve);
555 
556  switch(subtype) {
557  default: {
558  return true;
559  }
561  // if incoming piece is new feature start then it will provide the
562  // frame
563  //
    // without strand information no decision can be made
564  if (!pAddLoc->GetInt().CanGetStrand()) {
565  return true;
566  }
567  if (pAddLoc->GetInt().GetStrand() == eNa_strand_plus) {
568  size_t curStart = target.GetStart(eExtreme_Positional);
569  size_t newStart = pAddLoc->GetStart(eExtreme_Positional);
570  if (curStart == newStart) {
571  pFeature->SetData().SetCdregion().SetFrame(Phase());
572  }
573  return true;
574  }
575  if (pAddLoc->GetInt().GetStrand() == eNa_strand_minus) {
576  size_t curStop = target.GetStop(eExtreme_Positional);
577  size_t newStop = pAddLoc->GetStop(eExtreme_Positional);
578  if (curStop == newStop) {
579  pFeature->SetData().SetCdregion().SetFrame(Phase());
580  }
581  return true;
582  }
583  }
584  }
585  return true;
586 }
587 
588 // ----------------------------------------------------------------------------
// Transfers this record's attributes onto pFeature.
//
// Works on a private copy of m_Attributes (attrs_left). Each well-known
// attribute is handled by a dedicated section below and, in most sections,
// removed from the copy once consumed. "go_*" attributes, and for biosource
// features the sub-source and org-name attributes, are delegated to helpers.
// Everything still left at the end goes through xMigrateAttributeDefault.
// Returns false when one of the helpers returns false, true otherwise.
// NOTE(review): the signature line (function name and the `flags` parameter)
// and a few statements of the body are not part of this listing.
591  CRef<CSeq_feat> pFeature ) const
592 // ----------------------------------------------------------------------------
593 {
594  TAttributes attrs_left(m_Attributes.begin(), m_Attributes.end());
595  TAttrIt it;
596 
    // "Note" -> feature comment
597  it = attrs_left.find("Note");
598  if (it != attrs_left.end()) {
599  pFeature->SetComment(xNormalizedAttributeValue(it->second));
600  attrs_left.erase(it);
601  }
602 
    // "Dbxref" -> one dbxref entry per element of `dbxrefs`
603  it = attrs_left.find("Dbxref");
604  if (it != attrs_left.end()) {
605  vector<string> dbxrefs;
607  for (vector<string>::iterator it1 = dbxrefs.begin(); it1 != dbxrefs.end();
608  ++it1 ) {
609  string dbtag = xNormalizedAttributeValue(*it1);
610  pFeature->SetDbxref().push_back(CGff2Reader::x_ParseDbtag(dbtag));
611  }
612  attrs_left.erase(it);
613  }
614 
    // "Is_circular" adds a sub-source to biosource features.
    // NOTE(review): the attribute is not erased here, so it is still present
    // when the remaining attributes are processed at the end.
615  it = attrs_left.find("Is_circular");
616  if (it != attrs_left.end()) {
617  if (pFeature->GetData().IsBiosrc()) {
618  CRef<CSubSource> pSubSource(new CSubSource);
620  pSubSource->SetName("is_circular");
621  pFeature->SetData().SetBiosrc().SetSubtype().push_back(pSubSource);
622  }
623  }
624 
    // "Parent" becomes a qualifier except on CDS records, where it is dropped
625  it = attrs_left.find("Parent");
626  if (it != attrs_left.end()) {
627  if (Type() != "CDS") {
628  xMigrateAttributeSingle(
629  attrs_left, "Parent", pFeature, "Parent", flags);
630  }
631  else attrs_left.erase(it);
632  }
633 
    // "Name" is discarded for cds, mrna, biological_region, and for region
    // records whose gbkey is not "Src"
634  it = attrs_left.find("Name");
635  if (it != attrs_left.end()) {
636  auto soType = NormalizedType();
637  string gbKey;
638  GetAttribute("gbkey", gbKey);
639  if (soType == "cds" || soType == "mrna" || soType == "biological_region" ||
640  (soType == "region" && gbKey != "Src")) {
641  attrs_left.erase(it);
642  }
643  }
644 
    // "codon_start" 1/2/3 -> frame one/two/three on cdregion features; other
    // numbers leave the frame untouched
645  it = attrs_left.find("codon_start");
646  if (it != attrs_left.end()) {
647  if (pFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_cdregion) {
    // NOTE(review): presumably throws on non-numeric text - confirm
648  int codon_start = NStr::StringToInt(it->second);
649  switch(codon_start) {
650  default:
651  break;
652  case 1:
653  pFeature->SetData().SetCdregion().SetFrame(CCdregion::eFrame_one);
654  break;
655  case 2:
656  pFeature->SetData().SetCdregion().SetFrame(CCdregion::eFrame_two);
657  break;
658  case 3:
659  pFeature->SetData().SetCdregion().SetFrame(CCdregion::eFrame_three);
660  break;
661  }
662  attrs_left.erase(it);
663  }
664  }
665 
    // "description" -> gene description (gene features only)
666  it = attrs_left.find("description");
667  if (it != attrs_left.end()) {
668  if (pFeature->GetData().IsGene()) {
669  string description = xNormalizedAttributeValue(it->second);
670  pFeature->SetData().SetGene().SetDesc(description);
671  attrs_left.erase(it);
672  }
673  }
674 
    // "exception" -> except flag plus except text
675  it = attrs_left.find("exception");
676  if (it != attrs_left.end()) {
677  pFeature->SetExcept(true);
678  pFeature->SetExcept_text(xNormalizedAttributeValue(it->second));
679  attrs_left.erase(it);
680  }
681 
    // "exon_number" -> "number" qualifier (value stored as-is)
682  it = attrs_left.find("exon_number");
683  if (it != attrs_left.end()) {
684  CRef<CGb_qual> pQual( new CGb_qual);
685  pQual->SetQual("number");
686  pQual->SetVal(it->second);
687  pFeature->SetQual().push_back(pQual);
688  attrs_left.erase(it);
689  }
690 
    // "experiment": the default phrase sets exp_ev, anything else becomes an
    // "experiment" qualifier
691  it = attrs_left.find("experiment");
692  if (it != attrs_left.end()) {
693  const string strExperimentDefault(
694  "experimental evidence, no additional details recorded" );
695  string value = xNormalizedAttributeValue(it->second);
696  if (value == strExperimentDefault) {
697  pFeature->SetExp_ev(CSeq_feat::eExp_ev_experimental);
698  }
699  else {
700  CRef<CGb_qual> pQual(new CGb_qual);
701  pQual->SetQual("experiment");
702  pQual->SetVal(value);
703  pFeature->SetQual().push_back(pQual);
704  }
705  attrs_left.erase(it);
706  }
707 
708  it = attrs_left.find("gbkey");
709  if (it != attrs_left.end()) {
710  attrs_left.erase(it); //ignore
711  }
712 
    // "gene" on gene features: first comma-separated value is the locus, the
    // rest are synonyms
713  it = attrs_left.find("gene");
714  if (it != attrs_left.end()) {
715  if (pFeature->GetData().IsGene()) {
716  list<string> geneValues;
717  NStr::Split(it->second, ",", geneValues, 0);
718  string value;
719  list<string>::const_iterator cit = geneValues.begin();
720  if (cit != geneValues.end()) {
721  value = xNormalizedAttributeValue(*cit);
722  pFeature->SetData().SetGene().SetLocus(value);
723  cit++;
724  while (cit != geneValues.end()) {
725  value = xNormalizedAttributeValue(*cit);
726  pFeature->SetData().SetGene().SetSyn().push_back(value);
727  cit++;
728  }
729  }
730  attrs_left.erase(it);
731  }
732  }
733 
    // "genome" -> biosource genome (biosource features only)
734  it = attrs_left.find("genome");
735  if (it != attrs_left.end()) {
736  if (pFeature->GetData().IsBiosrc()) {
737  pFeature->SetData().SetBiosrc().SetGenome(
738  s_StringToGenome(it->second, flags));
739  attrs_left.erase(it);
740  }
741  }
742 
    // "gene_synonym" -> gene synonyms; erased even for non-gene features
743  it = attrs_left.find("gene_synonym");
744  if (it != attrs_left.end()) {
745  if (pFeature->GetData().IsGene()) {
746  vector<string> synonyms;
747  NStr::Split(it->second, ",", synonyms, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
748  for (vector<string>::iterator it1 = synonyms.begin(); it1 != synonyms.end();
749  ++it1 ) {
750  string synonym = xNormalizedAttributeValue(*it1);
751  pFeature->SetData().SetGene().SetSyn().push_back(synonym);
752  }
753  }
754  attrs_left.erase(it);
755  }
756 
    // "inference": the default phrase sets exp_ev, otherwise one "inference"
    // qualifier is added per comma-separated value
757  it = attrs_left.find("inference");
758  if (it != attrs_left.end()) {
759  auto inferenceVal = it->second;
760  const string strInferenceDefault(
761  "non-experimental evidence, no additional details recorded" );
762  auto value = xNormalizedAttributeValue(inferenceVal);
763  if (value == strInferenceDefault) {
764  pFeature->SetExp_ev(CSeq_feat::eExp_ev_not_experimental);
765  }
766  else {
767  vector<string> inferenceVals;
768  NStr::Split(inferenceVal, ",", inferenceVals);
769  for (auto val: inferenceVals) {
770  CRef<CGb_qual> pQual(new CGb_qual);
771  pQual->SetQual("inference");
772  pQual->SetVal(xNormalizedAttributeValue(val));
773  pFeature->SetQual().push_back(pQual);
774  }
775  }
776  attrs_left.erase(it);
777  }
778 
    // "locus_tag" -> gene locus tag; erased even for non-gene features
779  it = attrs_left.find("locus_tag");
780  if (it != attrs_left.end()) {
781  if (pFeature->GetData().IsGene()) {
782  string tag = xNormalizedAttributeValue(it->second);
783  pFeature->SetData().SetGene().SetLocus_tag(tag);
784  }
785  attrs_left.erase(it);
786  }
787 
    // "map" -> gene map location.
    // NOTE(review): the attribute is not erased here, so it is still present
    // when the remaining attributes are processed at the end.
788  it = attrs_left.find("map");
789  if (it != attrs_left.end()) {
790  if (pFeature->GetData().IsGene()) {
791  pFeature->SetData().SetGene().SetMaploc(
792  xNormalizedAttributeValue(it->second));
793  }
794  }
795 
    // "ncrna_class" -> RNA class on ncRNA features; always erased
796  it = attrs_left.find("ncrna_class");
797  if (it != attrs_left.end()) {
798  if (pFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_ncRNA) {
799  pFeature->SetData().SetRna().SetExt().SetGen().SetClass(
800  xNormalizedAttributeValue(it->second));
801  }
802  attrs_left.erase(it);
803  }
804 
    // "partial" is kept as a qualifier unless fGenbankMode is set
805  it = attrs_left.find("partial");
806  if (it != attrs_left.end()) {
807  if (!(flags & CGff2Reader::fGenbankMode)) {
808  pFeature->AddQualifier("partial", it->second);
809  }
810  attrs_left.erase(it);
811  }
812 
    // presence of "pseudo" is enough; its value is not inspected
813  it = attrs_left.find("pseudo");
814  if (it != attrs_left.end()) {
815  pFeature->SetPseudo(true);
816  attrs_left.erase(it);
817  }
818 
    // "regulatory_class" replaces any existing qualifier of that name, but
    // only on imp features with key "regulatory"
819  it = attrs_left.find("regulatory_class");
820  if (it != attrs_left.end()) {
821  if (pFeature->GetData().IsImp() && (pFeature->GetData().GetImp().GetKey() == "regulatory")) {
822  //pFeature->SetData().SetImp().SetKey(it->second);
823  pFeature->RemoveQualifier("regulatory_class");
824  pFeature->AddQualifier("regulatory_class", it->second);
825  attrs_left.erase(it);
826  }
827  //pFeature->RemoveQualifier("regulatory_class");
828  }
829 
    // "rpt_type": satellite-like values become a "satellite" qualifier, all
    // other values stay "rpt_type"; existing qualifiers of both names are
    // removed first
830  it = attrs_left.find("rpt_type");
831  if (it != attrs_left.end()) {
832  map<string, string> satellites = {
833  {"microsatellite", "microsatellite"},
834  {"minisatellite", "minisatellite"},
835  {"satellite_DNA", "satellite"},
836  {"satellite", "satellite"},
837  };
838  pFeature->RemoveQualifier("rpt_type");
839  pFeature->RemoveQualifier("satellite");
840  auto rpt_type = it->second;
841  auto satelliteIt = satellites.find(rpt_type);
842  if (satelliteIt != satellites.end()) {
843  pFeature->AddQualifier("satellite", satelliteIt->second);
844  }
845  else {
846  pFeature->AddQualifier("rpt_type", rpt_type);
847  }
848  attrs_left.erase(it);
849  }
850 
    // "satellite" is dropped on imp features with key "repeat_region"
851  it = attrs_left.find("satellite");
852  if (it != attrs_left.end()) {
853  if (pFeature->GetData().IsImp() && pFeature->GetData().GetImp().GetKey() == "repeat_region") {
854  attrs_left.erase(it);
855  }
856  }
857 
    // "transl_except" -> code breaks on cdregion features; entries that do
    // not yield a code break are skipped
858  it = attrs_left.find("transl_except");
859  if (it != attrs_left.end()) {
860  if (pFeature->GetData().IsCdregion()) {
861  vector<string> codebreaks;
862  NStr::Split(it->second, ",", codebreaks, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
863  for (vector<string>::iterator it1 = codebreaks.begin();
864  it1 != codebreaks.end(); ++it1 ) {
865  string breakData = xNormalizedAttributeValue(*it1);
866  CRef<CSeq_id> pBreakId = GetSeqId(flags);
868  breakData, *pBreakId, flags);
869  if (pCodeBreak) {
870  pFeature->SetData().SetCdregion().SetCode_break().push_back(
871  pCodeBreak);
872  }
873  }
874  attrs_left.erase(it);
875  }
876  }
877 
    // "transl_table" -> genetic code id on cdregion features
878  it = attrs_left.find("transl_table");
879  if (it != attrs_left.end()) {
880  if (pFeature->GetData().IsCdregion()) {
    // NOTE(review): presumably throws on non-numeric text - confirm
882  pCe->SetId(NStr::StringToInt(it->second));
883  pFeature->SetData().SetCdregion().SetCode().Set().push_back(pCe);
884  attrs_left.erase(it);
885  }
886  }
887 
888  if (!xMigrateAttributesGo(flags, pFeature, attrs_left)) {
889  return false;
890  }
891 
892  if (pFeature->GetData().IsBiosrc()) {
893  if (!xMigrateAttributesSubSource(flags, pFeature, attrs_left)) {
894  return false;
895  }
896  if (!xMigrateAttributesOrgName(flags, pFeature, attrs_left)) {
897  return false;
898  }
899  }
900 
901  //
902  // Turn whatever is left into a gbqual:
903  //
    // Each call is expected to erase the entry it handles, which is what
    // ends this loop.
904  while (!attrs_left.empty()) {
905  const string& key = attrs_left.begin()->first;
906  if (!xMigrateAttributeDefault(attrs_left, key, pFeature, key, flags)) {
907  return false;
908  }
909  }
910  return true;
911 }
912 
913 // ----------------------------------------------------------------------------
// Moves one attribute onto the feature as a single qualifier.
//
// attrKey:  key to look up in `attributes`.
// pFeature: feature that receives the qualifier.
// qualKey:  qualifier name to use.
// Always returns true; a missing attribute is not an error. When present,
// the normalized value is added as one qualifier and the attribute is erased
// from `attributes`.
// NOTE(review): the signature line and the first and last parameter lines
// are not part of this listing.
916  const string& attrKey,
917  CRef<CSeq_feat> pFeature,
918  const string& qualKey,
920  // -----------------------------------------------------------------------------
921 {
922  //retrieve GFF3 attribute as a single value,
923  // turn unescaped value into gbqual of the same key.
924 
925  TAttributes::iterator it = attributes.find(attrKey);
926  if (it == attributes.end()) {
927  return true;
928  }
929  string value = xNormalizedAttributeValue(it->second);
930  pFeature->AddQualifier(qualKey, value);
931  attributes.erase(it);
932  return true;
933 }
934 
935 // ----------------------------------------------------------------------------
// Moves one possibly multi-valued attribute onto the feature, adding one
// qualifier per non-empty comma-separated value.
//
// attrKey:  key to look up in `attributes`.
// pFeature: feature that receives the qualifiers.
// qualKey:  qualifier name used for every value.
// Always returns true; a missing attribute is not an error. The attribute is
// erased from `attributes` after its values have been added.
// NOTE(review): the signature line and the first and last parameter lines
// are not part of this listing.
938  const string& attrKey,
939  CRef<CSeq_feat> pFeature,
940  const string& qualKey,
942  // -----------------------------------------------------------------------------
943 {
944  //split GFF3 multi-value into individual values, create a gbqual of the
945  // same key for each of the individual values.
946  //
947  TAttributes::iterator it = attributes.find(attrKey);
948  if (it == attributes.end()) {
949  return true;
950  }
    // `values` holds CTempStringEx pieces of it->second, so it is consumed
    // before the erase below
951  list<CTempStringEx> values;
952  NStr::Split(it->second, ",", values, 0);
953  for (list<CTempStringEx>::const_iterator cit = values.begin(); cit != values.end();
954  cit++) {
955  if (cit->empty()) {
956  continue;
957  }
958  string value = xNormalizedAttributeValue(*cit);
959  pFeature->AddQualifier(qualKey, value);
960  }
961  attributes.erase(it);
962  return true;
963 }
964 
965 // ----------------------------------------------------------------------------
968  CRef<CSeq_feat> pFeature,
969  TAttributes& attrs_left) const
970 // ----------------------------------------------------------------------------
971 {
972  typedef map<string, COrgMod::ESubtype> ORGMOD_MAP;
973  static CSafeStatic<ORGMOD_MAP> s_OrgModMap;
974  ORGMOD_MAP& sOrgModMap = *s_OrgModMap;
975  if (sOrgModMap.empty()) {
976  sOrgModMap["strain"] = COrgMod::eSubtype_strain;
977  sOrgModMap["substrain"] = COrgMod::eSubtype_substrain;
978  sOrgModMap["type"] = COrgMod::eSubtype_type;
979  sOrgModMap["subtype"] = COrgMod::eSubtype_subtype;
980  sOrgModMap["variety"] = COrgMod::eSubtype_variety;
981  sOrgModMap["serotype"] = COrgMod::eSubtype_serotype;
982  sOrgModMap["serogroup"] = COrgMod::eSubtype_serogroup;
983  sOrgModMap["serovar"] = COrgMod::eSubtype_serovar;
984  sOrgModMap["cultivar"] = COrgMod::eSubtype_cultivar;
985  sOrgModMap["pathovar"] = COrgMod::eSubtype_pathovar;
986  sOrgModMap["chemovar"] = COrgMod::eSubtype_chemovar;
987  sOrgModMap["biovar"] = COrgMod::eSubtype_biovar;
988  sOrgModMap["biotype"] = COrgMod::eSubtype_biotype;
989  sOrgModMap["group"] = COrgMod::eSubtype_group;
990  sOrgModMap["subgroup"] = COrgMod::eSubtype_subgroup;
991  sOrgModMap["isolate"] = COrgMod::eSubtype_isolate;
992  sOrgModMap["common"] = COrgMod::eSubtype_common;
993  sOrgModMap["acronym"] = COrgMod::eSubtype_acronym;
994  sOrgModMap["dosage"] = COrgMod::eSubtype_dosage;
995  sOrgModMap["nat_host"] = COrgMod::eSubtype_nat_host;
996  sOrgModMap["sub_species"] = COrgMod::eSubtype_sub_species;
997  sOrgModMap["specimen_voucher"] = COrgMod::eSubtype_specimen_voucher;
998  sOrgModMap["authority"] = COrgMod::eSubtype_authority;
999  sOrgModMap["forma"] = COrgMod::eSubtype_forma;
1000  sOrgModMap["dosage"] = COrgMod::eSubtype_forma_specialis;
1001  sOrgModMap["ecotype"] = COrgMod::eSubtype_ecotype;
1002  sOrgModMap["synonym"] = COrgMod::eSubtype_synonym;
1003  sOrgModMap["anamorph"] = COrgMod::eSubtype_anamorph;
1004  sOrgModMap["teleomorph"] = COrgMod::eSubtype_teleomorph;
1005  sOrgModMap["breed"] = COrgMod::eSubtype_breed;
1006  sOrgModMap["gb_acronym"] = COrgMod::eSubtype_gb_acronym;
1007  sOrgModMap["gb_anamorph"] = COrgMod::eSubtype_gb_anamorph;
1008  sOrgModMap["gb_synonym"] = COrgMod::eSubtype_gb_synonym;
1009  sOrgModMap["old_lineage"] = COrgMod::eSubtype_old_lineage;
1010  sOrgModMap["old_name"] = COrgMod::eSubtype_old_name;
1011  sOrgModMap["culture_collection"] = COrgMod::eSubtype_culture_collection;
1012  sOrgModMap["bio_material"] = COrgMod::eSubtype_bio_material;
1013  sOrgModMap["note"] = COrgMod::eSubtype_other;
1014  }
1015  list<CRef<COrgMod> >& orgMod =
1016  pFeature->SetData().SetBiosrc().SetOrg().SetOrgname().SetMod();
1017  for ( ORGMOD_MAP::const_iterator sit = sOrgModMap.begin();
1018  sit != sOrgModMap.end(); ++sit) {
1019  TAttributes::iterator ait = attrs_left.find(sit->first);
1020  if (ait == attrs_left.end()) {
1021  continue;
1022  }
1023  CRef<COrgMod> pOrgMod(new COrgMod);
1024  pOrgMod->SetSubtype(sit->second);
1025  pOrgMod->SetSubname(ait->second);
1026  orgMod.push_back(pOrgMod);
1027  attrs_left.erase(ait);
1028  }
1029  return true;
1030 }
1031 
1032 // ----------------------------------------------------------------------------
// Consumes every attribute whose key starts with "go_" by handing it to
// CReadUtil::AddGeneOntologyTerm.
//
// pFeature: feature that receives the gene ontology terms.
// attrs:    remaining attributes; all "go_" entries are erased, including
//           those for which AddGeneOntologyTerm raised an ILineError.
// Always returns true.
// NOTE(review): the signature line (function name and first parameter) is
// not part of this listing.
1035  CRef<CSeq_feat> pFeature,
1036  TAttributes& attrs) const
1037 // ----------------------------------------------------------------------------
1038 {
    // the iterator is advanced inside the body because erase() returns the
    // next valid position
1039  for (auto it = attrs.begin(); it != attrs.end(); /**/) {
1040  if (NStr::StartsWith(it->first, "go_")) {
1041  try {
1042  CReadUtil::AddGeneOntologyTerm(*pFeature, it->first, it->second);
1043  }
    // a rejected term is deliberately ignored; the attribute is still dropped
1044  catch(ILineError&) {
1045  }
1046  it = attrs.erase(it);
1047  }
1048  else {
1049  it++;
1050  }
1051  }
1052  return true;
1053 }
1054 
1055 // ----------------------------------------------------------------------------
1058  CRef<CSeq_feat> pFeature,
1059  TAttributes& attrs_left) const
1060 // ----------------------------------------------------------------------------
1061 {
1062  typedef map<string, CSubSource::ESubtype> SUBSOURCE_MAP;
1063  static CSafeStatic<SUBSOURCE_MAP> s_SubSourceMap;
1064  SUBSOURCE_MAP& sSubSourceMap = *s_SubSourceMap;
1065  if (sSubSourceMap.empty()) {
1066  sSubSourceMap["chromosome"] = CSubSource::eSubtype_chromosome;
1067  sSubSourceMap["map"] = CSubSource::eSubtype_map;
1068  sSubSourceMap["clone"] = CSubSource::eSubtype_clone;
1069  sSubSourceMap["subclone"] = CSubSource::eSubtype_subclone;
1070  sSubSourceMap["haplotype"] = CSubSource::eSubtype_haplotype;
1071  sSubSourceMap["genotype"] = CSubSource::eSubtype_genotype;
1072  sSubSourceMap["sex"] = CSubSource::eSubtype_sex;
1073  sSubSourceMap["cell_line"] = CSubSource::eSubtype_cell_line;
1074  sSubSourceMap["cell_type"] = CSubSource::eSubtype_cell_type;
1075  sSubSourceMap["tissue_type"] = CSubSource::eSubtype_tissue_type;
1076  sSubSourceMap["clone_lib"] = CSubSource::eSubtype_clone_lib;
1077  sSubSourceMap["dev_stage"] = CSubSource::eSubtype_dev_stage;
1078  sSubSourceMap["frequency"] = CSubSource::eSubtype_frequency;
1079  sSubSourceMap["germline"] = CSubSource::eSubtype_germline;
1080  sSubSourceMap["rearranged"] = CSubSource::eSubtype_rearranged;
1081  sSubSourceMap["lab_host"] = CSubSource::eSubtype_lab_host;
1082  sSubSourceMap["pop_variant"] = CSubSource::eSubtype_pop_variant;
1083  sSubSourceMap["tissue_lib"] = CSubSource::eSubtype_tissue_lib;
1084  sSubSourceMap["plasmid_name"] = CSubSource::eSubtype_plasmid_name;
1085  sSubSourceMap["transposon_name"] = CSubSource::eSubtype_transposon_name;
1086  sSubSourceMap["insertion_seq_name"] = CSubSource::eSubtype_insertion_seq_name;
1087  sSubSourceMap["plastid_name"] = CSubSource::eSubtype_plastid_name;
1088  sSubSourceMap["country"] = CSubSource::eSubtype_country;
1089  sSubSourceMap["segment"] = CSubSource::eSubtype_segment;
1090  sSubSourceMap["endogenous_virus_name"] = CSubSource::eSubtype_endogenous_virus_name;
1091  sSubSourceMap["transgenic"] = CSubSource::eSubtype_transgenic;
1092  sSubSourceMap["environmental_sample"] = CSubSource::eSubtype_environmental_sample;
1093  sSubSourceMap["isolation_source"] = CSubSource::eSubtype_isolation_source;
1094  sSubSourceMap["lat_lon"] = CSubSource::eSubtype_lat_lon;
1095  sSubSourceMap["altitude"] = CSubSource::eSubtype_altitude;
1096  sSubSourceMap["collection_date"] = CSubSource::eSubtype_collection_date;
1097  sSubSourceMap["collected_by"] = CSubSource::eSubtype_collected_by;
1098  sSubSourceMap["identified_by"] = CSubSource::eSubtype_identified_by;
1099  sSubSourceMap["fwd_primer_seq"] = CSubSource::eSubtype_fwd_primer_seq;
1100  sSubSourceMap["fwd_primer_name"] = CSubSource::eSubtype_fwd_primer_name;
1101  sSubSourceMap["rev_primer_seq"] = CSubSource::eSubtype_rev_primer_seq;
1102  sSubSourceMap["rev_primer_name"] = CSubSource::eSubtype_rev_primer_name;
1103  sSubSourceMap["metagenomic"] = CSubSource::eSubtype_metagenomic;
1104  sSubSourceMap["mating_type"] = CSubSource::eSubtype_mating_type;
1105  sSubSourceMap["linkage_group"] = CSubSource::eSubtype_linkage_group;
1106  sSubSourceMap["haplogroup"] = CSubSource::eSubtype_haplogroup;
1107  sSubSourceMap["whole_replicon"] = CSubSource::eSubtype_whole_replicon;
1108  sSubSourceMap["phenotype"] = CSubSource::eSubtype_phenotype;
1109  sSubSourceMap["note"] = CSubSource::eSubtype_other;
1110  }
1111 
1112  list<CRef<CSubSource> >& subType =
1113  pFeature->SetData().SetBiosrc().SetSubtype();
1114  for ( SUBSOURCE_MAP::const_iterator sit = sSubSourceMap.begin();
1115  sit != sSubSourceMap.end(); ++sit) {
1116  TAttributes::iterator ait = attrs_left.find(sit->first);
1117  if (ait == attrs_left.end()) {
1118  continue;
1119  }
1120  CRef<CSubSource> pSubSource(new CSubSource);
1121  pSubSource->SetSubtype(sit->second);
1122  pSubSource->SetName(xNormalizedAttributeValue(ait->second));
1123  subType.push_back(pSubSource);
1124  attrs_left.erase(ait);
1125  }
1126  return true;
1127 }
1128 
1129 // ----------------------------------------------------------------------------
//  xInitFeatureData: selects the data variant of pFeature from this record's
//  normalized feature type.
//   - "region" / "biological_region": Biosrc when the "gbkey" attribute is
//     "Src", otherwise a Region named after the "Name" attribute.
//   - any other type: resolved through the SoTypeToFeature(...) call whose
//     argument list is visible below; when that is rejected an error carrying
//     "Bad data line: Invalid feature type ..." is thrown. Otherwise the
//     result of CGffBaseColumns::xInitFeatureData() is returned.
//  NOTE(review): this rendering of the listing omits the lines that open the
//  signature, the SoTypeToFeature condition and the declaration of `error`
//  (listing numbers 1130-1131, 1157 and 1160 are skipped).
1132  CRef<CSeq_feat> pFeature ) const
1133 // ----------------------------------------------------------------------------
1134 {
1135  auto recognizedType = NormalizedType();
1136 
1137  if (recognizedType == "region" || recognizedType == "biological_region") {
1138  string gbkey;
1139  if (GetAttribute("gbkey", gbkey)) {
1140  if (gbkey == "Src") {
// source features only get an empty Biosrc here; nothing else is set
1141  pFeature->SetData().SetBiosrc();
1142  return true;
1143  }
1144  }
1145  // regardless of gbkey (rw-1062)
// `name` stays as default-constructed unless GetAttribute fills it; the
// return value of GetAttribute is not checked
1146  string name;
1147  GetAttribute("Name", name);
1148  pFeature->SetData().SetRegion(name);
1149  return true;
1150  }
1151 
// NOTE(review): recognizedType is rewritten to "cds" here, but none of the
// visible lines that follow read it -- the SoTypeToFeature arguments and the
// error message both use Type(). Confirm whether recognizedType was meant to
// be passed instead, or whether this assignment is a leftover.
1152  if (recognizedType == "start_codon" || recognizedType == "stop_codon") {
1153  recognizedType = "cds";
1154  }
1155 
// true unless the fGenbankMode bit is set in flags; forwarded as the third
// argument below
1156  bool invalidFeaturesToRegion = !(flags & CGff2Reader::fGenbankMode);
1158  Type(), *pFeature, invalidFeaturesToRegion)) {
1159  string message = "Bad data line: Invalid feature type \"" + Type() + "\"";
// error is built with severity eDiag_Error, 0 as the second argument, and the
// message above
1161  eDiag_Error,
1162  0,
1163  message);
1164  throw error;
1165  }
1166  return CGffBaseColumns::xInitFeatureData(flags, pFeature);
1167 }
1168 
1169 // ============================================================================
1170 bool
1172 // =============================================================================
1173 {
1174  list<string> parentIds;
1175  if (!GetAttribute("Parent", parentIds)) {
1176  return false;
1177  }
1178  return (parentIds.size() > 1);
1179 }
1180 
1181 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Data storage class.
User-defined methods of the data storage class.
AutoPtr –.
Definition: ncbimisc.hpp:401
CCode_break –.
Definition: Code_break.hpp:66
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static CRef< CDbtag > x_ParseDbtag(const string &)
virtual bool xUpdateFeatureData(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
Definition: gff2_data.cpp:546
bool IsMultiParent() const
Definition: gff2_data.cpp:1171
TAttributes::const_iterator TAttrCit
Definition: gff2_data.hpp:49
static string xNormalizedAttributeValue(const CTempString &)
Definition: gff2_data.cpp:343
virtual bool xMigrateAttributes(TReaderFlags, CRef< CSeq_feat >) const
Definition: gff2_data.cpp:589
TAttributes::iterator TAttrIt
Definition: gff2_data.hpp:48
bool GetAttribute(const string &, string &) const
Definition: gff2_data.cpp:305
virtual bool UpdateFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
Definition: gff2_data.cpp:521
virtual bool AssignFromGff(const string &)
Definition: gff2_data.cpp:214
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
Definition: gff2_data.cpp:508
static void TokenizeGFF(vector< CTempStringEx > &columns, const CTempStringEx &line)
Definition: gff2_data.cpp:165
string m_strAttributes
Definition: gff2_data.hpp:170
virtual bool xInitFeatureData(TReaderFlags, CRef< CSeq_feat >) const
Definition: gff2_data.cpp:1130
static bool xMigrateAttributeDefault(TAttributes &, const string &, CRef< CSeq_feat >, const string &, TReaderFlags)
Definition: gff2_data.cpp:936
static bool xMigrateAttributeSingle(TAttributes &, const string &, CRef< CSeq_feat >, const string &, TReaderFlags)
Definition: gff2_data.cpp:914
virtual bool xMigrateAttributesGo(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
Definition: gff2_data.cpp:1033
virtual bool xMigrateAttributesOrgName(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
Definition: gff2_data.cpp:966
virtual bool xMigrateAttributesSubSource(TReaderFlags, CRef< CSeq_feat >, TAttributes &) const
Definition: gff2_data.cpp:1056
bool xSplitGffAttributes(const string &, vector< string > &) const
Definition: gff2_data.cpp:471
static string xNormalizedAttributeKey(const CTempString &)
Definition: gff2_data.cpp:335
virtual bool xAssignAttributesFromGff(const string &, const string &)
Definition: gff2_data.cpp:452
TAttributes m_Attributes
Definition: gff2_data.hpp:171
ENa_strand * m_peStrand
virtual bool xInitFeatureData(TReaderFlags, CRef< CSeq_feat >) const
CCdregion::EFrame TFrame
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
Definition: line_error.cpp:194
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
static void AddGeneOntologyTerm(CSeq_feat &feature, const CTempString &qual, const CTempString &val)
Definition: read_util.cpp:296
CSafeStatic<>::
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Definition: Seq_feat.cpp:291
static bool SoTypeToFeature(const string &, CSeq_feat &, bool=false)
Definition: so_map.cpp:411
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
@ eProblem_FeatureBadStartAndOrStop
Definition: line_error.hpp:66
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static const struct attribute attributes[]
Definition: attributes.c:165
static const char * str(char *buf, int n)
Definition: stats.c:84
static const column_t columns[]
Definition: utf8_2.c:22
CBioSource::EGenome s_StringToGenome(const string &genome, CGff2Record::TReaderFlags flags)
Definition: gff2_data.cpp:125
CTempString x_GetNextAttribute(CTempString &input)
Definition: gff2_data.cpp:359
CRef< CCode_break > s_StringToCodeBreak(const string &str, CSeq_id &id, CGff2Record::TReaderFlags flags)
Definition: gff2_data.cpp:71
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3191
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
CTempStringEx substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:1010
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static string URLDecode(const CTempString str, EUrlDecode flag=eUrlDec_All)
URL-decode string.
Definition: ncbistr.cpp:6214
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
Definition: tempstr.hpp:553
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
Definition: tempstr.hpp:538
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
static const size_type npos
Definition: tempstr.hpp:72
const_iterator begin() const
Return an iterator to the string's starting position.
Definition: tempstr.hpp:299
@ fAllowLeadingSpaces
Ignore leading spaces in converted string.
Definition: ncbistr.hpp:294
@ fSplit_Truncate
Definition: ncbistr.hpp:2501
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
@ eUrlDec_Percent
Decode only %XX.
Definition: ncbistr.hpp:3159
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: SubSource_.hpp:319
void SetName(const TName &value)
Assign a value to Name data member.
Definition: SubSource_.hpp:359
EGenome
biological context
Definition: BioSource_.hpp:97
@ eSubtype_collection_date
DD-MMM-YYYY format.
Definition: SubSource_.hpp:114
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:117
@ eSubtype_lat_lon
+/- decimal degrees
Definition: SubSource_.hpp:113
@ eSubtype_collected_by
name of person who collected the sample
Definition: SubSource_.hpp:115
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:118
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
@ eSubtype_identified_by
name of person who identified the sample
Definition: SubSource_.hpp:116
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: OrgMod_.hpp:316
void SetSubname(const TSubname &value)
Assign a value to Subname data member.
Definition: OrgMod_.hpp:356
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_subgroup
Definition: OrgMod_.hpp:99
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_anamorph
Definition: OrgMod_.hpp:112
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_authority
Definition: OrgMod_.hpp:107
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_subtype
Definition: OrgMod_.hpp:88
@ eSubtype_teleomorph
Definition: OrgMod_.hpp:113
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_synonym
Definition: OrgMod_.hpp:111
@ eSubtype_group
Definition: OrgMod_.hpp:98
@ eSubtype_acronym
Definition: OrgMod_.hpp:102
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_gb_anamorph
used by taxonomy database
Definition: OrgMod_.hpp:116
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_forma_specialis
Definition: OrgMod_.hpp:109
@ eSubtype_old_lineage
Definition: OrgMod_.hpp:123
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
void SetAa(TAa &value)
Assign a value to Aa data member.
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
@ eExp_ev_experimental
any reasonable experimental check
Definition: Seq_feat_.hpp:102
@ eExp_ev_not_experimental
similarity, pattern, etc
Definition: Seq_feat_.hpp:103
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
TStrand GetStrand(void) const
Get the Strand member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
static int input()
int i
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const char * tag
T min(T x_, T y_)
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define Type
else result
Definition: token2.c:20
#define const
Definition: zconf.h:232
Modified on Mon May 27 04:37:24 2024 by modify_doxy.py rev. 669887