NCBI C++ ToolKit
OrgMod.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: OrgMod.cpp 101510 2023-12-21 18:59:54Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using the following specifications:
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <util/static_map.hpp>
40 #include <util/util_misc.hpp>
41 #include <util/line_reader.hpp>
42 #include <util/compile_time.hpp>
43 #include <serial/enumvalues.hpp>
44 
46 
47 // generated includes
49 
50 // generated classes
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 
63  EVocabulary vocabulary)
64 {
65  string name = NStr::TruncateSpaces(str);
66  NStr::ToLower(name);
67  replace(name.begin(), name.end(), '_', '-');
68  replace(name.begin(), name.end(), ' ', '-');
69 
70  if (name == "note" ||
71  NStr::EqualNocase(name, "orgmod-note") ||
72  NStr::EqualNocase(name, "note-orgmod")) {
73  return eSubtype_other;
74  } else if (vocabulary == eVocabulary_insdc) {
75  if (name == "host" || name == "specific-host") {
76  return eSubtype_nat_host;
77  } else if (name == "sub-strain") {
78  return eSubtype_substrain;
79  }
80  }
81 
82  return ENUM_METHOD_NAME(ESubtype)()->FindValue(name);
83 }
84 
85 
86 bool COrgMod::IsValidSubtypeName(const string& str,
87  EVocabulary vocabulary)
88 {
89  string name = NStr::TruncateSpaces(str);
90  NStr::ToLower(name);
91  replace(name.begin(), name.end(), '_', '-');
92  replace(name.begin(), name.end(), ' ', '-');
93 
94  if (name == "note" ||
95  name == "orgmod-note" ||
96  name == "note-orgmod") {
97  return true;
98  } else if (vocabulary == eVocabulary_insdc) {
99  if (name == "host" || name == "sub-strain") {
100  return true;
101  }
102  }
103 
104  return ENUM_METHOD_NAME(ESubtype)()->IsValidName(name);
105 }
106 
107 
109 {
110  if (stype == eSubtype_other) {
111  return "note";
112  } else if (vocabulary == eVocabulary_insdc) {
113  switch (stype) {
114  case eSubtype_substrain: return "sub_strain";
115  case eSubtype_nat_host: return "host";
116  default:
117  return NStr::Replace
118  (ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true),
119  "-", "_");
120  }
121  } else {
122  return ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true);
123  }
124 }
125 
126 
128 {
129  switch( subtype ) { // per TM-863
130  case eSubtype_strain: // (2) ,
131  case eSubtype_substrain: // (3) ,
132  case eSubtype_variety: // (6) ,
133  case eSubtype_serotype: // (7) ,
134  case eSubtype_serogroup: // (8) ,
135  case eSubtype_serovar: // (9) ,
136  case eSubtype_cultivar: // (10) ,
137  case eSubtype_pathovar: // (11) ,
138  case eSubtype_chemovar: // (12) ,
139  case eSubtype_biovar: // (13) ,
140  case eSubtype_biotype: // (14) ,
141  case eSubtype_nat_host: // (21) , -- natural host of this specimen
142  case eSubtype_sub_species: // (22) ,
143  case eSubtype_forma: // (25) ,
144  case eSubtype_forma_specialis: // (26) ,
145  case eSubtype_ecotype: // (27) ,
146  case eSubtype_breed: // (31) ,
147  case eSubtype_gb_acronym: // (32) , -- used by taxonomy database
148  case eSubtype_gb_anamorph: // (33) , -- used by taxonomy database
149  case eSubtype_gb_synonym: // (34) , -- used by taxonomy database
150  case eSubtype_metagenome_source: // (37) ,
151  case eSubtype_nomenclature: // (39) ,
152  case eSubtype_old_name: // (254) ,
153  return false;
154  default: return true;
155  }
156 }
157 
158 
159 bool COrgMod::IsDiscouraged(const TSubtype subtype, bool indexer)
160 {
161  if (subtype == eSubtype_dosage
162  || subtype == eSubtype_gb_acronym
163  || subtype == eSubtype_gb_anamorph
164  || subtype == eSubtype_gb_synonym
165  || subtype == eSubtype_old_lineage
166  || subtype == eSubtype_old_name
167  || (subtype == eSubtype_metagenome_source && !indexer)) {
168  return true;
169  } else {
170  return false;
171  }
172 }
173 
174 
176 {
177  switch(stype) {
181  return true;
182  default:
183  return false;
184  }
185 }
186 
187 
188 bool COrgMod::ParseStructuredVoucher(const string& str, string& inst, string& coll, string& id)
189 {
190  if (NStr::IsBlank(str)) {
191  return false;
192  }
193  inst = kEmptyStr;
194  coll = kEmptyStr;
195  id = kEmptyStr;
196  size_t pos = NStr::Find(str, ":");
197  if (pos == string::npos) {
198  id = str;
199  return true;
200  }
201  inst = str.substr(0, pos);
202  id = str.substr(pos + 1);
203  pos = NStr::Find(id, ":");
204  if (pos != string::npos) {
205  coll = id.substr(0, pos);
206  id = id.substr(pos + 1);
207  }
208  return true;
209 }
210 
211 
212 // ===== biomaterial, and culture-collection BioSource subsource modifiers ================
213 
217 
218 // holds all the data in the specific ones above
224 
225 DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex);
226 
227 #include "institution_codes.inc"
228 
230 {
231  if (NStr::StartsWith(line, "#")) {
232  // ignore line, this is a comment
233  return;
234  }
235  vector<string> tokens;
236  NStr::Split(line, "\t", tokens);
237  if (tokens.size() < 3) {
238 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
239 // << "; disregarding");
240  } else {
241  NStr::TruncateSpacesInPlace( tokens[0] );
242  NStr::TruncateSpacesInPlace( tokens[1] );
243  NStr::TruncateSpacesInPlace( tokens[2] );
244  string& vouch_types = tokens[1];
245  for (size_t i = 0; i < vouch_types.size(); i++) {
246  switch (vouch_types[i]) {
247  case 'b':
248  s_BiomaterialInstitutionCodeMap[tokens[0]] = tokens[2];
249  break;
250  case 'c':
251  s_CultureCollectionInstitutionCodeMap[tokens[0]] = tokens[2];
252  break;
253  case 's':
254  s_SpecimenVoucherInstitutionCodeMap[tokens[0]] = tokens[2];
255  break;
256  default:
257 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
258 // << "; unrecognized subtype (" << tokens[1] << "); disregarding");
259  break;
260  }
261  }
262  s_CompleteInstitutionCodeMap[tokens[0]] = tokens[2];
263  s_CompleteInstitutionFullNameMap[tokens[2]] = tokens[0];
264  s_InstitutionCodeTypeMap[tokens[0]] = tokens[1];
265  if (tokens.size() > 3 && !NStr::IsBlank(tokens[3])) {
266  NStr::TruncateSpacesInPlace(tokens[3]);
267  vector<string> synonyms;
268  NStr::Split(tokens[3], ",", synonyms);
269  NON_CONST_ITERATE(vector<string>, s, synonyms) {
271  s_InstitutionCodeSynonymsMap[*s] = tokens[0];
272  }
273  }
274  }
275 }
276 
277 
279 {
280  CFastMutexGuard GUARD(s_InstitutionCollectionCodeMutex);
282  return;
283  }
284  string file = g_FindDataFile("institution_codes.txt");
285  CTime builtin_timestamp
286  (static_cast<time_t>(kInstitutionCollectionCodeList_Timestamp));
288  if ( !file.empty() && !g_IsDataFileOld(file, builtin_timestamp) ) {
289  try {
290  lr = ILineReader::New(file);
291  } NCBI_CATCH("s_InitializeInstitutionCollectionCodeMaps")
292  }
293 
294  if (lr.Empty()) {
295  if (getenv("NCBI_DEBUG")) {
296  LOG_POST("Falling back on built-in data for institution code list.");
297  }
298  size_t num_codes = sizeof (kInstitutionCollectionCodeList) / sizeof (char *);
299  for (size_t i = 0; i < num_codes; i++) {
300  const char *p = kInstitutionCollectionCodeList[i];
302  }
303  } else {
304  if (getenv("NCBI_DEBUG")) {
305  LOG_POST("Reading from " + file + " for instition code list.");
306  }
307  do {
309  } while ( !lr->AtEOF() );
310  }
311 
313 }
314 
315 
317  bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
318 {
319  TInstitutionCodeMap::iterator it = code_map.find(inst_coll);
320  if (it != code_map.end()) {
321  if (NStr::EqualCase(it->first, inst_coll)) {
322  } else if (NStr::EqualNocase(it->first, inst_coll)) {
323  is_miscapitalized = true;
324  }
325  correct_cap = it->first;
326  return it;
327  } else {
328  size_t pos = NStr::Find(inst_coll, "<");
329  if (pos == string::npos) {
330  string check = inst_coll + "<";
331  it = code_map.begin();
332  while (it != code_map.end()) {
333  if (NStr::StartsWith(it->first, check, NStr::eNocase)) {
334  needs_country = true;
335  if (!NStr::StartsWith(it->first, check, NStr::eCase)) {
336  is_miscapitalized = true;
337  }
338  correct_cap = it->first.substr(0, inst_coll.length());
339  return it;
340  }
341  ++it;
342  }
343  } else {
344  string inst_sub = inst_coll.substr(0, pos);
345  it = code_map.find(inst_sub);
346  if (it != code_map.end()) {
347  erroneous_country = true;
348  return it;
349  }
350  }
351  }
352  return code_map.end();
353 }
354 
355 
356 bool COrgMod::IsInstitutionCodeValid(const string& inst_coll, string &voucher_type, bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
357 {
358  is_miscapitalized = false;
359  needs_country = false;
360  erroneous_country = false;
361  correct_cap.clear();
362 
364 
365  TInstitutionCodeMap::iterator ic = FindInstitutionCode(inst_coll, s_InstitutionCodeTypeMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
366  if (ic != s_InstitutionCodeTypeMap.end()) {
367  if (needs_country) {
368  // check to see if non-country-requiring code is in synonyms
369  bool syn_is_miscapitalized = false;
370  string syn_correct_cap = "";
371  bool syn_needs_country = false;
372  bool syn_erroneous_country = false;
374  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
375  syn_needs_country, syn_erroneous_country);
376  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
378  if (is != s_InstitutionCodeTypeMap.end()) {
379  is_miscapitalized = syn_is_miscapitalized;
380  correct_cap = syn_correct_cap;
381  needs_country = syn_needs_country;
382  erroneous_country = syn_erroneous_country;
383  voucher_type = is->second;
384  return true;
385  }
386  }
387  } else if (erroneous_country) {
388  // check to see if country-requiring code is in synonyms
389  bool syn_is_miscapitalized = false;
390  string syn_correct_cap = "";
391  bool syn_needs_country = false;
392  bool syn_erroneous_country = false;
394  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
395  syn_needs_country, syn_erroneous_country);
396  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
398  if (is != s_InstitutionCodeTypeMap.end()) {
399  is_miscapitalized = syn_is_miscapitalized;
400  correct_cap = syn_correct_cap;
401  needs_country = syn_needs_country;
402  erroneous_country = syn_erroneous_country;
403  voucher_type = is->second;
404  return true;
405  }
406  }
407  }
408  voucher_type = ic->second;
409  return true;
410  }
411  ic = FindInstitutionCode(inst_coll, s_InstitutionCodeSynonymsMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
412  if (ic != s_InstitutionCodeSynonymsMap.end()) {
414  if (it != s_InstitutionCodeTypeMap.end()) {
415  voucher_type = it->second;
416  }
417  return true;
418  }
419  return false;
420 }
421 
422 
423 string
424 COrgMod::IsCultureCollectionValid(const string& culture_collection)
425 {
426  if (NStr::Find(culture_collection, ":") == string::npos) {
427  return "Culture_collection should be structured, but is not";
428  } else {
429  return IsStructuredVoucherValid(culture_collection, "c");
430  }
431 }
432 
433 
434 string
435 COrgMod::IsSpecimenVoucherValid(const string& specimen_voucher)
436 {
437  if (NStr::Find(specimen_voucher, ":") == string::npos) {
438  return kEmptyStr;
439  } else {
440  return IsStructuredVoucherValid(specimen_voucher, "s");
441  }
442 }
443 
444 
445 string
446 COrgMod::IsBiomaterialValid(const string& biomaterial)
447 {
448  if (NStr::Find(biomaterial, ":") == string::npos) {
449  return kEmptyStr;
450  } else {
451  return IsStructuredVoucherValid(biomaterial, "b");
452  }
453 }
454 
455 
456 const string kMissingInst = "Voucher is missing institution code";
457 const string kMissingId = "Voucher is missing specific identifier";
458 
459 string
460 COrgMod::IsStructuredVoucherValid(const string& val, const string& v_type)
461 {
462  string inst_code;
463  string coll_code;
464  string inst_coll;
465  string id;
466 
467  ParseStructuredVoucher(val, inst_code, coll_code, id);
468  string rval = kEmptyStr;
469  if (NStr::IsBlank(inst_code)) {
470  rval = kMissingInst;
471  }
472  if (NStr::IsBlank(id)) {
473  rval = NStr::IsBlank(rval) ? kMissingId : rval + "\n" + kMissingId;
474  }
475  if (!NStr::IsBlank(rval)) {
476  return rval;
477  }
478 
479  if (NStr::IsBlank (coll_code)) {
480  inst_coll = inst_code;
481  } else {
482  inst_coll = inst_code + ":" + coll_code;
483  }
484 
485  // first, check combination of institution and collection (if collection found)
486  string voucher_type;
487  bool is_miscapitalized;
488  bool needs_country;
489  bool erroneous_country;
490  string correct_cap;
491  if (COrgMod::IsInstitutionCodeValid(inst_coll, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
492  if (needs_country) {
493  return "Institution code " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
494  } else if (erroneous_country) {
495  return "Institution code " + inst_coll + " should not be qualified with a <COUNTRY> designation";
496  } else if (is_miscapitalized) {
497  return "Institution code " + inst_coll + " exists, but correct capitalization is " + correct_cap;
498  } else {
499  if (NStr::FindNoCase(voucher_type, v_type) == string::npos) {
500  if (NStr::FindNoCase (voucher_type, "b") != string::npos) {
501  return "Institution code " + inst_coll + " should be bio_material";
502  } else if (NStr::FindNoCase (voucher_type, "c") != string::npos) {
503  return "Institution code " + inst_coll + " should be culture_collection";
504  } else if (NStr::FindNoCase (voucher_type, "s") != string::npos) {
505  return "Institution code " + inst_coll + " should be specimen_voucher";
506  }
507  }
508  return kEmptyStr;
509  }
510  } else if (NStr::StartsWith(inst_coll, "personal", NStr::eNocase)) {
511  if (NStr::EqualNocase (inst_code, "personal") && NStr::IsBlank (coll_code)) {
512  return "Personal collection does not have name of collector";
513  }
514  return kEmptyStr;
515  } else if (NStr::IsBlank(coll_code)) {
516  return "Institution code " + inst_coll + " is not in list";
517  } else if (IsInstitutionCodeValid(inst_code, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
518  if (needs_country) {
519  return "Institution code in " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
520  } else if (erroneous_country) {
521  return "Institution code " + inst_code + " should not be qualified with a <COUNTRY> designation";
522  } else if (is_miscapitalized) {
523  return "Institution code " + inst_code + " exists, but correct capitalization is " + correct_cap;
524  } else if (NStr::Equal (coll_code, "DNA")) {
525  // DNA is a valid collection for any institution (using bio_material)
526  if (!NStr::Equal(v_type, "b")) {
527  return "DNA should be bio_material";
528  }
529  } else {
530  return "Institution code " + inst_code + " exists, but collection "
531  + inst_coll + " is not in list";
532  }
533  } else {
534  return "Institution code " + inst_coll + " is not in list";
535  }
536  return kEmptyStr;
537 }
538 
539 
540 string COrgMod::MakeStructuredVoucher(const string& inst, const string& coll, const string& id)
541 {
542  string rval;
543  if (NStr::IsBlank(inst) && NStr::IsBlank(coll) && NStr::IsBlank(id)) {
544  rval = kEmptyStr;
545  } else if (NStr::IsBlank(inst) && NStr::IsBlank(coll)) {
546  rval = id;
547  } else if (NStr::IsBlank(coll)) {
548  rval = inst + ":" + id;
549  } else {
550  rval = inst + ":" + coll + ":" + id;
551  }
552  return rval;
553 }
554 
555 
556 // As described in SQD-1655, we can only rescue an unstructured
557 // structured voucher if it consists of a series of three or
558 // more letters followed by a series of digits, optionally separated
559 // by space, and if the series of letters looks up as a valid
560 // institution code.
562 {
563  // nothing to do if value is blank
564  if (NStr::IsBlank(val)) {
565  return false;
566  }
567 
568  // find first non-letter position
569  size_t len = 0;
570  string::iterator sit = val.begin();
571  while (sit != val.end() && isalpha(*sit)) {
572  len++;
573  sit++;
574  }
575  if (len < 3 || len == val.length()) {
576  // institution code too short or no second token
577  return false;
578  }
579  string inst_code = val.substr(0, len);
580  string remainder = val.substr(len);
581  NStr::TruncateSpacesInPlace(remainder);
582  if (NStr::IsBlank(remainder)) {
583  // no second token
584  return false;
585  }
586  // remainder must be all digits
587  sit = remainder.begin();
588  while (sit != remainder.end()) {
589  if (!isdigit(*sit)) {
590  return false;
591  }
592  sit++;
593  }
594 
595  bool rval = false;
596  COrgMod::TInstitutionCodeMap::iterator it = code_map.find(inst_code);
597  if (it != code_map.end()) {
598  val = inst_code + ":" + remainder;
599  rval = true;
600  }
601 
602  return rval;
603 }
604 
605 
606 bool COrgMod::AddStructureToVoucher(string& val, const string& v_type)
607 {
608  // nothing to do if value is blank
609  if (NStr::IsBlank(val)) {
610  return false;
611  }
612 
614  if (NStr::Find(v_type, "b") != string::npos && FindInstCodeAndSpecID(s_BiomaterialInstitutionCodeMap, val)) {
615  return true;
616  } else if (NStr::Find(v_type, "c") != string::npos && FindInstCodeAndSpecID(s_CultureCollectionInstitutionCodeMap, val)) {
617  return true;
618  } else if (NStr::Find(v_type, "s") != string::npos && FindInstCodeAndSpecID(s_SpecimenVoucherInstitutionCodeMap, val)) {
619  return true;
620  } else {
621  return false;
622  }
623 }
624 
625 
626 bool COrgMod::RescueInstFromParentheses(string& val, const string& voucher_type)
627 {
628  bool rval = false;
629 
630  if (!NStr::EndsWith(val, ")")) {
631  return false;
632  }
633  size_t colon_pos = NStr::Find(val, ":");
634  if (colon_pos != 0 && colon_pos != string::npos) {
635  return false;
636  }
637  size_t pos = NStr::Find(val, "(", NStr::eNocase, NStr::eReverseSearch);
638  if (pos == string::npos) {
639  return false;
640  }
641  string inst = val.substr(pos + 1, val.length() - pos - 2);
642  bool miscap = false, needs_country = false, wrong_country = false;
643  string capfix;
644 
645  string v_type = voucher_type;
646  if (IsInstitutionCodeValid(inst, v_type, miscap, capfix, needs_country, wrong_country)) {
647  if (colon_pos == 0) {
648  val = inst + val.substr(0, pos);
649  } else {
650  val = inst + ":" + val.substr(0, pos);
651  }
653  rval = true;
654  }
655 
656 
657  return rval;
658 }
659 
660 
661 bool
662 COrgMod::FixStructuredVoucher(string& val, const string& v_type)
663 {
664  string inst_code;
665  string coll_code;
666  string id;
667 
668  ParseStructuredVoucher(val, inst_code, coll_code, id);
669  if (NStr::IsBlank(inst_code)) {
670  if (AddStructureToVoucher(val, v_type)) {
671  return true;
672  } else {
673  return RescueInstFromParentheses(val, v_type);
674  }
675  }
676  bool rval = false;
677  bool found = false;
679 
681 
682  string new_inst_code = inst_code;
683  while ((!found) && (it != s_InstitutionCodeTypeMap.end())) {
684  if (NStr::Find(it->second, v_type) != string::npos) {
685  if (NStr::EqualNocase (it->first, inst_code)) {
686  if (!NStr::Equal (it->first, inst_code)) {
687  new_inst_code = it->first;
688  rval = true;
689  }
690  found = true;
691  } else if (NStr::StartsWith(inst_code, it->first)
692  && inst_code.c_str()[it->first.length()] == '<') {
693  /*
694  new_inst_code = it->first;
695  rval = true;
696  */
697  }
698  }
699  ++it;
700  }
701 
702 
703  if (rval) {
704  val = MakeStructuredVoucher(new_inst_code, coll_code, id);
705  }
706  return rval;
707 }
708 
709 
710 const string &
711 COrgMod::GetInstitutionFullName( const string &short_name )
712 {
715  if( iter != s_CompleteInstitutionCodeMap.end() ) {
716  return iter->second;
717  } else {
718  return kEmptyStr;
719  }
720 }
721 
722 const string &
723 COrgMod::GetInstitutionShortName( const string &full_name )
724 {
727  if( iter != s_CompleteInstitutionFullNameMap.end() ) {
728  return iter->second;
729  } else {
730  return kEmptyStr;
731  }
732 }
733 
734 
735 // look for multiple source vouchers
736 string COrgMod::CheckMultipleVouchers(const vector<string>& vouchers)
737 {
738  ITERATE(vector<string>, it, vouchers) {
739  string inst1, coll1, id1;
740  COrgMod::ParseStructuredVoucher(*it, inst1, coll1, id1);
741  if (NStr::IsBlank(inst1)) continue;
742  if (NStr::EqualNocase(inst1, "personal") || NStr::EqualCase(coll1, "DNA")) continue;
743 
744  vector<string>::const_iterator it_next = it;
745  for (++it_next; it_next != vouchers.end(); ++it_next) {
746  string inst2, coll2, id2;
747  COrgMod::ParseStructuredVoucher(*it_next, inst2, coll2, id2);
748  if (NStr::IsBlank(inst2)) continue;
749  if (NStr::EqualNocase(inst2, "personal") || NStr::EqualCase(coll2, "DNA")) continue;
750  if (!NStr::EqualNocase (inst1, inst2) || NStr::IsBlank(inst1)) continue;
751  return NStr::EqualNocase(coll1, coll2) && !NStr::IsBlank(coll1) ? "Multiple vouchers with same institution:collection" : "Multiple vouchers with same institution";
752  }
753  }
754  return kEmptyStr;
755 }
756 
757 
758 bool s_IsAllDigits(string str)
759 {
760  return (str.find_first_not_of("0123456789") == NPOS);
761 }
762 
763 
764 bool s_FixStrainForPrefix(const string& prefix, string& strain)
765 {
766  bool rval = false;
767 
768  if (NStr::StartsWith(strain, prefix, NStr::eNocase)) {
769  string tmp = strain.substr(prefix.length());
771  if (NStr::StartsWith(tmp, ":") || NStr::StartsWith(tmp, "/")) {
772  tmp = tmp.substr(1);
773  }
775  if (!NStr::IsBlank(tmp) && s_IsAllDigits(tmp)) {
776  strain = prefix + " " + tmp;
777  rval = true;
778  }
779  }
780  return rval;
781 }
782 
783 
784 string s_FixOneStrain( const string& strain)
785 {
786  string new_val = strain;
787  if (s_FixStrainForPrefix("ATCC", new_val)) {
788  // fixed for ATCC
789  } else if (s_FixStrainForPrefix("DSM", new_val)) {
790  // fixed for DSM
791  } else {
792  // no fix
793  new_val = kEmptyStr;
794  }
795  return new_val;
796 }
797 
798 
799 string COrgMod::FixStrain( const string& strain)
800 {
801  string new_val = strain;
802  vector<string> words;
803  vector<string> results;
804  NStr::Split(strain, ";", words);
805  FOR_EACH_STRING_IN_VECTOR(itr, words) {
806  string str = *itr;
808  string fixed = s_FixOneStrain(str);
809  if (fixed.empty()) {
810  results.push_back (str);
811  } else {
812  results.push_back (fixed);
813  }
814  }
815  return NStr::Join(results,"; ");
816 }
817 
818 
819 const char* sm_BadStrainValues[] = {
820  "yes",
821  "no",
822  "-",
823  "bacteria",
824  "sp.",
825  "sp",
826  "strain",
827  "environmental",
828  "soil",
829  "clinical isolate",
830  "NA",
831  "whole organism",
832  "microbial"
833 };
834 
835 bool COrgMod::IsStrainValid(const string& strain)
836 {
837  size_t max = sizeof(sm_BadStrainValues) / sizeof(const char*);
838  for (size_t i = 0; i < max; i++) {
839  if (NStr::EqualNocase(strain, sm_BadStrainValues[i])) {
840  return false;
841  }
842  }
843  return true;
844 }
845 
846 
847 const char* sm_KnownHostWords[] = {
848  "alfalfa",
849  "almond",
850  "apple",
851  "asparagus",
852  "badger",
853  "bean",
854  "bitter melon",
855  "blackberry",
856  "blossoms",
857  "blueberry",
858  "bovine",
859  "brinjal",
860  "broad bean",
861  "cabbage",
862  "canine",
863  "cantaloupe",
864  "caprine",
865  "carrot",
866  "cassava",
867  "cat",
868  "catfish",
869  "cattle",
870  "cauliflower",
871  "Channel catfish",
872  "chestnut",
873  "chicken",
874  "chimpanzee",
875  "clover",
876  "corn",
877  "cotton",
878  "cow",
879  "cowpea",
880  "crab",
881  "cucumber",
882  "curd",
883  "dairy cow",
884  "dog",
885  "duck",
886  "equine",
887  "feline",
888  "fish",
889  "fox",
890  "goat",
891  "goldfish",
892  "goose",
893  "guanabana",
894  "honeydew",
895  "horse",
896  "ice cream",
897  "juniper",
898  "larva",
899  "laurel",
900  "leek",
901  "lentil",
902  "lilac",
903  "lily",
904  "maize",
905  "mamey",
906  "mamey sapote",
907  "mango",
908  "mangrove",
909  "mangroves",
910  "marigold",
911  "marine sponge",
912  "melon",
913  "mosquito",
914  "mulberry",
915  "mungbean",
916  "nematode",
917  "oat",
918  "ornamental pear",
919  "ovine",
920  "papaya",
921  "pea",
922  "peach",
923  "peacock",
924  "pear",
925  "pepper",
926  "pig",
927  "pomegranate",
928  "porcine",
929  "potato",
930  "raccoon dog",
931  "red fox",
932  "rhizospheric soil",
933  "rice",
934  "salmon",
935  "seagrass",
936  "sesame",
937  "sheep",
938  "shrimp",
939  "sorghum",
940  "sour cherry",
941  "sourdough",
942  "soybean",
943  "sponge",
944  "squash",
945  "strawberry",
946  "sugar beet",
947  "sunflower",
948  "sweet cherry",
949  "swine",
950  "tobacco",
951  "tomato",
952  "turf",
953  "turfgrass",
954  "turkey",
955  "turtle",
956  "watermelon",
957  "wheat",
958  "white clover",
959  "willow",
960  "wolf",
961  "yak",
962 };
963 
964 
966 {
967  string fix = value;
968 
969  size_t max = sizeof(sm_KnownHostWords) / sizeof(const char*);
970  for (size_t i = 0; i < max; i++) {
972  fix = sm_KnownHostWords[i];
973  break;
974  }
975  }
976  return fix;
977 }
978 
979 
980 static constexpr auto s_hostFixupMap =
982  { "-", "missing" },
983  { "no", "missing" },
984  { "none", "missing" },
985  { "NA", "not available" },
986  { "N/A", "not available" },
987  { "n/a", "not available" },
988  { "free-living", "natural / free-living" },
989  { "natural", "natural / free-living" },
990  { "not available", "not available" },
991  { "not collected", "not collected" },
992  { "not applicable", "not applicable" },
993  { "NR", "not applicable" },
994  { "not known", "unknown" },
995  { "other", "missing" },
996  { "misc", "missing" },
997  { "not determined", "unknown" },
998  { "unknown", "unknown" },
999  { "not available: to be reported later", "not available" },
1000  { "obscured", "obscured" },
1001  { "human", "Homo sapiens" },
1002  { "homo sapiens", "Homo sapiens" }
1003 });
1004 
1005 
1006 
1007 
1008 string COrgMod::FixHost(const string& value)
1009 {
1010  string fix = value;
1011 
1012  auto possible_fix = s_hostFixupMap.find(value);
1013  if (possible_fix != s_hostFixupMap.end()) {
1014  fix = possible_fix->second;
1015  }
1016 
1017  return fix;
1018 }
1019 
1020 
1021 string COrgMod::FixCapitalization(TSubtype subtype, const string& value)
1022 {
1023  string new_val = value;
1024  switch (subtype) {
1026  new_val = FixHostCapitalization(value);
1027  break;
1028  default:
1029  new_val = value;
1030  break;
1031  }
1032  return new_val;
1033 }
1034 
1035 
1037 {
1038  if (!IsSetSubtype() || !IsSetSubname()) {
1039  return;
1040  }
1041 
1042  string new_val = FixCapitalization(GetSubtype(), GetSubname());
1043 
1044  if (!NStr::IsBlank(new_val)) {
1045  SetSubname(new_val);
1046  }
1047 
1048 }
1049 
1050 
1051 string COrgMod::AutoFix(TSubtype subtype, const string& value)
1052 {
1053  string new_val;
1054  switch (subtype) {
1056  new_val = FixStrain(value);
1057  break;
1059  new_val = FixHost(value);
1060  break;
1061  default:
1062  break;
1063  }
1064  return new_val;
1065 }
1066 
1067 
1069 {
1070  if (!IsSetSubtype() || !IsSetSubname()) {
1071  return;
1072  }
1073 
1074  string new_val = AutoFix(GetSubtype(), GetSubname());
1075 
1076  if (!NStr::IsBlank(new_val)) {
1077  SetSubname(new_val);
1078  }
1079 
1080 }
1081 
1082 
1083 void s_HarmonizeString(string& s)
1084 {
1085  NStr::ReplaceInPlace (s, " ", "");
1086  NStr::ReplaceInPlace (s, "_", "");
1087  NStr::ReplaceInPlace (s, "-", "");
1088  NStr::ReplaceInPlace (s, ":", "");
1089  NStr::ReplaceInPlace (s, "/", "");
1090 }
1091 
1092 
1093 bool COrgMod::FuzzyStrainMatch( const string& strain1, const string& strain2 )
1094 {
1095  string s1 = strain1;
1096  string s2 = strain2;
1097 
1098  s_HarmonizeString(s1);
1099  s_HarmonizeString(s2);
1100  return NStr::EqualNocase(s1, s2);
1101 }
1102 
1103 
1105 {
1106  bool any_change = false;
1107 
1108  if (IsSetSubtype() && IsSetSubname()) {
1109  string& val = SetSubname();
1110  switch (GetSubtype()) {
1111  case eSubtype_serovar:
1112  if (NStr::StartsWith(val, "serovar ")) {
1113  val = val.substr(8);
1114  any_change = true;
1115  }
1116  break;
1117  case eSubtype_sub_species:
1118  if (NStr::StartsWith(val, "subsp. ")) {
1119  val = val.substr(7);
1120  any_change = true;
1121  }
1122  break;
1123  default:
1124  break;
1125  }
1126  }
1127  return any_change;
1128 }
1129 
1130 
1135 };
1136 
1138 
1140 {
1141  bool rval = false;
1142 
1143  for (size_t i = 0; i < sNumUnexpectedViralOrgModQualifiers && !rval; i++) {
1144  if (subtype == sUnexpectedViralOrgModQualifiers[i]) {
1145  rval = true;
1146  }
1147  }
1148  return rval;
1149 }
1150 
1151 
1153 {
1155  return true;
1156  } else {
1157  return false;
1158  }
1159 }
1160 
1161 
1162 static const string sValidTypeMaterialPrefixes[] = {
1163  "type material",
1164  "type strain",
1165  "reference material",
1166  "reference strain",
1167  "neotype strain",
1168  "paralectotype",
1169  "hapantotype",
1170  "allotype",
1171  "culture from reference material",
1172  "culture from type material",
1173  "ex-type",
1174  "culture from hapantotype",
1175  "pathotype strain"
1176 };
1177 
1178 static const int sNumValidTypeMaterialPrefixes = sizeof(sValidTypeMaterialPrefixes) / sizeof(string);
1179 
1180 static const string sValidCultureTypeMaterialPrefixes[] = {
1181  "epitype",
1182  "hapantotype",
1183  "holotype",
1184  "isoepitype",
1185  "isoepitype",
1186  "isolectotype",
1187  "isoneotype",
1188  "isoparatype",
1189  "isosyntype",
1190  "isotype",
1191  "lectotype",
1192  "neotype",
1193  "paratype",
1194  "reference",
1195  "syntype",
1196  "type material"
1197 };
1198 
1200 
1201 bool COrgMod::IsValidTypeMaterial(const string& type_material)
1202 {
1203  for (int i = 0; i < sNumValidTypeMaterialPrefixes; i++) {
1204  if (NStr::StartsWith(type_material, sValidTypeMaterialPrefixes[i])) {
1205  return true;
1206  }
1207  }
1208 
1209  for (int i = 0; i < sNumValidCultureTypeMaterialPrefixes; i++) {
1210  if (NStr::StartsWith(type_material, sValidCultureTypeMaterialPrefixes[i])) {
1211  return true;
1212  } else if (NStr::StartsWith(type_material, "culture from " + sValidCultureTypeMaterialPrefixes[i])) {
1213  return true;
1214  } else if (NStr::StartsWith(type_material, "ex-" + sValidCultureTypeMaterialPrefixes[i])) {
1215  return true;
1216  }
1217  }
1218  return false;
1219 }
1220 
1221 
1222 // note that the INSDC method now calls IsValidTypeMaterial
1223 bool COrgMod::IsINSDCValidTypeMaterial(const string& type_material)
1224 {
1225  if (NStr::IsBlank(type_material)) {
1226  return false;
1227  }
1228 
1229  return IsValidTypeMaterial(type_material);
1230 }
1231 
1232 
1233 
1234 END_objects_SCOPE // namespace ncbi::objects::
1235 
1237 
1238 /* Original file checksum: lines: 65, chars: 1882, CRC32: efba64e1 */
static COrgMod::TInstitutionCodeMap s_CultureCollectionInstitutionCodeMap
Definition: OrgMod.cpp:216
bool FindInstCodeAndSpecID(COrgMod::TInstitutionCodeMap &code_map, string &val)
Definition: OrgMod.cpp:561
static const size_t sNumUnexpectedViralOrgModQualifiers
Definition: OrgMod.cpp:1137
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionFullNameMap
Definition: OrgMod.cpp:220
void s_HarmonizeString(string &s)
Definition: OrgMod.cpp:1083
const char * sm_KnownHostWords[]
Definition: OrgMod.cpp:847
const char * sm_BadStrainValues[]
Definition: OrgMod.cpp:819
static const string sValidTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1162
static const int sNumValidTypeMaterialPrefixes
Definition: OrgMod.cpp:1178
const string kMissingInst
Definition: OrgMod.cpp:456
static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]
Definition: OrgMod.cpp:1131
static const string sValidCultureTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1180
static constexpr auto s_hostFixupMap
Definition: OrgMod.cpp:980
static bool s_InstitutionCollectionCodeMapInitialized
Definition: OrgMod.cpp:223
static COrgMod::TInstitutionCodeMap s_BiomaterialInstitutionCodeMap
Definition: OrgMod.cpp:214
bool s_FixStrainForPrefix(const string &prefix, string &strain)
Definition: OrgMod.cpp:764
static COrgMod::TInstitutionCodeMap s_SpecimenVoucherInstitutionCodeMap
Definition: OrgMod.cpp:215
bool s_IsAllDigits(string str)
Definition: OrgMod.cpp:758
const string kMissingId
Definition: OrgMod.cpp:457
static COrgMod::TInstitutionCodeMap s_InstitutionCodeTypeMap
Definition: OrgMod.cpp:221
DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex)
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionCodeMap
Definition: OrgMod.cpp:219
string s_FixOneStrain(const string &strain)
Definition: OrgMod.cpp:784
static COrgMod::TInstitutionCodeMap s_InstitutionCodeSynonymsMap
Definition: OrgMod.cpp:222
static const int sNumValidCultureTypeMaterialPrefixes
Definition: OrgMod.cpp:1199
static void s_InitializeInstitutionCollectionCodeMaps(void)
Definition: OrgMod.cpp:278
static void s_ProcessInstitutionCollectionCodeLine(const CTempString &line)
Definition: OrgMod.cpp:229
static bool FixStructuredVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:662
static bool FuzzyStrainMatch(const string &strain1, const string &strain2)
Definition: OrgMod.cpp:1093
EVocabulary
Definition: OrgMod.hpp:67
@ eVocabulary_insdc
Definition: OrgMod.hpp:69
static string IsCultureCollectionValid(const string &culture_collection)
Definition: OrgMod.cpp:424
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:86
void FixCapitalization()
Definition: OrgMod.cpp:1036
static const string & GetInstitutionShortName(const string &full_name)
Definition: OrgMod.cpp:723
static bool IsINSDCValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1223
static bool IsInstitutionCodeValid(const string &inst_coll, string &voucher_type, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:356
static bool AddStructureToVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:606
static bool IsStrainValid(const string &strain)
Definition: OrgMod.cpp:835
static bool IsMultipleValuesAllowed(TSubtype)
Definition: OrgMod.cpp:127
static bool IsDiscouraged(const TSubtype stype, bool indexer=false)
Definition: OrgMod.cpp:159
static const string & GetInstitutionFullName(const string &short_name)
Definition: OrgMod.cpp:711
void AutoFix()
Definition: OrgMod.cpp:1068
static string IsStructuredVoucherValid(const string &val, const string &voucher_type)
Definition: OrgMod.cpp:460
static string FixHost(const string &value)
Definition: OrgMod.cpp:1008
bool RemoveAbbreviation()
Definition: OrgMod.cpp:1104
static bool IsValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1201
static string IsBiomaterialValid(const string &biomaterial)
Definition: OrgMod.cpp:446
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
static string CheckMultipleVouchers(const vector< string > &)
Definition: OrgMod.cpp:736
static string FixHostCapitalization(const string &value)
Definition: OrgMod.cpp:965
static bool HoldsInstitutionCode(const TSubtype stype)
This indicates if the given Org-mod subtype is supposed to hold an institution code (Example: "ATCC:2...
Definition: OrgMod.cpp:175
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:62
~COrgMod(void)
Definition: OrgMod.cpp:57
bool IsUnexpectedViralOrgModQualifier() const
Definition: OrgMod.cpp:1152
static bool RescueInstFromParentheses(string &val, const string &voucher_type)
Definition: OrgMod.cpp:626
static TInstitutionCodeMap::iterator FindInstitutionCode(const string &inst_coll, TInstitutionCodeMap &code_map, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:316
static string MakeStructuredVoucher(const string &inst, const string &coll, const string &id)
Definition: OrgMod.cpp:540
static string IsSpecimenVoucherValid(const string &specimen_voucher)
Definition: OrgMod.cpp:435
static string FixStrain(const string &strain)
Definition: OrgMod.cpp:799
static bool ParseStructuredVoucher(const string &str, string &inst, string &coll, string &id)
Definition: OrgMod.cpp:188
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTime –.
Definition: ncbitime.hpp:296
static constexpr auto construct(typename _Enabled::type const (&init)[N])
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
char value[7]
Definition: config.c:431
#define check(s)
Definition: describecol2.c:21
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_CATCH(message)
Catch CExceptions as well. This macro is deprecated - use the *_X or *_XX variant instead.
Definition: ncbiexpt.hpp:580
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3310
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
TSubname & SetSubname(void)
Assign a value to Subname data member.
Definition: OrgMod_.hpp:370
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: OrgMod_.hpp:307
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: OrgMod_.hpp:288
const TSubname & GetSubname(void) const
Get the Subname member data.
Definition: OrgMod_.hpp:347
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
Definition: OrgMod_.hpp:335
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_nomenclature
code of nomenclature in subname (B,P,V,Z or combination)
Definition: OrgMod_.hpp:122
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_gb_anamorph
used by taxonomy database
Definition: OrgMod_.hpp:116
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_forma_specialis
Definition: OrgMod_.hpp:109
@ eSubtype_old_lineage
Definition: OrgMod_.hpp:123
FILE * file
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
static char tmp[2048]
Definition: utf8.c:42
static const char * prefix[]
Definition: pcregrep.c:405
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
static const char * str(char *buf, int n)
Definition: stats.c:84
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139
bool g_IsDataFileOld(const CTempString &path, const CTempString &id_line)
Check whether the given file (a full path, as returned by g_FindDataFile) is older than a built-in ve...
Definition: util_misc.cpp:193
Modified on Wed Mar 27 11:17:59 2024 by modify_doxy.py rev. 669887