NCBI C++ ToolKit
OrgMod.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: OrgMod.cpp 99337 2023-03-13 16:05:25Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using the following specifications:
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <util/static_map.hpp>
40 #include <util/util_misc.hpp>
41 #include <util/line_reader.hpp>
42 #include <util/compile_time.hpp>
43 #include <serial/enumvalues.hpp>
44 
46 
47 // generated includes
49 
50 // generated classes
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 
63  EVocabulary vocabulary)
64 {
65  string name = NStr::TruncateSpaces(str);
66  NStr::ToLower(name);
67  replace(name.begin(), name.end(), '_', '-');
68  replace(name.begin(), name.end(), ' ', '-');
69 
70  if (name == "note" ||
71  NStr::EqualNocase(name, "orgmod-note") ||
72  NStr::EqualNocase(name, "note-orgmod")) {
73  return eSubtype_other;
74  } else if (vocabulary == eVocabulary_insdc) {
75  if (name == "host" || name == "specific-host") {
76  return eSubtype_nat_host;
77  } else if (name == "sub-strain") {
78  return eSubtype_substrain;
79  }
80  }
81 
82  return ENUM_METHOD_NAME(ESubtype)()->FindValue(name);
83 }
84 
85 
86 bool COrgMod::IsValidSubtypeName(const string& str,
87  EVocabulary vocabulary)
88 {
89  string name = NStr::TruncateSpaces(str);
90  NStr::ToLower(name);
91  replace(name.begin(), name.end(), '_', '-');
92  replace(name.begin(), name.end(), ' ', '-');
93 
94  if (name == "note" ||
95  name == "orgmod-note" ||
96  name == "note-orgmod") {
97  return true;
98  } else if (vocabulary == eVocabulary_insdc) {
99  if (name == "host" || name == "sub-strain") {
100  return true;
101  }
102  }
103 
104  return ENUM_METHOD_NAME(ESubtype)()->IsValidName(name);
105 }
106 
107 
109 {
110  if (stype == eSubtype_other) {
111  return "note";
112  } else if (vocabulary == eVocabulary_insdc) {
113  switch (stype) {
114  case eSubtype_substrain: return "sub_strain";
115  case eSubtype_nat_host: return "host";
116  default:
117  return NStr::Replace
118  (ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true),
119  "-", "_");
120  }
121  } else {
122  return ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true);
123  }
124 }
125 
126 
128 {
129  switch( subtype ) { // per TM-863
130  case eSubtype_strain: // (2) ,
131  case eSubtype_substrain: // (3) ,
132  case eSubtype_variety: // (6) ,
133  case eSubtype_serotype: // (7) ,
134  case eSubtype_serogroup: // (8) ,
135  case eSubtype_serovar: // (9) ,
136  case eSubtype_cultivar: // (10) ,
137  case eSubtype_pathovar: // (11) ,
138  case eSubtype_chemovar: // (12) ,
139  case eSubtype_biovar: // (13) ,
140  case eSubtype_biotype: // (14) ,
141  case eSubtype_nat_host: // (21) , -- natural host of this specimen
142  case eSubtype_sub_species: // (22) ,
143  case eSubtype_forma: // (25) ,
144  case eSubtype_forma_specialis: // (26) ,
145  case eSubtype_ecotype: // (27) ,
146  case eSubtype_breed: // (31) ,
147  case eSubtype_gb_acronym: // (32) , -- used by taxonomy database
148  case eSubtype_gb_anamorph: // (33) , -- used by taxonomy database
149  case eSubtype_gb_synonym: // (34) , -- used by taxonomy database
150  case eSubtype_metagenome_source: // (37) ,
151  case eSubtype_nomenclature: // (39) ,
152  case eSubtype_old_name: // (254) ,
153  return false;
154  default: return true;
155  }
156 }
157 
158 
159 bool COrgMod::IsDiscouraged(const TSubtype subtype, bool indexer)
160 {
161  if (subtype == eSubtype_dosage
162  || subtype == eSubtype_gb_acronym
163  || subtype == eSubtype_gb_anamorph
164  || subtype == eSubtype_gb_synonym
165  || subtype == eSubtype_old_lineage
166  || subtype == eSubtype_old_name
167  || (subtype == eSubtype_metagenome_source && !indexer)) {
168  return true;
169  } else {
170  return false;
171  }
172 }
173 
174 
176 {
177  switch(stype) {
181  return true;
182  default:
183  return false;
184  }
185 }
186 
187 
188 bool COrgMod::ParseStructuredVoucher(const string& str, string& inst, string& coll, string& id)
189 {
190  if (NStr::IsBlank(str)) {
191  return false;
192  }
193  inst = kEmptyStr;
194  coll = kEmptyStr;
195  id = kEmptyStr;
196  size_t pos = NStr::Find(str, ":");
197  if (pos == string::npos) {
198  id = str;
199  return true;
200  }
201  inst = str.substr(0, pos);
202  id = str.substr(pos + 1);
203  pos = NStr::Find(id, ":");
204  if (pos != string::npos) {
205  coll = id.substr(0, pos);
206  id = id.substr(pos + 1);
207  }
208  return true;
209 }
210 
211 
212 // ===== biomaterial, specimen-voucher, and culture-collection OrgMod modifiers ================
213 
217 
218 // holds all the data in the specific ones above
224 
225 DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex);
226 
227 #include "institution_codes.inc"
228 
230 {
231  if (NStr::StartsWith(line, "#")) {
232  // ignore line, this is a comment
233  return;
234  }
235  vector<string> tokens;
236  NStr::Split(line, "\t", tokens);
237  if (tokens.size() < 3) {
238 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
239 // << "; disregarding");
240  } else {
241  NStr::TruncateSpacesInPlace( tokens[0] );
242  NStr::TruncateSpacesInPlace( tokens[1] );
243  NStr::TruncateSpacesInPlace( tokens[2] );
244  string& vouch_types = tokens[1];
245  for (size_t i = 0; i < vouch_types.size(); i++) {
246  switch (vouch_types[i]) {
247  case 'b':
248  s_BiomaterialInstitutionCodeMap[tokens[0]] = tokens[2];
249  break;
250  case 'c':
251  s_CultureCollectionInstitutionCodeMap[tokens[0]] = tokens[2];
252  break;
253  case 's':
254  s_SpecimenVoucherInstitutionCodeMap[tokens[0]] = tokens[2];
255  break;
256  default:
257 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
258 // << "; unrecognized subtype (" << tokens[1] << "); disregarding");
259  break;
260  }
261  }
262  s_CompleteInstitutionCodeMap[tokens[0]] = tokens[2];
263  s_CompleteInstitutionFullNameMap[tokens[2]] = tokens[0];
264  s_InstitutionCodeTypeMap[tokens[0]] = tokens[1];
265  if (tokens.size() > 3 && !NStr::IsBlank(tokens[3])) {
266  NStr::TruncateSpacesInPlace(tokens[3]);
267  vector<string> synonyms;
268  NStr::Split(tokens[3], ",", synonyms);
269  NON_CONST_ITERATE(vector<string>, s, synonyms) {
271  s_InstitutionCodeSynonymsMap[*s] = tokens[0];
272  }
273  }
274  }
275 }
276 
277 
279 {
280  CFastMutexGuard GUARD(s_InstitutionCollectionCodeMutex);
282  return;
283  }
284  string file = g_FindDataFile("institution_codes.txt");
285  CTime builtin_timestamp
286  (static_cast<time_t>(kInstitutionCollectionCodeList_Timestamp));
288  if ( !file.empty() && !g_IsDataFileOld(file, builtin_timestamp) ) {
289  try {
290  lr = ILineReader::New(file);
291  } NCBI_CATCH("s_InitializeInstitutionCollectionCodeMaps")
292  }
293 
294  if (lr.Empty()) {
295  if (getenv("NCBI_DEBUG")) {
296  LOG_POST("Falling back on built-in data for institution code list.");
297  }
298  size_t num_codes = sizeof (kInstitutionCollectionCodeList) / sizeof (char *);
299  for (size_t i = 0; i < num_codes; i++) {
300  const char *p = kInstitutionCollectionCodeList[i];
302  }
303  } else {
304  if (getenv("NCBI_DEBUG")) {
305  LOG_POST("Reading from " + file + " for instition code list.");
306  }
307  do {
309  } while ( !lr->AtEOF() );
310  }
311 
313 }
314 
315 
317  bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
318 {
319  TInstitutionCodeMap::iterator it = code_map.find(inst_coll);
320  if (it != code_map.end()) {
321  if (NStr::EqualCase(it->first, inst_coll)) {
322  } else if (NStr::EqualNocase(it->first, inst_coll)) {
323  is_miscapitalized = true;
324  }
325  correct_cap = it->first;
326  return it;
327  } else {
328  size_t pos = NStr::Find(inst_coll, "<");
329  if (pos == string::npos) {
330  string check = inst_coll + "<";
331  it = code_map.begin();
332  while (it != code_map.end()) {
333  if (NStr::StartsWith(it->first, check, NStr::eNocase)) {
334  needs_country = true;
335  if (!NStr::StartsWith(it->first, check, NStr::eCase)) {
336  is_miscapitalized = true;
337  }
338  correct_cap = it->first.substr(0, inst_coll.length());
339  return it;
340  }
341  ++it;
342  }
343  } else {
344  string inst_sub = inst_coll.substr(0, pos);
345  it = code_map.find(inst_sub);
346  if (it != code_map.end()) {
347  erroneous_country = true;
348  return it;
349  }
350  }
351  }
352  return code_map.end();
353 }
354 
355 
356 bool COrgMod::IsInstitutionCodeValid(const string& inst_coll, string &voucher_type, bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
357 {
358  is_miscapitalized = false;
359  needs_country = false;
360  erroneous_country = false;
361  correct_cap.clear();
362 
364 
365  TInstitutionCodeMap::iterator ic = FindInstitutionCode(inst_coll, s_InstitutionCodeTypeMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
366  if (ic != s_InstitutionCodeTypeMap.end()) {
367  if (needs_country) {
368  // check to see if non-country-requiring code is in synonyms
369  bool syn_is_miscapitalized = false;
370  string syn_correct_cap = "";
371  bool syn_needs_country = false;
372  bool syn_erroneous_country = false;
374  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
375  syn_needs_country, syn_erroneous_country);
376  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
378  if (is != s_InstitutionCodeTypeMap.end()) {
379  is_miscapitalized = syn_is_miscapitalized;
380  correct_cap = syn_correct_cap;
381  needs_country = syn_needs_country;
382  erroneous_country = syn_erroneous_country;
383  voucher_type = is->second;
384  return true;
385  }
386  }
387  } else if (erroneous_country) {
388  // check to see if country-requiring code is in synonyms
389  bool syn_is_miscapitalized = false;
390  string syn_correct_cap = "";
391  bool syn_needs_country = false;
392  bool syn_erroneous_country = false;
394  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
395  syn_needs_country, syn_erroneous_country);
396  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
398  if (is != s_InstitutionCodeTypeMap.end()) {
399  is_miscapitalized = syn_is_miscapitalized;
400  correct_cap = syn_correct_cap;
401  needs_country = syn_needs_country;
402  erroneous_country = syn_erroneous_country;
403  voucher_type = is->second;
404  return true;
405  }
406  }
407  }
408  voucher_type = ic->second;
409  return true;
410  }
411  ic = FindInstitutionCode(inst_coll, s_InstitutionCodeSynonymsMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
412  if (ic != s_InstitutionCodeSynonymsMap.end()) {
414  if (it != s_InstitutionCodeTypeMap.end()) {
415  voucher_type = it->second;
416  }
417  return true;
418  }
419  return false;
420 }
421 
422 
423 string
424 COrgMod::IsCultureCollectionValid(const string& culture_collection)
425 {
426  if (NStr::Find(culture_collection, ":") == string::npos) {
427  return "Culture_collection should be structured, but is not";
428  } else {
429  return IsStructuredVoucherValid(culture_collection, "c");
430  }
431 }
432 
433 
434 string
435 COrgMod::IsSpecimenVoucherValid(const string& specimen_voucher)
436 {
437  if (NStr::Find(specimen_voucher, ":") == string::npos) {
438  return kEmptyStr;
439  } else {
440  return IsStructuredVoucherValid(specimen_voucher, "s");
441  }
442 }
443 
444 
445 string
446 COrgMod::IsBiomaterialValid(const string& biomaterial)
447 {
448  if (NStr::Find(biomaterial, ":") == string::npos) {
449  return kEmptyStr;
450  } else {
451  return IsStructuredVoucherValid(biomaterial, "b");
452  }
453 }
454 
455 
// Messages reported by IsStructuredVoucherValid() when a structured voucher
// lacks its institution code or its specific identifier.
const string kMissingInst("Voucher is missing institution code");
const string kMissingId("Voucher is missing specific identifier");
458 
459 string
460 COrgMod::IsStructuredVoucherValid(const string& val, const string& v_type)
461 {
462  string inst_code;
463  string coll_code;
464  string inst_coll;
465  string id;
466 
467  ParseStructuredVoucher(val, inst_code, coll_code, id);
468  string rval = kEmptyStr;
469  if (NStr::IsBlank(inst_code)) {
470  rval = kMissingInst;
471  }
472  if (NStr::IsBlank(id)) {
473  rval = NStr::IsBlank(rval) ? kMissingId : rval + "\n" + kMissingId;
474  }
475  if (!NStr::IsBlank(rval)) {
476  return rval;
477  }
478 
479  if (NStr::IsBlank (coll_code)) {
480  inst_coll = inst_code;
481  } else {
482  inst_coll = inst_code + ":" + coll_code;
483  }
484 
485  // first, check combination of institution and collection (if collection found)
486  string voucher_type;
487  bool is_miscapitalized;
488  bool needs_country;
489  bool erroneous_country;
490  string correct_cap;
491  if (COrgMod::IsInstitutionCodeValid(inst_coll, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
492  if (needs_country) {
493  return "Institution code " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
494  } else if (erroneous_country) {
495  return "Institution code " + inst_coll + " should not be qualified with a <COUNTRY> designation";
496  } else if (is_miscapitalized) {
497  return "Institution code " + inst_coll + " exists, but correct capitalization is " + correct_cap;
498  } else {
499  if (NStr::FindNoCase(voucher_type, v_type) == string::npos) {
500  if (NStr::FindNoCase (voucher_type, "b") != string::npos) {
501  return "Institution code " + inst_coll + " should be bio_material";
502  } else if (NStr::FindNoCase (voucher_type, "c") != string::npos) {
503  return "Institution code " + inst_coll + " should be culture_collection";
504  } else if (NStr::FindNoCase (voucher_type, "s") != string::npos) {
505  return "Institution code " + inst_coll + " should be specimen_voucher";
506  }
507  }
508  return kEmptyStr;
509  }
510  } else if (NStr::StartsWith(inst_coll, "personal", NStr::eNocase)) {
511  if (NStr::EqualNocase (inst_code, "personal") && NStr::IsBlank (coll_code)) {
512  return "Personal collection does not have name of collector";
513  }
514  return kEmptyStr;
515  } else if (NStr::IsBlank(coll_code)) {
516  return "Institution code " + inst_coll + " is not in list";
517  } else if (IsInstitutionCodeValid(inst_code, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
518  if (needs_country) {
519  return "Institution code in " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
520  } else if (erroneous_country) {
521  return "Institution code " + inst_code + " should not be qualified with a <COUNTRY> designation";
522  } else if (is_miscapitalized) {
523  return "Institution code " + inst_code + " exists, but correct capitalization is " + correct_cap;
524  } else if (NStr::Equal (coll_code, "DNA")) {
525  // DNA is a valid collection for any institution (using bio_material)
526  if (!NStr::Equal(v_type, "b")) {
527  return "DNA should be bio_material";
528  }
529  } else {
530  return "Institution code " + inst_code + " exists, but collection "
531  + inst_coll + " is not in list";
532  }
533  } else {
534  return "Institution code " + inst_coll + " is not in list";
535  }
536  return kEmptyStr;
537 }
538 
539 
540 string COrgMod::MakeStructuredVoucher(const string& inst, const string& coll, const string& id)
541 {
542  string rval;
543  if (NStr::IsBlank(inst) && NStr::IsBlank(coll) && NStr::IsBlank(id)) {
544  rval = kEmptyStr;
545  } else if (NStr::IsBlank(inst) && NStr::IsBlank(coll)) {
546  rval = id;
547  } else if (NStr::IsBlank(coll)) {
548  rval = inst + ":" + id;
549  } else {
550  rval = inst + ":" + coll + ":" + id;
551  }
552  return rval;
553 }
554 
555 
556 // As described in SQD-1655, we can only rescue an unstructured
557 // structured voucher if it consists of a series of three or
558 // more letters followed by a series of digits, optionally separated
559 // by space, and if the series of letters looks up as a valid
560 // institution code.
562 {
563  // nothing to do if value is blank
564  if (NStr::IsBlank(val)) {
565  return false;
566  }
567 
568  // find first non-letter position
569  size_t len = 0;
570  string::iterator sit = val.begin();
571  while (sit != val.end() && isalpha(*sit)) {
572  len++;
573  sit++;
574  }
575  if (len < 3 || len == val.length()) {
576  // institution code too short or no second token
577  return false;
578  }
579  string inst_code = val.substr(0, len);
580  string remainder = val.substr(len);
581  NStr::TruncateSpacesInPlace(remainder);
582  if (NStr::IsBlank(remainder)) {
583  // no second token
584  return false;
585  }
586  // remainder must be all digits
587  sit = remainder.begin();
588  while (sit != remainder.end()) {
589  if (!isdigit(*sit)) {
590  return false;
591  }
592  sit++;
593  }
594 
595  bool rval = false;
596  COrgMod::TInstitutionCodeMap::iterator it = code_map.find(inst_code);
597  if (it != code_map.end()) {
598  val = inst_code + ":" + remainder;
599  rval = true;
600  }
601 
602  return rval;
603 }
604 
605 
606 bool COrgMod::AddStructureToVoucher(string& val, const string& v_type)
607 {
608  // nothing to do if value is blank
609  if (NStr::IsBlank(val)) {
610  return false;
611  }
612 
614  if (NStr::Find(v_type, "b") != string::npos && FindInstCodeAndSpecID(s_BiomaterialInstitutionCodeMap, val)) {
615  return true;
616  } else if (NStr::Find(v_type, "c") != string::npos && FindInstCodeAndSpecID(s_CultureCollectionInstitutionCodeMap, val)) {
617  return true;
618  } else if (NStr::Find(v_type, "s") != string::npos && FindInstCodeAndSpecID(s_SpecimenVoucherInstitutionCodeMap, val)) {
619  return true;
620  } else {
621  return false;
622  }
623 }
624 
625 
626 bool COrgMod::RescueInstFromParentheses(string& val, const string& voucher_type)
627 {
628  bool rval = false;
629 
630  if (!NStr::EndsWith(val, ")")) {
631  return false;
632  }
633  size_t colon_pos = NStr::Find(val, ":");
634  if (colon_pos != 0 && colon_pos != string::npos) {
635  return false;
636  }
637  size_t pos = NStr::Find(val, "(", NStr::eNocase, NStr::eReverseSearch);
638  if (pos == string::npos) {
639  return false;
640  }
641  string inst = val.substr(pos + 1, val.length() - pos - 2);
642  bool miscap = false, needs_country = false, wrong_country = false;
643  string capfix;
644 
645  string v_type = voucher_type;
646  if (IsInstitutionCodeValid(inst, v_type, miscap, capfix, needs_country, wrong_country)) {
647  if (colon_pos == 0) {
648  val = inst + val.substr(0, pos);
649  } else {
650  val = inst + ":" + val.substr(0, pos);
651  }
653  rval = true;
654  }
655 
656 
657  return rval;
658 }
659 
660 
661 bool
662 COrgMod::FixStructuredVoucher(string& val, const string& v_type)
663 {
664  string inst_code;
665  string coll_code;
666  string id;
667 
668  ParseStructuredVoucher(val, inst_code, coll_code, id);
669  if (NStr::IsBlank(inst_code)) {
670  if (AddStructureToVoucher(val, v_type)) {
671  return true;
672  } else {
673  return RescueInstFromParentheses(val, v_type);
674  }
675  }
676  bool rval = false;
677  bool found = false;
679 
681 
682  string new_inst_code = inst_code;
683  while ((!found) && (it != s_InstitutionCodeTypeMap.end())) {
684  if (NStr::Find(it->second, v_type) != string::npos) {
685  if (NStr::EqualNocase (it->first, inst_code)) {
686  if (!NStr::Equal (it->first, inst_code)) {
687  new_inst_code = it->first;
688  rval = true;
689  }
690  found = true;
691  } else if (NStr::StartsWith(inst_code, it->first)
692  && inst_code.c_str()[it->first.length()] == '<') {
693  /*
694  new_inst_code = it->first;
695  rval = true;
696  */
697  }
698  }
699  ++it;
700  }
701 
702 
703  if (rval) {
704  val = MakeStructuredVoucher(new_inst_code, coll_code, id);
705  }
706  return rval;
707 }
708 
709 
710 const string &
711 COrgMod::GetInstitutionFullName( const string &short_name )
712 {
715  if( iter != s_CompleteInstitutionCodeMap.end() ) {
716  return iter->second;
717  } else {
718  return kEmptyStr;
719  }
720 }
721 
722 const string &
723 COrgMod::GetInstitutionShortName( const string &full_name )
724 {
727  if( iter != s_CompleteInstitutionFullNameMap.end() ) {
728  return iter->second;
729  } else {
730  return kEmptyStr;
731  }
732 }
733 
734 
735 // look for multiple source vouchers
736 string COrgMod::CheckMultipleVouchers(const vector<string>& vouchers)
737 {
738  ITERATE(vector<string>, it, vouchers) {
739  string inst1, coll1, id1;
740  COrgMod::ParseStructuredVoucher(*it, inst1, coll1, id1);
741  if (NStr::IsBlank(inst1)) continue;
742  if (NStr::EqualNocase(inst1, "personal") || NStr::EqualCase(coll1, "DNA")) continue;
743 
744  vector<string>::const_iterator it_next = it;
745  for (++it_next; it_next != vouchers.end(); ++it_next) {
746  string inst2, coll2, id2;
747  COrgMod::ParseStructuredVoucher(*it_next, inst2, coll2, id2);
748  if (NStr::IsBlank(inst2)) continue;
749  if (NStr::EqualNocase(inst2, "personal") || NStr::EqualCase(coll2, "DNA")) continue;
750  if (!NStr::EqualNocase (inst1, inst2) || NStr::IsBlank(inst1)) continue;
751  return NStr::EqualNocase(coll1, coll2) && !NStr::IsBlank(coll1) ? "Multiple vouchers with same institution:collection" : "Multiple vouchers with same institution";
752  }
753  }
754  return kEmptyStr;
755 }
756 
757 
// Return true when `str` consists solely of the ASCII digits 0-9.
// An empty string also yields true (it contains no non-digit character),
// so callers that need at least one digit must check for emptiness
// themselves.
// The argument is taken by const reference to avoid copying it on each call.
bool s_IsAllDigits(const string& str)
{
    return str.find_first_not_of("0123456789") == string::npos;
}
762 
763 
764 bool s_FixStrainForPrefix(const string& prefix, string& strain)
765 {
766  bool rval = false;
767 
768  if (NStr::StartsWith(strain, prefix, NStr::eNocase)) {
769  string tmp = strain.substr(prefix.length());
771  if (NStr::StartsWith(tmp, ":") || NStr::StartsWith(tmp, "/")) {
772  tmp = tmp.substr(1);
773  }
775  if (!NStr::IsBlank(tmp) && s_IsAllDigits(tmp)) {
776  strain = prefix + " " + tmp;
777  rval = true;
778  }
779  }
780  return rval;
781 }
782 
783 
784 string s_FixOneStrain( const string& strain)
785 {
786  string new_val = strain;
787  if (s_FixStrainForPrefix("ATCC", new_val)) {
788  // fixed for ATCC
789  } else if (s_FixStrainForPrefix("DSM", new_val)) {
790  // fixed for DSM
791  } else {
792  // no fix
793  new_val = kEmptyStr;
794  }
795  return new_val;
796 }
797 
798 
799 string COrgMod::FixStrain( const string& strain)
800 {
801  string new_val = strain;
802  vector<string> words;
803  vector<string> results;
804  NStr::Split(strain, ";", words);
805  FOR_EACH_STRING_IN_VECTOR(itr, words) {
806  string str = *itr;
808  string fixed = s_FixOneStrain(str);
809  if (fixed.empty()) {
810  results.push_back (str);
811  } else {
812  results.push_back (fixed);
813  }
814  }
815  return NStr::Join(results,"; ");
816 }
817 
818 
// Placeholder values that are rejected as strain names
// (compared case-insensitively by COrgMod::IsStrainValid).
const char* sm_BadStrainValues[] = { "yes", "no", "-", "microbial" };
825 
826 bool COrgMod::IsStrainValid(const string& strain)
827 {
828  size_t max = sizeof(sm_BadStrainValues) / sizeof(const char*);
829  for (size_t i = 0; i < max; i++) {
830  if (NStr::EqualNocase(strain, sm_BadStrainValues[i])) {
831  return false;
832  }
833  }
834  return true;
835 }
836 
837 
// Common host names in their preferred capitalization.  The table is
// scanned by the host-capitalization fixup that follows it, which replaces a
// value with the matching entry.
// NOTE(review): presumably the match is case-insensitive -- confirm against
// that function.  Entry order is preserved from the original table.
const char* sm_KnownHostWords[] = {
    "alfalfa", "almond", "apple", "asparagus", "badger", "bean",
    "bitter melon", "blackberry", "blossoms", "blueberry", "bovine", "brinjal",
    "broad bean", "cabbage", "canine", "cantaloupe", "caprine", "carrot",
    "cassava", "cat", "catfish", "cattle", "cauliflower", "Channel catfish",
    "chestnut", "chicken", "chimpanzee", "clover", "corn", "cotton",
    "cow", "cowpea", "crab", "cucumber", "curd", "dairy cow",
    "dog", "duck", "equine", "feline", "fish", "fox",
    "goat", "goldfish", "goose", "guanabana", "honeydew", "horse",
    "ice cream", "juniper", "larva", "laurel", "leek", "lentil",
    "lilac", "lily", "maize", "mamey", "mamey sapote", "mango",
    "mangrove", "mangroves", "marigold", "marine sponge", "melon", "mosquito",
    "mulberry", "mungbean", "nematode", "oat", "ornamental pear", "ovine",
    "papaya", "pea", "peach", "peacock", "pear", "pepper",
    "pig", "pomegranate", "porcine", "potato", "raccoon dog", "red fox",
    "rhizospheric soil", "rice", "salmon", "seagrass", "sesame", "sheep",
    "shrimp", "sorghum", "sour cherry", "sourdough", "soybean", "sponge",
    "squash", "strawberry", "sugar beet", "sunflower", "sweet cherry", "swine",
    "tobacco", "tomato", "turf", "turfgrass", "turkey", "turtle",
    "watermelon", "wheat", "white clover", "willow", "wolf", "yak",
};
954 
955 
957 {
958  string fix = value;
959 
960  size_t max = sizeof(sm_KnownHostWords) / sizeof(const char*);
961  for (size_t i = 0; i < max; i++) {
963  fix = sm_KnownHostWords[i];
964  break;
965  }
966  }
967  return fix;
968 }
969 
970 
971 static constexpr auto s_hostFixupMap =
973  { "-", "missing" },
974  { "no", "missing" },
975  { "none", "missing" },
976  { "NA", "not available" },
977  { "N/A", "not available" },
978  { "n/a", "not available" },
979  { "free-living", "natural / free-living" },
980  { "natural", "natural / free-living" },
981  { "not available", "not available" },
982  { "not collected", "not collected" },
983  { "not applicable", "not applicable" },
984  { "NR", "not applicable" },
985  { "not known", "unknown" },
986  { "other", "missing" },
987  { "misc", "missing" },
988  { "not determined", "unknown" },
989  { "unknown", "unknown" },
990  { "not available: to be reported later", "not available" },
991  { "obscured", "obscured" },
992  { "human", "Homo sapiens" },
993  { "homo sapiens", "Homo sapiens" }
994 });
995 
996 
997 
998 
999 string COrgMod::FixHost(const string& value)
1000 {
1001  string fix = value;
1002 
1003  auto possible_fix = s_hostFixupMap.find(value);
1004  if (possible_fix != s_hostFixupMap.end()) {
1005  fix = possible_fix->second;
1006  }
1007 
1008  return fix;
1009 }
1010 
1011 
1012 string COrgMod::FixCapitalization(TSubtype subtype, const string& value)
1013 {
1014  string new_val = value;
1015  switch (subtype) {
1017  new_val = FixHostCapitalization(value);
1018  break;
1019  default:
1020  new_val = value;
1021  break;
1022  }
1023  return new_val;
1024 }
1025 
1026 
1028 {
1029  if (!IsSetSubtype() || !IsSetSubname()) {
1030  return;
1031  }
1032 
1033  string new_val = FixCapitalization(GetSubtype(), GetSubname());
1034 
1035  if (!NStr::IsBlank(new_val)) {
1036  SetSubname(new_val);
1037  }
1038 
1039 }
1040 
1041 
1042 string COrgMod::AutoFix(TSubtype subtype, const string& value)
1043 {
1044  string new_val;
1045  switch (subtype) {
1047  new_val = FixStrain(value);
1048  break;
1050  new_val = FixHost(value);
1051  break;
1052  default:
1053  break;
1054  }
1055  return new_val;
1056 }
1057 
1058 
1060 {
1061  if (!IsSetSubtype() || !IsSetSubname()) {
1062  return;
1063  }
1064 
1065  string new_val = AutoFix(GetSubtype(), GetSubname());
1066 
1067  if (!NStr::IsBlank(new_val)) {
1068  SetSubname(new_val);
1069  }
1070 
1071 }
1072 
1073 
// Strip every space, underscore, hyphen, colon and slash from `s`, in place,
// so that strings differing only in those separators compare equal
// (see COrgMod::FuzzyStrainMatch).
void s_HarmonizeString(string& s)
{
    string kept;
    kept.reserve(s.size());
    for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch) {
        switch (*ch) {
        case ' ':
        case '_':
        case '-':
        case ':':
        case '/':
            // separator: drop it
            break;
        default:
            kept += *ch;
            break;
        }
    }
    s.swap(kept);
}
1082 
1083 
1084 bool COrgMod::FuzzyStrainMatch( const string& strain1, const string& strain2 )
1085 {
1086  string s1 = strain1;
1087  string s2 = strain2;
1088 
1089  s_HarmonizeString(s1);
1090  s_HarmonizeString(s2);
1091  return NStr::EqualNocase(s1, s2);
1092 }
1093 
1094 
1096 {
1097  bool any_change = false;
1098 
1099  if (IsSetSubtype() && IsSetSubname()) {
1100  string& val = SetSubname();
1101  switch (GetSubtype()) {
1102  case eSubtype_serovar:
1103  if (NStr::StartsWith(val, "serovar ")) {
1104  val = val.substr(8);
1105  any_change = true;
1106  }
1107  break;
1108  case eSubtype_sub_species:
1109  if (NStr::StartsWith(val, "subsp. ")) {
1110  val = val.substr(7);
1111  any_change = true;
1112  }
1113  break;
1114  default:
1115  break;
1116  }
1117  }
1118  return any_change;
1119 }
1120 
1121 
1126 };
1127 
1129 
1131 {
1132  bool rval = false;
1133 
1134  for (size_t i = 0; i < sNumUnexpectedViralOrgModQualifiers && !rval; i++) {
1135  if (subtype == sUnexpectedViralOrgModQualifiers[i]) {
1136  rval = true;
1137  }
1138  }
1139  return rval;
1140 }
1141 
1142 
1144 {
1146  return true;
1147  } else {
1148  return false;
1149  }
1150 }
1151 
1152 
// Prefixes that make a type_material value valid as they stand
// (matched case-sensitively by COrgMod::IsValidTypeMaterial).
static const string sValidTypeMaterialPrefixes[] = {
    "type material", "type strain",
    "reference material", "reference strain",
    "neotype strain", "paralectotype", "hapantotype", "allotype",
    "culture from reference material", "culture from type material",
    "ex-type", "culture from hapantotype", "pathotype strain"
};

// Number of entries in sValidTypeMaterialPrefixes.
static const int sNumValidTypeMaterialPrefixes =
    sizeof(sValidTypeMaterialPrefixes) / sizeof(sValidTypeMaterialPrefixes[0]);
1170 
// Type designations that are valid on their own and also when preceded by
// "culture from " or "ex-" (see COrgMod::IsValidTypeMaterial).
// NOTE(review): "isoepitype" is listed twice.  The duplicate is harmless for
// prefix matching and is kept so the array length does not change; confirm
// whether a different designation was intended.
static const string sValidCultureTypeMaterialPrefixes[] = {
    "epitype", "hapantotype", "holotype",
    "isoepitype", "isoepitype", "isolectotype", "isoneotype",
    "isoparatype", "isosyntype", "isotype",
    "lectotype", "neotype", "paratype",
    "reference", "syntype", "type material"
};
1189 
1191 
1192 bool COrgMod::IsValidTypeMaterial(const string& type_material)
1193 {
1194  for (int i = 0; i < sNumValidTypeMaterialPrefixes; i++) {
1195  if (NStr::StartsWith(type_material, sValidTypeMaterialPrefixes[i])) {
1196  return true;
1197  }
1198  }
1199 
1200  for (int i = 0; i < sNumValidCultureTypeMaterialPrefixes; i++) {
1201  if (NStr::StartsWith(type_material, sValidCultureTypeMaterialPrefixes[i])) {
1202  return true;
1203  } else if (NStr::StartsWith(type_material, "culture from " + sValidCultureTypeMaterialPrefixes[i])) {
1204  return true;
1205  } else if (NStr::StartsWith(type_material, "ex-" + sValidCultureTypeMaterialPrefixes[i])) {
1206  return true;
1207  }
1208  }
1209  return false;
1210 }
1211 
1212 
1213 // note that the INSDC method now calls IsValidTypeMaterial
1214 bool COrgMod::IsINSDCValidTypeMaterial(const string& type_material)
1215 {
1216  if (NStr::IsBlank(type_material)) {
1217  return false;
1218  }
1219 
1220  return IsValidTypeMaterial(type_material);
1221 }
1222 
1223 
1224 
1225 END_objects_SCOPE // namespace ncbi::objects::
1226 
1228 
1229 /* Original file checksum: lines: 65, chars: 1882, CRC32: efba64e1 */
static COrgMod::TInstitutionCodeMap s_CultureCollectionInstitutionCodeMap
Definition: OrgMod.cpp:216
bool FindInstCodeAndSpecID(COrgMod::TInstitutionCodeMap &code_map, string &val)
Definition: OrgMod.cpp:561
static const size_t sNumUnexpectedViralOrgModQualifiers
Definition: OrgMod.cpp:1128
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionFullNameMap
Definition: OrgMod.cpp:220
void s_HarmonizeString(string &s)
Definition: OrgMod.cpp:1074
const char * sm_KnownHostWords[]
Definition: OrgMod.cpp:838
const char * sm_BadStrainValues[]
Definition: OrgMod.cpp:819
static const string sValidTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1153
static const int sNumValidTypeMaterialPrefixes
Definition: OrgMod.cpp:1169
const string kMissingInst
Definition: OrgMod.cpp:456
static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]
Definition: OrgMod.cpp:1122
static const string sValidCultureTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1171
static constexpr auto s_hostFixupMap
Definition: OrgMod.cpp:971
static bool s_InstitutionCollectionCodeMapInitialized
Definition: OrgMod.cpp:223
static COrgMod::TInstitutionCodeMap s_BiomaterialInstitutionCodeMap
Definition: OrgMod.cpp:214
bool s_FixStrainForPrefix(const string &prefix, string &strain)
Definition: OrgMod.cpp:764
static COrgMod::TInstitutionCodeMap s_SpecimenVoucherInstitutionCodeMap
Definition: OrgMod.cpp:215
bool s_IsAllDigits(string str)
Definition: OrgMod.cpp:758
const string kMissingId
Definition: OrgMod.cpp:457
static COrgMod::TInstitutionCodeMap s_InstitutionCodeTypeMap
Definition: OrgMod.cpp:221
DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex)
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionCodeMap
Definition: OrgMod.cpp:219
string s_FixOneStrain(const string &strain)
Definition: OrgMod.cpp:784
static COrgMod::TInstitutionCodeMap s_InstitutionCodeSynonymsMap
Definition: OrgMod.cpp:222
static const int sNumValidCultureTypeMaterialPrefixes
Definition: OrgMod.cpp:1190
static void s_InitializeInstitutionCollectionCodeMaps(void)
Definition: OrgMod.cpp:278
static void s_ProcessInstitutionCollectionCodeLine(const CTempString &line)
Definition: OrgMod.cpp:229
static bool FixStructuredVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:662
static bool FuzzyStrainMatch(const string &strain1, const string &strain2)
Definition: OrgMod.cpp:1084
EVocabulary
Definition: OrgMod.hpp:67
@ eVocabulary_insdc
Definition: OrgMod.hpp:69
static string IsCultureCollectionValid(const string &culture_collection)
Definition: OrgMod.cpp:424
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:86
void FixCapitalization()
Definition: OrgMod.cpp:1027
static const string & GetInstitutionShortName(const string &full_name)
Definition: OrgMod.cpp:723
static bool IsINSDCValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1214
static bool IsInstitutionCodeValid(const string &inst_coll, string &voucher_type, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:356
static bool AddStructureToVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:606
static bool IsStrainValid(const string &strain)
Definition: OrgMod.cpp:826
static bool IsMultipleValuesAllowed(TSubtype)
Definition: OrgMod.cpp:127
static bool IsDiscouraged(const TSubtype stype, bool indexer=false)
Definition: OrgMod.cpp:159
static const string & GetInstitutionFullName(const string &short_name)
Definition: OrgMod.cpp:711
void AutoFix()
Definition: OrgMod.cpp:1059
static string IsStructuredVoucherValid(const string &val, const string &voucher_type)
Definition: OrgMod.cpp:460
static string FixHost(const string &value)
Definition: OrgMod.cpp:999
bool RemoveAbbreviation()
Definition: OrgMod.cpp:1095
static bool IsValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1192
static string IsBiomaterialValid(const string &biomaterial)
Definition: OrgMod.cpp:446
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
static string CheckMultipleVouchers(const vector< string > &)
Definition: OrgMod.cpp:736
static string FixHostCapitalization(const string &value)
Definition: OrgMod.cpp:956
static bool HoldsInstitutionCode(const TSubtype stype)
This indicates if the given Org-mod subtype is supposed to hold an institution code (Example: "ATCC:2...
Definition: OrgMod.cpp:175
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:62
~COrgMod(void)
Definition: OrgMod.cpp:57
bool IsUnexpectedViralOrgModQualifier() const
Definition: OrgMod.cpp:1143
static bool RescueInstFromParentheses(string &val, const string &voucher_type)
Definition: OrgMod.cpp:626
static TInstitutionCodeMap::iterator FindInstitutionCode(const string &inst_coll, TInstitutionCodeMap &code_map, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:316
static string MakeStructuredVoucher(const string &inst, const string &coll, const string &id)
Definition: OrgMod.cpp:540
static string IsSpecimenVoucherValid(const string &specimen_voucher)
Definition: OrgMod.cpp:435
static string FixStrain(const string &strain)
Definition: OrgMod.cpp:799
static bool ParseStructuredVoucher(const string &str, string &inst, string &coll, string &id)
Definition: OrgMod.cpp:188
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTime –.
Definition: ncbitime.hpp:296
static constexpr auto construct(typename _Enabled::type const (&init)[N])
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
char value[7]
Definition: config.c:431
#define check(s)
Definition: describecol2.c:21
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_CATCH(message)
Catch CExceptions as well This macro is deprecated - use *_X or *_XX variant instead of it.
Definition: ncbiexpt.hpp:580
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5324
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3310
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5383
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
TSubname & SetSubname(void)
Assign a value to Subname data member.
Definition: OrgMod_.hpp:370
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: OrgMod_.hpp:307
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: OrgMod_.hpp:288
const TSubname & GetSubname(void) const
Get the Subname member data.
Definition: OrgMod_.hpp:347
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
Definition: OrgMod_.hpp:335
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_nomenclature
code of nomenclature in subname (B,P,V,Z or combination)
Definition: OrgMod_.hpp:122
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_gb_anamorph
used by taxonomy database
Definition: OrgMod_.hpp:116
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_forma_specialis
Definition: OrgMod_.hpp:109
@ eSubtype_old_lineage
Definition: OrgMod_.hpp:123
FILE * file
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
static char tmp[2048]
Definition: utf8.c:42
static const char * prefix[]
Definition: pcregrep.c:405
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
static const char * str(char *buf, int n)
Definition: stats.c:84
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139
bool g_IsDataFileOld(const CTempString &path, const CTempString &id_line)
Check whether the given file (a full path, as returned by g_FindDataFile) is older than a built-in ve...
Definition: util_misc.cpp:193
Modified on Thu Dec 07 10:06:43 2023 by modify_doxy.py rev. 669887