NCBI C++ ToolKit
OrgMod.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: OrgMod.cpp 102689 2024-06-26 16:43:28Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using the following specifications:
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <util/static_map.hpp>
40 #include <util/util_misc.hpp>
41 #include <util/line_reader.hpp>
42 #include <util/compile_time.hpp>
43 #include <serial/enumvalues.hpp>
44 
46 
47 // generated includes
49 
50 // generated classes
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 
63  EVocabulary vocabulary)
64 {
65  string name = NStr::TruncateSpaces(str);
66  NStr::ToLower(name);
67  replace(name.begin(), name.end(), '_', '-');
68  replace(name.begin(), name.end(), ' ', '-');
69 
70  if (name == "note" ||
71  NStr::EqualNocase(name, "orgmod-note") ||
72  NStr::EqualNocase(name, "note-orgmod")) {
73  return eSubtype_other;
74  } else if (vocabulary == eVocabulary_insdc) {
75  if (name == "host" || name == "specific-host") {
76  return eSubtype_nat_host;
77  } else if (name == "sub-strain") {
78  return eSubtype_substrain;
79  }
80  }
81 
82  return ENUM_METHOD_NAME(ESubtype)()->FindValue(name);
83 }
84 
85 
86 bool COrgMod::IsValidSubtypeName(const string& str,
87  EVocabulary vocabulary)
88 {
89  string name = NStr::TruncateSpaces(str);
90  NStr::ToLower(name);
91  replace(name.begin(), name.end(), '_', '-');
92  replace(name.begin(), name.end(), ' ', '-');
93 
94  if (name == "note" ||
95  name == "orgmod-note" ||
96  name == "note-orgmod") {
97  return true;
98  } else if (vocabulary == eVocabulary_insdc) {
99  if (name == "host" || name == "sub-strain") {
100  return true;
101  }
102  }
103 
104  return ENUM_METHOD_NAME(ESubtype)()->IsValidName(name);
105 }
106 
107 
109 {
110  if (stype == eSubtype_other) {
111  return "note";
112  } else if (vocabulary == eVocabulary_insdc) {
113  switch (stype) {
114  case eSubtype_substrain: return "sub_strain";
115  case eSubtype_nat_host: return "host";
116  default:
117  return NStr::Replace
118  (ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true),
119  "-", "_");
120  }
121  } else {
122  return ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true);
123  }
124 }
125 
126 
128 {
129  switch( subtype ) { // per TM-863
130  case eSubtype_strain: // (2) ,
131  case eSubtype_substrain: // (3) ,
132  case eSubtype_variety: // (6) ,
133  case eSubtype_serotype: // (7) ,
134  case eSubtype_serogroup: // (8) ,
135  case eSubtype_serovar: // (9) ,
136  case eSubtype_cultivar: // (10) ,
137  case eSubtype_pathovar: // (11) ,
138  case eSubtype_chemovar: // (12) ,
139  case eSubtype_biovar: // (13) ,
140  case eSubtype_biotype: // (14) ,
141  case eSubtype_isolate: // (17) ,
142  case eSubtype_nat_host: // (21) , -- natural host of this specimen
143  case eSubtype_sub_species: // (22) ,
144  case eSubtype_forma: // (25) ,
145  case eSubtype_forma_specialis: // (26) ,
146  case eSubtype_ecotype: // (27) ,
147  case eSubtype_breed: // (31) ,
148  case eSubtype_gb_acronym: // (32) , -- used by taxonomy database
149  case eSubtype_gb_anamorph: // (33) , -- used by taxonomy database
150  case eSubtype_gb_synonym: // (34) , -- used by taxonomy database
151  case eSubtype_metagenome_source: // (37) ,
152  case eSubtype_nomenclature: // (39) ,
153  case eSubtype_old_name: // (254) ,
154  return false;
155  default: return true;
156  }
157 }
158 
159 
160 bool COrgMod::IsDiscouraged(const TSubtype subtype, bool indexer)
161 {
162  if (subtype == eSubtype_dosage
163  || subtype == eSubtype_gb_acronym
164  || subtype == eSubtype_gb_anamorph
165  || subtype == eSubtype_gb_synonym
166  || subtype == eSubtype_old_lineage
167  || subtype == eSubtype_old_name
168  || (subtype == eSubtype_metagenome_source && !indexer)) {
169  return true;
170  } else {
171  return false;
172  }
173 }
174 
175 
177 {
178  switch(stype) {
182  return true;
183  default:
184  return false;
185  }
186 }
187 
188 
189 bool COrgMod::ParseStructuredVoucher(const string& str, string& inst, string& coll, string& id)
190 {
191  if (NStr::IsBlank(str)) {
192  return false;
193  }
194  inst = kEmptyStr;
195  coll = kEmptyStr;
196  id = kEmptyStr;
197  size_t pos = NStr::Find(str, ":");
198  if (pos == string::npos) {
199  id = str;
200  return true;
201  }
202  inst = str.substr(0, pos);
203  id = str.substr(pos + 1);
204  pos = NStr::Find(id, ":");
205  if (pos != string::npos) {
206  coll = id.substr(0, pos);
207  id = id.substr(pos + 1);
208  }
209  return true;
210 }
211 
212 
213 // ===== biomaterial, and culture-collection BioSource subsource modifiers ================
214 
218 
219 // holds all the data in the specific ones above
225 
226 DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex);
227 
228 #include "institution_codes.inc"
229 
231 {
232  if (NStr::StartsWith(line, "#")) {
233  // ignore line, this is a comment
234  return;
235  }
236  vector<string> tokens;
237  NStr::Split(line, "\t", tokens);
238  if (tokens.size() < 3) {
239 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
240 // << "; disregarding");
241  } else {
242  NStr::TruncateSpacesInPlace( tokens[0] );
243  NStr::TruncateSpacesInPlace( tokens[1] );
244  NStr::TruncateSpacesInPlace( tokens[2] );
245  string& vouch_types = tokens[1];
246  for (size_t i = 0; i < vouch_types.size(); i++) {
247  switch (vouch_types[i]) {
248  case 'b':
249  s_BiomaterialInstitutionCodeMap[tokens[0]] = tokens[2];
250  break;
251  case 'c':
252  s_CultureCollectionInstitutionCodeMap[tokens[0]] = tokens[2];
253  break;
254  case 's':
255  s_SpecimenVoucherInstitutionCodeMap[tokens[0]] = tokens[2];
256  break;
257  default:
258 // ERR_POST_X(1, Warning << "Bad format in institution_codes.txt entry " << line
259 // << "; unrecognized subtype (" << tokens[1] << "); disregarding");
260  break;
261  }
262  }
263  s_CompleteInstitutionCodeMap[tokens[0]] = tokens[2];
264  s_CompleteInstitutionFullNameMap[tokens[2]] = tokens[0];
265  s_InstitutionCodeTypeMap[tokens[0]] = tokens[1];
266  if (tokens.size() > 3 && !NStr::IsBlank(tokens[3])) {
267  NStr::TruncateSpacesInPlace(tokens[3]);
268  vector<string> synonyms;
269  NStr::Split(tokens[3], ",", synonyms);
270  NON_CONST_ITERATE(vector<string>, s, synonyms) {
272  s_InstitutionCodeSynonymsMap[*s] = tokens[0];
273  }
274  }
275  }
276 }
277 
278 
280 {
281  CFastMutexGuard GUARD(s_InstitutionCollectionCodeMutex);
283  return;
284  }
285  string file = g_FindDataFile("institution_codes.txt");
286  CTime builtin_timestamp
287  (static_cast<time_t>(kInstitutionCollectionCodeList_Timestamp));
289  if ( !file.empty() && !g_IsDataFileOld(file, builtin_timestamp) ) {
290  try {
292  } NCBI_CATCH("s_InitializeInstitutionCollectionCodeMaps")
293  }
294 
295  if (lr.Empty()) {
296  if (getenv("NCBI_DEBUG")) {
297  LOG_POST("Falling back on built-in data for institution code list.");
298  }
299  size_t num_codes = sizeof (kInstitutionCollectionCodeList) / sizeof (char *);
300  for (size_t i = 0; i < num_codes; i++) {
301  const char *p = kInstitutionCollectionCodeList[i];
303  }
304  } else {
305  if (getenv("NCBI_DEBUG")) {
306  LOG_POST("Reading from " + file + " for instition code list.");
307  }
308  do {
310  } while ( !lr->AtEOF() );
311  }
312 
314 }
315 
316 
318  bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
319 {
320  TInstitutionCodeMap::iterator it = code_map.find(inst_coll);
321  if (it != code_map.end()) {
322  if (NStr::EqualCase(it->first, inst_coll)) {
323  } else if (NStr::EqualNocase(it->first, inst_coll)) {
324  is_miscapitalized = true;
325  }
326  correct_cap = it->first;
327  return it;
328  } else {
329  size_t pos = NStr::Find(inst_coll, "<");
330  if (pos == string::npos) {
331  string check = inst_coll + "<";
332  it = code_map.begin();
333  while (it != code_map.end()) {
334  if (NStr::StartsWith(it->first, check, NStr::eNocase)) {
335  needs_country = true;
336  if (!NStr::StartsWith(it->first, check, NStr::eCase)) {
337  is_miscapitalized = true;
338  }
339  correct_cap = it->first.substr(0, inst_coll.length());
340  return it;
341  }
342  ++it;
343  }
344  } else {
345  string inst_sub = inst_coll.substr(0, pos);
346  it = code_map.find(inst_sub);
347  if (it != code_map.end()) {
348  erroneous_country = true;
349  return it;
350  }
351  }
352  }
353  return code_map.end();
354 }
355 
356 
357 bool COrgMod::IsInstitutionCodeValid(const string& inst_coll, string &voucher_type, bool& is_miscapitalized, string& correct_cap, bool& needs_country, bool& erroneous_country)
358 {
359  is_miscapitalized = false;
360  needs_country = false;
361  erroneous_country = false;
362  correct_cap.clear();
363 
365 
366  TInstitutionCodeMap::iterator ic = FindInstitutionCode(inst_coll, s_InstitutionCodeTypeMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
367  if (ic != s_InstitutionCodeTypeMap.end()) {
368  if (needs_country) {
369  // check to see if non-country-requiring code is in synonyms
370  bool syn_is_miscapitalized = false;
371  string syn_correct_cap = "";
372  bool syn_needs_country = false;
373  bool syn_erroneous_country = false;
375  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
376  syn_needs_country, syn_erroneous_country);
377  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
379  if (is != s_InstitutionCodeTypeMap.end()) {
380  is_miscapitalized = syn_is_miscapitalized;
381  correct_cap = syn_correct_cap;
382  needs_country = syn_needs_country;
383  erroneous_country = syn_erroneous_country;
384  voucher_type = is->second;
385  return true;
386  }
387  }
388  } else if (erroneous_country) {
389  // check to see if country-requiring code is in synonyms
390  bool syn_is_miscapitalized = false;
391  string syn_correct_cap = "";
392  bool syn_needs_country = false;
393  bool syn_erroneous_country = false;
395  s_InstitutionCodeSynonymsMap, syn_is_miscapitalized, syn_correct_cap,
396  syn_needs_country, syn_erroneous_country);
397  if (it != s_InstitutionCodeSynonymsMap.end() && !syn_needs_country) {
399  if (is != s_InstitutionCodeTypeMap.end()) {
400  is_miscapitalized = syn_is_miscapitalized;
401  correct_cap = syn_correct_cap;
402  needs_country = syn_needs_country;
403  erroneous_country = syn_erroneous_country;
404  voucher_type = is->second;
405  return true;
406  }
407  }
408  }
409  voucher_type = ic->second;
410  return true;
411  }
412  ic = FindInstitutionCode(inst_coll, s_InstitutionCodeSynonymsMap, is_miscapitalized, correct_cap, needs_country, erroneous_country);
413  if (ic != s_InstitutionCodeSynonymsMap.end()) {
415  if (it != s_InstitutionCodeTypeMap.end()) {
416  voucher_type = it->second;
417  }
418  return true;
419  }
420  return false;
421 }
422 
423 
424 string
425 COrgMod::IsCultureCollectionValid(const string& culture_collection)
426 {
427  if (NStr::Find(culture_collection, ":") == string::npos) {
428  return "Culture_collection should be structured, but is not";
429  } else {
430  return IsStructuredVoucherValid(culture_collection, "c");
431  }
432 }
433 
434 
435 string
436 COrgMod::IsSpecimenVoucherValid(const string& specimen_voucher)
437 {
438  if (NStr::Find(specimen_voucher, ":") == string::npos) {
439  return kEmptyStr;
440  } else {
441  return IsStructuredVoucherValid(specimen_voucher, "s");
442  }
443 }
444 
445 
446 string
447 COrgMod::IsBiomaterialValid(const string& biomaterial)
448 {
449  if (NStr::Find(biomaterial, ":") == string::npos) {
450  return kEmptyStr;
451  } else {
452  return IsStructuredVoucherValid(biomaterial, "b");
453  }
454 }
455 
456 
// Messages returned by IsStructuredVoucherValid when a parsed voucher has a
// blank institution code or a blank identifier; when both are blank the two
// messages are joined with a newline.
const string kMissingInst = "Voucher is missing institution code";
const string kMissingId = "Voucher is missing specific identifier";
459 
460 string
461 COrgMod::IsStructuredVoucherValid(const string& val, const string& v_type)
462 {
463  string inst_code;
464  string coll_code;
465  string inst_coll;
466  string id;
467 
468  ParseStructuredVoucher(val, inst_code, coll_code, id);
469  string rval = kEmptyStr;
470  if (NStr::IsBlank(inst_code)) {
471  rval = kMissingInst;
472  }
473  if (NStr::IsBlank(id)) {
474  rval = NStr::IsBlank(rval) ? kMissingId : rval + "\n" + kMissingId;
475  }
476  if (!NStr::IsBlank(rval)) {
477  return rval;
478  }
479 
480  if (NStr::IsBlank (coll_code)) {
481  inst_coll = inst_code;
482  } else {
483  inst_coll = inst_code + ":" + coll_code;
484  }
485 
486  // first, check combination of institution and collection (if collection found)
487  string voucher_type;
488  bool is_miscapitalized;
489  bool needs_country;
490  bool erroneous_country;
491  string correct_cap;
492  if (COrgMod::IsInstitutionCodeValid(inst_coll, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
493  if (needs_country) {
494  return "Institution code " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
495  } else if (erroneous_country) {
496  return "Institution code " + inst_coll + " should not be qualified with a <COUNTRY> designation";
497  } else if (is_miscapitalized) {
498  return "Institution code " + inst_coll + " exists, but correct capitalization is " + correct_cap;
499  } else {
500  if (NStr::FindNoCase(voucher_type, v_type) == string::npos) {
501  if (NStr::FindNoCase (voucher_type, "b") != string::npos) {
502  return "Institution code " + inst_coll + " should be bio_material";
503  } else if (NStr::FindNoCase (voucher_type, "c") != string::npos) {
504  return "Institution code " + inst_coll + " should be culture_collection";
505  } else if (NStr::FindNoCase (voucher_type, "s") != string::npos) {
506  return "Institution code " + inst_coll + " should be specimen_voucher";
507  }
508  }
509  return kEmptyStr;
510  }
511  } else if (NStr::StartsWith(inst_coll, "personal", NStr::eNocase)) {
512  if (NStr::EqualNocase (inst_code, "personal") && NStr::IsBlank (coll_code)) {
513  return "Personal collection does not have name of collector";
514  }
515  return kEmptyStr;
516  } else if (NStr::IsBlank(coll_code)) {
517  return "Institution code " + inst_coll + " is not in list";
518  } else if (IsInstitutionCodeValid(inst_code, voucher_type, is_miscapitalized, correct_cap, needs_country, erroneous_country)) {
519  if (needs_country) {
520  return "Institution code in " + inst_coll + " needs to be qualified with a <COUNTRY> designation";
521  } else if (erroneous_country) {
522  return "Institution code " + inst_code + " should not be qualified with a <COUNTRY> designation";
523  } else if (is_miscapitalized) {
524  return "Institution code " + inst_code + " exists, but correct capitalization is " + correct_cap;
525  } else if (NStr::Equal (coll_code, "DNA")) {
526  // DNA is a valid collection for any institution (using bio_material)
527  if (!NStr::Equal(v_type, "b")) {
528  return "DNA should be bio_material";
529  }
530  } else {
531  return "Institution code " + inst_code + " exists, but collection "
532  + inst_coll + " is not in list";
533  }
534  } else {
535  return "Institution code " + inst_coll + " is not in list";
536  }
537  return kEmptyStr;
538 }
539 
540 
541 string COrgMod::MakeStructuredVoucher(const string& inst, const string& coll, const string& id)
542 {
543  string rval;
544  if (NStr::IsBlank(inst) && NStr::IsBlank(coll) && NStr::IsBlank(id)) {
545  rval = kEmptyStr;
546  } else if (NStr::IsBlank(inst) && NStr::IsBlank(coll)) {
547  rval = id;
548  } else if (NStr::IsBlank(coll)) {
549  rval = inst + ":" + id;
550  } else {
551  rval = inst + ":" + coll + ":" + id;
552  }
553  return rval;
554 }
555 
556 
557 // As described in SQD-1655, we can only rescue an unstructured
558 // structured voucher if it consists of a series of three or
559 // more letters followed by a series of digits, optionally separated
560 // by space, and if the series of letters looks up as a valid
561 // institution code.
563 {
564  // nothing to do if value is blank
565  if (NStr::IsBlank(val)) {
566  return false;
567  }
568 
569  // find first non-letter position
570  size_t len = 0;
571  string::iterator sit = val.begin();
572  while (sit != val.end() && isalpha(*sit)) {
573  len++;
574  sit++;
575  }
576  if (len < 3 || len == val.length()) {
577  // institution code too short or no second token
578  return false;
579  }
580  string inst_code = val.substr(0, len);
581  string remainder = val.substr(len);
582  NStr::TruncateSpacesInPlace(remainder);
583  if (NStr::IsBlank(remainder)) {
584  // no second token
585  return false;
586  }
587  // remainder must be all digits
588  sit = remainder.begin();
589  while (sit != remainder.end()) {
590  if (!isdigit(*sit)) {
591  return false;
592  }
593  sit++;
594  }
595 
596  bool rval = false;
597  COrgMod::TInstitutionCodeMap::iterator it = code_map.find(inst_code);
598  if (it != code_map.end()) {
599  val = inst_code + ":" + remainder;
600  rval = true;
601  }
602 
603  return rval;
604 }
605 
606 
607 bool COrgMod::AddStructureToVoucher(string& val, const string& v_type)
608 {
609  // nothing to do if value is blank
610  if (NStr::IsBlank(val)) {
611  return false;
612  }
613 
615  if (NStr::Find(v_type, "b") != string::npos && FindInstCodeAndSpecID(s_BiomaterialInstitutionCodeMap, val)) {
616  return true;
617  } else if (NStr::Find(v_type, "c") != string::npos && FindInstCodeAndSpecID(s_CultureCollectionInstitutionCodeMap, val)) {
618  return true;
619  } else if (NStr::Find(v_type, "s") != string::npos && FindInstCodeAndSpecID(s_SpecimenVoucherInstitutionCodeMap, val)) {
620  return true;
621  } else {
622  return false;
623  }
624 }
625 
626 
627 bool COrgMod::RescueInstFromParentheses(string& val, const string& voucher_type)
628 {
629  bool rval = false;
630 
631  if (!NStr::EndsWith(val, ")")) {
632  return false;
633  }
634  size_t colon_pos = NStr::Find(val, ":");
635  if (colon_pos != 0 && colon_pos != string::npos) {
636  return false;
637  }
638  size_t pos = NStr::Find(val, "(", NStr::eNocase, NStr::eReverseSearch);
639  if (pos == string::npos) {
640  return false;
641  }
642  string inst = val.substr(pos + 1, val.length() - pos - 2);
643  bool miscap = false, needs_country = false, wrong_country = false;
644  string capfix;
645 
646  string v_type = voucher_type;
647  if (IsInstitutionCodeValid(inst, v_type, miscap, capfix, needs_country, wrong_country)) {
648  if (colon_pos == 0) {
649  val = inst + val.substr(0, pos);
650  } else {
651  val = inst + ":" + val.substr(0, pos);
652  }
654  rval = true;
655  }
656 
657 
658  return rval;
659 }
660 
661 
662 bool
663 COrgMod::FixStructuredVoucher(string& val, const string& v_type)
664 {
665  string inst_code;
666  string coll_code;
667  string id;
668 
669  ParseStructuredVoucher(val, inst_code, coll_code, id);
670  if (NStr::IsBlank(inst_code)) {
671  if (AddStructureToVoucher(val, v_type)) {
672  return true;
673  } else {
674  return RescueInstFromParentheses(val, v_type);
675  }
676  }
677  bool rval = false;
678  bool found = false;
680 
682 
683  string new_inst_code = inst_code;
684  while ((!found) && (it != s_InstitutionCodeTypeMap.end())) {
685  if (NStr::Find(it->second, v_type) != string::npos) {
686  if (NStr::EqualNocase (it->first, inst_code)) {
687  if (!NStr::Equal (it->first, inst_code)) {
688  new_inst_code = it->first;
689  rval = true;
690  }
691  found = true;
692  } else if (NStr::StartsWith(inst_code, it->first)
693  && inst_code.c_str()[it->first.length()] == '<') {
694  /*
695  new_inst_code = it->first;
696  rval = true;
697  */
698  }
699  }
700  ++it;
701  }
702 
703 
704  if (rval) {
705  val = MakeStructuredVoucher(new_inst_code, coll_code, id);
706  }
707  return rval;
708 }
709 
710 
711 const string &
712 COrgMod::GetInstitutionFullName( const string &short_name )
713 {
716  if( iter != s_CompleteInstitutionCodeMap.end() ) {
717  return iter->second;
718  } else {
719  return kEmptyStr;
720  }
721 }
722 
723 const string &
724 COrgMod::GetInstitutionShortName( const string &full_name )
725 {
728  if( iter != s_CompleteInstitutionFullNameMap.end() ) {
729  return iter->second;
730  } else {
731  return kEmptyStr;
732  }
733 }
734 
735 
736 // look for multiple source vouchers
737 string COrgMod::CheckMultipleVouchers(const vector<string>& vouchers)
738 {
739  ITERATE(vector<string>, it, vouchers) {
740  string inst1, coll1, id1;
741  COrgMod::ParseStructuredVoucher(*it, inst1, coll1, id1);
742  if (NStr::IsBlank(inst1)) continue;
743  if (NStr::EqualNocase(inst1, "personal") || NStr::EqualCase(coll1, "DNA")) continue;
744 
745  vector<string>::const_iterator it_next = it;
746  for (++it_next; it_next != vouchers.end(); ++it_next) {
747  string inst2, coll2, id2;
748  COrgMod::ParseStructuredVoucher(*it_next, inst2, coll2, id2);
749  if (NStr::IsBlank(inst2)) continue;
750  if (NStr::EqualNocase(inst2, "personal") || NStr::EqualCase(coll2, "DNA")) continue;
751  if (!NStr::EqualNocase (inst1, inst2) || NStr::IsBlank(inst1)) continue;
752  return NStr::EqualNocase(coll1, coll2) && !NStr::IsBlank(coll1) ? "Multiple vouchers with same institution:collection" : "Multiple vouchers with same institution";
753  }
754  }
755  return kEmptyStr;
756 }
757 
758 
// Returns true when str contains no character other than '0'..'9'.
// An empty string therefore counts as "all digits"; callers that need at
// least one digit must check for emptiness themselves.
// The argument is taken by const reference so no copy is made per call.
bool s_IsAllDigits(const string& str)
{
    return str.find_first_not_of("0123456789") == string::npos;
}
763 
764 
765 bool s_FixStrainForPrefix(const string& prefix, string& strain)
766 {
767  bool rval = false;
768 
769  if (NStr::StartsWith(strain, prefix, NStr::eNocase)) {
770  string tmp = strain.substr(prefix.length());
772  if (NStr::StartsWith(tmp, ":") || NStr::StartsWith(tmp, "/")) {
773  tmp = tmp.substr(1);
774  }
776  if (!NStr::IsBlank(tmp) && s_IsAllDigits(tmp)) {
777  strain = prefix + " " + tmp;
778  rval = true;
779  }
780  }
781  return rval;
782 }
783 
784 
785 string s_FixOneStrain( const string& strain)
786 {
787  string new_val = strain;
788  if (s_FixStrainForPrefix("ATCC", new_val)) {
789  // fixed for ATCC
790  } else if (s_FixStrainForPrefix("DSM", new_val)) {
791  // fixed for DSM
792  } else {
793  // no fix
794  new_val = kEmptyStr;
795  }
796  return new_val;
797 }
798 
799 
800 string COrgMod::FixStrain( const string& strain)
801 {
802  string new_val = strain;
803  vector<string> words;
804  vector<string> results;
805  NStr::Split(strain, ";", words);
806  FOR_EACH_STRING_IN_VECTOR(itr, words) {
807  string str = *itr;
809  string fixed = s_FixOneStrain(str);
810  if (fixed.empty()) {
811  results.push_back (str);
812  } else {
813  results.push_back (fixed);
814  }
815  }
816  return NStr::Join(results,"; ");
817 }
818 
819 
// Placeholder texts that are not acceptable as a strain value.
// IsStrainValid compares the whole strain against each entry, ignoring case.
const char* sm_BadStrainValues[] = {
    "yes",
    "no",
    "-",
    "bacteria",
    "sp.",
    "sp",
    "strain",
    "environmental",
    "soil",
    "clinical isolate",
    "NA",
    "whole organism",
    "microbial"
};
835 
836 bool COrgMod::IsStrainValid(const string& strain)
837 {
838  size_t max = sizeof(sm_BadStrainValues) / sizeof(const char*);
839  for (size_t i = 0; i < max; i++) {
840  if (NStr::EqualNocase(strain, sm_BadStrainValues[i])) {
841  return false;
842  }
843  }
844  return true;
845 }
846 
847 
// Host names in their preferred spelling/capitalization. The table is
// walked by index using sizeof(sm_KnownHostWords) / sizeof(const char*),
// and a matching entry replaces the caller's value.
// NOTE(review): the comparison used for matching is not visible in this
// view -- presumably case-insensitive; confirm before relying on it.
// Entries are kept in case-insensitive alphabetical order (note the
// capitalized "Channel catfish").
const char* sm_KnownHostWords[] = {
    "alfalfa",
    "almond",
    "apple",
    "asparagus",
    "badger",
    "bean",
    "bitter melon",
    "blackberry",
    "blossoms",
    "blueberry",
    "bovine",
    "brinjal",
    "broad bean",
    "cabbage",
    "canine",
    "cantaloupe",
    "caprine",
    "carrot",
    "cassava",
    "cat",
    "catfish",
    "cattle",
    "cauliflower",
    "Channel catfish",
    "chestnut",
    "chicken",
    "chimpanzee",
    "clover",
    "corn",
    "cotton",
    "cow",
    "cowpea",
    "crab",
    "cucumber",
    "curd",
    "dairy cow",
    "dog",
    "duck",
    "equine",
    "feline",
    "fish",
    "fox",
    "goat",
    "goldfish",
    "goose",
    "guanabana",
    "honeydew",
    "horse",
    "ice cream",
    "juniper",
    "larva",
    "laurel",
    "leek",
    "lentil",
    "lilac",
    "lily",
    "maize",
    "mamey",
    "mamey sapote",
    "mango",
    "mangrove",
    "mangroves",
    "marigold",
    "marine sponge",
    "melon",
    "mosquito",
    "mulberry",
    "mungbean",
    "nematode",
    "oat",
    "ornamental pear",
    "ovine",
    "papaya",
    "pea",
    "peach",
    "peacock",
    "pear",
    "pepper",
    "pig",
    "pomegranate",
    "porcine",
    "potato",
    "raccoon dog",
    "red fox",
    "rhizospheric soil",
    "rice",
    "salmon",
    "seagrass",
    "sesame",
    "sheep",
    "shrimp",
    "sorghum",
    "sour cherry",
    "sourdough",
    "soybean",
    "sponge",
    "squash",
    "strawberry",
    "sugar beet",
    "sunflower",
    "sweet cherry",
    "swine",
    "tobacco",
    "tomato",
    "turf",
    "turfgrass",
    "turkey",
    "turtle",
    "watermelon",
    "wheat",
    "white clover",
    "willow",
    "wolf",
    "yak",
};
964 
965 
967 {
968  string fix = value;
969 
970  size_t max = sizeof(sm_KnownHostWords) / sizeof(const char*);
971  for (size_t i = 0; i < max; i++) {
973  fix = sm_KnownHostWords[i];
974  break;
975  }
976  }
977  return fix;
978 }
979 
980 
981 static constexpr auto s_hostFixupMap =
983  { "-", "missing" },
984  { "no", "missing" },
985  { "none", "missing" },
986  { "NA", "not available" },
987  { "N/A", "not available" },
988  { "n/a", "not available" },
989  { "free-living", "natural / free-living" },
990  { "natural", "natural / free-living" },
991  { "not available", "not available" },
992  { "not collected", "not collected" },
993  { "not applicable", "not applicable" },
994  { "NR", "not applicable" },
995  { "not known", "unknown" },
996  { "other", "missing" },
997  { "misc", "missing" },
998  { "not determined", "unknown" },
999  { "unknown", "unknown" },
1000  { "not available: to be reported later", "not available" },
1001  { "obscured", "obscured" },
1002  { "human", "Homo sapiens" },
1003  { "homo sapiens", "Homo sapiens" }
1004 });
1005 
1006 
1007 
1008 
1009 string COrgMod::FixHost(const string& value)
1010 {
1011  string fix = value;
1012 
1013  auto possible_fix = s_hostFixupMap.find(value);
1014  if (possible_fix != s_hostFixupMap.end()) {
1015  fix = possible_fix->second;
1016  }
1017 
1018  return fix;
1019 }
1020 
1021 
1022 string COrgMod::FixCapitalization(TSubtype subtype, const string& value)
1023 {
1024  string new_val = value;
1025  switch (subtype) {
1027  new_val = FixHostCapitalization(value);
1028  break;
1029  default:
1030  new_val = value;
1031  break;
1032  }
1033  return new_val;
1034 }
1035 
1036 
1038 {
1039  if (!IsSetSubtype() || !IsSetSubname()) {
1040  return;
1041  }
1042 
1043  string new_val = FixCapitalization(GetSubtype(), GetSubname());
1044 
1045  if (!NStr::IsBlank(new_val)) {
1046  SetSubname(new_val);
1047  }
1048 
1049 }
1050 
1051 
1052 string COrgMod::AutoFix(TSubtype subtype, const string& value)
1053 {
1054  string new_val;
1055  switch (subtype) {
1057  new_val = FixStrain(value);
1058  break;
1060  new_val = FixHost(value);
1061  break;
1062  default:
1063  break;
1064  }
1065  return new_val;
1066 }
1067 
1068 
1070 {
1071  if (!IsSetSubtype() || !IsSetSubname()) {
1072  return;
1073  }
1074 
1075  string new_val = AutoFix(GetSubtype(), GetSubname());
1076 
1077  if (!NStr::IsBlank(new_val)) {
1078  SetSubname(new_val);
1079  }
1080 
1081 }
1082 
1083 
// Strips every ' ', '_', '-', ':' and '/' from s, in place, so that strain
// names differing only in those separators compare equal afterwards.
// Done as a single erase-remove pass over the string rather than one
// whole-string replacement pass per separator.
void s_HarmonizeString(string& s)
{
    s.erase(std::remove_if(s.begin(), s.end(),
                           [](char c) {
                               switch (c) {
                               case ' ':
                               case '_':
                               case '-':
                               case ':':
                               case '/':
                                   return true;
                               default:
                                   return false;
                               }
                           }),
            s.end());
}
1092 
1093 
1094 bool COrgMod::FuzzyStrainMatch( const string& strain1, const string& strain2 )
1095 {
1096  string s1 = strain1;
1097  string s2 = strain2;
1098 
1099  s_HarmonizeString(s1);
1100  s_HarmonizeString(s2);
1101  return NStr::EqualNocase(s1, s2);
1102 }
1103 
1104 
1106 {
1107  bool any_change = false;
1108 
1109  if (IsSetSubtype() && IsSetSubname()) {
1110  string& val = SetSubname();
1111  switch (GetSubtype()) {
1112  case eSubtype_serovar:
1113  if (NStr::StartsWith(val, "serovar ")) {
1114  val = val.substr(8);
1115  any_change = true;
1116  }
1117  break;
1118  case eSubtype_sub_species:
1119  if (NStr::StartsWith(val, "subsp. ")) {
1120  val = val.substr(7);
1121  any_change = true;
1122  }
1123  break;
1124  default:
1125  break;
1126  }
1127  }
1128  return any_change;
1129 }
1130 
1131 
1136 };
1137 
1139 
1141 {
1142  bool rval = false;
1143 
1144  for (size_t i = 0; i < sNumUnexpectedViralOrgModQualifiers && !rval; i++) {
1145  if (subtype == sUnexpectedViralOrgModQualifiers[i]) {
1146  rval = true;
1147  }
1148  }
1149  return rval;
1150 }
1151 
1152 
1154 {
1156  return true;
1157  } else {
1158  return false;
1159  }
1160 }
1161 
1162 
// Prefixes that IsValidTypeMaterial accepts verbatim: a type_material value
// is valid if it starts (case-sensitively, per the default of
// NStr::StartsWith -- TODO confirm) with any one of these.
static const string sValidTypeMaterialPrefixes[] = {
    "type material",
    "type strain",
    "reference material",
    "reference strain",
    "neotype strain",
    "paralectotype",
    "hapantotype",
    "allotype",
    "culture from reference material",
    "culture from type material",
    "ex-type",
    "culture from hapantotype",
    "pathotype strain"
};

// Number of entries in sValidTypeMaterialPrefixes, derived from the array
// itself so it cannot drift when entries are added or removed.
static const int sNumValidTypeMaterialPrefixes = sizeof(sValidTypeMaterialPrefixes) / sizeof(string);
1180 
// Type names that IsValidTypeMaterial accepts as a prefix in three forms:
// bare, preceded by "culture from ", or preceded by "ex-".
// NOTE(review): "isoepitype" is listed twice; the duplicate looks harmless
// for prefix matching but may be a typo for another name -- confirm.
static const string sValidCultureTypeMaterialPrefixes[] = {
    "epitype",
    "hapantotype",
    "holotype",
    "isoepitype",
    "isoepitype",
    "isolectotype",
    "isoneotype",
    "isoparatype",
    "isosyntype",
    "isotype",
    "lectotype",
    "neotype",
    "paratype",
    "reference",
    "syntype",
    "type material"
};
1199 
1201 
1202 bool COrgMod::IsValidTypeMaterial(const string& type_material)
1203 {
1204  for (int i = 0; i < sNumValidTypeMaterialPrefixes; i++) {
1205  if (NStr::StartsWith(type_material, sValidTypeMaterialPrefixes[i])) {
1206  return true;
1207  }
1208  }
1209 
1210  for (int i = 0; i < sNumValidCultureTypeMaterialPrefixes; i++) {
1211  if (NStr::StartsWith(type_material, sValidCultureTypeMaterialPrefixes[i])) {
1212  return true;
1213  } else if (NStr::StartsWith(type_material, "culture from " + sValidCultureTypeMaterialPrefixes[i])) {
1214  return true;
1215  } else if (NStr::StartsWith(type_material, "ex-" + sValidCultureTypeMaterialPrefixes[i])) {
1216  return true;
1217  }
1218  }
1219  return false;
1220 }
1221 
1222 
1223 // note that the INSDC method now calls IsValidTypeMaterial
1224 bool COrgMod::IsINSDCValidTypeMaterial(const string& type_material)
1225 {
1226  if (NStr::IsBlank(type_material)) {
1227  return false;
1228  }
1229 
1230  return IsValidTypeMaterial(type_material);
1231 }
1232 
1233 
1235 {
1236  if (! CNcbiApplication::Instance()) {
1237  return false;
1238  }
1239 
1241  string fromEnv = env.Get("NCBI_VALIDATE_FOR_MULTIPLE_ISOLATES");
1242  NStr::ToLower(fromEnv);
1243  if (fromEnv == "true") {
1244  return true;
1245  } else if (fromEnv == "false") {
1246  return false;
1247  }
1248 
1250  string fromConfig = reg.GetString("OrgMod", "ValidateForMultipleIsolates", "off");
1251  NStr::ToLower(fromConfig);
1252  if (fromConfig == "1" || fromConfig == "on" || fromConfig == "true" || fromConfig == "yes") {
1253  return true;
1254  }
1255 
1256  // RW-2259 enable by default
1257  return true;
1258 }
1259 
1260 
1262 {
1263  static bool value = s_init_LookForMultipleIsolates();
1264  return value;
1265 }
1266 
1267 
1268 
1269 END_objects_SCOPE // namespace ncbi::objects::
1270 
1272 
1273 /* Original file checksum: lines: 65, chars: 1882, CRC32: efba64e1 */
static COrgMod::TInstitutionCodeMap s_CultureCollectionInstitutionCodeMap
Definition: OrgMod.cpp:217
bool FindInstCodeAndSpecID(COrgMod::TInstitutionCodeMap &code_map, string &val)
Definition: OrgMod.cpp:562
static const size_t sNumUnexpectedViralOrgModQualifiers
Definition: OrgMod.cpp:1138
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionFullNameMap
Definition: OrgMod.cpp:221
void s_HarmonizeString(string &s)
Definition: OrgMod.cpp:1084
const char * sm_KnownHostWords[]
Definition: OrgMod.cpp:848
const char * sm_BadStrainValues[]
Definition: OrgMod.cpp:820
static const string sValidTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1163
static const int sNumValidTypeMaterialPrefixes
Definition: OrgMod.cpp:1179
const string kMissingInst
Definition: OrgMod.cpp:457
static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]
Definition: OrgMod.cpp:1132
static const string sValidCultureTypeMaterialPrefixes[]
Definition: OrgMod.cpp:1181
static constexpr auto s_hostFixupMap
Definition: OrgMod.cpp:981
static bool s_InstitutionCollectionCodeMapInitialized
Definition: OrgMod.cpp:224
static COrgMod::TInstitutionCodeMap s_BiomaterialInstitutionCodeMap
Definition: OrgMod.cpp:215
bool s_FixStrainForPrefix(const string &prefix, string &strain)
Definition: OrgMod.cpp:765
static COrgMod::TInstitutionCodeMap s_SpecimenVoucherInstitutionCodeMap
Definition: OrgMod.cpp:216
bool s_IsAllDigits(string str)
Definition: OrgMod.cpp:759
const string kMissingId
Definition: OrgMod.cpp:458
static COrgMod::TInstitutionCodeMap s_InstitutionCodeTypeMap
Definition: OrgMod.cpp:222
DEFINE_STATIC_FAST_MUTEX(s_InstitutionCollectionCodeMutex)
static COrgMod::TInstitutionCodeMap s_CompleteInstitutionCodeMap
Definition: OrgMod.cpp:220
string s_FixOneStrain(const string &strain)
Definition: OrgMod.cpp:785
static COrgMod::TInstitutionCodeMap s_InstitutionCodeSynonymsMap
Definition: OrgMod.cpp:223
static const int sNumValidCultureTypeMaterialPrefixes
Definition: OrgMod.cpp:1200
static void s_InitializeInstitutionCollectionCodeMaps(void)
Definition: OrgMod.cpp:279
static bool s_init_LookForMultipleIsolates(void)
Definition: OrgMod.cpp:1234
static void s_ProcessInstitutionCollectionCodeLine(const CTempString &line)
Definition: OrgMod.cpp:230
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiEnvironment –.
Definition: ncbienv.hpp:110
CNcbiRegistry –.
Definition: ncbireg.hpp:913
static bool FixStructuredVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:663
static bool NCBI_ValidateForMultipleIsolates(void)
Definition: OrgMod.cpp:1261
static bool FuzzyStrainMatch(const string &strain1, const string &strain2)
Definition: OrgMod.cpp:1094
EVocabulary
Definition: OrgMod.hpp:67
@ eVocabulary_insdc
Definition: OrgMod.hpp:69
static string IsCultureCollectionValid(const string &culture_collection)
Definition: OrgMod.cpp:425
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:86
void FixCapitalization()
Definition: OrgMod.cpp:1037
static const string & GetInstitutionShortName(const string &full_name)
Definition: OrgMod.cpp:724
static bool IsINSDCValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1224
static bool IsInstitutionCodeValid(const string &inst_coll, string &voucher_type, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:357
static bool AddStructureToVoucher(string &val, const string &voucher_type)
Definition: OrgMod.cpp:607
static bool IsStrainValid(const string &strain)
Definition: OrgMod.cpp:836
static bool IsMultipleValuesAllowed(TSubtype)
Definition: OrgMod.cpp:127
static bool IsDiscouraged(const TSubtype stype, bool indexer=false)
Definition: OrgMod.cpp:160
static const string & GetInstitutionFullName(const string &short_name)
Definition: OrgMod.cpp:712
void AutoFix()
Definition: OrgMod.cpp:1069
static string IsStructuredVoucherValid(const string &val, const string &voucher_type)
Definition: OrgMod.cpp:461
static string FixHost(const string &value)
Definition: OrgMod.cpp:1009
bool RemoveAbbreviation()
Definition: OrgMod.cpp:1105
static bool IsValidTypeMaterial(const string &type_material)
Definition: OrgMod.cpp:1202
static string IsBiomaterialValid(const string &biomaterial)
Definition: OrgMod.cpp:447
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
static string CheckMultipleVouchers(const vector< string > &)
Definition: OrgMod.cpp:737
static string FixHostCapitalization(const string &value)
Definition: OrgMod.cpp:966
static bool HoldsInstitutionCode(const TSubtype stype)
This indicates if the given Org-mod subtype is supposed to hold an institution code (Example: "ATCC:2...
Definition: OrgMod.cpp:176
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:62
~COrgMod(void)
Definition: OrgMod.cpp:57
bool IsUnexpectedViralOrgModQualifier() const
Definition: OrgMod.cpp:1153
static bool RescueInstFromParentheses(string &val, const string &voucher_type)
Definition: OrgMod.cpp:627
static TInstitutionCodeMap::iterator FindInstitutionCode(const string &inst_coll, TInstitutionCodeMap &code_map, bool &is_miscapitalized, string &correct_cap, bool &needs_country, bool &erroneous_country)
Definition: OrgMod.cpp:317
static string MakeStructuredVoucher(const string &inst, const string &coll, const string &id)
Definition: OrgMod.cpp:541
static string IsSpecimenVoucherValid(const string &specimen_voucher)
Definition: OrgMod.cpp:436
static string FixStrain(const string &strain)
Definition: OrgMod.cpp:800
static bool ParseStructuredVoucher(const string &str, string &inst, string &coll, string &id)
Definition: OrgMod.cpp:189
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTime –.
Definition: ncbitime.hpp:296
static constexpr auto construct(typename _Enabled::type const (&init)[N])
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
#define check(s)
Definition: describecol2.c:21
static const char * str(char *buf, int n)
Definition: stats.c:84
static HENV env
Definition: transaction2.c:38
static char tmp[3200]
Definition: utf8.c:42
Utility macros and typedefs for exploring NCBI objects from general.asn.
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_CATCH(message)
Catch CExceptions as well This macro is deprecated - use *_X or *_XX variant instead of it.
Definition: ncbiexpt.hpp:580
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
Definition: ncbireg.cpp:321
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5432
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2699
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5327
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5414
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5355
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5386
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
TSubname & SetSubname(void)
Assign a value to Subname data member.
Definition: OrgMod_.hpp:370
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: OrgMod_.hpp:307
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: OrgMod_.hpp:288
const TSubname & GetSubname(void) const
Get the Subname member data.
Definition: OrgMod_.hpp:347
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
Definition: OrgMod_.hpp:335
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_gb_acronym
used by taxonomy database
Definition: OrgMod_.hpp:115
@ eSubtype_gb_synonym
used by taxonomy database
Definition: OrgMod_.hpp:117
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_dosage
chromosome dosage of hybrid
Definition: OrgMod_.hpp:103
@ eSubtype_sub_species
Definition: OrgMod_.hpp:105
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_nomenclature
code of nomenclature in subname (B,P,V,Z or combination)
Definition: OrgMod_.hpp:122
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_gb_anamorph
used by taxonomy database
Definition: OrgMod_.hpp:116
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_forma_specialis
Definition: OrgMod_.hpp:109
@ eSubtype_old_lineage
Definition: OrgMod_.hpp:123
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
FILE * file
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
static int * results[]
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139
bool g_IsDataFileOld(const CTempString &path, const CTempString &id_line)
Check whether the given file (a full path, as returned by g_FindDataFile) is older than a built-in ve...
Definition: util_misc.cpp:193
Modified on Wed Sep 04 14:59:30 2024 by modify_doxy.py rev. 669887