NCBI C++ ToolKit
BioSource.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: BioSource.cpp 99563 2023-04-18 18:43:29Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the data definition file
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 
39 // generated includes
40 #include <ncbi_pch.hpp>
47 #include <algorithm>
48 #include <set>
49 #include <util/static_map.hpp>
50 #include <corelib/ncbistr.hpp>
51 #include <corelib/ncbistre.hpp>
52 #include <corelib/ncbienv.hpp>
53 #include <corelib/ncbiobj.hpp>
54 #include <corelib/ncbi_limits.h>
55 #include <memory>
56 #include <set>
57 #include <list>
58 #include <vector>
59 
60 
61 // generated classes
62 
64 
65 BEGIN_objects_SCOPE // namespace ncbi::objects::
66 
67 // destructor
69 {
70 }
71 
72 
// Return the genetic code id applicable to this source, or `def` when none
// can be determined.
//  - `def` is returned when the Org or its Orgname cannot be read.
//  - The group commented "mitochondrial code" yields Mgcode if set, else `def`.
//  - The plastid group yields Pgcode when it is set and > 0, otherwise 11.
//  - Any other genome value yields Gcode if set, else `def`.
//  - Any exception raised on the way is swallowed and `def` is returned.
// NOTE(review): this listing omits the line that declares `genome` and several
// `case` labels (source lines 76, 84-87, 95-96, 102-103); confirm against the
// repository copy before editing.
73 int CBioSource::GetGenCode(int def) const
74 {
75  try {
77 
78  if ( !CanGetOrg() || !GetOrg().CanGetOrgname() ) {
79  return def; // assume standard genetic code
80  }
81  const COrgName& orn = GetOrg().GetOrgname();
82 
83  switch ( genome ) {
88  {
89  // mitochondrial code
90  if (orn.IsSetMgcode()) {
91  return orn.GetMgcode();
92  }
93  return def;
94  }
97  case eGenome_plastid:
98  case eGenome_cyanelle:
99  case eGenome_apicoplast:
100  case eGenome_leucoplast:
101  case eGenome_proplastid:
104  {
105  // bacteria and plant plastid code
106  if (orn.IsSetPgcode()) {
107  int pgcode = orn.GetPgcode();
108  if (pgcode > 0) return pgcode;
109  }
110  // bacteria and plant plastids default to code 11.
111  return 11;
112  }
113  default:
114  {
115  if (orn.IsSetGcode()) {
116  return orn.GetGcode();
117  }
118  return def;
119  }
120  }
121  } catch (...) {
122  return def; // was 0(!)
123  }
124 }
125 
127 
// Organelle / location name -> CBioSource::EGenome value.
// Several spellings map to the same value (e.g. "chloroplast" and
// "plastid:chloroplast"; "extrachrom" and "extrachromosomal").
// The keys are listed in ascending byte order; keep new entries in order.
// NOTE(review): the declaration line of this array (source line 128) is missing
// from this listing; presumably it backs sm_GenomeKeys, whose find() is used by
// GetGenomeByOrganelle -- confirm.
129  { "apicoplast", CBioSource::eGenome_apicoplast },
130  { "chloroplast", CBioSource::eGenome_chloroplast },
131  { "chromatophore", CBioSource::eGenome_chromatophore },
132  { "chromoplast", CBioSource::eGenome_chromoplast },
133  { "chromosome", CBioSource::eGenome_chromosome },
134  { "cyanelle", CBioSource::eGenome_cyanelle },
135  { "endogenous virus", CBioSource::eGenome_endogenous_virus },
136  { "endogenous_virus", CBioSource::eGenome_endogenous_virus },
137  { "extrachrom", CBioSource::eGenome_extrachrom },
138  { "extrachromosomal", CBioSource::eGenome_extrachrom },
139  { "genomic", CBioSource::eGenome_genomic },
140  { "hydrogenosome", CBioSource::eGenome_hydrogenosome },
141  { "insertion_seq", CBioSource::eGenome_insertion_seq },
142  { "kinetoplast", CBioSource::eGenome_kinetoplast },
143  { "leucoplast", CBioSource::eGenome_leucoplast },
144  { "macronuclear", CBioSource::eGenome_macronuclear },
145  { "mitochondrion", CBioSource::eGenome_mitochondrion },
146  { "mitochondrion:kinetoplast", CBioSource::eGenome_kinetoplast },
147  { "nucleomorph", CBioSource::eGenome_nucleomorph },
148  { "plasmid", CBioSource::eGenome_plasmid },
149  { "plastid", CBioSource::eGenome_plastid },
150  { "plastid:apicoplast", CBioSource::eGenome_apicoplast },
151  { "plastid:chloroplast", CBioSource::eGenome_chloroplast },
152  { "plastid:chromatophore", CBioSource::eGenome_chromatophore },
153  { "plastid:chromoplast", CBioSource::eGenome_chromoplast },
154  { "plastid:cyanelle", CBioSource::eGenome_cyanelle },
155  { "plastid:leucoplast", CBioSource::eGenome_leucoplast },
156  { "plastid:proplastid", CBioSource::eGenome_proplastid },
157  { "proplastid", CBioSource::eGenome_proplastid },
158  { "proviral", CBioSource::eGenome_proviral },
159  { "transposon", CBioSource::eGenome_transposon },
160  { "unknown", CBioSource::eGenome_unknown },
161  { "virion", CBioSource::eGenome_virion }
162 };
163 
164 
167 
// Translate an organelle name into an EGenome value.
//   organelle   - text to look up
//   use_case    - case sensitivity used for the comparisons
//   starts_with - when true, a table key only has to be a prefix of
//                 `organelle`, and the prefix must be the whole string or be
//                 followed by a whitespace character
// Three strategies are used:
//   * case-sensitive exact match: direct sm_GenomeKeys.find();
//   * prefix match: linear scan that stops once gtype is no longer
//     eGenome_unknown;
//   * exact match with caller-chosen case: linear scan with NStr::Equal.
// "mitochondrial" is tested separately in every strategy.
// NOTE(review): the declaration of `gtype` (source line 170) and the statements
// executed for "mitochondrial" (lines 176, 185, 201) are missing from this
// listing.
// NOTE(review): isspace() receives a plain char; a negative char value would be
// out of the function's defined domain -- consider casting to unsigned char.
168 CBioSource::EGenome CBioSource::GetGenomeByOrganelle (const string& organelle, NStr::ECase use_case, bool starts_with)
169 {
171 
172  if (use_case == NStr::eCase && !starts_with) {
173  TGenomeMap::const_iterator g_iter = sm_GenomeKeys.find (organelle.c_str ());
174  if (g_iter == sm_GenomeKeys.end()) {
175  if (NStr::Equal(organelle, "mitochondrial")) {
177  }
178  } else {
179  gtype = g_iter->second;
180  }
181  } else {
182  TGenomeMap::const_iterator g_iter = sm_GenomeKeys.begin();
183  if (starts_with) {
184  if (NStr::StartsWith(organelle, "mitochondrial", use_case)){
186  } else {
187  string match;
188  while (g_iter != sm_GenomeKeys.end() && gtype == CBioSource::eGenome_unknown) {
189  match = g_iter->first;
190  if (NStr::StartsWith(organelle, match.c_str(), use_case)) {
191  if (organelle.length() == match.length()
192  || (match.length() < organelle.length() && isspace(organelle[match.length()]))) {
193  gtype = g_iter->second;
194  }
195  }
196  ++g_iter;
197  }
198  }
199  } else {
200  if (NStr::Equal(organelle, "mitochondrial", use_case)) {
202  } else {
203  while (g_iter != sm_GenomeKeys.end() && gtype == CBioSource::eGenome_unknown) {
204  if (NStr::Equal(organelle, g_iter->first, use_case)) {
205  gtype = g_iter->second;
206  }
207  ++g_iter;
208  }
209  }
210  }
211  }
212  return gtype;
213 }
214 
215 
216 string CBioSource::GetOrganelleByGenome (unsigned int genome)
217 {
218  string organelle = kEmptyStr;
219  TGenomeMap::const_iterator g_iter = sm_GenomeKeys.begin();
220  while (g_iter != sm_GenomeKeys.end() &&
221  unsigned(g_iter->second) != genome) {
222  ++g_iter;
223  }
224  if (g_iter != sm_GenomeKeys.end()) {
225  organelle = g_iter->first;
226  }
227  return organelle;
228 }
229 
230 
232 
// Origin name -> CBioSource::EOrigin value, keys in ascending byte order.
// NOTE(review): the declaration line of this array (source line 233) is missing
// from this listing; presumably it backs sm_OriginKeys -- confirm.
234  { "artificial", CBioSource::eOrigin_artificial },
235  { "mutant", CBioSource::eOrigin_mut },
236  { "natural", CBioSource::eOrigin_natural },
237  { "natural mutant", CBioSource::eOrigin_natmut },
238  { "other", CBioSource::eOrigin_other },
239  { "synthetic", CBioSource::eOrigin_synthetic },
240  { "unknown", CBioSource::eOrigin_unknown }
241 };
242 
// Alternative spellings for origin names, mapped to the same EOrigin values as
// "mutant" and "natural mutant". Keys are in ascending byte order.
// NOTE(review): presumably the backing array of sm_OriginSynonyms, which
// GetOriginByString consults after the primary table -- confirm.
243 static const TOriginKey origin_synonyms [] = {
244  { "mut", CBioSource::eOrigin_mut },
245  { "nat mut", CBioSource::eOrigin_natmut },
246  { "natmut", CBioSource::eOrigin_natmut }
247 };
248 
249 
253 
// Translate an origin name into an EOrigin value.
//   origin      - text to look up
//   use_case    - case sensitivity used for the comparisons
//   starts_with - when true, a key only has to be a prefix of `origin`, and
//                 the prefix must be the whole string or be followed by a
//                 whitespace character
// sm_OriginKeys is consulted first, then sm_OriginSynonyms.
// NOTE(review): the declaration of `gtype` (source line 256) is missing from
// this listing.
// NOTE(review): in the exact-compare branch (source lines 299-313) `found` is
// never set to true, so both tables are always scanned to the end and a later
// match overwrites an earlier one. The prefix branch does set it. Confirm
// whether the missing `found = true;` is intentional.
// NOTE(review): isspace() receives a plain char; consider casting to
// unsigned char.
254 CBioSource::EOrigin CBioSource::GetOriginByString (const string& origin, NStr::ECase use_case, bool starts_with)
255 {
257 
258  if (use_case == NStr::eCase && !starts_with) {
259  TOriginMap::const_iterator g_iter = sm_OriginKeys.find (origin.c_str ());
260  if (g_iter == sm_OriginKeys.end ()) {
261  g_iter = sm_OriginSynonyms.find (origin.c_str());
262  if (g_iter != sm_OriginSynonyms.end ()) {
263  gtype = g_iter->second;
264  }
265  } else {
266  gtype = g_iter->second;
267  }
268  } else {
269  TOriginMap::const_iterator g_iter = sm_OriginKeys.begin();
270  bool found = false;
271  if (starts_with) {
272  string match;
273  while (g_iter != sm_OriginKeys.end() && !found) {
274  match = g_iter->first;
275  if (NStr::StartsWith(origin, match.c_str(), use_case)) {
276  if (origin.length() == match.length()
277  || (match.length() < origin.length() && isspace (origin[match.length()]))) {
278  gtype = g_iter->second;
279  found = true;
280  }
281  }
282  ++g_iter;
283  }
284  if (!found) {
285  g_iter = sm_OriginSynonyms.begin();
286  while (g_iter != sm_OriginSynonyms.end() && !found) {
287  match = g_iter->first;
288  if (NStr::StartsWith(origin, match.c_str(), use_case)) {
289  if (origin.length() == match.length()
290  || (match.length() < origin.length() && isspace (origin[match.length()]))) {
291  gtype = g_iter->second;
292  found = true;
293  }
294  }
295  ++g_iter;
296  }
297  }
298  } else {
299  while (g_iter != sm_OriginKeys.end() && !found) {
300  if (NStr::Equal(origin, g_iter->first, use_case)) {
301  gtype = g_iter->second;
302  }
303  ++g_iter;
304  }
305  if (!found) {
306  g_iter = sm_OriginSynonyms.begin();
307  while (g_iter != sm_OriginSynonyms.end() && !found) {
308  if (NStr::Equal(origin, g_iter->first, use_case)) {
309  gtype = g_iter->second;
310  }
311  ++g_iter;
312  }
313  }
314  }
315  }
316  return gtype;
317 }
318 
319 
// Reverse lookup in sm_OriginKeys: returns the key of the first entry whose
// value equals `origin`, or an empty string when there is none. Only the
// primary table is searched, not sm_OriginSynonyms.
// NOTE(review): the signature line (source line 320) is missing from this
// listing; `origin` is presumably an unsigned parameter -- confirm.
321 {
322  string origin_str = "";
323  TOriginMap::const_iterator g_iter = sm_OriginKeys.begin();
324  while (g_iter != sm_OriginKeys.end() &&
325  unsigned(g_iter->second) != origin) {
326  ++g_iter;
327  }
328  if (g_iter != sm_OriginKeys.end()) {
329  origin_str = g_iter->first;
330  }
331  return origin_str;
332 }
333 
334 
335 bool CBioSource::IsSetTaxname(void) const
336 {
337  return IsSetOrg () && GetOrg ().IsSetTaxname ();
338 }
339 
// Forwards to the Org's taxname. No IsSetOrg()/IsSetTaxname() check is made
// here; IsSetTaxname() is the companion presence test.
340 const string& CBioSource::GetTaxname(void) const
341 {
342  return GetOrg ().GetTaxname ();
343 }
344 
345 bool CBioSource::IsSetCommon(void) const
346 {
347  return IsSetOrg () && GetOrg ().IsSetCommon ();
348 }
349 
// Forwards to the Org's common name. No presence check is made here;
// IsSetCommon() is the companion presence test.
350 const string& CBioSource::GetCommon(void) const
351 {
352  return GetOrg ().GetCommon ();
353 }
354 
355 bool CBioSource::IsSetLineage(void) const
356 {
357  return IsSetOrg () && GetOrg ().IsSetLineage ();
358 }
359 
// Forwards to the Org's lineage. No presence check is made here;
// IsSetLineage() is the companion presence test.
360 const string& CBioSource::GetLineage(void) const
361 {
362  return GetOrg ().GetLineage ();
363 }
364 
365 bool CBioSource::IsSetGcode(void) const
366 {
367  return IsSetOrg () && GetOrg ().IsSetGcode ();
368 }
369 
// Forwards to the Org's Gcode. No presence check is made here;
// IsSetGcode() is the companion presence test.
370 int CBioSource::GetGcode(void) const
371 {
372  return GetOrg ().GetGcode ();
373 }
374 
375 bool CBioSource::IsSetMgcode(void) const
376 {
377  return IsSetOrg () && GetOrg ().IsSetMgcode ();
378 }
379 
// Forwards to the Org's Mgcode. No presence check is made here;
// IsSetMgcode() is the companion presence test.
380 int CBioSource::GetMgcode(void) const
381 {
382  return GetOrg ().GetMgcode ();
383 }
384 
385 bool CBioSource::IsSetPgcode(void) const
386 {
387  return IsSetOrg () && GetOrg ().IsSetPgcode ();
388 }
389 
// Forwards to the Org's Pgcode. No presence check is made here;
// IsSetPgcode() is the companion presence test.
390 int CBioSource::GetPgcode(void) const
391 {
392  return GetOrg ().GetPgcode ();
393 }
394 
// True only when an Org is set and that Org reports a division.
// NOTE(review): the signature line (source line 395) is missing from this
// listing.
396 {
397  return IsSetOrg () && GetOrg ().IsSetDivision ();
398 }
399 
// Forwards to the Org's division. No IsSetOrg() check is made here.
400 const string& CBioSource::GetDivision(void) const
401 {
402  return GetOrg ().GetDivision ();
403 }
404 
405 bool CBioSource::IsSetOrgname(void) const
406 {
407  return IsSetOrg () && GetOrg ().IsSetOrgname ();
408 }
409 
// Forwards to the Org's Orgname. No IsSetOrg() check is made here.
// NOTE(review): the signature line (source line 410) is missing from this
// listing.
411 {
412  return GetOrg ().GetOrgname ();
413 }
414 
415 bool CBioSource::IsSetOrgMod(void) const
416 {
417  return IsSetOrg () && GetOrg ().IsSetOrgMod ();
418 }
419 
420 
// Derive a replicon name for this source.
// First pass: for sub-sources that have both a subtype and a name, certain
// subtypes return that name directly, one only under a genome-related
// condition, and one only when GetBioprojectType() is "eSegment".
// Fallback: a fixed label chosen by genome ("unnamed", "ANONYMOUS",
// "kinetoplast", "Pltd", "MT"); an empty string when nothing applies.
// NOTE(review): the loop header over the sub-sources and every `case` label
// are missing from this listing, so which subtype/genome selects which branch
// cannot be read here -- check the repository copy.
// The `break;` statements after `return` are unreachable.
421 string CBioSource::GetRepliconName(void) const
422 {
423  string bioprojtype = GetBioprojectType();
425  if ((*sit)->IsSetSubtype() && (*sit)->IsSetName()){
426  CSubSource_Base::TSubtype subtype=(*sit)->GetSubtype();
427  string name =(*sit)->GetName();
428  switch(subtype){
433  return name;
434  break;
436  if(IsSetGenome() &&
438  return name;
439  }
440  break;
442  if(bioprojtype == "eSegment")
443  return name;
444  break;
445  }
446  }
447  }
448 
449  // no other name found
450  if (IsSetGenome()) {
451  switch (GetGenome()) {
455  return "unnamed";
456  break;
458  return "ANONYMOUS";
459  break;
461  return "kinetoplast";
462  break;
470  return "Pltd";
471  break;
474  return "MT";
475  break;
476  }
477  }
478  return kEmptyStr;
479 }
480 
481 
// Classify this source as one of the strings "ePlasmid", "eExtrachrom",
// "eLinkageGroup", "eSegment" or "eChromosome". Checks run in this order:
//   1. genome value (switch; yields "ePlasmid" or "eExtrachrom");
//   2. a sub-source with subtype plasmid_name  -> "ePlasmid";
//   3. a sub-source with subtype linkage_group -> "eLinkageGroup";
//   4. lineage containing "viruses" or "viroids" (case-insensitive)
//      -> "eSegment";
//   5. otherwise "eChromosome".
// NOTE(review): the `case` labels and both sub-source loop headers (with the
// guard that opens the block closed at source line 509) are missing from this
// listing.
482 string CBioSource::GetBioprojectType (void) const
483 {
484  if (IsSetGenome()) {
485  switch (GetGenome()) {
489  return "ePlasmid";
490  break;
492  return "eExtrachrom";
493  break;
494  }
495  }
496 
498  if ((*sit)->IsSetSubtype() && (*sit)->GetSubtype() == CSubSource::eSubtype_plasmid_name) {
499  return "ePlasmid";
500  }
501  }
502 
505  if ((*sit)->IsSetSubtype() && (*sit)->GetSubtype() == CSubSource::eSubtype_linkage_group) {
506  return "eLinkageGroup";
507  }
508  }
509  }
510 
511  if (IsSetOrg() && GetOrg().IsSetLineage()) {
512  const string& lineage = GetOrg().GetLineage();
513  if (NStr::FindNoCase(lineage, "viruses") != string::npos ||
514  NStr::FindNoCase(lineage, "viroids") != string::npos) {
515  return "eSegment";
516  }
517  }
518  return "eChromosome";
519 }
520 
521 
// Produce a location label ("eNuclearProkaryote", "eMitochondrion",
// "eVirionPhage", "eViroid", "eOther", ...) for this source.
//  - An initial guard (condition not visible) returns "eNuclearProkaryote".
//  - When GetBioprojectType() is "eSegment" the answer depends only on the
//    lineage: "viruses" -> "eVirionPhage", "viroids" -> "eViroid", else
//    "eOther".
//  - Otherwise an unset genome gives "eNuclearProkaryote" and a set genome is
//    mapped through the switch.
// NOTE(review): the signature, the first guard's condition and all `case`
// labels are missing from this listing.
// NOTE(review): the "eSegment" case has already returned above, so the test at
// source line 612 is always true when reached and the final
// `return "eOther";` cannot execute.
523 {
525  return "eNuclearProkaryote";
526  }
527 
528  const string& bioprojecttype = GetBioprojectType();
529  if (NStr::Equal(bioprojecttype, "eSegment")) {
530  if (IsSetOrg() && GetOrg().IsSetLineage()) {
531  const string& lineage = GetOrg().GetLineage();
532  if (NStr::FindNoCase(lineage, "viruses") != string::npos) {
533  return "eVirionPhage";
534  } else if (NStr::FindNoCase(lineage, "viroids") != string::npos) {
535  return "eViroid";
536  }
537  }
538  return "eOther";
539  }
540 
541  if (!IsSetGenome()) {
542  return "eNuclearProkaryote";
543  } else {
544  switch (GetGenome()) {
550  return "eNuclearProkaryote";
551  break;
553  return "eMitochondrion";
554  break;
556  return "eKinetoplast";
557  break;
559  return "eChloroplast";
560  break;
562  return "eChromoplast";
563  break;
565  return "ePlastid";
566  break;
568  return "eMacronuclear";
569  break;
571  return "eCyanelle";
572  break;
575  return "eProviralProphage";
576  break;
578  if (IsSetOrg() && GetOrg().IsSetLineage()) {
579  const string& lineage = GetOrg().GetLineage();
580  if (NStr::FindNoCase(lineage, "viruses") != string::npos) {
581  return "eVirionPhage";
582  } else if (NStr::FindNoCase(lineage, "viroids") != string::npos) {
583  return "eViroid";
584  }
585  }
586  return "eOther";
587  break;
589  return "eNucleomorph";
590  break;
592  return "eApicoplast";
593  break;
595  return "eLeucoplast";
596  break;
598  return "eProplastid";
599  break;
601  return "eHydrogenosome";
602  break;
604  return "eChromatophore";
605  break;
608  return "eOther";
609  break;
610  }
611  }
612  if (!NStr::Equal(bioprojecttype, "eSegment")) {
613  return "eNuclearProkaryote";
614  }
615 
616  return "eOther";
617 }
618 
// Marker token kept inside the OrgName attrib string; the strain-forwarding
// setter/getter below add, remove and search for this text.
619 static const char* kDisableStrainForwardAttrib = "nomodforward";
620 
// Add (val == true) or remove (val == false) the kDisableStrainForwardAttrib
// marker in the OrgName attrib string.
//  - Adding: the marker is appended, separated by ";" from existing non-blank
//    text, unless it already occurs somewhere in the string.
//  - Removing: every occurrence of the marker is deleted, then every ";;" is
//    deleted, and the attrib is reset if only blanks remain.
// NOTE(review): the signature line (source line 621) is missing from this
// listing.
// NOTE(review): replacing ";;" with "" (not ";") joins the neighbouring
// entries, e.g. "a;nomodforward;b" becomes "ab"; a marker at either end also
// leaves a dangling ";". Confirm whether ";" was intended.
622 {
623  if (val) {
624  string attrib = kEmptyStr;
625  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetAttrib()) {
626  attrib = GetOrg().GetOrgname().GetAttrib();
627  }
628  if (NStr::Find(attrib, kDisableStrainForwardAttrib) == string::npos) {
629  if (!NStr::IsBlank(attrib)) {
630  attrib += ";";
631  }
632  attrib += kDisableStrainForwardAttrib;
633  SetOrg().SetOrgname().SetAttrib(attrib);
634  }
635  } else {
636  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetAttrib()) {
637  NStr::ReplaceInPlace(SetOrg().SetOrgname().SetAttrib(), kDisableStrainForwardAttrib, "");
638  NStr::ReplaceInPlace(SetOrg().SetOrgname().SetAttrib(), ";;", "");
639  if (NStr::IsBlank(GetOrg().GetOrgname().GetAttrib())) {
640  SetOrg().SetOrgname().ResetAttrib();
641  }
642  }
643  }
644 }
645 
646 
// True when the OrgName attrib string is set and contains the
// kDisableStrainForwardAttrib marker anywhere (substring search).
// NOTE(review): the signature line (source line 647) is missing from this
// listing.
648 {
649  bool val = false;
650  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetAttrib()
651  && NStr::Find(GetOrg().GetOrgname().GetAttrib(), kDisableStrainForwardAttrib) != string::npos) {
652  val = true;
653  }
654  return val;
655 }
656 
657 
// Decide whether a sub-source subtype belongs to the "must copy" set.
// Returns false for subtypes CSubSource::IsDiscouraged() flags and for the
// subtypes listed explicitly below (chromosome, map, other, ...); true for
// everything else.
// NOTE(review): one alternative of the condition (source line 664) is missing
// from this listing.
658 bool s_MustCopy (int subtype)
659 {
660  if (CSubSource::IsDiscouraged(subtype)) {
661  return false;
662  } else if (subtype == CSubSource::eSubtype_chromosome
663  || subtype == CSubSource::eSubtype_map
665  || subtype == CSubSource::eSubtype_other) {
666  return false;
667  }
668  return true;
669 }
670 
671 
// Strip stop-word values from `org_ref`: the taxname is reset when it is a
// stop word, every modifier whose subname is a stop word is erased, and the
// modifier list is reset if it ends up empty.
// NOTE(review): the signature line (source line 672) is missing from this
// listing.
// NOTE(review): GetSubname() is called without an IsSetSubname() check --
// confirm that is safe for modifiers without a subname.
673 {
674  if (org_ref.IsSetTaxname() && IsStopWord(org_ref.GetTaxname())) {
675  org_ref.ResetTaxname();
676  }
677  if (org_ref.IsSetOrgMod()) {
678  COrgName::TMod::iterator it = org_ref.SetOrgname().SetMod().begin();
679  while (it != org_ref.SetOrgname().SetMod().end()) {
680  if (IsStopWord((*it)->GetSubname())) {
681  it = org_ref.SetOrgname().SetMod().erase(it);
682  } else {
683  it++;
684  }
685  }
686  if (org_ref.GetOrgname().GetMod().empty()) {
687  org_ref.SetOrgname().ResetMod();
688  }
689  }
690 }
691 
692 
// True when every diff in `diffs` has a blank source value, i.e. no diff would
// replace an existing BioSource value. An empty list yields true.
// NOTE(review): the signature line (source line 693) is missing from this
// listing.
694 {
695  ITERATE(TFieldDiffList, it, diffs) {
696  if (!NStr::IsBlank((*it)->GetSrcVal())) {
697  return false;
698  }
699  }
700  return true;
701 }
702 
703 
// Apply the differences between this source and `biosample` to this source.
//   biosample     - source whose values are copied in
//   force         - when false, the update is refused (CException "Conflicts
//                   found") unless BiosampleDiffsOkForUpdate() accepts the
//                   diffs
//   is_local_copy - forwarded to GetBiosampleDiffs()
// Per diff:
//   "Organism Name" -> taxname is replaced and the Orgname name is reset;
//   "Tax ID"        -> tax id is replaced;
//   anything else   -> the field name is tried as an OrgMod subtype first and,
//                      if that throws, as a SubSource subtype. The old value is
//                      removed and the new non-blank value queued. Stop words
//                      in the sample value are treated as blank.
// Queued modifiers and sub-sources are spliced in at the end, then AutoFix()
// runs.
// Throws CException (eUnknown) for conflicts, and for field names that neither
// lookup accepts.
// NOTE(review): source line 721 (end of the "Organism Name" branch) is missing
// from this listing.
// NOTE(review): atoi() reports no errors, so the "Non-integer Tax ID value"
// handler fires only if SetTaxId()/TAX_ID_FROM throws; non-numeric text
// silently becomes 0.
// NOTE(review): the first catch (...) also catches exceptions from
// RemoveOrgMod() and the modifier setters, not only unknown-subtype errors.
704 void CBioSource::UpdateWithBioSample(const CBioSource& biosample, bool force, bool is_local_copy)
705 {
706  TFieldDiffList diffs = GetBiosampleDiffs(biosample, is_local_copy);
707  if (!force && !BiosampleDiffsOkForUpdate(diffs)) {
708  // throw exception
709  NCBI_THROW(CException, eUnknown, "Conflicts found");
710  }
711 
712  COrgName_Base::TMod mods;
713  CBioSource_Base::TSubtype subtypes;
714 
715  ITERATE(TFieldDiffList, it, diffs) {
716  if (NStr::EqualNocase((*it)->GetFieldName(), "Organism Name")) {
717  SetOrg().SetTaxname((*it)->GetSampleVal());
718  if (GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetName()) {
719  SetOrg().SetOrgname().ResetName();
720  }
722  } else if (NStr::EqualNocase((*it)->GetFieldName(), "Tax ID")) {
723  try {
724  SetOrg().SetTaxId(TAX_ID_FROM(int, atoi((*it)->GetSampleVal().c_str())));
725  } catch (...) {
726  NCBI_THROW(CException, eUnknown, "Non-integer Tax ID value");
727  }
728  } else {
729  string sample_val = (*it)->GetSampleVal();
730  if (IsStopWord(sample_val)) {
731  sample_val = "";
732  }
733  try {
734  COrgMod::TSubtype subtype = COrgMod::GetSubtypeValue((*it)->GetFieldName());
735  if (!NStr::IsBlank((*it)->GetSrcVal())) {
736  RemoveOrgMod(subtype, (*it)->GetSrcVal());
737  }
738  if (!NStr::IsBlank(sample_val)) {
739  CRef<COrgMod> mod(new COrgMod());
740  mod->SetSubtype(subtype);
741  mod->SetSubname(sample_val);
742  mods.push_back(mod);
743  }
744  } catch (...) {
745  try {
746  CSubSource::TSubtype subtype = CSubSource::GetSubtypeValue((*it)->GetFieldName());
747  if (CSubSource::NeedsNoText(subtype)) {
748  // process diff that involve NeedsNoText subtypes
749  if (NStr::EqualNocase((*it)->GetSrcVal(), "true")) {
750  RemoveSubSource(subtype);
751  }
752  if (NStr::EqualNocase(sample_val, "true")) {
753  CRef<CSubSource> sub(new CSubSource());
754  sub->SetSubtype(subtype);
755  sub->SetName("");
756  subtypes.push_back(sub);
757  }
758  }
759  else {
760  // process all other subtypes
761  if (!NStr::IsBlank((*it)->GetSrcVal())) {
762  RemoveSubSource(subtype, (*it)->GetSrcVal());
763  }
764  if (!NStr::IsBlank(sample_val)) {
765  CRef<CSubSource> sub(new CSubSource());
766  sub->SetSubtype(subtype);
767  sub->SetName(sample_val);
768  subtypes.push_back(sub);
769  }
770  }
771  } catch (...) {
772  NCBI_THROW(CException, eUnknown, "Unknown field name");
773  }
774  }
775  }
776  }
777 
778  if (!mods.empty()) {
779  SetOrg().SetOrgname().SetMod().splice(SetOrg().SetOrgname().SetMod().end(), mods);
780  }
781 
782  if (!subtypes.empty()) {
783  SetSubtype().splice(SetSubtype().end(), subtypes);
784  }
785 
786  AutoFix();
787 }
788 
789 
// Erase from this source every sub-source whose subtype s_MustCopy() accepts;
// the others are kept. Does nothing when no sub-sources are set.
// NOTE(review): the signature line (source line 790) is missing from this
// listing.
791 {
792  if (!IsSetSubtype()) {
793  return;
794  }
795  CBioSource::TSubtype::iterator it = SetSubtype().begin();
796  while (it != SetSubtype().end()) {
797  if (s_MustCopy((*it)->GetSubtype())) {
798  it = SetSubtype().erase(it);
799  } else {
800  ++it;
801  }
802  }
803 }
804 
805 
// Field labels used in the name/value pair lists built below:
//   kOrgModNote / kSubSrcNote - labels given to modifiers / sub-sources whose
//                               subtype is "other";
//   kOrganismName / kTaxId    - labels for the taxname and tax id entries.
806 static const char* kOrgModNote = "orgmod_note";
807 static const char* kSubSrcNote = "subsrc_note";
808 static const char* kOrganismName = "Organism Name";
809 static const char* kTaxId = "Tax ID";
810 
// Build the name/value list describing this source: the taxname (label
// kOrganismName), the tax id when it is greater than ZERO_TAX_ID (label
// kTaxId), then two batches of additional pairs copied from `extra`, the
// second coming from x_GetSubtypeNameValPairs().
// NOTE(review): the signature (source line 811) and the line that declares and
// first fills `extra` (line 829) are missing from this listing.
// The empty catch silently skips the tax id entry if the numeric conversion
// throws.
812 {
813  TNameValList list;
814 
815  if (IsSetOrg() && GetOrg().IsSetTaxname()) {
816  list.push_back(TNameVal(kOrganismName, GetOrg().GetTaxname()));
817  }
818  if (IsSetOrg()) {
819  TTaxId taxid = GetOrg().GetTaxId();
820  if (taxid > ZERO_TAX_ID) {
821  try {
822  string val = NStr::NumericToString(taxid);
823  list.push_back(TNameVal(kTaxId, val));
824  } catch (...) {
825  }
826  }
827  }
828 
830  ITERATE(TNameValList, it, extra) {
831  list.push_back(*it);
832  }
833  extra = x_GetSubtypeNameValPairs();
834  ITERATE(TNameValList, it, extra) {
835  list.push_back(*it);
836  }
837 
838  return list;
839 }
840 
841 
// Three-way comparison of two name/value pairs (negative, zero, positive).
// Names are compared case-sensitively first. For equal names, stop-word values
// compare equal to each other and sort before any other value; remaining
// values are ordered case-insensitively, with a case-sensitive comparison as
// tie-breaker.
// NOTE(review): the signature line (source line 842) is missing from this
// listing.
843 {
844  int cmp = NStr::Compare (f1.first, f2.first);
845  if (cmp == 0) {
846  bool stop1 = CBioSource::IsStopWord(f1.second);
847  bool stop2 = CBioSource::IsStopWord(f2.second);
848  if (stop1 && stop2) {
849  // equal
850  cmp = 0;
851  } else if (stop1) {
852  // first is less
853  cmp = -1;
854  } else if (stop2) {
855  // second is less
856  cmp = 1;
857  } else {
858  cmp = NStr::CompareNocase (f1.second, f2.second);
859  if (cmp == 0) {
860  cmp = NStr::Compare(f1.second, f2.second);
861  }
862  }
863  }
864  return cmp;
865 }
866 
867 
// "Less than" predicate built on s_iCompareNameVals(); passed to sort() by the
// name/value list builders in this file.
// NOTE(review): the signature line (source line 868) is missing from this
// listing.
869 {
870  int cmp = s_iCompareNameVals (f1, f2);
871  if (cmp < 0) {
872  return true;
873  } else {
874  return false;
875  }
876 }
877 
878 
// Collect one (label, subname) pair per organism modifier that has both a
// subtype and a subname, then sort the list with s_CompareNameVals.
// The label is kOrgModNote for subtype "other" and the subtype's name
// otherwise.
// NOTE(review): the signature line (source line 879) is missing from this
// listing.
880 {
881  TNameValList list;
882  if (IsSetOrgMod()) {
883  ITERATE(COrgName::TMod, it, GetOrg().GetOrgname().GetMod()) {
884  if ((*it)->IsSetSubname() && (*it)->IsSetSubtype()) {
885  string label;
886  if ((*it)->GetSubtype() == COrgMod::eSubtype_other) {
887  label = kOrgModNote;
888  } else {
889  label = COrgMod::GetSubtypeName((*it)->GetSubtype());
890  }
891  list.push_back(TNameVal(label, (*it)->GetSubname()));
892  }
893  }
894  }
895  sort(list.begin(), list.end(), s_CompareNameVals);
896  return list;
897 }
898 
899 
// Collect one (label, value) pair per sub-source that has both a name and a
// subtype, then sort the list with s_CompareNameVals.
// The label is kSubSrcNote for subtype "other"; under a condition not visible
// here the value is replaced by the literal "true".
// NOTE(review): the signature (source line 900), the loop header (904), the
// label assignment for other subtypes (911) and the condition guarding
// `val = "true"` (914) are missing from this listing.
901 {
902  TNameValList list;
903  if (IsSetSubtype()) {
905  if ((*it)->IsSetName() && (*it)->IsSetSubtype()) {
906  CSubSource::TSubtype st = (*it)->GetSubtype();
907  string label;
908  if (st == CSubSource::eSubtype_other) {
909  label = kSubSrcNote;
910  } else {
912  }
913  string val = (*it)->GetName();
915  val = "true";
916  }
917  list.push_back(TNameVal(label, val));
918  }
919  }
920  }
921  sort(list.begin(), list.end(), s_CompareNameVals);
922  return list;
923 }
924 
925 
// Qualifier names, listed in ascending byte order.
// NOTE(review): presumably the set consulted by s_MayIgnoreCase() (its body
// and the set definition are missing from this listing), i.e. the qualifiers
// whose values ShouldIgnoreConflict() compares case-insensitively -- confirm.
926 static const char* const s_IgnoreCaseQuals[] = {
927  "cell-type",
928  "collected-by",
929  "dev-stage",
930  "frequency",
931  "group",
932  "identified-by",
933  "isolation-source",
934  "map",
935  "metagenome-source",
936  "note",
937  "phenotype",
938  "sex",
939  "subgroup",
940  "tissue-type"
941 };
942 
945 
// Predicate used by ShouldIgnoreConflict() to choose a case-insensitive value
// comparison for a given label.
// NOTE(review): the body (source line 948) is missing from this listing.
946 bool s_MayIgnoreCase(const string& value)
947 {
949 }
950 
951 
// Qualifier names, listed in ascending byte order.
// NOTE(review): presumably the set consulted by s_IsTaxNameElement() (its body
// and the set definition are missing from this listing) -- confirm.
952 static const char* const s_TaxNameElementQuals[] = {
953  "biovar",
954  "chemovar",
955  "forma",
956  "forma-specialis",
957  "genotype",
958  "pathovar",
959  "serotype",
960  "serovar",
961  "subspecies",
962  "variety"
963 };
964 
967 
// Predicate used by x_RemoveNameElementDiffs() to select the diffs whose field
// value may instead be found inside the taxname.
// NOTE(review): the body (source line 970) is missing from this listing.
968 bool s_IsTaxNameElement(const string& value)
969 {
971 }
972 
973 
// Conflict-ignore policy per qualifier, scanned linearly by
// ShouldIgnoreConflict() with a case-insensitive name comparison.
// Policies visible in the table: eConflictIgnoreMissingInBioSample and
// eConflictIgnoreAll.
// NOTE(review): the enumerator list, the second struct member, the array
// declaration and one table row (source lines 975-978, 983-984, 987, 993) are
// missing from this listing.
974 typedef enum {
979 
980 
981 typedef struct ignoreconflict {
982  const char* qual_name;
985 
986 
988  { "chromosome", eConflictIgnoreMissingInBioSample } ,
989  { "endogenous-virus-name", eConflictIgnoreMissingInBioSample } ,
990  { "germline", eConflictIgnoreMissingInBioSample } ,
991  { "insertion-seq-name", eConflictIgnoreMissingInBioSample } ,
992  { "linkage-group", eConflictIgnoreMissingInBioSample } ,
994  { "plasmid-name", eConflictIgnoreMissingInBioSample } ,
995  { "pop-variant", eConflictIgnoreMissingInBioSample } ,
996  { "rearranged", eConflictIgnoreMissingInBioSample } ,
997  { "segment", eConflictIgnoreMissingInBioSample } ,
998  { "transgenic", eConflictIgnoreMissingInBioSample } ,
999  { "transposon-name", eConflictIgnoreMissingInBioSample } ,
1000  { "whole-replicon", eConflictIgnoreMissingInBioSample } ,
1001  { "acronym", eConflictIgnoreAll },
1002  { "common", eConflictIgnoreAll } ,
1003  { "dosage", eConflictIgnoreAll } ,
1004  { "gb-acronym", eConflictIgnoreAll } ,
1005  { "gb-anamorph", eConflictIgnoreAll } ,
1006  { "gb-synonym", eConflictIgnoreAll } ,
1007  { "lineage", eConflictIgnoreAll } ,
1008  { "old-lineage", eConflictIgnoreAll } ,
1009  { "old-name", eConflictIgnoreAll } ,
1010  { "synonym", eConflictIgnoreAll } ,
1011  { "type-material", eConflictIgnoreAll },
1012  { "StructuredCommentPrefix", eConflictIgnoreAll} ,
1013  { "StructuredCommentSuffix", eConflictIgnoreAll}
1014 };
1015 
1016 
/// Tell whether two coordinate values are identical once both are printed
/// with two digits after the decimal point ("%0.2f").
///
/// Values above 180.0 are rejected outright (the function returns false).
/// There is no lower bound, so the formatting buffers are sized for the
/// longest text "%0.2f" can produce for any double (sign, up to 309 integer
/// digits, ".dd" and the terminator) and the bounded snprintf() is used. The
/// previous 20-byte sprintf() buffers overflowed for large-magnitude negative
/// input.
///
/// @param val1  First value.
/// @param val2  Second value.
/// @return  true when neither value exceeds 180.0 and both format to the same
///          text; false otherwise, including on a formatting failure.
bool s_SameExceptPrecision (double val1, double val2)
{
    if (val1 > 180.0 || val2 > 180.0) {
        return false;
    }

    char text1[384];
    char text2[384];
    const int len1 = snprintf(text1, sizeof(text1), "%0.2f", val1);
    const int len2 = snprintf(text2, sizeof(text2), "%0.2f", val2);

    // A negative result is an encoding error; a result >= the buffer size
    // means the text was truncated. Neither can be compared reliably.
    if (len1 < 0 || len2 < 0
        || static_cast<size_t>(len1) >= sizeof(text1)
        || static_cast<size_t>(len2) >= sizeof(text2)) {
        return false;
    }
    return strcmp(text1, text2) == 0;
}
1031 
1032 
// Decide whether a difference between a BioSource value and a BioSample value
// for the qualifier `label` can be disregarded.
//   label         - qualifier / field name
//   src_val       - BioSource value (taken by value; edited locally for the
//                   "country" rule)
//   sample_val    - BioSample value (likewise)
//   is_local_copy - when true, entries marked eConflictIgnoreMissingInBioSample
//                   are treated as eConflictIgnoreAll
// Rules, in order:
//   1. blank source value and stop-word sample value -> ignore;
//   2. equal values (case-insensitive when s_MayIgnoreCase(label)) -> ignore;
//   3. both non-blank: the sample value is passed through CSubSource::AutoFix,
//      or COrgMod::AutoFix if the first attempt throws, and compared with the
//      source value;
//   4. policy lookup in sIgnoreConflictList;
//   5. "collection-date": ignore when both parse to equal dates;
//   6. "country": ignore when equal after replacing ": " with ":";
//   7. "altitude": ignore when the source value equals the sample value plus a
//      trailing "." (only this direction is accepted).
// Exceptions raised in rules 3 and 5 are swallowed.
// NOTE(review): the lines that compute `subtype` (source lines 1056, 1065),
// that declare `ignore_type` (1079) and two `case` labels (1087, 1092) are
// missing from this listing.
1033 bool CBioSource::ShouldIgnoreConflict(const string& label, string src_val, string sample_val, bool is_local_copy)
1034 {
1035  size_t i;
1036  bool rval = false;
1037 
1038  // ignore if BioSource value is blank and BioSample value is a stop word
1039  if (NStr::IsBlank(src_val) && CBioSource::IsStopWord(sample_val)) {
1040  return true;
1041  }
1042 
1043  // ignore if case matches exactly
1044  if (s_MayIgnoreCase(label)) {
1045  if (NStr::EqualNocase(src_val, sample_val)) {
1046  return true;
1047  }
1048  } else {
1049  if (NStr::EqualCase(src_val, sample_val)) {
1050  return true;
1051  }
1052  }
1053 
1054  if (!NStr::IsBlank(src_val) && !NStr::IsBlank(sample_val)) {
1055  try {
1057  string test_val = CSubSource::AutoFix(subtype, sample_val);
1058  if (!NStr::IsBlank(test_val)) {
1059  if (NStr::Equal(src_val, test_val)) {
1060  return true;
1061  }
1062  }
1063  } catch (...) {
1064  try {
1066  string test_val = COrgMod::AutoFix(subtype, sample_val);
1067  if (!NStr::IsBlank(test_val)) {
1068  if (NStr::Equal(src_val, test_val)) {
1069  return true;
1070  }
1071  }
1072  } catch (...) {
1073  }
1074  }
1075  }
1076 
1077  for (i = 0; i < ArraySize(sIgnoreConflictList); i++) {
1078  if (NStr::EqualNocase(label, sIgnoreConflictList[i].qual_name)) {
1080  if (is_local_copy && ignore_type == eConflictIgnoreMissingInBioSample) {
1081  ignore_type = eConflictIgnoreAll;
1082  }
1083  switch (ignore_type) {
1084  case eConflictIgnoreAll:
1085  rval = true;
1086  break;
1088  if (NStr::IsBlank(src_val)) {
1089  rval = true;
1090  }
1091  break;
1093  if (NStr::IsBlank(sample_val) || CBioSource::IsStopWord(sample_val)) {
1094  rval = true;
1095  }
1096  break;
1097  }
1098  break;
1099  }
1100  }
1101 #if 0
1102  // special handling for lat-lon
1103  // commented out for SQD-4173
1104  if (!rval && NStr::EqualNocase(label, "lat-lon")) {
1105  bool src_format_correct, src_precision_correct,
1106  src_lat_in_range, src_lon_in_range;
1107  double src_lat_value, src_lon_value;
1108  CSubSource::IsCorrectLatLonFormat(src_val, src_format_correct, src_precision_correct,
1109  src_lat_in_range, src_lon_in_range,
1110  src_lat_value, src_lon_value);
1111  bool smpl_format_correct, smpl_precision_correct,
1112  smpl_lat_in_range, smpl_lon_in_range;
1113  double smpl_lat_value, smpl_lon_value;
1114  CSubSource::IsCorrectLatLonFormat(sample_val, smpl_format_correct, smpl_precision_correct,
1115  smpl_lat_in_range, smpl_lon_in_range,
1116  smpl_lat_value, smpl_lon_value);
1117  if (src_format_correct && smpl_format_correct
1118  && s_SameExceptPrecision(src_lat_value, smpl_lat_value)
1119  && s_SameExceptPrecision(src_lon_value, smpl_lon_value)) {
1120  rval = true;
1121  }
1122  }
1123 #endif
1124  // special handling for collection-date
1125  if (!rval && NStr::EqualNocase(label, "collection-date")) {
1126  try {
1127  CRef<CDate> src_date = CSubSource::DateFromCollectionDate(src_val);
1128  CRef<CDate> smpl_date = CSubSource::DateFromCollectionDate(sample_val);
1129  if (src_date && smpl_date && src_date->Equals(*smpl_date)) {
1130  rval = true;
1131  }
1132  } catch (...) {
1133  }
1134  }
1135  // special handling for country
1136  if (!rval && NStr::EqualNocase(label, "country")) {
1137  NStr::ReplaceInPlace(src_val, ": ", ":");
1138  NStr::ReplaceInPlace(sample_val, ": ", ":");
1139  if (NStr::Equal(src_val, sample_val)) {
1140  rval = true;
1141  }
1142  }
1143  // special handling for altitude
1144  if (!rval && NStr::EqualNocase(label, "altitude")) {
1145  if (NStr::EndsWith(src_val, ".") && !NStr::EndsWith(sample_val, ".")
1146  && NStr::EqualNocase(src_val.substr(0, src_val.length() - 1), sample_val)) {
1147  rval = true;
1148  }
1149  }
1150  return rval;
1151 }
1152 
1153 
1154 void CompareValLists(TFieldDiffList& list, const string& val_name, bool is_local_copy, const vector<string>& list1, const vector<string>& list2)
1155 {
1156  vector<bool> matched1;
1157  for (size_t i = 0; i < list1.size(); i++) {
1158  matched1.push_back(false);
1159  }
1160 
1161  vector<bool> matched2;
1162  for (size_t i = 0; i < list2.size(); i++) {
1163  matched2.push_back(false);
1164  }
1165  for (size_t i = 0; i < list1.size(); i++) {
1166  bool found = false;
1167  for (size_t j = 0; j < list2.size(); j++) {
1168  if (matched2[j]) {
1169  // already in use
1170  } else if (CBioSource::ShouldIgnoreConflict(val_name, list1[i], list2[j], is_local_copy)) {
1171  matched2[j] = true;
1172  found = true;
1173  break;
1174  }
1175  }
1176  if (found) {
1177  matched1[i] = true;
1178  }
1179  }
1180  for (size_t i = 0; i < list1.size(); i++) {
1181  if (!matched1[i]) {
1182  bool reported = false;
1183  for (size_t j = 0; j < list2.size(); j++) {
1184  if (!matched2[j]) {
1185  CRef<CFieldDiff> diff(new CFieldDiff(val_name, list1[i], list2[j]));
1186  list.push_back(diff);
1187  reported = true;
1188  matched2[j] = true;
1189  break;
1190  }
1191  }
1192  if (!reported) {
1193  CRef<CFieldDiff> diff(new CFieldDiff(val_name, list1[i], ""));
1194  list.push_back(diff);
1195  }
1196  }
1197  }
1198  for (size_t j = 0; j < list2.size(); j++) {
1199  if (!matched2[j]) {
1200  CRef<CFieldDiff> diff(new CFieldDiff(val_name, "", list2[j]));
1201  list.push_back(diff);
1202  }
1203  }
1204 }
1205 
1206 
// Merge-style walk over two name/value lists, appending a CFieldDiff to `list`
// for every difference CBioSource::ShouldIgnoreConflict() does not excuse.
//  - A name present on one side only is reported against "".
//  - For a name present on both sides, all consecutive values with that name
//    are gathered from each list and handed to CompareValLists().
// NOTE(review): the first line of the signature (source line 1207) is missing
// from this listing.
// NOTE(review): the walk appears to assume both lists are sorted by name
// (case-sensitive) -- confirm at the call site.
// NOTE(review): the local `matched` is declared but not used in the visible
// lines.
1208  CBioSource::TNameValList& list1,
1209  CBioSource::TNameValList& list2,
1210  bool is_local_copy)
1211 {
1212  CBioSource::TNameValList::iterator it1 = list1.begin();
1213  CBioSource::TNameValList::iterator it2 = list2.begin();
1214  vector<bool> matched;
1215 
1216  while (it1 != list1.end() && it2 != list2.end()) {
1217  int cmp = NStr::Compare(it1->first, it2->first);
1218  if (cmp < 0) {
1219  if (!CBioSource::ShouldIgnoreConflict(it1->first, it1->second, "", is_local_copy)) {
1220  CRef<CFieldDiff> diff(new CFieldDiff(it1->first, it1->second, ""));
1221  list.push_back(diff);
1222  }
1223  it1++;
1224  } else if (cmp > 0) {
1225  if (!CBioSource::ShouldIgnoreConflict(it2->first, "", it2->second, is_local_copy)) {
1226  CRef<CFieldDiff> diff(new CFieldDiff(it2->first, "", it2->second));
1227  list.push_back(diff);
1228  }
1229  it2++;
1230  } else {
1231  // cmp == 0
1232  const string& val_name = it1->first;
1233  vector<string> v1;
1234  vector<string> v2;
1235  v1.push_back(it1->second);
1236  v2.push_back(it2->second);
1237  it1++;
1238  it2++;
1239  while (it1 != list1.end() && NStr::Equal(it1->first, val_name)) {
1240  v1.push_back(it1->second);
1241  it1++;
1242  }
1243  while (it2 != list2.end() && NStr::Equal(it2->first, val_name)) {
1244  v2.push_back(it2->second);
1245  it2++;
1246  }
1247 
1248  CompareValLists(list, val_name, is_local_copy, v1, v2);
1249  }
1250  }
1251  while (it1 != list1.end()) {
1252  if (!CBioSource::ShouldIgnoreConflict(it1->first, it1->second, "", is_local_copy)) {
1253  CRef<CFieldDiff> diff(new CFieldDiff(it1->first, it1->second, ""));
1254  list.push_back(diff);
1255  }
1256  it1++;
1257  }
1258  while (it2 != list2.end()) {
1259  if (!CBioSource::ShouldIgnoreConflict(it2->first, "", it2->second, is_local_copy)) {
1260  CRef<CFieldDiff> diff(new CFieldDiff(it2->first, "", it2->second));
1261  list.push_back(diff);
1262  }
1263  it2++;
1264  }
1265 }
1266 
1267 
1268 void CBioSource::x_RemoveNameElementDiffs(const CBioSource& biosample, TFieldDiffList& diff_list) const
1269 {
1270  string src_tax = "";
1271  if (IsSetOrg() && GetOrg().IsSetTaxname()) {
1272  src_tax = GetOrg().GetTaxname();
1273  }
1274  string sample_tax = "";
1275  if (biosample.IsSetOrg() && biosample.GetOrg().IsSetTaxname()) {
1276  sample_tax = biosample.GetOrg().GetTaxname();
1277  }
1278  TFieldDiffList::iterator it = diff_list.begin();
1279  while (it != diff_list.end()) {
1280  bool remove = false;
1281  if (s_IsTaxNameElement((*it)->GetFieldName())) {
1282  if (NStr::IsBlank((*it)->GetSampleVal())
1283  && NStr::Find(sample_tax, (*it)->GetSrcVal()) != string::npos) {
1284  // if value is missing from BioSample, but is present in BioSample taxname,
1285  // ignore
1286  remove = true;
1287  } else if (NStr::IsBlank((*it)->GetSrcVal())
1288  && NStr::Find(src_tax, (*it)->GetSampleVal()) != string::npos) {
1289  // if value is missing from BioSource, but is present in BioSource taxname,
1290  // ignore
1291  remove = true;
1292  }
1293  }
1294  if (remove) {
1295  it = diff_list.erase(it);
1296  } else {
1297  it++;
1298  }
1299  }
1300 }
1301 
1302 
1303 void RemoveDiffByName(TFieldDiffList& diff_list, string pair_name)
1304 {
1305  TFieldDiffList::iterator it = diff_list.begin();
1306  while (it != diff_list.end()) {
1307  if (NStr::EqualNocase((*it)->GetFieldName(), pair_name)) {
1308  it = diff_list.erase(it);
1309  } else {
1310  it++;
1311  }
1312  }
1313 }
1314 
1315 
// Body of CBioSource::x_ShouldIgnoreNoteForBiosample (named by the
// cross-reference index as starting at original line 1316).
// NOTE(review): original lines 1316 and 1318 are absent from this listing --
// presumably the function signature and the opening `if (` with the leading
// conditions; restore them from the repository copy before building.
// The visible part yields true when the organism lineage contains
// "unclassified sequences; metagenomes", and false otherwise.
1317 {
1319  && NStr::Find(GetOrg().GetOrgname().GetLineage(), "unclassified sequences; metagenomes") != string::npos) {
1320  return true;
1321  } else {
1322  return false;
1323  }
1324 }
1325 
1326 
// Compute the list of field differences between this BioSource and a
// BioSample-derived BioSource.
//   biosample     - the source to compare against
//   is_local_copy - passed through to GetFieldDiffsFromNameValLists
// Returns the differences found; note-type differences ("orgmod_note",
// "subsrc_note") are removed inside the conditional block near the end.
// NOTE(review): original line 1341 (the `if (...) {` that opens that
// conditional block) is absent from this listing -- presumably it tests
// x_ShouldIgnoreNoteForBiosample(); confirm against the repository copy.
1327 TFieldDiffList CBioSource::GetBiosampleDiffs(const CBioSource& biosample, bool is_local_copy) const
1328 {
1329  TFieldDiffList rval;
1330 
// name/value pairs of both sides are sorted with the same comparator so the
// lists can be walked in parallel
1331  TNameValList src_list = GetNameValPairs();
1332  sort(src_list.begin(), src_list.end(), s_CompareNameVals);
1333 
1334  TNameValList sample_list = biosample.GetNameValPairs();
1335  sort(sample_list.begin(), sample_list.end(), s_CompareNameVals);
1336 
1337  GetFieldDiffsFromNameValLists(rval, src_list, sample_list, is_local_copy);
1338  // commented out, SQD-4222
1339  //x_RemoveNameElementDiffs(biosample, rval);
1340 
1342  RemoveDiffByName(rval, "orgmod_note");
1343  RemoveDiffByName(rval, "subsrc_note");
1344  }
1345 
1346  return rval;
1347 }
1348 
1349 
// Values that carry no information ("missing", "n/a", "unknown", ...).
// CBioSource::IsStopWord() looks values up in the static set built from this table.
// NOTE(review): entries are listed in case-insensitive sorted order, which the
// static set built from this array presumably requires -- keep new entries sorted.
static const char* const s_StopWords[] = {
    "-", "?",
    "missing",
    "missing: control sample",
    "missing: data agreement established pre-2023",
    "missing: endangered species",
    "missing: human-identifiable",
    "missing: lab stock",
    "missing: sample group",
    "missing: synthetic construct",
    "missing: third party data",
    "n/a", "na", "none",
    "not applicable",
    "not available",
    "not collected",
    "not determined",
    "not provided",
    "not recorded",
    "null",
    "restricted access",
    "unk", "unknown", "unspecified"
};
1377 
1379 static const TCStopWordStrSet s_StopWordsSet(s_StopWords, sizeof(s_StopWords), __FILE__, __LINE__);
1380 
1381 
1382 bool CBioSource::IsStopWord(const string& value)
1383 {
1384  if (s_StopWordsSet.find(value.c_str()) != s_StopWordsSet.end()) {
1385  return true;
1386  } else {
1387  return false;
1388  }
1389 }
1390 
1391 
1393 {
1394  if (IsSetSubtype()) {
1395  CBioSource::TSubtype::iterator it = SetSubtype().begin();
1396  while (it != SetSubtype().end()) {
1397  (*it)->AutoFix();
1398  if ((*it)->IsSetSubtype()
1399  && !CSubSource::NeedsNoText((*it)->GetSubtype())
1400  && (!(*it)->IsSetName() || NStr::IsBlank((*it)->GetName()))) {
1401  it = SetSubtype().erase(it);
1402  } else {
1403  it++;
1404  }
1405  }
1406  if (GetSubtype().empty()) {
1407  ResetSubtype();
1408  }
1409  }
1410  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetMod()) {
1411  COrgName::TMod::iterator it = SetOrg().SetOrgname().SetMod().begin();
1412  while (it != SetOrg().SetOrgname().SetMod().end()) {
1413  (*it)->AutoFix();
1414  if ((*it)->IsSetSubtype()
1415  && (!(*it)->IsSetSubname() || NStr::IsBlank((*it)->GetSubname()))) {
1416  it = SetOrg().SetOrgname().SetMod().erase(it);
1417  } else {
1418  it++;
1419  }
1420  }
1421  if (GetOrg().GetOrgname().GetMod().empty()) {
1422  SetOrg().SetOrgname().ResetMod();
1423  }
1424  }
1425 }
1426 
1427 
1428 void CBioSource::RemoveCultureNotes(bool is_species_level)
1429 {
1430  if (IsSetSubtype()) {
1431  CBioSource::TSubtype::iterator it = SetSubtype().begin();
1432  while (it != SetSubtype().end()) {
1433  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == CSubSource::eSubtype_other) {
1434  CSubSource::RemoveCultureNotes((*it)->SetName(), is_species_level);
1435  if (NStr::IsBlank((*it)->GetName())) {
1436  it = SetSubtype().erase(it);
1437  } else {
1438  it++;
1439  }
1440  } else {
1441  it++;
1442  }
1443  }
1444  if (GetSubtype().empty()) {
1445  ResetSubtype();
1446  }
1447  }
1448 }
1449 
// Extra words added to the lineage/taxname word list in RemoveLineageSourceNotes().
static const char* s_SpecialLineageWords[] = {
    "Class", "Classification",
    "Domain",
    "Family",
    "Genus",
    "Kingdom",
    "Lineage",
    "Note",
    "Order", "Organism",
    "Phylum",
    "Species", "Superfamily",
    "Tax class/lineage",
    "Taxonomic classification",
    "Taxonomic Classification is",
    "Taxonomy"
};
1469 
// List of words used by the lineage-note helpers below.
// NOTE(review): CTempString presumably does not own its characters, so the
// text the words were split from must outlive the list -- confirm.
1470 typedef vector<CTempString> TWordList;
1471 
// Plain-function wrapper around ispunct(), usable as a predicate for
// std::replace_if; the argument goes through unsigned char before the call.
static inline bool s_IsPunct( char ch )
{
    const int is_punct = ispunct(static_cast<unsigned char>(ch));
    return is_punct != 0;
}
1474 
1475 static void s_GetWordListFromText(string& str, TWordList& word_list)
1476 {
1477  if (str.empty()) {
1478  return;
1479  }
1480  std::replace_if(str.begin(), str.end(), s_IsPunct, ' ');
1481  NStr::Split(str, " ", word_list, NStr::fSplit_Tokenize);
1482 }
1483 
1484 
1485 static bool s_DoesTextContainOnlyTheseWords(const string& text, const TWordList& word_list)
1486 {
1487  if (text.empty()) {
1488  return false;
1489  }
1490  bool match = true, at_least_one = false;
1491 
1492  const char* orig = text.c_str();
1493  const char* ch = orig;
1494 
1495  while (isspace((unsigned char)(*ch)) || ispunct((unsigned char)(*ch))) {
1496  ++ch;
1497  }
1498  while (*ch != 0 && match) {
1499  match = false;
1500  for (TWordList::const_iterator word = word_list.begin(); word != word_list.end() && !match; ++word) {
1501  size_t length = word->size();
1502  if (NStr::strncasecmp(ch, word->data(), length) == 0) {
1503  unsigned char next = *(ch + length);
1504  if (next == '\0' || isspace(next) || ispunct(next)) {
1505  match = true;
1506  ch += length;
1507  at_least_one = true;
1508  }
1509  }
1510  }
1511  while (isspace((unsigned char)(*ch)) || ispunct((unsigned char)(*ch))) {
1512  ++ch;
1513  }
1514  }
1515  return (match && at_least_one);
1516 }
1517 
1518 
1520 {
1521  if (!IsSetOrg() || !IsSetLineage() || GetOrg().GetTaxId() == ZERO_TAX_ID) {
1522  return false;
1523  }
1524  bool any_removed = false;
1525 
1526  // gather all words that appear in lineage, taxname and in s_SpecialLineageWords
1527  TWordList word_list;
1528 
1529  string lineage(GetLineage());
1530  s_GetWordListFromText(lineage, word_list);
1531 
1532  string taxname(GetTaxname());
1533  s_GetWordListFromText(taxname, word_list);
1534 
1535  for (unsigned int i = 0; i < ArraySize(s_SpecialLineageWords); ++i) {
1536  word_list.push_back(s_SpecialLineageWords[i]);
1537  }
1538 
1539  if (IsSetSubtype()) {
1540  CBioSource::TSubtype::iterator it = SetSubtype().begin();
1541  while (it != SetSubtype().end()) {
1542  CRef<CSubSource> subsrc = *it;
1543  bool removed = false;
1544  if (subsrc->IsSetSubtype() && subsrc->GetSubtype() == CSubSource::eSubtype_other) {
1545  if (subsrc->IsSetName()) {
1546  if (s_DoesTextContainOnlyTheseWords(subsrc->GetName(), word_list)) {
1547  // remove this subsource note
1548  it = SetSubtype().erase(it);
1549  removed = true;
1550  any_removed = true;
1551  }
1552  }
1553  }
1554  if (!removed) {
1555  ++it;
1556  }
1557  }
1558  if (GetSubtype().empty()) {
1559  ResetSubtype();
1560  }
1561  }
1562 
1563  if (IsSetOrgname() && GetOrg().GetOrgname().IsSetMod()) {
1564  COrgName::TMod::iterator iter = SetOrg().SetOrgname().SetMod().begin();
1565  while (iter != SetOrg().SetOrgname().SetMod().end()) {
1566  CRef<COrgMod> orgmod = *iter;
1567  bool removed = false;
1568  if (orgmod->IsSetSubtype() && orgmod->GetSubtype() == COrgMod::eSubtype_other) {
1569  if (orgmod->IsSetSubname()) {
1570  if (s_DoesTextContainOnlyTheseWords(orgmod->GetSubname(), word_list)) {
1571  // remove this orgmod note
1572  iter = SetOrg().SetOrgname().SetMod().erase(iter);
1573  removed = true;
1574  any_removed = true;
1575  }
1576  }
1577  }
1578  if (!removed) {
1579  ++iter;
1580  }
1581  }
1582  if (GetOrg().GetOrgname().GetMod().empty()) {
1583  SetOrg().SetOrgname().ResetMod();
1584  }
1585  }
1586 
1587  return any_removed;
1588 }
1589 
1590 
1592 {
1593  bool rval = false;
1594 
1595  if (IsSetSubtype()) {
1596  CBioSource::TSubtype::iterator it = SetSubtype().begin();
1597  while (it != SetSubtype().end()) {
1598  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
1599  it = SetSubtype().erase(it);
1600  rval = true;
1601  } else {
1602  it++;
1603  }
1604  }
1605  if (GetSubtype().empty()) {
1606  ResetSubtype();
1607  }
1608  }
1609  return rval;
1610 }
1611 
1612 
1613 bool CBioSource::RemoveSubSource(int subtype, const string& val)
1614 {
1615  bool rval = false;
1616 
1617  if (IsSetSubtype()) {
1618  CBioSource::TSubtype::iterator it = SetSubtype().begin();
1619  while (it != SetSubtype().end()) {
1620  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype &&
1621  (*it)->IsSetName() && NStr::Equal((*it)->GetName(), val)) {
1622  it = SetSubtype().erase(it);
1623  rval = true;
1624  } else {
1625  it++;
1626  }
1627  }
1628  if (GetSubtype().empty()) {
1629  ResetSubtype();
1630  }
1631  }
1632  return rval;
1633 }
1634 
1635 
1636 bool CBioSource::RemoveOrgMod(int subtype)
1637 {
1638  bool rval = false;
1639 
1640  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetMod()) {
1641  COrgName::TMod::iterator it = SetOrg().SetOrgname().SetMod().begin();
1642  while (it != SetOrg().SetOrgname().SetMod().end()) {
1643  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
1644  it = SetOrg().SetOrgname().SetMod().erase(it);
1645  rval = true;
1646  } else {
1647  it++;
1648  }
1649  }
1650  if (GetOrg().GetOrgname().GetMod().empty()) {
1651  SetOrg().SetOrgname().ResetMod();
1652  }
1653  }
1654  return rval;
1655 }
1656 
1657 
1658 bool CBioSource::RemoveOrgMod(int subtype, const string& val)
1659 {
1660  bool rval = false;
1661 
1662  if (IsSetOrg() && GetOrg().IsSetOrgname() && GetOrg().GetOrgname().IsSetMod()) {
1663  COrgName::TMod::iterator it = SetOrg().SetOrgname().SetMod().begin();
1664  while (it != SetOrg().SetOrgname().SetMod().end()) {
1665  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype &&
1666  (*it)->IsSetSubname() && NStr::Equal((*it)->GetSubname(), val)) {
1667  it = SetOrg().SetOrgname().SetMod().erase(it);
1668  rval = true;
1669  } else {
1670  it++;
1671  }
1672  }
1673  if (GetOrg().GetOrgname().GetMod().empty()) {
1674  SetOrg().SetOrgname().ResetMod();
1675  }
1676  }
1677  return rval;
1678 }
1679 
1680 
// Body of CBioSource::FixEnvironmentalSample (named by the cross-reference
// index as starting at original line 1681).
// Works out whether the environmental_sample / metagenomic SubSources are
// present, decides from taxname, division, lineage and OrgMods whether they
// should be, and returns true when any_change was set.
// NOTE(review): this listing has lost original lines 1681, 1688, 1707, 1714,
// 1724, 1734, 1739, 1757 and 1762 -- presumably the signature, the loop header
// over the SubSources, and the statements that actually add the missing
// SubSources. Restore them from the repository copy before building.
1682 {
1683  bool has_env_sample = false;
1684  bool has_metagenomic = false;
1685  bool any_change = false;
1686 
// record which of the two SubSources are already present
1687  if (IsSetSubtype()) {
1689  if ((*s)->IsSetSubtype()) {
1690  if ((*s)->GetSubtype() == CSubSource::eSubtype_environmental_sample) {
1691  has_env_sample = true;
1692  } else if ((*s)->GetSubtype() == CSubSource::eSubtype_metagenomic) {
1693  has_metagenomic = true;
1694  }
1695  if (has_env_sample && has_metagenomic) {
1696  break;
1697  }
1698  }
1699  }
1700  }
1701 
1702  if (!has_env_sample &&
1703  IsSetOrg() &&
1704  GetOrg().IsSetTaxname() &&
1705  NStr::StartsWith(GetOrg().GetTaxname(), "uncultured ")) {
1706  //If taxname starts with uncultured, set environmental - sample to true
1708  has_env_sample = true;
1709  any_change = true;
1710  }
1711 
1712  if (has_metagenomic && !has_env_sample) {
1713  // If metagenomic, set environmental_sample
1715  has_env_sample = true;
1716  any_change = true;
1717  }
1718 
1719  if (!has_env_sample &&
1720  IsSetOrg() && GetOrg().IsSetOrgname() &&
1721  GetOrg().GetOrgname().IsSetDiv() &&
1722  NStr::Equal(GetOrg().GetOrgname().GetDiv(), "ENV")) {
1723  // Add environmental_sample to BioSource if BioSource.org.orgname.div == "ENV"
1725  has_env_sample = true;
1726  any_change = true;
1727  }
1728 
1729  if (IsSetOrg() && GetOrg().IsSetOrgname() &&
1730  GetOrg().GetOrgname().IsSetLineage() &&
1731  NStr::Find(GetOrg().GetOrgname().GetLineage(), "metagenomes") != string::npos) {
1732  // Add metagenomic(and environmental_sample) if BioSource.org.orgname.lineage contains "metagenomes"
1733  if (!has_env_sample) {
1735  has_env_sample = true;
1736  any_change = true;
1737  }
1738  if (!has_metagenomic) {
1740  has_metagenomic = true;
1741  any_change = true;
1742  }
1743  }
1744 
1745  if (IsSetOrg() && GetOrg().IsSetOrgname() &&
1746  GetOrg().GetOrgname().IsSetMod()) {
1747  // Add metagenomic(and environmental_sample) if BioSource has /metagenome_source qualifier
1748  bool has_metagenome_source = false;
1749  ITERATE(COrgName::TMod, m, GetOrg().GetOrgname().GetMod()) {
1750  if ((*m)->IsSetSubtype() && (*m)->GetSubtype() == COrgMod::eSubtype_metagenome_source) {
1751  has_metagenome_source = true;
1752  break;
1753  }
1754  }
1755  if (has_metagenome_source) {
1756  if (!has_env_sample) {
1758  has_env_sample = true;
1759  any_change = true;
1760  }
1761  if (!has_metagenomic) {
1763  has_metagenomic = true;
1764  any_change = true;
1765  }
1766  }
1767  }
1768  return any_change;
1769 }
1770 
1771 
1773 {
1774  bool any_change = false;
1775 
1776  if (IsSetSubtype()) {
1777  CBioSource::TSubtype::iterator s = SetSubtype().begin();
1778  while (s != SetSubtype().end()) {
1779  if ((*s)->IsSetSubtype()) {
1780  if ((*s)->GetSubtype() == CSubSource::eSubtype_country || (*s)->GetSubtype() == CSubSource::eSubtype_collection_date) {
1781  // skip "missing" null exemption value (RW-1944)
1782  if ((*s)->IsSetName() && NStr::EqualNocase((*s)->GetName(), "missing")) {
1783  ++s;
1784  continue;
1785  }
1786  }
1787  }
1788  if ((*s)->IsSetName() &&
1789  (NStr::EqualNocase((*s)->GetName(), "Missing")
1790  || NStr::EqualNocase((*s)->GetName(), "N/A"))) {
1791  s = SetSubtype().erase(s);
1792  any_change = true;
1793  } else {
1794  ++s;
1795  }
1796  }
1797  if (GetSubtype().empty()) {
1798  ResetSubtype();
1799  any_change = true;
1800  }
1801  }
1802  if (IsSetOrg() && GetOrg().IsSetOrgname()
1803  && GetOrg().GetOrgname().IsSetMod()) {
1804  COrgName::TMod::iterator m = SetOrg().SetOrgname().SetMod().begin();
1805  while (m != SetOrg().SetOrgname().SetMod().end()) {
1806  if ((*m)->IsSetSubname() &&
1807  (NStr::EqualNocase((*m)->GetSubname(), "Missing")
1808  || NStr::EqualNocase((*m)->GetSubname(), "N/A"))) {
1809  m = SetOrg().SetOrgname().SetMod().erase(m);
1810  any_change = true;
1811  } else {
1812  ++m;
1813  }
1814  }
1815  if (GetOrg().GetOrgname().GetMod().empty()) {
1816  SetOrg().SetOrgname().ResetMod();
1817  any_change = true;
1818  }
1819  }
1820 
1821  return any_change;
1822 }
1823 
1824 
1825 bool CBioSource::IsViral(const string& lineage)
1826 {
1827  if (NStr::StartsWith(lineage, "Viruses; ", NStr::eNocase)) {
1828  return true;
1829  } else {
1830  return false;
1831  }
1832 }
1833 
1834 
1836 {
1837  if (IsSetOrg() && GetOrg().IsSetLineage()) {
1838  return IsViral(GetOrg().GetLineage());
1839  } else {
1840  return false;
1841  }
1842 }
1843 
1844 
1845 bool CBioSource::AllowSexQualifier(const string& lineage)
1846 {
1847  bool isViral = IsViral(lineage);
1848  bool isBacteria = false;
1849  bool isArchaea = false;
1850  bool isFungal = false;
1851 
1852  if (NStr::StartsWith(lineage, "Bacteria; ", NStr::eNocase)) {
1853  isBacteria = true;
1854  } else if (NStr::StartsWith(lineage, "Archaea; ", NStr::eNocase)) {
1855  isArchaea = true;
1856  } else if (NStr::StartsWith(lineage, "Eukaryota; Fungi; ", NStr::eNocase)) {
1857  isFungal = true;
1858  }
1859 
1860  if (isViral || isBacteria || isArchaea || isFungal) {
1861  return false;
1862  } else {
1863  return true;
1864  }
1865 }
1866 
1867 
// Body of CBioSource::AllowSexQualifier() const (named by the cross-reference
// index as starting at original line 1868).
// Returns true when the org, orgname or lineage is unset.
// NOTE(review): original lines 1868 (signature) and 1873 (the statement of the
// else branch) are absent from this listing -- the else branch presumably
// returns AllowSexQualifier() for the stored lineage; confirm against the
// repository copy.
1869 {
1870  if (!IsSetOrg() || !GetOrg().IsSetOrgname() || !GetOrg().GetOrgname().IsSetLineage()) {
1871  return true;
1872  } else {
1874  }
1875 }
1876 
1877 
1878 bool CBioSource::AllowMatingTypeQualifier(const string& lineage)
1879 {
1880  bool isViral = IsViral(lineage);
1881  bool isAnimal = false;
1882  bool isPlant = false;
1883 
1884  if (NStr::StartsWith(lineage, "Eukaryota; Metazoa; ", NStr::eNocase)) {
1885  isAnimal = true;
1886  } else if (NStr::StartsWith(lineage, "Eukaryota; Viridiplantae; Streptophyta; Embryophyta; ", NStr::eNocase)
1887  || NStr::StartsWith(lineage, "Eukaryota; Rhodophyta; ", NStr::eNocase)
1888  || NStr::StartsWith(lineage, "Eukaryota; stramenopiles; Phaeophyceae; ", NStr::eNocase)) {
1889  isPlant = true;
1890  }
1891 
1892  if (isViral || isAnimal || isPlant) {
1893  return false;
1894  } else {
1895  return true;
1896  }
1897 }
1898 
1899 
// Body of CBioSource::AllowMatingTypeQualifier() const (named by the
// cross-reference index as starting at original line 1900).
// Returns true when the org, orgname or lineage is unset.
// NOTE(review): original lines 1900 (signature) and 1905 (the statement of the
// else branch) are absent from this listing -- the else branch presumably
// returns AllowMatingTypeQualifier() for the stored lineage; confirm against
// the repository copy.
1901 {
1902  if (!IsSetOrg() || !GetOrg().IsSetOrgname() || !GetOrg().GetOrgname().IsSetLineage()) {
1903  return true;
1904  } else {
1906  }
1907 }
1908 
1909 
1911 {
1912  bool any_change = false;
1913  if (!IsSetSubtype()) {
1914  return false;
1915  }
1916  TSubtype::iterator it = SetSubtype().begin();
1917  while (it != SetSubtype().end()) {
1918  bool remove = false;
1919  if ((*it)->IsSetSubtype()) {
1920  if ((*it)->GetSubtype() == CSubSource::eSubtype_sex && !AllowSexQualifier()) {
1921  remove = true;
1922  } else if ((*it)->GetSubtype() == CSubSource::eSubtype_mating_type) {
1923  if ((*it)->IsSetName() && AllowSexQualifier()
1924  && CSubSource::IsValidSexQualifierValue((*it)->GetName())) {
1925  (*it)->SetSubtype(CSubSource::eSubtype_sex);
1926  any_change = true;
1927  } else if (!AllowMatingTypeQualifier()) {
1928  remove = true;
1929  }
1930  }
1931  }
1932  if (remove) {
1933  it = SetSubtype().erase(it);
1934  any_change = true;
1935  } else {
1936  ++it;
1937  }
1938  }
1939 
1940  if (GetSubtype().size() == 0) {
1941  ResetSubtype();
1942  any_change = true;
1943  }
1944 
1945  return any_change;
1946 }
1947 
1948 
1950 {
1951  if (!IsViral() || !IsSetOrg() || !GetOrg().IsSetOrgname() ||
1952  !GetOrg().GetOrgname().IsSetMod()) {
1953  return false;
1954  }
1955 
1956  bool any_change = false;
1957  COrgName::TMod::iterator m = SetOrg().SetOrgname().SetMod().begin();
1958  while (m != SetOrg().SetOrgname().SetMod().end()){
1959  if ((*m)->IsUnexpectedViralOrgModQualifier()) {
1960  m = SetOrg().SetOrgname().SetMod().erase(m);
1961  any_change = true;
1962  } else {
1963  ++m;
1964  }
1965  }
1966  if (GetOrg().GetOrgname().GetMod().empty()) {
1967  SetOrg().SetOrgname().ResetMod();
1968  any_change = true;
1969  }
1970  return any_change;
1971 }
1972 
1973 
// Body of CBioSource::FixGenomeForQualifiers (named by the cross-reference
// index as starting at original line 1974).
// NOTE(review): original lines 1974, 1976 and 1977 are absent from this
// listing -- presumably the signature and the `if (...)` condition (possibly
// with the fix itself). Only the true/false result branches are visible here;
// restore the missing lines from the repository copy before building.
1975 {
1978  return true;
1979  } else {
1980  return false;
1981  }
1982 }
1983 
1984 
/// Set field @a Field of o3 to the value held by o1 when the field is set in
/// both o1 and o2 with equal values; otherwise o3 is left untouched.
///
/// The expansion is wrapped in do { } while (0) so that it is exactly one
/// statement: `MAKE_COMMON_INT(...);` can then be used safely as the body of
/// an unbraced if/else. The object arguments are parenthesized so expressions
/// such as `*ptr` can be passed directly.
#define MAKE_COMMON_INT(o1,o2,o3,Field) \
    do { \
        if ((o1).IsSet##Field() && (o2).IsSet##Field() && (o1).Get##Field() == (o2).Get##Field()) { \
            (o3).Set##Field((o1).Get##Field()); \
        } \
    } while (0)
1987 
1988 
1990 {
1991  CRef<CBioSource> common_src(new CBioSource());
1992 
1993  // copy common subtypes
1994  if (IsSetSubtype() && other.IsSetSubtype()) {
1995  ITERATE(TSubtype, it1, GetSubtype()) {
1996  bool found = false;
1997  ITERATE(TSubtype, it2, other.GetSubtype()) {
1998  if ((*it1)->Equals(**it2)) {
1999  found = true;
2000  break;
2001  }
2002  }
2003  if (found) {
2004  CRef<CSubSource> add(new CSubSource());
2005  add->Assign(**it1);
2006  common_src->SetSubtype().push_back(add);
2007  }
2008  }
2009  }
2010 
2011  MAKE_COMMON_INT((*this), other, (*common_src), Genome);
2012  MAKE_COMMON_INT((*this), other, (*common_src), Origin);
2013 
2014  if (IsSetPcr_primers() && other.IsSetPcr_primers() && GetPcr_primers().Equals(other.GetPcr_primers())) {
2015  common_src->SetPcr_primers().Assign(GetPcr_primers());
2016  }
2017 
2018  return common_src;
2019 }
2020 
2021 
2023 {
2024  if (!IsSetOrg() || !other.IsSetOrg()) {
2025  return CRef<CBioSource>(NULL);
2026  }
2027 
2028  CRef<COrg_ref> common_org = GetOrg().MakeCommon(other.GetOrg());
2029  if (!common_org) {
2030  return CRef<CBioSource>(NULL);
2031  }
2032 
2033  CRef<CBioSource> common_src = MakeCommonExceptOrg(other);
2034  common_src->SetOrg().Assign(*common_org);
2035 
2036  return common_src;
2037 }
2038 
2039 
2041 {
2042  if (!IsSetSubtype()) {
2043  return false;
2044  }
2045  ITERATE(TSubtype, it, GetSubtype()) {
2046  if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
2047  return true;
2048  }
2049  }
2050  return false;
2051 }
2052 
2053 END_objects_SCOPE // namespace ncbi::objects::
2054 
2056 
2057 /* Original file checksum: lines: 64, chars: 1883, CRC32: e1194deb */
CStaticArraySet< const char *, PNocase_CStr > TCTaxNameElementQualsSet
Definition: BioSource.cpp:965
static const char * s_SpecialLineageWords[]
Definition: BioSource.cpp:1450
static const char * kOrgModNote
Definition: BioSource.cpp:806
bool s_CompareNameVals(const CBioSource::TNameVal &f1, const CBioSource::TNameVal &f2)
Definition: BioSource.cpp:868
void GetFieldDiffsFromNameValLists(TFieldDiffList &list, CBioSource::TNameValList &list1, CBioSource::TNameValList &list2, bool is_local_copy)
Definition: BioSource.cpp:1207
CStaticArraySet< const char *, PNocase_CStr > TCStopWordStrSet
Definition: BioSource.cpp:1378
CStaticPairArrayMap< const char *, CBioSource::EGenome, PNocase_CStr > TGenomeMap
Definition: BioSource.cpp:165
static const char *const s_TaxNameElementQuals[]
Definition: BioSource.cpp:952
static const char * kOrganismName
Definition: BioSource.cpp:808
SStaticPair< const char *, CBioSource::EGenome > TGenomeKey
Definition: BioSource.cpp:126
CStaticArraySet< const char *, PNocase_CStr > TCIgnoreCaseQualsSet
Definition: BioSource.cpp:943
static const TCIgnoreCaseQualsSet s_IgnoreCaseQualsSet(s_IgnoreCaseQuals, sizeof(s_IgnoreCaseQuals), __FILE__, __LINE__)
static const char * kTaxId
Definition: BioSource.cpp:809
static IgnoreConflictData sIgnoreConflictList[]
Definition: BioSource.cpp:987
bool s_IsTaxNameElement(const string &value)
Definition: BioSource.cpp:968
struct ignoreconflict IgnoreConflictData
static const TCTaxNameElementQualsSet s_TaxNameElementQualsSet(s_TaxNameElementQuals, sizeof(s_TaxNameElementQuals), __FILE__, __LINE__)
static const char * kSubSrcNote
Definition: BioSource.cpp:807
DEFINE_STATIC_ARRAY_MAP(TGenomeMap, sm_GenomeKeys, genome_key_to_subtype)
static bool s_IsPunct(char ch)
Definition: BioSource.cpp:1473
bool s_SameExceptPrecision(double val1, double val2)
Definition: BioSource.cpp:1017
bool s_MayIgnoreCase(const string &value)
Definition: BioSource.cpp:946
CStaticPairArrayMap< const char *, CBioSource::EOrigin, PNocase_CStr > TOriginMap
Definition: BioSource.cpp:250
vector< CTempString > TWordList
Definition: BioSource.cpp:1470
static const TGenomeKey genome_key_to_subtype[]
Definition: BioSource.cpp:128
static const char * kDisableStrainForwardAttrib
Definition: BioSource.cpp:619
static const char *const s_StopWords[]
Definition: BioSource.cpp:1350
static const TOriginKey origin_key_to_subtype[]
Definition: BioSource.cpp:233
void RemoveDiffByName(TFieldDiffList &diff_list, string pair_name)
Definition: BioSource.cpp:1303
SStaticPair< const char *, CBioSource::EOrigin > TOriginKey
Definition: BioSource.cpp:231
int s_iCompareNameVals(const CBioSource::TNameVal &f1, const CBioSource::TNameVal &f2)
Definition: BioSource.cpp:842
static const char *const s_IgnoreCaseQuals[]
Definition: BioSource.cpp:926
static bool s_DoesTextContainOnlyTheseWords(const string &text, const TWordList &word_list)
Definition: BioSource.cpp:1485
bool s_MustCopy(int subtype)
Definition: BioSource.cpp:658
static void s_GetWordListFromText(string &str, TWordList &word_list)
Definition: BioSource.cpp:1475
void CompareValLists(TFieldDiffList &list, const string &val_name, bool is_local_copy, const vector< string > &list1, const vector< string > &list2)
Definition: BioSource.cpp:1154
#define MAKE_COMMON_INT(o1, o2, o3, Field)
Definition: BioSource.cpp:1985
static const TCStopWordStrSet s_StopWordsSet(s_StopWords, sizeof(s_StopWords), __FILE__, __LINE__)
static const TOriginKey origin_synonyms[]
Definition: BioSource.cpp:243
EConflictIgnoreType
Definition: BioSource.cpp:974
@ eConflictIgnoreMissingInBioSample
Definition: BioSource.cpp:977
@ eConflictIgnoreMissingInBioSource
Definition: BioSource.cpp:976
@ eConflictIgnoreAll
Definition: BioSource.cpp:975
vector< CRef< CFieldDiff > > TFieldDiffList
Definition: BioSource.hpp:53
User-defined methods of the data storage class.
bool AllowMatingTypeQualifier() const
Definition: BioSource.cpp:1900
const string & GetLineage(void) const
Definition: BioSource.cpp:360
bool BiosampleDiffsOkForUpdate(const TFieldDiffList &diffs) const
Definition: BioSource.cpp:693
bool IsSetCommon(void) const
Definition: BioSource.cpp:345
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool RemoveLineageSourceNotes()
Definition: BioSource.cpp:1519
bool FixEnvironmentalSample()
Definition: BioSource.cpp:1681
bool IsSetGcode(void) const
Definition: BioSource.cpp:365
string GetRepliconName(void) const
Definition: BioSource.cpp:421
CRef< CBioSource > MakeCommonExceptOrg(const CBioSource &other) const
Definition: BioSource.cpp:1989
static bool ShouldIgnoreConflict(const string &label, string src_val, string sample_val, bool is_local_copy=false)
Definition: BioSource.cpp:1033
TNameValList GetNameValPairs() const
Definition: BioSource.cpp:811
bool IsSetOrgMod(void) const
Definition: BioSource.cpp:415
static string GetOrganelleByGenome(unsigned int genome)
Definition: BioSource.cpp:216
bool GetDisableStrainForwarding() const
Definition: BioSource.cpp:647
pair< string, string > TNameVal
Definition: BioSource.hpp:126
int GetPgcode(void) const
Definition: BioSource.cpp:390
void RemoveCultureNotes(bool is_species_level=true)
Definition: BioSource.cpp:1428
void AutoFix()
Definition: BioSource.cpp:1392
int GetGcode(void) const
Definition: BioSource.cpp:370
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
string GetBioprojectLocation(void) const
Definition: BioSource.cpp:522
bool FixSexMatingTypeInconsistencies()
Definition: BioSource.cpp:1910
void x_ClearCoordinatedBioSampleSubSources()
Definition: BioSource.cpp:790
CBioSource(void)
Definition: BioSource.hpp:203
void x_RemoveNameElementDiffs(const CBioSource &biosample, TFieldDiffList &diff_list) const
Definition: BioSource.cpp:1268
const string & GetCommon(void) const
Definition: BioSource.cpp:350
bool RemoveNullTerms()
Definition: BioSource.cpp:1772
const COrgName & GetOrgname(void) const
Definition: BioSource.cpp:410
const string & GetDivision(void) const
Definition: BioSource.cpp:400
CRef< CBioSource > MakeCommon(const CBioSource &other) const
Definition: BioSource.cpp:2022
bool FixGenomeForQualifiers()
Definition: BioSource.cpp:1974
static bool IsStopWord(const string &value)
Definition: BioSource.cpp:1382
static CBioSource::EOrigin GetOriginByString(const string &origin, NStr::ECase use_case=NStr::eCase, bool starts_with=false)
Definition: BioSource.cpp:254
bool HasSubtype(CSubSource::TSubtype subtype) const
Definition: BioSource.cpp:2040
bool IsSetMgcode(void) const
Definition: BioSource.cpp:375
bool IsViral() const
Definition: BioSource.cpp:1835
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
string GetBioprojectType(void) const
Definition: BioSource.cpp:482
TNameValList x_GetSubtypeNameValPairs() const
Definition: BioSource.cpp:900
bool RemoveOrgMod(int subtype)
Definition: BioSource.cpp:1636
vector< TNameVal > TNameValList
Definition: BioSource.hpp:127
bool x_ShouldIgnoreNoteForBiosample() const
Definition: BioSource.cpp:1316
bool IsSetLineage(void) const
Definition: BioSource.cpp:355
static void x_RemoveStopWords(COrg_ref &org_ref)
Definition: BioSource.cpp:672
bool IsSetPgcode(void) const
Definition: BioSource.cpp:385
bool IsSetDivision(void) const
Definition: BioSource.cpp:395
void UpdateWithBioSample(const CBioSource &biosample, bool force, bool is_local_copy=false)
Definition: BioSource.cpp:704
TNameValList x_GetOrgModNameValPairs() const
Definition: BioSource.cpp:879
int GetMgcode(void) const
Definition: BioSource.cpp:380
static CBioSource::EGenome GetGenomeByOrganelle(const string &organelle, NStr::ECase use_case=NStr::eCase, bool starts_with=false)
Definition: BioSource.cpp:168
bool AllowSexQualifier() const
Definition: BioSource.cpp:1868
void SetDisableStrainForwarding(bool val)
Definition: BioSource.cpp:621
static string GetStringFromOrigin(unsigned int origin)
Definition: BioSource.cpp:320
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
TFieldDiffList GetBiosampleDiffs(const CBioSource &biosample, bool is_local_copy=false) const
Definition: BioSource.cpp:1327
bool RemoveUnexpectedViralQualifiers()
Definition: BioSource.cpp:1949
~CBioSource(void)
Definition: BioSource.cpp:68
bool RemoveSubSource(int subtype)
Definition: BioSource.cpp:1591
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
void AutoFix()
Definition: OrgMod.cpp:1068
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:62
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
const string & GetLineage(void) const
Definition: Org_ref.cpp:124
const string & GetDivision(void) const
Definition: Org_ref.cpp:164
TTaxId SetTaxId(TTaxId tax_id)
Definition: Org_ref.cpp:93
CRef< COrg_ref > MakeCommon(const COrg_ref &other) const
Definition: Org_ref.cpp:280
int GetPgcode(void) const
Definition: Org_ref.cpp:154
bool IsSetDivision(void) const
Definition: Org_ref.cpp:159
bool IsSetPgcode(void) const
Definition: Org_ref.cpp:149
bool IsSetGcode(void) const
Definition: Org_ref.cpp:129
bool IsSetMgcode(void) const
Definition: Org_ref.cpp:139
int GetMgcode(void) const
Definition: Org_ref.cpp:144
int GetGcode(void) const
Definition: Org_ref.cpp:134
bool IsSetOrgMod(void) const
Definition: Org_ref.cpp:169
bool IsSetLineage(void) const
Definition: Org_ref.cpp:119
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
Definition: static_set.hpp:680
const_iterator end() const
Return the end of the controlled sequence.
Definition: static_set.hpp:647
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:128
static bool IsValidSexQualifierValue(const string &value)
Definition: SubSource.cpp:2488
void AutoFix()
Definition: SubSource.cpp:5103
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:185
static bool NeedsNoText(const TSubtype &subtype)
Definition: SubSource.cpp:233
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
Definition: SubSource.cpp:1237
static CRef< CDate > DateFromCollectionDate(const string &str) THROWS((CException))
Definition: SubSource.cpp:287
static bool IsDiscouraged(const TSubtype subtype)
Definition: SubSource.cpp:247
static void RemoveCultureNotes(string &value, bool is_species_level=true)
Definition: SubSource.cpp:5192
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:90
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const CVect2< U > & v2
Definition: globals.hpp:440
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static int strncasecmp(const char *s1, const char *s2, size_t n)
Case-insensitive comparison of two zero-terminated strings, narrowed to the specified number of chara...
Definition: ncbistr.hpp:5247
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
const TPcr_primers & GetPcr_primers(void) const
Get the Pcr_primers member data.
Definition: BioSource_.hpp:588
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: SubSource_.hpp:319
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
Definition: BioSource_.hpp:503
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
TGenome & SetGenome(void)
Assign a value to Genome data member.
Definition: BioSource_.hpp:435
bool IsSetPcr_primers(void) const
Check if a value has been assigned to Pcr_primers data member.
Definition: BioSource_.hpp:576
bool CanGetGenome(void) const
Check if it is safe to call GetGenome method.
Definition: BioSource_.hpp:403
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
TOrg & SetOrg(void)
Assign a value to Org data member.
Definition: BioSource_.hpp:518
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: SubSource_.hpp:310
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:397
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: SubSource_.hpp:291
void SetPcr_primers(TPcr_primers &value)
Assign a value to Pcr_primers data member.
Definition: BioSource_.cpp:124
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
void SetName(const TName &value)
Assign a value to Name data member.
Definition: SubSource_.hpp:359
const TName & GetName(void) const
Get the Name member data.
Definition: SubSource_.hpp:350
EGenome
biological context
Definition: BioSource_.hpp:97
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
Definition: SubSource_.hpp:338
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
void ResetSubtype(void)
Reset Subtype data member.
Definition: BioSource_.cpp:113
@ eSubtype_collection_date
DD-MMM-YYYY format.
Definition: SubSource_.hpp:114
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
@ eOrigin_synthetic
purely synthetic
Definition: BioSource_.hpp:134
@ eOrigin_mut
artificially mutagenized
Definition: BioSource_.hpp:132
@ eOrigin_artificial
artificially engineered
Definition: BioSource_.hpp:133
@ eOrigin_natmut
naturally occurring mutant
Definition: BioSource_.hpp:131
@ eOrigin_natural
normal biological entity
Definition: BioSource_.hpp:130
@ eGenome_plasmid_in_mitochondrion
Definition: BioSource_.hpp:121
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: OrgMod_.hpp:307
bool IsSetPgcode(void) const
Plastid genetic code. Check if a value has been assigned to Pgcode data member.
Definition: OrgName_.hpp:1040
TMgcode GetMgcode(void) const
Get the Mgcode member data.
Definition: OrgName_.hpp:965
TGcode GetGcode(void) const
Get the Gcode member data.
Definition: OrgName_.hpp:918
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: OrgMod_.hpp:288
const TSubname & GetSubname(void) const
Get the Subname member data.
Definition: OrgMod_.hpp:347
bool IsSetCommon(void) const
Common name. Check if a value has been assigned to Common data member.
Definition: Org_ref_.hpp:407
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
bool IsSetMgcode(void) const
Mitochondrial genetic code. Check if a value has been assigned to Mgcode data member.
Definition: OrgName_.hpp:946
void ResetTaxname(void)
Reset Taxname data member.
Definition: Org_ref_.cpp:52
const TCommon & GetCommon(void) const
Get the Common member data.
Definition: Org_ref_.hpp:419
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
Definition: OrgMod_.hpp:335
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
bool IsSetGcode(void) const
Genetic code (see CdRegion). Check if a value has been assigned to Gcode data member.
Definition: OrgName_.hpp:899
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
Definition: Org_ref_.cpp:87
TPgcode GetPgcode(void) const
Get the Pgcode member data.
Definition: OrgName_.hpp:1059
const TAttrib & GetAttrib(void) const
Get the Attrib member data.
Definition: OrgName_.hpp:792
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
where both are integers</td>\n<td></td>\n</tr>\n<tr>\n<td>tse</td>\n<td>optional</td>\n<td>String</td>\n<td class=\"description\">TSE option controls what blob is orig
int i
static void text(MDB_val *v)
Definition: mdb_dump.c:62
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int ispunct(Uchar c)
Definition: ncbictype.hpp:68
Defines unified interface to application:
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
static const GLdouble origin[]
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
Template structure SStaticPair is a simplified replacement for STL pair<>. Main reason for introducing this...
Definition: static_set.hpp:60
EConflictIgnoreType ignore_type
Definition: BioSource.cpp:983
const char * qual_name
Definition: BioSource.cpp:982
Modified on Thu Apr 25 08:20:56 2024 by modify_doxy.py rev. 669887