NCBI C++ ToolKit
tax_validation_and_cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tax_validation_and_cleanup.cpp 102525 2024-05-22 17:12:56Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * Tools for batch processing taxonomy-related validation and cleanup
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
37 
38 #include <serial/iterator.hpp>
39 
46 
47 #include <objmgr/bioseq_ci.hpp>
48 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/util/feature.hpp>
50 
51 #include <objmgr/feat_ci.hpp>
52 #include <objmgr/scope.hpp>
53 
55 
60 
61 #define NCBI_USE_ERRCODE_X Objtools_Validator
62 
65 BEGIN_SCOPE(validator)
66 using namespace sequence;
67 
68 const string kInvalidReplyMsg = "Taxonomy service returned invalid reply";
69 
70 
72 {
73  x_Init();
74 }
75 
76 
78 {
79  m_ValuesToTry.clear();
80  m_RepliesProcessed = 0;
81  m_Descs.clear();
82  m_Feats.clear();
83 }
84 
85 
87 {
88  m_Descs.push_back(TDescPair(desc, ctx));
89 }
90 
91 
93 {
94  m_Feats.push_back(feat);
95 }
96 
97 
98 void CQualifierRequest::AddRequests(vector<CRef<COrg_ref> >& request_list) const
99 {
100  for (const string& it : m_ValuesToTry) {
101  CRef<COrg_ref> rq(new COrg_ref);
102  rq->SetTaxname(it);
103  request_list.push_back(rq);
104  }
105 }
106 
107 
108 bool CQualifierRequest::MatchTryValue(const string& val) const
109 {
110  for (const string& it : m_ValuesToTry) {
111  if (NStr::EqualNocase(val, it)) {
112  return true;
113  }
114  }
115  return false;
116 }
117 
118 
120 {
121  vector<TTaxError> errs;
122  ListErrors(errs);
123  for (const auto& e : errs) {
124  for (const auto& it : m_Descs) {
125  imp.PostObjErr(e.severity, e.err_type, e.err_msg, *(it.first), it.second);
126  }
127  for (const auto& it : m_Feats) {
128  imp.PostObjErr(e.severity, e.err_type, e.err_msg, *it);
129  }
130  }
131 }
132 
133 
// Builds a specific-host lookup request for the qualifier value `host`.
//   host    - specific-host value as found on the organism; stored in m_Host
//   org     - organism carrying the qualifier; its lineage (if set) is copied
//             into m_OrgLineage
//   for_fix - when true, only the original host string is queued as a lookup
//             candidate; when false, the value from SpecificHostValueToCheck()
//             is queued first if it differs from host
// NOTE(review): listing lines 135 and 143 are not visible in this excerpt
// (presumably the base-class initializer and a statement before the early
// return) - confirm against the real source.
134 CSpecificHostRequest::CSpecificHostRequest(const string& host, const COrg_ref& org, bool for_fix) :
136  m_Host(host),
137  m_Response(eUnrecognized),
138  m_HostLineage(),
139  m_OrgLineage()
140 {
141  string host_check = SpecificHostValueToCheck(host);
// A blank check value means nothing is queued in m_ValuesToTry.
142  if (NStr::IsBlank(host_check)) {
144  return;
145  }
// The value to check goes first so that it is tried before the raw host text.
146  if (!for_fix && !NStr::Equal(host, host_check)) {
147  m_ValuesToTry.push_back(host_check);
148  }
149  m_ValuesToTry.push_back(host);
150 
151  m_SuggestedFix.clear();
152  if (org.IsSetLineage()) {
153  m_OrgLineage = org.GetLineage();
154  }
155 }
156 
157 
// Folds one taxonomy service reply into this request's state.
// The behaviour depends on the current m_Response:
//   - eAmbiguous:    when a value named new_error is blank, m_Error is reset
//                    to the empty string;
//   - eUnrecognized: m_Error is classified by its text ("ambiguous",
//                    "Invalid value for specific host", "Specific host value
//                    is alternate name", or anything else) and m_SuggestedFix
//                    may be set from the reply's organism taxname.
// NOTE(review): a large number of listing lines (161, 163-165, 169, 171-173,
// 175, 177-178, 180, 182, 184, 188, 192, 197) are not visible in this excerpt,
// including where new_error and m_Error are computed and where m_Response is
// updated - the summary above covers only the visible statements.
158 void CSpecificHostRequest::AddReply(const CT3Reply& reply, TTaxId descTaxID)
159 {
160  if (m_Response == eAmbiguous) {
162  if (NStr::IsBlank(new_error)) {
166  m_Error = kEmptyStr;
167  }
168  } else if (m_Response == eUnrecognized) {
170  if (NStr::IsBlank(m_Error)) {
174  } else if (NStr::Find(m_Error, "ambiguous") != NPOS) {
176  } else if (NStr::StartsWith(m_Error, "Invalid value for specific host") && !IsLikelyTaxname(m_Host)) {
179  } else if (NStr::StartsWith(m_Error, "Specific host value is alternate name")) {
// Alternate-name replies: the reply's taxname becomes the suggested fix.
181  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
183  } else {
// Any other error text: only fill in a suggestion if none exists yet and the
// reply actually carries organism data.
185  if (NStr::IsBlank(m_SuggestedFix) && reply.IsData() && reply.GetData().IsSetOrg()) {
186  if (HasMisSpellFlag(reply.GetData())) {
187  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
// Otherwise suggest the taxname only when the host text does not match the
// returned org-ref and the reply is not flagged as a common name.
189  } else if (!FindMatchInOrgRef(m_Host, reply.GetData().GetOrg())
190  && !IsCommonName(reply.GetData())) {
191  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
193  }
194  }
195  }
196  }
198 }
199 
200 
// Appends the errors implied by this request's state to errs.
// The switch covers each m_Response value (eNormal adds nothing); afterwards a
// "Suspect Host Value" entry is built when the organism lineage contains
// "Streptophyta;" or "Metazoa;" while the host lineage contains "Fungi;",
// "Bacteria;", "Archaea;" or "Viruses;".
// NOTE(review): listing lines 207, 210, 213, 217 and 221 are not visible in
// this excerpt - the per-case error entries and the opening of the final
// condition/push_back cannot be confirmed from here.
201 void CSpecificHostRequest::ListErrors(vector<TTaxError>& errs) const
202 {
203  switch (m_Response) {
204  case eNormal:
205  break;
206  case eAmbiguous:
208  break;
209  case eUnrecognized:
211  break;
212  case eAlternateName:
214  break;
215  }
216 
// Lineage-based plausibility check: plant/animal organism with a microbial or
// viral host.
218  (NStr::Find(m_OrgLineage, "Streptophyta;") != NPOS || NStr::Find(m_OrgLineage, "Metazoa;") != NPOS) &&
219  (NStr::Find(m_HostLineage, "Fungi;") != NPOS || NStr::Find(m_HostLineage, "Bacteria;") != NPOS ||
220  NStr::Find(m_HostLineage, "Archaea;") != NPOS || NStr::Find(m_HostLineage, "Viruses;") != NPOS)) {
222  "Suspect Host Value - a prokaryote, fungus or virus is suspect as a host for a plant or animal" });
223  }
224 }
225 
226 
227 //LCOV_EXCL_START
228 //used by cleanup
229 const string& CSpecificHostRequest::SuggestFix() const
230 {
231  if (m_ValuesToTry.empty()) {
232  return m_Host;
233  } else {
234  return m_SuggestedFix;
235  }
236 }
237 //LCOV_EXCL_STOP
238 
239 // new strain code
240 
241 /*
242 // sample callback function for CStrainRequest::StrainContainsTaxonInfo
243 #include <objtools/validator/tax_validation_and_cleanup.hpp>
244 
245 // demo for sequence validator
246 static bool DemoStrainCheckCallback1(const string& organism, const string& strain)
247 
248 {
249  CTaxon3 taxon3(CTaxon3::initialize::yes);
250  auto responder = [&taxon3](const vector<CRef<COrg_ref>>& request)->CRef<CTaxon3_reply>
251  {
252  CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(request);
253  return reply;
254  };
255  return CStrainRequest::StrainContainsTaxonInfo(organism, strain, responder);
256 }
257 
258 // demo for biosample validator - coerced to use vector-based request for testing
259 static bool DemoStrainCheckCallback2(const string& organism, const string& strain)
260 
261 {
262  CTaxon3 taxon3(CTaxon3::initialize::yes);
263  auto responder = [&taxon3](const CRef<COrg_ref>& request)->CRef<CTaxon3_reply>
264  {
265  vector<CRef<COrg_ref>> rq;
266  rq.push_back(request);
267  CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq);
268  return reply;
269  };
270  return CStrainRequest::StrainContainsTaxonInfo(organism, strain, responder);
271 }
272 */
273 
// Decides whether a strain value should be skipped by the taxonomy check.
// Per VR-762, a strain is ignored when it consists solely of letters and
// digits and contains at least one of each (e.g. "ABC123").
// Returns false for an empty string, for values made only of letters or only
// of digits, and for values containing any other character.
static bool IgnoreStrain(const string& str)
{
    bool has_number = false;
    bool has_letter = false;
    for (const char ch : str) {
        // The <cctype> classifiers require a value representable as
        // unsigned char; passing a negative plain char (any non-ASCII byte
        // where char is signed) is undefined behavior.
        const unsigned char uch = static_cast<unsigned char>(ch);
        if (isdigit(uch)) {
            has_number = true;
        } else if (isalpha(uch)) {
            has_letter = true;
        } else {
            return false;
        }
    }
    return has_number && has_letter;
}
294 
295 static bool OrganismIsUnwanted(const string& str)
296 {
297  if (NStr::FindNoCase(str, "virus") != NPOS ||
298  NStr::FindNoCase(str, "viroid") != NPOS ||
299  NStr::FindNoCase(str, "vector") != NPOS ||
300  NStr::FindNoCase(str, "phage") != NPOS) {
301  return true;
302  } else {
303  return false;
304  }
305 }
306 
307 
308 static void GetStrainCandidates(const string& organism, const string& strain, vector<string>& candidates)
309 {
310  candidates.push_back(strain);
311 
312  size_t pos = 0;
313  for (char ch : strain) {
314  if (isalpha(ch)) {
315  ++pos;
316  } else {
317  if (pos >= 5) {
318  candidates.push_back(strain.substr(0, pos));
319  }
320  break;
321  }
322  }
323 
324  if (NStr::EndsWith(organism, " sp.")) {
325  candidates.push_back(organism.substr(0, organism.length() - 3) + strain);
326  }
327 }
328 
329 
330 static bool CheckStrainReply(const string& organism, const string&strain, CRef<CTaxon3_reply> reply)
331 {
332  if (NStr::IsBlank(organism) || ! reply) {
333  return false;
334  }
335 
336  string alternative = "";
337  if (NStr::EndsWith(organism, " sp.")) {
338  alternative = organism.substr(0, organism.length() - 3) + strain;
339  }
340 
341  CTaxon3_reply::TReply::const_iterator reply_it = reply->GetReply().begin();
342  while (reply_it != reply->GetReply().end()) {
343  CRef<COrg_ref> cpy;
344  if ((*reply_it)->IsData() && (*reply_it)->GetData().IsSetOrg()) {
345  cpy.Reset(new COrg_ref());
346  cpy->Assign((*reply_it)->GetData().GetOrg());
347  if (cpy && cpy->IsSetTaxname()) {
348  string taxname = cpy->GetTaxname();
349  if (NStr::EqualNocase(organism, taxname)) {
350  return true;
351  }
352  if (alternative.length() > 0 && NStr::EqualNocase(alternative, taxname)) {
353  return true;
354  }
355  }
356  }
357  ++reply_it;
358  }
359 
360  return false;
361 }
362 
363 
364 bool CStrainRequest::StrainContainsTaxonInfo(const string& organism, const string& strain,
365  std::function<CRef<CTaxon3_reply>(const vector<CRef<COrg_ref>>&)> taxoncallback)
366 {
367  if (NStr::IsBlank(organism) || OrganismIsUnwanted(organism) || NStr::IsBlank(strain) || IgnoreStrain(strain) || ! taxoncallback) {
368  return false;
369  }
370 
371  vector<string> candidates;
372 
373  GetStrainCandidates(organism, strain, candidates);
374 
375  vector<CRef<COrg_ref>> request;
376 
377  bool no_data_requests = true;
378 
379  for (vector<string>::iterator tax_it = candidates.begin(); tax_it != candidates.end(); ++tax_it) {
380  CRef<COrg_ref> org(new COrg_ref());
381  org->SetTaxname(*tax_it);
382  request.push_back(org);
383  no_data_requests = false;
384  }
385 
386  if (no_data_requests) {
387  return false;
388  }
389 
390  CRef<CTaxon3_reply> reply = taxoncallback(request);
391 
392  if (CheckStrainReply(organism, strain, reply)) {
393  return true;
394  }
395 
396  return false;
397 }
398 
399 
401  std::function<CRef<CTaxon3_reply>(const vector<CRef<COrg_ref>>&)> taxoncallback)
402 {
403  if (!org.IsSetTaxname() || !org.IsSetOrgMod()) {
404  return false;
405  }
406  if (org.IsSetLineage() && OrganismIsUnwanted(org.GetLineage())) {
407  return false;
408  }
409 
410  string organism;
411  string strain;
412 
413  organism = org.GetTaxname();
414  if (OrganismIsUnwanted(organism)) {
415  return false;
416  }
417  for (const auto& it : org.GetOrgname().GetMod()) {
418  if (it->IsSetSubtype() && it->IsSetSubname() &&
419  it->GetSubtype() == COrgMod::eSubtype_strain) {
420  strain = it->GetSubname();
421  }
422  }
423 
424  if (NStr::IsBlank(organism) || OrganismIsUnwanted(organism) || NStr::IsBlank(strain) || IgnoreStrain(strain) || ! taxoncallback) {
425  return false;
426  }
427 
428  vector<string> candidates;
429 
430  GetStrainCandidates(organism, strain, candidates);
431 
432  vector<CRef<COrg_ref>> request;
433 
434  bool no_data_requests = true;
435 
436  for (vector<string>::iterator tax_it = candidates.begin(); tax_it != candidates.end(); ++tax_it) {
437  CRef<COrg_ref> org(new COrg_ref());
438  org->SetTaxname(*tax_it);
439  request.push_back(org);
440  no_data_requests = false;
441  }
442 
443  if (no_data_requests) {
444  return false;
445  }
446 
447  CRef<CTaxon3_reply> reply = taxoncallback(request);
448 
449  if (CheckStrainReply(organism, strain, reply)) {
450  return true;
451  }
452 
453  return false;
454 }
455 
456 
457 bool CStrainRequest::StrainContainsTaxonInfo(const string& organism, const string& strain,
458  std::function<CRef<CTaxon3_reply>(const CRef<COrg_ref>&)> taxoncallback)
459 {
460  if (NStr::IsBlank(organism) || OrganismIsUnwanted(organism) || NStr::IsBlank(strain) || IgnoreStrain(strain) || ! taxoncallback) {
461  return false;
462  }
463 
464  vector<string> candidates;
465 
466  GetStrainCandidates(organism, strain, candidates);
467 
468  for (vector<string>::iterator tax_it = candidates.begin(); tax_it != candidates.end(); ++tax_it) {
469  CRef<COrg_ref> org(new COrg_ref());
470  org->SetTaxname(*tax_it);
471 
472  CRef<CTaxon3_reply> reply = taxoncallback(org);
473 
474  if (CheckStrainReply(organism, strain, reply)) {
475  return true;
476  }
477  }
478 
479  return false;
480 }
481 
482 
484  std::function<CRef<CTaxon3_reply>(const CRef<COrg_ref>&)> taxoncallback)
485 {
486  if (!org.IsSetTaxname() || !org.IsSetOrgMod()) {
487  return false;
488  }
489  if (org.IsSetLineage() && OrganismIsUnwanted(org.GetLineage())) {
490  return false;
491  }
492 
493  string organism;
494  string strain;
495 
496  organism = org.GetTaxname();
497  if (OrganismIsUnwanted(organism)) {
498  return false;
499  }
500  for (const auto& it : org.GetOrgname().GetMod()) {
501  if (it->IsSetSubtype() && it->IsSetSubname() &&
502  it->GetSubtype() == COrgMod::eSubtype_strain) {
503  strain = it->GetSubname();
504  }
505  }
506 
507  if (NStr::IsBlank(organism) || OrganismIsUnwanted(organism) || NStr::IsBlank(strain) || IgnoreStrain(strain) || ! taxoncallback) {
508  return false;
509  }
510 
511  vector<string> candidates;
512 
513  GetStrainCandidates(organism, strain, candidates);
514 
515  for (vector<string>::iterator tax_it = candidates.begin(); tax_it != candidates.end(); ++tax_it) {
516  CRef<COrg_ref> org(new COrg_ref());
517  org->SetTaxname(*tax_it);
518 
519  CRef<CTaxon3_reply> reply = taxoncallback(org);
520 
521  if (CheckStrainReply(organism, strain, reply)) {
522  return true;
523  }
524  }
525 
526  return false;
527 }
528 
529 
530 void CStrainRequest::x_AddOneStrain(const COrg_ref& org, vector<string>& candidates)
531 {
532  if (!org.IsSetTaxname() || !org.IsSetOrgMod()) {
533  return;
534  }
535  if (org.IsSetLineage() && OrganismIsUnwanted(org.GetLineage())) {
536  return;
537  }
538 
539  string organism;
540  string strain;
541 
542  organism = org.GetTaxname();
543  if (OrganismIsUnwanted(organism)) {
544  return;
545  }
546  for (const auto& it : org.GetOrgname().GetMod()) {
547  if (it->IsSetSubtype() && it->IsSetSubname() &&
548  it->GetSubtype() == COrgMod::eSubtype_strain) {
549  strain = it->GetSubname();
550  }
551  }
552  if (organism.empty() || strain.empty()) {
553  return;
554  }
555 
556  if (IgnoreStrain(strain)) {
557  return;
558  }
559 
560  GetStrainCandidates(organism, strain, candidates);
561 }
562 
563 
// Collects strain lookup candidates for a whole record into `candidates`.
// Source descriptors and biosource features on `se` that carry an org-ref are
// passed to x_AddOneStrain(); when `se` is a set, the function calls itself on
// nested entries.
// NOTE(review): listing lines 567 and 589 (the iteration constructs that
// introduce `it`) are not visible in this excerpt.
564 void CStrainRequest::x_CollectStrainsForRecord(const CSeq_entry& se, vector<string>& candidates)
565 {
566  // get source descriptors
568  {
569  if ((*it)->IsSource() && (*it)->GetSource().IsSetOrg()) {
570  const COrg_ref& org = (*it)->GetSource().GetOrg();
571  x_AddOneStrain(org, candidates);
572  }
573  }
574  // also get features
575  FOR_EACH_ANNOT_ON_SEQENTRY(annot_it, se)
576  {
577  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, **annot_it)
578  {
// Only biosource features with an organism contribute candidates.
579  if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsBiosrc()
580  && (*feat_it)->GetData().GetBiosrc().IsSetOrg()) {
581  const COrg_ref& org = (*feat_it)->GetData().GetBiosrc().GetOrg();
582  x_AddOneStrain(org, candidates);
583  }
584  }
585  }
586 
587  // if set, recurse
588  if (se.IsSet()) {
590  {
591  x_CollectStrainsForRecord(**it, candidates);
592  }
593  }
594 }
595 
596 
598  const CRef<CSeqdesc> sd, const CRef<CSeq_feat> sf, const CSeq_entry& se,
599  std::function<CRef<CTaxon3_reply>(const vector<CRef<COrg_ref>>&)> taxoncallback)
600 {
601  if (!org.IsSetTaxname() || !org.IsSetOrgMod()) {
602  return;
603  }
604  if (org.IsSetLineage() && OrganismIsUnwanted(org.GetLineage())) {
605  return;
606  }
607 
608  string organism;
609  string strain;
610 
611  organism = org.GetTaxname();
612  if (OrganismIsUnwanted(organism)) {
613  return;
614  }
615  for (const auto& it : org.GetOrgname().GetMod()) {
616  if (it->IsSetSubtype() && it->IsSetSubname() &&
617  it->GetSubtype() == COrgMod::eSubtype_strain) {
618  strain = it->GetSubname();
619  }
620  }
621  if (organism.empty() || strain.empty()) {
622  return;
623  }
624 
625  if (IgnoreStrain(strain)) {
626  return;
627  }
628 
629  if (StrainContainsTaxonInfo(organism, strain, taxoncallback)) {
630  if (sd) {
632  "Strain '" + strain + "' contains taxonomic name information", *sd, &se);
633  } else if (sf) {
635  "Strain '" + strain + "' contains taxonomic name information", *sf, &se);
636  } else {
638  "Strain '" + strain + "' contains taxonomic name information", org);
639  }
640  }
641 }
642 
643 
645  std::function<CRef<CTaxon3_reply>(const vector<CRef<COrg_ref>>&)> taxoncallback)
646 {
647  // get source descriptors
649  {
650  const CRef<CSeqdesc> sd = (*it);
651  if (sd->IsSource() && sd->GetSource().IsSetOrg()) {
652  const COrg_ref& org = sd->GetSource().GetOrg();
653  x_CheckOneStrain(tval, imp, org, sd, CRef<CSeq_feat>(), se, taxoncallback);
654  }
655  }
656  // also get features
657  FOR_EACH_ANNOT_ON_SEQENTRY(annot_it, se)
658  {
659  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, **annot_it)
660  {
661  const CRef<CSeq_feat> sf = (*feat_it);
662  if (sf->IsSetData() && sf->GetData().IsBiosrc()
663  && sf->GetData().GetBiosrc().IsSetOrg()) {
664  const COrg_ref& org = sf->GetData().GetBiosrc().GetOrg();
665  x_CheckOneStrain(tval, imp, org, CRef<CSeqdesc>(), sf, se, taxoncallback);
666  }
667  }
668  }
669 
670  // if set, recurse
671  if (se.IsSet()) {
673  {
674  ExploreStrainsByTaxname(tval, imp, **it, taxoncallback);
675  }
676  }
677 }
678 
679 
681  std::function<CRef<CTaxon3_reply>(const vector<CRef<COrg_ref>>&)> taxoncallback)
682 {
683  vector<string> candidates;
684 
685  x_CollectStrainsForRecord(se, candidates);
686 
687  vector<CRef<COrg_ref>> request;
688 
689  bool no_data_requests = true;
690 
691  for (vector<string>::iterator tax_it = candidates.begin(); tax_it != candidates.end(); ++tax_it) {
692  CRef<COrg_ref> org(new COrg_ref());
693  org->SetTaxname(*tax_it);
694  request.push_back(org);
695  no_data_requests = false;
696  }
697 
698  if (no_data_requests) {
699  return;
700  }
701 
702  // first pass checks all strains at once, looking for any positive taxonomy matches
703  CRef<CTaxon3_reply> reply = taxoncallback(request);
704 
705  bool no_data_returns = true;
706 
707  CTaxon3_reply::TReply::const_iterator reply_it = reply->GetReply().begin();
708  while (reply_it != reply->GetReply().end()) {
709  if ((*reply_it)->IsData() && (*reply_it)->GetData().IsSetOrg()) {
710  no_data_returns = false;
711  }
712  ++reply_it;
713  }
714 
715  if (no_data_returns) {
716  return;
717  }
718 
719  // at least one strain in record returned a hit, check individual records by strain and taxname
720  ExploreStrainsByTaxname(tval, imp, se, taxoncallback);
721 }
722 
723 
724 // old strain code
725 
727 {
728  // per VR-762, ignore strain if combination of letters and numbers
729  bool has_number = false;
730  bool has_letter = false;
731  for (char ch : str) {
732  if (isdigit(ch)) {
733  has_number = true;
734  } else if (isalpha(ch)) {
735  has_letter = true;
736  } else {
737  return false;
738  }
739  }
740  if (!has_number || !has_letter) {
741  return false;
742  } else {
743  return true;
744  }
745 }
746 
747 
748 CStrainRequest::CStrainRequest(const string& strain, const COrg_ref& org) : CQualifierRequest(), m_Strain(strain)
749 {
750  if (org.IsSetTaxname()) {
751  m_Taxname = org.GetTaxname();
752  } else {
753  m_Taxname.clear();
754  }
755 
756  m_IsInvalid = false;
757  if (NStr::IsBlank(strain) || x_IgnoreStrain(strain)) {
758  return;
759  }
760 
761  m_ValuesToTry.push_back(strain);
762  size_t pos = 0;
763  for (char ch : strain) {
764  if (isalpha(ch)) {
765  ++pos;
766  } else {
767  if (pos >= 5) {
768  m_ValuesToTry.push_back(strain.substr(0, pos));
769  }
770  break;
771  }
772  }
773 
774  if (RequireTaxname(m_Taxname)) {
775  m_ValuesToTry.push_back(MakeKey(strain, m_Taxname));
776  }
777 }
778 
779 
780 string CStrainRequest::MakeKey(const string& strain, const string& taxname)
781 {
782  if (RequireTaxname(taxname)) {
783  return taxname.substr(0, taxname.length() - 3) + strain;
784  } else {
785  return strain;
786  }
787 }
788 
789 
790 bool CStrainRequest::RequireTaxname(const string& taxname)
791 {
792  if (NStr::EndsWith(taxname, " sp.")) {
793  return true;
794  } else {
795  return false;
796  }
797 }
798 
799 
801 {
802  if (NStr::FindNoCase(str, "virus") != NPOS ||
803  NStr::FindNoCase(str, "viroid") != NPOS ||
804  NStr::FindNoCase(str, "vector") != NPOS ||
805  NStr::FindNoCase(str, "phage") != NPOS) {
806  return true;
807  } else {
808  return false;
809  }
810 }
811 
812 
814 {
815  if (org.IsSetLineage() && x_IsUnwanted(org.GetLineage())) {
816  return false;
817  }
818  if (org.IsSetTaxname() && x_IsUnwanted(org.GetTaxname())) {
819  return false;
820  }
821  if (!org.IsSetOrgMod()) {
822  return false;
823  }
824  for (const auto& it : org.GetOrgname().GetMod()) {
825  if (it->IsSetSubtype() && it->IsSetSubname() &&
826  it->GetSubtype() == COrgMod::eSubtype_strain) {
827  return true;
828  }
829  }
830  return false;
831 }
832 
833 
// Appends an error to errs when this strain was flagged invalid
// (m_IsInvalid); the message names the offending strain value.
// NOTE(review): listing line 837 (the start of the push_back, carrying the
// severity and error code) is not visible in this excerpt.
834 void CStrainRequest::ListErrors(vector<TTaxError>& errs) const
835 {
836  if (m_IsInvalid) {
838  "Strain '" + m_Strain + "' contains taxonomic name information" });
839  }
840 }
841 
842 
// Folds one taxonomy reply into this strain request.
// Once m_IsInvalid is set, further replies do not change it. A reply that
// carries organism data can mark the strain invalid; when the candidate
// currently being answered is shorter than the full strain, the reply's tax ID
// is compared against descTaxID (a match, or descTaxID == ZERO_TAX_ID, marks
// the strain invalid).
// NOTE(review): listing lines 851 and 861 are not visible in this excerpt;
// 851 appears to open an additional condition around the tax ID comparison and
// 861 presumably advances m_RepliesProcessed - confirm against the real source.
843 void CStrainRequest::AddReply(const CT3Reply& reply, TTaxId descTaxID)
844 {
845  if (!m_IsInvalid) {
846  if (reply.IsData() && reply.GetData().IsSetOrg()) {
847  // TODO: if using just a one word input, make sure name is actually in taxname
// m_RepliesProcessed indexes the candidate this reply answers.
848  if (m_ValuesToTry[m_RepliesProcessed].length() < m_Strain.length()) {
849  const COrg_ref& org = reply.GetData().GetOrg();
850  TTaxId taxID = org.GetTaxId();
852  if (taxID == descTaxID || descTaxID == ZERO_TAX_ID) {
853  m_IsInvalid = true;
854  }
855  }
856  } else {
857  m_IsInvalid = true;
858  }
859  }
860  }
862 }
863 
864 
866 {
867  m_Populated = false;
868  m_Map.clear();
869 }
870 
871 
873 {
874  m_Populated = true;
875  if (!desc->IsSource() || !desc->GetSource().IsSetOrg()) {
876  return;
877  }
878  const COrg_ref& org = desc->GetSource().GetOrg();
879  if (!org.IsSetOrgMod()) {
880  return;
881  }
882  if (!Check(org)) {
883  return;
884  }
885  for (const auto& mod_it : org.GetOrgname().GetMod()) {
886  if (mod_it->IsSetSubtype()
887  && mod_it->GetSubtype() == m_Subtype
888  && mod_it->IsSetSubname()) {
889  string qual = mod_it->GetSubname();
890  string key = GetKey(qual, org);
891 
893  if (find == m_Map.end()) {
894  m_Map[key] = x_MakeNewRequest(qual, org);
895  m_Map[key]->AddParent(desc, ctx);
896  } else {
897  find->second->AddParent(desc, ctx);
898  }
899  }
900  }
901 }
902 
903 
905 {
906  m_Populated = true;
907  if (!feat->IsSetData() || !feat->GetData().IsBiosrc() ||
908  !feat->GetData().GetBiosrc().IsSetOrg()) {
909  return;
910  }
911  const COrg_ref& org = feat->GetData().GetBiosrc().GetOrg();
912  if (!org.IsSetOrgMod()) {
913  return;
914  }
915  if (!Check(org)) {
916  return;
917  }
918  for (const auto& mod_it : org.GetOrgname().GetMod()) {
919  if (mod_it->IsSetSubtype()
920  && mod_it->GetSubtype() == m_Subtype
921  && mod_it->IsSetSubname()) {
922  string qual = mod_it->GetSubname();
923  string key = GetKey(qual, feat->GetData().GetBiosrc().GetOrg());
924 
926  if (find == m_Map.end()) {
927  m_Map[key] = x_MakeNewRequest(qual, feat->GetData().GetBiosrc().GetOrg());
928  m_Map[key]->AddParent(feat);
929  } else {
930  find->second->AddParent(feat);
931  }
932  }
933  }
934 }
935 
936 
938 {
939  m_Populated = true;
940  if (!org.IsSetOrgMod()) {
941  return;
942  }
943  if (!Check(org)) {
944  return;
945  }
946  for (const auto& mod_it : org.GetOrgname().GetMod()) {
947  if (mod_it->IsSetSubtype()
948  && mod_it->GetSubtype() == m_Subtype
949  && mod_it->IsSetSubname()) {
950  string qual = mod_it->GetSubname();
951  string key = GetKey(qual, org);
952 
954  if (find == m_Map.end()) {
955  m_Map[key] = x_MakeNewRequest(qual, org);
956  }
957  }
958  }
959 }
960 
961 
962 //LCOV_EXCL_START
963 //only used by biosample
// Registers a bare qualifier value (no owning organism) for lookup.
// Marks the map as populated and, when `find` is not already in m_Map, creates
// a request for `val` against an empty COrg_ref, keyed by `val`.
// NOTE(review): listing line 968, which declares `find`, is not visible in
// this excerpt - presumably a lookup of val in m_Map.
964 void CQualLookupMap::AddString(const string& val)
965 {
966  m_Populated = true;
967 
969  if (find == m_Map.end()) {
970  CRef<COrg_ref> org(new COrg_ref());
971  m_Map[val] = x_MakeNewRequest(val, *org);
972  }
973 }
974 //LCOV_EXCL_STOP
975 
976 
977 vector<CRef<COrg_ref> > CQualLookupMap::GetRequestList()
978 {
979  vector<CRef<COrg_ref> > org_rq_list;
980  org_rq_list.reserve(m_Map.size());
981  for (auto& it : m_Map) {
982  it.second->AddRequests(org_rq_list);
983  }
984  return org_rq_list;
985 }
986 
987 
989 {
991  if (map_it != m_Map.end() && map_it->second->NumRemainingReplies() > 0) {
992  return map_it;
993  }
994  map_it = m_Map.begin();
995  while (map_it != m_Map.end()) {
996  if (map_it->second->MatchTryValue(val) && map_it->second->NumRemainingReplies() > 0) {
997  return map_it;
998  }
999  ++map_it;
1000  }
1001  return m_Map.end();
1002 }
1003 
1004 
1005 string CQualLookupMap::IncrementalUpdate(const vector<CRef<COrg_ref> >& input, const CTaxon3_reply& reply, TTaxId descTaxID)
1006 {
1007  string error_message;
1008  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1009  vector<CRef<COrg_ref> >::const_iterator rq_it = input.begin();
1010 
1011  while (reply_it != reply.GetReply().end() && rq_it != input.end()) {
1012  TQualifierRequests::iterator map_it = x_FindRequest((*rq_it)->GetTaxname());
1013  if (map_it == m_Map.end()) {
1014  error_message = "Unexpected taxonomy response for " + (*rq_it)->GetTaxname();
1015  return error_message;
1016  }
1017  map_it->second->AddReply(**reply_it, descTaxID);
1018  ++rq_it;
1019  ++reply_it;
1020  }
1021 
1022  if (reply_it != reply.GetReply().end()) {
1023  error_message = "Unexpected taxonomy responses for " + COrgMod::GetSubtypeName(m_Subtype);
1024  }
1025  return kEmptyStr;
1026 }
1027 
1028 
1029 //LCOV_EXCL_START
1030 //only used for cleanup
1032 {
1033  for (const auto& rq_it : m_Map) {
1034  if (rq_it.second->NumRemainingReplies() > 0) {
1035  return false;
1036  }
1037  }
1038  return true;
1039 }
1040 //LCOV_EXCL_STOP
1041 
1042 
1044 {
1045  for (auto& rq_it : m_Map) {
1046  rq_it.second->PostErrors(imp);
1047  }
1048 }
1049 
1050 
1051 //LCOV_EXCL_START
1052 //only used by biosample
1053 void CQualLookupMap::ListErrors(vector<TTaxError>& errs) const
1054 {
1055  for (const auto& rq_it : m_Map) {
1056  rq_it.second->ListErrors(errs);
1057  }
1058 }
1059 
1060 
1061 //LCOV_EXCL_STOP
1062 
1063 
1065 {
1066  CRef<CQualifierRequest> rq(new CSpecificHostRequest(orig_val, org));
1067  return rq;
1068 }
1069 
1070 
1071 //LCOV_EXCL_START
1072 //used for cleanup
1074 {
1075  CRef<CQualifierRequest> rq(new CSpecificHostRequest(orig_val, org, true));
1076  return rq;
1077 }
1078 
1079 
1081 {
1082  string adjusted = host_val;
1083  NStr::TruncateSpacesInPlace(adjusted);
1084  adjusted = COrgMod::FixHost(adjusted);
1085  return adjusted;
1086 }
1087 
1088 
1090 {
1091  if (!org_ref.IsSetOrgname() ||
1092  !org_ref.GetOrgname().IsSetMod()) {
1093  return false;
1094  }
1095 
1096  bool changed = false;
1097 
1098  for (auto& m : org_ref.SetOrgname().SetMod()) {
1099  if (m->IsSetSubtype() &&
1100  m->GetSubtype() == COrgMod::eSubtype_nat_host &&
1101  m->IsSetSubname()) {
1102  string host_val = x_DefaultSpecificHostAdjustments(m->GetSubname());
1103 
1105  if (it != m_Map.end()) {
1106  const CSpecificHostRequest* rq = dynamic_cast<const CSpecificHostRequest*>(it->second.GetPointer());
1107  string new_val = x_DefaultSpecificHostAdjustments(rq->SuggestFix());
1108  if (!NStr::IsBlank(new_val) && !NStr::Equal(new_val, m->GetSubname())) {
1109  m->SetSubname(new_val);
1110  changed = true;
1111  }
1112  }
1113  }
1114  }
1115 
1116  return changed;
1117 }
1118 //LCOV_EXCL_STOP
1119 
1120 
1122 {
1123  CRef<CQualifierRequest> rq(new CStrainRequest(orig_val, org));
1124  return rq;
1125 }
1126 
1127 
1129 {
1131  m_tax_func = [this](const vector<CRef<COrg_ref>>& list) -> CRef<CTaxon3_reply> {
1132  return m_taxon3->SendOrgRefList(list);
1133  };
1134 }
1135 
1136 
1138  : m_tax_func(tax_func)
1139 {
1140 }
1141 
1142 
1144 {
1146  m_SrcDescs.clear();
1147  m_DescCtxs.clear();
1148  m_SrcFeats.clear();
1152  m_StrainRequestsBuilt = false;
1153  x_GatherSources(se);
1154 }
1155 
1156 
1158 {
1159  if (!m_DescCtxs.empty()) {
1160  return m_DescCtxs.front();
1161  } else {
1162  return CConstRef<CSeq_entry>();
1163  }
1164 }
1165 
1166 
1168 {
1169  // get source descriptors
1171  {
1172  if ((*it)->IsSource() && (*it)->GetSource().IsSetOrg()) {
1173  CConstRef<CSeqdesc> desc;
1174  desc.Reset(*it);
1175  m_SrcDescs.push_back(desc);
1176  CConstRef<CSeq_entry> r_se;
1177  r_se.Reset(&se);
1178  m_DescCtxs.push_back(r_se);
1179  }
1180  }
1181  // also get features
1182  FOR_EACH_ANNOT_ON_SEQENTRY(annot_it, se)
1183  {
1184  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, **annot_it)
1185  {
1186  if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsBiosrc()
1187  && (*feat_it)->GetData().GetBiosrc().IsSetOrg()) {
1188  CConstRef<CSeq_feat> feat;
1189  feat.Reset(*feat_it);
1190  m_SrcFeats.push_back(feat);
1191  }
1192  }
1193  }
1194 
1195  // if set, recurse
1196  if (se.IsSet()) {
1198  {
1199  x_GatherSources(**it);
1200  }
1201  }
1202 }
1203 
1204 
1206 {
1207  // request list for taxon3
1208  vector< CRef<COrg_ref> > org_rq_list;
1209 
1210  // first do descriptors
1211  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
1212  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
1213  while (desc_it != m_SrcDescs.cend() && ctx_it != m_DescCtxs.cend()) {
1214  CRef<COrg_ref> rq(new COrg_ref);
1215  const COrg_ref& org = (*desc_it)->GetSource().GetOrg();
1216  rq->Assign(org);
1217  TTaxId taxid = org.GetTaxId();
1218  if (m_descTaxID == ZERO_TAX_ID) {
1219  const_cast<TTaxId&>(m_descTaxID) = taxid;
1220  }
1221  org_rq_list.push_back(rq);
1222 
1223  ++desc_it;
1224  ++ctx_it;
1225  }
1226 
1227  // now do features
1228  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
1229  while (feat_it != m_SrcFeats.cend()) {
1230  CRef<COrg_ref> rq(new COrg_ref);
1231  const COrg_ref& org = (*feat_it)->GetData().GetBiosrc().GetOrg();
1232  rq->Assign(org);
1233  org_rq_list.push_back(rq);
1234 
1235  ++feat_it;
1236  }
1237  return org_rq_list;
1238 }
1239 
1240 
// Translates one taxonomy service error into a validator error appended to errs.
//   error - error returned by the taxonomy service; a missing message is
//           treated as "?"
//   org   - the organism that was looked up
//   type  - error code used for messages that match none of the known patterns
//   errs  - output list
// Message handling, in order:
//   - exactly "Organism not found": warning eErr_SEQ_DESCR_OrganismNotFound,
//     with a "(suggested:...)" suffix when the service proposes a taxname that
//     is not "Not valid" and differs from the organism's own taxname;
//   - starts with "Organism not found. Possible matches": same warning code,
//     service text passed through;
//   - equals kInvalidReplyMsg: error eErr_SEQ_DESCR_TaxonomyLookupProblem;
//   - contains "ambiguous name": "Taxonomy lookup failed..." message
//     (NOTE(review): listing line 1259 with the severity/code is not visible
//     in this excerpt);
//   - anything else: warning with the caller-supplied `type`.
1241 void CTaxValidationAndCleanup::x_InterpretTaxonomyError(const CT3Error& error, const COrg_ref& org, const EErrType type, vector<TTaxError>& errs) const
1242 {
1243  const string err_str = error.IsSetMessage() ? error.GetMessage() : "?";
1244 
1245  if (NStr::Equal(err_str, "Organism not found")) {
1246  string msg = "Organism not found in taxonomy database";
// Only suggest a name that is usable and actually different from the input.
1247  if (error.IsSetOrg() && error.GetOrg().IsSetTaxname() &&
1248  !NStr::Equal(error.GetOrg().GetTaxname(), "Not valid") &&
1249  (!org.IsSetTaxname() ||
1250  !NStr::Equal(org.GetTaxname(), error.GetOrg().GetTaxname()))) {
1251  msg += " (suggested:" + error.GetOrg().GetTaxname() + ")";
1252  }
1253  errs.push_back(TTaxError{ eDiag_Warning, eErr_SEQ_DESCR_OrganismNotFound, msg });
1254  } else if (NStr::StartsWith(err_str, "Organism not found. Possible matches")) {
1255  errs.push_back(TTaxError{ eDiag_Warning, eErr_SEQ_DESCR_OrganismNotFound, err_str });
1256  } else if (NStr::Equal(err_str, kInvalidReplyMsg)) {
1257  errs.push_back(TTaxError{ eDiag_Error, eErr_SEQ_DESCR_TaxonomyLookupProblem, err_str });
1258  } else if (NStr::Find(err_str, "ambiguous name") != NPOS) {
1260  "Taxonomy lookup failed with message '" + err_str + "'"});
1261  } else {
1262  errs.push_back(TTaxError{ eDiag_Warning, type,
1263  "Taxonomy lookup failed with message '" + err_str + "'" });
1264  }
1265 }
1266 
1267 
1269 (const CT3Reply& reply, const COrg_ref& org, CBioSource::TGenome genome, bool is_insd_patent, bool is_wp, vector<TTaxError>& errs) const
1270 {
// Inspects a single taxonomy reply for the org-ref that was submitted (org)
// and works out which problems apply. genome is the genome value of the
// source the org-ref came from; is_insd_patent and is_wp suppress specific
// checks below; errs is the output list.
// NOTE(review): the gutter numbering skips 1268, 1272, 1292, 1315, 1327, 1332
// and 1336. Those lines (the function name and the statements that open each
// report) are absent from this listing, so several branches below show only
// the message text of a report.
1271  if (reply.IsError()) {
// (original line 1272, the handling of an error reply, is not present here)
1273  } else if (reply.IsData()) {
// Flags start from "nothing wrong"; GetTaxFlags() below overwrites the first
// three of them.
1274  bool is_species_level = true;
1275  bool is_unidentified = false;
1276  bool force_consult = false;
1277  bool has_nucleomorphs = false;
1278  bool is_cyanobacteria = false;
1279  bool has_metagenome_source = false;
1280  if (reply.GetData().IsSetOrg()) {
1281  const COrg_ref& orp_rep = reply.GetData().GetOrg();
1282  if (org.IsSetTaxname() && orp_rep.IsSetTaxname()) {
1283  const string& taxname_req = org.GetTaxname();
1284  const string& taxname_rep = orp_rep.GetTaxname();
1285  if (NStr::Equal(taxname_rep, "unidentified")) {
1286  is_unidentified = true;
1287  }
1288  TTaxId taxid_request = org.GetTaxId();
1289  TTaxId taxid_reply = orp_rep.GetTaxId();
1290 
// A mismatch is considered only when both sides carry a non-zero taxid.
1291  if (taxid_request != ZERO_TAX_ID && taxid_reply != ZERO_TAX_ID && taxid_request != taxid_reply) {
1293  "Organism name is '" + taxname_req
1294  + "', taxonomy ID should be '" + NStr::NumericToString(taxid_reply)
1295  + "' but is '" + NStr::NumericToString(taxid_request) + "'" });
1296  }
1297  }
// The next two checks read the submitted org-ref (org), not the reply.
1298  if (org.IsSetOrgMod()) {
1299  for (const auto& it : org.GetOrgname().GetMod()) {
1300  if (it->IsSetSubtype() && it->IsSetSubname() &&
1301  it->GetSubtype() == COrgMod::eSubtype_metagenome_source) {
1302  has_metagenome_source = true;
1303  }
1304  }
1305  }
1306  if (org.IsSetLineage()) {
1307  string org_lineage = org.GetLineage();
1308  if (! NStr::IsBlank(org_lineage) && NStr::Find(org_lineage, "Bacteria; Cyanobacteriota") != NPOS) {
1309  is_cyanobacteria = true;
1310  }
1311  }
1312  }
1313  reply.GetData().GetTaxFlags(is_species_level, force_consult, has_nucleomorphs);
// The species-level report is skipped when is_wp is set.
1314  if (!is_species_level && !is_wp) {
1316  "Taxonomy lookup reports is_species_level FALSE"});
1317  }
// A consultation request from the service is dropped in two cases: an
// "unidentified" organism when is_insd_patent is set, and a Cyanobacteriota
// lineage that carries a metagenome_source modifier.
1318  if (force_consult) {
1319  if (is_insd_patent && is_unidentified) {
1320  force_consult = false;
1321  }
1322  if (is_cyanobacteria && has_metagenome_source) {
1323  force_consult = false;
1324  }
1325  }
1326  if (force_consult) {
1328  "Taxonomy lookup reports taxonomy consultation needed"});
1329  }
// The genome value must be backed by the matching flag in the reply; only
// one of the two reports can be produced for a given source.
1330  if (genome == CBioSource::eGenome_nucleomorph
1331  && !has_nucleomorphs) {
1333  "Taxonomy lookup does not have expected nucleomorph flag"});
1334  } else if (genome == CBioSource::eGenome_plastid
1335  && (!reply.GetData().HasPlastids())) {
1337  "Taxonomy lookup does not have expected plastid flag"});
1338  }
1339  }
1340 }
1341 
1343  const CTaxon3_reply& reply,
1344  CValidError_imp& imp,
1345  bool is_insd_patent) const
1346 {
1347  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1348 
1349  // process descriptor responses
1350  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
1351  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
1352 
1353  while (reply_it != reply.GetReply().end()
1354  && desc_it != m_SrcDescs.cend()
1355  && ctx_it != m_DescCtxs.cend()) {
1356  vector<TTaxError> errs;
1357  const COrg_ref& orp_req = (*desc_it)->GetSource().GetOrg();
1358  ListTaxLookupErrors(**reply_it, orp_req,
1359  (*desc_it)->GetSource().IsSetGenome() ? (*desc_it)->GetSource().GetGenome() : CBioSource::eGenome_unknown,
1360  is_insd_patent, imp.IsWP(), errs);
1361  for (const TTaxError& e : errs) {
1362  imp.PostObjErr(e.severity, e.err_type, e.err_msg, **desc_it, *ctx_it);
1363  }
1364  ++reply_it;
1365  ++desc_it;
1366  ++ctx_it;
1367  }
1368  // process feat responses
1369  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
1370  while (reply_it != reply.GetReply().cend()
1371  && feat_it != m_SrcFeats.end()) {
1372  vector<TTaxError> errs;
1373  const COrg_ref& orp_req = (*feat_it)->GetData().GetBiosrc().GetOrg();
1374  ListTaxLookupErrors(**reply_it, orp_req,
1375  (*feat_it)->GetData().GetBiosrc().IsSetGenome() ? (*feat_it)->GetData().GetBiosrc().GetGenome() : CBioSource::eGenome_unknown,
1376  is_insd_patent, imp.IsWP(), errs);
1377  for (const TTaxError& e : errs) {
1378  imp.PostErr(e.severity, e.err_type, e.err_msg,* *feat_it);
1379  }
1380  ++reply_it;
1381  ++feat_it;
1382  }
1383 }
1384 
1385 
1387  const CTaxon3_reply& reply,
1388  CValidError_imp& imp,
1389  bool is_insd_patent,
1390  size_t offset) const
1391 {
1392  // cout << MSerial_AsnText << reply << endl;
1393 
1394  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1395 
1396  // process descriptor responses
1397  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
1398  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
1399 
1400  size_t skipped = 0;
1401  while (skipped < offset
1402  && desc_it != m_SrcDescs.cend()
1403  && ctx_it != m_DescCtxs.cend()) {
1404  ++desc_it;
1405  ++ctx_it;
1406  skipped++;
1407  }
1408 
1409  while (reply_it != reply.GetReply().end()
1410  && desc_it != m_SrcDescs.cend()
1411  && ctx_it != m_DescCtxs.cend()) {
1412  vector<TTaxError> errs;
1413  const COrg_ref& orp_req = (*desc_it)->GetSource().GetOrg();
1414  ListTaxLookupErrors(**reply_it, orp_req,
1415  (*desc_it)->GetSource().IsSetGenome() ? (*desc_it)->GetSource().GetGenome() : CBioSource::eGenome_unknown,
1416  is_insd_patent, imp.IsWP(), errs);
1417  for (const TTaxError& e : errs) {
1418  imp.PostObjErr(e.severity, e.err_type, e.err_msg, **desc_it, *ctx_it);
1419  }
1420  ++reply_it;
1421  ++desc_it;
1422  ++ctx_it;
1423  }
1424 
1425  if (reply_it == reply.GetReply().end()) {
1426  return;
1427  }
1428  // process feat responses
1429  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
1430  while (skipped < offset && feat_it != m_SrcFeats.end()) {
1431  ++feat_it;
1432  skipped++;
1433  }
1434  while (reply_it != reply.GetReply().cend() &&
1435  feat_it != m_SrcFeats.end()) {
1436  vector<TTaxError> errs;
1437  const COrg_ref& orp_req = (*feat_it)->GetData().GetBiosrc().GetOrg();
1438  ListTaxLookupErrors(**reply_it, orp_req,
1439  (*feat_it)->GetData().GetBiosrc().IsSetGenome() ? (*feat_it)->GetData().GetBiosrc().GetGenome() : CBioSource::eGenome_unknown,
1440  is_insd_patent, imp.IsWP(), errs);
1441  for (const TTaxError& e : errs) {
1442  imp.PostErr(e.severity, e.err_type, e.err_msg, **feat_it);
1443  }
1444  ++reply_it;
1445  ++feat_it;
1446  }
1447 }
1448 
1449 
1450 //LCOV_EXCL_START
1451 //used by Genome Workbench
1453  const CTaxon3_reply& reply,
1454  vector<CRef<COrg_ref>> org_refs,
1455  string& error_message,
1456  bool use_error_orgrefs) const
1457 {
1458  bool changed = false;
1459  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1460  vector<CRef<COrg_ref> >::iterator org_it = org_refs.begin();
1461  while (reply_it != reply.GetReply().end() && org_it != org_refs.end()) {
1462  CRef<COrg_ref> cpy;
1463  if ((*reply_it)->IsData() &&
1464  (*reply_it)->GetData().IsSetOrg()) {
1465  cpy.Reset(new COrg_ref());
1466  cpy->Assign((*reply_it)->GetData().GetOrg());
1467  } else if (use_error_orgrefs &&
1468  (*reply_it)->IsError() &&
1469  (*reply_it)->GetError().IsSetOrg() &&
1470  (*reply_it)->GetError().GetOrg().IsSetTaxname() &&
1471  ! NStr::Equal((*reply_it)->GetError().GetOrg().GetTaxname(), "Not valid")) {
1472  cpy.Reset(new COrg_ref());
1473  cpy->Assign((*reply_it)->GetError().GetOrg());
1474  }
1475  if (cpy) {
1476  cpy->CleanForGenBank();
1477  if (!cpy->Equals(**org_it)) {
1478  (*org_it)->Assign(*cpy);
1479  changed = true;
1480  }
1481  }
1482  ++reply_it;
1483  ++org_it;
1484  }
1485  if (reply_it != reply.GetReply().end()) {
1486  error_message = "More taxonomy replies than requests!";
1487  } else if (org_it != org_refs.end()) {
1488  error_message = "Not enough taxonomy replies!";
1489  }
1490  return changed;
1491 }
1492 //LCOV_EXCL_STOP
1493 
1494 
1495 vector<CRef<COrg_ref> > CTaxValidationAndCleanup::GetSpecificHostLookupRequest(bool for_fix)
1496 {
1497  if (for_fix) {
1498  if (!m_HostMapForFix.IsPopulated()) {
1500  }
1502  } else {
1503  if (!m_HostMap.IsPopulated()) {
1505  }
1506  return m_HostMap.GetRequestList();
1507  }
1508 }
1509 
1511 {
// Returns the request list held by m_StrainMap.
// NOTE(review): the gutter numbering skips 1510 (the function header) and
// 1513 (the body of the if below). Presumably the missing statement builds
// the strain map the first time through -- confirm against the repository.
1512  if (!m_StrainRequestsBuilt) {
1514  }
1515 
1516  vector<CRef<COrg_ref> > org_rq_list = m_StrainMap.GetRequestList();
1517  return org_rq_list;
1518 }
1519 
1520 
1522 {
1523  //first do descriptors
1524  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.begin();
1525  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.begin();
1526  while (desc_it != m_SrcDescs.end() && ctx_it != m_DescCtxs.end()) {
1527  lookup.AddDesc(*desc_it, *ctx_it);
1528  ++desc_it;
1529  ++ctx_it;
1530  }
1531  // collect features with specific hosts
1532  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.begin();
1533  while (feat_it != m_SrcFeats.end()) {
1534  lookup.AddFeat(*feat_it);
1535  ++feat_it;
1536  }
1537 
1538 }
1539 
1540 
1542 {
// Records that the strain requests have been built.
// NOTE(review): the gutter numbering skips 1541 (the function header) and
// 1543 (one statement ahead of the assignment below). Presumably that
// statement populates m_StrainMap -- confirm against the repository.
1544  m_StrainRequestsBuilt = true;
1545 }
1546 
1547 
1549 {
// Hands the validator (imp) to m_HostMap so it can post its errors.
// NOTE(review): the function header (original line 1548) is not present in
// this listing.
1550  m_HostMap.PostErrors(imp);
1551 }
1552 
1553 //LCOV_EXCL_START
1554 //appears to not be used
1556 {
// Applies a taxonomy reply to m_HostMap if the map is still waiting for
// replies, then lets the map post its errors through imp.
// NOTE(review): the function header (1555) and the statement at 1563 are not
// present in this listing.
1557  string error_message;
1558  if (!m_HostMap.IsUpdateComplete()) {
1559  vector<CRef<COrg_ref> > input = m_HostMap.GetRequestList();
1560  error_message = m_HostMap.IncrementalUpdate(input, reply);
1561  }
// A non-blank message from the update ends the function before PostErrors().
// Presumably the missing line 1563 reports that message -- confirm.
1562  if (!NStr::IsBlank(error_message)) {
1564  return;
1565  }
1566 
1567  m_HostMap.PostErrors(imp);
1568 }
1569 //LCOV_EXCL_STOP
1570 
1571 
1572 //LCOV_EXCL_START
1573 //only used by cleanup
1575  vector<CRef<COrg_ref>> requests,
1576  const CTaxon3_reply& reply,
1577  vector<CRef<COrg_ref>> org_refs)
1578 {
// Feeds a specific-host reply into m_HostMapForFix and then applies the map
// to org_refs; the return value is that of AdjustOrgRefsForSpecificHosts().
// NOTE(review): the function's first line (1574) and the condition that opens
// the block below (1579) are not present in this listing, so what guards the
// IncrementalUpdate() call cannot be stated from here.
1580  // need to calculate requests for this list
1581  m_HostMapForFix.IncrementalUpdate(requests, reply);
1582  }
1583  return AdjustOrgRefsForSpecificHosts(org_refs);
1584 }
1585 
1586 
1588 {
1589  bool changed = false;
1590  for (auto org = org_refs.begin(); org != org_refs.end(); org++) {
1591  changed |= m_HostMapForFix.ApplyToOrg(**org);
1592  }
1593  return changed;
1594 }
1595 
1596 
1598 {
1600  if (map_it != m_SpecificHostRequests.end() && map_it->second.NumRemainingReplies() > 0) {
1601  return map_it;
1602  }
1603  map_it = m_SpecificHostRequests.begin();
1604  while (map_it != m_SpecificHostRequests.end()) {
1605  if (map_it->second.MatchTryValue(val) && map_it->second.NumRemainingReplies() > 0) {
1606  return map_it;
1607  }
1608  ++map_it;
1609  }
1610  return m_SpecificHostRequests.end();
1611 }
1612 //LCOV_EXCL_STOP
1613 
1614 
1616 {
1617  string error_message;
1618  if (m_HostMap.IsPopulated()) {
1619  error_message = m_HostMap.IncrementalUpdate(input, reply);
1620  }
1621  if (NStr::IsBlank(error_message)) {
1622  if (m_HostMapForFix.IsPopulated()) {
1623  error_message = m_HostMapForFix.IncrementalUpdate(input, reply);
1624  }
1625  }
1626  return error_message;
1627 }
1628 
1629 
1630 //LCOV_EXCL_START
1631 //used only by cleanup
1633 {
// Reports update completion for whichever host map is populated: m_HostMap
// is consulted first, m_HostMapForFix second, and false is returned when
// neither is populated.
// NOTE(review): the function header (1632) and the statement at 1637 are not
// present in this listing. Presumably 1637 returns the completion state of
// m_HostMapForFix -- confirm against the repository.
1634  if (m_HostMap.IsPopulated()) {
1635  return m_HostMap.IsUpdateComplete();
1636  } else if (m_HostMapForFix.IsPopulated()) {
1638  } else {
1639  return false;
1640  }
1641 }
1642 
1643 
1645 {
1646  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1648  while (rq_it != m_SpecificHostRequests.end()) {
1649  while (rq_it->second.NumRemainingReplies() > 0 && reply_it != reply.GetReply().end()) {
1650  rq_it->second.AddReply(**reply_it, 0);
1651  ++reply_it;
1652  }
1653  if (rq_it->second.NumRemainingReplies() > 0) {
1654  error_message = "Failed to respond to all taxonomy requests for specific host";
1655  break;
1656  }
1657  ++rq_it;
1658  }
1659 
1660  if (reply_it != reply.GetReply().end()) {
1661  error_message = "Unexpected taxonomy responses for specific host";
1662  }
1663 }
1664 
1665 
1667 {
1668  if (!org_ref.IsSetOrgname() ||
1669  !org_ref.GetOrgname().IsSetMod()) {
1670  return false;
1671  }
1672 
1673  bool changed = false;
1674 
1675  for (auto& m : org_ref.SetOrgname().SetMod()) {
1676  if (m->IsSetSubtype() &&
1677  m->GetSubtype() == COrgMod::eSubtype_nat_host &&
1678  m->IsSetSubname()) {
1679  string host_val = x_DefaultSpecificHostAdjustments(m->GetSubname());
1681  if (it != m_SpecificHostRequests.end()) {
1682  const string& new_val = it->second.SuggestFix();
1683  if (!NStr::IsBlank(new_val) && !NStr::Equal(new_val, m->GetSubname())) {
1684  m->SetSubname(new_val);
1685  changed = true;
1686  }
1687  }
1688  }
1689  }
1690 
1691  return changed;
1692 }
1693 
1694 
1696 {
1697  string adjusted = host_val;
1698  NStr::TruncateSpacesInPlace(adjusted);
1699  adjusted = COrgMod::FixHost(adjusted);
1700  return adjusted;
1701 }
1702 
1703 
1705 {
1706  return m_StrainMap.IncrementalUpdate(input, reply, descTaxID);
1707 }
1708 
1709 
1711 {
// Forwards the completion state reported by m_StrainMap.
// NOTE(review): the function header (original line 1710) is not present in
// this listing.
1712  return m_StrainMap.IsUpdateComplete();
1713 }
1714 //LCOV_EXCL_STOP
1715 
1716 
1718 {
1719  m_StrainMap.PostErrors(imp);
1720 }
1721 
1723 {
1724  return (num < m_DescCtxs.size()) ? m_DescCtxs[num] : CConstRef<CSeq_entry>();
1725 }
1726 
1727 
1728 //LCOV_EXCL_START
1729 //used by Genome Workbench, asn_cleanup, and table2asn but not asnvalidate
1731 {
1732  Init(*(seh.GetCompleteSeq_entry()));
1733 
1734  vector<CRef<COrg_ref> > original_orgs = GetTaxonomyLookupRequest();
1735  if (original_orgs.empty())
1736  {
1737  return false;
1738  }
1739  const size_t chunk_size = 1000;
1740  vector< CRef<COrg_ref> > edited_orgs;
1741 
1742  size_t i = 0;
1743  while (i < original_orgs.size())
1744  {
1745  size_t len = min(chunk_size, original_orgs.size() - i);
1746  vector< CRef<COrg_ref> > tmp_original_orgs(original_orgs.begin() + i, original_orgs.begin() + i + len);
1747  vector< CRef<COrg_ref> > tmp_edited_orgs;
1748  for (CRef<COrg_ref>& it : tmp_original_orgs)
1749  {
1750  CRef<COrg_ref> cpy(new COrg_ref());
1751  cpy->Assign(*it);
1752  tmp_edited_orgs.push_back(cpy);
1753  }
1754  CRef<CTaxon3_reply> tmp_lookup_reply = m_tax_func(tmp_original_orgs);
1755  string error_message;
1756  AdjustOrgRefsWithTaxLookupReply(*tmp_lookup_reply, tmp_edited_orgs, error_message);
1757  if (!NStr::IsBlank(error_message))
1758  {
1759  // post error message
1760  ERR_POST(Error << error_message);
1761  return false;
1762  }
1763  edited_orgs.insert(edited_orgs.end(), tmp_edited_orgs.begin(), tmp_edited_orgs.end());
1764  i += len;
1765  }
1766 
1767  if (with_host) {
1768  vector< CRef<COrg_ref> > spec_host_rq = GetSpecificHostLookupRequest(true);
1769  i = 0;
1770  while (i < spec_host_rq.size())
1771  {
1772  size_t len = min(chunk_size, spec_host_rq.size() - i);
1773  vector< CRef<COrg_ref> > tmp_spec_host_rq(spec_host_rq.begin() + i, spec_host_rq.begin() + i + len);
1774  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(tmp_spec_host_rq);
1775  string error_message = IncrementalSpecificHostMapUpdate(tmp_spec_host_rq, *tmp_spec_host_reply);
1776  if (!NStr::IsBlank(error_message))
1777  {
1778  // post error message
1779  ERR_POST(Error << error_message);
1780  return false;
1781  }
1782  i += len;
1783  }
1784 
1785  AdjustOrgRefsForSpecificHosts(edited_orgs);
1786  }
1787 
1788  // update descriptors
1789  size_t num_descs = NumDescs();
1790  size_t num_updated_descs = 0;
1791  for (size_t n = 0; n < num_descs; n++) {
1792  if (!original_orgs[n]->Equals(*(edited_orgs[n]))) {
1793  CSeqdesc* orig = const_cast<CSeqdesc *>(GetDesc(n).GetPointer());
1794  orig->SetSource().SetOrg().Assign(*(edited_orgs[n]));
1795  num_updated_descs++;
1796  }
1797  }
1798 
1799  // now update features
1800  size_t num_updated_feats = 0;
1801  for (size_t n = 0; n < NumFeats(); n++) {
1802  if (!original_orgs[n + num_descs]->Equals(*edited_orgs[n + num_descs])) {
1803  CConstRef<CSeq_feat> feat = GetFeat(n);
1804  CRef<CSeq_feat> new_feat(new CSeq_feat());
1805  new_feat->Assign(*feat);
1806  new_feat->SetData().SetBiosrc().SetOrg().Assign(*(edited_orgs[n + num_descs]));
1807 
1808  CSeq_feat_Handle fh = seh.GetScope().GetSeq_featHandle(*feat);
1809  CSeq_feat_EditHandle efh(fh);
1810  efh.Replace(*new_feat);
1811  num_updated_feats++;
1812  }
1813  }
1814  return (num_updated_descs > 0 || num_updated_feats > 0);
1815 }
1816 //LCOV_EXCL_STOP
1817 
1818 
1819 //LCOV_EXCL_START
1820 //only used by biosample
1822 {
// Attempts to correct a single specific-host value (val) in place. A value
// that IsOneSpecificHostValid() accepts is left untouched.
// NOTE(review): the gutter numbering skips 1821 (the function header), 1823,
// 1828, 1829, 1833, 1844, 1856 and 1859. The comments below describe only the
// statements that are visible in this listing.
1824  string err_msg;
1825  if(IsOneSpecificHostValid(val, err_msg)) {
1826  return;
1827  }
1830 
1831  vector< CRef<COrg_ref> > spec_host_rq = m_HostMapForFix.GetRequestList();
// Nothing to ask taxonomy about: return early.
1832  if (spec_host_rq.empty()) {
1834  return;
1835  }
// Wrap val in a throw-away org-ref as a nat_host modifier; the result is
// read back from this object at the end of the function.
1836  vector< CRef<COrg_ref> > edited;
1837  edited.push_back(CRef<COrg_ref>(new COrg_ref()));
1838  edited.front()->SetOrgname().SetMod().push_back(CRef<COrgMod>(new COrgMod(COrgMod::eSubtype_nat_host, val)));
1839 
1840  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(spec_host_rq);
1841 
// NOTE(review): the reply is dereferenced without a null check, and front()
// is called without checking that the reply list is non-empty.
// When the reply is unset or its first entry is not data, val is blanked.
1842  if (!tmp_spec_host_reply->IsSetReply() || !tmp_spec_host_reply->GetReply().front()->IsData()) {
1843  val = kEmptyStr;
1845  return;
1846  }
1847 
// An update error is logged but does not stop the function.
1848  string error_message = IncrementalSpecificHostMapUpdate(spec_host_rq, *tmp_spec_host_reply);
1849  if (!NStr::IsBlank(error_message))
1850  {
1851  // post error message
1852  ERR_POST(Error << error_message);
1853  }
1854 
1855 
1857 
// Copy the modifier value of the throw-away org-ref back into val.
1858  val = edited.front()->GetOrgname().GetMod().front()->GetSubname();
1860 }
1861 //LCOV_EXCL_STOP
1862 
1863 
1864 //LCOV_EXCL_START
1865 //only used by biosample
1866 bool CTaxValidationAndCleanup::IsOneSpecificHostValid(const string& val, string& error_msg)
1867 {
1868  error_msg = kEmptyStr;
1869  m_HostMap.Clear();
1870 
1872 
1873  vector< CRef<COrg_ref> > spec_host_rq = m_HostMap.GetRequestList();
1874  if (spec_host_rq.empty()) {
1875  m_HostMap.Clear();
1876  return true;
1877  }
1878 
1879  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(spec_host_rq);
1880 
1881  string err_msg;
1882  if (tmp_spec_host_reply) {
1883  err_msg = IncrementalSpecificHostMapUpdate(spec_host_rq, *tmp_spec_host_reply);
1884  } else {
1885  err_msg = "Connection to taxonomy failed";
1886  }
1887  bool rval = true;
1888  error_msg = err_msg;
1889 
1890  if (!NStr::IsBlank(err_msg)) {
1891  ERR_POST(Error << err_msg);
1892  m_HostMap.Clear();
1893  rval = false;
1894  } else {
1895  vector<TTaxError> errs;
1896  m_HostMap.ListErrors(errs);
1897  if (errs.size() > 0) {
1898  error_msg = errs.front().err_msg;
1899  rval = false;
1900  }
1901  }
1902  m_HostMap.Clear();
1903  return rval;
1904 }
1905 //LCOV_EXCL_STOP
1906 
1907 
1909 {
// Validates a single org-ref (org) with three taxonomy lookups: the organism
// itself, its specific-host values, and its strain. Problems collected in
// errs are posted through imp at the end.
// NOTE(review): the gutter numbering skips 1908 (the function header), 1923,
// 1943 and 1957. Those lines open the statements that report a failed lookup
// and are not present in this listing.
1910  x_ClearMaps();
1911 
1912  vector<TTaxError> errs;
1913 
1914  // lookup of whole org
1915  vector< CRef<COrg_ref> > org_rq_list;
1916  CRef<COrg_ref> rq(new COrg_ref);
1917  rq->Assign(org);
1918  org_rq_list.push_back(rq);
1919 
1920  CRef<CTaxon3_reply> reply = m_tax_func(org_rq_list);
1921 
1922  if (!reply || !reply->IsSetReply()) {
1924  "Taxonomy service connection failure", org);
1925  } else {
// NOTE(review): front() is called without checking that the reply list is
// non-empty.
1926  ListTaxLookupErrors(*(reply->GetReply().front()), org, genome,
1927  false, false, errs);
1928  }
1929 
1930  // Now look at specific-host values
1931  m_HostMap.AddOrg(org);
1932  org_rq_list = GetSpecificHostLookupRequest(false);
1933 
1934  if (!org_rq_list.empty()) {
1935  reply = m_tax_func(org_rq_list);
1936  string err_msg;
1937  if (reply) {
1938  err_msg = IncrementalSpecificHostMapUpdate(org_rq_list, *reply);
1939  } else {
1940  err_msg = "Connection to taxonomy failed";
1941  }
1942  if (!NStr::IsBlank(err_msg)) {
1944  } else {
1945  m_HostMap.ListErrors(errs);
1946  }
1947  }
1948 
1949 
1950  // validate strain
1951  m_StrainMap.AddOrg(org);
1952  org_rq_list = GetStrainLookupRequest();
1953  if (!org_rq_list.empty()) {
1954  reply = m_tax_func(org_rq_list);
// NOTE(review): unlike the specific-host lookup above, reply is dereferenced
// here without a null check.
1955  string err_msg = IncrementalStrainMapUpdate(org_rq_list, *reply);
1956  if (!NStr::IsBlank(err_msg)) {
1958  } else {
1959  m_StrainMap.ListErrors(errs);
1960  }
1961  }
1962 
// Everything collected in errs is posted against org.
1963  for (const TTaxError& e : errs) {
1964  imp.PostObjErr(e.severity, e.err_type, e.err_msg, org);
1965  }
1966 }
1967 
1968 
1969 END_SCOPE(validator)
EErrType
@ eErr_SEQ_DESCR_TaxonomyConsultRequired
@ eErr_SEQ_DESCR_TaxonomyServiceProblem
@ eErr_SEQ_DESCR_TaxonomyNucleomorphProblem
@ eErr_SEQ_DESCR_TaxonomyPlastidsProblem
@ eErr_SEQ_DESCR_StrainContainsTaxInfo
@ eErr_SEQ_DESCR_OrganismNotFound
@ eErr_SEQ_DESCR_AmbiguousSpecificHost
@ eErr_SEQ_DESCR_TaxonomyAmbiguousName
@ eErr_SEQ_DESCR_TaxonomyLookupProblem
@ eErr_SEQ_DESCR_BadSpecificHost
@ eErr_SEQ_DESCR_TaxonomyIsSpeciesProblem
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
static string FixHost(const string &value)
Definition: OrgMod.cpp:1008
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
void CleanForGenBank()
Definition: Org_ref.cpp:510
const string & GetLineage(void) const
Definition: Org_ref.cpp:124
bool IsSetOrgMod(void) const
Definition: Org_ref.cpp:169
bool IsSetLineage(void) const
Definition: Org_ref.cpp:119
TQualifierRequests::iterator x_FindRequest(const string &val)
string IncrementalUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)
virtual CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org)=0
virtual void ListErrors(vector< TTaxError > &errs) const
void AddDesc(CConstRef< CSeqdesc > desc, CConstRef< CSeq_entry > ctx)
void AddOrg(const COrg_ref &org)
virtual bool Check(const COrg_ref &) const
void AddFeat(CConstRef< CSeq_feat > feat)
virtual string GetKey(const string &orig_val, const COrg_ref &org) const =0
void AddString(const string &val)
void PostErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetRequestList()
bool MatchTryValue(const string &val) const
pair< CConstRef< CSeqdesc >, CConstRef< CSeq_entry > > TDescPair
void PostErrors(CValidError_imp &imp)
void AddParent(CConstRef< CSeqdesc > desc, CConstRef< CSeq_entry > ctx)
void AddRequests(vector< CRef< COrg_ref > > &request_list) const
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
bool ApplyToOrg(COrg_ref &org) const override
static string x_DefaultSpecificHostAdjustments(const string &host_val)
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
void ListErrors(vector< TTaxError > &errs) const override
CSpecificHostRequest(const string &orig_val, const COrg_ref &org, bool for_fix=false)
void AddReply(const CT3Reply &reply, TTaxId descTaxID) override
const string & SuggestFix() const
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
static void x_AddOneStrain(const COrg_ref &org, vector< string > &candidates)
static void ExploreStrainsByTaxname(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
static bool Check(const COrg_ref &org)
static string MakeKey(const string &strain, const string &taxname)
void ListErrors(vector< TTaxError > &errs) const override
static bool x_IgnoreStrain(const string &str)
void AddReply(const CT3Reply &reply, TTaxId descTaxID) override
static bool StrainContainsTaxonInfo(const string &organism, const string &strain, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
static bool x_IsUnwanted(const string &str)
static void ExploreStrainsForTaxonInfo(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
CStrainRequest(const string &strain, const COrg_ref &org)
static void x_CheckOneStrain(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const COrg_ref &org, const CRef< CSeqdesc > sd, const CRef< CSeq_feat > sf, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
static void x_CollectStrainsForRecord(const CSeq_entry &se, vector< string > &candidates)
static bool RequireTaxname(const string &taxname)
void GetTaxFlags(bool &is_species_level, bool &force_consult, bool &has_nucleomorphs) const
Definition: T3Data.cpp:58
bool HasPlastids(void) const
Definition: T3Data.cpp:93
CT3Reply –.
Definition: T3Reply.hpp:66
void x_InterpretTaxonomyError(const CT3Error &error, const COrg_ref &org, const EErrType type, vector< TTaxError > &errs) const
CConstRef< CSeq_feat > GetFeat(size_t num) const
vector< CConstRef< CSeq_feat > > m_SrcFeats
vector< CConstRef< CSeq_entry > > m_DescCtxs
static string x_DefaultSpecificHostAdjustments(const string &host_val)
vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const
void ReportSpecificHostErrors(const CTaxon3_reply &reply, CValidError_imp &imp)
CConstRef< CSeq_entry > GetSeqContext(size_t num) const
void x_UpdateSpecificHostMapWithReply(const CTaxon3_reply &reply, string &error_message)
TSpecificHostRequests m_SpecificHostRequests
TSpecificHostRequests::iterator x_FindHostFixRequest(const string &val)
CSpecificHostMapForFix m_HostMapForFix
void x_CreateQualifierMap(CQualLookupMap &lookup)
void ListTaxLookupErrors(const CT3Reply &reply, const COrg_ref &org, CBioSource::TGenome genome, bool is_insd_patent, bool is_wp, vector< TTaxError > &errs) const
void ReportIncrementalTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent, size_t offset) const
bool AdjustOrgRefsForSpecificHosts(vector< CRef< COrg_ref > > org_refs)
vector< CConstRef< CSeqdesc > > m_SrcDescs
CConstRef< CSeqdesc > GetDesc(size_t num) const
bool x_ApplySpecificHostMap(COrg_ref &org_ref) const
bool AdjustOrgRefsWithSpecificHostReply(vector< CRef< COrg_ref >> requests, const CTaxon3_reply &reply, vector< CRef< COrg_ref >> org_refs)
void ReportTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent) const
void CheckOneOrg(const COrg_ref &org, CBioSource::TGenome genome, CValidError_imp &imp)
bool AdjustOrgRefsWithTaxLookupReply(const CTaxon3_reply &reply, vector< CRef< COrg_ref > > org_refs, string &error_message, bool use_error_orgrefs=false) const
vector< CRef< COrg_ref > > GetStrainLookupRequest()
string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
bool DoTaxonomyUpdate(CSeq_entry_Handle seh, bool with_host)
CConstRef< CSeq_entry > GetTopReportObject() const
string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)
void ReportStrainErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)
bool IsOneSpecificHostValid(const string &val, string &err_msg)
void x_GatherSources(const CSeq_entry &se)
CTaxon3_reply –.
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
Definition: validatorp.cpp:372
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsWP() const
size_type size() const
Definition: map.hpp:148
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static const int chunk_size
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
CS_CONTEXT * ctx
Definition: t0006.c:12
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
Definition: Org_ref_.cpp:87
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
bool IsSetData(void) const
Check if a value has been assigned to Data data member (the specific data).
Definition: Seq_feat_.hpp:913
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
bool IsData(void) const
Check if variant Data is selected.
Definition: T3Reply_.hpp:263
const TData & GetData(void) const
Get the variant data.
Definition: T3Reply_.cpp:124
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: T3Data_.hpp:273
const TReply & GetReply(void) const
Get the Reply member data.
bool IsError(void) const
Check if variant Error is selected.
Definition: T3Reply_.hpp:257
const TError & GetError(void) const
Get the variant data.
Definition: T3Reply_.cpp:102
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: T3Data_.hpp:285
where both are integers</td>\n<td></td>\n</tr>\n<tr>\n<td>tse</td>\n<td>optional</td>\n<td>String</td>\n<td class=\"description\">TSE option controls what blob is orig
function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref > > &list)> taxupdate_func_t
Definition: itaxon3.hpp:60
static int input()
int i
yy_size_t n
int len
const struct ncbi::grid::netcache::search::fields::KEY key
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T min(T x_, T y_)
The Object manager core.
bool IsCommonName(const CT3Data &data)
Definition: utilities.cpp:1944
bool HasMisSpellFlag(const CT3Data &data)
Definition: utilities.cpp:1966
string SpecificHostValueToCheck(const string &val)
Definition: utilities.cpp:2042
string InterpretSpecificHostResult(const string &host, const CT3Reply &reply, const string &orig_host=kEmptyStr)
Definition: utilities.cpp:2085
bool IsLikelyTaxname(const string &val)
Definition: utilities.cpp:2150
bool FindMatchInOrgRef(const string &str, const COrg_ref &org)
Definition: utilities.cpp:1985
#define FOR_EACH_SEQFEAT_ON_SEQANNOT(Itr, Var)
FOR_EACH_SEQFEAT_ON_SEQANNOT EDIT_EACH_SEQFEAT_ON_SEQANNOT.
Definition: seq_macros.hpp:410
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FOR_EACH_DESCRIPTOR_ON_SEQENTRY
#define FOR_EACH_ANNOT_ON_SEQENTRY
Definition: type.c:6
static void GetStrainCandidates(const string &organism, const string &strain, vector< string > &candidates)
const string kInvalidReplyMsg
static bool OrganismIsUnwanted(const string &str)
static bool CheckStrainReply(const string &organism, const string &strain, CRef< CTaxon3_reply > reply)
static bool IgnoreStrain(const string &str)
Modified on Thu May 30 12:24:28 2024 by modify_doxy.py rev. 669887