NCBI C++ ToolKit
tax_validation_and_cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tax_validation_and_cleanup.cpp 101354 2023-12-05 15:27:22Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * Tools for batch processing taxonomy-related validation and cleanup
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
37 
38 #include <serial/iterator.hpp>
39 
46 
47 #include <objmgr/bioseq_ci.hpp>
48 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/util/feature.hpp>
50 
51 #include <objmgr/feat_ci.hpp>
52 #include <objmgr/scope.hpp>
53 
55 
60 
61 #define NCBI_USE_ERRCODE_X Objtools_Validator
62 
65 BEGIN_SCOPE(validator)
66 using namespace sequence;
67 
68 const string kInvalidReplyMsg = "Taxonomy service returned invalid reply";
69 
70 
72 {
73  x_Init();
74 }
75 
76 
78 {
79  m_ValuesToTry.clear();
80  m_RepliesProcessed = 0;
81  m_Descs.clear();
82  m_Feats.clear();
83 }
84 
85 
87 {
88  m_Descs.push_back(TDescPair(desc, ctx));
89 }
90 
91 
93 {
94  m_Feats.push_back(feat);
95 }
96 
97 
98 void CQualifierRequest::AddRequests(vector<CRef<COrg_ref> >& request_list) const
99 {
100  for (const string& it : m_ValuesToTry) {
101  CRef<COrg_ref> rq(new COrg_ref);
102  rq->SetTaxname(it);
103  request_list.push_back(rq);
104  }
105 }
106 
107 
108 bool CQualifierRequest::MatchTryValue(const string& val) const
109 {
110  for (const string& it : m_ValuesToTry) {
111  if (NStr::EqualNocase(val, it)) {
112  return true;
113  }
114  }
115  return false;
116 }
117 
118 
120 {
121  vector<TTaxError> errs;
122  ListErrors(errs);
123  for (const auto& e : errs) {
124  for (const auto& it : m_Descs) {
125  imp.PostObjErr(e.severity, e.err_type, e.err_msg, *(it.first), it.second);
126  }
127  for (const auto& it : m_Feats) {
128  imp.PostObjErr(e.severity, e.err_type, e.err_msg, *it);
129  }
130  }
131 }
132 
133 
134 CSpecificHostRequest::CSpecificHostRequest(const string& host, const COrg_ref& org, bool for_fix) :
136  m_Host(host),
137  m_Response(eUnrecognized),
138  m_HostLineage(),
139  m_OrgLineage()
140 {
141  string host_check = SpecificHostValueToCheck(host);
142  if (NStr::IsBlank(host_check)) {
144  return;
145  }
146  if (!for_fix && !NStr::Equal(host, host_check)) {
147  m_ValuesToTry.push_back(host_check);
148  }
149  m_ValuesToTry.push_back(host);
150 
151  m_SuggestedFix.clear();
152  if (org.IsSetLineage()) {
153  m_OrgLineage = org.GetLineage();
154  }
155 }
156 
157 
159 {
160  if (m_Response == eAmbiguous) {
162  if (NStr::IsBlank(new_error)) {
166  m_Error = kEmptyStr;
167  }
168  } else if (m_Response == eUnrecognized) {
170  if (NStr::IsBlank(m_Error)) {
174  } else if (NStr::Find(m_Error, "ambiguous") != NPOS) {
176  } else if (NStr::StartsWith(m_Error, "Invalid value for specific host") && !IsLikelyTaxname(m_Host)) {
179  } else if (NStr::StartsWith(m_Error, "Specific host value is alternate name")) {
181  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
183  } else {
185  if (NStr::IsBlank(m_SuggestedFix) && reply.IsData() && reply.GetData().IsSetOrg()) {
186  if (HasMisSpellFlag(reply.GetData())) {
187  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
189  } else if (!FindMatchInOrgRef(m_Host, reply.GetData().GetOrg())
190  && !IsCommonName(reply.GetData())) {
191  m_SuggestedFix = reply.GetData().GetOrg().GetTaxname();
193  }
194  }
195  }
196  }
198 }
199 
200 
201 void CSpecificHostRequest::ListErrors(vector<TTaxError>& errs) const
202 {
203  switch (m_Response) {
204  case eNormal:
205  break;
206  case eAmbiguous:
208  break;
209  case eUnrecognized:
211  break;
212  case eAlternateName:
214  break;
215  }
216 
218  (NStr::Find(m_OrgLineage, "Streptophyta;") != NPOS || NStr::Find(m_OrgLineage, "Metazoa;") != NPOS) &&
219  (NStr::Find(m_HostLineage, "Fungi;") != NPOS || NStr::Find(m_HostLineage, "Bacteria;") != NPOS ||
220  NStr::Find(m_HostLineage, "Archaea;") != NPOS || NStr::Find(m_HostLineage, "Viruses;") != NPOS)) {
222  "Suspect Host Value - a prokaryote, fungus or virus is suspect as a host for a plant or animal" });
223  }
224 }
225 
226 
227 //LCOV_EXCL_START
228 //used by cleanup
229 const string& CSpecificHostRequest::SuggestFix() const
230 {
231  if (m_ValuesToTry.empty()) {
232  return m_Host;
233  } else {
234  return m_SuggestedFix;
235  }
236 }
237 //LCOV_EXCL_STOP
238 
239 
241 {
242  // per VR-762, ignore strain if combination of letters and numbers
243  bool has_number = false;
244  bool has_letter = false;
245  for (char ch : str) {
246  if (isdigit(ch)) {
247  has_number = true;
248  } else if (isalpha(ch)) {
249  has_letter = true;
250  } else {
251  return false;
252  }
253  }
254  if (!has_number || !has_letter) {
255  return false;
256  } else {
257  return true;
258  }
259 }
260 
261 
262 CStrainRequest::CStrainRequest(const string& strain, const COrg_ref& org) : CQualifierRequest(), m_Strain(strain)
263 {
264  if (org.IsSetTaxname()) {
265  m_Taxname = org.GetTaxname();
266  } else {
267  m_Taxname.clear();
268  }
269 
270  m_IsInvalid = false;
271  if (NStr::IsBlank(strain) || x_IgnoreStrain(strain)) {
272  return;
273  }
274 
275  m_ValuesToTry.push_back(strain);
276  size_t pos = 0;
277  for (char ch : strain) {
278  if (isalpha(ch)) {
279  ++pos;
280  } else {
281  if (pos >= 5) {
282  m_ValuesToTry.push_back(strain.substr(0, pos));
283  }
284  break;
285  }
286  }
287 
288  if (RequireTaxname(m_Taxname)) {
289  m_ValuesToTry.push_back(MakeKey(strain, m_Taxname));
290  }
291 }
292 
293 
294 string CStrainRequest::MakeKey(const string& strain, const string& taxname)
295 {
296  if (RequireTaxname(taxname)) {
297  return taxname.substr(0, taxname.length() - 3) + strain;
298  } else {
299  return strain;
300  }
301 }
302 
303 
304 bool CStrainRequest::RequireTaxname(const string& taxname)
305 {
306  if (NStr::EndsWith(taxname, " sp.")) {
307  return true;
308  } else {
309  return false;
310  }
311 }
312 
313 
315 {
316  if (NStr::FindNoCase(str, "virus") != NPOS ||
317  NStr::FindNoCase(str, "viroid") != NPOS ||
318  NStr::FindNoCase(str, "vector") != NPOS ||
319  NStr::FindNoCase(str, "phage") != NPOS) {
320  return true;
321  } else {
322  return false;
323  }
324 }
325 
326 
328 {
329  if (org.IsSetLineage() && x_IsUnwanted(org.GetLineage())) {
330  return false;
331  }
332  if (org.IsSetTaxname() && x_IsUnwanted(org.GetTaxname())) {
333  return false;
334  }
335  if (!org.IsSetOrgMod()) {
336  return false;
337  }
338  for (const auto& it : org.GetOrgname().GetMod()) {
339  if (it->IsSetSubtype() && it->IsSetSubname() &&
340  it->GetSubtype() == COrgMod::eSubtype_strain) {
341  return true;
342  }
343  }
344  return false;
345 }
346 
347 
348 void CStrainRequest::ListErrors(vector<TTaxError>& errs) const
349 {
350  if (m_IsInvalid) {
352  "Strain '" + m_Strain + "' contains taxonomic name information" });
353  }
354 }
355 
356 
358 {
359  if (!m_IsInvalid) {
360  if (reply.IsData() && reply.GetData().IsSetOrg()) {
361  // TODO: if using just a one word input, make sure name is actually in taxname
362  if (m_ValuesToTry[m_RepliesProcessed].length() < m_Strain.length()) {
364  m_IsInvalid = true;
365  }
366  } else {
367  m_IsInvalid = true;
368  }
369  }
370  }
372 }
373 
374 
376 {
377  m_Populated = false;
378  m_Map.clear();
379 }
380 
381 
383 {
384  m_Populated = true;
385  if (!desc->IsSource() || !desc->GetSource().IsSetOrg()) {
386  return;
387  }
388  const COrg_ref& org = desc->GetSource().GetOrg();
389  if (!org.IsSetOrgMod()) {
390  return;
391  }
392  if (!Check(org)) {
393  return;
394  }
395  for (const auto& mod_it : org.GetOrgname().GetMod()) {
396  if (mod_it->IsSetSubtype()
397  && mod_it->GetSubtype() == m_Subtype
398  && mod_it->IsSetSubname()) {
399  string qual = mod_it->GetSubname();
400  string key = GetKey(qual, org);
401 
403  if (find == m_Map.end()) {
404  m_Map[key] = x_MakeNewRequest(qual, org);
405  m_Map[key]->AddParent(desc, ctx);
406  } else {
407  find->second->AddParent(desc, ctx);
408  }
409  }
410  }
411 }
412 
413 
415 {
416  m_Populated = true;
417  if (!feat->IsSetData() || !feat->GetData().IsBiosrc() ||
418  !feat->GetData().GetBiosrc().IsSetOrg()) {
419  return;
420  }
421  const COrg_ref& org = feat->GetData().GetBiosrc().GetOrg();
422  if (!org.IsSetOrgMod()) {
423  return;
424  }
425  if (!Check(org)) {
426  return;
427  }
428  for (const auto& mod_it : org.GetOrgname().GetMod()) {
429  if (mod_it->IsSetSubtype()
430  && mod_it->GetSubtype() == m_Subtype
431  && mod_it->IsSetSubname()) {
432  string qual = mod_it->GetSubname();
433  string key = GetKey(qual, feat->GetData().GetBiosrc().GetOrg());
434 
436  if (find == m_Map.end()) {
437  m_Map[key] = x_MakeNewRequest(qual, feat->GetData().GetBiosrc().GetOrg());
438  m_Map[key]->AddParent(feat);
439  } else {
440  find->second->AddParent(feat);
441  }
442  }
443  }
444 }
445 
446 
448 {
449  m_Populated = true;
450  if (!org.IsSetOrgMod()) {
451  return;
452  }
453  if (!Check(org)) {
454  return;
455  }
456  for (const auto& mod_it : org.GetOrgname().GetMod()) {
457  if (mod_it->IsSetSubtype()
458  && mod_it->GetSubtype() == m_Subtype
459  && mod_it->IsSetSubname()) {
460  string qual = mod_it->GetSubname();
461  string key = GetKey(qual, org);
462 
464  if (find == m_Map.end()) {
465  m_Map[key] = x_MakeNewRequest(qual, org);
466  }
467  }
468  }
469 }
470 
471 
472 //LCOV_EXCL_START
473 //only used by biosample
474 void CQualLookupMap::AddString(const string& val)
475 {
476  m_Populated = true;
477 
479  if (find == m_Map.end()) {
480  CRef<COrg_ref> org(new COrg_ref());
481  m_Map[val] = x_MakeNewRequest(val, *org);
482  }
483 }
484 //LCOV_EXCL_STOP
485 
486 
487 vector<CRef<COrg_ref> > CQualLookupMap::GetRequestList()
488 {
489  vector<CRef<COrg_ref> > org_rq_list;
490  org_rq_list.reserve(m_Map.size());
491  for (auto& it : m_Map) {
492  it.second->AddRequests(org_rq_list);
493  }
494  return org_rq_list;
495 }
496 
497 
499 {
501  if (map_it != m_Map.end() && map_it->second->NumRemainingReplies() > 0) {
502  return map_it;
503  }
504  map_it = m_Map.begin();
505  while (map_it != m_Map.end()) {
506  if (map_it->second->MatchTryValue(val) && map_it->second->NumRemainingReplies() > 0) {
507  return map_it;
508  }
509  ++map_it;
510  }
511  return m_Map.end();
512 }
513 
514 
516 {
517  string error_message;
518  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
519  vector<CRef<COrg_ref> >::const_iterator rq_it = input.begin();
520 
521  while (reply_it != reply.GetReply().end() && rq_it != input.end()) {
522  TQualifierRequests::iterator map_it = x_FindRequest((*rq_it)->GetTaxname());
523  if (map_it == m_Map.end()) {
524  error_message = "Unexpected taxonomy response for " + (*rq_it)->GetTaxname();
525  return error_message;
526  }
527  map_it->second->AddReply(**reply_it);
528  ++rq_it;
529  ++reply_it;
530  }
531 
532  if (reply_it != reply.GetReply().end()) {
533  error_message = "Unexpected taxonomy responses for " + COrgMod::GetSubtypeName(m_Subtype);
534  }
535  return kEmptyStr;
536 }
537 
538 
539 //LCOV_EXCL_START
540 //only used for cleanup
542 {
543  for (const auto& rq_it : m_Map) {
544  if (rq_it.second->NumRemainingReplies() > 0) {
545  return false;
546  }
547  }
548  return true;
549 }
550 //LCOV_EXCL_STOP
551 
552 
554 {
555  for (auto& rq_it : m_Map) {
556  rq_it.second->PostErrors(imp);
557  }
558 }
559 
560 
561 //LCOV_EXCL_START
562 //only used by biosample
563 void CQualLookupMap::ListErrors(vector<TTaxError>& errs) const
564 {
565  for (const auto& rq_it : m_Map) {
566  rq_it.second->ListErrors(errs);
567  }
568 }
569 
570 
571 //LCOV_EXCL_STOP
572 
573 
575 {
576  CRef<CQualifierRequest> rq(new CSpecificHostRequest(orig_val, org));
577  return rq;
578 }
579 
580 
581 //LCOV_EXCL_START
582 //used for cleanup
584 {
585  CRef<CQualifierRequest> rq(new CSpecificHostRequest(orig_val, org, true));
586  return rq;
587 }
588 
589 
591 {
592  string adjusted = host_val;
593  NStr::TruncateSpacesInPlace(adjusted);
594  adjusted = COrgMod::FixHost(adjusted);
595  return adjusted;
596 }
597 
598 
600 {
601  if (!org_ref.IsSetOrgname() ||
602  !org_ref.GetOrgname().IsSetMod()) {
603  return false;
604  }
605 
606  bool changed = false;
607 
608  for (auto& m : org_ref.SetOrgname().SetMod()) {
609  if (m->IsSetSubtype() &&
610  m->GetSubtype() == COrgMod::eSubtype_nat_host &&
611  m->IsSetSubname()) {
612  string host_val = x_DefaultSpecificHostAdjustments(m->GetSubname());
613 
615  if (it != m_Map.end()) {
616  const CSpecificHostRequest* rq = dynamic_cast<const CSpecificHostRequest*>(it->second.GetPointer());
617  string new_val = x_DefaultSpecificHostAdjustments(rq->SuggestFix());
618  if (!NStr::IsBlank(new_val) && !NStr::Equal(new_val, m->GetSubname())) {
619  m->SetSubname(new_val);
620  changed = true;
621  }
622  }
623  }
624  }
625 
626  return changed;
627 }
628 //LCOV_EXCL_STOP
629 
630 
632 {
633  CRef<CQualifierRequest> rq(new CStrainRequest(orig_val, org));
634  return rq;
635 }
636 
637 
639 {
641  m_tax_func = [this](const vector<CRef<COrg_ref>>& list) -> CRef<CTaxon3_reply> {
642  return m_taxon3->SendOrgRefList(list);
643  };
644 }
645 
646 
648  : m_tax_func(tax_func)
649 {
650 }
651 
652 
654 {
655  m_SrcDescs.clear();
656  m_DescCtxs.clear();
657  m_SrcFeats.clear();
661  m_StrainRequestsBuilt = false;
662  x_GatherSources(se);
663 }
664 
665 
667 {
668  if (!m_DescCtxs.empty()) {
669  return m_DescCtxs.front();
670  } else {
671  return CConstRef<CSeq_entry>();
672  }
673 }
674 
675 
677 {
678  // get source descriptors
680  {
681  if ((*it)->IsSource() && (*it)->GetSource().IsSetOrg()) {
682  CConstRef<CSeqdesc> desc;
683  desc.Reset(*it);
684  m_SrcDescs.push_back(desc);
686  r_se.Reset(&se);
687  m_DescCtxs.push_back(r_se);
688  }
689  }
690  // also get features
691  FOR_EACH_ANNOT_ON_SEQENTRY(annot_it, se)
692  {
693  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, **annot_it)
694  {
695  if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsBiosrc()
696  && (*feat_it)->GetData().GetBiosrc().IsSetOrg()) {
698  feat.Reset(*feat_it);
699  m_SrcFeats.push_back(feat);
700  }
701  }
702  }
703 
704  // if set, recurse
705  if (se.IsSet()) {
707  {
708  x_GatherSources(**it);
709  }
710  }
711 }
712 
713 
715 {
716  // request list for taxon3
717  vector< CRef<COrg_ref> > org_rq_list;
718 
719  // first do descriptors
720  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
721  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
722  while (desc_it != m_SrcDescs.cend() && ctx_it != m_DescCtxs.cend()) {
723  CRef<COrg_ref> rq(new COrg_ref);
724  const COrg_ref& org = (*desc_it)->GetSource().GetOrg();
725  rq->Assign(org);
726  org_rq_list.push_back(rq);
727 
728  ++desc_it;
729  ++ctx_it;
730  }
731 
732  // now do features
733  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
734  while (feat_it != m_SrcFeats.cend()) {
735  CRef<COrg_ref> rq(new COrg_ref);
736  const COrg_ref& org = (*feat_it)->GetData().GetBiosrc().GetOrg();
737  rq->Assign(org);
738  org_rq_list.push_back(rq);
739 
740  ++feat_it;
741  }
742  return org_rq_list;
743 }
744 
745 
746 void CTaxValidationAndCleanup::x_InterpretTaxonomyError(const CT3Error& error, const COrg_ref& org, const EErrType type, vector<TTaxError>& errs) const
747 {
748  const string err_str = error.IsSetMessage() ? error.GetMessage() : "?";
749 
750  if (NStr::Equal(err_str, "Organism not found")) {
751  string msg = "Organism not found in taxonomy database";
752  if (error.IsSetOrg() && error.GetOrg().IsSetTaxname() &&
753  !NStr::Equal(error.GetOrg().GetTaxname(), "Not valid") &&
754  (!org.IsSetTaxname() ||
755  !NStr::Equal(org.GetTaxname(), error.GetOrg().GetTaxname()))) {
756  msg += " (suggested:" + error.GetOrg().GetTaxname() + ")";
757  }
759  } else if (NStr::StartsWith(err_str, "Organism not found. Possible matches")) {
760  errs.push_back(TTaxError{ eDiag_Warning, eErr_SEQ_DESCR_OrganismNotFound, err_str });
761  } else if (NStr::Equal(err_str, kInvalidReplyMsg)) {
762  errs.push_back(TTaxError{ eDiag_Error, eErr_SEQ_DESCR_TaxonomyLookupProblem, err_str });
763  } else if (NStr::Find(err_str, "ambiguous name") != NPOS) {
765  "Taxonomy lookup failed with message '" + err_str + "'"});
766  } else {
767  errs.push_back(TTaxError{ eDiag_Warning, type,
768  "Taxonomy lookup failed with message '" + err_str + "'" });
769  }
770 }
771 
772 
774 (const CT3Reply& reply, const COrg_ref& org, CBioSource::TGenome genome, bool is_insd_patent, bool is_wp, vector<TTaxError>& errs) const
775 {
776  if (reply.IsError()) {
778  } else if (reply.IsData()) {
779  bool is_species_level = true;
780  bool is_unidentified = false;
781  bool force_consult = false;
782  bool has_nucleomorphs = false;
783  bool is_cyanobacteria = false;
784  bool has_metagenome_source = false;
785  if (reply.GetData().IsSetOrg()) {
786  const COrg_ref& orp_rep = reply.GetData().GetOrg();
787  if (org.IsSetTaxname() && orp_rep.IsSetTaxname()) {
788  const string& taxname_req = org.GetTaxname();
789  const string& taxname_rep = orp_rep.GetTaxname();
790  if (NStr::Equal(taxname_rep, "unidentified")) {
791  is_unidentified = true;
792  }
793  TTaxId taxid_request = org.GetTaxId();
794  TTaxId taxid_reply = orp_rep.GetTaxId();
795 
796  if (taxid_request != ZERO_TAX_ID && taxid_reply != ZERO_TAX_ID && taxid_request != taxid_reply) {
798  "Organism name is '" + taxname_req
799  + "', taxonomy ID should be '" + NStr::NumericToString(taxid_reply)
800  + "' but is '" + NStr::NumericToString(taxid_request) + "'" });
801  }
802  }
803  if (org.IsSetOrgMod()) {
804  for (const auto& it : org.GetOrgname().GetMod()) {
805  if (it->IsSetSubtype() && it->IsSetSubname() &&
806  it->GetSubtype() == COrgMod::eSubtype_metagenome_source) {
807  has_metagenome_source = true;
808  }
809  }
810  }
811  if (org.IsSetLineage()) {
812  string org_lineage = org.GetLineage();
813  if (! NStr::IsBlank(org_lineage) && NStr::Find(org_lineage, "Bacteria; Cyanobacteriota") != NPOS) {
814  is_cyanobacteria = true;
815  }
816  }
817  }
818  reply.GetData().GetTaxFlags(is_species_level, force_consult, has_nucleomorphs);
819  if (!is_species_level && !is_wp) {
821  "Taxonomy lookup reports is_species_level FALSE"});
822  }
823  if (force_consult) {
824  if (is_insd_patent && is_unidentified) {
825  force_consult = false;
826  }
827  if (is_cyanobacteria && has_metagenome_source) {
828  force_consult = false;
829  }
830  }
831  if (force_consult) {
833  "Taxonomy lookup reports taxonomy consultation needed"});
834  }
835  if (genome == CBioSource::eGenome_nucleomorph
836  && !has_nucleomorphs) {
838  "Taxonomy lookup does not have expected nucleomorph flag"});
839  } else if (genome == CBioSource::eGenome_plastid
840  && (!reply.GetData().HasPlastids())) {
842  "Taxonomy lookup does not have expected plastid flag"});
843  }
844  }
845 }
846 
848  const CTaxon3_reply& reply,
849  CValidError_imp& imp,
850  bool is_insd_patent) const
851 {
852  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
853 
854  // process descriptor responses
855  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
856  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
857 
858  while (reply_it != reply.GetReply().end()
859  && desc_it != m_SrcDescs.cend()
860  && ctx_it != m_DescCtxs.cend()) {
861  vector<TTaxError> errs;
862  const COrg_ref& orp_req = (*desc_it)->GetSource().GetOrg();
863  ListTaxLookupErrors(**reply_it, orp_req,
864  (*desc_it)->GetSource().IsSetGenome() ? (*desc_it)->GetSource().GetGenome() : CBioSource::eGenome_unknown,
865  is_insd_patent, imp.IsWP(), errs);
866  for (const TTaxError& e : errs) {
867  imp.PostObjErr(e.severity, e.err_type, e.err_msg, **desc_it, *ctx_it);
868  }
869  ++reply_it;
870  ++desc_it;
871  ++ctx_it;
872  }
873  // process feat responses
874  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
875  while (reply_it != reply.GetReply().cend()
876  && feat_it != m_SrcFeats.end()) {
877  vector<TTaxError> errs;
878  const COrg_ref& orp_req = (*feat_it)->GetData().GetBiosrc().GetOrg();
879  ListTaxLookupErrors(**reply_it, orp_req,
880  (*feat_it)->GetData().GetBiosrc().IsSetGenome() ? (*feat_it)->GetData().GetBiosrc().GetGenome() : CBioSource::eGenome_unknown,
881  is_insd_patent, imp.IsWP(), errs);
882  for (const TTaxError& e : errs) {
883  imp.PostErr(e.severity, e.err_type, e.err_msg,* *feat_it);
884  }
885  ++reply_it;
886  ++feat_it;
887  }
888 }
889 
890 
892  const CTaxon3_reply& reply,
893  CValidError_imp& imp,
894  bool is_insd_patent,
895  size_t offset) const
896 {
897  // cout << MSerial_AsnText << reply << endl;
898 
899  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
900 
901  // process descriptor responses
902  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.cbegin();
903  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.cbegin();
904 
905  size_t skipped = 0;
906  while (skipped < offset
907  && desc_it != m_SrcDescs.cend()
908  && ctx_it != m_DescCtxs.cend()) {
909  ++desc_it;
910  ++ctx_it;
911  skipped++;
912  }
913 
914  while (reply_it != reply.GetReply().end()
915  && desc_it != m_SrcDescs.cend()
916  && ctx_it != m_DescCtxs.cend()) {
917  vector<TTaxError> errs;
918  const COrg_ref& orp_req = (*desc_it)->GetSource().GetOrg();
919  ListTaxLookupErrors(**reply_it, orp_req,
920  (*desc_it)->GetSource().IsSetGenome() ? (*desc_it)->GetSource().GetGenome() : CBioSource::eGenome_unknown,
921  is_insd_patent, imp.IsWP(), errs);
922  for (const TTaxError& e : errs) {
923  imp.PostObjErr(e.severity, e.err_type, e.err_msg, **desc_it, *ctx_it);
924  }
925  ++reply_it;
926  ++desc_it;
927  ++ctx_it;
928  }
929 
930  if (reply_it == reply.GetReply().end()) {
931  return;
932  }
933  // process feat responses
934  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.cbegin();
935  while (skipped < offset && feat_it != m_SrcFeats.end()) {
936  ++feat_it;
937  skipped++;
938  }
939  while (reply_it != reply.GetReply().cend() &&
940  feat_it != m_SrcFeats.end()) {
941  vector<TTaxError> errs;
942  const COrg_ref& orp_req = (*feat_it)->GetData().GetBiosrc().GetOrg();
943  ListTaxLookupErrors(**reply_it, orp_req,
944  (*feat_it)->GetData().GetBiosrc().IsSetGenome() ? (*feat_it)->GetData().GetBiosrc().GetGenome() : CBioSource::eGenome_unknown,
945  is_insd_patent, imp.IsWP(), errs);
946  for (const TTaxError& e : errs) {
947  imp.PostErr(e.severity, e.err_type, e.err_msg, **feat_it);
948  }
949  ++reply_it;
950  ++feat_it;
951  }
952 }
953 
954 
955 //LCOV_EXCL_START
956 //used by Genome Workbench
958  const CTaxon3_reply& reply,
959  vector<CRef<COrg_ref>> org_refs,
960  string& error_message,
961  bool use_error_orgrefs) const
962 {
963  bool changed = false;
964  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
965  vector<CRef<COrg_ref> >::iterator org_it = org_refs.begin();
966  while (reply_it != reply.GetReply().end() && org_it != org_refs.end()) {
967  CRef<COrg_ref> cpy;
968  if ((*reply_it)->IsData() &&
969  (*reply_it)->GetData().IsSetOrg()) {
970  cpy.Reset(new COrg_ref());
971  cpy->Assign((*reply_it)->GetData().GetOrg());
972  } else if (use_error_orgrefs &&
973  (*reply_it)->IsError() &&
974  (*reply_it)->GetError().IsSetOrg() &&
975  (*reply_it)->GetError().GetOrg().IsSetTaxname() &&
976  ! NStr::Equal((*reply_it)->GetError().GetOrg().GetTaxname(), "Not valid")) {
977  cpy.Reset(new COrg_ref());
978  cpy->Assign((*reply_it)->GetError().GetOrg());
979  }
980  if (cpy) {
981  cpy->CleanForGenBank();
982  if (!cpy->Equals(**org_it)) {
983  (*org_it)->Assign(*cpy);
984  changed = true;
985  }
986  }
987  ++reply_it;
988  ++org_it;
989  }
990  if (reply_it != reply.GetReply().end()) {
991  error_message = "More taxonomy replies than requests!";
992  } else if (org_it != org_refs.end()) {
993  error_message = "Not enough taxonomy replies!";
994  }
995  return changed;
996 }
997 //LCOV_EXCL_STOP
998 
999 
1000 vector<CRef<COrg_ref> > CTaxValidationAndCleanup::GetSpecificHostLookupRequest(bool for_fix)
1001 {
1002  if (for_fix) {
1003  if (!m_HostMapForFix.IsPopulated()) {
1005  }
1007  } else {
1008  if (!m_HostMap.IsPopulated()) {
1010  }
1011  return m_HostMap.GetRequestList();
1012  }
1013 }
1014 
1016 {
1017  if (!m_StrainRequestsBuilt) {
1019  }
1020 
1021  vector<CRef<COrg_ref> > org_rq_list = m_StrainMap.GetRequestList();
1022  return org_rq_list;
1023 }
1024 
1025 
1027 {
1028  //first do descriptors
1029  vector<CConstRef<CSeqdesc> >::const_iterator desc_it = m_SrcDescs.begin();
1030  vector<CConstRef<CSeq_entry> >::const_iterator ctx_it = m_DescCtxs.begin();
1031  while (desc_it != m_SrcDescs.end() && ctx_it != m_DescCtxs.end()) {
1032  lookup.AddDesc(*desc_it, *ctx_it);
1033  ++desc_it;
1034  ++ctx_it;
1035  }
1036  // collect features with specific hosts
1037  vector<CConstRef<CSeq_feat> >::const_iterator feat_it = m_SrcFeats.begin();
1038  while (feat_it != m_SrcFeats.end()) {
1039  lookup.AddFeat(*feat_it);
1040  ++feat_it;
1041  }
1042 
1043 }
1044 
1045 
1047 {
1049  m_StrainRequestsBuilt = true;
1050 }
1051 
1052 
1054 {
1055  m_HostMap.PostErrors(imp);
1056 }
1057 
1058 //LCOV_EXCL_START
1059 //appears to not be used
1061 {
1062  string error_message;
1063  if (!m_HostMap.IsUpdateComplete()) {
1064  vector<CRef<COrg_ref> > input = m_HostMap.GetRequestList();
1065  error_message = m_HostMap.IncrementalUpdate(input, reply);
1066  }
1067  if (!NStr::IsBlank(error_message)) {
1069  return;
1070  }
1071 
1072  m_HostMap.PostErrors(imp);
1073 }
1074 //LCOV_EXCL_STOP
1075 
1076 
1077 //LCOV_EXCL_START
1078 //only used by cleanup
1080  vector<CRef<COrg_ref>> requests,
1081  const CTaxon3_reply& reply,
1082  vector<CRef<COrg_ref>> org_refs)
1083 {
1085  // need to calculate requests for this list
1086  m_HostMapForFix.IncrementalUpdate(requests, reply);
1087  }
1088  return AdjustOrgRefsForSpecificHosts(org_refs);
1089 }
1090 
1091 
1093 {
1094  bool changed = false;
1095  for (auto org = org_refs.begin(); org != org_refs.end(); org++) {
1096  changed |= m_HostMapForFix.ApplyToOrg(**org);
1097  }
1098  return changed;
1099 }
1100 
1101 
1103 {
1105  if (map_it != m_SpecificHostRequests.end() && map_it->second.NumRemainingReplies() > 0) {
1106  return map_it;
1107  }
1108  map_it = m_SpecificHostRequests.begin();
1109  while (map_it != m_SpecificHostRequests.end()) {
1110  if (map_it->second.MatchTryValue(val) && map_it->second.NumRemainingReplies() > 0) {
1111  return map_it;
1112  }
1113  ++map_it;
1114  }
1115  return m_SpecificHostRequests.end();
1116 }
1117 //LCOV_EXCL_STOP
1118 
1119 
1121 {
1122  string error_message;
1123  if (m_HostMap.IsPopulated()) {
1124  error_message = m_HostMap.IncrementalUpdate(input, reply);
1125  }
1126  if (NStr::IsBlank(error_message)) {
1127  if (m_HostMapForFix.IsPopulated()) {
1128  error_message = m_HostMapForFix.IncrementalUpdate(input, reply);
1129  }
1130  }
1131  return error_message;
1132 }
1133 
1134 
1135 //LCOV_EXCL_START
1136 //used only by cleanup
1138 {
1139  if (m_HostMap.IsPopulated()) {
1140  return m_HostMap.IsUpdateComplete();
1141  } else if (m_HostMapForFix.IsPopulated()) {
1143  } else {
1144  return false;
1145  }
1146 }
1147 
1148 
1150 {
1151  CTaxon3_reply::TReply::const_iterator reply_it = reply.GetReply().begin();
1153  while (rq_it != m_SpecificHostRequests.end()) {
1154  while (rq_it->second.NumRemainingReplies() > 0 && reply_it != reply.GetReply().end()) {
1155  rq_it->second.AddReply(**reply_it);
1156  ++reply_it;
1157  }
1158  if (rq_it->second.NumRemainingReplies() > 0) {
1159  error_message = "Failed to respond to all taxonomy requests for specific host";
1160  break;
1161  }
1162  ++rq_it;
1163  }
1164 
1165  if (reply_it != reply.GetReply().end()) {
1166  error_message = "Unexpected taxonomy responses for specific host";
1167  }
1168 }
1169 
1170 
1172 {
1173  if (!org_ref.IsSetOrgname() ||
1174  !org_ref.GetOrgname().IsSetMod()) {
1175  return false;
1176  }
1177 
1178  bool changed = false;
1179 
1180  for (auto& m : org_ref.SetOrgname().SetMod()) {
1181  if (m->IsSetSubtype() &&
1182  m->GetSubtype() == COrgMod::eSubtype_nat_host &&
1183  m->IsSetSubname()) {
1184  string host_val = x_DefaultSpecificHostAdjustments(m->GetSubname());
1186  if (it != m_SpecificHostRequests.end()) {
1187  const string& new_val = it->second.SuggestFix();
1188  if (!NStr::IsBlank(new_val) && !NStr::Equal(new_val, m->GetSubname())) {
1189  m->SetSubname(new_val);
1190  changed = true;
1191  }
1192  }
1193  }
1194  }
1195 
1196  return changed;
1197 }
1198 
1199 
1201 {
1202  string adjusted = host_val;
1203  NStr::TruncateSpacesInPlace(adjusted);
1204  adjusted = COrgMod::FixHost(adjusted);
1205  return adjusted;
1206 }
1207 
1208 
1210 {
1211  return m_StrainMap.IncrementalUpdate(input, reply);
1212 }
1213 
1214 
1216 {
1217  return m_StrainMap.IsUpdateComplete();
1218 }
1219 //LCOV_EXCL_STOP
1220 
1221 
1223 {
1224  m_StrainMap.PostErrors(imp);
1225 }
1226 
1228 {
1229  return (num < m_DescCtxs.size()) ? m_DescCtxs[num] : CConstRef<CSeq_entry>();
1230 }
1231 
1232 
1233 //LCOV_EXCL_START
1234 //used by Genome Workbench, asn_cleanup, and table2asn but not asnvalidate
1236 {
1237  Init(*(seh.GetCompleteSeq_entry()));
1238 
1239  vector<CRef<COrg_ref> > original_orgs = GetTaxonomyLookupRequest();
1240  if (original_orgs.empty())
1241  {
1242  return false;
1243  }
1244  const size_t chunk_size = 1000;
1245  vector< CRef<COrg_ref> > edited_orgs;
1246 
1247  size_t i = 0;
1248  while (i < original_orgs.size())
1249  {
1250  size_t len = min(chunk_size, original_orgs.size() - i);
1251  vector< CRef<COrg_ref> > tmp_original_orgs(original_orgs.begin() + i, original_orgs.begin() + i + len);
1252  vector< CRef<COrg_ref> > tmp_edited_orgs;
1253  for (CRef<COrg_ref>& it : tmp_original_orgs)
1254  {
1255  CRef<COrg_ref> cpy(new COrg_ref());
1256  cpy->Assign(*it);
1257  tmp_edited_orgs.push_back(cpy);
1258  }
1259  CRef<CTaxon3_reply> tmp_lookup_reply = m_tax_func(tmp_original_orgs);
1260  string error_message;
1261  AdjustOrgRefsWithTaxLookupReply(*tmp_lookup_reply, tmp_edited_orgs, error_message);
1262  if (!NStr::IsBlank(error_message))
1263  {
1264  // post error message
1265  ERR_POST(Error << error_message);
1266  return false;
1267  }
1268  edited_orgs.insert(edited_orgs.end(), tmp_edited_orgs.begin(), tmp_edited_orgs.end());
1269  i += len;
1270  }
1271 
1272  if (with_host) {
1273  vector< CRef<COrg_ref> > spec_host_rq = GetSpecificHostLookupRequest(true);
1274  i = 0;
1275  while (i < spec_host_rq.size())
1276  {
1277  size_t len = min(chunk_size, spec_host_rq.size() - i);
1278  vector< CRef<COrg_ref> > tmp_spec_host_rq(spec_host_rq.begin() + i, spec_host_rq.begin() + i + len);
1279  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(tmp_spec_host_rq);
1280  string error_message = IncrementalSpecificHostMapUpdate(tmp_spec_host_rq, *tmp_spec_host_reply);
1281  if (!NStr::IsBlank(error_message))
1282  {
1283  // post error message
1284  ERR_POST(Error << error_message);
1285  return false;
1286  }
1287  i += len;
1288  }
1289 
1290  AdjustOrgRefsForSpecificHosts(edited_orgs);
1291  }
1292 
1293  // update descriptors
1294  size_t num_descs = NumDescs();
1295  size_t num_updated_descs = 0;
1296  for (size_t n = 0; n < num_descs; n++) {
1297  if (!original_orgs[n]->Equals(*(edited_orgs[n]))) {
1298  CSeqdesc* orig = const_cast<CSeqdesc *>(GetDesc(n).GetPointer());
1299  orig->SetSource().SetOrg().Assign(*(edited_orgs[n]));
1300  num_updated_descs++;
1301  }
1302  }
1303 
1304  // now update features
1305  size_t num_updated_feats = 0;
1306  for (size_t n = 0; n < NumFeats(); n++) {
1307  if (!original_orgs[n + num_descs]->Equals(*edited_orgs[n + num_descs])) {
1308  CConstRef<CSeq_feat> feat = GetFeat(n);
1309  CRef<CSeq_feat> new_feat(new CSeq_feat());
1310  new_feat->Assign(*feat);
1311  new_feat->SetData().SetBiosrc().SetOrg().Assign(*(edited_orgs[n + num_descs]));
1312 
1313  CSeq_feat_Handle fh = seh.GetScope().GetSeq_featHandle(*feat);
1314  CSeq_feat_EditHandle efh(fh);
1315  efh.Replace(*new_feat);
1316  num_updated_feats++;
1317  }
1318  }
1319  return (num_updated_descs > 0 || num_updated_feats > 0);
1320 }
1321 //LCOV_EXCL_STOP
1322 
1323 
1324 //LCOV_EXCL_START
1325 //only used by biosample
// Attempts to correct a single specific-host string in place (`val` is
// assigned to below, so it is an in/out parameter).
// NOTE(review): this listing is missing the signature line and several
// interior lines (listing lines 1326, 1328, 1333-1334, 1338, 1349, 1361,
// 1364); the comments below describe only the statements that are visible.
1327 {
1329  string err_msg;
// Nothing to fix when the value already validates.
1330  if(IsOneSpecificHostValid(val, err_msg)) {
1331  return;
1332  }
1335 
// NOTE(review): m_HostMapForFix is presumably populated with `val` on the
// missing lines 1333-1334 -- confirm against the full source.
1336  vector< CRef<COrg_ref> > spec_host_rq = m_HostMapForFix.GetRequestList();
1337  if (spec_host_rq.empty()) {
1339  return;
1340  }
// Build a one-element org-ref list whose only modifier is a nat_host OrgMod
// holding `val`; the final value is read back from this element below.
1341  vector< CRef<COrg_ref> > edited;
1342  edited.push_back(CRef<COrg_ref>(new COrg_ref()));
1343  edited.front()->SetOrgname().SetMod().push_back(CRef<COrgMod>(new COrgMod(COrgMod::eSubtype_nat_host, val)));
1344 
1345  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(spec_host_rq);
1346 
// NOTE(review): the reply is dereferenced without a null check, and front()
// is called without checking that the reply list is non-empty;
// IsOneSpecificHostValid guards the null case explicitly -- consider doing
// the same here.
// When the reply is unset or its first element is not data, the value is
// blanked and the function returns early.
1347  if (!tmp_spec_host_reply->IsSetReply() || !tmp_spec_host_reply->GetReply().front()->IsData()) {
1348  val = kEmptyStr;
1350  return;
1351  }
1352 
// A non-blank message from the map update is posted, but processing
// continues (there is no return in this branch).
1353  string error_message = IncrementalSpecificHostMapUpdate(spec_host_rq, *tmp_spec_host_reply);
1354  if (!NStr::IsBlank(error_message))
1355  {
1356  // post error message
1357  ERR_POST(Error << error_message);
1358  }
1359 
1360 
1362 
// NOTE(review): the step that applies the lookup result to `edited` is
// presumably on the missing line 1361; as shown, this reads back the subname
// of the first modifier of the first org-ref.
1363  val = edited.front()->GetOrgname().GetMod().front()->GetSubname();
1365 }
1366 //LCOV_EXCL_STOP
1367 
1368 
1369 //LCOV_EXCL_START
1370 //only used by biosample
// Checks one specific-host value through the taxonomy lookup function.
//
//   val        host string to check
//   error_msg  reset to kEmptyStr on entry; on failure receives either the
//              lookup/update error text or the message of the first error
//              listed by m_HostMap
//
// Returns true when the request list is empty or when no errors are
// reported; returns false otherwise. m_HostMap is cleared on entry and
// again before every return.
1371 bool CTaxValidationAndCleanup::IsOneSpecificHostValid(const string& val, string& error_msg)
1372 {
1373  error_msg = kEmptyStr;
1374  m_HostMap.Clear();
1375 
// NOTE(review): listing line 1376 is missing from this view. `val` is not
// referenced by any visible line, so the step that registers it with
// m_HostMap presumably lives there -- confirm against the full source.
1377 
1378  vector< CRef<COrg_ref> > spec_host_rq = m_HostMap.GetRequestList();
// No requests generated: the value is treated as valid.
1379  if (spec_host_rq.empty()) {
1380  m_HostMap.Clear();
1381  return true;
1382  }
1383 
1384  CRef<CTaxon3_reply> tmp_spec_host_reply = m_tax_func(spec_host_rq);
1385 
// A null reply is reported as a connection failure instead of being
// dereferenced.
1386  string err_msg;
1387  if (tmp_spec_host_reply) {
1388  err_msg = IncrementalSpecificHostMapUpdate(spec_host_rq, *tmp_spec_host_reply);
1389  } else {
1390  err_msg = "Connection to taxonomy failed";
1391  }
1392  bool rval = true;
1393  error_msg = err_msg;
1394 
1395  if (!NStr::IsBlank(err_msg)) {
1396  ERR_POST(Error << err_msg);
// This Clear() is repeated unconditionally just before the return below.
1397  m_HostMap.Clear();
1398  rval = false;
1399  } else {
// The update itself succeeded; only the first listed error, if any, is
// handed back to the caller.
1400  vector<TTaxError> errs;
1401  m_HostMap.ListErrors(errs);
1402  if (errs.size() > 0) {
1403  error_msg = errs.front().err_msg;
1404  rval = false;
1405  }
1406  }
1407  m_HostMap.Clear();
1408  return rval;
1409 }
1410 //LCOV_EXCL_STOP
1411 
1412 
// Runs three checks for a single org-ref -- whole-org taxonomy lookup,
// specific-host lookup, and strain lookup -- collecting TTaxError entries in
// `errs` and posting each of them against `org` through imp.PostObjErr() at
// the end.
// NOTE(review): this listing is missing the signature line (1413) and the
// interior lines 1428, 1448 and 1462; the comments below describe only the
// statements that are visible.
1414 {
1415  x_ClearMaps();
1416 
1417  vector<TTaxError> errs;
1418 
1419  // lookup of whole org
// The request holds a copy of `org` (made with Assign), not `org` itself.
1420  vector< CRef<COrg_ref> > org_rq_list;
1421  CRef<COrg_ref> rq(new COrg_ref);
1422  rq->Assign(org);
1423  org_rq_list.push_back(rq);
1424 
1425  CRef<CTaxon3_reply> reply = m_tax_func(org_rq_list);
1426 
// NOTE(review): the first line of the call in the failure branch (listing
// line 1428) is missing; only its trailing arguments are visible.
// NOTE(review): front() is called on the reply list without an emptiness
// check -- confirm the service always returns one element per request.
1427  if (!reply || !reply->IsSetReply()) {
1429  "Taxonomy service connection failure", org);
1430  } else {
1431  ListTaxLookupErrors(*(reply->GetReply().front()), org, genome,
1432  false, false, errs);
1433  }
1434 
1435  // Now look at specific-host values
1436  m_HostMap.AddOrg(org);
1437  org_rq_list = GetSpecificHostLookupRequest(false);
1438 
1439  if (!org_rq_list.empty()) {
1440  reply = m_tax_func(org_rq_list);
// A null reply is turned into an error message instead of being
// dereferenced.
1441  string err_msg;
1442  if (reply) {
1443  err_msg = IncrementalSpecificHostMapUpdate(org_rq_list, *reply);
1444  } else {
1445  err_msg = "Connection to taxonomy failed";
1446  }
// NOTE(review): the body of the non-blank branch (listing line 1448) is
// missing from this view.
1447  if (!NStr::IsBlank(err_msg)) {
1449  } else {
1450  m_HostMap.ListErrors(errs);
1451  }
1452  }
1453 
1454 
1455  // validate strain
1456  m_StrainMap.AddOrg(org);
1457  org_rq_list = GetStrainLookupRequest();
1458  if (!org_rq_list.empty()) {
1459  reply = m_tax_func(org_rq_list);
// NOTE(review): unlike the specific-host lookup above, `reply` is
// dereferenced here without a null check -- consider applying the same
// "Connection to taxonomy failed" handling.
1460  string err_msg = IncrementalStrainMapUpdate(org_rq_list, *reply);
// NOTE(review): the body of the non-blank branch (listing line 1462) is
// missing from this view.
1461  if (!NStr::IsBlank(err_msg)) {
1463  } else {
1464  m_StrainMap.ListErrors(errs);
1465  }
1466  }
1467 
// Everything collected above is posted against the caller's `org`.
1468  for (const TTaxError& e : errs) {
1469  imp.PostObjErr(e.severity, e.err_type, e.err_msg, org);
1470  }
1471 }
1472 
1473 
1474 END_SCOPE(validator)
EErrType
@ eErr_SEQ_DESCR_TaxonomyConsultRequired
@ eErr_SEQ_DESCR_TaxonomyServiceProblem
@ eErr_SEQ_DESCR_TaxonomyNucleomorphProblem
@ eErr_SEQ_DESCR_TaxonomyPlastidsProblem
@ eErr_SEQ_DESCR_StrainContainsTaxInfo
@ eErr_SEQ_DESCR_OrganismNotFound
@ eErr_SEQ_DESCR_AmbiguousSpecificHost
@ eErr_SEQ_DESCR_TaxonomyAmbiguousName
@ eErr_SEQ_DESCR_TaxonomyLookupProblem
@ eErr_SEQ_DESCR_BadSpecificHost
@ eErr_SEQ_DESCR_TaxonomyIsSpeciesProblem
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
static string FixHost(const string &value)
Definition: OrgMod.cpp:999
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
void CleanForGenBank()
Definition: Org_ref.cpp:510
const string & GetLineage(void) const
Definition: Org_ref.cpp:124
bool IsSetOrgMod(void) const
Definition: Org_ref.cpp:169
bool IsSetLineage(void) const
Definition: Org_ref.cpp:119
TQualifierRequests::iterator x_FindRequest(const string &val)
string IncrementalUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
virtual CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org)=0
virtual void ListErrors(vector< TTaxError > &errs) const
void AddDesc(CConstRef< CSeqdesc > desc, CConstRef< CSeq_entry > ctx)
void AddOrg(const COrg_ref &org)
virtual bool Check(const COrg_ref &) const
void AddFeat(CConstRef< CSeq_feat > feat)
virtual string GetKey(const string &orig_val, const COrg_ref &org) const =0
void AddString(const string &val)
void PostErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetRequestList()
bool MatchTryValue(const string &val) const
pair< CConstRef< CSeqdesc >, CConstRef< CSeq_entry > > TDescPair
void PostErrors(CValidError_imp &imp)
void AddParent(CConstRef< CSeqdesc > desc, CConstRef< CSeq_entry > ctx)
void AddRequests(vector< CRef< COrg_ref > > &request_list) const
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
bool ApplyToOrg(COrg_ref &org) const override
static string x_DefaultSpecificHostAdjustments(const string &host_val)
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
void ListErrors(vector< TTaxError > &errs) const override
CSpecificHostRequest(const string &orig_val, const COrg_ref &org, bool for_fix=false)
const string & SuggestFix() const
void AddReply(const CT3Reply &reply) override
CRef< CQualifierRequest > x_MakeNewRequest(const string &orig_val, const COrg_ref &org) override
static bool Check(const COrg_ref &org)
static string MakeKey(const string &strain, const string &taxname)
void ListErrors(vector< TTaxError > &errs) const override
static bool x_IgnoreStrain(const string &str)
void AddReply(const CT3Reply &reply) override
static bool x_IsUnwanted(const string &str)
CStrainRequest(const string &strain, const COrg_ref &org)
static bool RequireTaxname(const string &taxname)
void GetTaxFlags(bool &is_species_level, bool &force_consult, bool &has_nucleomorphs) const
Definition: T3Data.cpp:58
bool HasPlastids(void) const
Definition: T3Data.cpp:93
CT3Reply –.
Definition: T3Reply.hpp:66
void x_InterpretTaxonomyError(const CT3Error &error, const COrg_ref &org, const EErrType type, vector< TTaxError > &errs) const
CConstRef< CSeq_feat > GetFeat(size_t num) const
vector< CConstRef< CSeq_feat > > m_SrcFeats
vector< CConstRef< CSeq_entry > > m_DescCtxs
static string x_DefaultSpecificHostAdjustments(const string &host_val)
vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const
void ReportSpecificHostErrors(const CTaxon3_reply &reply, CValidError_imp &imp)
string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
CConstRef< CSeq_entry > GetSeqContext(size_t num) const
void x_UpdateSpecificHostMapWithReply(const CTaxon3_reply &reply, string &error_message)
TSpecificHostRequests m_SpecificHostRequests
TSpecificHostRequests::iterator x_FindHostFixRequest(const string &val)
CSpecificHostMapForFix m_HostMapForFix
void x_CreateQualifierMap(CQualLookupMap &lookup)
void ListTaxLookupErrors(const CT3Reply &reply, const COrg_ref &org, CBioSource::TGenome genome, bool is_insd_patent, bool is_wp, vector< TTaxError > &errs) const
void ReportIncrementalTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent, size_t offset) const
bool AdjustOrgRefsForSpecificHosts(vector< CRef< COrg_ref > > org_refs)
vector< CConstRef< CSeqdesc > > m_SrcDescs
CConstRef< CSeqdesc > GetDesc(size_t num) const
bool x_ApplySpecificHostMap(COrg_ref &org_ref) const
bool AdjustOrgRefsWithSpecificHostReply(vector< CRef< COrg_ref >> requests, const CTaxon3_reply &reply, vector< CRef< COrg_ref >> org_refs)
void ReportTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent) const
void CheckOneOrg(const COrg_ref &org, CBioSource::TGenome genome, CValidError_imp &imp)
bool AdjustOrgRefsWithTaxLookupReply(const CTaxon3_reply &reply, vector< CRef< COrg_ref > > org_refs, string &error_message, bool use_error_orgrefs=false) const
vector< CRef< COrg_ref > > GetStrainLookupRequest()
string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
bool DoTaxonomyUpdate(CSeq_entry_Handle seh, bool with_host)
CConstRef< CSeq_entry > GetTopReportObject() const
void ReportStrainErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)
void Init(const CSeq_entry &se)
bool IsOneSpecificHostValid(const string &val, string &err_msg)
void x_GatherSources(const CSeq_entry &se)
CTaxon3_reply –.
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
Definition: validatorp.cpp:358
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsWP() const
size_type size() const
Definition: map.hpp:148
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static const int chunk_size
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
CS_CONTEXT * ctx
Definition: t0006.c:12
static int type
Definition: getdata.c:31
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5383
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:360
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
Definition: Org_ref_.cpp:87
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_nat_host
natural host of this specimen
Definition: OrgMod_.hpp:104
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_metagenome_source
Definition: OrgMod_.hpp:120
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
bool IsData(void) const
Check if variant Data is selected.
Definition: T3Reply_.hpp:263
const TData & GetData(void) const
Get the variant data.
Definition: T3Reply_.cpp:124
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: T3Data_.hpp:273
const TReply & GetReply(void) const
Get the Reply member data.
bool IsError(void) const
Check if variant Error is selected.
Definition: T3Reply_.hpp:257
const TError & GetError(void) const
Get the variant data.
Definition: T3Reply_.cpp:102
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: T3Data_.hpp:285
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref > > &list)> taxupdate_func_t
Definition: itaxon3.hpp:60
static int input()
int i
yy_size_t n
int len
const struct ncbi::grid::netcache::search::fields::KEY key
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T min(T x_, T y_)
The Object manager core.
bool IsCommonName(const CT3Data &data)
Definition: utilities.cpp:1944
bool HasMisSpellFlag(const CT3Data &data)
Definition: utilities.cpp:1966
string SpecificHostValueToCheck(const string &val)
Definition: utilities.cpp:2042
string InterpretSpecificHostResult(const string &host, const CT3Reply &reply, const string &orig_host=kEmptyStr)
Definition: utilities.cpp:2085
bool IsLikelyTaxname(const string &val)
Definition: utilities.cpp:2150
bool FindMatchInOrgRef(const string &str, const COrg_ref &org)
Definition: utilities.cpp:1985
int offset
Definition: replacements.h:160
#define FOR_EACH_SEQFEAT_ON_SEQANNOT(Itr, Var)
FOR_EACH_SEQFEAT_ON_SEQANNOT EDIT_EACH_SEQFEAT_ON_SEQANNOT.
Definition: seq_macros.hpp:410
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FOR_EACH_DESCRIPTOR_ON_SEQENTRY
#define FOR_EACH_ANNOT_ON_SEQENTRY
static const char * str(char *buf, int n)
Definition: stats.c:84
Definition: type.c:6
const string kInvalidReplyMsg
Modified on Wed Dec 06 07:14:10 2023 by modify_doxy.py rev. 669887