NCBI C++ ToolKit
macro_fn_lookup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: macro_fn_lookup.cpp 47378 2023-02-27 20:09:44Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  *
28  *
29  */
30 
31 #include <ncbi_pch.hpp>
38 
39 
40 /** @addtogroup GUI_MACRO_SCRIPTS_UTIL
41  *
42  * @{
43  */
44 
46 BEGIN_SCOPE(macro)
48 
49 
50 ///////////////////////////////////////////////////////////////////////////////////
51 /// constants used in the uncultured macro tool
52 
/// Note texts that mark a source as amplified with species-specific primers.
/// The list ends with a null sentinel; the helpers in this file walk it until
/// that sentinel and match each entry as a case-insensitive substring.
static const char* sSpeciesSpecificPrimers[] = {
    // BankIt wizard variants
    "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
    "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
    "[BankIt_uncultured16S_wizard]; [species_specific primers]",
    // free-form variants
    "[uncultured (with species-specific primers)]",
    "[uncultured]; [amplified with species-specific primers]",
    "[uncultured (using species-specific primers) bacterial source]",
    "[amplified with species-specific primers]; [uncultured; wizard]",
    "[amplified with species-specific primers]",
    "amplified with species-specific primers",
    nullptr   // sentinel
};
65 
/// Note texts that mark a source as amplified with universal primers.
/// The list ends with a null sentinel; the helpers in this file walk it until
/// that sentinel and match each entry as a case-insensitive substring.
static const char* sUniversalPrimers[] = {
    // BankIt wizard variants followed by "[tgge]"
    "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",
    "[BankIt_uncultured23S_wizard]; [universal primers]; [tgge]",
    "[BankIt_uncultured16S-23SIGS_wizard]; [universal primers]; [tgge]",
    // BankIt wizard variants followed by "[dgge]"
    "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",
    "[BankIt_uncultured23S_wizard]; [universal primers]; [dgge]",
    "[BankIt_uncultured16S-23SIGS_wizard]; [universal primers]; [dgge]",
    // BankIt wizard variants without a trailing tag
    "[BankIt_uncultured16S_wizard]; [universal primers]",
    "[BankIt_uncultured23S_wizard]; [universal primers]",
    "[BankIt_uncultured16S-23SIGS_wizard]; [universal primers]",
    // free-form variants
    "[universal primers]; [uncultured; wizard]",
    "[uncultured (using universal primers)]",
    "[uncultured (using universal primers) bacterial source]",
    "[uncultured]; [universal primers]",
    nullptr   // sentinel
};
82 
83 static bool s_HasSubSourceNote(const CBioSource &bsrc, const char* note_list[])
84 {
85  if (!bsrc.IsSetSubtype()) {
86  return false;
87  }
88 
89  ITERATE(CBioSource::TSubtype, subtype, bsrc.GetSubtype()) {
90  if ((*subtype)->IsSetSubtype()
91  && (*subtype)->GetSubtype() == CSubSource::eSubtype_other
92  && (*subtype)->IsSetName()) {
93  int i = 0;
94  while (note_list[i]) {
95  if (NStr::FindNoCase((*subtype)->GetName(), note_list[i]) != NPOS) {
96  return true;
97  }
98  i++;
99  }
100  }
101  }
102  return false;
103 }
104 
// Applies the uncultured-taxname correction to `bsrc`.
//
// Visible steps: m_Bsrc is pointed at `bsrc`, x_Reset() is called, and the
// name returned by GetSuggestedCorrection() is written into the biosource when
// it differs (case-sensitive comparison) from the current taxname. Afterwards
// x_ConvertNote() is invoked according to the m_ConvertNote / m_RemoveNote_*
// flags, and the three flags are copied into the out-parameters.
//
// NOTE(review): this listing is incomplete - listing lines 114, 117, 120, 128
// and 130 are absent, so the braces below do not balance and some statements
// (presumably the conditions guarding the m_RemoveNote_* assignments and the
// bodies of two branches) are not visible here. Restore them from the
// repository before editing the logic.
105 void CUnculturedTool::CorrectBioSource(CBioSource& bsrc, bool& converted_note, bool& removed_note_sp, bool& removed_note_univ)
106 {
107  m_Bsrc.Reset(const_cast<const CBioSource*>(&bsrc));
108  x_Reset();
109 
110  string suggested_taxname = GetSuggestedCorrection();
111  // update only if suggested taxname is different from the original one
112  if (!NStr::EqualCase(m_Bsrc->GetTaxname(), suggested_taxname)) {
113  bsrc.SetOrg().SetTaxname(suggested_taxname);
115  }
116  else if (s_HasTaxId(bsrc) && s_IsUnculturedName(bsrc.GetTaxname())) {
118  m_RemoveNote_Univ = true;
119  }
121  m_RemoveNote_Sp = true;
122  }
123  }
124 
125 
     // Converting the species-specific note takes precedence over removing it;
     // the universal-primer note is handled independently of both.
126  if (m_ConvertNote) {
127  x_ConvertNote(bsrc, sSpeciesSpecificPrimers, "amplified with species-specific primers");
129  } else if (m_RemoveNote_Sp) {
131  }
132  if (m_RemoveNote_Univ) {
     // an empty replacement text makes x_ConvertNote erase the matching note
133  x_ConvertNote(bsrc, sUniversalPrimers, "");
134  }
135 
     // report what was decided back to the caller
136  converted_note = m_ConvertNote;
137  removed_note_sp = m_RemoveNote_Sp;
138  removed_note_univ = m_RemoveNote_Univ;
139 }
140 
142 {
     // NOTE(review): the function header (listing line 141) is absent from this
     // listing. Presumably this is `string GetSuggestedCorrection(void)`, the
     // function CorrectBioSource() calls - confirm against the header.
     //
     // Builds a set of candidate names derived from the current taxname, sends
     // them to the taxonomy service in one request, stores each reply in
     // m_ReplyCache under the name it was requested for, and returns the result
     // of x_GetCorrection(). Throws CException when m_Bsrc is not set.
143  if (!m_Bsrc) {
144  NCBI_THROW(CException, eUnknown, "No biosource is specified");
145  }
146 
147  x_Reset();
148 
     // no lookup wanted: hand back the current taxname unchanged (or empty)
149  if (!x_ShouldLookupTaxname()) {
150  return (m_Bsrc->IsSetTaxname()) ? m_Bsrc->GetTaxname() : kEmptyStr;
151  }
152 
153  string standard_taxname = x_StandardFixes();
154  if (standard_taxname.empty())
155  return kEmptyStr;
156 
     // candidate names; a set, so duplicates collapse and iteration is ordered
157  set<string> submit;
158  if (m_Bsrc->IsSetTaxname() && NStr::StartsWith(m_Bsrc->GetTaxname(), "uncultured", NStr::eNocase)) {
159  submit.insert(m_Bsrc->GetTaxname());
160  }
161  submit.insert(standard_taxname);
162  submit.insert(s_MakeUnculturedName(standard_taxname, " bacterium"));
163  submit.insert(s_MakeUnculturedName(standard_taxname, " archaeon"));
164  submit.insert(s_MakeUnculturedName(standard_taxname, " sp."));
165  submit.insert(s_MakeUnculturedName(standard_taxname));
     // multi-word name: also submit the variants built from the first word only
166  if (NStr::FindNoCase(standard_taxname, " ") != NPOS) {
167  string tmp, name2;
168  NStr::SplitInTwo(standard_taxname, " ", name2, tmp);
169  standard_taxname = name2;
170  submit.insert(standard_taxname);
171  submit.insert(s_MakeUnculturedName(standard_taxname, " bacterium"));
172  submit.insert(s_MakeUnculturedName(standard_taxname, " archaeon"));
173  submit.insert(s_MakeUnculturedName(standard_taxname, " sp."));
174  submit.insert(s_MakeUnculturedName(standard_taxname));
175  }
176 
177  // send this list to taxonomy
178  vector<CRef<COrg_ref> > rq_list;
179  ITERATE(set<string>, name, submit) {
180  CRef<COrg_ref> org(new COrg_ref());
181  org->SetTaxname(*name);
182  rq_list.push_back(org);
183  }
184 
185  CTaxon3 taxon3;
186  taxon3.Init();
187  CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq_list);
188  if (!reply->IsSetReply())
189  return kEmptyStr;
190 
     // Replies are paired with requests by position.
     // NOTE(review): assumes the service returns replies in request order and
     // never more replies than requests (rq_list[index] is unchecked) - confirm.
191  size_t index = 0;
192  ITERATE(CTaxon3_reply::TReply, rp_it, reply->GetReply()) {
193  m_ReplyCache[rq_list[index]->GetTaxname()] = *rp_it;
194  index++;
195  }
196 
197  return x_GetCorrection();
198 }
199 
201 {
     // NOTE(review): the function header (listing line 200) and listing line 202
     // are absent from this listing; presumably this is the x_Reset() called by
     // CorrectBioSource() - confirm against the repository.
     // Visible effect: clears the three note-handling flags.
203  m_ConvertNote = false;
204  m_RemoveNote_Sp = false;
205  m_RemoveNote_Univ = false;
206 }
207 
209 {
     // NOTE(review): the function header (listing line 208) is absent from this
     // listing; presumably `bool x_ShouldLookupTaxname(void)` - confirm.
     //
     // Returns true when the taxname is set, non-empty and accepted by
     // s_OkToTaxFix(), and in addition either the biosource has no taxid or the
     // name does not start with "uncultured". Returns false otherwise.
210  if (m_Bsrc->IsSetTaxname() && !m_Bsrc->GetTaxname().empty() && s_OkToTaxFix(m_Bsrc->GetTaxname())) {
211  if (!s_HasTaxId(*m_Bsrc) || !s_IsUnculturedName(m_Bsrc->GetTaxname())) {
212  return true;
213  }
214  }
215  return false;
216 }
217 
/// Name fragments that exclude a taxname from automatic correction.
/// s_OkToTaxFix() rejects any taxname containing one of these
/// (case-insensitive substring match). The pointers themselves are const so
/// the table cannot be modified after initialization.
static const char* const sUnfixable[] = {
    "rickettsia",
    "candidatus",
    "endosymbiont",
    "phytoplasma",
    "wolbachia"
};
225 
226 bool CUnculturedTool::s_OkToTaxFix(const string& taxname)
227 {
228  for (auto&& elem : sUnfixable) {
229  if (NStr::FindNoCase(taxname, elem) != NPOS) {
230  return false;
231  }
232  }
233  return true;
234 }
235 
236 bool CUnculturedTool::s_IsUnculturedName(const string& taxname)
237 {
238  return NStr::StartsWith(taxname, "uncultured", NStr::eNocase);
239 }
240 
242 {
     // NOTE(review): the function header (listing line 241) is absent from this
     // listing; presumably `bool s_HasTaxId(const CBioSource& bsrc)` - confirm.
     //
     // Returns true when the biosource has an Org whose taxid differs from
     // ZERO_TAX_ID; false when Org is not set or the taxid is ZERO_TAX_ID.
243  if (bsrc.IsSetOrg()) {
244  TTaxId taxid = bsrc.GetOrg().GetTaxId();
245  if (taxid != ZERO_TAX_ID) { // found taxid
246  return true;
247  }
248  }
249  return false;
250 }
251 
253 {
     // NOTE(review): the function header (listing line 252) is absent from this
     // listing; presumably the x_StandardFixes() used by the lookup code, which
     // returns a string - confirm.
     //
     // Normalizes a copy of the current taxname: strips a leading "uncultured "
     // and a trailing " sp" / " sp." (all case-insensitive) and replaces commas
     // with spaces. The pass is repeated until it no longer changes the name, so
     // stacked prefixes/suffixes are all removed.
     // NOTE(review): m_Bsrc->GetTaxname() is read without an IsSetTaxname()
     // check here - presumably callers guarantee it; verify.
254  string taxname = m_Bsrc->GetTaxname();
255  string old;
256  while (old != taxname) {
257  old = taxname;
258  const string uncultured = "uncultured ";
259  const string sp = " sp";
260  const string spdot = " sp.";
261  if (NStr::StartsWith(taxname, uncultured, NStr::eNocase)) {
262  taxname = taxname.substr(uncultured.length());
263  }
264  if (NStr::EndsWith(taxname, sp, NStr::eNocase)) {
265  taxname = taxname.substr(0, taxname.length() - sp.length());
266  }
267  if (NStr::EndsWith(taxname, spdot, NStr::eNocase)) {
268  taxname = taxname.substr(0, taxname.length() - spdot.length());
269  }
     // ", " is replaced first so that a comma followed by a space yields a
     // single space rather than two
270  NStr::ReplaceInPlace(taxname, ", ", " ");
271  NStr::ReplaceInPlace(taxname, ",", " ");
272  }
273 
274  return taxname;
275 }
276 
277 string CUnculturedTool::s_MakeUnculturedName(const string& taxname, const string& suffix)
278 {
279  return "uncultured " + taxname + suffix;
280 }
281 
282 
284 {
     // NOTE(review): the function header (listing line 283) is absent from this
     // listing; presumably the x_GetCorrection() whose result
     // GetSuggestedCorrection() returns - confirm.
     //
     // Picks the corrected taxname by trying candidates in a fixed order and
     // returning the first one that is accepted.
285  const string& orig_taxname = m_Bsrc->GetTaxname();
     // 1. an original name that already starts with "uncultured" and resolves to
     //    data in taxonomy wins outright
286  if (NStr::StartsWith(orig_taxname, "uncultured", NStr::eNocase)) {
287  CRef<CT3Reply> reply_orig = x_GetReply(orig_taxname);
288  if (reply_orig && reply_orig->IsData()) {
289  return reply_orig->GetData().GetOrg().GetTaxname();
290  }
291  }
292 
293  string standard_taxname = x_StandardFixes();
294 
295  if (standard_taxname.empty())
296  return kEmptyStr;
297 
298  CRef<CT3Reply> reply = x_GetReply(standard_taxname);
299  _ASSERT(!reply.IsNull());
300 
301  string suggestion;
     // 2. ambiguous name: try the "bacterium" and then the "archaeon" form
302  if (s_IsAmbiguous(reply)) {
303  suggestion = s_MakeUnculturedName(standard_taxname, " bacterium");
304  if (x_CheckSuggestedFix(suggestion))
305  return suggestion;
306  suggestion = s_MakeUnculturedName(standard_taxname, " archaeon");
307  if (x_CheckSuggestedFix(suggestion))
308  return suggestion;
309  }
310 
     // 3. rank-based suggestion
311  // standard_taxname may change after this call
312  suggestion = x_TryRankFix(reply, standard_taxname);
313  if (x_CheckSuggestedFix(suggestion))
314  return suggestion;
315 
     // 4. plain "uncultured <name>", then "uncultured <name> sp."
316  suggestion = s_MakeUnculturedName(standard_taxname);
317  if (x_CheckSuggestedFix(suggestion))
318  return suggestion;
319 
320  suggestion = s_MakeUnculturedName(standard_taxname, " sp.");
321  if (x_CheckSuggestedFix(suggestion))
322  return suggestion;
323 
     // 5. nothing was accepted: fall back to the unverified plain form
324  suggestion = s_MakeUnculturedName(standard_taxname);
325  return suggestion;
326 }
327 
328 
329 CRef<CT3Reply> CUnculturedTool::x_GetReply(const string& standard_taxname)
330 {
331  if (m_ReplyCache.find(standard_taxname) != m_ReplyCache.end())
332  return m_ReplyCache[standard_taxname];
333 
334  _ASSERT(m_Bsrc);
335  vector<CRef<COrg_ref> > rq_list;
336  CRef<COrg_ref> org(new COrg_ref());
337  org->SetTaxname(standard_taxname);
338  rq_list.push_back(org);
339 
340  CTaxon3 taxon3;
341  taxon3.Init();
342  CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq_list);
343  CRef<CT3Reply> t3reply;
344  if (reply->IsSetReply() && !reply->GetReply().empty()) {
345  t3reply = reply->GetReply().front();
346  m_ReplyCache[standard_taxname] = t3reply;
347  }
348  return t3reply;
349 }
350 
351 
352 bool CUnculturedTool::x_CheckSuggestedFix(string &suggestion)
353 {
354  CRef<CT3Reply> reply = x_GetReply(suggestion);
355  _ASSERT(!reply.IsNull());
356 
357  string rank = s_GetRank(reply);
358  if (NStr::EqualNocase(rank, "species")) {
359  suggestion = s_GetSuggestion(reply);
360  // Note that this does not only perform a check - it also potentially modifies suggestion.
361  // This is how it's done in the original sequin code.
362  return true;
363  }
364 
365  return false;
366 }
367 
369 {
     // NOTE(review): the function header (listing line 368) and listing line 379
     // are absent from this listing; presumably this is
     // `void s_AddEnvironmentalSample(CBioSource& bsrc)` and the missing line
     // adds the modifier - confirm against the repository.
370  // add the environmental_sample modifier if there is not one already!
371  FOR_EACH_SUBSOURCE_ON_BIOSOURCE(subsrc, bsrc) {
372  if ((*subsrc)->IsSetSubtype()
373  && (*subsrc)->GetSubtype() == CSubSource::eSubtype_environmental_sample) {
374  // found
375  return;
376  }
377  }
378 
380 }
381 
383 {
     // NOTE(review): the function header (listing line 382) is absent from this
     // listing; presumably `bool s_IsAmbiguous(CRef<CT3Reply> reply)` - confirm.
     //
     // Returns true only for a non-null error reply whose message equals
     // "Taxname is ambiguous" (case-insensitive).
384  if (reply && reply->IsError() && reply->GetError().IsSetMessage()
385  && NStr::EqualNocase(reply->GetError().GetMessage(), "Taxname is ambiguous")) {
386  return true;
387  }
388  return false;
389 }
390 
392 {
     // NOTE(review): the function header (listing line 391) is absent from this
     // listing; presumably `bool s_OrganismNotFound(CRef<CT3Reply> reply)` -
     // confirm.
     //
     // Returns true only for a non-null error reply whose message equals
     // "Organism not found" (case-insensitive).
393  if (reply && reply->IsError() && reply->GetError().IsSetMessage()
394  && NStr::EqualNocase(reply->GetError().GetMessage(), "Organism not found")) {
395  return true;
396  }
397  return false;
398 }
399 
401 {
     // NOTE(review): the function header (listing line 400) is absent from this
     // listing; presumably `string s_GetRank(CRef<CT3Reply> reply)` - confirm.
     //
     // Returns the string value of the status entry whose property is "rank"
     // (case-insensitive), or an empty string when the reply is null, is not
     // data, has no status list, or has no such entry. The loop does not stop
     // at the first match, so the last matching entry wins.
402  string rank;
403  if (reply && reply->IsData() && reply->GetData().IsSetStatus())
404  ITERATE(CT3Data::TStatus, status, reply->GetData().GetStatus()) {
405  if ((*status)->IsSetProperty()
406  && NStr::EqualNocase((*status)->GetProperty(), "rank")
407  && (*status)->IsSetValue()
408  && (*status)->GetValue().IsStr()) {
409  rank = (*status)->GetValue().GetStr();
410  }
411  }
412 
413  return rank;
414 }
415 
417 {
     // NOTE(review): the function header (listing line 416) is absent from this
     // listing; presumably `string s_GetSuggestion(CRef<CT3Reply> reply)` -
     // confirm.
     //
     // Returns the taxname carried by a data reply; an empty string for a null
     // or non-data reply.
418  if (reply && reply->IsData()) {
419  return reply->GetData().GetOrg().GetTaxname();
420  }
421 
422  return kEmptyStr;
423 }
424 
426 {
     // NOTE(review): the function header (listing line 425) is absent from this
     // listing; presumably
     // `bool s_CompareOrgnameLineage(CRef<CT3Reply> reply, const string& lineage)`
     // - confirm.
     //
     // Returns true when the reply is data with an Org, an Orgname and a
     // lineage, and that lineage contains `lineage` as a case-insensitive
     // substring. Every accessor is guarded by its IsSet check first.
427  if (reply && reply->IsData() && reply->GetData().IsSetOrg()
428  && reply->GetData().GetOrg().IsSetOrgname()
429  && reply->GetData().GetOrg().GetOrgname().IsSetLineage()
430  && NStr::FindNoCase(reply->GetData().GetOrg().GetOrgname().GetLineage(), lineage) != NPOS) {
431  return true;
432  }
433  return false;
434 }
435 
436 
// NOTE(review): the function header (listing line 437) is absent from this
// listing; presumably `string x_TryRankFix(CRef<CT3Reply> reply, string& name)`
// - confirm against the header.
//
// Derives a suggested name from the taxonomy reply for `name` and sets the
// note-handling flags (m_ConvertNote, m_RemoveNote_Sp, m_RemoveNote_Univ) as a
// side effect. Returns an empty string when no suggestion applies.
438 // In case of binomial truncation the input name will be modified. This is what's happening in the original sequin code
439 {
440  string suggestion;
441  bool is_species_level(false), force_consult(false), has_nucleomorphs(false);
     // NOTE(review): unlike the static helpers in this file, `reply` is
     // dereferenced here without a null check - verify callers never pass null.
442  if (reply->IsData()) {
443  reply->GetData().GetTaxFlags(is_species_level, force_consult, has_nucleomorphs);
444  }
445  bool has_species_specific_note = s_HasSubSourceNote(m_Bsrc.GetObject(), sSpeciesSpecificPrimers);
446  bool has_universal_note = s_HasSubSourceNote(m_Bsrc.GetObject(), sUniversalPrimers);
447 
448  if (is_species_level) {
449  if (has_species_specific_note) {
     // species-level name with a species-specific note: keep the name from the
     // reply and request that the note be converted
450  suggestion = s_GetSuggestion(reply);
451  m_ConvertNote = true;
452  } else {
453  if (has_universal_note) {
454  m_RemoveNote_Univ = true;
455  }
     // binomial truncation: cut `name` down to its first word (modifying the
     // caller's string) and retry with the reply for the shortened name
456  if (NStr::FindNoCase(name, " ") != NPOS) {
457  string tmp, name2;
458  NStr::SplitInTwo(name, " ", name2, tmp);
459  name = name2;
460  CRef<CT3Reply> reply2 = x_GetReply(name);
461  if (!reply2) {
462  return suggestion;
463  }
464 
465  return x_TryRankFix(reply2, name);
466  }
467  }
468  } else {
469  if (!s_OrganismNotFound(reply)) {
470  // remove notes only if organism name has been found
471  if (has_species_specific_note) {
472  m_RemoveNote_Sp = true;
473  }
474  if (has_universal_note) {
475  m_RemoveNote_Univ = true;
476  }
477  }
478 
     // the suffix depends on the rank and on which lineage the reply matches;
     // when no lineage matches, `suggestion` stays empty
479  string rank = s_GetRank(reply);
480  if (NStr::EqualNocase(rank, "genus")) {
481  if (s_CompareOrgnameLineage(reply, "archaea") || s_CompareOrgnameLineage(reply, "bacteria"))
482  suggestion = s_MakeUnculturedName(s_GetSuggestion(reply), " sp.");
483  else if (s_CompareOrgnameLineage(reply, " Fungi;"))
484  suggestion = s_MakeUnculturedName(s_GetSuggestion(reply));
485  }
486  else {
487  if (s_CompareOrgnameLineage(reply, "archaea"))
488  suggestion = s_MakeUnculturedName(s_GetSuggestion(reply), " archaeon");
489  else if (s_CompareOrgnameLineage(reply, "bacteria"))
490  suggestion = s_MakeUnculturedName(s_GetSuggestion(reply), " bacterium");
491  else if (s_CompareOrgnameLineage(reply, " Fungi;"))
492  suggestion = s_MakeUnculturedName(s_GetSuggestion(reply));
493  }
494  }
495 
496  return suggestion;
497 }
498 
499 void CUnculturedTool::x_ConvertNote(CBioSource& bsrc, const char* note_list[], const string& new_note)
500 {
501  if (!bsrc.IsSetSubtype())
502  return;
503 
504  CBioSource::TSubtype::iterator subtype = bsrc.SetSubtype().begin();
505  while (subtype != bsrc.SetSubtype().end()) {
506  bool erased = false;
507  if ((*subtype)->IsSetSubtype()
508  && (*subtype)->GetSubtype() == CSubSource::eSubtype_other
509  && (*subtype)->IsSetName()) {
510  const string& name = (*subtype)->GetName();
511  int i = 0;
512  while (note_list[i]) {
513  if (NStr::FindNoCase(name, note_list[i]) != NPOS) {
514  if (new_note.empty()) {
515  subtype = bsrc.SetSubtype().erase(subtype);
516  erased = true;
517  } else {
518  (*subtype)->SetName(new_note);
519  }
520  break;
521  }
522  i++;
523  }
524  }
525 
526  if (!erased) {
527  ++subtype;
528  }
529  }
530 
531  if (bsrc.GetSubtype().empty()) {
532  bsrc.ResetSubtype();
533  }
534 }
535 
536 
537 ////////////////////////////////////////////////////////////////////////
538 /// class CCulturedTool
539 
/// Note texts removed from cultured sources by CCulturedTool.
/// The list ends with a null sentinel; the removal helpers walk it until that
/// sentinel and match each entry as a case-insensitive substring.
static const char* sCulturedNodes[] = {
    "[BankIt_cultured16S_wizard]",
    "[cultured bacterial source]",
    "[BankIt_cultured16S_wizard]; [universal primers]",
    "[cultured; wizard]",
    "[BankIt_cultured16S_wizard]; [species_specific primers]; [tgge]",
    "[BankIt_cultured16S_wizard]; [species_specific primers]; [dgge]",
    "[BankIt_cultured16S_wizard]; [species_specific primers]",
    "[BankIt_cultured23S_wizard]",
    "[BankIt_cultured16S-23SIGS_wizard]",
    nullptr   // sentinel
};
552 
// Cleans up the taxname of a cultured biosource and removes wizard notes.
//
// Works on a copy of the taxname, applies a fixed series of text edits, writes
// the result back when it differs (case-sensitive comparison) from the
// original, and then removes matching notes from both the subsources and the
// org modifiers. `remove_note` receives true when any note was removed.
//
// Note that `remove_note` is assigned only after the early return below, so it
// is left untouched when the taxname is empty.
//
// NOTE(review): this listing is incomplete - listing lines 565, 568 and 589
// are absent, so the "Novel " and "unclassified" s_EditText calls are cut off
// after their third argument and one statement inside the final `if` is not
// visible. Restore them from the repository before editing the logic.
553 void CCulturedTool::CorrectBioSource(CBioSource& bsrc, bool& remove_note)
554 {
555  m_Bsrc.Reset(const_cast<const CBioSource*>(&bsrc));
556 
557  // Combined here steps 1 - 10 from cultured_macro script (C version).
558  // Possible extension in the future to also combine steps 14 - 16.
559  string taxname = m_Bsrc->GetTaxname();
560  if (taxname.empty()) return;
561 
562  NStr::ReplaceInPlace(taxname, "_", " "); // step 3
563 
564  CMacroFunction_EditStringQual::s_EditText(taxname, "Novel ", "",
566 
567  CMacroFunction_EditStringQual::s_EditText(taxname, "unclassified", "",
569 
     // steps 6 - 10 rewrite trailing species abbreviations to " sp."; each call
     // passes eEnd as the search location and false for case sensitivity
570  CMacroFunction_EditStringQual::s_EditText(taxname, " SP.", " sp.",
571  CMacroFunction_EditStringQual::eEnd, false); // step 6
572 
573  CMacroFunction_EditStringQual::s_EditText(taxname, " SP", " sp.",
574  CMacroFunction_EditStringQual::eEnd, false); // step 7
575 
576  CMacroFunction_EditStringQual::s_EditText(taxname, " species", " sp.",
577  CMacroFunction_EditStringQual::eEnd, false); // step 8
578 
579  CMacroFunction_EditStringQual::s_EditText(taxname, " spp", " sp.",
580  CMacroFunction_EditStringQual::eEnd, false); // step 9
581 
582  CMacroFunction_EditStringQual::s_EditText(taxname, " spp.", " sp.",
583  CMacroFunction_EditStringQual::eEnd, false); // step 10
584 
585 
586 
     // write back only when the edits changed something
587  if (!NStr::EqualCase(m_Bsrc->GetTaxname(), taxname)) {
588  bsrc.SetOrg().SetTaxname(taxname);
590  }
591 
     // both removals always run; |= keeps a true result from the first one
592  remove_note = false;
593  remove_note |= x_RemoveIfFoundSubSourceNote(bsrc, sCulturedNodes);
594  remove_note |= x_RemoveIfFoundOrgModNote(bsrc, sCulturedNodes);
595 }
596 
597 
598 bool CCulturedTool::x_RemoveIfFoundSubSourceNote(CBioSource &bsrc, const char* note_list[])
599 {
600  if (!bsrc.IsSetSubtype()) {
601  return false;
602  }
603 
604  bool modified = false;
605  auto&& subtype = bsrc.SetSubtype().begin();
606  while (subtype != bsrc.SetSubtype().end()) {
607  bool erased = false;
608  if ((*subtype)->IsSetSubtype()
609  && (*subtype)->GetSubtype() == CSubSource::eSubtype_other
610  && (*subtype)->IsSetName()) {
611  const string& name = (*subtype)->GetName();
612  auto i = 0;
613  while (note_list[i]) {
614  if (NStr::FindNoCase(name, note_list[i]) != NPOS) {
615  subtype = bsrc.SetSubtype().erase(subtype);
616  erased = true;
617  modified = true;
618  break;
619  }
620  ++i;
621  }
622  }
623  if (!erased) {
624  ++subtype;
625  }
626  }
627 
628  if (bsrc.GetSubtype().empty()) {
629  bsrc.ResetSubtype();
630  }
631  return modified;
632 }
633 
634 bool CCulturedTool::x_RemoveIfFoundOrgModNote(CBioSource &bsrc, const char* note_list[])
635 {
636  if (!bsrc.IsSetOrgMod()) {
637  return false;
638  }
639 
640  bool modified = false;
641  COrgName::TMod& orgMods = bsrc.SetOrg().SetOrgname().SetMod();
642  auto&& orgmod = orgMods.begin();
643  while (orgmod != orgMods.end()) {
644  bool erased = false;
645  if ((*orgmod)->IsSetSubtype()
646  && (*orgmod)->GetSubtype() == CSubSource::eSubtype_other
647  && (*orgmod)->IsSetSubname()) {
648  const string& name = (*orgmod)->GetSubname();
649  auto i = 0;
650  while (note_list[i]) {
651  if (NStr::FindNoCase(name, note_list[i]) != NPOS) {
652  orgmod = orgMods.erase(orgmod);
653  erased = true;
654  modified = true;
655  break;
656  }
657  ++i;
658  }
659  }
660  if (!erased) {
661  ++orgmod;
662  }
663  }
664 
665  if (orgMods.empty()) {
666  bsrc.SetOrg().SetOrgname().ResetMod();
667  }
668  return modified;
669 }
670 
671 
672 ///////////////////////////////////////////////////////////////////////////////
673 /// class CMacroFunction_UnculturedTaxLookup
674 /// DoUnculturedTaxLookup();
675 ///
676 DEFINE_MACRO_FUNCNAME(CMacroFunction_UnculturedTaxLookup, "DoUnculturedTaxLookup")
// Runs CUnculturedTool::CorrectBioSource() on the edited biosource and, when
// the taxname or a primer note changed, marks the data iterator as modified
// and logs what was done. Does nothing when `bsrc` is null, when the iterator
// is on a feature, or when no taxname is set.
//
// NOTE(review): this listing is incomplete - listing lines 681 and 695 are
// absent, so the declarations of `bsrc` and `log` are not visible here.
677 void CMacroFunction_UnculturedTaxLookup::TheFunction()
678 {
679  // only for source descriptors
680  CObjectInfo oi = m_DataIter->GetEditedObject();
682  if (!bsrc || m_DataIter->IsFeature() || !bsrc->IsSetTaxname()) {
683  return;
684  }
685 
     // copy (not reference) so the name survives the in-place correction
686  const string orig_taxname = bsrc->GetTaxname();
687 
688  CUnculturedTool uncultured_taxtool;
689  bool converted_note(false), removed_note_sp(false), removed_note_univ(false);
690  uncultured_taxtool.CorrectBioSource(*bsrc, converted_note, removed_note_sp, removed_note_univ);
691  bool changed = (!NStr::EqualCase(orig_taxname, bsrc->GetTaxname())) || converted_note || removed_note_sp || removed_note_univ;
692 
693  if (changed) {
694  m_DataIter->SetModified();
696  if (!NStr::EqualCase(orig_taxname, bsrc->GetTaxname())) {
697  log << "Corrected " << orig_taxname << " to " << bsrc->GetTaxname();
698  }
     // The else-if chain means only one note message is logged, even when
     // several flags are set (conversion first, then species-specific removal,
     // then universal removal).
699  string msg;
700  if (converted_note) {
701  msg.assign("Converted species-specific primer note to \"amplified with species-specific primers\" for ");
702  }
703  else if (removed_note_sp) {
704  msg.assign("Removed species-specific primer note for ");;
705  }
706  else if (removed_note_univ) {
707  msg.assign("Removed universal primer note for ");
708  }
709  if (!msg.empty()) {
     // put the note message on its own line after a taxname message
710  if (!IsOssEmpty(log)) {
711  log << endl;
712  }
713  log << msg << bsrc->GetTaxname();
714  }
715  x_LogFunction(log);
716  }
717 }
718 
719 bool CMacroFunction_UnculturedTaxLookup::x_ValidArguments() const
720 {
721  return (m_Args.empty());
722 }
723 
724 
725 ///////////////////////////////////////////////////////////////////////////////
726 /// CMacroFunction_CulturedTaxLookup
727 /// DoCulturedTaxLookup();
728 ///
729 DEFINE_MACRO_FUNCNAME(CMacroFunction_CulturedTaxLookup, "DoCulturedTaxLookup")
// Runs CCulturedTool::CorrectBioSource() on the edited biosource and, when the
// taxname changed or a note was removed, marks the data iterator as modified
// and logs what was done. Does nothing when `bsrc` is null or when the
// iterator is on a feature.
//
// NOTE(review): this listing is incomplete - listing lines 734 and 758 are
// absent, so the declarations of `bsrc` and `log` are not visible here.
// NOTE(review): unlike the uncultured variant, this guard does not test
// IsSetTaxname() before GetTaxname() is called - confirm that is intended.
730 void CMacroFunction_CulturedTaxLookup::TheFunction()
731 {
732  // only for source descriptors
733  CObjectInfo oi = m_DataIter->GetEditedObject();
735  if (!bsrc || m_DataIter->IsFeature()) {
736  return;
737  }
738 
     // copy (not reference) so the name survives the in-place correction
739  const string orig_taxname = bsrc->GetTaxname();
740 
741  CCulturedTool cultured_taxtool;
742  bool removed_note(false);
743  cultured_taxtool.CorrectBioSource(*bsrc, removed_note);
744  bool changed = (!NStr::EqualCase(orig_taxname, bsrc->GetTaxname())) || removed_note;
745 
746  if (changed) {
747  m_DataIter->SetModified();
     // up to two lines: the taxname correction, then the note removal
748  string msg;
749  if (!NStr::EqualCase(orig_taxname, bsrc->GetTaxname())) {
750  msg = "Corrected " + orig_taxname + " to " + bsrc->GetTaxname();
751  }
752  if (removed_note) {
753  if (!msg.empty()) {
754  msg += "\n";
755  }
756  msg += "Removed note for " + bsrc->GetTaxname();
757  }
759  log << msg;
760  x_LogFunction(log);
761  }
762 }
763 
764 bool CMacroFunction_CulturedTaxLookup::x_ValidArguments() const
765 {
766  return (m_Args.empty());
767 }
768 
769 
770 
771 END_SCOPE(macro)
773 
774 /* @} */
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetOrgMod(void) const
Definition: BioSource.cpp:415
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
CObjectInfo –.
Definition: objectinfo.hpp:597
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
void GetTaxFlags(bool &is_species_level, bool &force_consult, bool &has_nucleomorphs) const
Definition: T3Data.cpp:58
virtual CRef< CTaxon3_reply > SendOrgRefList(const vector< CRef< COrg_ref > > &list, COrg_ref::fOrgref_parts result_parts=COrg_ref::eOrgref_default, fT3reply_parts t3result_parts=eT3reply_default)
Definition: taxon3.cpp:190
virtual void Init()
Definition: taxon3.cpp:74
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
static char tmp[3200]
Definition: utf8.c:42
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
bool m_RemoveNote_Sp
flag to remove species-specific note
static bool s_EditText(string &str, const string &find, const string &repl, ESearchLoc loc, bool case_sensitive, bool is_regex=false)
void CleanupForTaxnameChange(CObjectInfo oi)
Definition: macro_util.cpp:526
bool m_RemoveNote_Univ
flag to remove universal note
static bool s_IsUnculturedName(const string &taxname)
static const char * sUnfixable[]
static bool s_CompareOrgnameLineage(CRef< objects::CT3Reply > reply, const string &lineage)
bool x_RemoveIfFoundSubSourceNote(objects::CBioSource &bsrc, const char *note_list[])
string GetSuggestedCorrection(void)
static const char * sCulturedNodes[]
class CCulturedTool
bool m_ConvertNote
flag to convert species-specific note
static bool s_OkToTaxFix(const string &taxname)
CRef< objects::CT3Reply > x_GetReply(const string &standard_taxname)
static bool s_HasSubSourceNote(const CBioSource &bsrc, const char *note_list[])
CConstRef< objects::CBioSource > m_Bsrc
CConstRef< objects::CBioSource > m_Bsrc
static string s_MakeUnculturedName(const string &taxname, const string &suffix=kEmptyStr)
void CorrectBioSource(objects::CBioSource &bsrc, bool &converted_note, bool &removed_note_sp, bool &removed_note_univ)
void CorrectBioSource(objects::CBioSource &bsrc, bool &remove_note)
bool x_CheckSuggestedFix(string &suggestion)
static bool s_IsAmbiguous(CRef< objects::CT3Reply > reply)
#define DEFINE_MACRO_FUNCNAME(CL_NAME, FN_NAME)
static string s_GetRank(CRef< objects::CT3Reply > reply)
static void s_AddEnvironmentalSample(objects::CBioSource &bsrc)
static const char * sSpeciesSpecificPrimers[]
constants used in the uncultured macro tool
static const char * sUniversalPrimers[]
map< string, CRef< objects::CT3Reply > > m_ReplyCache
void x_ConvertNote(objects::CBioSource &biosource, const char *note_list[], const string &new_note)
bool x_RemoveIfFoundOrgModNote(objects::CBioSource &bsrc, const char *note_list[])
static bool s_OrganismNotFound(CRef< objects::CT3Reply > reply)
bool x_ShouldLookupTaxname(void)
static bool s_HasTaxId(const objects::CBioSource &bsrc)
static string s_GetSuggestion(CRef< objects::CT3Reply > reply)
string x_TryRankFix(CRef< objects::CT3Reply > reply, string &name)
static const TObjectType * SafeCast(TTypeInfo type)
Definition: serialutil.hpp:76
TObjectPtr GetObjectPtr(void) const
Get pointer to object.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType & GetObject(void) const
Get object.
Definition: ncbiobj.hpp:1697
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
bool IsOssEmpty(CNcbiOstrstream &oss)
Definition: ncbistre.hpp:831
#define kEmptyStr
Definition: ncbistr.hpp:123
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
#define NPOS
Definition: ncbistr.hpp:133
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:527
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
void ResetSubtype(void)
Reset Subtype data member.
Definition: BioSource_.cpp:113
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111
const TLineage & GetLineage(void) const
Get the Lineage member data.
Definition: OrgName_.hpp:864
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.
Definition: OrgName_.hpp:852
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:529
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
bool IsData(void) const
Check if variant Data is selected.
Definition: T3Reply_.hpp:263
const TData & GetData(void) const
Get the variant data.
Definition: T3Reply_.cpp:124
bool IsSetStatus(void) const
Check if a value has been assigned to Status data member.
Definition: T3Data_.hpp:328
const TStatus & GetStatus(void) const
Get the Status member data.
Definition: T3Data_.hpp:340
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: T3Data_.hpp:273
list< CRef< CT3StatusFlags > > TStatus
Definition: T3Data_.hpp:94
bool IsError(void) const
Check if variant Error is selected.
Definition: T3Reply_.hpp:257
const TError & GetError(void) const
Get the variant data.
Definition: T3Reply_.cpp:102
list< CRef< CT3Reply > > TReply
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: T3Data_.hpp:285
const TMessage & GetMessage(void) const
Get the Message member data.
Definition: T3Error_.hpp:394
bool IsSetMessage(void) const
Check if a value has been assigned to Message data member.
Definition: T3Error_.hpp:382
int i
static const char * suffix[]
Definition: pcregrep.c:408
#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)
FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.
#define _ASSERT
Modified on Wed Apr 24 14:16:50 2024 by modify_doxy.py rev. 669887