NCBI C++ ToolKit
genus_species_fixup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genus_species_fixup.cpp 42425 2019-02-21 15:31:49Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
35 
37 
38 
41 
// Body of what is presumably CGenusSpeciesFixup::GetCommand (its signature line
// is absent from this listing). It builds nothing itself: it forwards `tse` to
// x_GetCommand together with the fixed command title "Genus-species fixup"
// and returns whatever x_GetCommand returns.
43 {
44  return x_GetCommand(tse, "Genus-species fixup");
45 }
46 
// Body of what is presumably CGenusSpeciesFixup::x_ApplyToBioSource (its
// signature line is absent from this listing).
//
// Acts only on a biosource that has a non-empty taxname and for which
// s_HasTaxId() reports no taxid. For such a biosource it:
//   1. resets the Org-ref's common name,
//   2. erases every OrgMod whose subtype is eSubtype_old_name,
//   3. replaces the Org-ref with the copy processed by s_GetNameFromTaxonServer(),
//   4. if the resulting taxname is non-empty and differs from the previous one,
//      appends a new SubSource named after the previous taxname.
//
// Returns true whenever the biosource qualified for processing (even if the
// taxname ended up unchanged), false otherwise.
48 {
49  bool modified = false;
50  if (biosource.IsSetTaxname() && !biosource.GetTaxname().empty() && !s_HasTaxId(biosource))
51  {
// Keep the taxname as it was before the lookup; it is compared against the new one below.
52  string name = biosource.GetTaxname();
53  biosource.SetOrg().ResetCommon();
// Drop all "old name" modifiers from the Orgname.
54  EDIT_EACH_ORGMOD_ON_ORGNAME(orgmod, biosource.SetOrg().SetOrgname())
55  {
56  if ((*orgmod)->IsSetSubtype() && (*orgmod)->GetSubtype() == COrgMod::eSubtype_old_name)
57  ERASE_ORGMOD_ON_ORGNAME(orgmod, biosource.SetOrg().SetOrgname());
58  }
59 
// Work on a copy of the Org-ref so the lookup helper can overwrite it freely.
60  CRef<COrg_ref> new_org(new COrg_ref);
61  new_org->Assign(biosource.SetOrg());
62  s_GetNameFromTaxonServer(new_org);
63 
64  biosource.SetOrg(*new_org);
65  string new_name = biosource.SetOrg().GetTaxname();
66  if (!new_name.empty() && new_name != name)
67  {
// Record the previous taxname in a new SubSource.
// NOTE(review): listing line 69 is missing between the next two statements;
// presumably it sets the SubSource subtype - confirm against the real file.
68  CRef< CSubSource > subsource(new CSubSource);
70  subsource->SetName(name);
71  biosource.SetSubtype().push_back(subsource);
72  }
// Set unconditionally for a qualifying biosource, not only when the name changed.
73  modified = true;
74  }
75 
76  return modified;
77 }
78 
// Body of what is presumably CGenusSpeciesFixup::s_HasTaxId (its signature line
// is absent from this listing).
//
// Returns true when the biosource has an Org-ref set and that Org-ref's
// GetTaxId() is non-zero; returns false when Org is unset or the taxid is 0.
// NOTE(review): the result of GetTaxId() is stored in an `int`; the listing's
// index shows its return type as TTaxId - confirm the conversion is valid in
// every build configuration.
80 {
81  if (biosource.IsSetOrg()) {
82  int taxid = biosource.GetOrg().GetTaxId();
83  if (taxid != 0) { // found taxid
84  return true;
85  }
86  }
87  return false;
88 }
89 
// Body of what is presumably CGenusSpeciesFixup::s_GetNameFromTaxonServer (its
// signature line is absent from this listing).
//
// Generates candidate taxnames from the Org-ref's current taxname with
// s_PermuteTaxname(), sends one Org-ref per candidate to the taxonomy service
// in a single SendOrgRefList() call, and overwrites `org` with the Org-ref of
// the first reply entry that passes the checks below. If no candidate is
// generated, or no reply entry passes, `org` is left unchanged.
91 {
92  string name = org->GetTaxname();
93  vector<string> submit;
94  s_PermuteTaxname(name,submit);
// Nothing to look up when no candidate names were produced.
95  if (submit.empty())
96  return;
97 
// One request per candidate: a full copy of `org` with only the taxname replaced.
98  vector<CRef<COrg_ref>> request_list;
99  for (auto&& it : submit)
100  {
101  CRef<COrg_ref> new_org(new COrg_ref());
102  new_org->Assign(*org);
103  new_org->SetTaxname(it);
104  request_list.push_back(new_org);
105  }
106 
107  CTaxon3 taxon3;
108  taxon3.Init();
109  CRef<CTaxon3_reply> reply;
110  reply = taxon3.SendOrgRefList(request_list);
// NOTE(review): `reply` is dereferenced without a null check - confirm that
// SendOrgRefList() never returns an empty CRef (e.g. on a service failure).
111  if (reply->IsSetReply() )
112  {
113  const auto& ans = reply->GetReply();
// Take the first acceptable answer, in the order the service returned them.
114  for (auto&& it : ans)
115  {
// NOTE(review): `>= 0` also accepts a taxid of 0, which s_HasTaxId() treats as
// "no taxid" - confirm whether `> 0` was intended.
116  if (it->IsData() &&
117  it->GetData().IsSetOrg() &&
118  it->GetData().GetOrg().GetTaxId() >= 0)
119  {
120  org->Assign(it->GetData().GetOrg());
121  break;
122  }
123  }
124  }
125 }
126 
127 void CGenusSpeciesFixup::s_PermuteTaxname(const string &name, vector<string> &submit)
128 {
129  vector<string> words;
130  NStr::Split(name, " ", words, NStr::fSplit_Tokenize);
131  if (words.size() == 1)
132  {
133  vector<string> subwords;
134  NStr::Split(name, "_", subwords, NStr::fSplit_Tokenize);
135 
136  for (int i = 0; i < subwords.size() - 1; i++)
137  {
138  string new_name = subwords[0];
139  for (int j = 1; j <= i; j++)
140  new_name += "_" + subwords[j];
141  new_name += " " + subwords[i+1];
142  for (int j = i+2; j < subwords.size(); j++)
143  new_name += "_" + subwords[j];
144  submit.push_back(new_name);
145  }
146  return;
147  }
148  if (words.size() == 2)
149  return;
150  if (words.size() >= 4 && NStr::Equal(words[2],"subsp.",NStr::eNocase))
151  {
152  string new_name = words[0] + " " + words[1] + " " + words[2] + " " + words[3];
153  submit.push_back(new_name);
154  }
155  if (words.size() >= 3 && !NStr::Equal(words[2],"subsp.",NStr::eNocase))
156  {
157  string new_name = words[0] + " " + words[1] + " " + words[2];
158  submit.push_back(new_name);
159  }
160  if (words.size() >= 2)
161  {
162  string new_name = words[0] + " " + words[1];
163  submit.push_back(new_name);
164  }
165 }
166 
User-defined methods of the data storage class.
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
static void s_PermuteTaxname(const string &name, vector< string > &submit)
static void s_GetNameFromTaxonServer(CRef< objects::COrg_ref > org)
CRef< CCmdComposite > GetCommand(objects::CSeq_entry_Handle tse)
virtual bool x_ApplyToBioSource(objects::CBioSource &biosource)
static bool s_HasTaxId(const objects::CBioSource &)
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
CSeq_entry_Handle –.
virtual CRef< CTaxon3_reply > SendOrgRefList(const vector< CRef< COrg_ref > > &list, COrg_ref::fOrgref_parts result_parts=COrg_ref::eOrgref_default, fT3reply_parts t3result_parts=eT3reply_default)
Definition: taxon3.cpp:190
virtual void Init()
Definition: taxon3.cpp:74
CRef< CCmdComposite > x_GetCommand(objects::CSeq_entry_Handle tse, const string &title)
USING_SCOPE(objects)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: SubSource_.hpp:319
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
void SetName(const TName &value)
Assign a value to Name data member.
Definition: SubSource_.hpp:359
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
@ eSubtype_old_name
Definition: OrgMod_.hpp:124
int i
#define EDIT_EACH_ORGMOD_ON_ORGNAME(Itr, Var)
#define ERASE_ORGMOD_ON_ORGNAME(Itr, Var)
ERASE_ORGMOD_ON_ORGNAME.
Modified on Wed Apr 24 14:10:03 2024 by modify_doxy.py rev. 669887