NCBI C++ ToolKit
cuPrefTaxNodes.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuPrefTaxNodes.cpp 98275 2022-10-20 16:09:40Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Repackaged by Chris Lanczycki from Charlie Liu's algTaxDataSource
27  *
28  * File Description:
29  *
30  * Class to maintain lists of preferred and model tax nodes,
31  * using the Cdd-pref-nodes ASN specification.
32  *
33  * ===========================================================================
34  */
35 
36 
37 #include <ncbi_pch.hpp>
41 
44 BEGIN_SCOPE(cd_utils)
45 
// Default path of the preferred tax-node file; the constructor that takes only
// an input type passes this value to LoadFromFile().
46 const string CPriorityTaxNodes::PREF_TAXNODE_FILE = "data/txnodes.asn";
47 
48 CPriorityTaxNodes::CPriorityTaxNodes(TaxNodeInputType inputType) : m_inputType(inputType)
49 {
50  string filename = PREF_TAXNODE_FILE;
51  LoadFromFile(filename, false);
52 }
53 
54 CPriorityTaxNodes::CPriorityTaxNodes(const string& prefTaxnodeFileName, TaxNodeInputType inputType) : m_inputType(inputType)
55 {
56  LoadFromFile(prefTaxnodeFileName, false);
57 }
58 
59 CPriorityTaxNodes::CPriorityTaxNodes(const CCdd_pref_nodes& prefNodes, TaxNodeInputType inputType) : m_inputType(inputType)
60 {
61  BuildMap(prefNodes, false);
62  m_loaded = true;
63 }
64 
65 
66 CPriorityTaxNodes::CPriorityTaxNodes(const vector< TTaxId >& taxids, TaxClient& taxClient, TaxNodeInputType inputType) : m_inputType(inputType)
67 {
68  CCdd_org_ref_set cddOrgRefSet;
69  unsigned int nAdded = TaxIdsToCddOrgRefSet(taxids, cddOrgRefSet, taxClient);
70 
71  Reset();
72  putIntoMap(cddOrgRefSet);
73  m_loaded = (nAdded == taxids.size());
74 }
75 
77 {
78 }
79 
80 void CPriorityTaxNodes::Reset(TaxNodeInputType* inputType, bool forceClearAncestorMap) {
81 
82  m_err = "";
83  m_loaded = false;
85 
86  if (forceClearAncestorMap || (inputType && !(m_inputType & *inputType))) {
88  }
89 
90  if (inputType) {
91  m_inputType = *inputType;
92  }
93 }
94 
95 
96 bool CPriorityTaxNodes::LoadFromFile(const string& prefTaxnodeFileName, bool doReset)
97 {
98  bool result = ReadPreferredTaxnodes(prefTaxnodeFileName, doReset);
99 
100  if (!result)
101  m_err = "Failed to read preferred Taxonomy nodes from file '" + prefTaxnodeFileName + "'.\n";
102 
103  m_loaded = result;
104  return result;
105 }
106 
107 unsigned int CPriorityTaxNodes::Load(const CCdd_pref_nodes& prefNodes, bool reset)
108 {
109  unsigned int nInit = (reset) ? 0 : m_selectedTaxNodesMap.size();
110  BuildMap(prefNodes, reset);
111  return m_selectedTaxNodesMap.size() - nInit;
112 }
113 
114 bool CPriorityTaxNodes::ReadPreferredTaxnodes(const string& filename, bool reset)
115 {
116  CCdd_pref_nodes prefNodes;
117  if (!ReadASNFromFile(filename.c_str(), &prefNodes, false, &m_err))
118  {
119  return false;
120  }
121 
122  BuildMap(prefNodes, reset);
123  return true;
124 }
125 
126 void CPriorityTaxNodes::BuildMap(const CCdd_pref_nodes& prefNodes, bool reset) {
127  if (reset)
128  Reset();
129 
130  //build a taxId/taxName map
132  putIntoMap(prefNodes.GetPreferred_nodes());
133  if ((m_inputType & eCddModelOrgs) && prefNodes.CanGetModel_organisms())
134  putIntoMap(prefNodes.GetModel_organisms());
135  if ((m_inputType & eCddOptional) && prefNodes.CanGetOptional_nodes())
136  putIntoMap(prefNodes.GetOptional_nodes());
137 }
138 
// NOTE(review): the signature line of this definition is absent here.  The body
// reads a parameter named orgRefs; presumably this is
// putIntoMap(const CCdd_org_ref_set& orgRefs) -- confirm.
140 {
141  const list< CRef< CCdd_org_ref > >& orgList = orgRefs.Get();
142  list< CRef< CCdd_org_ref > >::const_iterator cit = orgList.begin();
// Counter starts at the current number of entries in the selected-node map.
143  int i = m_selectedTaxNodesMap.size();
144  for (; cit != orgList.end(); cit++)
145  {
// NOTE(review): as shown, the loop only advances the counter; the statement
// that stores *cit appears to be missing -- confirm against the repository.
147  i++;
148  }
149 }
150 
152 {
153  if (orgRef->CanGetReference())
154  {
155  const COrg_ref& org = orgRef->GetReference();
156  if (org.IsSetTaxname()) {
157  return org.GetTaxname();
158  }
159  }
160 
161  return kEmptyStr;
162 }
163 
165 {
166  if (orgRef->CanGetReference())
167  {
168  const COrg_ref& org = orgRef->GetReference();
169  return org.GetTaxId();
170  }
171  else
172  return ZERO_TAX_ID;
173 }
174 
176 {
177  return orgRef->GetActive();
178 }
179 
180 unsigned int CPriorityTaxNodes::TaxIdsToCddOrgRefSet(const vector< TTaxId >& taxids, CCdd_org_ref_set& cddOrgRefSet, TaxClient& taxClient, vector<TTaxId>* notAddedTaxids)
181 {
182 
183  unsigned int nAdded = 0, nTaxa = taxids.size();
184  CCdd_org_ref_set::Tdata& cddOrgRefList = cddOrgRefSet.Set();
185 
186  if (notAddedTaxids) notAddedTaxids->clear();
187 
188  for (unsigned int i = 0; i < nTaxa; ++i) {
189  CRef< CCdd_org_ref > cddOrgRef( new CCdd_org_ref );
190  if (cddOrgRef.NotEmpty()) {
191  COrg_ref& orgRef = cddOrgRef->SetReference();
192  if (taxClient.GetOrgRef(taxids[i], orgRef)) {
193 // CRef< CCdd_org_ref::TReference > orgRef( &cddOrgRef->SetReference());
194 // if (taxClient.GetOrgRef(taxids[i], orgRef)) {
195  cddOrgRef->SetActive(true);
196  cddOrgRefList.push_back(cddOrgRef);
197  ++nAdded;
198  } else if (notAddedTaxids) {
199  notAddedTaxids->push_back(taxids[i]);
200  }
201  } else if (notAddedTaxids) {
202  notAddedTaxids->push_back(taxids[i]);
203  }
204  }
205  return nAdded;
206 }
207 
208 unsigned int CPriorityTaxNodes::CddOrgRefSetToTaxIds(const CCdd_org_ref_set& cddOrgRefSet, vector< TTaxId >& taxids, vector<int>* notAddedIndices)
209 {
210  TTaxId taxId;
211  unsigned int taxaIndex = 0, nAdded = 0;
212  const CCdd_org_ref_set::Tdata cddOrgRefList = cddOrgRefSet.Get();
213  CCdd_org_ref_set::Tdata::const_iterator cddOrgRefListCit = cddOrgRefList.begin(), citEnd = cddOrgRefList.end();
214 
215  if (notAddedIndices) notAddedIndices->clear();
216 
217  for (; cddOrgRefListCit != citEnd; ++cddOrgRefListCit ) {
218  taxId = getTaxId(*cddOrgRefListCit);
219  if (taxId > ZERO_TAX_ID) {
220  taxids.push_back(taxId);
221  ++nAdded;
222  } else if (notAddedIndices) {
223  notAddedIndices->push_back(taxaIndex);
224  }
225  ++taxaIndex;
226  }
227  return nAdded;
228 }
229 
231 {
232  TaxidToOrgMap::iterator titEnd = m_selectedTaxNodesMap.end(), tit = titEnd;
233  TAncestorMap::iterator ancestorIt;
234 
235  if (taxid != ZERO_TAX_ID) {
236  // First see if this taxid has been seen before; if so, retrieve iterator from toMap...
237  ancestorIt = m_ancestralTaxNodeMap.find(taxid);
238  if (ancestorIt != m_ancestralTaxNodeMap.end() && ancestorIt->second >= ZERO_TAX_ID) {
239  tit = m_selectedTaxNodesMap.find(ancestorIt->second);
240  }
241 
242  // If no ancestralMap, or ancestor not in ancestralMap, use taxClient if present.
243  // Add ancestral taxid to ancestralMap if found.
244  if (taxClient && tit == titEnd) {
245  for (tit = m_selectedTaxNodesMap.begin(); tit != titEnd; tit++)
246  {
247  if (taxClient->IsTaxDescendant(tit->first, taxid)) {
249  break;
250  }
251  }
252  }
253  }
254 
255  return tit;
256 }
257 
259 {
261  return it != m_selectedTaxNodesMap.end();
262 }
263 
264 bool CPriorityTaxNodes::GetPriorityTaxid(TTaxId taxidIn, TTaxId& priorityTaxid, TaxClient& taxClient)
265 {
266  string nodeName;
267  return GetPriorityTaxidAndName(taxidIn, priorityTaxid, nodeName, taxClient);
268 }
269 
270 bool CPriorityTaxNodes::GetPriorityTaxidAndName(TTaxId taxidIn, TTaxId& priorityTaxid, string& nodeName, TaxClient& taxClient)
271 {
272  bool result = false;
274 
275  priorityTaxid = ZERO_TAX_ID;
276  nodeName = kEmptyStr;
277  if (it != itEnd) {
278  priorityTaxid = taxidIn;
279  result = true;
280  } else { // fail to find exact match; try to find ancetral match
281  it = findAncestor(taxidIn, &taxClient);
282  if (it != itEnd)
283  {
284  priorityTaxid = it->first;
285  result = true;
286  }
287  }
288 
289  if (it != itEnd) { // result = true
290  nodeName = getTaxName(it->second.orgRef);
291  }
292 
293  return result;
294 }
295 
296 // return -1 if fails or taxid = 0
297 int CPriorityTaxNodes::GetPriorityTaxnode(TTaxId taxid, const OrgNode*& orgNode, TaxClient* taxClient)
298 {
300 
301  orgNode = NULL;
302  if (taxid != ZERO_TAX_ID) {
303  if (it != itEnd)
304  {
305  orgNode = &(it->second);
306  return it->second.order;
307  }
308  else // fail to find exact match; try to find ancetral match
309  {
310  it = findAncestor(taxid, taxClient);
311  if (it != itEnd)
312  {
313  orgNode = &(it->second);
314  return it->second.order;
315  }
316  }
317  }
318  return -1;
319 }
320 
321 // return index into list; -1 if fails
322 int CPriorityTaxNodes::GetPriorityTaxnode(TTaxId taxid, string& nodeName, TaxClient* taxClient)
323 {
324  const OrgNode* orgNode = NULL;
325 
326  nodeName = kEmptyStr;
327  if (GetPriorityTaxnode(taxid, orgNode, taxClient) != -1 && orgNode) {
328  nodeName.append(getTaxName(orgNode->orgRef));
329  return orgNode->order;
330  }
331  return -1;
332 }
333 
334 END_SCOPE(cd_utils)
User-defined methods of the data storage class.
CCdd_org_ref_set –.
CCdd_org_ref –.
Definition: Cdd_org_ref.hpp:66
CCdd_pref_nodes –.
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
bool LoadFromFile(const string &prefTaxnodeFileName, bool doReset=false)
bool ReadPreferredTaxnodes(const string &fileName, bool reset=false)
TaxNodeInputType m_inputType
int GetPriorityTaxnode(TTaxId taxid, string &nodeName, TaxClient *taxClient=NULL)
bool GetPriorityTaxid(TTaxId taxidIn, TTaxId &priorityTaxid, TaxClient &taxClient)
virtual ~CPriorityTaxNodes()
bool IsPriorityTaxnode(TTaxId taxid)
static TTaxId getTaxId(const CRef< CCdd_org_ref > &orgRef)
static string getTaxName(const CRef< CCdd_org_ref > &orgRef)
unsigned int Load(const CCdd_pref_nodes &prefNodes, bool doReset=false)
static unsigned int CddOrgRefSetToTaxIds(const CCdd_org_ref_set &cddOrgRefSet, vector< TTaxId > &taxids, vector< int > *notAddedIndices=NULL)
CPriorityTaxNodes(TaxNodeInputType inputType)
static const string PREF_TAXNODE_FILE
static unsigned int TaxIdsToCddOrgRefSet(const vector< TTaxId > &taxids, CCdd_org_ref_set &cddOrgRefSet, TaxClient &taxClient, vector< TTaxId > *notAddedTaxids=NULL)
void BuildMap(const CCdd_pref_nodes &prefNodes, bool reset=false)
TaxidToOrgMap::iterator findAncestor(TTaxId taxid, TaxClient *taxClient)
void Reset(TaxNodeInputType *inputType=NULL, bool forceClearAncestorMap=false)
static bool isActive(const CRef< CCdd_org_ref > &orgRef)
bool GetPriorityTaxidAndName(TTaxId taxidIn, TTaxId &priorityTaxid, string &nodeName, TaxClient &taxClient)
TAncestorMap m_ancestralTaxNodeMap
TaxidToOrgMap m_selectedTaxNodesMap
void putIntoMap(const CCdd_org_ref_set &orgRefs)
virtual bool GetOrgRef(TTaxId taxId, COrg_ref &orgRef)
virtual bool IsTaxDescendant(TTaxId tax1, TTaxId tax2)
size_type size() const
Definition: map.hpp:148
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static bool ReadASNFromFile(const char *filename, ASNClass *ASNobject, bool isBinary, std::string *err)
USING_SCOPE(objects)
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
void SetActive(TActive value)
Assign a value to Active data member.
const Tdata & Get(void) const
Get the member data.
const TModel_organisms & GetModel_organisms(void) const
Get the Model_organisms member data.
bool CanGetOptional_nodes(void) const
Check if it is safe to call GetOptional_nodes method.
TActive GetActive(void) const
Get the Active member data.
list< CRef< CCdd_org_ref > > Tdata
void SetReference(TReference &value)
Assign a value to Reference data member.
bool CanGetReference(void) const
Check if it is safe to call GetReference method.
bool CanGetModel_organisms(void) const
Check if it is safe to call GetModel_organisms method.
bool CanGetPreferred_nodes(void) const
Check if it is safe to call GetPreferred_nodes method.
const TPreferred_nodes & GetPreferred_nodes(void) const
Get the Preferred_nodes member data.
Tdata & Set(void)
Assign a value to data member.
const TReference & GetReference(void) const
Get the Reference member data.
const TOptional_nodes & GetOptional_nodes(void) const
Get the Optional_nodes member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
bool IsSetTaxname(void) const
Check if a value has been assigned to the Taxname data member (the preferred formal name).
Definition: Org_ref_.hpp:360
int i
const CRef< CCdd_org_ref > orgRef
else result
Definition: token2.c:20
Modified on Thu Nov 30 04:56:46 2023 by modify_doxy.py rev. 669887