NCBI C++ ToolKit
cuPrefTaxNodes.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuPrefTaxNodes.hpp 89947 2020-04-30 13:05:44Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Repackaged by Chris Lanczycki from Charlie Liu's algTaxDataSource
27  *
28  * File Description:
29  *
30  * Class to maintain lists of preferred and model tax nodes,
31  * using the Cdd-pref-nodes ASN specification.
32  *
33  * ===========================================================================
34  */
35 
36 #ifndef CU_PREFTAXNODES_HPP
37 #define CU_PREFTAXNODES_HPP
41 
44 BEGIN_SCOPE(cd_utils)
45 
// Record pairing an insertion-order index with a reference to a CCdd_org_ref.
// NOTE(review): the constructor initializes an 'orgRef' member whose declaration
// is not visible in this listing -- confirm against the original header.
46 struct OrgNode
47 {
// Stores 'n' in 'order' and 'oref' in 'orgRef'.
48  OrgNode(int n, const CRef<CCdd_org_ref>& oref) : order(n), orgRef(oref) {}
49 
50  int order; // an index tracking the order of insertion into a particular TaxidOrgMap
52 };
53 
55 
57 {
58  static const string PREF_TAXNODE_FILE;
59 public:
60 
62  eCddPrefNodes = 1, // take only the 'preferred-nodes' field of Cdd-pref-nodes
63  eCddModelOrgs = 2, // take only the 'model-organisms' field of Cdd-pref-nodes
64  eCddOptional = 4, // take only the 'optional-nodes' field of Cdd-pref-nodes
65  eCddPrefNodesAll = 7, // treat all entries in Cdd-pref-nodes ASN.1 data structure as one set
66  eRawTaxIds = 8 // simply a set of taxonomy ids
67  };
68 
69  // All data read via the constructors are added to existing data
70  // unless reset = true, in which case existing data is removed first.
71 
72  // Will look for a file under 'data/txnodes.asn' relative to current directory.
73  // Reads in an object of type CCdd_pref_nodes.
74  CPriorityTaxNodes(TaxNodeInputType inputType);
75 
76  // Pass the full path to the file containing preferred tax node info formatted
77  // in ASN.1 as per the Cdd-pref-nodes spec.
78  CPriorityTaxNodes(const string& prefTaxnodeFileName, TaxNodeInputType inputType);
79 
80  // Use input to set up the internal maps.
81  CPriorityTaxNodes(const CCdd_pref_nodes& prefNodes, TaxNodeInputType inputType);
82 
83  // Given a list of taxids for each of the maps, construct the maps.
84  CPriorityTaxNodes(const vector< TTaxId >& taxids, TaxClient& taxClient, TaxNodeInputType inputType = eRawTaxIds);
85 
86  virtual ~CPriorityTaxNodes();
87 
88  // The contents of the file are loaded: they are added to existing data
89  // unless doReset = true, in which case existing data is erased.
90  // Cannot currently add to existing contents of pref nodes maps.
91  // Return true only if data was loaded from a file (false will be
92  // returned if data already exists and doReset = false).
93  bool LoadFromFile(const string& prefTaxnodeFileName, bool doReset = false);
94 
95  // Loads nodes from 'prefNodes' using same type as m_inputType.
96  unsigned int Load(const CCdd_pref_nodes& prefNodes, bool doReset = false);
97 
98  // If you also want to reset the node input type, pass a valid pointer.
99  // The selected tax node map is always cleared, but by default the ancestor map
100  // is not cleared unless a) the input type changes and doesn't include the current
101  // input type, or b) the 'forceClearAncestorMap' is true.
102  void Reset(TaxNodeInputType* inputType = NULL, bool forceClearAncestorMap = false);
103 
104  bool IsLoaded() {return m_loaded;}
105  string GetLastError() {return m_err;}
106  TaxNodeInputType GetNodeInputType() const {return m_inputType;}
107 
108  // Looks if taxid is *exactly* one of the priority tax nodes.
109  bool IsPriorityTaxnode(TTaxId taxid);
110 
111  // If taxidIn is not a priority taxnode, return its nearest ancestor that is.
112  // Use taxClient to ascend the lineage of taxidIn in the full taxonomy tree.
113  // Return true if a priorityTaxid is found; otherwise return false and set priorityTaxid = 0.
114  bool GetPriorityTaxid(TTaxId taxidIn, TTaxId& priorityTaxid, TaxClient& taxClient);
115  bool GetPriorityTaxidAndName(TTaxId taxidIn, TTaxId& priorityTaxid, string& nodeName, TaxClient& taxClient);
116 
117  // If not an exact match w/ a priority taxnode, and there is
118  // not an entry for the taxid in the corresponding ancestral map, use taxClient
119  // to ascend the tax tree to see if one of its ancestors is a match. Return the
120  // first such tax node's info.
121  // Return value itself is OrgNode.order, or -1 on failure or if taxid = 0.
122  int GetPriorityTaxnode(TTaxId taxid, string& nodeName, TaxClient* taxClient = NULL);
123  int GetPriorityTaxnode(TTaxId taxid, const OrgNode*& orgNode, TaxClient* taxClient = NULL);
124 
125  // Extract fields from a CCdd_org_ref.
126  static string getTaxName(const CRef< CCdd_org_ref >& orgRef);
127  static TTaxId getTaxId(const CRef< CCdd_org_ref >& orgRef);
128  static bool isActive(const CRef< CCdd_org_ref >& orgRef);
129 
130  // Conversion functions between a collection of taxonomy ids and a CCdd_org_ref_set
131  // object, which ultimately packages COrg_ref objects. Return value is the number
132  // of CCdd_org_ref/int added to the 2nd argument; last argument when present identifies
133  // those inputs which could not be converted. In both, no sentinel placeholders
134  // are left in the output data structure for failures --> output index not guaranteed
135  // to correspond to input index.
136  // NOTE: 2nd argument is not cleared/reset before adding new data.
137  static unsigned int CddOrgRefSetToTaxIds(const CCdd_org_ref_set& cddOrgRefSet, vector< TTaxId >& taxids, vector<int>* notAddedIndices = NULL);
138  static unsigned int TaxIdsToCddOrgRefSet(const vector< TTaxId >& taxids, CCdd_org_ref_set& cddOrgRefSet, TaxClient& taxClient, vector<TTaxId>* notAddedTaxids = NULL);
139 
140 protected:
141  string m_err;
142 
143 private:
144  bool m_loaded;
146 
147  // Map is not static so that there can be several different collections of tax nodes.
149 
150  // Used to map a taxid to the taxid found by findAncestor (mapped value may equal key value),
151  // which should have an entry in m_selectedTaxNodesMap.
152  // Maintain these to avoid use of TaxClient when possible.
155 
156  // Return m_selectedTaxNodesMap.end() if no ancestor found for taxid, or taxid = 0.
157  // Use taxClient only if non-null and the corresponding ancestral map does not contain
158  // non-zero taxid as a key, or if ancestralMap pointer is NULL (to force use of the TaxClient).
159  TaxidToOrgMap::iterator findAncestor(TTaxId taxid, TaxClient* taxClient);
160 
161  void putIntoMap(const CCdd_org_ref_set& orgRefs);
162  void BuildMap(const CCdd_pref_nodes& prefNodes, bool reset = false);
163  bool ReadPreferredTaxnodes(const string& fileName, bool reset = false);
164 };
165 
166 END_SCOPE(cd_utils)
168 
169 #endif // CU_PREFTAXNODES_HPP
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CCdd_org_ref_set –.
CCdd_pref_nodes –.
TaxNodeInputType m_inputType
TaxNodeInputType GetNodeInputType() const
static const string PREF_TAXNODE_FILE
TAncestorMap m_ancestralTaxNodeMap
TaxidToOrgMap m_selectedTaxNodesMap
map< TTaxId, TTaxId > TAncestorMap
USING_SCOPE(objects)
map< TTaxId, OrgNode > TaxidToOrgMap
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
yy_size_t n
const CRef< CCdd_org_ref > orgRef
OrgNode(int n, const CRef< CCdd_org_ref > &oref)
Modified on Tue Apr 23 07:39:42 2024 by modify_doxy.py rev. 669887