NCBI C++ ToolKit
cuTaxTree.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuTaxTree.hpp 101914 2024-03-01 17:18:17Z gouriano $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description: Retrieve and create a Taxonomy tree for several CDs.
29  * part of CDTree app
30  */
31 
32 #ifndef CU_TAXTREE_HPP
33 #define CU_TAXTREE_HPP
38 #include <list>
39 #include <stack>
40 #include <algorithm>
41 
44 BEGIN_SCOPE(cd_utils)
45 
47 {
48 public:
49  //fields for internal tax nodes
52  short rankId;
53  //fields for external seq nodes
54  int rowId;
57  //total and selected leaf counts
58  int numLeaves;
60 
61  //methods
62  TaxNode();
63  TaxNode(const TaxNode& rhs);
64  bool operator==(const TaxNode& rhs) const {return taxId == rhs.taxId;};
65  static bool isSeqLeaf(const TaxNode& node);
66  static bool isSubSeqLeaf(const TaxNode& node);
67  static TaxNode* makeTaxNode(TTaxId taxID, std::string taxName, short rankId=-1);
68  static TaxNode* makeSeqLeaf(int rowID, std::string sequenceName);
69  static TaxNode* makeSubSeqLeaf(int rowID, CCdCore* cd, int rowInCd);
70 
71 private:
72  void init();
73 };
74 
77 //typedef list<CCd*> CDList;
78 
79 /* taxonomy ranking -- 8 levels in total
80  Superkingdom: Eukaryota
81  Kingdom: Metazoa
82  Phylum: Chordata
83  Class: Mammalia
84  Order: Primates
85  Family: Hominidae
86  Genus: Homo
87  Species: sapiens
88 */
89 
90 // define a family hierarchy of CDs
92 {
93 public:
95 
96  const vector<int>& getFailedRows() { return m_failedRows;}
97  void selectTaxNode(TaxTreeIterator& taxNode, bool select);
98  void setSelections(const vector<int>& rowIDs, CCdCore* cd=0);
99  int getSelections(vector<int>& rows);
100  void clearSelection();
101  void deselectAllTaxNodes();
102  void fillLeafCount(const TaxTreeIterator& cursor);
103  //bool isPreferredTaxNode(const TaxTreeIterator& taxNode);
104  int getAllLeafNodes(const TaxTreeIterator& taxNode, vector<TaxTreeIterator>& nodes) const;
105  short getRankId(string rankName);
106  TaxTreeIterator getParentAtRank(int row, string rankName);
107  bool isEmpty()const;
108  //bool missLocalTaxFiles()const {return m_missLocalTaxFiles;}
109  ~TaxTreeData();
110 
111  void addTaxToBioseq(CBioseq& bioseq, TTaxId taxid, string& taxName);
112  bool writeToFile(string fname)const;
113  bool writeToFileAsTable(string fname)const;
114  bool write(std::ostream&os, const iterator& cursor)const;
115  bool writeAsTable(std::ostream&os, const iterator& cursor, const iterator& branchingNode)const;
116 private:
122  // wrapper of taxonomy server class
124  vector<int> m_failedRows;
125 
126  bool makeTaxonomyTree();
127  void addRows(const AlignmentCollection& ac);
128  // get integer taxid for a sequence
129  TTaxId GetTaxIDForSequence(const AlignmentCollection& aligns, int rowID);
130  // get info for taxid
131  void selectTaxTreeLeaf(const TaxTreeIterator& cursor, bool select, CCdCore* cd=0);
132  void addSeqTax(int rowID, string seqName, TTaxId taxid);
133  void growAndInsertLineage(stack<TaxNode*>& lineage);
134  void insertLineage(TaxTreeIterator& pos, stack<TaxNode*>& lineage);
135  void cacheRank(short rank, string rankName);
136  void writeOutRanks();
137 };
138 
139 END_SCOPE(cd_utils)
141 #endif
142 
bool operator==(const TaxNode &rhs) const
Definition: cuTaxTree.hpp:64
std::string orgName
Definition: cuTaxTree.hpp:51
int numLeaves
Definition: cuTaxTree.hpp:58
std::string seqName
Definition: cuTaxTree.hpp:56
TTaxId taxId
Definition: cuTaxTree.hpp:50
CCdCore * cd
Definition: cuTaxTree.hpp:55
short rankId
Definition: cuTaxTree.hpp:52
int rowId
Definition: cuTaxTree.hpp:54
int selectedLeaves
Definition: cuTaxTree.hpp:59
RankNameToId m_rankNameToId
Definition: cuTaxTree.hpp:121
const AlignmentCollection & m_ac
Definition: cuTaxTree.hpp:117
TaxClient * m_taxDataSource
Definition: cuTaxTree.hpp:123
map< string, short > RankNameToId
Definition: cuTaxTree.hpp:120
vector< int > m_failedRows
Definition: cuTaxTree.hpp:124
RowToTaxNode m_rowToTaxNode
Definition: cuTaxTree.hpp:119
const vector< int > & getFailedRows()
Definition: cuTaxTree.hpp:96
map< int, TaxonomyTree::iterator > RowToTaxNode
Definition: cuTaxTree.hpp:118
int GetTaxIDForSequence(const Sequence *seq)
USING_SCOPE(objects)
tree< TaxNode > TaxonomyTree
Definition: cuTaxTree.hpp:75
TaxonomyTree::iterator TaxTreeIterator
Definition: cuTaxTree.hpp:76
static void DLIST_NAME() init(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:40
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
string
Definition: cgiapp.hpp:690
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
#define row(bind, expected)
Definition: string_bind.c:73
Modified on Fri Sep 20 14:58:24 2024 by modify_doxy.py rev. 669887