NCBI C++ ToolKit
cuSeqTreeAPI.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuSeqTreeAPI.cpp 90374 2020-06-08 15:56:03Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * API to handle SeqTree
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
40 
42 BEGIN_SCOPE(cd_utils)
43 
44 SeqTreeAPI::SeqTreeAPI(vector<CCdCore*>& cds, bool loadExistingTreeOnly)
45  : m_ma(), m_cd(0), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
46  m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
47  m_loadOnly(loadExistingTreeOnly)
48 {
49  vector<CDFamily*> families;
50  CDFamily::createFamilies(cds, families);
51  if(families.size() != 1 )
52  {
53  for (int i = 0 ; i <families.size(); i++)
54  delete families[i];
55  return;
56  }
57  m_ma.setAlignment(*families[0]);
58  m_family = families[0];
59 }
60 
62  : m_ma(), m_cd(cd), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
63  m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
64  m_loadOnly(true)
65 {
66 }
67 
69  : m_ma(), m_cd(0), m_family(&cdfam), m_seqTree(0), m_taxTree(0), m_taxClient(0),
70  m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(option), m_triedTreeMaking(false),
71  m_loadOnly(false)
72 {
73  m_family = new CDFamily(cdfam);
74 }
75 
77 {
78  if (m_seqTree) delete m_seqTree;
79  if (m_taxTree) delete m_taxTree;
80  if (m_taxClient) delete m_taxClient;
81  if (m_family) delete m_family;
82 }
83 
85 {
86  m_useMembership = true;
87 }
88 
90 {
91  m_taxLevel = tax;
92  m_useMembership = false;
93 }
94 
96 {
97  if (m_seqTree == 0)
99  if (m_seqTree == 0)
100  return 0;
101  else
102  return m_seqTree->getNumLeaf();
103 }
104  //return a string of tree method names
105  //lay out the tree to the specified area, .i.e. fit to screen
106 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, vector<SeqTreeEdge>& edges)
107 {
108  if (m_seqTree == 0)
109  makeOrLoadTree();
110  return layoutSeqTree(maxX, maxY, -1, edges);
111 }
112 
113  //lay out the tree with a fixed spacing between sequences
114 string SeqTreeAPI::layoutSeqTree(int maxX, vector<SeqTreeEdge>& edges, int yInt)
115 {
116  if (m_seqTree == 0)
117  makeOrLoadTree();
118  int maxY = 0; //not used
119  if (yInt < 2)
120  yInt = 2;
121  return layoutSeqTree(maxX, maxY, yInt, edges);
122 }
124 {
125  if (m_triedTreeMaking) //if already tried, don't try again
126  return m_seqTree != 0;
127  if (m_seqTree != 0)
128  {
129  delete m_seqTree;
130  m_seqTree = 0;
131  m_seqTree = new SeqTree;
132  }
133  if (!m_loadOnly)
134  {
135  if (m_ma.getFirstCD() == 0)
137  if(!m_ma.isBlockAligned())
138  {
139  ERR_POST("Sequence tree is not made for " <<m_ma.getFirstCD()->GetAccession()
140  <<" because it does not have a consistent block alognment.");
141  }
142  else
143  {
147  }
148  if (m_seqTree)
150  }
151  m_triedTreeMaking = true;
152  return m_seqTree != 0;
153 }
154 
156 {
157  if (m_triedTreeMaking) //if already tried, don't try again
158  return m_seqTree != 0;
159  m_seqTree = new SeqTree();
160  bool loaded = false;
161  if (m_cd)
163  else
164  loaded = loadAndValidateExistingTree();
165  if (!loaded)
166  {
167  return makeTree();
168  }
169  return loaded;
170 }
171 
172 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, int yInt, vector<SeqTreeEdge>& edges)
173 {
174  if (!m_seqTree)
175  return "";
176  SeqTreeRootedLayout treeLayout(yInt);
177  treeLayout.calculateNodePositions(*m_seqTree, maxX, maxY);
178  getAllEdges(edges);
182  param.append(" / " + GetScoringMatrixName(m_treeOptions.matrix));
183  }
184  return param;
185 }
186 
187 int SeqTreeAPI::getAllEdges(vector<SeqTreeEdge>& edges)
188 {
190  return edges.size();
191 }
192 
193 void SeqTreeAPI::getEdgesFromSubTree(const SeqTree::iterator& cursor, vector<SeqTreeEdge>& edges)
194 {
195  //if cursor is a leaf node, draw its name and return
196  if (cursor.number_of_children() == 0)
197  {
198  return;
199  }
200  //always draw from parent to children
201  SeqTreeNode nodeParent;
202  nodeParent.isLeaf = false;
203  nodeParent.x = cursor->x;
204  nodeParent.y = cursor->y;
205  SeqTree::sibling_iterator sib = cursor.begin();
206  while (sib != cursor.end())
207  {
208  SeqTreeNode nodeChild, nodeTurn;
209  nodeChild.x = sib->x;
210  nodeChild.y = sib->y;
211  nodeTurn.isLeaf = false;
212  nodeTurn.x = nodeParent.x;
213  nodeTurn.y= nodeChild.y;
214  if (sib.number_of_children() == 0)
215  {
216  nodeChild.isLeaf =true;
217  nodeChild.name = sib->name;
218  nodeChild.displayAnnotation = kEmptyStr;
219  annotateLeafNode(*sib,nodeChild);
220  }
221  else
222  nodeChild.isLeaf = false;
223  SeqTreeEdge e1, e2;
224  e1.first = nodeParent;
225  e1.second = nodeTurn;
226  e2.first = nodeTurn;
227  e2.second = nodeChild;
228  edges.push_back(e1);
229  edges.push_back(e2);
230  getEdgesFromSubTree(sib, edges);
231  ++sib;
232  }
233 }
234 
236 {
237  if (m_useMembership)
238  {
239  /*if (!nodeData.membership.empty() && m_loadOnly)
240  {
241  node.annotation = nodeData.membership;
242  }*/
243  if ((m_ma.GetNumRows() > 0))
244  {
245  CCdCore* cd = m_ma.GetScopedLeafCD(nodeData.rowID);
246  if (cd)
247  {
248  node.annotation = cd->GetAccession();
249  }
250  }
251  else if (m_cd)
252  {
253  if (!nodeData.membership.empty())
254  node.annotation = nodeData.membership;
255  else
256  node.annotation = m_cd->GetAccession();
257  }
258  }
259  else
260  {
261  if (m_cd)
262  {
263  if (m_taxClient == 0)
264  {
265  m_taxClient = new TaxClient();
266  m_taxClient->init();
267  }
268  TTaxId taxid = m_taxClient->GetTaxIDForSeqId(nodeData.seqId);
269  if (taxid >= ZERO_TAX_ID)
270  {
271  if (m_taxLevel == BySuperkingdom)
272  node.annotation = m_taxClient->GetSuperKingdom(taxid);
273  }
274  }
275  else
276  {
277  if (!m_taxTree)
278  m_taxTree = new TaxTreeData(m_ma);
279  string rankName = "superkingdom";
280  /*if(m_taxLevel == ByKingdom)
281  rankName = "kingdom";*/
282  TaxTreeIterator taxNode = m_taxTree->getParentAtRank(nodeData.rowID, rankName);
283  node.annotation = taxNode->orgName;
284  }
285  }
286 }
287 
289 {
290  if ( m_seqTree == 0)
291  m_seqTree = new SeqTree;
292  CCdCore* cd = 0;
293  if (m_family)
294  {
295  cd = m_family->getRootCD();
296  }
297  else
298  cd = m_ma.getFirstCD();
299  if (!cd->IsSetSeqtree())
300  return false;
301  SeqLocToSeqItemMap liMap;
303  return false;
304  CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
307  {
308  CDFamily *subfam = new CDFamily;
309  m_family->subfamily(m_family->begin(),subfam, true); //childrenOnly = true
310  m_ma.setAlignment(*subfam);
311  delete subfam;
312  }
313  else
314  {
316  }
317  bool validated = false;
319  validated = true;
320  else //if not same, resolve RowID with SeqLoc
321  {
324  }
325  if (validated)
327  return validated;
328 }
329 
330 bool SeqTreeAPI::loadExistingTree(CCdCore* cd, TreeOptions* treeOptions, SeqTree* seqTree)
331 {
332  if (!cd->IsSetSeqtree() || !treeOptions)
333  return false;
334 
335  //bool loaded = false;
336  SeqTree* tmpTree = 0;
337  SeqTree tmpTreeObj;
338 
339  if (seqTree)
340  tmpTree = seqTree;
341  else
342  tmpTree = &tmpTreeObj;
343 
344  SeqLocToSeqItemMap liMap;
345  if (!SeqTreeAsnizer::convertToSeqTree(cd->SetSeqtree(), *tmpTree, liMap))
346  return false;
347  CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
348  SeqTreeAsnizer::convertToTreeOption(algType, *treeOptions);
349  return true;
350 }
351 
352 END_SCOPE(cd_utils)
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
CCdCore * GetScopedLeafCD(int row) const
CCdCore * getFirstCD() const
CAlgorithm_type –.
string GetAccession(int &Version) const
Definition: cuCdCore.cpp:81
CCdCore * getRootCD() const
Definition: cuCdFamily.cpp:66
static int createFamilies(vector< CCdCore * > &cds, vector< CDFamily * > &families)
Definition: cuCdFamily.cpp:464
void subfamily(CDFamilyIterator cit, CDFamily *&subfam, bool childrenOnly=false)
Definition: cuCdFamily.cpp:187
static bool DistMethodUsesScoringMatrix(EDistMethod method)
Definition: cuDistmat.cpp:57
string GetDistMethodName()
Definition: cuDistmat.hpp:127
bool setAlignment(const CDFamily &family)
string membership
Definition: cuSeqtree.hpp:80
int rowID
Definition: cuSeqtree.hpp:74
CConstRef< CSeq_id > seqId
Definition: cuSeqtree.hpp:81
SeqTree * m_seqTree
void annotateTreeByTaxonomy(TaxonomyLevel tax)
int getNumOfLeaves()
void annotateLeafNode(const SeqItem &nodeData, SeqTreeNode &node)
MultipleAlignment m_ma
bool loadExistingTree(CCdCore *cd, TreeOptions *treeOptions, SeqTree *seqTree)
bool m_triedTreeMaking
CCdCore * m_cd
bool makeTree()
SeqTreeAPI(vector< CCdCore * > &cds, bool loadExistingTreeOnly=true)
bool loadAndValidateExistingTree()
bool makeOrLoadTree()
void getEdgesFromSubTree(const SeqTree::iterator &cursor, vector< SeqTreeEdge > &edges)
TreeOptions m_treeOptions
TaxTreeData * m_taxTree
TaxClient * m_taxClient
string layoutSeqTree(int maxX, int maxY, vector< SeqTreeEdge > &edges)
TaxonomyLevel m_taxLevel
CDFamily * m_family
int getAllEdges(vector< SeqTreeEdge > &edges)
bool m_useMembership
void annotateTreeByMembership()
static bool convertToSeqTree(CSequence_tree &asnSeqTree, SeqTree &seqTree, SeqLocToSeqItemMap &liMap)
static bool convertToTreeOption(const CRef< CAlgorithm_type > &alg, TreeOptions &treeOption)
static bool resolveRowId(const AlignmentCollection &ac, SeqLocToSeqItemMap &liMap)
static bool refillAsnMembership(const AlignmentCollection &ac, SeqLocToSeqItemMap &liMap)
void calculateNodePositions(SeqTree &treeData, int maxX, int maxY)
bool isSequenceCompositionSame(AlignmentCollection &aligns)
Definition: cuSeqtree.cpp:639
int getNumLeaf()
Definition: cuSeqtree.cpp:272
void fixRowName(AlignmentCollection &aligns, SeqNameMode mode)
Definition: cuSeqtree.cpp:531
virtual string GetSuperKingdom(TTaxId taxid)
virtual TTaxId GetTaxIDForSeqId(CConstRef< CSeq_id > sid)
virtual bool init()
Definition: cuTaxClient.cpp:77
TaxTreeIterator getParentAtRank(int row, string rankName)
Definition: cuTaxTree.cpp:546
static SeqTree * makeTree(AlignmentCollection *alignData, const TreeOptions &treeOptions)
Definition: map.hpp:338
sibling_iterator end() const
unsigned int number_of_children() const
sibling_iterator begin() const
pre_order_iterator begin() const
Definition: tree_msvc7.hpp:573
#define option
@ eScoreAligned
Definition: cuDistmat.hpp:64
@ eScoreBlastFoot
Definition: cuDistmat.hpp:66
@ eScoreBlastFull
Definition: cuDistmat.hpp:67
string GetScoringMatrixName(EScoreMatrixType type)
pair< SeqTreeNode, SeqTreeNode > SeqTreeEdge
string GetTreeAlgorithmName(ETreeMethod algorithm)
Definition: cuSeqTreeNj.cpp:47
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
const TAlgorithm & GetAlgorithm(void) const
Get the Algorithm member data.
void SetSeqtree(TSeqtree &value)
Assign a value to Seqtree data member.
Definition: Cdd_.cpp:396
bool IsSetSeqtree(void) const
Check if a value has been assigned to Seqtree data member.
Definition: Cdd_.hpp:1650
const TSeqtree & GetSeqtree(void) const
Get the Seqtree member data.
Definition: Cdd_.hpp:1662
int i
string displayAnnotation
string annotation
ETreeMethod clusteringMethod
EDistMethod distMethod
EScoreMatrixType matrix
CAlgorithm_type::ETree_scope scope
Modified on Sat Dec 09 04:49:34 2023 by modify_doxy.py rev. 669887