NCBI C++ ToolKit
bio_tree.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bio_tree.cpp 79999 2017-10-26 17:56:13Z falkrb $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Anatoliy Kuznetsov
27  *
28  * File Description: Things for representing and manipulating bio trees
29  *
30  */
31 
32 /// @file bio_tree.cpp
33 /// Things for representing and manipulating bio trees
34 
35 #include <ncbi_pch.hpp>
37 
39 
41 {
42 }
43 
45  : m_FeatureList(flist.m_FeatureList)
46 {}
47 
50 {
51  m_FeatureList.assign(flist.m_FeatureList.begin(),
52  flist.m_FeatureList.end());
53  return *this;
54 }
55 
57  const string& value)
58 {
60  if (it->id == id) {
61  it->value = value;
62  return;
63  }
64  }
65  m_FeatureList.push_back(CBioTreeFeaturePair(id, value));
66 }
67 
68 const string&
70 {
72  if (it->id == id) {
73  return it->value;
74  }
75  }
76  return kEmptyStr;
77 }
78 
80  string& result) const
81 {
83  if (it->id == id) {
84  result = it->value;
85  return true;
86  }
87  }
88  return false;
89 }
90 
92 {
94  if (it->id == id) {
95  m_FeatureList.erase(it);
96  return;
97  }
98  }
99 }
100 
101 
102 
103 
104 
105 
107  : m_IdCounter(0)
108 {
109 }
110 
111 
113  const CBioTreeFeatureDictionary& btr)
114  : m_Dict(btr.m_Dict),
115  m_Name2Id(btr.m_Name2Id),
116  m_IdCounter(btr.m_IdCounter)
117 {
118 }
119 
122 {
123  Clear();
124 
125  ITERATE(TFeatureDict, it, btr.m_Dict) {
126  m_Dict.insert(*it);
127  }
128 
129  ITERATE(TFeatureNameIdx, it, btr.m_Name2Id) {
130  m_Name2Id.insert(*it);
131  }
132 
133  m_IdCounter = btr.m_IdCounter;
134 
135  return *this;
136 }
137 
139 {
140  m_Dict.clear();
141  m_Name2Id.clear();
142  m_IdCounter = 0;
143 }
144 
145 bool
146 CBioTreeFeatureDictionary::HasFeature(const string& feature_name) const
147 {
149  return (it != m_Name2Id.end());
150 }
151 
152 bool
154 {
156  return (it != m_Dict.end());
157 }
158 
160 CBioTreeFeatureDictionary::Register(const string& feature_name)
161 {
163  if (it != m_Name2Id.end()) {
164  /// Feature already exists
165  return it->second;
166  }
167 
168  unsigned id = m_IdCounter;
169  Register(m_IdCounter, feature_name);
170  return id;
171 }
172 
174  const string& feature_name)
175 {
176  if (m_Dict.count(id)) {
177  if (m_Dict[id] == feature_name) {
178  /// Duplicate registration of the same feature
179  return;
180  }
182  "Duplicate feature id: " + NStr::NumericToString(id));
183  }
184  if (m_Name2Id.count(feature_name)) {
186  "Duplicate feature name: " + feature_name);
187  }
188  m_IdCounter = max(m_IdCounter, id+1);
189  m_Dict.insert(
190  pair<TBioTreeFeatureId, string>(id, feature_name));
192  pair<string, TBioTreeFeatureId>(feature_name, id));
193 
194 }
195 
196 void
198 {
199  if (!m_Dict.count(id)) {
200  return;
201  }
202 
203  m_Name2Id.erase(m_Dict[id]);
204  m_Dict.erase(id);
205 }
206 
207 
209 CBioTreeFeatureDictionary::GetId(const string& feature_name) const
210 {
212  if (it == m_Name2Id.end()) {
213  return (TBioTreeFeatureId)-1;
214  }
215  return it->second;
216 }
217 
219 {
221  if (it != m_Dict.end())
222  return it->second;
223  return "";
224 }
225 
226 
227 static string s_EncodeLabel(const string& label);
228 
229 // recursive function
231 {
232  if (!node.IsLeaf()) {
233  os << '(';
234  CBioTreeDynamic::TBioTreeNode::TNodeList_CI it = node.SubNodeBegin();
235  for (; it != node.SubNodeEnd(); ++it) {
236  if (it != node.SubNodeBegin()) {
237  os << ", ";
238  }
239 
240  const CBioTreeDynamic::TBioTreeNode::TParent* p = *it;
241  const CBioTreeDynamic::TBioTreeNode* pp =
243  PrintNode(os, tree, *pp, label_fmt);
244  }
245  os << ')';
246  }
247 
248  string label;
249  if (label_fmt) {
250  label = label_fmt->GetLabelForNode(node);
251  }
252  else {
253  if (tree.GetFeatureDict().HasFeature("label")) {
254  label = node.GetValue().features
255  .GetFeatureValue(tree.GetFeatureDict().GetId("label"));
256  }
257  }
258  if (!label.empty()) {
259  os << s_EncodeLabel(label);
260  }
261 
262  string dist_string;
263  if (tree.GetFeatureDict().HasFeature("dist")) {
264  dist_string = node.GetValue().features
265  .GetFeatureValue(tree.GetFeatureDict().GetId("dist"));
266  }
267  if (!dist_string.empty()) {
268  os << ':' << dist_string;
269  }
270 }
271 
272 
274 {
275  PrintNode(os, tree, *tree.GetTreeNode());
276  os << ';' << endl;
277  return os;
278 };
279 
281 {
282  PrintNode(os, tree, *tree.GetTreeNode(), label_fmt);
283  os << ';' << endl;
284 }
285 
286 
287 void WriteNexusTree(CNcbiOstream& os, const CBioTreeDynamic& tree, const string& tree_name, const IBioTreeDynamicLabelFormatter* label_fmt)
288 {
289  os << "#nexus\n\nbegin trees;\ntree " << tree_name << " = ";
290  WriteNewickTree(os, tree, label_fmt);
291  os << "\nend;" << endl;
292 };
293 
294 
// Encode a label for Newick format.
//
// A label containing none of the characters ( ) [ ] ' : ; , _ is written
// unquoted, with every space replaced by an underscore.
// Any other label is enclosed in single quotes, after escaping each embedded
// single quote by doubling it.
// e.g., "This 'label'" -> "'This ''label'''"
//
// @param label  raw label text
// @return       encoded label; an empty label is returned unchanged
static string s_EncodeLabel(const string& label)
{
    if (label.find_first_of("()[]':;,_") == string::npos) {
        // No need to quote, but any spaces must be changed to underscores
        string unquoted(label);
        for (string::size_type i = 0; i < unquoted.size(); ++i) {
            if (unquoted[i] == ' ') {
                unquoted[i] = '_';
            }
        }
        return unquoted;
    }

    // Quoted form.  A label without embedded quotes simply passes through
    // the loop untouched, so no separate fast path is needed.
    string quoted;
    quoted.reserve(label.size() + 2);
    quoted += '\'';
    for (string::size_type i = 0; i < label.size(); ++i) {
        quoted += label[i];
        if (label[i] == '\'') {
            // "'" -> "''"
            quoted += '\'';
        }
    }
    quoted += '\'';

    return quoted;
}
327 
328 
static string s_EncodeLabel(const string &label)
Definition: bio_tree.cpp:299
Things for representing and manipulating bio trees.
Feature dictionary.
Definition: bio_tree.hpp:176
Features storage for the bio tree node.
Definition: bio_tree.hpp:101
Interface to obtain custom labels for nodes.
Definition: bio_tree.hpp:497
void erase(iterator pos)
Definition: map.hpp:167
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
string GetName(TBioTreeFeatureId id) const
Return the feature name given the id, or "" if not found.
Definition: bio_tree.cpp:218
TFeatureDict m_Dict
id -> feature name map
Definition: bio_tree.hpp:222
void PrintNode(CNcbiOstream &os, const CBioTreeDynamic &tree, const CBioTreeDynamic::TBioTreeNode &node, const IBioTreeDynamicLabelFormatter *label_fmt)
Newick but without the terminal ';'.
Definition: bio_tree.cpp:230
void WriteNewickTree(CNcbiOstream &os, const CBioTreeDynamic &tree, const IBioTreeDynamicLabelFormatter *label_fmt)
Newick format output.
Definition: bio_tree.cpp:280
void Clear()
Clear the dictionary.
Definition: bio_tree.cpp:138
CBioTreeFeatureDictionary & operator=(const CBioTreeFeatureDictionary &btr)
Definition: bio_tree.cpp:121
unsigned int TBioTreeFeatureId
Feature Id.
Definition: bio_tree.hpp:60
TFeatureNameIdx m_Name2Id
feature name -> id map
Definition: bio_tree.hpp:223
void SetFeature(TBioTreeFeatureId id, const string &value)
Set feature value, feature if exists replaced, if not added.
Definition: bio_tree.cpp:56
virtual string GetLabelForNode(const CBioTreeDynamic::TBioTreeNode &node) const =0
TFeatureList m_FeatureList
Definition: bio_tree.hpp:132
TBioTreeFeatureId GetId(const string &feature_name) const
If feature is already registered returns its id by name.
Definition: bio_tree.cpp:209
CBioTreeFeatureList & operator=(const CBioTreeFeatureList &flist)
Definition: bio_tree.cpp:49
TBioTreeFeatureId Register(const string &feature_name)
Register new feature, return its id.
Definition: bio_tree.cpp:160
const string & GetFeatureValue(TBioTreeFeatureId id) const
Get feature value by id.
Definition: bio_tree.cpp:69
CBioNode TBioTreeNode
Biotree node (forms the tree hierarchy)
Definition: bio_tree.hpp:358
void Unregister(TBioTreeFeatureId id)
Definition: bio_tree.cpp:197
unsigned int m_IdCounter
Feature id counter.
Definition: bio_tree.hpp:225
bool HasFeature(const string &feature_name) const
Check if feature is listed in the dictionary.
Definition: bio_tree.cpp:146
void WriteNexusTree(CNcbiOstream &os, const CBioTreeDynamic &tree, const string &tree_name, const IBioTreeDynamicLabelFormatter *label_fmt)
Nexus format output (Newick with some stuff around it).
Definition: bio_tree.cpp:287
vector< CBioTreeFeaturePair > TFeatureList
Definition: bio_tree.hpp:103
void RemoveFeature(TBioTreeFeatureId id)
Remove feature from the list.
Definition: bio_tree.cpp:91
CNcbiOstream & operator<<(CNcbiOstream &os, const CBioTreeDynamic &tree)
Newick format output.
Definition: bio_tree.cpp:273
static const char label[]
int i
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
T max(T x_, T y_)
Tree node feature pair (id to string)
Definition: bio_tree.hpp:70
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:57:54 2024 by modify_doxy.py rev. 669887