NCBI C++ ToolKit
DistanceMatrix.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: DistanceMatrix.cpp 72177 2016-04-20 15:50:10Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Josh Cherry
27  *
28  * File Description:
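29  * Implementation of CDistanceMatrix: conversion to/from CNcbiMatrix<double> and reading a distance matrix from a text stream.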
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using the following specifications:
34  * 'biotree.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 
40 // generated includes
42 
43 // generated classes
44 
46 
47 BEGIN_objects_SCOPE // namespace ncbi::objects::
48 
49 // destructor
51 {
52 }
53 
54 
56 {
57  size_t sz = GetLabels().size();
58  if (GetDistances().size() != sz * (sz - 1) / 2) {
59  throw runtime_error("mismatch between number of labels "
60  "and number of distances");
61  }
62  mat.Resize(sz, sz);
63  mat.Set(0);
64  size_t idx = 0;
65  for (size_t j = 0; j < sz; ++j) {
66  for (size_t i = j + 1; i < sz; ++i) {
67  mat(i, j) = mat(j, i) = GetDistances()[idx];
68  ++idx;
69  }
70  }
71 }
72 
73 
75 {
76  // Check that the matrix passed in is appropriate
77  if (mat.GetRows() != mat.GetCols()) {
78  throw runtime_error("matrix is not square");
79  }
80  size_t sz = mat.GetCols();
81  for (size_t i = 0; i < sz; ++i) {
82  if (mat(i, i) != 0) {
83  throw runtime_error("element on main diagonal is nonzero");
84  }
85  for (size_t j = 0; j < sz; ++j) {
86  if (mat(i, j) != mat(j, i)) {
87  throw runtime_error("matrix is not symmetric");
88  }
89  }
90  }
91 
92  // Set distances accordingly
94  for (size_t j = 0; j < sz; ++j) {
95  for (size_t i = j + 1; i < sz; ++i) {
96  // mat(i, j) == mat(j, i) here: symmetry was already verified above
97  SetDistances().push_back(mat(i, j));
98  }
99  }
100 }
101 
102 
103 void CDistanceMatrix::Read(istream &istr, EFormat format) {
104  if (format == eGuess) {
105  CT_POS_TYPE init_pos = istr.tellg();
106  try {
107  Read(istr, eSquare);
108  return;
109  }
110  catch (std::exception&) {
111  istr.seekg(init_pos);
112  }
113  try {
114  Read(istr, eUpper);
115  return;
116  }
117  catch (std::exception&) {
118  istr.seekg(init_pos);
119  }
120  Read(istr, eLower);
121  return;
122  }
123 
124  string line;
125  NcbiGetlineEOL(istr, line);
126 
127  unsigned int dim = NStr::StringToUInt(NStr::TruncateSpaces(line));
128 
129  vector<string> names;
130  vector<vector<double> > values;
131 
132  for (unsigned int i = 0; i < dim; ++i) {
133  unsigned int count = 0;
134  vector<double> line_values;
135  unsigned int min_expected_cols, max_expected_cols;
136  if (format == eSquare) {
137  min_expected_cols = dim;
138  max_expected_cols = dim;
139  } else if (format == eLower) {
140  min_expected_cols = i;
141  max_expected_cols = i + 1; // main diagonal is optional
142  } else if (format == eUpper) {
143  min_expected_cols = dim - i;
144  max_expected_cols = dim - i;
145  } else {
146  throw runtime_error("invald matrix format specified");
147  }
148  while (true) {
149  NcbiGetlineEOL(istr, line);
150  if (istr.eof()) {
151  throw runtime_error("unexpected EOF");
152  }
153  if (!istr.good()) {
154  throw runtime_error("problem reading file");
155  }
156  if (count == 0) {
157  string name = NStr::TruncateSpaces(line.substr(0, 10));
158  names.push_back(name);
159  line = line.substr(10);
160  }
161  list<string> fields;
162  NStr::Split(line, " \t\n\r", fields, NStr::fSplit_Tokenize);
163  ITERATE (list<string>, field, fields) {
164  line_values.push_back(NStr::StringToDouble(*field));
165  }
166  if (line_values.size() > max_expected_cols) {
167  throw runtime_error("too many columns in row");
168  }
169  if (line_values.size() >= min_expected_cols) {
170  break;
171  }
172  ++count;
173  }
174  values.push_back(line_values);
175  }
176  CNcbiMatrix<double> mat(dim, dim);
177  for (unsigned int i = 0; i < dim; ++i) {
178  for (unsigned int j = 0; j < dim; ++j) {
179  if (format == eSquare) {
180  mat(i, j) = values[i][j];
181  } else if (format == eUpper) {
182  if (i > j) {
183  mat(i, j) = values[j][i];
184  } else {
185  mat(i, j) = values[i][j];
186  }
187  } else { // eLower
188  if (i == j) {
189  if (values[i].size() == i + 1) {
190  // diagonal value was provided in file
191  mat(i, j) = values[i][j];
192  } else {
193  mat(i, j) = 0;
194  }
195  } else if (i > j) {
196  mat(i, j) = values[i][j];
197  } else {
198  mat(i, j) = values[j][i];
199  }
200  }
201  }
202  }
203  FromMatrix(mat);
204  ResetLabels();
205  ITERATE (vector<string>, name, names) {
206  SetLabels().push_back(*name);
207  }
208 }
209 
210 END_objects_SCOPE // namespace ncbi::objects::
211 
213 
214 /* Original file checksum: lines: 65, chars: 1906, CRC32: af87b363 */
User-defined methods of the data storage class.
void FromMatrix(const CNcbiMatrix< double > &mat)
void Read(istream &istr, EFormat format=eGuess)
void AsMatrix(CNcbiMatrix< double > &mat) const
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
Definition: matrix.hpp:390
void Set(T val)
set all values in the matrix to a known value
Definition: matrix.hpp:417
size_t GetRows() const
get the number of rows in this matrix
Definition: matrix.hpp:298
size_t GetCols() const
get the number of columns in this matrix
Definition: matrix.hpp:305
static const struct name_t names[]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
#define CT_POS_TYPE
Definition: ncbistre.hpp:730
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
void ResetLabels(void)
Reset Labels data member.
void ResetDistances(void)
Reset Distances data member.
TDistances & SetDistances(void)
Assign a value to Distances data member.
const TDistances & GetDistances(void) const
Get the Distances member data.
const TLabels & GetLabels(void) const
Get the Labels member data.
TLabels & SetLabels(void)
Assign a value to Labels data member.
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
static Format format
Definition: njn_ioutil.cpp:53
Modified on Tue May 28 05:53:53 2024 by modify_doxy.py rev. 669887