NCBI C++ ToolKit
blastdb_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blastdb_formatter.cpp 91306 2020-10-08 11:57:15Z gouriano $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blastdb_formatter.cpp
31  * Implementation of the CBlastDbFormatter class
32  */
33 
34 #include <ncbi_pch.hpp>
37 #include <numeric> // for std::accumulate
38 
41 
42 CBlastDbFormatter::CBlastDbFormatter(const string& fmt_spec)
43  : m_FmtSpec(fmt_spec)
44 {
45  // Record where the offsets where the replacements must occur
46  for (SIZE_TYPE i = 0; i < m_FmtSpec.size(); i++) {
47  if (m_FmtSpec[i] == '%' && m_FmtSpec[i+1] == '%') {
48  // remove the escape character for '%'
49  m_FmtSpec.erase(i++, 1);
50  continue;
51  }
52 
53  if (m_FmtSpec[i] == '%') {
54  m_ReplOffsets.push_back(i);
55  m_ReplacementTypes.push_back(m_FmtSpec[i+1]);
56  }
57  }
58  // Handle %d defline in ASN.1 text format, can only be by itself
59 
60  if (m_ReplOffsets.empty() ||
61  m_ReplacementTypes.size() != m_ReplOffsets.size()) {
62  NCBI_THROW(CInvalidDataException, eInvalidInput,
63  "Invalid format specification");
64  }
65 }
66 
67 /// Proxy class for retrieving meta data from a BLAST DB
   ///
   /// Every accessor returns its value as a string. The accessors that need
   /// the database itself call x_InitBlastDb() first and then query m_BlastDb.
   ///
   /// NOTE(review): this listing has gaps (the embedded numbering skips 68,
   /// 75, 78, 88, 91, 96, 100, 110, 112, 117 and 119). The line opening the
   /// class (presumably `class CBlastDbMetadata {`), several method bodies
   /// and both data member declarations are not visible here - confirm
   /// against the real file before relying on the notes below.
69 public:
    /// Initialize m_DbInitInfo from db_init_info; nothing else is done here.
70  CBlastDbMetadata(const SSeqDBInitInfo& db_init_info)
71  : m_DbInitInfo(db_init_info)
72  {}
73 
    /// File name of the database.
    /// NOTE(review): the body line is missing from this listing; presumably
    /// derived from m_DbInitInfo - verify.
74  string GetFileName() const {
76  }
    /// Molecule type of the database.
    /// NOTE(review): the body line is missing from this listing; presumably
    /// derived from m_DbInitInfo - verify.
77  string GetMoleculeType() const {
79  }
    /// Returns m_BlastDb->GetTitle() after making sure the handle exists.
80  string GetTitle() {
81  x_InitBlastDb();
82  return m_BlastDb->GetTitle();
83  }
    /// Returns m_BlastDb->GetDate() after making sure the handle exists.
84  string GetDate() {
85  x_InitBlastDb();
86  return m_BlastDb->GetDate();
87  }
    // NOTE(review): the method header (line 88) and the return statement
    // (line 91) are missing here; presumably this is GetNumberOfSequences(),
    // which CBlastDbFormatter::Write calls for the 'n' specifier - confirm.
89  x_InitBlastDb();
90  // FIXME: should this use CSeqDB::GetTotals?
92  }
    /// Database length as a string.
    /// NOTE(review): the return statement (line 96) is missing here.
93  string GetDbLength() {
94  x_InitBlastDb();
95  // FIXME: should this use CSeqDB::GetTotals?
97  }
    /// Disk usage as a string.
    /// NOTE(review): the return statement (line 100) is missing here.
98  string GetDiskUsage() {
99  x_InitBlastDb();
101  }
    /// Returns "5" when m_BlastDb reports EBlastDbVersion::eBDB_Version5 and
    /// "4" for any other reported version.
102  string GetVersion() {
103  x_InitBlastDb();
104  int db_version = ( m_BlastDb->GetBlastDbVersion() == EBlastDbVersion::eBDB_Version5?5:4 );
105  return NStr::IntToString( db_version );
106  }
107 
108 private:
109  /// Information to initialize the BLAST DB handle
    // NOTE(review): the member declaration (line 110) is missing here;
    // presumably m_DbInitInfo, which the constructor initializes.
111  /// BLAST DB handle
    // NOTE(review): the member declaration (line 112) is missing here;
    // presumably m_BlastDb, which the accessors dereference.
113 
114  /// Initialize and cache BLAST DB handle if necessary
    /// Acts only when m_BlastDb.Empty() is true.
    /// NOTE(review): the statement inside the `if` (line 117) and the one
    /// after it (line 119) are missing from this listing.
115  void x_InitBlastDb() {
116  if (m_BlastDb.Empty()) {
118  }
120  }
121 };
122 
/// Produce the formatted description of one BLAST DB.
///
/// Wraps db_init_info in a CBlastDbMetadata, then walks m_ReplacementTypes in
/// order and collects one string per specifier character:
///   'f' file name, 't' title, 'n' number of sequences, 'l' DB length,
///   'p' molecule type, 'd' date of last update, 'U' disk usage, 'v' version.
/// The collected strings are passed to x_Replacer(), whose result is returned.
///
/// Throws CInvalidDataException (eInvalidInput) for any other specifier
/// character.
///
/// NOTE(review): the line holding the function name and parameter list
/// (numbered 124) is missing from this listing; presumably
/// `CBlastDbFormatter::Write(const SSeqDBInitInfo& db_init_info)` - confirm
/// against the header.
123 string
125 {
126  CBlastDbMetadata dbmeta(db_init_info);
127  vector<string> data2write;
     // One output string per recorded specifier, so the final size is known.
128  data2write.reserve(m_ReplacementTypes.size());
129  ITERATE(vector<char>, fmt, m_ReplacementTypes) {
130  switch (*fmt) {
131  case 'f': // file name
132  data2write.push_back(dbmeta.GetFileName());
133  break;
134  case 't': // title
135  data2write.push_back(dbmeta.GetTitle());
136  break;
137  case 'n': // number of sequences
138  data2write.push_back(dbmeta.GetNumberOfSequences());
139  break;
140  case 'l': // DB length
141  data2write.push_back(dbmeta.GetDbLength());
142  break;
143  case 'p': // molecule type
144  data2write.push_back(dbmeta.GetMoleculeType());
145  break;
146  case 'd': // date of last update
147  data2write.push_back(dbmeta.GetDate());
148  break;
149  case 'U': // Disk usage
150  data2write.push_back(dbmeta.GetDiskUsage());
151  break;
152  case 'v': // version
153  data2write.push_back(dbmeta.GetVersion());
154  break;
155  default:
     // Build a message naming the offending specifier, then throw.
156  CNcbiOstrstream os;
157  os << "Unrecognized format specification: '%" << *fmt << "'";
     // NOTE(review): the line carrying the message argument of NCBI_THROW
     // (numbered 159) is missing from this listing; presumably it converts
     // `os` to a string - confirm.
158  NCBI_THROW(CInvalidDataException, eInvalidInput,
160  }
161  }
162  return x_Replacer(data2write);
163 }
164 
/// Auxiliary functor that adds the length of a string to a running total
/// (shamelessly copied from seq_writer.cpp)
struct StrLenAdd
{
    /// @param total  Sum accumulated so far.
    /// @param item   String whose size is added to the sum.
    /// @return total plus item.size().
    SIZE_TYPE operator() (SIZE_TYPE total, const string& item) const {
        const SIZE_TYPE item_len = item.size();
        return total + item_len;
    }
};
173 
174 // also inspired by seq_writer.cpp
175 string
176 CBlastDbFormatter::x_Replacer(const vector<string>& data2write) const
177 {
178  SIZE_TYPE data2write_size = accumulate(data2write.begin(), data2write.end(),
179  0, StrLenAdd());
180  string retval;
181  retval.reserve(m_FmtSpec.size() + data2write_size -
182  (data2write.size() * 2));
183 
184  SIZE_TYPE fmt_idx = 0;
185  for (SIZE_TYPE i = 0, kSize = m_ReplOffsets.size(); i < kSize; i++) {
186  retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_ReplOffsets[i]]);
187  retval.append(data2write[i]);
188  fmt_idx = m_ReplOffsets[i] + 2;
189  }
190  if (fmt_idx <= m_FmtSpec.size()) {
191  retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_FmtSpec.size()]);
192  }
193 
194  return retval;
195 }
196 
USING_SCOPE(objects)
Definition of a customizable BLAST DB information formatter interface.
string m_FmtSpec
The output format specification.
vector< SIZE_TYPE > m_ReplOffsets
Vector of offsets where the replacements will take place.
string Write(const SSeqDBInitInfo &db_init_info)
Extracts the BLAST database information for the requested BLAST DB according to the output format spe...
string x_Replacer(const vector< string > &data2write) const
Replace format specifiers for the data contained in data2write.
CBlastDbFormatter(const string &fmt_spec)
Constructor.
vector< char > m_ReplacementTypes
Proxy class for retrieving meta data from a BLAST DB.
void x_InitBlastDb()
Initialize and cache BLAST DB handle if necessary.
CRef< CSeqDB > m_BlastDb
BLAST DB handle.
string GetMoleculeType() const
SSeqDBInitInfo m_DbInitInfo
Information to initialize the BLAST DB handle.
string GetFileName() const
CBlastDbMetadata(const SSeqDBInitInfo &db_init_info)
Defines invalid user input exceptions.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
Definition: seqdb.cpp:685
static string ESeqType2String(ESeqType type)
Converts a CSeqDB sequence type into a human readable string.
Definition: seqdb.cpp:1328
Int8 GetDiskUsage() const
Retrieve the disk usage in bytes for this BLAST database.
Definition: seqdb.cpp:1464
string GetTitle() const
Returns the database title.
Definition: seqdb.cpp:630
int GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdb.cpp:670
string GetDate() const
Returns the construction date of the database.
Definition: seqdb.cpp:635
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
Definition: seqdb.cpp:1604
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
Definition: ncbistr.hpp:5162
int i
unsigned int a
Definition: ncbi_localip.c:102
@ eBDB_Version5
Definition: seqdbcommon.hpp:53
Structure to define basic information to initialize a BLAST DB.
Definition: seqdb.hpp:1541
string m_BlastDbName
The BLAST DB name.
Definition: seqdb.hpp:1543
CRef< CSeqDB > InitSeqDb() const
Create a new CSeqDB instance from this object.
Definition: seqdb.hpp:1564
CSeqDB::ESeqType m_MoleculeType
The molecule type.
Definition: seqdb.hpp:1545
Auxiliary functor to compute the length of a string (shamelessly copied from seq_writer....
SIZE_TYPE operator()(SIZE_TYPE a, const string &b) const
#define _ASSERT
Modified on Fri Sep 20 14:57:31 2024 by modify_doxy.py rev. 669887