NCBI C++ ToolKit
bdb_dumper.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bdb_dumper.cpp 79860 2017-10-18 13:38:41Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Anatoliy Kuznetsov
27  *
28  * File Description: BDB file dumper utility
29  *
30  */
31 #include <ncbi_pch.hpp>
32 #include <stdio.h>
33 #include <corelib/ncbiapp.hpp>
34 #include <corelib/ncbienv.hpp>
35 #include <corelib/ncbireg.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbistr.hpp>
38 
39 #include <db/bdb/bdb_expt.hpp>
40 #include <db/bdb/bdb_types.hpp>
41 #include <db/bdb/bdb_file.hpp>
42 #include <db/bdb/bdb_cursor.hpp>
43 #include <db/bdb/bdb_filedump.hpp>
44 #include <db/bdb/bdb_blob.hpp>
45 
46 
47 
49 
50 
51 
53 {
54  string field_name;
55  string type;
56  unsigned int length;
57  bool is_null;
59 };
60 
62 {
63 public:
64  typedef vector<SBDB_FieldDescription> TFileStructure;
65 public:
69 
70  void ParseConfigFile(const string& fname);
71 
72  bool ParseStructureLine(const string& line,
73  SBDB_FieldDescription* descr);
74 
75  const TFileStructure& GetStructure() const { return m_FileStructure; }
77 
78  bool IsBlobStorage() const { return m_BlobStorage; }
79 
80  bool IsKeyDuplicates() const { return m_KeyDuplicates; }
81 
82 private:
86 };
87 
88 
89 
90 /// Parse structure file line
91 /// Returns TRUE if parsing is successfull
92 bool
94  SBDB_FieldDescription* descr)
95 {
96  int cmp;
97  list<string> tokens;
98 
99  NStr::Split(line, string(" \t"), tokens,
101 
102  descr->field_name = descr->type = "";
103  descr->length = 0;
104  descr->is_null = descr->is_primary_key = false;
105 
106  unsigned cnt = 0;
107  ITERATE(list<string>, it, tokens) {
108  switch (cnt) {
109  case 0: // field name
110  cmp = NStr::CompareNocase(*it, string("duplicates"));
111 
112  if (cmp == 0) { // "DUPLICATES ALLOWED" ?
113  ++it;
114  if (it != tokens.end()) {
115  cmp = NStr::CompareNocase(*it, string("allowed"));
116  if (cmp == 0) {
117  m_KeyDuplicates = true;
118  return false;
119  }
120  cmp = NStr::CompareNocase(*it, string("yes"));
121  if (cmp == 0) {
122  m_KeyDuplicates = true;
123  return false;
124  }
125  cmp = NStr::CompareNocase(*it, string("true"));
126  if (cmp == 0) {
127  m_KeyDuplicates = true;
128  return false;
129  }
130  }
131  }
132 
133  descr->field_name = *it;
134  break;
135  case 1: // field type
136  {
137  string s = *it;
138 
139  string::size_type offs1 = s.find_first_of("(");
140  if (offs1 != string::npos) {
141  string::size_type offs2 = s.find_first_of(")");
142  if (offs2 == string::npos || offs2 < offs1) {
143  // TODO: parsing error here
144  return false;
145  }
146  const char *ls = s.c_str() + offs1;
147  descr->length = ::atoi(ls);
148  s.resize(offs1);
149  }
150 
151  descr->type = s;
152  }
153  break;
154  default: // PK or NULL or NOT NULL
155  {
156  cmp = NStr::CompareNocase(*it, string("PK"));
157  if (cmp == 0) {
158  descr->is_primary_key = true;
159  }
160  else {
161  cmp = NStr::CompareNocase(*it, string("key"));
162  if (cmp == 0) {
163  descr->is_primary_key = true;
164  }
165  else {
166  cmp = NStr::CompareNocase(*it, string("NULL"));
167  if (cmp == 0) {
168  descr->is_null = true;
169  }
170  }
171  }
172  }
173  break;
174  } // switch
175  ++cnt;
176  } // ITERATE
177 
178  return true;
179 }
180 
182 {
183  string line;
184  unsigned line_idx = 0;
185 
186  m_FileStructure.resize(0);
187 
188  CBDB_FieldFactory ffact;
189 
190  CNcbiIfstream fi(fname.c_str());
191 
192  if (!fi.is_open()) {
193  string msg = "Cannot open config file: ";
194  msg += fname;
195  BDB_THROW(eInvalidValue, msg);
196  }
197 
198  for ( ;fi.good(); ++line_idx) {
199  getline(fi, line);
200 
201  // check if it is a comment
202  const char* s = line.c_str();
203  for(; isspace((unsigned char)(*s)); ++s) {}
204 
205  if (*s == 0 || *s == '#') // empty line or comment
206  continue;
207 
208  SBDB_FieldDescription fdescr;
209  bool parsed = ParseStructureLine(line, &fdescr);
210 
211  if (parsed) {
212  CBDB_FieldFactory::EType ft = ffact.GetType(fdescr.type);
213  if (ft == CBDB_FieldFactory::eUnknown) {
214  BDB_THROW(eInvalidType, fdescr.type);
215  }
216  if (ft == CBDB_FieldFactory::eBlob) { // BLOB file
217  m_BlobStorage = true;
218  }
219 
220  m_FileStructure.push_back(fdescr);
221  }
222 
223  } // for
224 }
225 
226 
227 ///////////////////////////////////////////////////////////////////////
228 
229 
230 /// BDB file dumper application
231 ///
232 /// @internal
233 ///
235 {
236 public:
237  void Init(void);
238  int Run(void);
239 protected:
240 
241  void Dump(const CArgs& args,
243  bool dump_lob_storage);
244 };
245 
247 {
248 
249  // Setup command line arguments and parameters
250 
251  // Create command-line argument descriptions class
252  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
253 
254  // Specify USAGE context
255  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
256  "BDB dumper");
257 
258  arg_desc->AddPositional("dbname",
259  "BDB database name", CArgDescriptions::eString);
260 
261  arg_desc->AddPositional("confname",
262  "BDB database structure configuration file.\n"
263  "(Use 'blob' or 'lob' to dump LOB storage).",
265 
266  arg_desc->AddOptionalKey("k",
267  "dbkey",
268  "File key value.(Dumps record for only this key)",
270 
271  arg_desc->AddOptionalKey("cs",
272  "column_separator",
273  "Column separator string (default:TAB)",
275 
276  arg_desc->AddOptionalKey("q",
277  "query",
278  "BDB query string",
280 
281  arg_desc->AddOptionalKey("bfile",
282  "blob_file",
283  "Dump BLOB to file (Use with -k)",
285 
286  arg_desc->AddOptionalKey("ofile",
287  "output_file",
288  "All records dumped into another database (append mode)",
290 
291  arg_desc->AddFlag("nl", "Do NOT print field labels(names)");
292  arg_desc->AddFlag("bt", "Display BLOB as text (default: HEX)");
293  arg_desc->AddFlag("bf", "Display full BLOB");
294 
295  // Setup arg.descriptions for this application
296  SetupArgDescriptions(arg_desc.release());
297 
298 }
299 
300 
301 
304  bool dump_lob_storage)
305 {
306  const string& db_name = args["dbname"].AsString();
307 
309  parser.SetStructure();
310 
311  CBDB_FieldFactory ffact;
312 
313  unique_ptr<CBDB_File> db_file;
314  unique_ptr<CBDB_BLobFile> db_blob_file;
315  unique_ptr<CBDB_File> db_out_file;
316 
317  CBDB_File* dump_file;
318 
319  if (dump_lob_storage) {
320  // Create and configure the simple LOB dumper
321  // (We dont need config for that)
322  db_blob_file.reset(new CBDB_BLobFile);
323  dump_file = db_blob_file.get();
324  dump_file->SetFieldOwnership(true);
325 
327  dump_file->BindKey("id", field);
328 
329  } else {
330 
331  // Create config file based dumper
332 
333  dump_lob_storage = parser.IsBlobStorage();
334 
335  if (dump_lob_storage) {
336  db_blob_file.reset(new CBDB_BLobFile);
337  dump_file = db_blob_file.get();
338  } else {
339  db_file.reset(new CBDB_File(parser.IsKeyDuplicates() ?
342  dump_file = db_file.get();
343  }
344  dump_file->SetFieldOwnership(true);
345 
346  // ----------------------------------------
347  //
348  // Create description based file structure
349 
350  bool struct_created = false;
351 
353  struct_created = true;
354  CBDB_FieldFactory::EType ft = ffact.GetType(it->type);
355  if (ft == CBDB_FieldFactory::eString ||
357  if (it->length == 0) {
358  it->length = 4096;
359  }
360  }
361  if (it->is_primary_key) {
362  CBDB_Field* field = ffact.Create(ft);
363  dump_file->BindKey(it->field_name.c_str(),
364  field,
365  it->length);
366  } else {
367  if (parser.IsBlobStorage()) {
368  // All DATA fields are ignored (we have only one BLOB per record)
369  // if operator requests anything but one BLOB it is treated as an
370  // error (TODO: add a warning here)
371  } else {
372  CBDB_Field* field = ffact.Create(ft);
373  dump_file->BindData(it->field_name.c_str(),
374  field,
375  it->length,
376  it->is_null ? eNullable : eNotNullable);
377  }
378  }
379  }
380 
381  if (!struct_created) {
382  NcbiCerr << "Incorrect structure file (no fields)" << NcbiEndl;
383  exit(1);
384  }
385 
386  }
387 
388 
389  dump_file->Open(db_name.c_str(), CBDB_File::eReadOnly);
390 
391  // ------------------------------------------
392  //
393  // Dump the content
394 
395  CBDB_FileDumper fdump;
396  if (args["nl"]) {
398  }
400 
401  if (args["cs"]) {
402  fdump.SetColumnSeparator(args["cs"].AsString());
403  }
404 
405 
406  CBDB_FileDumper::TBlobFormat bformat = 0;
407  if (args["bt"]) {
408  bformat |= CBDB_FileDumper::eBlobAsTxt;
409  } else {
410  bformat |= CBDB_FileDumper::eBlobAsHex;
411  }
412  if (args["bf"]) {
413  bformat |= CBDB_FileDumper::eBlobAll;
414  } else {
416  }
417 
418  fdump.SetBlobFormat(bformat);
419 
420  if (args["q"]) { // query
421  fdump.SetQuery(args["q"].AsString());
422  }
423 
424  if (args["bfile"]) {
425  fdump.SetBlobDumpFile(args["bfile"].AsString());
426  }
427 
428  if (args["ofile"]) {
429  const string& out_fname = args["ofile"].AsString();
430  db_out_file.reset(new CBDB_File(parser.IsKeyDuplicates() ?
433  db_out_file->SetFieldOwnership(true);
434  db_out_file->DuplicateStructure(*dump_file);
435 
436  db_out_file->Open(out_fname.c_str(), CBDB_File::eReadWriteCreate);
437 
438  fdump.SetOutFile(db_out_file.get());
439  }
440 
441  if (args["k"]) {
442  const string& key_str = args["k"].AsString();
443 
444  CBDB_FileCursor cur(*dump_file);
446 
447  const CBDB_BufferManager* key_buf = dump_file->GetKeyBuffer();
448  unsigned field_count = key_buf->FieldCount();
449 
450  if (field_count > 1) {
451  list<string> keys;
452  NStr::Split(key_str, string("|"), keys);
453 
454  unsigned cnt = 0;
455  ITERATE(list<string>, it, keys) {
456  cur.From << *it;
457  if (++cnt == field_count)
458  break;
459  }
460  } else {
461  cur.From << key_str;
462  }
463 
464  fdump.Dump(NcbiCout, cur);
465 
466  } else {
467 
468  fdump.Dump(NcbiCout, *dump_file);
469  }
470 
471  NcbiCout << "Dumped records: " << fdump.GetRecordsDumped() << NcbiEndl;
472 }
473 
474 
476 {
477  try
478  {
479  const CArgs & args = GetArgs();
480  const string& conf_name = args["confname"].AsString();
481 
483  bool blob_store = false;
484 
485  if (NStr::CompareNocase(conf_name, "lob") == 0 ||
486  NStr::CompareNocase(conf_name, "blob") == 0) {
487  blob_store = true;
488  } else {
489  parser.ParseConfigFile(conf_name);
490  }
491 
492  Dump(args, parser, blob_store);
493  }
494  catch (CBDB_ErrnoException& ex)
495  {
496  NcbiCerr << "Error: DBD errno exception:" << ex.what();
497  return 1;
498  }
499  catch (CBDB_LibException& ex)
500  {
501  NcbiCerr << "Error: DBD library exception:" << ex.what();
502  return 1;
503  }
504 
505  return 0;
506 }
507 
508 
509 int main(int argc, const char* argv[])
510 {
511  return CBDB_FileDumperApp().AppMain(argc, argv);
512 }
BDB library BLOB support.
Berkeley BDB file cursor.
int main(int argc, const char *argv[])
Definition: bdb_dumper.cpp:509
USING_NCBI_SCOPE
Definition: bdb_dumper.cpp:48
Exception specifications for BDB library.
BDB File management.
BDB File conversion into text.
#define false
Definition: bool.h:36
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Berkeley DB BLOB File class.
Definition: bdb_blob.hpp:59
BDB Data Field Buffer manager class.
Definition: bdb_types.hpp:1768
const TFileStructure & GetStructure() const
Definition: bdb_dumper.cpp:75
bool ParseStructureLine(const string &line, SBDB_FieldDescription *descr)
Parse structure file line. Returns TRUE if parsing is successful.
Definition: bdb_dumper.cpp:93
TFileStructure & SetStructure()
Definition: bdb_dumper.cpp:76
bool IsKeyDuplicates() const
Definition: bdb_dumper.cpp:80
void ParseConfigFile(const string &fname)
Definition: bdb_dumper.cpp:181
vector< SBDB_FieldDescription > TFileStructure
Definition: bdb_dumper.cpp:64
TFileStructure m_FileStructure
Definition: bdb_dumper.cpp:83
BDB errno exception class.
Definition: bdb_expt.hpp:84
Class factory for BDB field types.
Definition: bdb_types.hpp:1961
Base class for constructing BDB fields.
Definition: bdb_types.hpp:297
Berkeley DB file cursor class.
Definition: bdb_cursor.hpp:95
BDB file dumper application.
Definition: bdb_dumper.cpp:235
void Init(void)
Initialize the application.
Definition: bdb_dumper.cpp:246
void Dump(const CArgs &args, CBDB_ConfigStructureParser &parser, bool dump_lob_storage)
Definition: bdb_dumper.cpp:302
int Run(void)
Run the application.
Definition: bdb_dumper.cpp:475
Utility class to convert BDB files into text files.
Berkeley DB file class.
Definition: bdb_file.hpp:445
BDB library exception.
Definition: bdb_expt.hpp:121
The NCBI C++ standard methods for dealing with std::string.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eNotNullable
Value cannot be null.
Definition: ncbimisc.hpp:115
@ eNullable
Value can be null.
Definition: ncbimisc.hpp:114
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
void SetQuery(const string &query_str)
Set query filter.
void SetValueFormatting(EValueFormatting vf)
void SetCondition(ECondition cond_from, ECondition cond_to=eNotSet)
Set search condition(type of interval)
Definition: bdb_cursor.cpp:263
void SetFieldOwnership(bool own_fields)
Fields deletion is managed by the class when own_fields is TRUE.
Definition: bdb_file.cpp:1254
unsigned GetRecordsDumped() const
Return number of records processed by Dump.
void SetColumnNames(EPrintFieldNames print_names)
Control field names printing.
void Open(const string &filename, EOpenMode open_mode, bool support_dirty_read=false, unsigned rec_len=0)
Open file with specified access mode.
Definition: bdb_file.hpp:774
void SetColumnSeparator(const string &col_separator)
void Dump(const string &dump_file_name, CBDB_File &db)
Convert BDB file into text file.
void SetBlobDumpFile(const string &fname)
Set BLOB dump file name.
void BindKey(const char *field_name, CBDB_Field *key_field, size_t buf_size=0)
Definition: bdb_file.cpp:1281
const CBDB_BufferManager * GetKeyBuffer() const
Get Buffer manager for key section of the file.
Definition: bdb_file.hpp:513
void BindData(const char *field_name, CBDB_Field *data_field, size_t buf_size=0, ENullable is_null=eNullable)
Definition: bdb_file.cpp:1296
unsigned int TBlobFormat
CBDB_ConditionHandle From
Definition: bdb_cursor.hpp:252
void SetBlobFormat(TBlobFormat bf)
void SetOutFile(CBDB_File *out_dbf)
Set reference on output file (mode when all dumped records are put into a separate database) Class do...
@ eReadWriteCreate
read-write, create if it doesn't exist
Definition: bdb_file.hpp:82
@ eDuplicatesDisable
Definition: bdb_file.hpp:102
EType GetType(const string &type) const
Return type enumerator by string type (case insensitive)
Definition: bdb_types.cpp:1166
unsigned int FieldCount() const
Return number of fields attached using function Bind.
Definition: bdb_types.hpp:2529
CBDB_Field * Create(EType type) const
Definition: bdb_types.cpp:1208
#define BDB_THROW(errcode, message)
Definition: bdb_expt.hpp:178
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCout
Definition: ncbistre.hpp:543
#define NcbiCerr
Definition: ncbistre.hpp:544
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
@ fSplit_Truncate
Definition: ncbistr.hpp:2501
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
exit(2)
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
int isspace(Uchar c)
Definition: ncbictype.hpp:69
Defines unified interface to application:
Process information in the NCBI Registry, including working with configuration files.
static unsigned cnt[256]
#define fi
unsigned int length
Definition: bdb_dumper.cpp:56
Modified on Sat Dec 09 04:47:01 2023 by modify_doxy.py rev. 669887