NCBI C++ ToolKit
test_bdb_split.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: test_bdb_split.cpp 77790 2017-05-08 13:31:07Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Anatoliy Kuznetsov
27  *
28  * File Description: Test application for NCBI Berkeley DB library (BDB)
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbiargs.hpp>
36 #include <corelib/ncbifile.hpp>
37 #include <corelib/ncbitime.hpp>
38 #include <stdio.h>
39 
40 #include <db/bdb/bdb_expt.hpp>
41 #include <db/bdb/bdb_types.hpp>
42 #include <db/bdb/bdb_file.hpp>
43 #include <db/bdb/bdb_env.hpp>
44 #include <db/bdb/bdb_cursor.hpp>
45 #include <db/bdb/bdb_blob.hpp>
46 #include <db/bdb/bdb_map.hpp>
47 #include <db/bdb/bdb_blobcache.hpp>
48 #include <db/bdb/bdb_filedump.hpp>
49 #include <db/bdb/bdb_trans.hpp>
50 #include <db/bdb/bdb_query.hpp>
51 #include <db/bdb/bdb_util.hpp>
53 
54 #include <util/line_reader.hpp>
55 
56 #include <common/test_assert.h> /* This header must go last */
57 
59 
60 
61 
62 
63 ////////////////////////////////
64 // Test functions, classes, etc.
65 //
66 
67 
/// @internal
/// One entry of a test load: a batch of identically sized BLOBs.
///
/// ReadTestSet() fills both fields from a single "<count> <size>" text line,
/// and LoadSplitStore() performs `count` inserts using a buffer resized to
/// `blob_size`. Both members are zero-initialized so that a default-constructed
/// record never carries indeterminate values; ReadTestSet() treats a record
/// with a zero field as blank.
struct TestRec
{
    unsigned count = 0;      ///< Number of BLOBs to insert for this record
    unsigned blob_size = 0;  ///< Size the insert buffer is resized to
};
74 
75 
76 ////////////////////////////////
77 // Test application
78 //
79 
80 /// @internal
/// Test application for the BDB split BLOB store.
/// NOTE(review): the class-head line (listing line 81) and the member at
/// listing line 84 are absent from this extract. The class name
/// CBDB_SplitTest is taken from the out-of-class definitions and from main();
/// the base class is presumably CNcbiApplication -- confirm against the
/// real file.
82 {
83 public:
85
86 public:
// Registers the usage context / argument descriptions for the application.
87  void Init(void);
// Runs the functional insert/fetch test; returns 0 on success, 1 when a BDB
// exception is caught.
88  int Run(void);
89
// Bulk-load benchmark driven by a test-set file: reads the file and loads
// two split stores ("split_hash" and "split_btree") from it.
90  void LoadTestSet(const string& file_name);
91
// Parses file_name into test_set; test_set is emptied first and stays empty
// when the file cannot be opened.
92  void ReadTestSet(vector<TestRec> & test_set, const string& file_name);
// Inserts the BLOBs described by test_set into split_store. The counts in
// test_set are decremented to zero in the process.
93  void LoadSplitStore(vector<TestRec>& test_set,
94  TBlobSplitStore& split_store);
95
96 };
97 
98 
/// Load a split store with the BLOBs described by a test set.
///
/// For every record, the shared buffer is resized to the record's blob_size
/// and stored with UpdateInsert() once per unit of the record's count. BLOB
/// ids are consecutive, starting at 1, and keep increasing across records.
///
/// @param test_set
///   Records to load. CONSUMED: each record's count is decremented to zero
///   through a reference, so the vector cannot be reused for a second load.
/// @param split_store
///   Destination store; receives one UpdateInsert() call per BLOB.
///
/// NOTE(review): the declarations of `buffer` and `sw` (listing lines 104 and
/// 110) are absent from this extract.
99 void
100 CBDB_SplitTest::LoadSplitStore(vector<TestRec>& test_set,
101  TBlobSplitStore& split_store)
102 {
// Running BLOB id shared by all records.
103  unsigned blob_id = 1;
105
106  for (size_t i = 0; i < test_set.size(); ++i) {
// Reference, not a copy: the inner loop counts r.count down in place.
107  TestRec& r = test_set[i];
108  buffer.resize(r.blob_size);
109  cout << "\nsize=" << r.blob_size << " count=" << r.count << endl;
111  for (;r.count;r.count--) {
112  split_store.UpdateInsert(blob_id,
113  buffer.data(),
114  buffer.size());
115  ++blob_id;
// Progress marker whenever the remaining count is a multiple of 10000.
116  if ((r.count % 10000) == 0) {
117  cerr << ".";
118  }
119  }
// Printed once per record, after all of its BLOBs were stored.
120  cerr << "Elapsed = " << sw.Elapsed() << endl;
121  } // for
122 }
123 
125 {
// Body of the bulk-load benchmark. The signature line (listing line 124) is
// absent from this extract; the body reads `file_name`, which matches the
// LoadTestSet(const string& file_name) declaration in the class.
//
// Steps: open a transactional environment, read the test set from file_name,
// then load the same set into two split stores ("split_hash" and
// "split_btree"), printing the elapsed time after each load.
//
// NOTE(review): the environment path "e:\\db_split" is hard-coded.
126  CBDB_Env env;
127  env.SetLogInMemory(true);
128  env.SetLogBSize(50 * 1024 * 1024);
129  env.SetCacheSize(400 * 1024 * 1024);
130  env.OpenWithTrans("e:\\db_split", CBDB_Env::eThreaded);
131
132  vector<TestRec> test_set;
133  ReadTestSet(test_set, file_name);
// Second copy is required because LoadSplitStore() zeroes the counts of the
// vector it is given.
134  vector<TestRec> test_set2(test_set);
135
136  if (test_set.size() == 0) {
137  cout << "Empty test load." << endl;
138  return;
139  }
140  cout << "Loaded " << test_set.size() << " records." << endl;
141
// First store: "split_hash".
// NOTE(review): SetEnv(env) appears after the Open() call here; confirm the
// required ordering against the CBDB_BlobSplitStore documentation. The
// remaining Open() arguments (listing lines 145-146) and the timer setup
// (listing line 150) are absent from this extract.
142  {{
143  TBlobSplitStore split_store_hash(new CBDB_BlobDeMux);
144  split_store_hash.Open("split_hash",
147  split_store_hash.SetEnv(env);
148  //split_store_hash.SetVolumeCacheSize(100* 1024 * 1024);
149
151  cout << "Loading hash store." << endl;
152  LoadSplitStore(test_set, split_store_hash);
153  cout << "Ok. elapsed=" << sw.Elapsed() << endl << endl;
154  }}
155
// Second store: "split_btree", loaded from the untouched copy of the test set.
// NOTE(review): no SetEnv() call is visible for this store; listing lines
// 159-160 and 163 are absent from this extract, so this cannot be confirmed.
156  {{
157  TBlobSplitStore split_store_btree(new CBDB_BlobDeMux);
158  split_store_btree.Open("split_btree",
161  split_store_btree.SetVolumeCacheSize(100 * 1024 * 1024);
162
164  cout << "Loading btree store." << endl;
165  LoadSplitStore(test_set2, split_store_btree);
166  cout << "Ok. elapsed=" << sw.Elapsed() << endl << endl;
167  }}
168
169 }
170 
/// Read a test set from a text file.
///
/// Each input line is trimmed and split in two at the first space or tab; the
/// first part is parsed as the BLOB count and the second as the BLOB size.
/// Records where either value is zero are reported as "Blank record" and
/// skipped.
///
/// @param test_set
///   Output vector. It is emptied first and stays empty if the file cannot
///   be opened (no error is reported in that case).
/// @param file_name
///   Path of the text file to read.
///
/// NOTE(review): the flag arguments of both StringToUInt() calls (listing
/// lines 188-190 and 192-194) are absent from this extract.
171 void CBDB_SplitTest::ReadTestSet(vector<TestRec>& test_set,
172  const string& file_name)
173 {
174  cout << "Loading " << file_name << " ... " << endl;
175  test_set.resize(0);
176  CNcbiIfstream is(file_name.c_str());
177  if (!is.good()) {
// Silent failure: the caller sees an empty test set.
178  return;
179  }
180  CStreamLineReader lr(is);
// NOTE(review): the reader is advanced before AtEOF() is tested; confirm
// against the CStreamLineReader semantics that the final line of the file is
// still processed.
181  for(++lr; !lr.AtEOF(); ++lr) {
182  CTempString st = *lr;
183  string count_str, size_str;
184  string s = NStr::TruncateSpaces(string(st));
185  NStr::SplitInTwo(s, " \t", count_str, size_str);
186  TestRec rec;
187  rec.count = NStr::StringToUInt(count_str,
191  rec.blob_size = NStr::StringToUInt(size_str,
// Keep only records with a non-zero count and a non-zero size.
195  if (rec.count && rec.blob_size) {
196  test_set.push_back(rec);
197  } else {
198  cout << "Blank record: " << st << endl;
199  }
200  } // for
201  cout << "ok " << endl;
202
203 }
204 
205 
206 
208 {
// Application initialization body. The signature line (listing line 207) and
// the statements at listing lines 209 and 211-214 are absent from this
// extract; only the argument-description setup is visible.
210
215
// Build the argument descriptions, then hand the raw pointer over to
// SetupArgDescriptions() via release().
216  unique_ptr<CArgDescriptions> d(new CArgDescriptions);
217  d->SetUsageContext("test_bdb_split",
218  "test BDB split storage");
219  SetupArgDescriptions(d.release());
220 }
221 
222 
223 
225 {
// Functional test body. The signature line (listing line 224) is absent from
// this extract; the class declares `int Run(void)` and this body returns int.
//
// Phase 1 creates the "split" store and writes four BLOBs (ids 1-4).
// Phase 2 reopens it read-only and checks that ids 1 and 4 read back with the
// expected leading text. Returns 0 on success and 1 if a BDB exception is
// caught.
//
// NOTE(review): buf_small, buf_large and buf_read are allocated with new[]
// and no delete[] is visible on any return path, so they leak.
// NOTE(review): only the leading C string of buf_small / buf_large is
// written; the rest of each buffer is uninitialized but the full 256 bytes /
// 1024*1024 bytes are passed to Insert()/UpdateInsert().
226  //LoadTestSet("e:\\db_split\\db_split.txt");
227  //return 0;
228
229  cout << "Run BDB split storage test" << endl << endl;
230
231  char* buf_small = new char[256];
232  char* buf_large = new char[1024*1024];
233  char* buf_read = new char[2*1024*1024];
// Fetch() takes a void** so it needs a separate void* alias of the read
// buffer; with eReallocForbidden the checks below read through buf_read.
234  void* buf = buf_read;
235
236  ::strcpy(buf_small, "test small 1");
237  ::strcpy(buf_large, "test large 1");
238
239  try
240  {
// Phase 1: create the store and write the BLOBs.
241  {{
242  TBlobSplitStore split_store(new CBDB_BlobDeMux(1024*1024));
243
244  split_store.Open("split", CBDB_RawFile::eCreate);
245
// Ids 2 and 1 carry the "... 1" texts.
246  split_store.Insert(2, buf_large, 1024 * 1024);
247  split_store.Insert(1, buf_small, 256);
248
// Change the payload text so ids 3 and 4 carry the "... 2" texts.
249  ::strcpy(buf_small, "test small 2");
250  ::strcpy(buf_large, "test large 2");
251
252  split_store.UpdateInsert(3, buf_small, 256);
253  split_store.UpdateInsert(4, buf_large, 1024 * 1024);
254
255  split_store.FreeUnusedMem();
256
// NOTE(review): the declaration of `bv` (listing line 257) is absent from
// this extract. The resulting id vector is not inspected afterwards.
258  split_store.GetIdVector(&bv);
259
260  split_store.Save();
261  }}
262
// Phase 2: reopen read-only and verify the stored content.
263  {{
264  TBlobSplitStore split_store(new CBDB_BlobDeMux(1024*1024));
265
266  split_store.Open("split", CBDB_RawFile::eReadOnly);
267  EBDB_ErrCode err;
// Id 1 was written before the text change, so "test small 1" is expected.
268  err =
269  split_store.Fetch(1, &buf,
270  2*1024*1024, CBDB_RawFile::eReallocForbidden, 0);
271  assert(err == eBDB_Ok);
272  int res = strcmp(buf_read, "test small 1");
273  assert(res == 0);
// Id 4 was written after the text change, so "test large 2" is expected.
274  err =
275  split_store.Fetch(4, &buf,
276  2*1024*1024, CBDB_RawFile::eReallocForbidden, 0);
277  assert(err == eBDB_Ok);
278  res = strcmp(buf_read, "test large 2");
279  assert(res == 0);
280
// Same BLOB through ReadRealloc(): the buffer starts at 10 elements and must
// come back larger than the 12-character expected text.
281  CBDB_RawFile::TBuffer chbuf(10);
282  err = split_store.ReadRealloc(4, chbuf);
283  assert(err == eBDB_Ok);
284  assert(chbuf.size() > 12);
285  res = strcmp((const char*)&chbuf[0], "test large 2");
286  assert(res == 0);
287
288
289  }}
290
291  }
// NOTE(review): "DBD" in the two messages below looks like a typo for "BDB".
292  catch (CBDB_ErrnoException& ex)
293  {
294  cout << "Error! DBD errno exception:" << ex.what();
295  return 1;
296  }
297  catch (CBDB_LibException& ex)
298  {
299  cout << "Error! DBD library exception:" << ex.what();
300  return 1;
301  }
302
303  cout << endl;
304  cout << "TEST execution completed successfully!" << endl << endl;
305  return 0;
306 }
307 
308 
309 ///////////////////////////////////
310 // APPLICATION OBJECT and MAIN
311 //
312 
313 int main(int argc, const char* argv[])
314 {
315  // Execute main application function
316  return CBDB_SplitTest().AppMain(argc, argv);
317 }
318 
319 
BDB library BLOB support.
ICache interface implementation on top of Berkeley DB.
Berkeley BDB file cursor.
Wrapper around Berkeley DB environment structure.
Exception specifications for BDB library.
BDB File management.
BDB File conversion into text.
Queries for BDB library.
BDB library split BLOB store.
Wrapper around Berkeley DB transaction structure.
CArgDescriptions –.
Definition: ncbiargs.hpp:541
Volume split BLOB demultiplexer.
BLOB storage based on a single unsigned integer key. Supports BLOB volumes and different base page size ...
BDB environment object: a collection including support for some or all of caching, locking,...
Definition: bdb_env.hpp:61
BDB errno exception class.
Definition: bdb_expt.hpp:84
BDB library exception.
Definition: bdb_expt.hpp:121
void LoadTestSet(const string &file_name)
CBDB_BlobSplitStore< bm::bvector<> > TBlobSplitStore
void ReadTestSet(vector< TestRec > &test_set, const string &file_name)
void Init(void)
Initialize the application.
int Run(void)
Run the application.
void LoadSplitStore(vector< TestRec > &test_set, TBlobSplitStore &split_store)
Reallocable memory buffer (no memory copy overhead). Mimics vector<>, without the overhead of explicit...
size_type size() const
CStopWatch –.
Definition: ncbitime.hpp:1938
Simple implementation of ILineReader for i(o)streams.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
const char * file_name[]
static HENV env
Definition: transaction2.c:38
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
EBDB_ErrCode Insert(unsigned id, const void *data, size_t size, unsigned *coord)
Insert BLOB into the storage.
void FreeUnusedMem()
Reclaim unused memory.
EBDB_ErrCode ReadRealloc(unsigned id, CBDB_RawFile::TBuffer &buffer)
Read BLOB into vector.
void SetVolumeCacheSize(unsigned int cache_size)
EBDB_ErrCode UpdateInsert(unsigned id, const void *data, size_t size, unsigned *coord)
Update or insert BLOB.
void Save(typename TDeMuxStore::ECompact compact_vectors=TDeMuxStore::eCompact)
Save storage dictionary (demux disposition).
EBDB_ErrCode Fetch(unsigned id, void **buf, size_t buf_size, CBDB_RawFile::EReallocMode allow_realloc, size_t *blob_size)
Fetch LOB record directly into the provided '*buf'.
void SetEnv(CBDB_Env &env)
Associate with the environment. Should be called before opening.
void GetIdVector(TBitVector *bv) const
Get all id of all BLOBs stored.
void Open(const string &storage_name, CBDB_RawFile::EOpenMode open_mode, CBDB_RawFile::EDBType db_type=CBDB_RawFile::eBtree)
Open storage (reads storage dictionary into memory)
EBDB_ErrCode
BDB Return codes.
Definition: bdb_file.hpp:57
@ eCreate
implies 'eReadWrite' too
Definition: bdb_file.hpp:81
@ eReallocForbidden
Definition: bdb_file.hpp:97
@ eBDB_Ok
Definition: bdb_file.hpp:58
@ eThreaded
corresponds to DB_THREAD
Definition: bdb_env.hpp:64
void SetDiagPostFlag(EDiagPostFlag flag)
Set the specified flag (globally).
Definition: ncbidiag.cpp:6070
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
void SetDiagTrace(EDiagTrace how, EDiagTrace dflt=eDT_Default)
Set the diagnostic trace settings.
Definition: ncbidiag.cpp:6226
@ eDPF_Line
Source line.
Definition: ncbidiag.hpp:695
@ eDPF_Trace
Default flags to use when tracing.
Definition: ncbidiag.hpp:722
@ eDPF_File
File name (not full path)
Definition: ncbidiag.hpp:693
@ eDT_Enable
Enable messages of severity "eDiag_Trace".
Definition: ncbidiag.hpp:1550
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fAllowTrailingSpaces
Ignore trailing space characters.
Definition: ncbistr.hpp:297
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowLeadingSpaces
Ignore leading spaces in converted string.
Definition: ncbistr.hpp:294
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
@ eStart
Start timer immediately after creating.
Definition: ncbitime.hpp:1942
static CStopWatch sw
char * buf
int i
Lightweight interface for getting lines of data with minimal memory copying.
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
#define assert(x)
Definition: srv_diag.hpp:58
unsigned count
unsigned blob_size
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Modified on Sun Apr 14 05:25:15 2024 by modify_doxy.py rev. 669887