NCBI C++ ToolKit
writedb_lmdb.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_LMDB_HPP
2 #define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_LMDB_HPP
3 
4 /* $Id:
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Amelia Fong
30  *
31  */
32 
33 /// @file writedb_lmdb.hpp
34 /// Defines lmdb implementation of string-key database.
35 ///
36 /// Defines classes:
37 /// CWriteDB_LMDB
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
43 
44 #include <memory>
45 
48 
50 
/// Default map sizes, used as the default `map_size` constructor argument of
/// CWriteDB_LMDB (DEFAULT_LMDB_MAP_SIZE) and CWriteDB_TaxID
/// (DEFAULT_TAXID_MAP_SIZE).
/// Builds that define NCBI_OS_MSWIN get a much smaller default (500000 for
/// both) than all other builds.
/// NOTE(review): the unit is presumably bytes, as for an LMDB map size -- confirm.
51 #ifdef NCBI_OS_MSWIN
52 #define DEFAULT_LMDB_MAP_SIZE 500000
53 #define DEFAULT_TAXID_MAP_SIZE 500000
54 #else
55 #define DEFAULT_LMDB_MAP_SIZE 700000000
56 #define DEFAULT_TAXID_MAP_SIZE 300000000
57 #endif
58 
59 /// This class supports creation of a string accession to integer OID
60 /// lmdb database
61 
63 {
64 
65 public:
66 
67  /// Constructor for LMDB write access
68  /// @param dbname Database name
69  CWriteDB_LMDB(const string& dbname, Uint8 map_size = DEFAULT_LMDB_MAP_SIZE, Uint8 capacity = 500000);
70 
71  // Destructor
72  ~CWriteDB_LMDB();
73 
74  /// Create volume table
75  /// This api should only be called once to create vol info for all vols in the db
76  /// @param vol_names names of the volumes (vector index corresponds to the volume number)
77  /// @param vol_num_oids number of OIDs in each volume (vector index corresponds to the volume number)
78  void InsertVolumesInfo(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids);
79 
80  /// Add entries in bulk as fetched from CSeqDB::GetSeqIDs.
81  /// This api needs to be called in sequential order of OIDs
82  /// This api should only be called once for each OID
83  /// @param oid OID
84  /// @param seqids list<CRef<CSeq_id> > from CSeqDB::GetSeqIDs
85  /// @return number of rows added to database
86  /// @see InsertEntry
87  int InsertEntries(const list<CRef<CSeq_id>> & seqids, const blastdb::TOid oid);
88 
89  /// Add entries in bulk as fetched from CSeqDB::GetSeqIDs.
90  /// This api needs to be called in sequential order of OIDs
91  /// This api should only be called once for each OID
92  /// @param oid OID
93  /// @param seqids vector<CRef<CSeq_id> > from CSeqDB::GetSeqIDs
94  /// @return number of rows added to database
95  /// @see InsertEntry
96  int InsertEntries(const vector<CRef<CSeq_id>> & seqids, const blastdb::TOid oid);
97 
98 private:
99  void x_CommitTransaction();
100  void x_InsertEntry(const CRef<CSeq_id> &seqid, const blastdb::TOid oid);
101  void x_CreateOidToSeqidsLookupFile();
102  void x_Resize();
103  void x_IncreaseEnvMapSize();
104  void x_IncreaseEnvMapSize(const vector<string> & vol_names, const vector<blastdb::TOid> & vol_num_oids);
105 
106  string m_Db;
109  unsigned int m_MaxEntryPerTxn;
111  struct SKeyValuePair {
112  string id;
115  SKeyValuePair() : id(kEmptyStr), oid(kSeqDBEntryNotFound), saveToOidList(false) {}
116  static bool cmp_key(const SKeyValuePair & v, const SKeyValuePair & k) {
117  if(v.id == k.id) {
118  blastdb::TOid mask = 0xFF;
119  for(unsigned int i=0; i < sizeof(blastdb::TOid); i++) {
120  if( (v.oid & mask) != (k.oid & mask)) {
121  return (v.oid & mask) < (k.oid & mask);
122  }
123  mask = mask << 8;
124  }
125  }
126  return v.id < k.id;
127  }
128  };
129  vector<SKeyValuePair> m_list;
130  void x_Split(vector<SKeyValuePair>::iterator b, vector<SKeyValuePair>::iterator e, const unsigned int min_chunk_size);
131 };
132 
133 
134 /// This class supports creation of tax id list lookup files
135 
137 {
138 
139 public:
140 
141  /// Constructor for LMDB write access
142  /// @param dbname Database name
143  CWriteDB_TaxID(const string& dbname, Uint8 map_size = DEFAULT_TAXID_MAP_SIZE, Uint8 capacity = 500000);
144 
145  // Destructor
146  ~CWriteDB_TaxID();
147 
148 
149  /// Add tax id entries in bulk for each oid
150  /// This api needs to be called in sequential order of OIDs
151  /// This api should only be called once for each OID
152  /// @param oid OID
153  /// @param tax_ids set of tax ids for the given OID
154  /// @return number of rows added to database
155  /// @see InsertEntry
156  int InsertEntries(const set<TTaxId> & tax_ids, const blastdb::TOid oid);
157 
158 private:
159  void x_CommitTransaction();
160  void x_CreateOidToTaxIdsLookupFile();
161  void x_CreateTaxIdToOidsLookupFile();
162  void x_Resize();
163  void x_IncreaseEnvMapSize();
164 
165 
166  string m_Db;
169  unsigned int m_MaxEntryPerTxn;
170  template <class valueType>
171  struct SKeyValuePair {
173  valueType value;
174  SKeyValuePair(TTaxId t, valueType v) : tax_id(t), value(v) {}
175  static bool cmp_key(const SKeyValuePair & v, const SKeyValuePair & k) {
176  if(v.tax_id == k.tax_id) {
177  return v.value < k.value;
178  }
179  return v.tax_id < k.tax_id;
180  }
181  };
182  vector<SKeyValuePair<blastdb::TOid> > m_TaxId2OidList;
183  vector<SKeyValuePair<Uint8> > m_TaxId2OffsetsList;
184 };
185 
186 
188 
189 #endif
ncbi::TMaskedQueryRegions mask
CObject –.
Definition: ncbiobj.hpp:180
This class supports creation of a string accession to integer OID lmdb database.
vector< SKeyValuePair > m_list
lmdb::env & m_Env
unsigned int m_MaxEntryPerTxn
size_t m_TotalIdsLength
This class supports creation of tax id list lookup files.
lmdb::env & m_Env
vector< SKeyValuePair< blastdb::TOid > > m_TaxId2OidList
vector< SKeyValuePair< Uint8 > > m_TaxId2OffsetsList
unsigned int m_MaxEntryPerTxn
Resource class for `MDB_env*` handles.
Definition: lmdb++.h:1094
#define false
Definition: bool.h:36
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
int i
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
EIPRangeType t
Definition: ncbi_localip.c:101
Defines BLAST database access classes.
Defines interface to interact with LMDB files.
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
Definition: seqdbcommon.hpp:58
static bool cmp_key(const SKeyValuePair &v, const SKeyValuePair &k)
SKeyValuePair(TTaxId t, valueType v)
static bool cmp_key(const SKeyValuePair &v, const SKeyValuePair &k)
#define DEFAULT_LMDB_MAP_SIZE
USING_SCOPE(objects)
#define DEFAULT_TAXID_MAP_SIZE
USING_NCBI_SCOPE
Modified on Fri Sep 20 14:58:18 2024 by modify_doxy.py rev. 669887