NCBI C++ ToolKit
seqdbcol.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBCOL_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBCOL_HPP
3 
4 /* $Id: seqdbcol.hpp 92678 2021-02-05 18:10:16Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file seqdbcol.hpp
34 /// Defines database column access classes.
35 ///
36 /// Defines classes:
37 /// CSeqDBColumn
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
43 #include <objects/seq/seq__.hpp>
44 
46 
47 /// Import definitions from the objects namespace.
49 
50 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
51  (!defined(NCBI_COMPILER_MIPSPRO)) )
52 
53 /// CSeqDBColumn class.
54 ///
55 /// This code supports arbitrary user-defined data columns. These can
56 /// be produced as part of a database volume, and accessed via SeqDB,
57 /// or can be independent entities, not associated with any database.
58 
59 class CSeqDBColumn : public CObject {
60 public:
61  /// Constructor.
62  ///
63  /// The constructor verifies the existence and some of the data of
64  /// the files making up this database column. Since column files
65  /// may be external to a database volume, this object manages its
66  /// own CSeqDBAtlasHolder object and `flush' callback. For the
67  /// same reason, the lock holder is optional here (an internal one
68  /// is used if one is not provided).
69  ///
70  /// @param basename
71  /// The base name of the volume. [in]
72  /// @param index_extn
73  /// The file extension for the index file. [in]
74  /// @param data_extn
75  /// The file extension for the data file. [in]
76  /// @param lockedp
77  /// The lock holder object for this thread (or NULL). [in]
78  CSeqDBColumn(const string & basename,
79  const string & index_extn,
80  const string & data_extn,
81  CSeqDBLockHold * lockedp);
82 
83  /// Destructor.
84  ~CSeqDBColumn();
85 
86  /// Determine if the column exists.
87  ///
88  /// This method tests whether a column with the given name and
89  /// file extension exists. An alternative to calling this method
90  /// is to try to construct the column and catch an exception if
91  /// the construction attempt fails.
92  ///
93  /// @param basename
94  /// The base name of the volume. [in]
95  /// @param extn
96  /// The file extension to check for. [in]
97  /// @param atlas
98  /// A reference to the memory management layer. [in]
99  ///
100  /// @note The signature takes a single extension and no lock holder;
101  /// separate index/data extensions are not accepted by this method.
102  /// @return Presumably true if the column exists (confirm in seqdbcol.cpp).
103  static bool ColumnExists(const string & basename,
104  const string & extn,
105  CSeqDBAtlas & atlas);
106 
107  /// Get the column title.
108  /// @return The column title.
109  const string & GetTitle() const;
110 
111  /// Get the column's Key/Value meta data.
112  /// @return All key/value meta data stored in this column.
114 
115  /// Get the number of OIDs stored here.
116  /// @return The number of OIDs stored here.
117  int GetNumOIDs() const;
118 
119  /// Fetch the data blob for the given oid.
120  ///
121  /// This version fetches the data for the given OID, optionally
122  /// incrementing the memory region so that it will not be garbage
123  /// collected until the blob in question refers to another memory
124  /// region. If `keep' is true, the blob will contain an object
125  /// designed to maintain the memory mapping until the next time
126  /// the blob data is assigned or modified (which must be done when
127  /// this thread does not hold the Atlas lock). Otherwise the
128  /// memory mapping will only be guaranteed held until the lock is
129  /// released or the atlas is asked to provide another memory
130  /// region.
131  ///
132  /// @param oid The OID of the blob. [in]
133  /// @param blob The data will be returned here. [out]
134  /// @param keep If true, increment the memory region. [in]
135  /// @param lockedp The lock holder object for this thread. [in]
136  void GetBlob(int oid,
137  CBlastDbBlob & blob,
138  bool keep,
139  CSeqDBLockHold * lockedp);
140 
141  /// Flush any held memory.
142  void Flush();
143 
144 private:
145  /// String format used by column files.
146  static const CBlastDbBlob::EStringFormat
148 
149  /// File offset type.
151 
152  /// Prevent copy construction.
154 
155  /// Prevent copy assignment.
157 
158  /// Open files and read field data from the atlas.
159  /// @param locked The lock holder object for this thread. [in]
160  void x_ReadFields(CSeqDBLockHold & locked);
161 
162  /// Read the metadata for this column (see seqdbcol.cpp for specifics).
163  /// @param locked The lock holder object for this thread. [in]
164  void x_ReadMetaData(CSeqDBLockHold & locked);
165 
166  /// Which file to access.
167  enum ESelectFile {
168  e_Index = 101, ///< Use index file.
169  e_Data = 102 ///< Use data file.
170  };
171 
172  /// Get a range of the index or data file.
173  ///
174  /// A range of the index or data file is acquired and returned in
175  /// the provided blob.
176  ///
177  /// @param begin The start offset for this range of data. [in]
178  /// @param end The end (post) offset for this range of data. [in]
179  /// @param select_file Whether to use the index or data file. [in]
180  /// @param lifetime Should the blob maintain the memory mapping? [in]
181  /// @param blob The data will be returned here. [out]
182  /// @param locked The lock holder object for this thread. [in]
183  void x_GetFileRange(TIndx begin,
184  TIndx end,
185  ESelectFile select_file,
186  bool lifetime,
187  CBlastDbBlob & blob,
188  CSeqDBLockHold & locked);
189 
190  //
191  // Data
192  //
193 
194  // Disabled member: this callback functor allows the atlas code to flush
195  // any cached region holds prior to garbage collection.
196  //CSeqDBColumnFlush m_FlushCB;
197 
198  /// Ensures that a copy of the atlas exists.
200 
201  /// Reference to the atlas.
203 
204  /// Index file.
206 
207  /// Data file.
209 
210  /// Index file lease.
212 
213  /// Data file lease.
215 
216  /// Number of OIDs (Blobs) in this column.
218 
219  /// Total length of data stored in the data file.
221 
222  /// Start offset (in the index file) of the metadata section.
224 
225  /// Start offset (in the index file) of the offset array.
227 
228  /// The title identifies this column's purpose.
229  string m_Title;
230 
231  /// The create date of the column files.
232  string m_Date;
233 
234  /// All key/value metadata for this column.
236 };
237 
238 
239 /// Database-wide column information.
240 ///
241 /// Users of a SeqDB database treat the column title and column ID as
242 /// corresponding to one database column spanning the entire OID range
243 /// of the database. This class holds data used by CSeqDBImpl to map
244 /// the functionality of all columns with the same title from all
245 /// database volumes into one conceptual database-wide column.
246 
247 class CSeqDB_ColumnEntry : public CObject {
248 public:
249  /// Constructor.
250  /// @param indices The indices of this column in each volume. [in]
251  CSeqDB_ColumnEntry(const vector<int> & indices);
252 
253  /// Get a volume-specific column ID.
254  /// @param volnum The index of the volume; asserted to be below the number of stored indices. [in]
255  /// @return The column ID for this column entry's column.
256  int GetVolumeIndex(int volnum)
257  {
258  _ASSERT(volnum < (int)m_VolIndices.size());
259  return m_VolIndices[volnum];
260  }
261 
262  /// Determine if we have the metadata map yet.
263  /// @return true if the metadata map has been marked as computed.
264  bool HaveMap()
265  {
266  return m_HaveMap;
267  }
268 
269  /// Indicate that the metadata map is now complete (asserts it was not already marked so).
270  void SetHaveMap()
271  {
272  _ASSERT(! m_HaveMap);
273  m_HaveMap = true;
274  }
275 
276  /// Get the metadata map.
277  ///
278  /// This method returns the database-wide metadata map for this
279  /// column, which is a potentially lossy combination of the maps
280  /// for all of the per-volume columns with this title.
281  ///
282  /// @return The combined metadata map for this column.
284  {
286  return m_Map;
287  }
288 
289  /// Add a meta-data key/value association.
290  ///
291  /// Where volumes disagree on the value of a given metadata key,
292  /// the policy is to use the first value we find for each key.
293  ///
294  /// @param k The metadata key to add. [in]
295  /// @param v The value to associate with this key. [in]
296  void SetMapValue(const string & k, const string & v);
297 
298 private:
299  /// The indices of columns with this title in each volume.
300  vector<int> m_VolIndices;
301 
302  /// True if the metadata map is stored.
303  bool m_HaveMap;
304 
305  /// The combined metadata map for this column.
307 };
308 
309 #endif
310 
312 
313 #endif // OBJTOOLS_READERS_SEQDB__SEQDBCOL_HPP
314 
315 
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
EStringFormat
String termination style.
Definition: seqdbblob.hpp:233
@ eSizeVar
Write string length as VarInt, then string data.
Definition: seqdbblob.hpp:237
CObject –.
Definition: ncbiobj.hpp:180
Guard object for the SeqDBAtlas singleton.
Definition: seqdbatlas.hpp:631
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:297
CNcbiStreamoff TIndx
The type used for file offsets.
Definition: seqdbatlas.hpp:301
CSeqDBColumn class.
Definition: seqdbcol.hpp:59
string m_Title
The title identifies this column's purpose.
Definition: seqdbcol.hpp:229
Int4 m_NumOIDs
Number of OIDs (Blobs) in this column.
Definition: seqdbcol.hpp:217
CSeqDBColumn(const string &basename, const string &index_extn, const string &data_extn, CSeqDBLockHold *lockedp)
Constructor.
Definition: seqdbcol.cpp:102
static bool ColumnExists(const string &basename, const string &extn, CSeqDBAtlas &atlas)
Determine if the column exists.
Definition: seqdbcol.cpp:155
void x_ReadMetaData(CSeqDBLockHold &locked)
Read the metadata for this column.
Definition: seqdbcol.cpp:273
CSeqDBAtlasHolder m_AtlasHolder
Ensures that a copy of the atlas exists.
Definition: seqdbcol.hpp:199
CSeqDBColumn(const CSeqDBColumn &)
Prevent copy construction.
void Flush()
Flush any held memory.
Definition: seqdbcol.cpp:175
ESelectFile
Which file to access.
Definition: seqdbcol.hpp:167
@ e_Index
Use index file.
Definition: seqdbcol.hpp:168
@ e_Data
Use data file.
Definition: seqdbcol.hpp:169
Int8 m_DataLength
Total length of data stored in the data file.
Definition: seqdbcol.hpp:220
Int4 m_OffsetArrayStart
Start offset (in the index file) of the offset array.
Definition: seqdbcol.hpp:226
int GetNumOIDs() const
Get the number of OIDs stored here.
Definition: seqdbcol.cpp:170
CSeqDBRawFile m_DataFile
Data file.
Definition: seqdbcol.hpp:208
map< string, string > m_MetaData
All key/value metadata for this column.
Definition: seqdbcol.hpp:235
const map< string, string > & GetMetaData()
Get the column's Key/Value meta data.
Definition: seqdbcol.cpp:355
CSeqDBFileMemMap m_DataLease
Data file lease.
Definition: seqdbcol.hpp:214
void x_GetFileRange(TIndx begin, TIndx end, ESelectFile select_file, bool lifetime, CBlastDbBlob &blob, CSeqDBLockHold &locked)
Get a range of the index or data file.
Definition: seqdbcol.cpp:181
static const CBlastDbBlob::EStringFormat kStringFmt
String format used by column files.
Definition: seqdbcol.hpp:147
void GetBlob(int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold *lockedp)
Fetch the data blob for the given oid.
Definition: seqdbcol.cpp:322
string m_Date
The create date of the column files.
Definition: seqdbcol.hpp:232
CSeqDBAtlas & m_Atlas
Reference to the atlas.
Definition: seqdbcol.hpp:202
const string & GetTitle() const
Get the column title.
Definition: seqdbcol.cpp:164
CSeqDBColumn & operator=(CSeqDBColumn &)
Prevent copy assignment.
CSeqDBRawFile m_IndexFile
Index file.
Definition: seqdbcol.hpp:205
Int4 m_MetaDataStart
Start offset (in the index file) of the metadata section.
Definition: seqdbcol.hpp:223
CSeqDBFileMemMap m_IndexLease
Index file lease.
Definition: seqdbcol.hpp:211
~CSeqDBColumn()
Destructor.
Definition: seqdbcol.cpp:147
void x_ReadFields(CSeqDBLockHold &locked)
Open files and read field data from the atlas.
Definition: seqdbcol.cpp:206
CSeqDBAtlas::TIndx TIndx
File offset type.
Definition: seqdbcol.hpp:150
CSeqDBLockHold.
Definition: seqdbatlas.hpp:166
Raw file.
Definition: seqdbfile.hpp:64
Database-wide column information.
Definition: seqdbcol.hpp:247
CSeqDB_ColumnEntry(const vector< int > &indices)
Constructor.
Definition: seqdbcol.cpp:363
const map< string, string > & GetMap()
Get the metadata map.
Definition: seqdbcol.hpp:283
vector< int > m_VolIndices
The indices of columns with this title in each volume.
Definition: seqdbcol.hpp:300
bool m_HaveMap
True if the metadata map is stored.
Definition: seqdbcol.hpp:303
void SetHaveMap()
Indicate that the metadata map is now complete.
Definition: seqdbcol.hpp:270
int GetVolumeIndex(int volnum)
Get a volume-specific column ID.
Definition: seqdbcol.hpp:256
bool HaveMap()
Determine if we have the metadata map yet.
Definition: seqdbcol.hpp:264
map< string, string > m_Map
The combined metadata map for this column.
Definition: seqdbcol.hpp:306
void SetMapValue(const string &k, const string &v)
Add a meta-data key/value association.
Definition: seqdbcol.cpp:368
#define basename(path)
Definition: replacements.h:116
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
The SeqDB memory management layer.
USING_SCOPE(objects)
Import definitions from the objects namespace.
CSeqDBAtlas::TIndx TIndx
Index file.
Definition: seqdbfile.cpp:69
File access objects for CSeqDB.
#define _ASSERT
Modified on Fri Sep 20 14:57:06 2024 by modify_doxy.py rev. 669887