NCBI C++ ToolKit
|
Search Toolkit Book for CWriteDB_Volume
CWriteDB_Volume class. More...
#include "writedb_volume.hpp"
(Private to src/objtools/blast/seqdb_writer.)
Public Types | |
typedef CWriteDB::EIndexType | EIndexType |
Whether and what kind of indices to build. More... | |
typedef vector< CRef< CSeq_id > > | TIdList |
Type used for lists of identifiers. More... | |
typedef vector< CRef< CBlastDbBlob > > | TBlobList |
Type used for lists of blobs (CBlastDbBlob objects). More... | |
typedef CWriteDB_Column::TColumnMeta | TColumnMeta |
Type used for database column meta-data. More... | |
Public Types inherited from CObject | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... | |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
Public Member Functions | |
CWriteDB_Volume (const string &dbname, bool protein, const string &title, const string &date, int index, Uint8 max_file_size, Uint8 max_letters, EIndexType indices, EBlastDbVersion dbver=eBDB_Version5, Uint8 oid_masks=EOidMaskType::fNone) | |
Build a database volume. More... | |
~CWriteDB_Volume () | |
Destructor. More... | |
bool | WriteSequence (const string &seq, const string &ambig, const string &binhdr, const TIdList &ids, int pig, int hash, const TBlobList &blobs, int maskcol_id=-1) |
Add a sequence to this volume. More... | |
void | RenameSingle () |
Rename all volume files to single-volume names. More... | |
void | RenameFileIndex (unsigned int num_digits) |
void | Close () |
Close the volume. More... | |
const string & | GetVolumeName () const |
Get the name of the volume. More... | |
const int & | GetOID () const |
Get the current OID of the volume. More... | |
void | ListFiles (vector< string > &files) const |
List all files associated with this volume. More... | |
int | CreateColumn (const string &title, const TColumnMeta &meta, Uint8 max_sz, bool mbo=true) |
Create a new database column. More... | |
void | AddColumnMetaData (int col_id, const string &key, const string &value) |
Add meta data to a column. More... | |
Public Member Functions inherited from CObject | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap). More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
virtual void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Define method for dumping debug information. More... | |
Public Member Functions inherited from CDebugDumpable | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Private Member Functions | |
int | x_FindNuclLength (const string &seq) |
Compute base-length of compressed nucleotide sequence. More... | |
Private Attributes | |
string | m_DbName |
Base name of the database. More... | |
string | m_VolName |
Database name plus version (if used). More... | |
bool | m_Protein |
True for protein; false for nucleotide. More... | |
string | m_Title |
Database title (same for all volumes). More... | |
string | m_Date |
Construct time (same for all volumes). More... | |
int | m_Index |
Index of this volume (1 based). More... | |
EIndexType | m_Indices |
Indices are sparse, full, or disabled. More... | |
EBlastDbVersion | m_DbVersion |
Blast DB version. More... | |
Uint8 | m_OidMasks |
Oid masks. More... | |
int | m_OID |
Next assigned OID. More... | |
bool | m_Open |
True if user can still append sequences. More... | |
CRef< CWriteDB_IndexFile > | m_Idx |
Index file (pin / nin). More... | |
CRef< CWriteDB_HeaderFile > | m_Hdr |
Header file (phr / nhr). More... | |
CRef< CWriteDB_SequenceFile > | m_Seq |
Sequence file (psq / nsq). More... | |
CRef< CWriteDB_Isam > | m_AccIsam |
Accession index (psi+psd / nsi+nsd). More... | |
CRef< CWriteDB_Isam > | m_GiIsam |
GI index (pni+pnd / nni+nnd). More... | |
CRef< CWriteDB_Isam > | m_PigIsam |
PIG index (ppi+ppd, protein only). More... | |
CRef< CWriteDB_Isam > | m_TraceIsam |
Trace ID index (pti+ptd or nti+ntd). More... | |
CRef< CWriteDB_Isam > | m_HashIsam |
Hash index (phi+phd or nhi+nhd). More... | |
CRef< CWriteDB_GiIndex > | m_GiIndex |
OID->GI lookup (pgx or ngx). More... | |
CRef< CWriteDB_OidList > | m_ExModelList |
vector< CRef< CWriteDB_Column > > | m_Columns |
Database columns. More... | |
set< string > | m_IdSet |
Included Seq_ids. More... | |
Additional Inherited Members | |
Static Public Member Functions inherited from CObject | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
Static Public Member Functions inherited from CDebugDumpable | |
static void | EnableDebugDump (bool on) |
Static Public Attributes inherited from CObject | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single bit value. More... | |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
Protected Member Functions inherited from CObject | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
CWriteDB_Volume class.
This manufactures a blast database volume from sequences.
Definition at line 140 of file writedb_volume.hpp.
Whether and what kind of indices to build.
Definition at line 143 of file writedb_volume.hpp.
typedef vector< CRef<CBlastDbBlob> > CWriteDB_Volume::TBlobList |
Type used for lists of blobs (CBlastDbBlob objects).
Definition at line 149 of file writedb_volume.hpp.
Type used for database column meta-data.
Definition at line 250 of file writedb_volume.hpp.
typedef vector< CRef<CSeq_id> > CWriteDB_Volume::TIdList |
Type used for lists of identifiers.
Definition at line 146 of file writedb_volume.hpp.
CWriteDB_Volume::CWriteDB_Volume | ( | const string & | dbname, |
bool | protein, | ||
const string & | title, | ||
const string & | date, | ||
int | index, | ||
Uint8 | max_file_size, | ||
Uint8 | max_letters, | ||
EIndexType | indices, | ||
EBlastDbVersion | dbver = eBDB_Version5 , |
||
Uint8 | oid_masks = EOidMaskType::fNone |
||
) |
Build a database volume.
dbname | Base name of the database, such as 'nr'. |
protein | True if the database is a protein database. |
title | Title of the database. |
date | Creation date of the database. |
index | Volume index (for filename). |
max_file_size | Maximum file size for this volume. |
max_letters | Maximum number of letters for this volume. |
indices | Type of indices to build. |
Definition at line 44 of file writedb_volume.cpp.
References dbname(), eAcc, CWriteDB::eAddHash, CWriteDB::eAddTrace, eBDB_Version5, eGi, eHash, CWriteDB::eNoIndex, ePig, CWriteDB::eSparseIndex, eTrace, fExcludeModel, m_AccIsam, m_DbName, m_DbVersion, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_Idx, m_Index, m_Indices, m_OidMasks, m_PigIsam, m_Protein, m_Seq, m_TraceIsam, m_VolName, CWriteDB_File::MakeShortName(), and CRef< C, Locker >::Reset().
CWriteDB_Volume::~CWriteDB_Volume | ( | ) |
Destructor.
The Close() method will be called if it has not already been called.
Definition at line 146 of file writedb_volume.cpp.
Add meta data to a column.
In addition to normal blob data, database columns can store a "dictionary" of user-defined metadata in key/value form. This method adds one such key/value pair to the column. Specifying a key a second time causes replacement of the previous value. Using this mechanism to store large amounts of data may have a negative impact on performance.
col_id | Specifies the column to add this metadata to. |
key | A unique key string. |
value | A value string. |
Definition at line 559 of file writedb_volume.cpp.
References ncbi::grid::netcache::search::fields::key, m_Columns, NCBI_THROW, and rapidjson::value.
Referenced by CWriteDB_Impl::AddColumnMetaData().
void CWriteDB_Volume::Close | ( | void | ) |
Close the volume.
This method finalizes and closes all files associated with this volume. (This is not a trivial operation, because ISAM indices and the index file (pin or nin) cannot be written until all of the data has been seen.)
Definition at line 334 of file writedb_volume.cpp.
References set< Key, Compare >::clear(), CWriteDB_File::Close(), CWriteDB_Isam::Close(), CWriteDB_OidList::Close(), CWriteDB::eNoIndex, GetOID(), m_AccIsam, m_Columns, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_IdSet, m_Idx, m_Indices, m_Open, m_PigIsam, m_Protein, m_Seq, m_TraceIsam, NON_CONST_ITERATE, and CRef< C, Locker >::NotEmpty().
Referenced by CWriteDB_Impl::Close(), CWriteDB_Impl::x_Publish(), and ~CWriteDB_Volume().
int CWriteDB_Volume::CreateColumn | ( | const string & | title, |
const TColumnMeta & | meta, | ||
Uint8 | max_sz, | ||
bool | mbo = true |
||
) |
Create a new database column.
title | The title of the new column. |
meta | Metadata to store in the new column. |
max_sz | max file size. |
Definition at line 505 of file writedb_volume.cpp.
References m_Columns, m_DbName, m_Index, m_OID, m_Protein, and NCBI_THROW.
Referenced by CWriteDB_Impl::CreateColumn(), and CWriteDB_Impl::x_Publish().
Get the current OID of the volume.
The current OID is needed for generating BL_ORD_ID.
Definition at line 238 of file writedb_volume.hpp.
References m_OID.
Referenced by CWriteDB_Impl::Close(), Close(), and CWriteDB_Impl::x_CookHeader().
Get the name of the volume.
The volume name includes the path and version (if a version is used) but does not include the extension. It is the name that would be provided to SeqDB to open this volume. This method should be called after RenameSingle() if that method is going to be called.
Definition at line 228 of file writedb_volume.hpp.
References m_VolName.
Referenced by CWriteDB_Impl::Close(), and CWriteDB_Impl::x_MakeAlias().
void CWriteDB_Volume::ListFiles | ( | vector< string > & | files | ) | const |
List all files associated with this volume.
files | The filenames will be appended to this vector. |
Definition at line 461 of file writedb_volume.cpp.
References CWriteDB_File::GetFilename(), ITERATE, CWriteDB_Isam::ListFiles(), m_AccIsam, m_Columns, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_Idx, m_PigIsam, m_Seq, m_TraceIsam, and CRef< C, Locker >::NotEmpty().
void CWriteDB_Volume::RenameFileIndex | ( | unsigned int | num_digits | ) |
Definition at line 417 of file writedb_volume.cpp.
References _ASSERT, CWriteDB::eNoIndex, CWriteDB_File::GetFilename(), log10(), m_AccIsam, m_Columns, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_Idx, m_Index, m_Indices, m_Open, m_PigIsam, m_Protein, m_Seq, m_TraceIsam, m_VolName, NON_CONST_ITERATE, CRef< C, Locker >::NotEmpty(), CWriteDB_File::RenameFileIndex(), CWriteDB_Isam::RenameFileIndex(), and t.
Referenced by CWriteDB_Impl::Close().
void CWriteDB_Volume::RenameSingle | ( | ) |
Rename all volume files to single-volume names.
When volume component files are generated by WriteDB, the volume names include a volume index. This method renames the generated files for this volume to names that do not include the volume index. This method should not be called until the volume has been closed with Close().
Definition at line 376 of file writedb_volume.cpp.
References _ASSERT, CWriteDB::eNoIndex, m_AccIsam, m_Columns, m_DbName, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_Idx, m_Indices, m_Open, m_PigIsam, m_Protein, m_Seq, m_TraceIsam, m_VolName, NON_CONST_ITERATE, CRef< C, Locker >::NotEmpty(), CWriteDB_File::RenameSingle(), and CWriteDB_Isam::RenameSingle().
Referenced by CWriteDB_Impl::Close().
bool CWriteDB_Volume::WriteSequence | ( | const string & | seq, |
const string & | ambig, | ||
const string & | binhdr, | ||
const TIdList & | ids, | ||
int | pig, | ||
int | hash, | ||
const TBlobList & | blobs, | ||
int | maskcol_id = -1 |
||
) |
Add a sequence to this volume.
The provided data represents all information for one non-redundant sequence that will be added to this volume.
seq | Sequence data in format ncbi2na or ncbistdaa. |
ambig | Ambiguities (for protein this should be empty). |
binhdr | Binary headers (blast deflines in binary ASN.1). |
ids | List of identifiers for ISAM construction. |
pig | PIG protein identifier (zero if not available). |
hash | Sequence hash (zero if not available). |
Definition at line 153 of file writedb_volume.cpp.
References _ASSERT, CWriteDB_GiIndex::AddGi(), CWriteDB_Isam::AddHash(), CWriteDB_Isam::AddIds(), CWriteDB_OidList::AddOid(), CWriteDB_Isam::AddPig(), CWriteDB_HeaderFile::AddSequence(), CWriteDB_SequenceFile::AddSequence(), CWriteDB_IndexFile::AddSequence(), ambig(), CWriteDB_IndexFile::CanFit(), CWriteDB_Isam::CanFit(), CWriteDB_HeaderFile::CanFit(), CWriteDB_SequenceFile::CanFit(), CSeq_id::eDefault, CRef< C, Locker >::Empty(), CWriteDB::eNoIndex, CSeq_id::fAcc_predicted, CSeq_id::fLabel_Default, CSeq_id::fLabel_UpperCase, CSeq_id_Base::GetGi(), set< Key, Compare >::insert(), int, INVALID_GI, CSeq_id_Base::IsGi(), ITERATE, kEmptyStr, m_AccIsam, m_Columns, m_ExModelList, m_GiIndex, m_GiIsam, m_HashIsam, m_Hdr, m_IdSet, m_Idx, m_Indices, m_OID, m_Open, m_PigIsam, m_Protein, m_Seq, m_TraceIsam, msg(), NCBI_THROW, CRef< C, Locker >::NotEmpty(), set< Key, Compare >::size(), and x_FindNuclLength().
Referenced by CWriteDB_Impl::x_Publish().
Compute base-length of compressed nucleotide sequence.
Nucleotide sequences stored on disk are packed 4 bases to a byte, except for the last byte. That byte has 0-3 bases of real sequence data plus a 'remainder' value (from 0-3) that indicates how many of the bases of the last byte are sequence data. This method finds the exact length in bases for a nucleotide sequence packed in this way.
seq | Ncbi2na sequence with length remainder encoding. |
Definition at line 326 of file writedb_volume.cpp.
References _ASSERT, m_Protein, and WriteDB_FindSequenceLength().
Referenced by WriteSequence().
|
private |
Accession index (psi+psd / nsi+nsd).
Definition at line 304 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Database columns.
Definition at line 315 of file writedb_volume.hpp.
Referenced by AddColumnMetaData(), Close(), CreateColumn(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Construct time (same for all volumes).
Definition at line 287 of file writedb_volume.hpp.
|
private |
Base name of the database.
Definition at line 283 of file writedb_volume.hpp.
Referenced by CreateColumn(), CWriteDB_Volume(), and RenameSingle().
|
private |
Blast DB version.
Definition at line 290 of file writedb_volume.hpp.
Referenced by CWriteDB_Volume().
|
private |
Definition at line 310 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
OID->GI lookup (pgx or ngx).
Definition at line 309 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
GI index (pni+pnd / nni+nnd).
Definition at line 305 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Hash index (phi+phd or nhi+nhd).
Definition at line 308 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Header file (phr / nhr).
Definition at line 301 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
Included Seq_ids.
Definition at line 319 of file writedb_volume.hpp.
Referenced by Close(), and WriteSequence().
|
private |
Index file (pin / nin).
Definition at line 300 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Index of this volume (1 based).
Definition at line 288 of file writedb_volume.hpp.
Referenced by CreateColumn(), CWriteDB_Volume(), and RenameFileIndex().
|
private |
Indices are sparse, full, or disabled.
Definition at line 289 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Next assigned OID.
Definition at line 295 of file writedb_volume.hpp.
Referenced by CreateColumn(), GetOID(), and WriteSequence().
|
private |
|
private |
True if user can still append sequences.
Definition at line 296 of file writedb_volume.hpp.
Referenced by Close(), RenameFileIndex(), RenameSingle(), WriteSequence(), and ~CWriteDB_Volume().
|
private |
PIG index (ppi+ppd, protein only).
Definition at line 306 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
True for protein; false for nucleotide.
Definition at line 285 of file writedb_volume.hpp.
Referenced by Close(), CreateColumn(), CWriteDB_Volume(), RenameFileIndex(), RenameSingle(), WriteSequence(), and x_FindNuclLength().
|
private |
Sequence file (psq / nsq).
Definition at line 302 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Database title (same for all volumes).
Definition at line 286 of file writedb_volume.hpp.
|
private |
Trace ID index (pti+ptd or nti+ntd).
Definition at line 307 of file writedb_volume.hpp.
Referenced by Close(), CWriteDB_Volume(), ListFiles(), RenameFileIndex(), RenameSingle(), and WriteSequence().
|
private |
Database name plus version (if used).
Definition at line 284 of file writedb_volume.hpp.
Referenced by CWriteDB_Volume(), GetVolumeName(), RenameFileIndex(), and RenameSingle().