61 m_OidMasks (oid_masks),
154 const string &
ambig,
155 const string & binhdr,
167 if (! (seq.size() && binhdr.size())) {
170 "Error: Cannot find CBioseq or deflines.");
179 bool overfull =
false;
189 int num = (
int)idlist.size();
207 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
208 (!defined(NCBI_COMPILER_MIPSPRO)) )
209 for(
int blob_i = 0; blob_i < (
int) blobs.size(); blob_i++) {
212 if (!
m_Columns[blob_i / 2]->CanFit(blobs[blob_i]->Size())) {
224 if (
m_OID && overfull) {
232 pair<set<string>::iterator,
bool > rv;
239 if((rv.second ==
false) && (!(*iter)->IsLocal())) {
241 msg <<
"Error: Duplicate seq_ids are found: " << endl <<
id << endl;
248 msg <<
"Error: Duplicate seq_ids are found: " << endl
254 unsigned int off_hdr(0), off_seq(0), off_amb(0);
272 const CSeq_id & seqid = **iter;
294 size_t model_id_count = 0;
305 if(model_id_count == num_accs) {
309 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
310 (!defined(NCBI_COMPILER_MIPSPRO)) )
311 for(
int col_i = 0; col_i < (
int)
m_Columns.size(); col_i++) {
312 _ASSERT(col_i * 2 < (
int) blobs.size());
313 if (col_i == maskcol_id) {
314 m_Columns[col_i]->AddBlob(*blobs[col_i * 2], *blobs[col_i * 2 + 1]);
316 m_Columns[col_i]->AddBlob(*blobs[col_i * 2]);
368 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
369 (!defined(NCBI_COMPILER_MIPSPRO)) )
408 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
409 (!defined(NCBI_COMPILER_MIPSPRO)) )
411 (**iter).RenameSingle();
426 size_t t = index_filename.find_last_of(
".");
451 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
452 (!defined(NCBI_COMPILER_MIPSPRO)) )
454 (**iter).RenameFileIndex(num_digits);
495 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
496 (!defined(NCBI_COMPILER_MIPSPRO)) )
498 (**iter).ListFiles(files,
true);
503 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
504 (!defined(NCBI_COMPILER_MIPSPRO)) )
517 "Error: Cannot have more than 36 columns.");
520 extn[1] =
"abcdefghijklmnopqrstuvwxyz0123456789"[col_id];
539 if (mbo) new_col->AddByteOrder(
m_DbName,
549 for(
int j = 0; j <
m_OID; j++) {
550 if (mbo) new_col->AddBlob(blank, blank);
551 else new_col->AddBlob(blank);
561 const string &
value)
563 if ((col_id < 0) || (col_id >= (
int)
m_Columns.size())) {
565 "Error: provided column ID is not valid");
578 m_Type(mask_type), m_TotalOids(0), m_Map(
NULL), m_MapSize(0) { }
583 m_MapSize = (size_t) ((num_oids - 1U) / BITWIDTH + 1U);
599 const int BITSHIFT = 3;
600 const uint32_t BITMASK = (1U << BITSHIFT) - 1U;
609 size_t offset = *oid >> BITSHIFT;
`Blob' Class for SeqDB (and WriteDB).
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CWriteDB_IndexFile class.
unsigned int WriteInt4(int data)
Write an Int4 (in bigendian order) to the file.
const string & GetFilename() const
Get the current filename for this file.
virtual void RenameFileIndex(unsigned int num_digits)
void Create()
Create and open the file.
void Close()
Close the file, flushing any remaining data to disk.
static string MakeShortName(const string &base, int index)
Construct the short name for a volume.
unsigned int Write(const CTempString &data)
Write contents of a string to the file.
virtual void RenameSingle()
Rename this file, omitting the volume index.
This class builds the volume index file (pin or nin).
void AddSequence(int length, unsigned int hdr, unsigned int seq)
Add a sequence to a protein index file (pin).
bool CanFit()
Returns true if another sequence can fit into the file.
void AddHash(int oid, int hash)
Set a sequence's hash value.
bool CanFit(int num)
Tests whether there is room for a given number of IDs.
void ListFiles(vector< string > &files) const
List Filenames.
void AddPig(int oid, int pig)
Set PIG for a protein sequence.
void AddIds(int oid, const TIdList &ids)
Add sequence IDs to the index file.
void RenameSingle()
Rename files to single-volume names.
void Close()
Flush data to disk and close all associated files.
void RenameFileIndex(unsigned int num_digits)
void x_CreateBitMap(int num_oids)
void x_Flush()
This should flush any unwritten data to disk.
void Close(int total_oids)
Total number of OIDs in the database or volume.
CWriteDB_OidList(const string &dbname, bool protein, int index, Uint8 max_fsize, EOidMaskType mask_type)
vector< uint32_t > m_OidList
bool CanFit(int size, int letters)
Returns true if the specified amount of data would fit.
void AddSequence(const string &sequence, unsigned int &offset, int length)
Add a protein sequence to this file.
~CWriteDB_Volume()
Destructor.
Uint8 m_OidMasks
Oid masks.
vector< CRef< CSeq_id > > TIdList
Type used for lists of identifiers.
void RenameSingle()
Rename all volumes files to single-volume names.
void AddColumnMetaData(int col_id, const string &key, const string &value)
Add meta data to a column.
void RenameFileIndex(unsigned int num_digits)
set< string > m_IdSet
Included Seq_ids.
CRef< CWriteDB_Isam > m_PigIsam
PIG index (ppi+ppd, protein only).
CRef< CWriteDB_OidList > m_ExModelList
string m_VolName
Database name plus version (if used).
CRef< CWriteDB_HeaderFile > m_Hdr
Header file (phr / nhr).
CWriteDB_Volume(const string &dbname, bool protein, const string &title, const string &date, int index, Uint8 max_file_size, Uint8 max_letters, EIndexType indices, EBlastDbVersion dbver=eBDB_Version5, Uint8 oid_masks=EOidMaskType::fNone)
Build a database volume.
const int & GetOID() const
Get the current OID of the volume.
bool m_Open
True if user can still append sequences.
vector< CRef< CBlastDbBlob > > TBlobList
Type used for lists of blobs.
CRef< CWriteDB_Isam > m_HashIsam
Hash index (phi+phd or nhi+nhd).
CRef< CWriteDB_Isam > m_AccIsam
Accession index (psi+psd / nsi+nsd).
CRef< CWriteDB_Isam > m_TraceIsam
Trace ID index (pti+ptd or nti+ntd).
EBlastDbVersion m_DbVersion
Blast DB version.
int m_OID
Next assigned OID.
int CreateColumn(const string &title, const TColumnMeta &meta, Uint8 max_sz, bool mbo=true)
Create a new database column.
int m_Index
Index of this volume (1 based).
bool WriteSequence(const string &seq, const string &ambig, const string &binhdr, const TIdList &ids, int pig, int hash, const TBlobList &blobs, int maskcol_id=-1)
Add a sequence to this volume.
CRef< CWriteDB_Isam > m_GiIsam
GI index (pni+pnd / nni+nnd).
CRef< CWriteDB_GiIndex > m_GiIndex
OID->GI lookup (pgx or ngx).
CRef< CWriteDB_SequenceFile > m_Seq
Sequence file (psq / nsq).
bool m_Protein
True for protein; false for nucleotide.
int x_FindNuclLength(const string &seq)
Compute base-length of compressed nucleotide sequence.
EIndexType m_Indices
Indices are sparse, full, or disabled.
void Close()
Close the volume.
CRef< CWriteDB_IndexFile > m_Idx
Index file (pin / nin).
string m_DbName
Base name of the database.
vector< CRef< CWriteDB_Column > > m_Columns
Database columns.
void ListFiles(vector< string > &files) const
List all files associated with this volume.
EIndexType
Whether and what kind of indices to build.
@ eAddHash
Add an index from sequence hash to OID.
@ eSparseIndex
Use only simple accessions in the string index.
@ eNoIndex
Build a database without any indices.
@ eAddTrace
OR this in to add an index for trace IDs.
iterator_bool insert(const value_type &val)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST(message)
This macro is deprecated; it is strongly recommended that all projects (except tests) move to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
@ fLabel_UpperCase
Upper case label, with special encoding for PDB chain-ids.
@ fLabel_Default
default options - always show the version
@ eDefault
default is to show type + content
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TGi GetGi(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)
EBlastDbVersion
BLAST database version.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static bool ambig(char c)
Defines exception class for WriteDB.
int WriteDB_FindSequenceLength(bool protein, const string &seq)
Compute length of sequence from raw packing.
@ eAcc
Accession (string) Index.
@ ePig
Protein Identifier Group.
USING_SCOPE(std)
Include C++ std library symbols.
Code for database volume construction.