CPP_DOC/doxyhtml/writedb__volume_8cpp_source.html

 /*  $Id: writedb_volume.cpp 97110 2022-06-21 14:00:37Z fongah2 $

  * ===========================================================================

  *

  *                            PUBLIC DOMAIN NOTICE

  *               National Center for Biotechnology Information

  *

  *  This software/database is a "United States Government Work" under the

  *  terms of the United States Copyright Act.  It was written as part of

  *  the author's official duties as a United States Government employee and

  *  thus cannot be copyrighted.  This software/database is freely available

  *  to the public for use. The National Library of Medicine and the U.S.

  *  Government have not placed any restriction on its use or reproduction.

  *

  *  Although all reasonable efforts have been taken to ensure the accuracy

  *  and reliability of the software and data, the NLM and the U.S.

  *  Government do not and cannot warrant the performance or results that

  *  may be obtained by using this software or data. The NLM and the U.S.

  *  Government disclaim all warranties, express or implied, including

  *  warranties of performance, merchantability or fitness for any particular

  *  purpose.

  *

  *  Please cite the author in any work or product based on this material.

  *

  * ===========================================================================

  *

  * Author:  Kevin Bealer

  *

  */


 /// @file writedb_volume.cpp

 /// Implementation of the CWriteDB_Volume and CWriteDB_OidList classes,

 /// which write the files of a single WriteDB database volume.

 #include <ncbi_pch.hpp>

 #include "writedb_volume.hpp"

 #include <objtools/blast/seqdb_writer/writedb_error.hpp>

 #include <iostream>

 #include <cmath>

 #include <new>


 BEGIN_NCBI_SCOPE


 /// Include C++ std library symbols.

 USING_SCOPE(std);


 /// Build a database volume and create the writers for its component files.
 ///
 /// The index, header and sequence file writers are always created.  The
 /// ISAM writers and the GI index are created only when `indices` is not
 /// CWriteDB::eNoIndex, and the excluded-model OID list only when
 /// EOidMaskType::fExcludeModel is set in `oid_masks`.
 CWriteDB_Volume::CWriteDB_Volume(const string & dbname,
                                  bool           protein,
                                  const string & title,
                                  const string & date,
                                  int            index,
                                  Uint8          max_file_size,
                                  Uint8          max_letters,
                                  EIndexType     indices,
                                  EBlastDbVersion dbver,
                                  Uint8           oid_masks)
     : m_DbName(dbname),
       m_Protein(protein),
       m_Title(title),
       m_Date(date),
       m_Index(index),
       m_Indices(indices),
       m_DbVersion(dbver),
       m_OidMasks(oid_masks),
       m_OID(0),
       m_Open(true)
 {
     m_VolName = CWriteDB_File::MakeShortName(m_DbName, m_Index);

     // Core files: created for every volume.
     m_Idx.Reset(new CWriteDB_IndexFile(dbname, protein, title, date,
                                        index, max_file_size, dbver));
     m_Hdr.Reset(new CWriteDB_HeaderFile(dbname, protein, index,
                                         max_file_size));
     m_Seq.Reset(new CWriteDB_SequenceFile(dbname, protein, index,
                                           max_file_size, max_letters));

     // Lookup indices: created only when indexing was requested.
     if (m_Indices != CWriteDB::eNoIndex) {
         // The PIG index exists for protein volumes only.
         if (m_Protein) {
             m_PigIsam.Reset(new CWriteDB_Isam(ePig, dbname, protein, index,
                                               max_file_size, false));
         }

         m_GiIsam.Reset(new CWriteDB_Isam(eGi, dbname, protein, index,
                                          max_file_size, false));

         // The accession ISAM is skipped for version 5 databases; it is the
         // only index that receives the sparse flag.
         if (m_DbVersion != eBDB_Version5) {
             const bool sparse =
                 (m_Indices & CWriteDB::eSparseIndex) == CWriteDB::eSparseIndex;

             m_AccIsam.Reset(new CWriteDB_Isam(eAcc, dbname, protein, index,
                                               max_file_size, sparse));
         }

         if (m_Indices & CWriteDB::eAddTrace) {
             m_TraceIsam.Reset(new CWriteDB_Isam(eTrace, dbname, protein,
                                                 index, max_file_size, false));
         }

         if (m_Indices & CWriteDB::eAddHash) {
             m_HashIsam.Reset(new CWriteDB_Isam(eHash, dbname, protein,
                                                index, max_file_size, false));
         }

         m_GiIndex.Reset(new CWriteDB_GiIndex(dbname, protein, index,
                                              max_file_size));
     }

     // Optional OID mask listing the sequences to exclude as models.
     if (m_OidMasks & EOidMaskType::fExcludeModel) {
         m_ExModelList.Reset(new CWriteDB_OidList(dbname, protein, index,
                                                  max_file_size,
                                                  EOidMaskType::fExcludeModel));
     }
 }


 /// Destructor; closes the volume unless it has already been closed.
 CWriteDB_Volume::~CWriteDB_Volume()
 {
     if (! m_Open) {
         return;
     }

     Close();
 }


 /// Append one sequence, with its header, IDs and column blobs, to the volume.
 ///
 /// The function first asks every component file whether the new data fits.
 /// If something does not fit and the volume already holds at least one
 /// sequence, it returns false before any Add* call is made.  An empty
 /// volume always accepts the sequence, even if it exceeds the limits.
 ///
 /// @param seq        Sequence data; must not be empty.
 /// @param ambig      Ambiguity data; handed to the sequence file only for
 ///                   nucleotide volumes.
 /// @param binhdr     Binary header data; must not be empty.
 /// @param idlist     Sequence IDs, used for the uniqueness check, the ISAM
 ///                   indices, the GI index and the excluded-model list.
 /// @param pig        PIG value; stored only for protein volumes with
 ///                   indexing enabled, and only if non-zero.
 /// @param hash       Sequence hash; stored only if a hash index exists.
 /// @param blobs      Column data; entries 2*i and 2*i+1 belong to column i.
 /// @param maskcol_id Index of the column that receives both of its blobs;
 ///                   every other column receives only entry 2*i.
 /// @return true once the sequence has been added; false if the volume is
 ///         non-empty and the sequence does not fit.
 /// @throw CWriteDBException (eArgErr) if seq or binhdr is empty, or if the
 ///        duplicate-ID check fails.
 bool CWriteDB_Volume::WriteSequence(const string      & seq,

                                     const string      & ambig,

                                     const string      & binhdr,

                                     const TIdList     & idlist,

                                     int                 pig,

                                     int                 hash,

                                     const TBlobList   & blobs,

                                     int                 maskcol_id)

 {

     // Zero is a legal hash value, but we should not be computing the

     // hash value if there is no corresponding ISAM file.


     _ASSERT((! hash) || m_HashIsam.NotEmpty());


     // Both the sequence data and the header data are mandatory.
     if (! (seq.size() && binhdr.size())) {

             NCBI_THROW(CWriteDBException,

                        eArgErr,

                        "Error: Cannot find CBioseq or deflines.");

     }


     _ASSERT(m_Open);


     // For protein the length is the byte count of seq; for nucleotide it
     // is computed by x_FindNuclLength().
     int length = (m_Protein

                   ? (int) seq.size()

                   : x_FindNuclLength(seq));


     bool overfull = false;


     // Capacity check, part 1: index, header and sequence files.
     if (! (m_Idx->CanFit() &&

            m_Hdr->CanFit((int)binhdr.size()) &&

            m_Seq->CanFit((int)(seq.size() + ambig.size()), length))) {

         overfull = true;

     }


     // Capacity check, part 2: the ISAM indices (only when indexing is on).
     if (m_Indices != CWriteDB::eNoIndex) {


         int num = (int)idlist.size();


         if (! ( (m_AccIsam.Empty() || m_AccIsam->CanFit(num)) &&

                m_GiIsam->CanFit(num) &&

                (m_TraceIsam.Empty() || m_TraceIsam->CanFit(num)))) {

             overfull = true;

         }


         if (m_Protein && (! m_PigIsam->CanFit(1))) {

             overfull = true;

         }


         if (m_HashIsam.NotEmpty() && (! m_HashIsam->CanFit(1))) {

             overfull = true;

         }

     }


 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

     // Capacity check, part 3: user-defined columns.  Blob i is checked
     // against column i / 2.
     for(int blob_i = 0; blob_i < (int) blobs.size(); blob_i++) {

         _ASSERT(blob_i / 2 < (int) m_Columns.size());


         if (! m_Columns[blob_i / 2]->CanFit(blobs[blob_i]->Size())) {

             overfull = true;

             break;

         }

     }

 #endif


     // Exception - if volume has no data, ignore the file size limits;

     // otherwise there would be either a hard failure or an infinite

     // recursion of building empty volumes.  Building a volume that's

     // too big is considered preferable to either of these outcomes.


     if (m_OID && overfull) {

         return false;

     }


     // check the uniqueness of id
     // Each ID label is inserted into m_IdSet.  A repeated label throws at
     // once unless the ID is a local ID; a repeated local ID is tolerated
     // here and only caught by the size comparison after the loop.

     if (m_Indices != CWriteDB::eNoIndex) {

         set<string>::size_type orig_size = m_IdSet.size();

         string id;

         pair<set<string>::iterator, bool > rv;

         CSeq_id::TLabelFlags label_flags =

             CSeq_id::fLabel_Default | CSeq_id::fLabel_UpperCase;

         ITERATE(TIdList, iter, idlist) {

             id = kEmptyStr;

             (*iter)->GetLabel(&id, CSeq_id::eDefault, label_flags);

             rv = m_IdSet.insert(id);

             if((rv.second == false) && (!(*iter)->IsLocal())) {

                 CNcbiOstrstream msg;

                 msg << "Error: Duplicate seq_ids are found: " << endl << id << endl;

                 NCBI_THROW(CWriteDBException, eArgErr, CNcbiOstrstreamToString(msg));

             }

         }


         // The set did not grow, so this sequence contributed no new ID.
         // NOTE(review): this also fires when idlist is empty; confirm
         // that callers never pass an empty ID list with indexing enabled.
         if(m_IdSet.size() == orig_size) {

             CNcbiOstrstream msg;

             msg << "Error: Duplicate seq_ids are found: " << endl

             << id << endl;

             NCBI_THROW(CWriteDBException, eArgErr, CNcbiOstrstreamToString(msg));

         }

     }


     // Offsets are output arguments filled in by the AddSequence() calls.
     unsigned int off_hdr(0), off_seq(0), off_amb(0);


     m_Hdr->AddSequence(binhdr, off_hdr);


     // Protein: sequence only.  Nucleotide: sequence plus ambiguity data.
     if (m_Protein) {

         m_Seq->AddSequence(seq, off_seq, length);

         m_Idx->AddSequence((int) seq.size(), off_hdr, off_seq);

     } else {

         m_Seq->AddSequence(seq, ambig, off_seq, off_amb, length);

         m_Idx->AddSequence(length, off_hdr, off_seq, off_amb);

     }


     // Register the sequence, under the current OID, in each index present.
     if (m_Indices != CWriteDB::eNoIndex) {

         if(m_AccIsam.NotEmpty()) m_AccIsam->AddIds(m_OID, idlist);

         m_GiIsam->AddIds(m_OID, idlist);


         // The GI index gets the first GI found in idlist, or INVALID_GI
         // if the list contains none.
         TGi gi = INVALID_GI;

         ITERATE(TIdList, iter, idlist) {

             const CSeq_id & seqid = **iter;

             if (seqid.IsGi()) {

                 gi = seqid.GetGi();

                 break;

             }

         }

         m_GiIndex->AddGi(gi);


         if (m_Protein && pig) {

             m_PigIsam->AddPig(m_OID, pig);

         }


         if (m_TraceIsam.NotEmpty()) {

             m_TraceIsam->AddIds(m_OID, idlist);

         }


         if (m_HashIsam.NotEmpty()) {

             m_HashIsam->AddHash(m_OID, hash);

         }

     }


     // The OID goes on the excluded-model list when every non-GI ID is
     // flagged fAcc_predicted by IdentifyAccession().
     // NOTE(review): with no non-GI IDs both counters stay 0, so the OID
     // is added as well; confirm this is intended.
     if (m_ExModelList.NotEmpty()) {

         size_t model_id_count = 0;

         size_t num_accs = 0;

         ITERATE(TIdList, id, idlist) {

             if ((*id)->IsGi()) {

                 continue;

             }

             if ((*id)->IdentifyAccession() & CSeq_id::fAcc_predicted) {

                 model_id_count ++;

             }

             num_accs ++;

         }

         if(model_id_count == num_accs) {

             m_ExModelList->AddOid(m_OID);

         }

     }

 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

     // Column i takes blob 2*i; the column equal to maskcol_id takes blob
     // 2*i + 1 as well.
     for(int col_i = 0; col_i < (int)m_Columns.size(); col_i++) {

         _ASSERT(col_i * 2 < (int) blobs.size());

         if (col_i == maskcol_id) {

              m_Columns[col_i]->AddBlob(*blobs[col_i * 2], *blobs[col_i * 2 + 1]);

         } else {

              m_Columns[col_i]->AddBlob(*blobs[col_i * 2]);

         }

     }

 #endif


     // The sequence is stored; advance to the next OID.
     m_OID ++;


     return true;

 }


 int CWriteDB_Volume::x_FindNuclLength(const string & seq)

 {

     _ASSERT(! m_Protein);

     _ASSERT(seq.size());


     return WriteDB_FindSequenceLength(m_Protein, seq);

 }


 void CWriteDB_Volume::Close()

 {

     if (m_Open) {

         m_Open = false;


         // close each file.

         m_Idx->Close();

         m_Hdr->Close();

         m_Seq->Close();


         if (m_Indices != CWriteDB::eNoIndex) {

             if (m_Protein) {

                 m_PigIsam->Close();

             }

             m_GiIsam->Close();

             if(m_AccIsam.NotEmpty()) m_AccIsam->Close();

             m_GiIndex->Close();


             if (m_TraceIsam.NotEmpty()) {

                 m_TraceIsam->Close();

             }


             if (m_HashIsam.NotEmpty()) {

                 m_HashIsam->Close();

             }

             m_IdSet.clear();

         }

     }


     if (m_ExModelList.NotEmpty()) {

         m_ExModelList->Close(GetOID());

     }


 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

     NON_CONST_ITERATE(vector< CRef<CWriteDB_Column> >, iter, m_Columns) {

         (**iter).Close();

     }

 #endif

 }


 void CWriteDB_Volume::RenameSingle()

 {

     _ASSERT(! m_Open);

     m_VolName = m_DbName;


     // rename all files to 'single volume' notation.

     m_Idx->RenameSingle();

     m_Hdr->RenameSingle();

     m_Seq->RenameSingle();


     if (m_Indices != CWriteDB::eNoIndex) {

         if (m_Protein) {

             m_PigIsam->RenameSingle();

         }

         m_GiIsam->RenameSingle();

         if(m_AccIsam.NotEmpty()) m_AccIsam->RenameSingle();

         m_GiIndex->RenameSingle();


         if (m_TraceIsam.NotEmpty()) {

             m_TraceIsam->RenameSingle();

         }


         if (m_HashIsam.NotEmpty()) {

             m_HashIsam->RenameSingle();

         }

     }


     if (m_ExModelList.NotEmpty()) {

         m_ExModelList->RenameSingle();

     }


 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

     NON_CONST_ITERATE(vector< CRef<CWriteDB_Column> >, iter, m_Columns) {

         (**iter).RenameSingle();

     }

 #endif

 }


 void CWriteDB_Volume::RenameFileIndex(unsigned int num_digits)

 {

     _ASSERT(! m_Open);

     m_Idx->RenameFileIndex(num_digits);

     m_Hdr->RenameFileIndex(num_digits);

     m_Seq->RenameFileIndex(num_digits);


     if (log10(m_Index) +1 < num_digits) {

         string index_filename = m_Idx->GetFilename();

         size_t t = index_filename.find_last_of(".");

         m_VolName = index_filename.substr(0, t);

     }


     if (m_Indices != CWriteDB::eNoIndex) {

         if (m_Protein) {

             m_PigIsam->RenameFileIndex(num_digits);

         }

         m_GiIsam->RenameFileIndex(num_digits);

         if(m_AccIsam.NotEmpty()) m_AccIsam->RenameFileIndex(num_digits);

         m_GiIndex->RenameFileIndex(num_digits);


         if (m_TraceIsam.NotEmpty()) {

             m_TraceIsam->RenameFileIndex(num_digits);

         }


         if (m_HashIsam.NotEmpty()) {

             m_HashIsam->RenameFileIndex(num_digits);

         }

     }


     if (m_ExModelList.NotEmpty()) {

         m_ExModelList->RenameFileIndex(num_digits);

     }


 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

     NON_CONST_ITERATE(vector< CRef<CWriteDB_Column> >, iter, m_Columns) {

         (**iter).RenameFileIndex(num_digits);

     }

 #endif

 }


 /// Append the names of all files belonging to this volume to `files`.
 ///
 /// Order: index, header and sequence files; then the accession, GI, PIG,
 /// trace and hash ISAM files (each only if present); then the GI index,
 /// the excluded-model OID list and the column files.
 void CWriteDB_Volume::ListFiles(vector<string> & files) const
 {
     files.push_back(m_Idx->GetFilename());
     files.push_back(m_Hdr->GetFilename());
     files.push_back(m_Seq->GetFilename());

     // Optional ISAM indices, visited in a fixed order.
     const CRef<CWriteDB_Isam> * const isams[] = {
         &m_AccIsam, &m_GiIsam, &m_PigIsam, &m_TraceIsam, &m_HashIsam
     };
     const size_t isam_count = sizeof(isams) / sizeof(isams[0]);

     for (size_t i = 0; i < isam_count; ++i) {
         const CRef<CWriteDB_Isam> & isam = *isams[i];
         if (isam.NotEmpty()) {
             isam->ListFiles(files);
         }
     }

     if (m_GiIndex.NotEmpty()) {
         files.push_back(m_GiIndex->GetFilename());
     }

     if (m_ExModelList.NotEmpty()) {
         files.push_back(m_ExModelList->GetFilename());
     }

 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
      (!defined(NCBI_COMPILER_MIPSPRO)) )
     ITERATE(vector< CRef<CWriteDB_Column> >, column, m_Columns) {
         (*column)->ListFiles(files, true);
     }
 #endif
 }


 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \

      (!defined(NCBI_COMPILER_MIPSPRO)) )

 int CWriteDB_Volume::CreateColumn(const string      & title,

                                   const TColumnMeta & meta,

                                   Uint8               max_sz,

                                   bool                mbo)

 {

     int col_id = m_Columns.size();


     string extn(m_Protein ? "p??" : "n??");


     if (col_id >= 36) {

         NCBI_THROW(CWriteDBException,

                    eArgErr,

                    "Error: Cannot have more than 36 columns.");

     }


     extn[1] = "abcdefghijklmnopqrstuvwxyz0123456789"[col_id];


     string extn2 = extn;

     string extn3 = extn;


     extn[2] = 'a';

     extn2[2] = 'b';

     extn3[2] = 'c';


     CRef<CWriteDB_Column> new_col

         (new CWriteDB_Column(m_DbName,

                              extn,

                              extn2,

                              m_Index,

                              title,

                              meta,

                              max_sz));


     /* For support of multiple byte orders */

     if (mbo) new_col->AddByteOrder(m_DbName,

                              extn3,

                              m_Index,

                              max_sz);


     // If the OID is not zero, then add all the blank records for the

     // prior OIDs to the new column.


     CBlastDbBlob blank;


     for(int j = 0; j < m_OID; j++) {

         if (mbo) new_col->AddBlob(blank, blank);

         else     new_col->AddBlob(blank);

     }


     m_Columns.push_back(new_col);


     return col_id;

 }


 /// Attach a key/value meta-data pair to an existing column.
 ///
 /// @throw CWriteDBException (eArgErr) if `col_id` is not the index of a
 ///        column of this volume.
 void CWriteDB_Volume::AddColumnMetaData(int            col_id,
                                         const string & key,
                                         const string & value)
 {
     const bool in_range =
         (col_id >= 0) && (col_id < (int) m_Columns.size());

     if (! in_range) {
         NCBI_THROW(CWriteDBException, eArgErr,
                    "Error: provided column ID is not valid");
     }

     m_Columns[col_id]->AddMetaData(key, value);
 }

 #endif


 /// Construct an OID mask list for one volume.
 ///
 /// The file extension is obtained from SeqDB_GetOidMaskFileExt() for the
 /// given molecule type and mask type.  The list starts with no OIDs and
 /// no bit map allocated.
 CWriteDB_OidList::CWriteDB_OidList(const string & dbname,
                                    bool           protein,
                                    int            index,
                                    Uint8          max_fsize,
                                    EOidMaskType   mask_type)
     : CWriteDB_File(dbname,
                     SeqDB_GetOidMaskFileExt(protein, mask_type),
                     index,
                     max_fsize,
                     false),
       m_Type(mask_type),
       m_TotalOids(0),
       m_Map(NULL),
       m_MapSize(0)
 {
 }


 /// Build the in-memory OID bit mask for this list.
 ///
 /// Allocates one bit per OID (rounded up to whole bytes) and sets every
 /// bit.  For fExcludeModel lists, the bit of each OID stored in m_OidList
 /// is then cleared.  Within a byte, the first of each group of eight OIDs
 /// maps to the most significant bit.
 ///
 /// m_Map and m_MapSize are updated only after the allocation succeeded, so
 /// a failed call leaves the previous state untouched.
 ///
 /// @param num_oids  Total number of OIDs covered by the mask; must be > 0.
 /// @throw CWriteDBException (eArgErr) if a bit map already exists, if
 ///        num_oids is not positive, if the allocation fails, or if a
 ///        listed OID lies beyond the end of the mask.
 void CWriteDB_OidList::x_CreateBitMap(int num_oids)
 {
     if (m_Map != NULL) {
         NCBI_THROW(CWriteDBException, eArgErr, "Bit map exists");
     }

     // A non-positive count would wrap around in the unsigned size
     // computation below and request a huge, meaningless allocation.
     if (num_oids <= 0) {
         NCBI_THROW(CWriteDBException, eArgErr,
                    "Invalid number of oids for bit map");
     }

     const uint32_t BITWIDTH = 8U * sizeof(uint8_t);
     const size_t map_size = (size_t) ((num_oids - 1U) / BITWIDTH + 1U);

     // operator new[] reports failure with std::bad_alloc, which is not a
     // CException; translate it into the exception type this class uses.
     try {
         m_Map = new uint8_t[map_size];
     }
     catch (const std::bad_alloc &) {
         NCBI_THROW(CWriteDBException, eArgErr, "Error allocatong memory for bit map");
     }

     m_MapSize = map_size;

     // Start with every OID included (all bits set).
     memset(m_Map, 0xFF, m_MapSize);

     // Define bitmask.
     const int BITSHIFT = 3;
     const uint32_t BITMASK = (1U << BITSHIFT) - 1U;    // 0b111 = 0x7

     // For each oid in the set...
     ITERATE(vector<uint32_t>, oid, m_OidList) {

         // Calculate byte offset into mask.
         const size_t offset = *oid >> BITSHIFT;

         // Check for overrun of the mask memory.
         if (offset >= m_MapSize) {
             // Bail out.
             NCBI_THROW(CWriteDBException, eArgErr, "overrun of mask memory");
         }

         // Create byte mask.
         // First oid of each group of 8 gets MSB (bit 7),
         // and last of 8 gets LSB (bit 0).
         const uint8_t mask_bit = (uint8_t) (1U << (7U - (*oid & BITMASK)));

         // Clear the oid's bit to exclude it from the mask.
         if (m_Type & (EOidMaskType::fExcludeModel)) {
             m_Map[offset] &= (~mask_bit);
         }
     }
 }


 /// Create the mask file and write its contents: the highest OID
 /// (m_TotalOids - 1) followed by the m_MapSize bytes of the bit map.
 void CWriteDB_OidList::x_CreateMaskFile()
 {
     const uint32_t last_oid = m_TotalOids - 1U;

     Create();

     // WriteInt4 writes in big-endian order.
     WriteInt4(last_oid);

     char * const map_bytes = (char *) m_Map;
     Write(map_bytes, m_MapSize);
 }


 void CWriteDB_OidList::x_Flush() {


     Int4 num_oids = m_OidList.size();


     LOG_POST(Info << "Num of excluded oids" << num_oids);

     if (!m_TotalOids ){

         LOG_POST(Info<< "No oid list created for mode " << m_Type);

         return;

     }

     x_CreateBitMap(m_TotalOids);

     x_CreateMaskFile();

 }


 END_NCBI_SCOPE


mask
ncbi::TMaskedQueryRegions mask
Definition: blastxml_format.cpp:59

CBlastDbBlob
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56

CException
Definition: ncbiexpt.hpp:877

CNcbiOstrstreamToString
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802

CNcbistrstream_Base< IO_PREFIX::ostrstream, IOS_BASE::out >

CRef
CRef –.
Definition: ncbiobj.hpp:618

CSeq_id
Definition: Seq_id.hpp:71

CStrictId< SStrictId_Gi, SStrictId_Gi::TId >

CWriteDBException
CWriteDBException.
Definition: writedb_error.hpp:51

CWriteDB_Column
CWriteDB_Column class.
Definition: writedb_column.hpp:224

CWriteDB_File
CWriteDB_File class.
Definition: writedb_files.hpp:57

CWriteDB_File::WriteInt4
unsigned int WriteInt4(int data)
Write an Int4 (in bigendian order) to the file.
Definition: writedb_files.hpp:103

CWriteDB_File::GetFilename
const string & GetFilename() const
Get the current filename for this file.
Definition: writedb_files.hpp:157

CWriteDB_File::RenameFileIndex
virtual void RenameFileIndex(unsigned int num_digits)
Definition: writedb_files.cpp:279

CWriteDB_File::Create
void Create()
Create and open the file.
Definition: writedb_files.cpp:197

CWriteDB_File::Close
void Close()
Close the file, flushing any remaining data to disk.
Definition: writedb_files.cpp:259

CWriteDB_File::MakeShortName
static string MakeShortName(const string &base, int index)
Construct the short name for a volume.
Definition: writedb_files.cpp:235

CWriteDB_File::Write
unsigned int Write(const CTempString &data)
Write contents of a string to the file.
Definition: writedb_files.cpp:204

CWriteDB_File::RenameSingle
virtual void RenameSingle()
Rename this file, omitting the volume index.
Definition: writedb_files.cpp:267

CWriteDB_GiIndex
CWriteDB_GiIndex class.
Definition: writedb_volume.hpp:55

CWriteDB_GiIndex::AddGi
void AddGi(TGi gi)
Definition: writedb_volume.hpp:65

CWriteDB_HeaderFile
This class builds the volume header file (phr or nhr).
Definition: writedb_files.hpp:346

CWriteDB_HeaderFile::AddSequence
void AddSequence(const string &binhdr, unsigned int &offset)
Add binary header data to this file.
Definition: writedb_files.hpp:379

CWriteDB_HeaderFile::CanFit
bool CanFit(int size)
Returns true if the specified amount of data would fit.
Definition: writedb_files.hpp:365

CWriteDB_IndexFile
This class builds the volume index file (pin or nin).
Definition: writedb_files.hpp:210

CWriteDB_IndexFile::AddSequence
void AddSequence(int length, unsigned int hdr, unsigned int seq)
Add a sequence to a protein index file (pin).
Definition: writedb_files.hpp:246

CWriteDB_IndexFile::CanFit
bool CanFit()
Returns true if another sequence can fit into the file.
Definition: writedb_files.hpp:228

CWriteDB_Isam
CWriteDB_Isam class.
Definition: writedb_isam.hpp:401

CWriteDB_Isam::AddHash
void AddHash(int oid, int hash)
Set a sequence's hash value.
Definition: writedb_isam.cpp:139

CWriteDB_Isam::CanFit
bool CanFit(int num)
Tests whether there is room for a given number of IDs.
Definition: writedb_isam.cpp:124

CWriteDB_Isam::ListFiles
void ListFiles(vector< string > &files) const
List Filenames.
Definition: writedb_isam.cpp:819

CWriteDB_Isam::AddPig
void AddPig(int oid, int pig)
Set PIG for a protein sequence.
Definition: writedb_isam.cpp:134

CWriteDB_Isam::AddIds
void AddIds(int oid, const TIdList &ids)
Add sequence IDs to the index file.
Definition: writedb_isam.cpp:129

CWriteDB_Isam::RenameSingle
void RenameSingle()
Rename files to single-volume names.
Definition: writedb_isam.cpp:154

CWriteDB_Isam::Close
void Close()
Flush data to disk and close all associated files.
Definition: writedb_isam.cpp:144

CWriteDB_Isam::RenameFileIndex
void RenameFileIndex(unsigned int num_digits)
Definition: writedb_isam.cpp:160

CWriteDB_OidList
Definition: writedb_volume.hpp:100

CWriteDB_OidList::x_CreateBitMap
void x_CreateBitMap(int num_oids)
Definition: writedb_volume.cpp:580

CWriteDB_OidList::x_Flush
void x_Flush()
This should flush any unwritten data to disk.
Definition: writedb_volume.cpp:639

CWriteDB_OidList::Close
void Close(int total_oids)
Total num of oids in db or vol.
Definition: writedb_volume.hpp:119

CWriteDB_OidList::x_CreateMaskFile
void x_CreateMaskFile()
Definition: writedb_volume.cpp:630

CWriteDB_OidList::AddOid
void AddOid(int oid)
Definition: writedb_volume.hpp:115

CWriteDB_OidList::CWriteDB_OidList
CWriteDB_OidList(const string &dbname, bool protein, int index, Uint8 max_fsize, EOidMaskType mask_type)
Definition: writedb_volume.cpp:572

CWriteDB_OidList::m_OidList
vector< uint32_t > m_OidList
Definition: writedb_volume.hpp:129

CWriteDB_OidList::m_Type
EOidMaskType m_Type
Definition: writedb_volume.hpp:128

CWriteDB_OidList::m_TotalOids
int m_TotalOids
Definition: writedb_volume.hpp:130

CWriteDB_OidList::m_MapSize
size_t m_MapSize
Definition: writedb_volume.hpp:132

CWriteDB_OidList::m_Map
uint8_t * m_Map
Definition: writedb_volume.hpp:131

CWriteDB_SequenceFile
Definition: writedb_files.hpp:396

CWriteDB_SequenceFile::CanFit
bool CanFit(int size, int letters)
Returns true if the specified amount of data would fit.
Definition: writedb_files.hpp:419

CWriteDB_SequenceFile::AddSequence
void AddSequence(const string &sequence, unsigned int &offset, int length)
Add a protein sequence to this file.
Definition: writedb_files.hpp:443

CWriteDB_Volume::~CWriteDB_Volume
~CWriteDB_Volume()
Destructor.
Definition: writedb_volume.cpp:146

CWriteDB_Volume::m_OidMasks
Uint8 m_OidMasks
Oid masks.
Definition: writedb_volume.hpp:291

CWriteDB_Volume::TIdList
vector< CRef< CSeq_id > > TIdList
Type used for lists of identifiers.
Definition: writedb_volume.hpp:146

CWriteDB_Volume::RenameSingle
void RenameSingle()
Rename all volumes files to single-volume names.
Definition: writedb_volume.cpp:376

CWriteDB_Volume::AddColumnMetaData
void AddColumnMetaData(int col_id, const string &key, const string &value)
Add meta data to a column.
Definition: writedb_volume.cpp:559

CWriteDB_Volume::RenameFileIndex
void RenameFileIndex(unsigned int num_digits)
Definition: writedb_volume.cpp:417

CWriteDB_Volume::m_IdSet
set< string > m_IdSet
Included Seq_ids.
Definition: writedb_volume.hpp:319

CWriteDB_Volume::m_PigIsam
CRef< CWriteDB_Isam > m_PigIsam
PIG index (ppi+ppd, protein only).
Definition: writedb_volume.hpp:306

CWriteDB_Volume::m_ExModelList
CRef< CWriteDB_OidList > m_ExModelList
Definition: writedb_volume.hpp:310

CWriteDB_Volume::m_VolName
string m_VolName
Database name plus version (if used).
Definition: writedb_volume.hpp:284

CWriteDB_Volume::m_Hdr
CRef< CWriteDB_HeaderFile > m_Hdr
Header file (phr / nhr).
Definition: writedb_volume.hpp:301

CWriteDB_Volume::CWriteDB_Volume
CWriteDB_Volume(const string &dbname, bool protein, const string &title, const string &date, int index, Uint8 max_file_size, Uint8 max_letters, EIndexType indices, EBlastDbVersion dbver=eBDB_Version5, Uint8 oid_masks=EOidMaskType::fNone)
Build a database volume.
Definition: writedb_volume.cpp:44

CWriteDB_Volume::GetOID
const int & GetOID() const
Get the current OID of the volume.
Definition: writedb_volume.hpp:238

CWriteDB_Volume::m_Open
bool m_Open
True if user can still append sequences.
Definition: writedb_volume.hpp:296

CWriteDB_Volume::TBlobList
vector< CRef< CBlastDbBlob > > TBlobList
Type used for lists of blobs.
Definition: writedb_volume.hpp:149

CWriteDB_Volume::m_HashIsam
CRef< CWriteDB_Isam > m_HashIsam
Hash index (phi+phd or nhi+nhd).
Definition: writedb_volume.hpp:308

CWriteDB_Volume::m_AccIsam
CRef< CWriteDB_Isam > m_AccIsam
Accession index (psi+psd / nsi+nsd).
Definition: writedb_volume.hpp:304

CWriteDB_Volume::m_TraceIsam
CRef< CWriteDB_Isam > m_TraceIsam
Trace ID index (pti+ptd or nti+ntd).
Definition: writedb_volume.hpp:307

CWriteDB_Volume::m_DbVersion
EBlastDbVersion m_DbVersion
Blast DB version.
Definition: writedb_volume.hpp:290

CWriteDB_Volume::m_OID
int m_OID
Next assigned OID.
Definition: writedb_volume.hpp:295

CWriteDB_Volume::CreateColumn
int CreateColumn(const string &title, const TColumnMeta &meta, Uint8 max_sz, bool mbo=true)
Create a new database column.
Definition: writedb_volume.cpp:505

CWriteDB_Volume::m_Index
int m_Index
Index of this volume (1 based).
Definition: writedb_volume.hpp:288

CWriteDB_Volume::WriteSequence
bool WriteSequence(const string &seq, const string &ambig, const string &binhdr, const TIdList &ids, int pig, int hash, const TBlobList &blobs, int maskcol_id=-1)
Add a sequence to this volume.
Definition: writedb_volume.cpp:153

CWriteDB_Volume::m_GiIsam
CRef< CWriteDB_Isam > m_GiIsam
GI index (pni+pnd / nni+nnd).
Definition: writedb_volume.hpp:305

CWriteDB_Volume::m_GiIndex
CRef< CWriteDB_GiIndex > m_GiIndex
OID->GI lookup (pgx or ngx).
Definition: writedb_volume.hpp:309

CWriteDB_Volume::m_Seq
CRef< CWriteDB_SequenceFile > m_Seq
Sequence file (psq / nsq).
Definition: writedb_volume.hpp:302

CWriteDB_Volume::m_Protein
bool m_Protein
True for protein; false for nucleotide.
Definition: writedb_volume.hpp:285

CWriteDB_Volume::x_FindNuclLength
int x_FindNuclLength(const string &seq)
Compute base-length of compressed nucleotide sequence.
Definition: writedb_volume.cpp:326

CWriteDB_Volume::m_Indices
EIndexType m_Indices
Indices are sparse, full, or disabled.
Definition: writedb_volume.hpp:289

CWriteDB_Volume::Close
void Close()
Close the volume.
Definition: writedb_volume.cpp:334

CWriteDB_Volume::m_Idx
CRef< CWriteDB_IndexFile > m_Idx
Index file (pin / nin).
Definition: writedb_volume.hpp:300

CWriteDB_Volume::m_DbName
string m_DbName
Base name of the database.
Definition: writedb_volume.hpp:283

CWriteDB_Volume::m_Columns
vector< CRef< CWriteDB_Column > > m_Columns
Database columns.
Definition: writedb_volume.hpp:315

CWriteDB_Volume::ListFiles
void ListFiles(vector< string > &files) const
List all files associated with this volume.
Definition: writedb_volume.cpp:461

CWriteDB::EIndexType
EIndexType
Whether and what kind of indices to build.
Definition: writedb.hpp:104

CWriteDB::eAddHash
@ eAddHash
Add an index from sequence hash to OID.
Definition: writedb.hpp:126

CWriteDB::eSparseIndex
@ eSparseIndex
Use only simple accessions in the string index.
Definition: writedb.hpp:109

CWriteDB::eNoIndex
@ eNoIndex
Build a database without any indices.
Definition: writedb.hpp:106

CWriteDB::eAddTrace
@ eAddTrace
OR this in to add an index for trace IDs.
Definition: writedb.hpp:115

map< string, string >

set
Definition: set.hpp:45

set::insert
iterator_bool insert(const value_type &val)
Definition: set.hpp:149

set::clear
void clear()
Definition: set.hpp:153

set::size
size_type size() const
Definition: set.hpp:132

true
#define true
Definition: bool.h:35

false
#define false
Definition: bool.h:36

offset
int offset
Definition: replacements.h:160

uint8_t
unsigned char uint8_t
Definition: tds_sysdep_public.h:35

uint32_t
Uint4 uint32_t
Definition: tds_sysdep_public.h:54

INVALID_GI
#define INVALID_GI
Definition: ncbimisc.hpp:1089

ITERATE
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815

NON_CONST_ITERATE
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822

NULL
#define NULL
Definition: ncbistd.hpp:225

LOG_POST
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226

NCBI_THROW
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704

Info
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185

CSeq_id::TLabelFlags
int TLabelFlags
Definition: Seq_id.hpp:625

CSeq_id::fLabel_UpperCase
@ fLabel_UpperCase
Upper case label, with special encoding for PDB chain-ids.
Definition: Seq_id.hpp:620

CSeq_id::fLabel_Default
@ fLabel_Default
default options - always show the version
Definition: Seq_id.hpp:623

CSeq_id::fAcc_predicted
@ fAcc_predicted
Definition: Seq_id.hpp:254

CSeq_id::eDefault
@ eDefault
default is to show type + content
Definition: Seq_id.hpp:611

CRef::Reset
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773

CRef::NotEmpty
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726

CRef::Empty
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719

Int4
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102

Uint8
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105

END_NCBI_SCOPE
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103

BEGIN_NCBI_SCOPE
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100

kEmptyStr
#define kEmptyStr
Definition: ncbistr.hpp:123

CSeq_id_Base::GetGi
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889

CSeq_id_Base::IsGi
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883

dbname
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929

int
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210

ncbi::grid::netcache::search::fields::key
const struct ncbi::grid::netcache::search::fields::KEY key

rapidjson::value
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227

t
EIPRangeType t
Definition: ncbi_localip.c:101

ncbi_pch.hpp

log10
T log10(T x_)
Definition: njn_function.hpp:103

SeqDB_GetOidMaskFileExt
const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)
Definition: seqdbcommon.cpp:2706

EBlastDbVersion
EBlastDbVersion
BLAST database version.
Definition: seqdbcommon.hpp:51

eBDB_Version5
@ eBDB_Version5
Definition: seqdbcommon.hpp:53

EOidMaskType
EOidMaskType
Definition: seqdbcommon.hpp:159

fExcludeModel
@ fExcludeModel
Definition: seqdbcommon.hpp:161

msg
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: sljitNativeS390X.c:658

uint8_t
#define uint8_t
Definition: config.h:54

hash
Definition: _hash_fun.h:40

_ASSERT
#define _ASSERT
Definition: test_assert_impl.h:173

ambig
static bool ambig(char c)
Definition: win_mask_gen_counts.cpp:75

writedb_error.hpp
Defines exception class for WriteDB.

WriteDB_FindSequenceLength
int WriteDB_FindSequenceLength(bool protein, const string &seq)
Compute length of sequence from raw packing.
Definition: writedb_general.cpp:80

eGi
@ eGi
GI Index.
Definition: writedb_isam.hpp:60

eTrace
@ eTrace
Trace ID Index.
Definition: writedb_isam.hpp:63

eAcc
@ eAcc
Accession (string) Index.
Definition: writedb_isam.hpp:57

ePig
@ ePig
Protein Identifier Group.
Definition: writedb_isam.hpp:54

eHash
@ eHash
Hash Index.
Definition: writedb_isam.hpp:66

USING_SCOPE
USING_SCOPE(std)
Include C++ std library symbols.

writedb_volume.hpp
Code for database volume construction.