NCBI C++ ToolKit
|
Search Toolkit Book for CBlastKmerBuildIndex
#include <algo/blast/proteinkmer/blastkmerindex.hpp>
Public Member Functions | |
CBlastKmerBuildIndex (CRef< CSeqDB > seqdb, int kmerSize=5, int numHashFct=32, int samples=0, int compress=2, int alphabet=0, int version=0, int chunkSize=150) | |
Constructor. More... | |
~CBlastKmerBuildIndex () | |
Destructor. More... | |
void | Build (int numThreads=1) |
Build the index. More... | |
![]() | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap) More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
virtual void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Define method for dumping debug information. More... | |
![]() | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Private Member Functions | |
void | x_WriteDataFile (vector< vector< vector< uint32_t > > > &seq_hash, int num_seqs, CNcbiOfstream &data_file) |
Writes out the data file. More... | |
void | x_BuildIndex (string &name, int start=0, int number=0) |
Build index for an individual BLAST volume. More... | |
Private Attributes | |
int | m_NumHashFct |
Number of hash functions. More... | |
int | m_NumBands |
Number of LSH bands. More... | |
int | m_RowsPerBand |
Number of rows per band. More... | |
int | m_KmerSize |
Residues in kmer. More... | |
CRef< CSeqDB > | m_SeqDB |
BLAST database. More... | |
bool | m_DoSeg |
Should Seg be run on sequences. More... | |
int | m_Samples |
Number of samples (Buhler only) More... | |
int | m_Compress |
Compress the arrays for Jaccard matches. More... | |
int | m_Alphabet |
0 for 15 letters, 1 for 10 letters. More... | |
int | m_Version |
Version of index file. More... | |
int | m_ChunkSize |
Additional Inherited Members | |
![]() | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... | |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
![]() | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
![]() | |
static void | EnableDebugDump (bool on) |
![]() | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single bit value. More... | |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
![]() | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
Definition at line 51 of file blastkmerindex.hpp.
CBlastKmerBuildIndex::CBlastKmerBuildIndex | ( | CRef< CSeqDB > | seqdb, |
int | kmerSize = 5 , |
||
int | numHashFct = 32 , |
||
int | samples = 0 , |
||
int | compress = 2 , |
||
int | alphabet = 0 , |
||
int | version = 0 , |
||
int | chunkSize = 150 |
||
) |
Constructor.
Definition at line 108 of file blastkmerindex.cpp.
|
inline |
Destructor.
Definition at line 66 of file blastkmerindex.hpp.
void CBlastKmerBuildIndex::Build | ( | int | numThreads = 1 | ) |
Build the index.
Definition at line 461 of file blastkmerindex.cpp.
References CSeqDB::eProtein, CSeqDB::FindVolumePaths(), CSeqDB::GetDBNameList(), CSeqDB::GetNumSeqs(), last(), m_SeqDB, NULL, compile_time_bits::range(), CSeqDB::SetNumberOfThreads(), CDirEntry::SplitPath(), and x_BuildIndex().
Referenced by BOOST_AUTO_TEST_CASE(), and CBlastKmerBuildIndexApplication::Run().
Build index for an individual BLAST volume.
Definition at line 532 of file blastkmerindex.cpp.
References _ASSERT, a, b, GetKValues(), CSeqDB::GetNumSeqs(), GetRandomNumbers(), i, KMER_LSH_ARRAY_SIZE, m_Alphabet, m_ChunkSize, m_Compress, m_DoSeg, m_KmerSize, m_NumBands, m_NumHashFct, m_RowsPerBand, m_Samples, m_SeqDB, m_Version, NCBI_THROW, out(), s_BlastKmerLoadBadMers(), s_Get_LSH_index_hashes(), s_Get_LSH_index_hashes2(), s_Get_LSH_index_hashes5(), s_MinhashSequences(), s_MinhashSequences2(), ncbi::grid::netcache::search::fields::size, and x_WriteDataFile().
Referenced by Build().
|
private |
Writes out the data file.
Definition at line 762 of file blastkmerindex.cpp.
References b, m_Compress, m_NumHashFct, m_Version, n, pearson_hash_int2byte(), pearson_hash_int2short(), and ct::sort().
Referenced by x_BuildIndex().
|
private |
Compress the arrays for Jaccard matches.
Definition at line 93 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
|
private |
version of index file
Definition at line 97 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
|
private |
Number of samples (Buhler only)
Definition at line 91 of file blastkmerindex.hpp.
Referenced by x_BuildIndex(), and x_WriteDataFile().
|
private |
|
private |
Number of rows per band.
Definition at line 83 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
|
private |
Number of hash functions.
Definition at line 79 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
|
private |
Definition at line 77 of file blastkmerindex.hpp.
Referenced by x_BuildIndex(), and x_WriteDataFile().
|
private |
Number of LSH bands.
Definition at line 81 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
|
private |
Should Seg be run on sequences.
Definition at line 89 of file blastkmerindex.hpp.
Referenced by x_BuildIndex().
Residues in kmer.
Definition at line 85 of file blastkmerindex.hpp.
Referenced by Build(), and x_BuildIndex().
|
private |
0 for 15 letters, 1 for 10 letters.
Definition at line 95 of file blastkmerindex.hpp.
Referenced by x_BuildIndex(), and x_WriteDataFile().