NCBI C++ ToolKit
|
Search Toolkit Book for CMinHashFile
Access data in Minhash files. More...
#include <algo/blast/proteinkmer/mhfile.hpp>
Public Member Functions | |
CMinHashFile (const string &indexname) | |
Parameterized constructor. More... | |
int | GetVersion (void) const |
int | GetNumSeqs (void) const |
int | GetNumHashes (void) const |
Returns the number of values in an array of hashes (probably 32) More... | |
int | GetSegStatus (void) const |
int | GetKmerSize (void) const |
Returns the length of the KMER. More... | |
int | GetRows (void) const |
int | GetDataWidth (void) const |
int | GetAlphabet (void) const |
One of two alphabets from Shiryev et al. More... | |
int | GetLSHSize (void) const |
int | GetLSHStart (void) const |
uint64_t | GetLSHMatchEnd (void) const |
int | GetChunkSize (void) const |
Get number of letters in a chunk (version 3 or higher) More... | |
uint32_t * | GetRandomNumbers (void) const |
void | GetBadMers (vector< int > &badMers) const |
Overrepresented KMERs. More... | |
unsigned char * | GetKValues (void) const |
LSH points for Buhler approach. More... | |
uint64_t * | GetLSHArray (void) const |
int * | GetHits (uint64_t offset) const |
uint32_t * | GetMinHits (int oid) const |
void | GetMinHits (int oid, int &subjectOid, vector< uint32_t > &hits) const |
Gets the database OID and vector of hash values for entry given by oid. More... | |
int | GetNumSignatures () const |
Returns the number of hash arrays. More... | |
Public Member Functions inherited from CObject | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap) More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
virtual void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Define method for dumping debug information. More... | |
Public Member Functions inherited from CDebugDumpable | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Private Member Functions | |
uint32_t * | x_GetMinHits32 (int oid, int &subjectOid) const |
uint16_t * | x_GetMinHits16 (int oid, int &subjectOid) const |
unsigned char * | x_GetMinHits8 (int oid, int &subjectOid) const |
void | x_Init () |
Private Attributes | |
unique_ptr< CMemoryFile > | m_MmappedIndex |
unique_ptr< CMemoryFile > | m_MmappedData |
MinHashIndexHeader * | m_Data |
unsigned char * | m_MinHitsData |
Pointer to start of min-hits arrays. More... | |
Int8 | m_DataFileSize |
Size of the m_MmappedData file. More... |
string | m_IndexName |
Name of the index file. More... | |
Additional Inherited Members | |
Public Types inherited from CObject | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
Static Public Member Functions inherited from CObject | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
Static Public Member Functions inherited from CDebugDumpable | |
static void | EnableDebugDump (bool on) |
Static Public Attributes inherited from CObject | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single-bit value. More... |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
Protected Member Functions inherited from CObject | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
Access data in Minhash files.
Definition at line 108 of file mhfile.hpp.
parameterized constructor
Parameterized constructor.
Definition at line 43 of file mhfile.cpp.
References m_MmappedData, m_MmappedIndex, NCBI_THROW, and x_Init().
|
inline |
One of two alphabets from Shiryev et al. (2007), Bioinformatics, 23:2949-2951.
0 means 15 letters (based on SE-B(14)); 1 means 10 letters (based on SE-V(10)).
Definition at line 131 of file mhfile.hpp.
References MinHashIndexHeader::Alphabet, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), CBlastKmer::Run(), and CBlastKmer::x_RunKmerFile().
void CMinHashFile::GetBadMers | ( | vector< int > & | badMers | ) | const |
Overrepresented KMERs.
Definition at line 112 of file mhfile.cpp.
References GetVersion(), i, KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.
Referenced by CBlastKmer::Run().
|
inline |
Get number of letters in a chunk (version 3 or higher)
Definition at line 140 of file mhfile.hpp.
References MinHashIndexHeader::chunkSize, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), and CBlastKmer::Run().
|
inline |
Definition at line 127 of file mhfile.hpp.
References MinHashIndexHeader::dataWidth, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), GetMinHits(), GetNumSignatures(), neighbor_query(), x_GetMinHits16(), x_GetMinHits32(), and x_GetMinHits8().
Definition at line 153 of file mhfile.hpp.
References m_MmappedIndex, and offset.
Referenced by neighbor_query().
|
inline |
Returns the length of the KMER.
Definition at line 123 of file mhfile.hpp.
References MinHashIndexHeader::kmerNum, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), CBlastKmer::Run(), and s_BlastKmerVerifyVolume().
unsigned char * CMinHashFile::GetKValues | ( | void | ) | const |
LSH points for Buhler approach.
Definition at line 128 of file mhfile.cpp.
References GetNumHashes(), GetVersion(), KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.
Referenced by CBlastKmer::Run().
|
inline |
Definition at line 151 of file mhfile.hpp.
References GetLSHStart(), and m_MmappedIndex.
Referenced by BOOST_AUTO_TEST_CASE(), s_BlastKmerVerifyVolume(), and CBlastKmer::x_RunKmerFile().
|
inline |
Definition at line 137 of file mhfile.hpp.
References MinHashIndexHeader::LSHMatchEnd, and m_Data.
|
inline |
Definition at line 133 of file mhfile.hpp.
References MinHashIndexHeader::LSHSize, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), and s_BlastKmerVerifyVolume().
|
inline |
Definition at line 135 of file mhfile.hpp.
References MinHashIndexHeader::LSHStart, and m_Data.
Referenced by BOOST_AUTO_TEST_CASE(), and GetLSHArray().
Definition at line 155 of file mhfile.hpp.
References GetNumHashes(), and m_MinHitsData.
Referenced by neighbor_query(), and s_BlastKmerVerifyVolume().
Gets the database OID and vector of hash values for entry given by oid.
oid | Entry to fetch. |
subjectOid | OID of the BLAST database (for current volume) |
hits | Vector of the hash values read from disk. |
Definition at line 73 of file mhfile.cpp.
References a, GetDataWidth(), GetNumHashes(), x_GetMinHits16(), x_GetMinHits32(), and x_GetMinHits8().
|
inline |
Returns the number of values in an array of hashes (probably 32)
Definition at line 118 of file mhfile.hpp.
References m_Data, and MinHashIndexHeader::num_hashes.
Referenced by BOOST_AUTO_TEST_CASE(), GetKValues(), GetMinHits(), GetNumSignatures(), CBlastKmer::Run(), s_BlastKmerVerifyVolume(), x_GetMinHits16(), x_GetMinHits32(), x_GetMinHits8(), and CBlastKmer::x_RunKmerFile().
|
inline |
Definition at line 115 of file mhfile.hpp.
References m_Data, and MinHashIndexHeader::num_seqs.
Referenced by s_BlastKmerVerifyVolume(), and CBlastKmer::x_RunKmerFile().
|
inline |
Returns the number of hash arrays.
Definition at line 164 of file mhfile.hpp.
References GetDataWidth(), GetNumHashes(), and m_DataFileSize.
uint32_t * CMinHashFile::GetRandomNumbers | ( | void | ) | const |
Definition at line 102 of file mhfile.cpp.
References GetVersion(), KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.
Referenced by CBlastKmer::Run().
|
inline |
Definition at line 125 of file mhfile.hpp.
References m_Data, and MinHashIndexHeader::rows_per_band.
Referenced by CBlastKmer::Run().
|
inline |
Definition at line 120 of file mhfile.hpp.
References MinHashIndexHeader::do_seg, and m_Data.
Referenced by CBlastKmer::Run().
|
inline |
Definition at line 113 of file mhfile.hpp.
References m_Data, and MinHashIndexHeader::version.
Referenced by BOOST_AUTO_TEST_CASE(), GetBadMers(), GetKValues(), GetRandomNumbers(), neighbor_query(), CBlastKmer::Run(), s_BlastKmerVerifyVolume(), and CBlastKmer::x_RunKmerFile().
Definition at line 148 of file mhfile.cpp.
References GetDataWidth(), GetNumHashes(), and m_MinHitsData.
Referenced by GetMinHits().
Definition at line 138 of file mhfile.cpp.
References GetDataWidth(), GetNumHashes(), and m_MinHitsData.
Referenced by GetMinHits().
Definition at line 159 of file mhfile.cpp.
References GetDataWidth(), GetNumHashes(), and m_MinHitsData.
Referenced by GetMinHits().
|
private |
Definition at line 54 of file mhfile.cpp.
References m_Data, m_DataFileSize, m_IndexName, m_MinHitsData, m_MmappedData, m_MmappedIndex, and NCBI_THROW.
Referenced by CMinHashFile().
|
private |
Definition at line 180 of file mhfile.hpp.
Referenced by GetAlphabet(), GetChunkSize(), GetDataWidth(), GetKmerSize(), GetLSHMatchEnd(), GetLSHSize(), GetLSHStart(), GetNumHashes(), GetNumSeqs(), GetRows(), GetSegStatus(), GetVersion(), and x_Init().
|
private |
Size of the m_MmappedData file.
Definition at line 188 of file mhfile.hpp.
Referenced by GetNumSignatures(), and x_Init().
|
private |
|
private |
Pointer to start of min-hits arrays.
Definition at line 183 of file mhfile.hpp.
Referenced by GetMinHits(), x_GetMinHits16(), x_GetMinHits32(), x_GetMinHits8(), and x_Init().
|
private |
Definition at line 178 of file mhfile.hpp.
Referenced by CMinHashFile(), and x_Init().
|
private |
Definition at line 175 of file mhfile.hpp.
Referenced by CMinHashFile(), GetBadMers(), GetHits(), GetKValues(), GetLSHArray(), GetRandomNumbers(), and x_Init().