NCBI C++ ToolKit
Public Member Functions | Private Member Functions | Private Attributes | List of all members
CMinHashFile Class Reference

Search Toolkit Book for CMinHashFile

Access data in Minhash files. More...

#include <algo/blast/proteinkmer/mhfile.hpp>

+ Inheritance diagram for CMinHashFile:
+ Collaboration diagram for CMinHashFile:

Public Member Functions

 CMinHashFile (const string &indexname)
 parameterized constructor More...
 
int GetVersion (void) const
 
int GetNumSeqs (void) const
 
int GetNumHashes (void) const
 Returns the number of values in an array of hashes (probably 32) More...
 
int GetSegStatus (void) const
 
int GetKmerSize (void) const
 Returns the length of the KMER. More...
 
int GetRows (void) const
 
int GetDataWidth (void) const
 
int GetAlphabet (void) const
 One of two alphabets from Shiryev et al. More...
 
int GetLSHSize (void) const
 
int GetLSHStart (void) const
 
uint64_t GetLSHMatchEnd (void) const
 
int GetChunkSize (void) const
 Get number of letters in a chunk (version 3 or higher) More...
 
uint32_t * GetRandomNumbers (void) const
 
void GetBadMers (vector< int > &badMers) const
 Overrepresented KMERs. More...
 
unsigned char * GetKValues (void) const
 LSH points for Buhler approach. More...
 
uint64_t * GetLSHArray (void) const
 
int * GetHits (uint64_t offset) const
 
uint32_t * GetMinHits (int oid) const
 
void GetMinHits (int oid, int &subjectOid, vector< uint32_t > &hits) const
 Gets the database OID and vector of hash values for entry given by oid. More...
 
int GetNumSignatures () const
 Returns the number of hash arrays. More...
 
- Public Member Functions inherited from CObject
 CObject (void)
 Constructor. More...
 
 CObject (const CObject &src)
 Copy constructor. More...
 
virtual ~CObject (void)
 Destructor. More...
 
CObject & operator= (const CObject &src) THROWS_NONE
 Assignment operator. More...
 
bool CanBeDeleted (void) const THROWS_NONE
 Check if object can be deleted. More...
 
bool IsAllocatedInPool (void) const THROWS_NONE
 Check if object is allocated in memory pool (not system heap) More...
 
bool Referenced (void) const THROWS_NONE
 Check if object is referenced. More...
 
bool ReferencedOnlyOnce (void) const THROWS_NONE
 Check if object is referenced only once. More...
 
void AddReference (void) const
 Add reference to object. More...
 
void RemoveReference (void) const
 Remove reference to object. More...
 
void ReleaseReference (void) const
 Remove reference without deleting object. More...
 
virtual void DoNotDeleteThisObject (void)
 Mark this object as not allocated in heap – do not delete this object. More...
 
virtual void DoDeleteThisObject (void)
 Mark this object as allocated in heap – object can be deleted. More...
 
void * operator new (size_t size)
 Define new operator for memory allocation. More...
 
void * operator new[] (size_t size)
 Define new[] operator for 'array' memory allocation. More...
 
void operator delete (void *ptr)
 Define delete operator for memory deallocation. More...
 
void operator delete[] (void *ptr)
 Define delete[] operator for memory deallocation. More...
 
void * operator new (size_t size, void *place)
 Define new operator. More...
 
void operator delete (void *ptr, void *place)
 Define delete operator. More...
 
void * operator new (size_t size, CObjectMemoryPool *place)
 Define new operator using memory pool. More...
 
void operator delete (void *ptr, CObjectMemoryPool *place)
 Define delete operator. More...
 
virtual void DebugDump (CDebugDumpContext ddc, unsigned int depth) const
 Define method for dumping debug information. More...
 
- Public Member Functions inherited from CDebugDumpable
 CDebugDumpable (void)
 
virtual ~CDebugDumpable (void)
 
void DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const
 
void DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const
 
void DumpToConsole (void) const
 

Private Member Functions

uint32_t * x_GetMinHits32 (int oid, int &subjectOid) const
 
uint16_t * x_GetMinHits16 (int oid, int &subjectOid) const
 
unsigned char * x_GetMinHits8 (int oid, int &subjectOid) const
 
void x_Init ()
 

Private Attributes

unique_ptr< CMemoryFile > m_MmappedIndex
 
unique_ptr< CMemoryFile > m_MmappedData
 
MinHashIndexHeader * m_Data
 
unsigned char * m_MinHitsData
 Pointer to start of min-hits arrays. More...
 
Int8 m_DataFileSize
 m_MmappedData File size More...
 
string m_IndexName
 Name of the index file. More...
 

Additional Inherited Members

- Public Types inherited from CObject
enum  EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern }
 Control filling of newly allocated memory. More...
 
typedef CObjectCounterLocker TLockerType
 Default locker type for CRef. More...
 
typedef atomic< Uint8 > TCounter
 Counter type is CAtomicCounter. More...
 
typedef Uint8 TCount
 Alias for value type of counter. More...
 
- Static Public Member Functions inherited from CObject
static NCBI_XNCBI_EXPORT void ThrowNullPointerException (void)
 Define method to throw null pointer exception. More...
 
static NCBI_XNCBI_EXPORT void ThrowNullPointerException (const type_info &type)
 
static EAllocFillMode GetAllocFillMode (void)
 
static void SetAllocFillMode (EAllocFillMode mode)
 
static void SetAllocFillMode (const string &value)
 Set mode from configuration parameter value. More...
 
- Static Public Member Functions inherited from CDebugDumpable
static void EnableDebugDump (bool on)
 
- Static Public Attributes inherited from CObject
static const TCount eCounterBitsCanBeDeleted = 1 << 0
 Define possible object states. More...
 
static const TCount eCounterBitsInPlainHeap = 1 << 1
 Heap signature was found. More...
 
static const TCount eCounterBitsPlaceMask
 Mask for 'in heap' state flags. More...
 
static const int eCounterStep = 1 << 2
 Skip over the "in heap" bits. More...
 
static const TCount eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2)
 Minimal value for valid objects (reference counter is zero). Must be a single bit value. More...
 
static const TCount eCounterStateMask
 Valid object, and object in heap. More...
 
- Protected Member Functions inherited from CObject
virtual void DeleteThis (void)
 Virtual method "deleting" this object. More...
 

Detailed Description

Access data in Minhash files.

Definition at line 108 of file mhfile.hpp.

Constructor & Destructor Documentation

◆ CMinHashFile()

CMinHashFile::CMinHashFile ( const string & indexname)

parameterized constructor

Parameterized constructor.

Definition at line 43 of file mhfile.cpp.

References m_MmappedData, m_MmappedIndex, NCBI_THROW, and x_Init().

Member Function Documentation

◆ GetAlphabet()

int CMinHashFile::GetAlphabet ( void  ) const
inline

One of two alphabets from Shiryev et al.

(2007), Bioinformatics, 23:2949-2951. 0 means 15 letters (based on SE-B(14)), 1 means 10 letters (based on SE-V(10)).

Definition at line 131 of file mhfile.hpp.

References MinHashIndexHeader::Alphabet, and m_Data.

Referenced by BOOST_AUTO_TEST_CASE(), CBlastKmer::Run(), and CBlastKmer::x_RunKmerFile().

◆ GetBadMers()

void CMinHashFile::GetBadMers ( vector< int > &  badMers) const

Overrepresented KMERs.

Definition at line 112 of file mhfile.cpp.

References GetVersion(), i, KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.

Referenced by CBlastKmer::Run().

◆ GetChunkSize()

int CMinHashFile::GetChunkSize ( void  ) const
inline

Get number of letters in a chunk (version 3 or higher)

Definition at line 140 of file mhfile.hpp.

References MinHashIndexHeader::chunkSize, and m_Data.

Referenced by BOOST_AUTO_TEST_CASE(), and CBlastKmer::Run().

◆ GetDataWidth()

int CMinHashFile::GetDataWidth ( void  ) const
inline

◆ GetHits()

int* CMinHashFile::GetHits ( uint64_t  offset) const
inline

Definition at line 153 of file mhfile.hpp.

References m_MmappedIndex, and offset.

Referenced by neighbor_query().

◆ GetKmerSize()

int CMinHashFile::GetKmerSize ( void  ) const
inline

Returns the length of the KMER.

Definition at line 123 of file mhfile.hpp.

References MinHashIndexHeader::kmerNum, and m_Data.

Referenced by BOOST_AUTO_TEST_CASE(), CBlastKmer::Run(), and s_BlastKmerVerifyVolume().

◆ GetKValues()

unsigned char * CMinHashFile::GetKValues ( void  ) const

LSH points for Buhler approach.

Definition at line 128 of file mhfile.cpp.

References GetNumHashes(), GetVersion(), KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.

Referenced by CBlastKmer::Run().

◆ GetLSHArray()

uint64_t* CMinHashFile::GetLSHArray ( void  ) const
inline

◆ GetLSHMatchEnd()

uint64_t CMinHashFile::GetLSHMatchEnd ( void  ) const
inline

Definition at line 137 of file mhfile.hpp.

References MinHashIndexHeader::LSHMatchEnd, and m_Data.

◆ GetLSHSize()

int CMinHashFile::GetLSHSize ( void  ) const
inline

Definition at line 133 of file mhfile.hpp.

References MinHashIndexHeader::LSHSize, and m_Data.

Referenced by BOOST_AUTO_TEST_CASE(), and s_BlastKmerVerifyVolume().

◆ GetLSHStart()

int CMinHashFile::GetLSHStart ( void  ) const
inline

Definition at line 135 of file mhfile.hpp.

References MinHashIndexHeader::LSHStart, and m_Data.

Referenced by BOOST_AUTO_TEST_CASE(), and GetLSHArray().

◆ GetMinHits() [1/2]

uint32_t* CMinHashFile::GetMinHits ( int  oid) const
inline

Definition at line 155 of file mhfile.hpp.

References GetNumHashes(), and m_MinHitsData.

Referenced by neighbor_query(), and s_BlastKmerVerifyVolume().

◆ GetMinHits() [2/2]

void CMinHashFile::GetMinHits ( int  oid,
int & subjectOid,
vector< uint32_t > &  hits 
) const

Gets the database OID and vector of hash values for entry given by oid.

Parameters
oid: Entry to fetch.
subjectOid: OID of the BLAST database (for current volume)
hits: Vector of the hash values read from disk.

Definition at line 73 of file mhfile.cpp.

References a, GetDataWidth(), GetNumHashes(), x_GetMinHits16(), x_GetMinHits32(), and x_GetMinHits8().

◆ GetNumHashes()

int CMinHashFile::GetNumHashes ( void  ) const
inline

Returns the number of values in an array of hashes (probably 32)

Definition at line 118 of file mhfile.hpp.

References m_Data, and MinHashIndexHeader::num_hashes.

Referenced by BOOST_AUTO_TEST_CASE(), GetKValues(), GetMinHits(), GetNumSignatures(), CBlastKmer::Run(), s_BlastKmerVerifyVolume(), x_GetMinHits16(), x_GetMinHits32(), x_GetMinHits8(), and CBlastKmer::x_RunKmerFile().

◆ GetNumSeqs()

int CMinHashFile::GetNumSeqs ( void  ) const
inline

Definition at line 115 of file mhfile.hpp.

References m_Data, and MinHashIndexHeader::num_seqs.

Referenced by s_BlastKmerVerifyVolume(), and CBlastKmer::x_RunKmerFile().

◆ GetNumSignatures()

int CMinHashFile::GetNumSignatures ( ) const
inline

Returns the number of hash arrays.

Definition at line 164 of file mhfile.hpp.

References GetDataWidth(), GetNumHashes(), and m_DataFileSize.

◆ GetRandomNumbers()

uint32_t * CMinHashFile::GetRandomNumbers ( void  ) const

Definition at line 102 of file mhfile.cpp.

References GetVersion(), KMER_RANDOM_NUM_OFFSET, and m_MmappedIndex.

Referenced by CBlastKmer::Run().

◆ GetRows()

int CMinHashFile::GetRows ( void  ) const
inline

Definition at line 125 of file mhfile.hpp.

References m_Data, and MinHashIndexHeader::rows_per_band.

Referenced by CBlastKmer::Run().

◆ GetSegStatus()

int CMinHashFile::GetSegStatus ( void  ) const
inline

Definition at line 120 of file mhfile.hpp.

References MinHashIndexHeader::do_seg, and m_Data.

Referenced by CBlastKmer::Run().

◆ GetVersion()

int CMinHashFile::GetVersion ( void  ) const
inline

◆ x_GetMinHits16()

uint16_t * CMinHashFile::x_GetMinHits16 ( int  oid,
int & subjectOid
) const
inline private

Definition at line 148 of file mhfile.cpp.

References GetDataWidth(), GetNumHashes(), and m_MinHitsData.

Referenced by GetMinHits().

◆ x_GetMinHits32()

uint32_t * CMinHashFile::x_GetMinHits32 ( int  oid,
int & subjectOid
) const
inline private

Definition at line 138 of file mhfile.cpp.

References GetDataWidth(), GetNumHashes(), and m_MinHitsData.

Referenced by GetMinHits().

◆ x_GetMinHits8()

unsigned char * CMinHashFile::x_GetMinHits8 ( int  oid,
int & subjectOid
) const
inline private

Definition at line 159 of file mhfile.cpp.

References GetDataWidth(), GetNumHashes(), and m_MinHitsData.

Referenced by GetMinHits().

◆ x_Init()

void CMinHashFile::x_Init ( void  )
private

Definition at line 54 of file mhfile.cpp.

References m_Data, m_DataFileSize, m_IndexName, m_MinHitsData, m_MmappedData, m_MmappedIndex, and NCBI_THROW.

Referenced by CMinHashFile().

Member Data Documentation

◆ m_Data

MinHashIndexHeader* CMinHashFile::m_Data
private

◆ m_DataFileSize

Int8 CMinHashFile::m_DataFileSize
private

m_MmappedData File size

Definition at line 188 of file mhfile.hpp.

Referenced by GetNumSignatures(), and x_Init().

◆ m_IndexName

string CMinHashFile::m_IndexName
private

Name of the index file.

Definition at line 191 of file mhfile.hpp.

Referenced by x_Init().

◆ m_MinHitsData

unsigned char* CMinHashFile::m_MinHitsData
private

Pointer to start of min-hits arrays.

Definition at line 183 of file mhfile.hpp.

Referenced by GetMinHits(), x_GetMinHits16(), x_GetMinHits32(), x_GetMinHits8(), and x_Init().

◆ m_MmappedData

unique_ptr<CMemoryFile> CMinHashFile::m_MmappedData
private

Definition at line 178 of file mhfile.hpp.

Referenced by CMinHashFile(), and x_Init().

◆ m_MmappedIndex

unique_ptr<CMemoryFile> CMinHashFile::m_MmappedIndex
private

The documentation for this class was generated from the following files:
Modified on Tue Dec 05 02:18:58 2023 by modify_doxy.py rev. 669887