NCBI C++ ToolKit
|
Search Toolkit Book for CVDBBlastUtil
#include <algo/blast/vdb/vdb2blast_util.hpp>
Public Types | |
enum | IDType { eSRAId = 0 , eWGSId , eCSRALocalRefId , eCSRARefId , eUnknownId } |
![]() | |
enum | EAllocFillMode { eAllocFillNone = 1 , eAllocFillZero , eAllocFillPattern } |
Control filling of newly allocated memory. More... | |
typedef CObjectCounterLocker | TLockerType |
Default locker type for CRef. More... | |
typedef atomic< Uint8 > | TCounter |
Counter type is CAtomicCounter. More... | |
typedef Uint8 | TCount |
Alias for value type of counter. More... | |
Public Member Functions | |
CVDBBlastUtil (const string &strAllRuns, bool bOwnSeqSrc=false, bool bCSRA=false, bool bIncludeFilteredReads=false) | |
Constructor that creates and stores the SRA BlastSeqSrc object. More... | |
virtual | ~CVDBBlastUtil () |
Destructor. More... | |
BlastSeqSrc * | GetSRASeqSrc () |
Return the stored SRA BlastSeqSrc object. More... | |
CRef< blast::IBlastSeqInfoSrc > | GetSRASeqInfoSrc () |
Return the SRA BlastSeqInfoSrc object (create if none exists). More... | |
Uint4 | GetOIDFromVDBSeqId (CRef< objects::CSeq_id > seqId) |
Get the ordinal number (OID) for the given SRA sequence. More... | |
CRef< objects::CSeq_id > | GetVDBSeqIdFromOID (Uint4 oid) |
Get the SRA sequence SeqID given its ordinal number (OID). More... | |
CRef< objects::CBioseq > | CreateBioseqFromVDBSeqId (CRef< objects::CSeq_id > seqId) |
Construct a Bioseq object for the given SRA sequence. More... | |
CRef< objects::CBioseq > | CreateBioseqFromOid (Uint8 oid) |
void | AddSubjectsToScope (CRef< CScope > scope, CConstRef< CSeq_align_set > alnSet) |
Populate the CScope object with subject sequence Bioseqs. More... | |
void | FillVDBInfo (vector< CBlastFormatUtil::SDbInfo > &vecDbInfo) |
Populate the DB info list with information on open SRA runs. More... | |
bool | IsCSRAUtil () |
void | GetOidsFromSeqIds_WGS (const vector< string > &ids, vector< int > &oids) |
bool | IsWGS () |
![]() | |
CObject (void) | |
Constructor. More... | |
CObject (const CObject &src) | |
Copy constructor. More... | |
virtual | ~CObject (void) |
Destructor. More... | |
CObject & | operator= (const CObject &src) THROWS_NONE |
Assignment operator. More... | |
bool | CanBeDeleted (void) const THROWS_NONE |
Check if object can be deleted. More... | |
bool | IsAllocatedInPool (void) const THROWS_NONE |
Check if object is allocated in memory pool (not system heap) More... | |
bool | Referenced (void) const THROWS_NONE |
Check if object is referenced. More... | |
bool | ReferencedOnlyOnce (void) const THROWS_NONE |
Check if object is referenced only once. More... | |
void | AddReference (void) const |
Add reference to object. More... | |
void | RemoveReference (void) const |
Remove reference to object. More... | |
void | ReleaseReference (void) const |
Remove reference without deleting object. More... | |
virtual void | DoNotDeleteThisObject (void) |
Mark this object as not allocated in heap – do not delete this object. More... | |
virtual void | DoDeleteThisObject (void) |
Mark this object as allocated in heap – object can be deleted. More... | |
void * | operator new (size_t size) |
Define new operator for memory allocation. More... | |
void * | operator new[] (size_t size) |
Define new[] operator for 'array' memory allocation. More... | |
void | operator delete (void *ptr) |
Define delete operator for memory deallocation. More... | |
void | operator delete[] (void *ptr) |
Define delete[] operator for memory deallocation. More... | |
void * | operator new (size_t size, void *place) |
Define new operator. More... | |
void | operator delete (void *ptr, void *place) |
Define delete operator. More... | |
void * | operator new (size_t size, CObjectMemoryPool *place) |
Define new operator using memory pool. More... | |
void | operator delete (void *ptr, CObjectMemoryPool *place) |
Define delete operator. More... | |
virtual void | DebugDump (CDebugDumpContext ddc, unsigned int depth) const |
Define method for dumping debug information. More... | |
![]() | |
CDebugDumpable (void) | |
virtual | ~CDebugDumpable (void) |
void | DebugDumpText (ostream &out, const string &bundle, unsigned int depth) const |
void | DebugDumpFormat (CDebugDumpFormatter &ddf, const string &bundle, unsigned int depth) const |
void | DumpToConsole (void) const |
Static Public Member Functions | |
static Uint4 | SetupVDBManager () |
*Note* Call this in the main thread first if you are going to instantiate this object or use any of the static methods in a thread. More... | |
static void | ReleaseVDBManager () |
Call this to release the VDB manager if SetupVDBManager() has been explicitly called in the main thread. More... | |
static bool | IsSRA (const string &db_name) |
static bool | IsCSRA (const string &db_name) |
static IDType | VDBIdType (const CSeq_id &id) |
static void | GetVDBStats (const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false) |
Function to get around the OID (BlastSeqSrc) limit, so that a number of sequences larger than an Int4 can be returned. More... | |
static void | GetVDBStats (const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, Uint8 &max_seq_length, Uint8 &av_seq_length, bool getRefStats=false) |
static void | GetAllStats (const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, Uint8 &ref_num_seqs, Uint8 &ref_length) |
static void | CheckVDBs (const vector< string > &vdbs) |
Function to check whether each database in a list can be opened. Throws an exception if any of the databases cannot be opened. More... | |
static Uint4 | GetMaxNumCSRAThread (void) |
![]() | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (void) |
Define method to throw null pointer exception. More... | |
static NCBI_XNCBI_EXPORT void | ThrowNullPointerException (const type_info &type) |
static EAllocFillMode | GetAllocFillMode (void) |
static void | SetAllocFillMode (EAllocFillMode mode) |
static void | SetAllocFillMode (const string &value) |
Set mode from configuration parameter value. More... | |
![]() | |
static void | EnableDebugDump (bool on) |
Static Public Attributes | |
static const Uint8 | REF_SEQ_ID_MASK = (0x8000000000000000) |
![]() | |
static const TCount | eCounterBitsCanBeDeleted = 1 << 0 |
Define possible object states. More... | |
static const TCount | eCounterBitsInPlainHeap = 1 << 1 |
Heap signature was found. More... | |
static const TCount | eCounterBitsPlaceMask |
Mask for 'in heap' state flags. More... | |
static const int | eCounterStep = 1 << 2 |
Skip over the "in heap" bits. More... | |
static const TCount | eCounterValid = TCount(1) << (sizeof(TCount) * 8 - 2) |
Minimal value for valid objects (reference counter is zero). Must be a single bit value. More... | |
static const TCount | eCounterStateMask |
Valid object, and object in heap. More... | |
Private Member Functions | |
CVDBBlastUtil (bool bCSRA, const string &strAllRuns) | |
Temporary hack to get around the OID limit (used by GetVDBStats). Should be removed when… More... | |
void | x_GetSRARunAccessions (vector< string > &vecSRARunAccessions) |
Tokenize the stored whitespace-delimited string of SRA runs. More... | |
BlastSeqSrc * | x_MakeVDBSeqSrc () |
Construct an SRA BlastSeqSrc object from the given strings. More... | |
Private Attributes | |
bool | m_bOwnSeqSrc |
Release the BlastSeqSrc object in destructor. More... | |
string | m_strAllRuns |
Space-delimited list of opened SRA run accessions. More... | |
BlastSeqSrc * | m_seqSrc |
Pointer to a properly initialized SRA BlastSeqSrc. More... | |
bool | m_isCSRAUtil |
bool | m_IncludeFilteredReads |
Additional Inherited Members | |
![]() | |
virtual void | DeleteThis (void) |
Virtual method "deleting" this object. More... | |
High-level functions used for setting up and running Blast with SRAs.
This class provides several high-level functions for creating and managing the SRA BlastSeqSrc and SRA BlastSeqInfoSrc objects. It also provides conversions between OIDs and high-level sequence objects containing SRA-specific information (SeqIDs, Bioseqs).
Definition at line 66 of file vdb2blast_util.hpp.
Enumerator | |
---|---|
eSRAId | |
eWGSId | |
eCSRALocalRefId | |
eCSRARefId | |
eUnknownId |
Definition at line 165 of file vdb2blast_util.hpp.
CVDBBlastUtil::CVDBBlastUtil | ( | const string & | strAllRuns, |
bool | bOwnSeqSrc = false , |
||
bool | bCSRA = false , |
||
bool | bIncludeFilteredReads = false |
||
) |
Constructor that creates and stores the SRA BlastSeqSrc object.
strAllRuns | Space-delimited SRA run accessions to open. [in] |
bOwnSeqSrc | Release the BlastSeqSrc object in destructor. [in] |
CException |
Definition at line 280 of file vdb2blast_util.cpp.
References BlastSeqSrcGetNumSeqs(), eUnknown, m_seqSrc, NCBI_THROW, VDBSRC_OVERFLOW_RV, and x_MakeVDBSeqSrc().
|
virtual |
Destructor.
Definition at line 293 of file vdb2blast_util.cpp.
References BlastSeqSrcFree(), m_bOwnSeqSrc, and m_seqSrc.
Temporary hack to get around the OID limit (used by GetVDBStats). Should be removed when…
Definition at line 661 of file vdb2blast_util.cpp.
References m_seqSrc, and x_MakeVDBSeqSrc().
void CVDBBlastUtil::AddSubjectsToScope | ( | CRef< CScope > | scope, |
CConstRef< CSeq_align_set > | alnSet | ||
) |
Populate the CScope object with subject sequence Bioseqs.
This function must be called to populate the CScope object with the Bioseqs of every SRA sequence present in the Blast results. Without these Bioseqs, the Blast formatter would have no way to access and display the subject sequence data for the alignments.
scope | CScope object to populate [in] |
alnSet | Set of alignments representing the Blast results [in] |
CException |
Definition at line 623 of file vdb2blast_util.cpp.
References CScope::AddBioseq(), CSeq_id::Assign(), CreateBioseqFromVDBSeqId(), CScope::eExist_Get, CConstRef< C, Locker >::Empty(), CSeq_align_set_Base::Get(), CSeq_align::GetSeq_id(), and CScope::kPriority_Default.
Function to check whether each database in a list can be opened. Throws an exception if any of the databases cannot be opened.
Definition at line 700 of file vdb2blast_util.cpp.
References ctll::empty(), free(), AutoPtr< X, Del >::get(), kEmptyStr, NCBI_THROW, SRABlastSeqSrcInit(), and strdup.
Referenced by CVDBAliasNode::x_ResolveVDBList().
Definition at line 529 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), CSeq_data_Base::e_Iupacna, CSeq_inst_Base::eMol_dna, Empty(), CSeq_inst_Base::eRepr_raw, ERR_POST, Error(), eUnknown, FALSE, CSeq_inst_Base::GetLength(), GetVDBSeqIdFromOID(), SVDBSRC_ErrMsg::isError, m_seqSrc, NCBI_THROW, NULL, SVBDSRC_SeqSrc_Data::reader_2na, CRef< C, Locker >::Reset(), CBioseq_Base::SetDescr(), CBioseq_Base::SetId(), CBioseq_Base::SetInst(), CSeq_inst_Base::SetLength(), CSeq_inst_Base::SetMol(), CSeq_inst_Base::SetRepr(), CSeq_inst_Base::SetSeq_data(), CSeqdesc_Base::SetTitle(), NStr::UInt8ToString(), NStr::UIntToString(), VDBSRC_FormatErrorMsg(), VDBSRC_Get2naSequenceAsString(), VDBSRC_InitEmptyErrorMsg(), VDBSRC_ReleaseErrorMsg(), and Warning().
Referenced by CVdbFastaExtractor::DumpAll().
Construct a Bioseq object for the given SRA sequence.
This function takes a SeqID identifying an SRA sequence and constructs a Bioseq object with the same SeqID and the actual sequence data stored in IUPACNA format.
seqId | SeqID of the SRA sequence [in] |
CException |
Definition at line 459 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), CSeq_data_Base::e_Iupacna, CSeq_inst_Base::eMol_dna, CSeq_inst_Base::eRepr_raw, eUnknown, free(), CSeq_inst_Base::GetLength(), GetOIDFromVDBSeqId(), m_seqSrc, NCBI_THROW, CRef< C, Locker >::Reset(), CBioseq_Base::SetDescr(), CBioseq_Base::SetId(), CBioseq_Base::SetInst(), CSeq_inst_Base::SetLength(), CSeq_inst_Base::SetMol(), CSeq_inst_Base::SetRepr(), CSeq_inst_Base::SetSeq_data(), CSeqdesc_Base::SetTitle(), NStr::UInt8ToString(), NStr::UIntToString(), VDBSRC_FormatErrorMsg(), VDBSRC_Get4naSequenceAsString(), VDBSRC_InitEmptyErrorMsg(), and VDBSRC_ReleaseErrorMsg().
Referenced by AddSubjectsToScope(), and CVdbFastaExtractor::Write().
void CVDBBlastUtil::FillVDBInfo | ( | vector< CBlastFormatUtil::SDbInfo > & | vecDbInfo | ) |
Populate the DB info list with information on open SRA runs.
vecDbInfo | DB info list to populate [out] |
CException |
Definition at line 644 of file vdb2blast_util.cpp.
References BlastSeqSrcGetName(), BlastSeqSrcGetNumSeqs(), BlastSeqSrcGetTotLen(), eUnknown, m_seqSrc, NCBI_THROW, and NStr::Replace().
|
static |
Definition at line 769 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), eUnknown, SVDBSRC_ErrMsg::isError, NCBI_THROW, SVBDSRC_SeqSrc_Data::numSeqs, VDBSRC_GetTotSeqLen(), VDBSRC_InitEmptyErrorMsg(), and VDBSRC_MakeCSRASeqSrcFromSRASeqSrc().
Referenced by CLocalVDBBlast::PreprocessDBs().
|
static |
Definition at line 822 of file vdb2blast_util.cpp.
References CSystemInfo::GetTotalPhysicalMemorySize(), and VDB_2NA_CHUNK_BUF_SIZE.
Referenced by CLocalVDBBlast::PreprocessDBs().
Get the ordinal number (OID) for the given SRA sequence.
This function takes a SeqID identifying an SRA sequence (a single read, e.g. gnl|SRA|SRR000002.123691.1) and converts it to the internal ordinal number used by the SRA BlastSeqSrc.
seqId | SeqID of the SRA sequence [in] |
CException |
Definition at line 338 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), CDbtag_Base::CanGetDb(), CDbtag_Base::CanGetTag(), eUnknown, CSeq_id_Base::GetGeneral(), CSeq_id::GetSeqIdString(), CObject_id_Base::GetStr(), CDbtag_Base::GetTag(), CSeq_id_Base::IsGeneral(), CObject_id_Base::IsStr(), kEmptyStr, m_seqSrc, NCBI_THROW, NULL, and VDBSRC_GetOIDFromReadName().
Referenced by CreateBioseqFromVDBSeqId(), and CVdbFastaExtractor::Write().
Definition at line 832 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), Empty(), eUnknown, CSeq_id::GetSeqIdString(), GetSRASeqSrc(), GetVDBSeqIdFromOID(), i, NCBI_THROW, and SVBDSRC_SeqSrc_Data::numSeqs.
CRef< IBlastSeqInfoSrc > CVDBBlastUtil::GetSRASeqInfoSrc | ( | ) |
Return the SRA BlastSeqInfoSrc object (create if none exists).
CException |
Definition at line 313 of file vdb2blast_util.cpp.
References eUnknown, m_seqSrc, and NCBI_THROW.
Referenced by s_RunLocalVDBSearch(), and s_RunPsiVDBSearch().
BlastSeqSrc * CVDBBlastUtil::GetSRASeqSrc | ( | ) |
Return the stored SRA BlastSeqSrc object.
CException |
Definition at line 302 of file vdb2blast_util.cpp.
References eUnknown, m_seqSrc, and NCBI_THROW.
Referenced by CVdbFastaExtractor::DumpAll(), CVDBSeqInfoSrc::GetLength(), GetOidsFromSeqIds_WGS(), IsWGS(), s_RunLocalVDBSearch(), s_RunPsiVDBSearch(), and CVDBSeqInfoSrc::Size().
Get the SRA sequence SeqID given its ordinal number (OID).
This function returns the SeqID identifying an SRA sequence (a single read, e.g. gnl|SRA|SRR000002.123691.1) given its internal ordinal number (OID) used by the SRA BlastSeqSrc. It is guaranteed that one will get the same results by calling the GetId method of the BlastSeqInfoSrc object and taking the first General SeqId in the returned list.
oid | OID of the SRA sequence [in] |
CException |
Definition at line 403 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), CSeq_id::eAcc_unknown, eUnknown, NStr::Find(), FindBestChoice(), CSeq_id::fParse_RawText, NStr::fSplit_Tokenize, CSeq_id::IdentifyAccession(), m_isCSRAUtil, m_seqSrc, NCBI_THROW, NPOS, CSeq_id::ParseFastaIds(), CRef< C, Locker >::Reset(), s_IsWGSId(), CSeq_id::Score(), NStr::Split(), string, tmp, and VDBSRC_GetReadNameForOID().
Referenced by CreateBioseqFromOid(), CVDBSeqInfoSrc::GetId(), and GetOidsFromSeqIds_WGS().
|
static |
Function to get around the OID (BlastSeqSrc) limit, so that a number of sequences larger than an Int4 can be returned.
Definition at line 667 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), eUnknown, NCBI_THROW, SVBDSRC_SeqSrc_Data::numSeqs, and VDBSRC_GetTotSeqLen().
Referenced by CLocalVDBBlast::PreprocessDBs(), s_FillDBInfo(), and CBlastVdbCmdApp::x_PrintBlastDatabaseInformation().
|
static |
Definition at line 682 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), eUnknown, NCBI_THROW, SVBDSRC_SeqSrc_Data::numSeqs, VDBSRC_GetAvgSeqLen(), VDBSRC_GetMaxSeqLen(), and VDBSRC_GetTotSeqLen().
Definition at line 755 of file vdb2blast_util.cpp.
References IsSRA(), NCBI_THROW, and VDBSRC_IsCSRA().
Referenced by CLocalVDBBlast::PreprocessDBs(), and s_GetCSRADBs().
|
inline |
Definition at line 188 of file vdb2blast_util.hpp.
References m_isCSRAUtil.
Definition at line 385 of file vdb2blast_util.cpp.
References CDirEntry::GetPathSeparator(), kDigits, and tmp.
Referenced by GetVDBScope(), IsCSRA(), and s_RegisterLocalDataLoader().
bool CVDBBlastUtil::IsWGS | ( | ) |
Definition at line 863 of file vdb2blast_util.cpp.
References _BlastSeqSrcImpl_GetDataStructure(), eUnknown, NStr::fSplit_Tokenize, CDirEntry::GetPathSeparator(), GetSRASeqSrc(), i, m_isCSRAUtil, SVBDSRC_SeqSrc_Data::names, NCBI_THROW, s_IsWGSId(), NStr::Split(), and tmp.
|
static |
Call this to release the VDB manager if SetupVDBManager() has been explicitly called in the main thread.
Definition at line 750 of file vdb2blast_util.cpp.
References VDBSRC_ReleaseVDBManager().
Referenced by CVDBBlastnApp::Run(), and CVDBTblastnApp::Run().
|
static |
*Note* Call this in the main thread first if you are going to instantiate this object or use any of the static methods in a thread.
Definition at line 733 of file vdb2blast_util.cpp.
References NCBI_THROW, NULL, and VDBSRC_GetVDBManager().
Referenced by CVDBBlastnApp::Run(), and CVDBTblastnApp::Run().
|
static |
Definition at line 795 of file vdb2blast_util.cpp.
References CSeq_id_Base::e_General, CSeq_id::eAcc_unknown, eCSRALocalRefId, eCSRARefId, NStr::EqualNocase(), eSRAId, eUnknownId, eWGSId, CSeq_id::fParse_AnyRaw, CDbtag_Base::GetDb(), CSeq_id_Base::GetGeneral(), CSeq_id::GetSeqIdString(), CObject_id_Base::GetStr(), CDbtag_Base::GetTag(), CSeq_id::IdentifyAccession(), CObject_id_Base::IsStr(), NPOS, s_IsWGSId(), str(), and CSeq_id_Base::Which().
Referenced by CBlastVdbCmdApp::x_ProcessSearchRequest().
|
private |
Tokenize the stored whitespace-delimited string of SRA runs.
CVDBBlastUtil implementation.
vecSRARunAccessions | list of individual SRA run accessions [out] |
CException |
Definition at line 190 of file vdb2blast_util.cpp.
References set< Key, Compare >::begin(), copy(), set< Key, Compare >::end(), eUnknown, NStr::fSplit_Tokenize, m_strAllRuns, NCBI_THROW, and NStr::Split().
Referenced by x_MakeVDBSeqSrc().
|
private |
Construct an SRA BlastSeqSrc object from the given strings.
CException |
Definition at line 219 of file vdb2blast_util.cpp.
References BlastSeqSrcFree(), BlastSeqSrcGetInitError(), ctll::empty(), eUnknown, free(), kEmptyStr, m_IncludeFilteredReads, m_isCSRAUtil, NCBI_THROW, SRABlastSeqSrcInit(), strdup, and x_GetSRARunAccessions().
Referenced by CVDBBlastUtil().
|
private |
Release the BlastSeqSrc object in destructor.
Definition at line 215 of file vdb2blast_util.hpp.
Referenced by ~CVDBBlastUtil().
|
private |
Definition at line 221 of file vdb2blast_util.hpp.
Referenced by x_MakeVDBSeqSrc().
|
private |
Definition at line 220 of file vdb2blast_util.hpp.
Referenced by GetVDBSeqIdFromOID(), IsCSRAUtil(), IsWGS(), and x_MakeVDBSeqSrc().
|
private |
Pointer to a properly initialized SRA BlastSeqSrc.
Definition at line 219 of file vdb2blast_util.hpp.
Referenced by CreateBioseqFromOid(), CreateBioseqFromVDBSeqId(), CVDBBlastUtil(), FillVDBInfo(), GetOIDFromVDBSeqId(), GetSRASeqInfoSrc(), GetSRASeqSrc(), GetVDBSeqIdFromOID(), and ~CVDBBlastUtil().
|
private |
Space-delimited list of opened SRA run accessions.
Definition at line 217 of file vdb2blast_util.hpp.
Referenced by x_GetSRARunAccessions().
Definition at line 190 of file vdb2blast_util.hpp.