70 virtual list< CRef<objects::CSeq_id> >
GetId(
Uint4 oid)
const;
84 virtual size_t Size()
const;
93 const vector<TSeqRange>& target_ranges,
118 : m_sraBlastUtil(sraBlastUtil)
134 list< CRef<CSeq_id> > listIds;
135 listIds.push_back(seqIdVDB);
142 list< CRef<CSeq_id> > listIds =
GetId(oid);
146 seqLoc->
SetWhole().Assign(**listIds.begin());
173 const vector<TSeqRange>& target_ranges,
197 copy(vecSRARunAccessions.begin(),
198 vecSRARunAccessions.end(),
199 inserter(string_set, string_set.
begin()));
200 vecSRARunAccessions.clear();
202 back_inserter(vecSRARunAccessions));
207 "Failed to process SRA accession list: " +
m_strAllRuns);
210 if (vecSRARunAccessions.empty())
222 vector<string> vecSRARunAccessions;
226 Uint4 numRuns = vecSRARunAccessions.size();
227 char** vdbRunAccessions =
new char*[numRuns];
228 for (
Uint4 iRun = 0; iRun < numRuns; iRun++) {
229 if (!vecSRARunAccessions[iRun].
empty()) {
230 vdbRunAccessions[iRun] =
231 strdup(vecSRARunAccessions[iRun].c_str());
243 for (
Uint4 iRun = 0; iRun < numRuns; iRun++)
245 if(isRunExcluded[iRun]) {
246 excluded_runs += vdbRunAccessions[iRun];
247 excluded_runs +=
" ";
250 delete [] vdbRunAccessions;
251 delete [] isRunExcluded;
253 if(rc > 0 && excluded_runs !=
kEmptyStr) {
257 "Error opening the following db(s): " + excluded_runs);
263 "Failed to construct the VDB BlastSeqSrc object");
269 string strErrMsg(errMsg);
273 "VDB BlastSeqSrc construction failed: " + strErrMsg);
283 bool bIncludeFilteredReads):
284 m_bOwnSeqSrc(bOwnSeqSrc), m_strAllRuns(strAllRuns), m_isCSRAUtil(bCSRA),
285 m_IncludeFilteredReads(bIncludeFilteredReads)
328 size_t first_digit_pos =
id.find_first_of(
kDigits);
329 if((first_digit_pos > 3) && (first_digit_pos <= 6)) {
330 if(
id.find_first_not_of(
kDigits, first_digit_pos) == std::string::npos) {
345 const char * readName =
NULL;
352 "Incomplete SeqID for SRA sequence");
364 "Empty VDB tag in SeqID");
366 readName = nameStr.c_str();
378 "Failed to get the OID for the VDB tag: " +
string(readName));
387 if (!db_name.empty())
390 string tmp = db_name;
391 if(last_pos != string::npos) {
392 tmp = db_name.substr(last_pos +1);
396 if(
tmp.find_first_not_of(
kDigits, 4) == std::string::npos)
424 const string gnl_tag(
"gnl|SRA|");
425 string strId =
string(nameRun);
430 list<CRef<CSeq_id> > ids;
434 strId = gnl_tag + strId;
442 strId = gnl_tag + strId;
450 strId = gnl_tag + strId;
492 "Failed to read the VDB sequence string for OID=" +
497 if (!cstrSeq || strlen(cstrSeq) == 0)
500 "Got an empty VDB sequence string for OID=" +
513 bioseqResult->
SetInst(*seqInst);
516 bioseqResult->
SetId().push_back(seqId);
522 bioseqResult->
SetDescr().Set().push_back(descTitle);
557 "2na reader has not been initialized");
574 "Failed to read the VDB sequence string for OID=" +
585 if (!cstrSeq || strlen(cstrSeq) == 0)
588 "Got an empty VDB sequence string for OID=" +
596 "Failed to seq id for OID=" +
609 bioseqResult->
SetInst(*seqInst);
612 bioseqResult->
SetId().push_back(
id);
618 bioseqResult->
SetDescr().Set().push_back(descTitle);
629 CSeq_align_set::Tdata::const_iterator itAln;
630 for (itAln = alnSet->
Get().begin(); itAln != alnSet->
Get().end(); itAln++)
635 subjId->
Assign(subjIdFromAln);
652 CBlastFormatUtil::SDbInfo dbInfo;
653 dbInfo.is_protein =
false;
655 dbInfo.definition = dbInfo.name;
658 vecDbInfo.push_back(dbInfo);
662 m_bOwnSeqSrc(
true), m_strAllRuns(strAllRuns), m_isCSRAUtil(bCSRA)
683 Uint8 & max_seq_length,
Uint8 & av_seq_length,
bool getRefStats)
702 unsigned int numRuns = vdbs.size();
704 char** vdbRunAccessions =
new char*[numRuns];
705 for (
Uint4 iRun = 0; iRun < numRuns; iRun++) {
706 if (!vdbs[iRun].
empty()) {
707 vdbRunAccessions[iRun] =
strdup(vdbs[iRun].c_str());
714 SRABlastSeqSrcInit((
const char**)vdbRunAccessions, numRuns,
false, isRunExcluded.
get(), &rc,
false,
false);
718 for (
Uint4 iRun = 0; iRun < numRuns; iRun++)
720 if(isRunExcluded.
get()[iRun]) {
721 cannot_open += vdbs[iRun]+
" ";
723 free(vdbRunAccessions[iRun]);
725 delete [] vdbRunAccessions;
737 if(status != 0 || mgr ==
NULL) {
740 status = VdbBlastMgrKLogHandlerSetStdErr(mgr);
742 status = VdbBlastMgrKLogLibHandlerSetStdErr(mgr);
745 status = VdbBlastMgrKLogLevelSetWarn(mgr);
790 ref_num_seqs = vdbData->
numSeqs;
803 if ( srr_len !=
NPOS ) {
829 return (num_thread == 0? 1: num_thread);
834 int num_ids = ids.size();
844 for(
Uint8 j=0; j < num_seqs; j++) {
874 unsigned int num_wgs = 0;
878 for (
unsigned int i=0;
i < dbs.size();
i++) {
881 if(last_pos != string::npos) {
882 tmp = dbs[
i].substr(last_pos +1);
890 if (num_wgs == dbs.size()) {
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
char * BlastSeqSrcGetInitError(const BlastSeqSrc *seq_src)
Function to retrieve NULL terminated string containing the description of an initialization error or ...
Int4 BlastSeqSrcGetNumSeqs(const BlastSeqSrc *seq_src)
Get the number of sequences contained in the sequence source.
Int8 BlastSeqSrcGetTotLen(const BlastSeqSrc *seq_src)
Get the total length of all sequences in the sequence source.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
const char * BlastSeqSrcGetName(const BlastSeqSrc *seq_src)
Get the Blast Sequence source name (e.g.
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
static Uint8 GetTotalPhysicalMemorySize(void)
Return the amount of actual/total physical memory, in bytes.
BlastSeqSrc * GetSRASeqSrc()
Return the stored SRA BlastSeqSrc object.
static Uint4 GetMaxNumCSRAThread(void)
void FillVDBInfo(vector< CBlastFormatUtil::SDbInfo > &vecDbInfo)
Populate the DB info list with information on open SRA runs.
string m_strAllRuns
Space-delimited list of opened SRA run accessions.
Uint4 GetOIDFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Get the ordinal number (OID) for the given SRA sequence.
void x_GetSRARunAccessions(vector< string > &vecSRARunAccessions)
Tokenize the stored whitespace-delimited string of SRA runs.
bool m_bOwnSeqSrc
Release the BlastSeqSrc object in destructor.
CRef< objects::CSeq_id > GetVDBSeqIdFromOID(Uint4 oid)
Get the SRA sequence SeqID given its ordinal number (OID).
bool m_IncludeFilteredReads
static void GetVDBStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false)
Function to get around the OID (blastseqsrc) limit, so that a number of sequences larger than Int4 can be returned.
static Uint4 SetupVDBManager()
*Note* Call this in main thread first, if you are going to instantiate this object or use any of the ...
BlastSeqSrc * m_seqSrc
Pointer to a properly initialized SRA BlastSeqSrc.
CRef< objects::CBioseq > CreateBioseqFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Construct a Bioseq object for the given SRA sequence.
static bool IsCSRA(const string &db_name)
static void CheckVDBs(const vector< string > &vdbs)
Function to check whether each db in a list can be opened. Throws an exception if any of the db cannot be o...
BlastSeqSrc * x_MakeVDBSeqSrc()
Construct an SRA BlastSeqSrc object from the given strings.
void GetOidsFromSeqIds_WGS(const vector< string > &ids, vector< int > &oids)
CRef< blast::IBlastSeqInfoSrc > GetSRASeqInfoSrc()
Return the SRA BlastSeqInfoSrc object (create if none exists).
virtual ~CVDBBlastUtil()
Destructor.
CVDBBlastUtil(const string &strAllRuns, bool bOwnSeqSrc=false, bool bCSRA=false, bool bIncludeFilteredReads=false)
Constructor that creates and stores the SRA BlastSeqSrc object.
static bool IsSRA(const string &db_name)
static void GetAllStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, Uint8 &ref_num_seqs, Uint8 &ref_length)
static void ReleaseVDBManager()
Call this to release the VDB manager if SetupVDBManager has been explicitly called in the main thread.
void AddSubjectsToScope(CRef< CScope > scope, CConstRef< CSeq_align_set > alnSet)
Populate the CScope object with subject sequence Bioseqs.
CRef< objects::CBioseq > CreateBioseqFromOid(Uint8 oid)
static IDType VDBIdType(const CSeq_id &id)
virtual bool HasGiList() const
Returns true if the subject is restricted by a GI list, always returns false in this implementation.
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(Uint4 oid) const
Method to retrieve the sequence location given its ordinal number.
CVDBSeqInfoSrc(CRef< CVDBBlastUtil > sraBlastUtil)
Constructor taking a CVDBBlastUtil object.
virtual bool GetMasks(Uint4 oid, const vector< TSeqRange > &target_ranges, TMaskedSubjRegions &retval) const
Retrieves the subject masks for the corresponding oid, always returns false in this implementation.
virtual size_t Size() const
Returns the size of the underlying container of sequences.
virtual list< CRef< objects::CSeq_id > > GetId(Uint4 oid) const
Method to retrieve a sequence identifier given its ordinal number.
virtual bool CanReturnPartialSequence() const
Return true if the implementation can return anything besides a seq-loc for the entire sequence.
virtual ~CVDBSeqInfoSrc()
Destructor.
virtual Uint4 GetLength(Uint4 oid) const
Method to retrieve a sequence length given its ordinal number.
CRef< CVDBBlastUtil > m_sraBlastUtil
The CVDBBlastUtil object that takes care of various conversions.
Collection of masked regions for a single query sequence.
const_iterator begin() const
const_iterator end() const
#define VDBSRC_OVERFLOW_RV
void VDBSRC_ReleaseErrorMsg(TVDBErrMsg *vdbErrMsg)
Release the Error message.
void VDBSRC_InitEmptyErrorMsg(TVDBErrMsg *vdbErrMsg)
Initialize an empty Error message (No Error).
void VDBSRC_FormatErrorMsg(char **errMsg, const TVDBErrMsg *vdbErrMsg)
Format the error message as a single human-readable string.
bool Empty(const CNcbiOstrstream &src)
static const char * str(char *buf, int n)
element_type * get(void) const
Get pointer.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
@ fParse_RawText
Try to ID raw non-numeric accessions.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
@ kPriority_Default
Use default priority for added data.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool IsGeneral(void) const
Check if variant General is selected.
E_Choice Which(void) const
Which variant is currently selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
@ e_General
for other databases
TId & SetId(void)
Assign a value to Id data member.
TTitle & SetTitle(void)
Select the variant.
TLength GetLength(void) const
Get the Length member data.
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
void SetRepr(TRepr value)
Assign a value to Repr data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
@ e_Iupacna
IUPAC 1 letter nuc acid code.
constexpr bool empty(list< Ts... >) noexcept
Defines to provide correct exporting from DLLs in some configurations.
Uint1 Boolean
bool replacement for C
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
BlastSeqSrc * SRABlastSeqSrcInit(const char **sraRunAccessions, Uint4 numRuns, Boolean isProtein, Boolean *excluded_runs, Uint4 *status, Boolean isCSRA, Boolean include_filtered_reads)
Allocate and initialize the SRA BlastSeqSrc object.
Complete type definition of Blast Sequence Source ADT.
Structure providing top-level VDB data access.
TVDB2naICReader * reader_2na
char * names
Names of the VDB data represented by this object (usually this will include all the SVDB run accessio...
Structure describing the error messages the library can generate.
Boolean isError
True if the object describes an error.
static const char kDigits[]
static bool s_IsWGSId(const string &id)
Boolean VDBSRC_GetOIDFromReadName(TVDBData *vdbData, const char *nameRun, Int4 *oid)
Get the sequence OID given its SRA-specific sequence information.
uint64_t VDBSRC_GetAvgSeqLen(TVDBData *vdbData)
Get the average sequence length in the open SRA data.
int VDBSRC_IsCSRA(const char *run)
Return 1 if run is csra, 0 if not and -1 for error.
VdbBlastMgr * VDBSRC_GetVDBManager(uint32_t *status)
This will call VdbBlastInit and initialize a singleton for VDBBlastMgr. This needs to be called in the...
void VDBSRC_ReleaseVDBManager()
This needs to be called if VDBSRC_GetVDBManager has been called in the main thread.
uint64_t VDBSRC_GetTotSeqLen(TVDBData *vdbData)
Get the total sequence length in the open SRA data.
void VDBSRC_MakeCSRASeqSrcFromSRASeqSrc(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg, Boolean getStats)
uint64_t VDBSRC_GetMaxSeqLen(TVDBData *vdbData)
Get the maximum sequence length in the open SRA data.
Boolean VDBSRC_GetReadNameForOID(TVDBData *vdbData, Int4 oid, char *name_buffer, size_t buf_size)
Get the SRA-specific sequence information for the given OID.
File contains internal structures and functions for reading VDB databases.
#define VDB_2NA_CHUNK_BUF_SIZE
Boolean VDBSRC_Get4naSequenceAsString(TVDBData *vdbData, uint64_t oid, char **seqIupacna, TVDBErrMsg *vdbErrMsg)
Access and convert the selected sequence to a human-readable string.
Boolean VDBSRC_Get2naSequenceAsString(TVDBData *vdbData, uint64_t oid, char **seqIupacna, TVDBErrMsg *vdbErrMsg)