46 #ifndef SKIP_DOXYGEN_PROCESSING
99 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
100 (!defined(NCBI_COMPILER_MIPSPRO)) )
110 #ifdef KAPPA_PRINT_DIAGNOSTICS
113 s_SeqDbGetGiList(
void* seqdb_handle,
void* args)
123 datap->GetGis(*oid, gis);
125 Blast_GiList* retval = Blast_GiListNewEx(gis.size());
126 copy(gis.begin(), gis.end(), retval->data);
127 retval->num_used = gis.size();
208 return (
Int4) (total_length/num_seqs);
228 return datap->isProtein;
238 if (datap->isProtein ==
true) {
247 static const int kMaxLengthCutoff = 5000;
253 static const int kAvgLengthCutoff = 2048;
256 if ((
Int4)(total_length/num_seqs) < kAvgLengthCutoff) {
270 if (!seqdb_handle || !args)
return;
292 if (!seqdb_handle || !args)
308 list< CRef<CSeq_id> > seqids = seqdb.
GetSeqIDs(oid);
310 if (seqids.empty()) {
316 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
317 (!defined(NCBI_COMPILER_MIPSPRO)) )
319 ASSERT(datap->mask_algo_id != -1);
320 seqdb.
GetMaskData(oid, datap->mask_algo_id, datap->seq_ranges);
324 datap->copied =
false;
329 && !(datap->seq_ranges.empty())
347 if ( datap->isProtein || args->
ranges ==
NULL){
349 const_cast<char **
>(&
buf),
353 &(datap->seq_ranges) :
NULL));
365 const_cast<char **
>(&
buf),
370 &(datap->seq_ranges) :
NULL));
385 if (datap->copied && !has_sentinel_byte)
396 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
397 (!defined(NCBI_COMPILER_MIPSPRO)) )
401 (
SSeqRange*) datap->seq_ranges.get_data(),
402 datap->seq_ranges.size() + 1,
false, datap->mask_type) != 0) {
453 if (!seqdb_handle || !oid)
467 if (!seqdb_handle || !itr)
472 vector<int> oid_list;
495 for (index = 0; index < new_sz; ++index)
496 itr->
oid_list[index] = oid_list[index];
538 fprintf(stderr,
"Invalid iterator type: %d\n", itr->
itr_type);
574 Int4 mask_algo_id = -1,
576 : m_DbName(db), m_IsProtein(is_prot),
577 m_FirstDbSeq(first_oid), m_FinalDbSeq(final_oid),
578 m_MaskAlgoId(mask_algo_id), m_MaskType(mask_type)
/// Returns the database type as a one-character code:
/// 'p' when m_IsProtein is true, 'n' otherwise.
584 char GetDbType()
const {
return m_IsProtein ?
'p' :
'n'; }
672 #ifdef KAPPA_PRINT_DIAGNOSTICS
673 _BlastSeqSrcImpl_SetGetGis (retval, & s_SeqDbGetGiList);
712 bool is_protein = (seqdb_args->
GetDbType() ==
'p');
724 datap->isProtein = is_protein;
727 if (datap->mask_algo_id > 0) {
728 vector<int> supported_algorithms;
730 if (find(supported_algorithms.begin(),
731 supported_algorithms.end(),
732 datap->mask_algo_id) == supported_algorithms.end()) {
734 oss <<
"Masking algorithm ID " << datap->mask_algo_id <<
" is "
735 <<
"not supported in " <<
736 (is_protein ?
"protein" :
"nucleotide") <<
" '"
737 << seqdb_args->
GetDbName() <<
"' BLAST database";
739 throw runtime_error(
msg);
743 }
catch (
const ncbi::CException& e) {
746 }
catch (
const std::exception& e) {
750 strdup(
"Caught unknown exception from CSeqDB constructor"));
771 mask_algo_id, mask_type);
ESubjectMaskingType
Define the possible subject masking types.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
BlastSeqSrc * BlastSeqSrcNew(const BlastSeqSrcNewInfo *bssn_info)
Allocates memory for a BlastSeqSrc structure and then invokes the constructor function defined in its...
#define BLAST_SEQSRC_SUCCESS
Successful sequence retrieval.
#define BLAST_SEQSRC_EXCLUDED
Sequence excluded due to filtering.
BlastSeqSrcSetRangesArg * BlastSeqSrcSetRangesArgFree(BlastSeqSrcSetRangesArg *arg)
Frees a BlastSeqSrcSetRangesArg structure.
#define BLAST_SEQSRC_EOF
No more sequences available.
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetReleaseSequence(BlastSeqSrc *var, ReleaseSeqBlkFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetResetChunkIterator(BlastSeqSrc *var, ResetChunkIteratorFnPtr arg)
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLenStats(BlastSeqSrc *var, GetInt8FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSequence(BlastSeqSrc *var, GetSeqBlkFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqsStats(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLen(BlastSeqSrc *var, GetInt8FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetIsProt(BlastSeqSrc *var, GetBoolFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDataStructure(BlastSeqSrc *var, void *arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetCopyFnPtr(BlastSeqSrc *var, BlastSeqSrcCopier arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetAvgSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetIterNext(BlastSeqSrc *var, AdvanceIteratorFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMinSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetInitErrorStr(BlastSeqSrc *var, char *arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetSetNumberOfThreads(BlastSeqSrc *var, SetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDeleteFnPtr(BlastSeqSrc *var, BlastSeqSrcDestructor arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetName(BlastSeqSrc *var, GetStrFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMaxSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
@ eOidRange
Data is a range of contiguous ordinal ids (indices)
@ eOidList
Data is a list of discontiguous ordinal ids (indices)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqs(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetSetSeqRange(BlastSeqSrc *var, SetSeqRangeFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSupportsPartialFetching(BlastSeqSrc *var, GetBoolFnPtr arg)
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Various auxiliary BLAST utility functions.
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
void BlastSequenceBlkClean(BLAST_SequenceBlk *seq_blk)
Deallocate memory only for the sequence in the sequence block.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
bool Blank() const
Check if an ID list is blank.
int GetMinLength() const
Returns the length of the shortest sequence in the database.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
const string & GetDBNameList() const
Get list of database names.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
void ResetInternalChunkBookmark()
Resets this object's internal chunk bookmark, which is used when the oid_state argument to GetNextOID...
EOidListType
Indicates how block of OIDs was returned.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *partial_ranges, TSequenceRanges *masks=NULL) const
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
int GetNumSeqs() const
Returns the number of sequences available.
EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state=NULL)
Return a chunk of OIDs, and update the OID bookmark.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
int GetNumSeqsStats() const
Returns the number of sequences available.
int GetAmbigSeqAlloc(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *masks=NULL) const
Get a pointer to sequence data with ambiguities.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Setting the number of threads.
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)
Get masked ranges of a sequence.
CSeqDBIdSet GetIdSet() const
Get IdSet list attached to this database.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
Encapsulates the arguments needed to initialize CSeqDB.
iterator_bool insert(const value_type &val)
static BlastSeqSrc * s_SeqDbSrcCopy(BlastSeqSrc *seq_src)
SeqDb sequence source copier: creates a new reference to the CSeqDB object and copies the rest of the...
bool m_IsProtein
Is this database protein?
CSeqDBExpert & operator*()
Convenience to allow datap->method to use SeqDB methods.
ESubjectMaskingType GetMaskType() const
ESubjectMaskingType m_MaskType
static Int4 s_SeqDbGetNumSeqs(void *seqdb_handle, void *)
Retrieves the number of sequences in the BlastSeqSrc.
CRef< CSeqDBExpert > seqdb
SeqDB object.
static Int4 s_SeqDbGetNumSeqsStats(void *seqdb_handle, void *)
Retrieves the number of sequences from the alias file, to be used for statistical calculations.
static Boolean s_SeqDbGetSupportsPartialFetching(void *seqdb_handle, void *)
Determine if partial fetching should be enabled.
BlastSeqSrc * SeqDbBlastSeqSrcInit(const string &dbname, bool is_prot, Uint4 first_seq=0, Uint4 last_seq=0, Int4 mask_algo_id=-1, ESubjectMaskingType mask_type=eNoSubjMasking)
Initialize the sequence source structure.
CSeqDbSrcNewArgs(const string &db, bool is_prot, Uint4 first_oid=0, Uint4 final_oid=0, Int4 mask_algo_id=-1, ESubjectMaskingType mask_type=eNoSubjMasking)
Constructor.
static void s_InitNewSeqDbSrc(BlastSeqSrc *retval, TSeqDBData *datap)
Initializes the data structure and function pointers in a SeqDb based BlastSeqSrc.
CSeqDBExpert * operator->()
Convenience to allow datap->method to use SeqDB methods.
ESubjectMaskingType mask_type
int mask_algo_id
Algorithm ID and type for mask data fetching.
static void s_SeqDbResetChunkIterator(void *seqdb_handle)
Resets CSeqDB's internal chunk bookmark.
const string GetDbName() const
Getter functions for the private fields.
static void s_SeqDbSetNumberOfThreads(void *seqdb_handle, int n)
Setting number of threads in MT mode.
static Int4 s_SeqDbIteratorNext(void *seqdb_handle, BlastSeqSrcIterator *itr)
Finds the next not searched ordinal id in the iteration over BLAST database.
static Int4 s_SeqDbGetAvgLength(void *seqdb_handle, void *ignoreme)
Retrieves the average length of sequences in the BlastSeqSrc.
static const char * s_SeqDbGetName(void *seqdb_handle, void *)
Retrieves the name of the BLAST database.
CSeqDB::TSequenceRanges seq_ranges
Ranges of the sequence to include (for masking).
static BlastSeqSrc * s_SeqDbSrcNew(BlastSeqSrc *retval, void *args)
SeqDb sequence source constructor.
static BlastSeqSrc * s_SeqDbSrcFree(BlastSeqSrc *seq_src)
SeqDb sequence source destructor: frees its internal data structure.
SSeqDB_SeqSrc_Data(CSeqDB *ptr, int id, ESubjectMaskingType type)
Constructor.
static void s_SeqDbSetRanges(void *seqdb_handle, BlastSeqSrcSetRangesArg *args)
Set sequence ranges for partial fetching.
Uint4 m_FirstDbSeq
Ordinal id of the first sequence to search.
static Boolean s_SeqDbGetIsProt(void *seqdb_handle, void *)
Checks whether database is protein or nucleotide.
static Int4 s_SeqDbGetSeqLen(void *seqdb_handle, void *args)
Retrieve length of a given database sequence.
SSeqDB_SeqSrc_Data()
Constructor.
Uint4 GetFinalOid() const
Returns last database ordinal id covered by this BlastSeqSrc.
Uint4 GetFirstOid() const
Returns first database ordinal id covered by this BlastSeqSrc.
SSeqDB_SeqSrc_Data TSeqDBData
Int4 GetMaskAlgoId() const
Returns the default filtering algorithm to use with sequence data extracted from this BlastSeqSrc.
SSeqDB_SeqSrc_Data * clone()
Make a copy of this object, sharing the same SeqDB object.
static void s_SeqDbReleaseSequence(void *seqdb_handle, BlastSeqSrcGetSeqArg *args)
Returns the memory allocated for the sequence buffer to the CSeqDB interface.
static Int2 s_SeqDbGetNextChunk(void *seqdb_handle, BlastSeqSrcIterator *itr)
Assigns next chunk of the database to the sequence source iterator.
Int4 m_MaskAlgoId
filtering algorithm ID to use when retrieving sequence data
static Int4 s_SeqDbGetMinLength(void *seqdb_handle, void *)
Retrieves the length of the shortest sequence in the BlastSeqSrc.
string m_DbName
Database name.
static Int4 s_SeqDbGetMaxLength(void *seqdb_handle, void *)
Retrieves the length of the longest sequence in the BlastSeqSrc.
static Int8 s_SeqDbGetTotLen(void *seqdb_handle, void *)
Retrieves the total length of all sequences in the BlastSeqSrc.
char GetDbType() const
Returns database type: protein or nucleotide.
static Int2 s_SeqDbGetSequence(void *seqdb_handle, BlastSeqSrcGetSeqArg *args)
Retrieves the sequence meeting the criteria defined by its second argument.
Uint4 m_FinalDbSeq
Ordinal id of the last sequence to search.
static Int8 s_SeqDbGetTotLenStats(void *seqdb_handle, void *)
Retrieves the total length of all sequences from alias file.
static BlastSeqSrc * s_SeqDbSrcSharedNew(BlastSeqSrc *retval, void *args)
Populates a BlastSeqSrc, creating a new reference to the already existing SeqDb object.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eDPF_ErrCodeExplanation
Error explanation (default)
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define UINT4_MAX
largest number represented by unsigned int.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Defines `expert' version of CSeqDB interfaces.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 oid
The ordinal id of the current sequence.
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Boolean reset_ranges
This option allows the BLAST engine to communicate with the BlastSeqSrc that the offset ranges for a ...
EBlastEncoding encoding
Encoding of the sequence, i.e. an EBlastEncoding value such as eBlastEncodingNcbi4na or eBlastEncodingNucleotide.
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
BlastSeqSrcSetRangesArg * ranges
BLAST_SequenceBlk * seq
Sequence to return; if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Complete type definition of Blast Sequence Source Iterator.
unsigned int chunk_sz
Size of the chunks to advance over the BlastSeqSrc, also size of oid_list member, this is provided to...
BlastSeqSrcItrType itr_type
Indicates which member to access: oid_list or oid_range.
int oid_range[2]
This is a half-closed interval [a,b)
int * oid_list
Array of ordinal ids used when itr_type is eOidList.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Structure used as the argument to function SetRanges.
Int4 * ranges
Ranges in sorted order [in].
Int4 num_ranges
Number of actual ranges contained.
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Complete type definition of Blast Sequence Source ADT.
Structure to represent a range.
List of sequence offset ranges.
void push_back(const value_type &element)
Append extra element at the end.
Simple container to support SeqSrc-local data.
A structure containing two integers.