89 m_MinLength(1), m_AvgLength(0), m_QuerySource(0), m_NumSeqs(0)
97 "Failed to initialize sequences for IQueryFactory");
109 m_MinLength(1), m_AvgLength(0), m_QuerySource(0), m_NumSeqs(static_cast<
Uint4>(subj_seqs.
size()))
115 vector<Int2> strand_v;
118 strand_v.push_back((
Int2) (*iter).seqloc->GetStrand());
120 sl->
Assign(*((*iter).seqloc));
125 mask_sl->
Assign(*((*iter).mask));
126 SSeqLoc sseq_loc(*sl, *((*iter).scope), *mask_sl);
127 temp_slv.push_back(sseq_loc);
131 SSeqLoc sseq_loc(*sl, *((*iter).scope));
132 temp_slv.push_back(sseq_loc);
139 ITERATE(vector<Int2>, s_iter, strand_v)
204 throw std::out_of_range(
"");
222 return (*seq_info)->GetMaxLength();
233 return (*seq_info)->GetMinLength();
245 if ((*seq_info)->GetAvgLength() == 0) {
246 const Uint4 num_seqs((*seq_info)->GetNumSeqs());
249 Int8 total_length(0);
250 for (
Uint4 index = 0; index < num_seqs; ++index)
251 total_length += (
Int8) (*seq_info)->GetSeqBlk(index)->length;
252 (*seq_info)->SetAvgLength((
Uint4) (total_length / num_seqs));
254 return (*seq_info)->GetAvgLength();
265 return (*seq_info)->GetNumSeqs();
305 return (
Boolean) (*seq_info)->GetIsProtein();
322 if ((*seq_info)->GetNumSeqs() == 0 || !args)
326 try { seq_blk = (*seq_info)->GetSeqBlk(args->
oid); }
327 catch (
const std::out_of_range&) {
377 index = *((
Int4*) args);
378 return (*seq_info)->GetSeqBlk(index)->length;
504 }
catch (
const ncbi::CException& e) {
506 }
catch (
const std::exception& e) {
510 strdup(
"Caught unknown exception from CQueryFactoryInfo constructor"));
547 if (query_factory.
Empty() && subj_seqs.empty()) {
549 "Must provide either a query factory or subject sequences");
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Declares the BLAST exception class.
Definitions which are dependent on the NCBI C++ Object Manager.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
BlastSeqSrc * BlastSeqSrcNew(const BlastSeqSrcNewInfo *bssn_info)
Allocates memory for a BlastSeqSrc structure and then invokes the constructor function defined in its...
#define BLAST_SEQSRC_SUCCESS
Successful sequence retrieval.
#define BLAST_SEQSRC_EOF
No more sequences available.
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetReleaseSequence(BlastSeqSrc *var, ReleaseSeqBlkFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetResetChunkIterator(BlastSeqSrc *var, ResetChunkIteratorFnPtr arg)
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLenStats(BlastSeqSrc *var, GetInt8FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSequence(BlastSeqSrc *var, GetSeqBlkFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqsStats(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLen(BlastSeqSrc *var, GetInt8FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetIsProt(BlastSeqSrc *var, GetBoolFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDataStructure(BlastSeqSrc *var, void *arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetCopyFnPtr(BlastSeqSrc *var, BlastSeqSrcCopier arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetAvgSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetIterNext(BlastSeqSrc *var, AdvanceIteratorFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMinSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetInitErrorStr(BlastSeqSrc *var, char *arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDeleteFnPtr(BlastSeqSrc *var, BlastSeqSrcDestructor arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetName(BlastSeqSrc *var, GetStrFnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMaxSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqs(BlastSeqSrc *var, GetInt4FnPtr arg)
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
Defines BLAST error codes (user errors included)
Implements the IBlastQuerySource interface using a CBioseq_set as data source.
Contains information about all sequences in a set.
TSeqLocVector subj_seqs
The subject sequences.
Uint4 GetAvgLength()
Returns average length.
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
unsigned int m_MaxLength
Length of the longest sequence in this set.
BLAST_SequenceBlk * GetSeqBlk(Uint4 index)
Returns sequence block structure for one of the sequences.
EBlastProgramType program
BLAST program.
SQueryFactorySrcNewArgs(CRef< IQueryFactory > qf, const TSeqLocVector &subj_seqs, EBlastProgramType p)
Constructor.
CQueryFactoryInfo(CRef< IQueryFactory > qf, EBlastProgramType program)
Constructor from a query factory and a BLAST program type.
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
BlastSeqSrc * QueryFactoryBlastSeqSrcInit(CRef< IQueryFactory > query_factory, EBlastProgramType program)
Initialize the sequence source structure from a query factory.
static Int4 s_QueryFactoryGetSeqLen(void *multiseq_handle, void *args)
Retrieve length of a given sequence.
unsigned int m_AvgLength
Average length of sequences in this set.
bool m_IsProt
Are these sequences protein or nucleotide?
Uint4 m_NumSeqs
Number of sequences.
static void s_QueryFactoryReleaseSequence(void *, BlastSeqSrcGetSeqArg *args)
Deallocates the uncompressed sequence buffer if necessary.
static Int4 s_QueryFactoryGetNumSeqs(void *multiseq_handle, void *)
Retrieves the number of sequences in the BlastSeqSrc.
static Boolean s_QueryFactoryGetIsProt(void *multiseq_handle, void *)
Answers whether this object is for protein or nucleotide sequences.
static BlastSeqSrc * s_QueryFactoryBlastSeqSrcInit(CRef< IQueryFactory > query_factory, const TSeqLocVector &subj_seqs, EBlastProgramType program)
static const char * s_QueryFactoryGetName(void *, void *)
Always returns NcbiEmptyCStr.
~CQueryFactoryInfo()
Destructor.
static Int8 s_QueryFactoryGetTotLen(void *, void *)
Returns 0 as total length, indicating that this is NOT a database!
CRef< IBlastQuerySource > m_QuerySource
local query data obtained from the query factory
unsigned int m_MinLength
Length of the shortest sequence in this set.
static Int8 s_QueryFactoryGetTotLenStats(void *, void *)
Returns 0 as total statistic length, as this implementation does not use alias files.
static BlastSeqSrc * s_QueryFactorySrcNew(BlastSeqSrc *retval, void *args)
Multi-sequence source constructor.
static void s_QueryFactoryResetChunkIter(void *)
Resets the internal bookmark iterator (N/A in this case)
static Int2 s_QueryFactoryGetSequence(void *multiseq_handle, BlastSeqSrcGetSeqArg *args)
Retrieves the sequence for a given index, in a given encoding.
static Int4 s_QueryFactoryGetMinLength(void *multiseq_handle, void *)
Retrieves the length of the shortest sequence in the BlastSeqSrc.
virtual CRef< objects::CBioseq_set > GetBioseqSet()=0
Accessor for the CBioseq_set.
static BlastSeqSrc * s_QueryFactorySrcFree(BlastSeqSrc *seq_src)
Multi sequence source destructor: frees its internal data structure.
static BlastSeqSrc * s_QueryFactorySrcCopy(BlastSeqSrc *seq_src)
Multi-sequence sequence source copier: creates a new reference to the CQueryFactoryInfo object and co...
CRef< IRemoteQueryData > MakeRemoteQueryData()
Creates and caches an IRemoteQueryData.
Uint4 GetMaxLength()
Setter and getter functions for the private fields.
static Int2 s_QueryFactoryGetNextChunk(void *multiseq_handle, BlastSeqSrcIterator *itr)
Mirrors the database iteration interface.
static Int4 s_QueryFactoryGetNumSeqsStats(void *, void *)
Returns zero as this implementation does not use an alias file.
vector< BLAST_SequenceBlk * > m_SeqBlkVector
Vector of sequence blocks.
Uint4 GetNumSeqs()
Returns number of sequences.
virtual TSeqPos Size() const =0
Return the number of elements in the sequence container.
static Int4 s_QueryFactoryIteratorNext(void *multiseq_handle, BlastSeqSrcIterator *itr)
Gets the next sequence index, given a BlastSeqSrc pointer.
void SetAvgLength(Uint4 val)
Sets average length.
CRef< IQueryFactory > query_factory
The query factory.
Uint4 GetMinLength()
Returns minimal length of a set of sequences.
static Int4 s_QueryFactoryGetMaxLength(void *multiseq_handle, void *)
The following functions interact with the C API, and have to be declared extern "C".
bool GetIsProtein()
Answers whether sequences in this object are protein or nucleotide.
static Int4 s_QueryFactoryGetAvgLength(void *multiseq_handle, void *)
Retrieves the average length of the sequence in the BlastSeqSrc.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
@ eNa_strand_both
in forward orientation
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
Uint1 Boolean
bool replacement for C
#define FALSE
bool replacement for C indicating false.
#define UINT4_MAX
largest number represented by unsigned int.
Implementation of the BlastSeqSrc interface for a query factory.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 oid
The ordinal id of the current sequence.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Uint1 * sequence
Sequence used for search (could be translation).
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
EBlastEncoding encoding
Encoding of sequence, i.e.
BLAST_SequenceBlk * seq
Sequence to return; if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Complete type definition of Blast Sequence Source Iterator.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Complete type definition of Blast Sequence Source ADT.
Encapsulates the arguments needed to initialize multi-sequence source.
Structure to represent a single sequence to be fed to BLAST.