68 : m_TSeqLocVector(&v),
69 m_OwnTSeqLocVector(
false),
71 m_CalculatedMasks(
true),
74 x_AutoDetectGeneticCodes();
79 : m_TSeqLocVector(&v),
80 m_OwnTSeqLocVector(
false),
82 m_CalculatedMasks(
false),
83 m_Program(opts->GetProgramType())
91 m_OwnTSeqLocVector(
false),
93 m_CalculatedMasks(
false),
101 : m_QueryVector(& v),
102 m_OwnTSeqLocVector(
false),
104 m_CalculatedMasks(
false),
105 m_Program(opts->GetProgramType())
160 const CSeq_id*
id = sseqloc->seqloc->GetId();
161 CSeqdesc_CI desc_it(sseqloc->scope->GetBioseqHandle(*
id),
312 string* warnings)
const
338 string(
"Could not find length of query # ")
354 scope = (*m_TSeqLocVector)[
i].scope;
358 if ( !seqloc->
GetId() ) {
372 title = (*desc)->GetTitle();
410 strand_opt, messages);
416 vector<BLAST_SequenceBlk*>* seqblk_vec,
417 unsigned int* max_subjlen)
424 static unsigned char ctable[16] = {0xFF, 0x00, 0x01, 0xFF, 0x02, 0xFF, 0xFF, 0xFF,
425 0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
428 unsigned char * ncbi2na)
433 for(
int i = 0;
i < inp_bytes;
i++) {
435 unsigned char inp = ncbi4na[
i];
438 unsigned char b = inp & 0xF;
448 if (
b == 0 ||
b == 0x0F) {
450 ncbi2na[
i] = random.
GetRand() & 0x3;
454 int bitcount = ((
b & 1) + ((
b >> 1) & 1) +
455 ((
b >> 2) & 1) + ((
b >> 3) & 1));
461 int pick = random.
GetRand() % bitcount;
463 for(
int j = 0; j < 4; j++) {
465 if ((
b & (1 << j)) == 0)
569 objects::CScope* scope,
584 if (sequences.empty()) {
592 if (seq->seqloc->IsWhole()) {
598 }
else if (seq->seqloc->IsInt()) {
607 "Unsupported Seq-loc type used for query");
609 retval->AddInterval(
id,
range.GetFrom(),
range.GetTo());
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Definitions which are dependent on the NCBI C++ Object Manager.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
Declares class to encapsulate all BLAST options.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
Defines interface for retrieving sequence identifiers.
Declarations of auxiliary functions using IBlastSeqInfoSrc to retrieve ids and related sequence infor...
Internal auxiliary setup classes/functions for C++ BLAST APIs.
int GetGenCode(int def=1) const
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
Implements the object manager dependent version of the IBlastQuerySource.
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
TMaskedQueryRegions GetMaskedRegions(size_type i) const
Get the masked regions for a query by number.
size_type Size() const
Returns the number of queries found in this query vector.
vector< value_type >::size_type size_type
size_type type definition
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Implementation of the IBlastSeqVector interface which obtains data from a CSeq_loc and a CScope relyi...
Thrown on an attempt to write unassigned data member.
Lightweight wrapper around sequence data which provides a CSeqVector-like interface to the data.
Collection of masked regions for a single query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static int base_length[29]
Calls sym dust lib in algo/dustmask and returns CSeq_locs for use by BLAST.
CRef< objects::CPacked_seqint > TSeqLocVector2Packed_seqint(const TSeqLocVector &sequences)
Converts a TSeqLocVector into a CPacked_seqint.
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(int i) const
Return the CSeq_loc associated with a sequence.
virtual const objects::CSeq_id * GetSeqId(int index) const
Return the sequence identifier associated with a sequence.
int GetWindowMaskerTaxId() const
Returns the tax id used for the windowmasker database to use, if set via SetWindowMaskerTaxId (otherw...
void SetupQueries_OMF(IBlastQuerySource &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
ObjMgr Free version of SetupQueries.
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
virtual TSeqPos GetLength(int i) const
Return the length of a sequence.
virtual void GetStrandData(objects::ENa_strand strand, unsigned char *buf)
@inheritDoc
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
SBlastSequence GetSequence_OMF(IBlastSeqVector &sv, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0)
Object manager free version of GetSequence.
TMaskedQueryRegions PackedSeqLocToMaskedQueryRegions(CConstRef< objects::CSeq_loc > sloc, EBlastProgramType program, bool assume_both_strands=false)
Auxiliary function to convert a Seq-loc describing masked query regions to a TMaskedQueryRegions obje...
int GetDustFilteringLevel() const
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
TSeqLocVector * m_TSeqLocVector
Reference to input TSeqLocVector (or NULL if not used)
virtual string GetTitle(int index) const
Return the title of a sequence.
virtual SBlastSequence GetBlastSequence(int i, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, string *warnings=0) const
Return the sequence data for a sequence.
CBlastQuerySourceOM(TSeqLocVector &v, EBlastProgramType prog)
Constructor which takes a TSeqLocVector.
objects::ENa_strand m_Strand
Maintains the state of the strand currently saved by the implementation of this class.
CBlastSeqVectorOM(const CSeq_loc &seqloc, CScope &scope)
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
virtual void SetCoding(CSeq_data::E_Choice coding)
@inheritDoc
virtual SBlastSequence GetCompressedPlusStrand()
@inheritDoc
CRef< CBlastQueryVector > m_QueryVector
Reference to input CBlastQueryVector (or empty if not used)
virtual Uint1 operator[](TSeqPos pos) const
@inheritDoc
virtual TMaskedQueryRegions GetMaskedRegions(int i)
Return the filtered (masked) regions for a sequence.
const CBlastOptions * m_Options
BLAST algorithm options.
virtual TSeqPos x_Size() const
@inheritDoc
const CSeq_loc & m_SeqLoc
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void x_SetStrand(ENa_strand s)
@inheritDoc
TAutoUint1Ptr data
Sequence data.
void x_AutoDetectGeneticCodes(void)
Tries to extract the genetic code using the CScope; if it succeeds, it supersedes what's specified in...
ESentinelType
Allows specification of whether sentinel bytes should be used or not.
bool GetDustFiltering() const
void Blast_FindWindowMaskerLoc(CBlastQueryVector &query, const CBlastOptions *opts)
Find Window Masker filtered locations using a BlastOptions.
virtual Uint4 GetGeneticCodeId(int index) const
Retrieve the genetic code associated with a sequence.
int GetQueryGeneticCode() const
void x_FixStrand(objects::ENa_strand &strand) const
If the Seq-loc is on the minus strand and the user is asking for the minus strand,...
CRef< objects::CSeq_loc > MaskedQueryRegionsToPackedSeqLoc(const TMaskedQueryRegions &sloc)
Interface to build a CSeq-loc from a TMaskedQueryRegion; note that conversion in this dire...
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
static unsigned char ctable[16]
virtual objects::ENa_strand GetStrand(int i) const
Return strand for a sequence.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
int GetDustFilteringLinker() const
void Blast_FindDustFilterLoc(TSeqLocVector &queries, const CBlastNucleotideOptionsHandle *nucl_handle)
Finds dust locations for a given set of sequences by calling the symmetric dust lib.
const char * GetWindowMaskerDatabase() const
Return the name of the windowmasker database to use.
void x_CalculateMasks()
Performs filtering on the query sequences to calculate the masked locations.
bool GetRepeatFiltering() const
Returns true if repeat filtering is on.
bool m_CalculatedMasks
this flag allows for lazy initialization of the masking locations
void SetupQueryInfo_OMF(const IBlastQuerySource &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
ObjMgr Free version of SetupQueryInfo.
const char * GetRepeatFilteringDB() const
Returns the name of the repeat filtering database to use.
TSeqPos size() const
Returns the length of the sequence data (in the case of nucleotides, only one strand)
int GetDustFilteringWindow() const
EBlastProgramType m_Program
BLAST program variable.
virtual CConstRef< objects::CSeq_loc > GetMask(int i)
Return the filtered (masked) regions for a sequence.
virtual TSeqPos Size() const
Return the number of elements in the sequence container.
bool m_OwnTSeqLocVector
flag to determine whether the member above should be deleted in the destructor
virtual void x_SetMinusStrand()
@inheritDoc
virtual ~CBlastQuerySourceOM()
dtor which determines if the internal pointer to its data should be deleted or not.
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
virtual void x_SetPlusStrand()
@inheritDoc
void s_Ncbi4naToNcbi2na(const string &ncbi4na, int base_length, unsigned char *ncbi2na)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
element_type * get(void) const
Get pointer.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
ENa_strand GetStrand(void) const
Get the location's strand.
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
const TDescr & GetDescr(void) const
bool CanGetDescr(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
ENa_strand GetStrand(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetCoding(TCoding coding)
const_iterator begin(void) const
const_iterator end(void) const
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
ENa_strand
strand of nucleic acid
list< CRef< CSeqdesc > > Tdata
const TSource & GetSource(void) const
Get the variant data.
@ e_Title
a title for this sequence
@ e_Source
source of materials, includes Org-ref
range(_Ty, _Ty) -> range< _Ty >
C++ implementation of repeats filtering for C++ BLAST.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
The query related information.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Blast wrappers for WindowMasker filtering.