75 Uint4 shift = prev_len ? prev_len + 1 : 0;
109 ? seqloc_strand : strand_opt;
160 if (query_info.
Get() ==
NULL) {
170 if (is_na || translate) {
176 unsigned int ctx_index = 0;
178 Uint4 max_length = 0;
193 for (
unsigned int i = 0;
i < kNumContexts;
i++) {
194 unsigned int prot_length =
196 max_length =
MAX(max_length, prot_length);
197 min_length =
MIN(min_length, prot_length);
203 ctx_len = (
i<3) ? prot_length : 0;
211 ctx_len = (
i<3) ? 0 : prot_length;
227 max_length =
MAX(max_length, length);
228 min_length =
MIN(min_length, length);
268 ctx_index += kNumContexts;
289 start = end = num_contexts;
293 start = num_contexts/2;
298 end = num_contexts/2;
327 _ASSERT(query_index < mask->total_size);
333 int starting_context(0), ending_context(0);
340 const TSeqPos dna_length = query_length;
343 & (
mask->seqloc_array[query_index*num_contexts]);
347 for (
int i = starting_context;
i < ending_context;
i++) {
349 frames_seqloc[
i] = *seqloc_frames[frame];
360 mask->seqloc_array[query_index*num_contexts] =
361 *seqloc_frames[posframe];
362 seqloc_frames.
Release(posframe);
366 mask->seqloc_array[query_index*num_contexts+1] =
367 *seqloc_frames[negframe];
368 seqloc_frames.
Release(negframe);
372 mask->seqloc_array[query_index*num_contexts] =
373 *seqloc_frames[posframe];
375 mask->seqloc_array[query_index*num_contexts+1] =
376 *seqloc_frames[negframe];
378 seqloc_frames.
Release(posframe);
379 seqloc_frames.
Release(negframe);
387 mask->seqloc_array[query_index] = *seqloc_frames[0];
405 const TFrameSet& frames = frame_to_bsl.
ListFrames();
408 const int first_ctx =
static_cast<int>(kNumFrames) * query_index;
409 const int last_ctx =
static_cast<int>(kNumFrames) * (query_index + 1);
411 ITERATE(TFrameSet, iter, frames) {
412 int seqloc_frame = *iter;
415 for(
int ci = first_ctx; ci <= last_ctx; ci++) {
419 if (context_frame == seqloc_frame) {
451 if (! frame_to_bsl->
Empty()) {
452 if (frame_to_bsl->QueryHasMultipleFrames()) {
494 if (messages.size() != queries.
Size()) {
495 messages.resize(queries.
Size());
505 "Query sequence buffer");
513 unsigned int ctx_index = 0;
518 for(
TSeqPos index = 0; index < queries.
Size(); index++) {
535 string query_id =
id->GetSeqIdString();
539 if(query_id.size() > 35) {
540 query_id = query_id.substr(0, 25) +
".. ";
543 messages[index].SetQueryId(query_id);
569 int na_length = queries.
GetLength(index);
572 seqbuf_rev = sequence.
data.
get() + na_length + 1;
574 seqbuf_rev = sequence.
data.
get();
577 for (
unsigned int i = 0;
i < kNumContexts;
i++) {
600 ctx_index + 1 : ctx_index;
618 if ( !warnings.empty() ) {
622 messages[index].push_back(m);
638 messages[index].push_back(m);
642 ctx_index += kNumContexts;
656 (*seqblk)->lcase_mask =
mask.Release();
657 (*seqblk)->lcase_mask_allocated =
TRUE;
672 if (
range->IsInt()) {
677 length = total_length;
705 if ((*itr)->GetTo() >=
offset && p.
first < length) {
709 }
else if (slp->
IsMix()) {
713 if ((*itr)->IsInt()) {
714 p.
first = ((*itr)->GetInt().GetFrom() >
offset)? (*itr)->GetInt().GetFrom() -
offset : 0;
716 if ((*itr)->GetInt().GetTo() >=
offset && p.
first < length) {
719 }
else if ((*itr)->IsPnt()) {
720 p.
first = ((*itr)->GetPnt().GetPoint() >
offset)? (*itr)->GetPnt().GetPoint() -
offset : 0;
722 if ((*itr)->GetPnt().GetPoint() >=
offset && p.
first < length) {
735 vector<BLAST_SequenceBlk*>* seqblk_vec,
736 unsigned int* max_subjlen)
776 seqblk_vec->push_back(subj);
781 warning =
id->GetSeqIdString() +
" ";
787 warning +=
"Subject sequence contains no data";
820 if ( !masked_ranges.
empty() ) {
851 "Failed to get compressed nucleotide sequence");
858 seqblk_vec->push_back(subj);
879 vector<TSeqPos> replaced_residues;
880 vector<TSeqPos> invalid_residues;
882 static const size_t kMaxResiduesToWarnAbout = 20;
894 for (
i = 0;
i < sv.
size();
i++) {
897 replaced_residues.push_back(
i);
900 invalid_residues.push_back(
i);
905 if (invalid_residues.size() > 0) {
906 string error(
"Invalid residues found at positions ");
908 for (
i = 1;
i <
min(kMaxResiduesToWarnAbout, invalid_residues.size());
912 if (invalid_residues.size() > kMaxResiduesToWarnAbout) {
913 error +=
",... (only first ";
920 if (warnings && replaced_residues.size() > 0) {
921 *warnings +=
"One or more O characters replaced by X for ";
922 *warnings +=
"alignment score calculations at positions ";
924 for (
i = 1;
i <
min(kMaxResiduesToWarnAbout, replaced_residues.size());
928 if (replaced_residues.size() > kMaxResiduesToWarnAbout) {
929 *warnings +=
",... (only first ";
931 *warnings +=
" shown)";
1044 memcpy(buf_ptr,
plus.data.get(),
plus.length);
1045 buf_ptr +=
plus.length;
1049 memcpy(buf_ptr,
minus.data.get(),
minus.length);
1050 buf_ptr +=
minus.length;
1181 Uint1 bit_shift = 0;
1183 case 0: bit_shift = 6;
break;
1184 case 1: bit_shift = 4;
break;
1185 case 2: bit_shift = 2;
break;
1205 if (sequence_length == 0) {
1222 retval = sequence_length * 2;
1225 retval = sequence_length + 2;
1229 retval = sequence_length * 2;
1231 retval = sequence_length;
1239 retval = sequence_length + 2;
1267 BLASTGetTranslation(
const Uint1* seq,
const Uint1* seq_rev,
1268 const int nucl_length,
const short frame,
Uint1* translation)
1273 const Uint1* nucl_seq = frame >= 0 ? seq : seq_rev;
1274 translation[0] =
NULLB;
1280 translation[
pi++] = residue;
1296 full_path.erase(full_path.size() - matrix_name.size());
1297 char* matrix_path =
strdup(full_path.c_str());
1308 string mtx(matrix_name);
1313 if(!full_path.empty()){
1319 if(!full_path.empty()){
1329 if (
CDir(blastmat_env).Exists()) {
1330 full_path = blastmat_env;
1333 if (
CFile(full_path).Exists()) {
1337 full_path = blastmat_env;
1339 full_path += matrix_name;
1340 if (
CFile(full_path).Exists()) {
1345 full_path = blastmat_env;
1347 full_path += is_prot ?
"aa" :
"nt";
1350 if (
CFile(full_path).Exists()) {
1355 full_path = blastmat_env;
1357 full_path += is_prot ?
"aa" :
"nt";
1359 full_path += matrix_name;
1360 if (
CFile(full_path).Exists()) {
1370 if (
CFile(full_path).Exists()) {
1377 full_path += matrix_name;
1378 if (
CFile(full_path).Exists()) {
1392 string full_path = path + (is_prot ?
".pal" :
".nal");
1396 full_path = path + (is_prot ?
".pin" :
".nin");
1422 if (
CFile(blastdb_env).Exists()) {
1423 full_path = blastdb_env;
1428 retval.erase(retval.size() -
database.size());
1445 retval.erase(retval.size() -
database.size());
1455 unsigned int retval = 0;
1457 int debug_value =
static_cast<int>(p);
1459 string msg =
"Cannot get number of contexts for invalid program ";
1500 "blast::SetupQueryInfo failed");
1525 : m_Program(program)
1533 : m_Program(program)
1552 if(frame == ncbi::CSeqLocInfo::eFramePlus1)
1554 AddSeqLoc(intv, ncbi::CSeqLocInfo::eFramePlus2);
1555 frames.
insert(ncbi::CSeqLocInfo::eFramePlus2);
1556 AddSeqLoc(intv, ncbi::CSeqLocInfo::eFramePlus3);
1557 frames.
insert(ncbi::CSeqLocInfo::eFramePlus3);
1559 else if (frame == ncbi::CSeqLocInfo::eFrameMinus1)
1561 AddSeqLoc(intv, ncbi::CSeqLocInfo::eFrameMinus2);
1562 frames.
insert(ncbi::CSeqLocInfo::eFrameMinus2);
1563 AddSeqLoc(intv, ncbi::CSeqLocInfo::eFrameMinus3);
1564 frames.
insert(ncbi::CSeqLocInfo::eFrameMinus3);
1573 if ((*iter).second != 0) {
1604 short frame = iter->first;
1622 if (from >= kFrameLength)
1623 from = kFrameLength - 1;
1624 if (to >= kFrameLength)
1625 to = kFrameLength - 1;
1627 _ASSERT(from >= 0 && to >= 0);
1628 _ASSERT(from < kFrameLength && to < kFrameLength);
1629 itr->ssr->left = from;
1630 itr->ssr->right = to;
1641 if ((*iter).second != 0) {
1701 "Frame and program values are incompatible.");
1725 "IsMulti: unsupported program");
1743 for (
size_t i = 0;
i <
sizeof(kFrames)/
sizeof(*kFrames);
i++) {
1748 intv.GetFrom(), intv.GetTo());
1758 intv.GetFrom(), intv.GetTo());
@ eExtreme_Positional
numerical value
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define CODON_LENGTH
Codons are always of length 3.
BlastMaskLoc * BlastMaskLocNew(Int4 total)
Allocate memory for a BlastMaskLoc.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Declares class to encapsulate all BLAST options.
Boolean Blast_ProgramIsMapping(EBlastProgramType p)
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Uint4 QueryInfo_GetSeqBufLen(const BlastQueryInfo *qinfo)
Get the number of bytes required for the concatenated sequence buffer, given a query info structure.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Int2 BlastSetup_Validate(const BlastQueryInfo *query_info, const BlastScoreBlk *score_blk)
Validation function for the setup of queries for the BLAST search.
void BlastSeqLoc_RestrictToInterval(BlastSeqLoc **mask, Int4 from, Int4 to)
Adjusts the mask locations coordinates to a sequence interval.
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Various auxiliary BLAST utility functions.
#define NCBI2NA_MASK
Bit mask for obtaining a single base from a byte in ncbi2na format.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
size_t BLAST_GetTranslatedProteinLength(size_t nucleotide_length, unsigned int context)
Calculates the length of frame for a translated protein.
Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence)
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence wh...
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Int4 BLAST_GetTranslation(const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *buffer, const Uint1 *genetic_code)
GetTranslation to get the translation of the nucl.
#define IS_residue(x)
Does character encode a residue?
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
Wrapper class for BLAST_SequenceBlk .
Defines BLAST error codes (user errors included)
Wrapper class for BlastMaskLoc .
Encapsulates ALL the BLAST algorithm's options.
Collection of BlastSeqLoc lists for filtering processing.
Wrapper class for BlastQueryInfo .
Defines system exceptions occurred while running BLAST.
static int CodonToIndex(char base1, char base2, char base3)
static CNcbiApplication * Instance(void)
Singleton method.
Error or Warning Message from search.
Lightweight wrapper around an indexed sequence container.
Lightweight wrapper around sequence data which provides a CSeqVector-like interface to the data.
Collection of masked regions for a single query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
iterator_bool insert(const value_type &val)
Defines the interface to interact with the genetic code singleton object.
Int2 GenCodeSingletonAdd(Uint4 gen_code_id, const Uint1 *gen_code_str)
Add a genetic code entry to the singleton.
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
virtual TMaskedQueryRegions GetMaskedRegions(int index)=0
Return the filtered (masked) regions for a sequence.
bool Empty() const
Returns true if the container is empty, else false.
virtual CConstRef< objects::CSeq_loc > GetMask(int index)=0
Return the filtered (masked) regions for a sequence.
BlastQueryInfo * Get() const
void SetupQueries_OMF(IBlastQuerySource &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
ObjMgr Free version of SetupQueries.
static CRef< CBlastQueryFilteredFrames > s_GetRestrictedBlastSeqLocs(IBlastQuerySource &queries, int query_index, const BlastQueryInfo *qinfo, EBlastProgramType program)
Extract the masking locations for a single query into a CBlastQueryFilteredFrames object and adjust t...
BlastQueryInfo * Release()
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
SBlastSequence GetSequence_OMF(IBlastSeqVector &sv, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0)
Object manager free version of GetSequence.
static void s_AdjustFirstContext(BlastQueryInfo *query_info, EBlastProgramType prog, ENa_strand strand_opt, const IBlastQuerySource &queries)
Adjust first context depending on the first query strand.
virtual Uint4 GetGeneticCodeId(int index) const =0
Retrieve the genetic code associated with a sequence.
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
bool m_TranslateCoords
True if this object's masked regions store DNA coordinates that will later be translated into protein...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
void AddSeqLoc(const objects::CSeq_interval &intv, int frame)
Add a masked interval to the specified frame.
EBlastEncoding GetQueryEncoding(EBlastProgramType program)
Returns the encoding for the sequence data used in BLAST for the query.
TAutoUint1ArrayPtr FindGeneticCode(int genetic_code)
Retrieves the requested genetic code in Ncbistdaa format.
TSeqPos CalculateSeqBufferLength(TSeqPos sequence_length, EBlastEncoding encoding, objects::ENa_strand strand=objects::eNa_strand_unknown, ESentinelType sentinel=eSentinels) THROWS((CBlastException))
Calculates the length of the buffer to allocate given the desired encoding, strand (if applicable) an...
EBlastProgramType m_Program
The type of search being done.
static SBlastSequence GetSequenceCompressedNucleotide(IBlastSeqVector &sv)
Auxiliary function to retrieve plus strand in compressed (ncbi4na) format.
static bool s_IsValidResidue(Uint1 res)
Tests if a number represents a valid residue.
objects::ENa_strand GetStrandOption() const
static objects::ENa_strand s_BlastSetup_GetStrand(objects::ENa_strand seqloc_strand, EBlastProgramType program, objects::ENa_strand strand_opt)
Internal function to choose between the strand specified in a Seq-loc (which specified the query stra...
static void s_SeqLoc2MaskedSubjRanges(const CSeq_loc *slp, const CSeq_loc *range, Int4 total_length, CSeqDB::TSequenceRanges &output)
bool x_NeedsTrans()
Returns true if this program needs coordinate translation.
char * s_GetCStringOfMatrixPath(string &full_path, const string &matrix_name)
Get the path to the matrix, without the actual matrix name.
static SBlastSequence GetSequenceProtein(IBlastSeqVector &sv, string *warnings=0)
Protein sequences are always encoded in eBlastEncodingProtein and always have sentinel bytes around s...
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(int index) const =0
Return the CSeq_loc associated with a sequence.
bool Empty()
Returns true if this object contains no masking information.
static bool BlastDbFileExists(string &path, bool is_prot)
Checks if a BLAST database exists at a given file path: looks for an alias file first,...
virtual const objects::CSeq_id * GetSeqId(int index) const =0
Return the sequence identifier associated with a sequence.
set< ETranslationFrame > m_Frames
Frames for masked locations.
static SBlastSequence GetSequenceSingleNucleotideStrand(IBlastSeqVector &sv, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel)
Auxiliary function to retrieve a single strand of a nucleotide sequence.
virtual string GetTitle(int index) const =0
Return the title of a sequence.
const Uint1 NCBI4NA_TO_BLASTNA[]
Translates between ncbi4na and blastna.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
bool QueryHasMultipleFrames() const
Check whether the query is multiframe for this type of search.
TAutoUint1Ptr data
Sequence data.
virtual SBlastSequence GetCompressedPlusStrand()=0
Returns the compressed nucleotide data for the plus strand, still occupying one base per byte.
ESentinelType
Allows specification of whether sentinel bytes should be used or not.
objects::ENa_strand BlastSetup_GetStrand(const objects::CSeq_loc &query_seqloc, EBlastProgramType program, objects::ENa_strand strand_option)
Choose between a Seq-loc specified query strand and the strand obtained from the CBlastOptions.
CSeqLocInfo::ETranslationFrame ETranslationFrame
Data type for frame value, however inputs to methods use "int" instead of this type for readability a...
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
string ToString() const
Converts messages to a string, which is returned.
size_t GetNumFrames() const
virtual void GetStrandData(objects::ENa_strand strand, unsigned char *buf)
Retrieve strand data in one chunk.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
virtual objects::ENa_strand GetStrand(int index) const =0
Return strand for a sequence.
#define BLASTNA_SIZE
Size of nucleic acid alphabet.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
const Uint1 kProtSentinel
Sentinel byte for protein sequences.
const set< ETranslationFrame > & ListFrames()
Returns the list of frame values for which this object contains masking information.
static SBlastSequence GetSequenceNucleotideBothStrands(IBlastSeqVector &sv, EBlastEncoding encoding, ESentinelType sentinel)
Auxiliary function to retrieve both strands of a nucleotide sequence.
virtual TSeqPos GetLength(int index) const =0
Return the length of a sequence.
virtual SBlastSequence GetBlastSequence(int index, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0) const =0
Return the sequence data for a sequence.
void UseProteinCoords(TSeqPos dna_length)
Adjusts all stored masks from nucleotide to protein offsets.
const Uint1 kNuclSentinel
Sentinel nibble for nucleotide sequences.
string FindBlastDbPath(const char *dbname, bool is_prot)
Returns the path (including a trailing path separator) to the location where the BLAST database can b...
virtual void SetCoding(objects::CSeq_data::E_Choice coding)=0
Sets the encoding for the sequence data.
EBlastEncoding GetSubjectEncoding(EBlastProgramType program)
Returns the encoding for the sequence data used in BLAST2Sequences for the subject.
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
BlastSeqLoc ** operator[](int frame)
Access the BlastSeqLocs for a given frame.
CBlastQueryFilteredFrames(EBlastProgramType program)
Construct container for frame values and BlastSeqLocs for the specified search program.
static void s_InvalidateQueryContexts(BlastQueryInfo *qinfo, int query_index)
Mark the contexts corresponding to the query identified by query_index as invalid.
SBlastSequence CompressNcbi2na(const SBlastSequence &source)
Compresses the sequence data passed in to the function from 1 base per byte to 4 bases per byte.
BlastQueryInfo * SafeSetupQueryInfo(const IBlastQuerySource &queries, const CBlastOptions *options)
Wrapper around SetupQueryInfo.
void Release(int frame)
Release the BlastSeqLocs for a given frame.
virtual TSeqPos Size() const =0
Return the number of elements in the sequence container.
~CBlastQueryFilteredFrames()
Destructor; frees any BlastSeqLoc lists not released by the caller.
static void s_QueryInfo_SetContext(BlastQueryInfo *qinfo, Uint4 index, Uint4 length)
Set field values for one element of the context array of a concatenated query.
static void s_ComputeStartEndContexts(ENa_strand strand, int num_contexts, int &start, int &end)
Calculate the starting and ending contexts for a given strand.
void SetupQueryInfo_OMF(const IBlastQuerySource &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
ObjMgr Free version of SetupQueryInfo.
virtual int GetSegmentInfo(int index) const =0
Get segment information (for mapping paired short reads)
TFrameSet m_Seqlocs
Frame and BlastSeqLoc* data.
BLAST_SequenceBlk * SafeSetupQueries(IBlastQuerySource &queries, const CBlastOptions *options, BlastQueryInfo *query_info, TSearchMessages &messages)
Wrapper around SetupQueries.
TSeqPos size() const
Returns the length of the sequence data (in the case of nucleotides, only one strand)
void x_VerifyFrame(int frame)
Verify the specified frame value.
Uint1 GetSentinelByte(EBlastEncoding encoding) THROWS((CBlastException))
Convenience function to centralize the knowledge of which sentinel bytes we use for supported encodin...
BLAST_SequenceBlk * Release()
static void s_RestrictSeqLocs_Multiframe(CBlastQueryFilteredFrames &frame_to_bsl, const IBlastQuerySource &queries, int query_index, const BlastQueryInfo *qinfo)
Restricts the masked locations in frame_to_bsl for the case when the BLAST program requires the query...
static void s_AddMask(EBlastProgramType prog, BlastMaskLoc *mask, int query_index, CBlastQueryFilteredFrames &seqloc_frames, ENa_strand strand, TSeqPos query_length)
Adds seqloc_frames to mask.
TFrameSet m_SeqlocTails
Frame and tail of BlastSeqLoc* linked list (to speed up appending)
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingProtein
NCBIstdaa.
@ eBlastEncodingError
Error value for encoding.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eBlastEncodingNcbi2na
NCBI2na.
@ eInvalidArgument
Invalid argument to some function/method (could be programmer error - prefer assertions in those case...
@ eNoSentinels
Do not use sentinel bytes.
@ eSentinels
Use sentinel bytes.
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
element_type * get(void) const
Get pointer.
element_type * release(void)
Release will release ownership of pointer to caller.
@ eFollowLinks
Follow symbolic links.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
const string & Get(const string &name, bool *found=NULL) const
Get environment value by name.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
void Warning(CExceptionArgs_Base &args)
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
virtual bool Exists(void) const
Check existence of file.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
TSeqPos GetStop(ESeqLocExtremes ext) const
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
virtual bool HasEntry(const string &section, const string &name=kEmptyStr, TFlags flags=0) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
const Tdata & Get(void) const
Get the member data.
TFrom GetFrom(void) const
Get the From member data.
list< CRef< CSeq_loc > > Tdata
E_Choice Which(void) const
Which variant is currently selected.
const Tdata & Get(void) const
Get the member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
const TMix & GetMix(void) const
Get the variant data.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
@ eNa_strand_both
in forward orientation
@ e_not_set
No variant selected.
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Ncbi4na
4 bit nucleic acid code
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
use only Cassandra database for the lookups; yes: do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
#define MIN(a, b)
returns smaller of a and b.
#define INT4_MAX
largest number represented by signed int
Uint1 Boolean
bool replacement for C
#define ABS(a)
returns absolute value of a (|a|)
#define NULLB
terminating byte of a char* string.
#define MAX(a, b)
returns larger of a and b.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
static pcre_uint8 * buffer
static SQLCHAR output[256]
Defines BLAST database access classes.
Structure to hold a sequence.
Uint4 num_seq_ranges
Number of elements in seq_ranges.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Int4 length
Length of sequence.
Uint1 * gen_code_string
for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 segment_flags
Flags describing segments for paired reads.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 query_index
Index of query (same for all frames)
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Structure for keeping the query masking information.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Uint4 min_length
Length of the shortest among the concatenated queries.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Used to hold a set of positions, mostly used for filtering.
struct BlastSeqLoc * next
next in linked list
Structure to represent a range.
List of sequence offset ranges.
value_type * get_data() const
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
A structure containing two integers, used e.g.
static const string kTitle
CTraceGlyph inline method implementation.
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
voidp calloc(uInt items, uInt size)