NCBI C++ ToolKit
blast_objmgr_tools.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 
2 /* ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho / Kevin Bealer
27  *
28  */
29 
30 /// @file blast_objmgr_tools.cpp
31 /// Functions in xblast API code that interact with object manager.
32 
33 #include <ncbi_pch.hpp>
34 #include <objmgr/seq_vector.hpp>
38 
40 #include "blast_setup.hpp"
41 #include "blast_objmgr_priv.hpp"
45 
46 #include <serial/iterator.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
49 #include "blast_seqalign.hpp"
50 
51 #include "dust_filter.hpp"
53 #include "winmask_filter.hpp"
54 
55 /** @addtogroup AlgoBlast
56  *
57  * @{
58  */
59 
62 BEGIN_SCOPE(blast)
63 
64 // N.B.: the const is removed, but the v object is never really changed,
65 // therefore we keep it as a const argument
67  EBlastProgramType program)
68  : m_TSeqLocVector(&v),
69  m_OwnTSeqLocVector(false),
70  m_Options(0),
71  m_CalculatedMasks(true),
72  m_Program(program)
73 {
74  x_AutoDetectGeneticCodes();
75 }
76 
78  const CBlastOptions* opts)
79  : m_TSeqLocVector(&v),
80  m_OwnTSeqLocVector(false),
81  m_Options(opts),
82  m_CalculatedMasks(false),
83  m_Program(opts->GetProgramType())
84 {
86 }
87 
89  EBlastProgramType program)
90  : m_QueryVector(& v),
91  m_OwnTSeqLocVector(false),
92  m_Options(0),
93  m_CalculatedMasks(false),
94  m_Program(program)
95 {
97 }
98 
100  const CBlastOptions * opts)
101  : m_QueryVector(& v),
102  m_OwnTSeqLocVector(false),
103  m_Options(opts),
104  m_CalculatedMasks(false),
105  m_Program(opts->GetProgramType())
106 {
108 }
109 
110 void
112 {
115  return;
116  }
117 
118  if (m_QueryVector.NotEmpty()) {
120  i < m_QueryVector->Size(); i++) {
123 
125  query->SetGeneticCodeId(m_Options->GetQueryGeneticCode());
126  }
127 
128  if (query->GetGeneticCodeId() != BLAST_GENETIC_CODE) {
129  // presumably this has already been set, so skip fetching it
130  // again
131  continue;
132  }
133 
134  const CSeq_id* id = query->GetQuerySeqLoc()->GetId();
135  CSeqdesc_CI desc_it(query->GetScope()->GetBioseqHandle(*id),
137  if (desc_it) {
138  try {
139  query->SetGeneticCodeId(desc_it->GetSource().GetGenCode());
140  }
141  catch(CUnassignedMember &) {
142  query->SetGeneticCodeId(BLAST_GENETIC_CODE);
143  }
144  }
145  }
146  } else {
149 
151  sseqloc->genetic_code_id = m_Options->GetQueryGeneticCode();
152  }
153 
154  if (sseqloc->genetic_code_id != BLAST_GENETIC_CODE) {
155  // presumably this has already been set, so skip fetching it
156  // again
157  continue;
158  }
159 
160  const CSeq_id* id = sseqloc->seqloc->GetId();
161  CSeqdesc_CI desc_it(sseqloc->scope->GetBioseqHandle(*id),
163  if (desc_it) {
164  try {
165  sseqloc->genetic_code_id = desc_it->GetSource().GetGenCode();
166  }
167  catch(CUnassignedMember &) {
168  sseqloc->genetic_code_id = BLAST_GENETIC_CODE;
169  }
170  }
171  }
172  }
173 }
174 
175 void
177 {
178  /// Calculate the masks only once
179  if (m_CalculatedMasks) {
180  return;
181  }
182 
183  // Without the options we cannot obtain the parameters to do the
184  // filtering on the queries to obtain the masks
185  if ( !m_Options ) {
186  m_CalculatedMasks = true;
187  return;
188  }
189 
192 
193  if (m_Options->GetDustFiltering()) {
194  if (m_QueryVector.NotEmpty()) {
196  static_cast<Uint4>(m_Options->GetDustFilteringLevel()),
197  static_cast<Uint4>(m_Options->GetDustFilteringWindow()),
198  static_cast<Uint4>(m_Options->GetDustFilteringLinker()));
199  } else {
201  static_cast<Uint4>(m_Options->GetDustFilteringLevel()),
202  static_cast<Uint4>(m_Options->GetDustFilteringWindow()),
203  static_cast<Uint4>(m_Options->GetDustFilteringLinker()));
204  }
205  }
206  if (m_Options->GetRepeatFiltering()) {
207  string rep_db = m_Options->GetRepeatFilteringDB();
208 
209  if (m_QueryVector.NotEmpty()) {
210  Blast_FindRepeatFilterLoc(*m_QueryVector, rep_db.c_str());
211  } else {
212  Blast_FindRepeatFilterLoc(*m_TSeqLocVector, rep_db.c_str());
213  }
214  }
215 
218 
219  if (m_QueryVector.NotEmpty()) {
221  } else {
223  }
224  }
225  }
226 
227  m_CalculatedMasks = true;
228 }
229 
231 {
233  delete m_TSeqLocVector;
235  }
236 }
237 
238 ENa_strand
240 {
241  if (m_QueryVector.NotEmpty()) {
244  } else {
245  return sequence::GetStrand(*(*m_TSeqLocVector)[i].seqloc,
246  (*m_TSeqLocVector)[i].scope);
247  }
248 }
249 
252 {
254 
255  if (m_QueryVector.NotEmpty()) {
257  } else {
259  m_Program, (*m_TSeqLocVector)[i].ignore_strand_in_mask);
260  }
261 }
262 
265 {
267 
268  if (m_QueryVector.NotEmpty()) {
270  } else {
271  return (*m_TSeqLocVector)[i].mask;
272  }
273 }
274 
277 {
278  if (m_QueryVector.NotEmpty()) {
279  return m_QueryVector->GetQuerySeqLoc(i);
280  } else {
281  return (*m_TSeqLocVector)[i].seqloc;
282  }
283 }
284 
285 const CSeq_id*
287 {
288  if (m_QueryVector.NotEmpty()) {
291  } else {
292  return & sequence::GetId(*(*m_TSeqLocVector)[i].seqloc,
293  (*m_TSeqLocVector)[i].scope);
294  }
295 }
296 
297 Uint4
299 {
300  if (m_QueryVector.NotEmpty()) {
301  return m_QueryVector->GetBlastSearchQuery(i)->GetGeneticCodeId();
302  } else {
303  return (*m_TSeqLocVector)[i].genetic_code_id;
304  }
305 }
306 
309  EBlastEncoding encoding,
310  objects::ENa_strand strand,
311  ESentinelType sentinel,
312  string* warnings) const
313 {
314  if (m_QueryVector.NotEmpty()) {
315  return GetSequence(*m_QueryVector->GetQuerySeqLoc(i), encoding,
316  m_QueryVector->GetScope(i), strand, sentinel, warnings);
317  } else {
318  return GetSequence(*(*m_TSeqLocVector)[i].seqloc, encoding,
319  (*m_TSeqLocVector)[i].scope, strand, sentinel, warnings);
320  }
321 }
322 
323 TSeqPos
325 {
327 
328  if (m_QueryVector.NotEmpty()) {
331  } else if (! m_TSeqLocVector->empty()) {
332  rv = sequence::GetLength(*(*m_TSeqLocVector)[i].seqloc,
333  (*m_TSeqLocVector)[i].scope);
334  }
335 
336  if (rv == numeric_limits<TSeqPos>::max()) {
337  NCBI_THROW(CBlastException, eInvalidArgument,
338  string("Could not find length of query # ")
339  + NStr::IntToString(i) + " with Seq-id ["
340  + GetSeqId(i)->AsFastaString() + "]");
341  }
342 
343  return rv;
344 }
345 
346 string
348 {
349  CConstRef<CSeq_loc> seqloc = GetSeqLoc(i);
350  CRef<CScope> scope;
351  if (m_QueryVector.NotEmpty()) {
352  scope = m_QueryVector->GetScope(i);
353  } else if (! m_TSeqLocVector->empty()) {
354  scope = (*m_TSeqLocVector)[i].scope;
355  }
356  _ASSERT(seqloc.NotEmpty());
357  _ASSERT(scope.NotEmpty());
358  if ( !seqloc->GetId() ) {
359  return kEmptyStr;
360  }
361 
362  CBioseq_Handle bh = scope->GetBioseqHandle(*seqloc->GetId());
363  if (!bh)
364  return kEmptyStr;
365 
366  string title(kEmptyStr);
367  if (bh.CanGetDescr())
368  {
369  const CSeq_descr::Tdata& descr = bh.GetDescr();
370  ITERATE(CSeq_descr::Tdata, desc, descr) {
371  if ((*desc)->Which() == CSeqdesc::e_Title && title == kEmptyStr) {
372  title = (*desc)->GetTitle();
373  break;
374  }
375  }
376  }
377 
378  return title;
379 }
380 
381 TSeqPos
383 {
384  if (m_QueryVector.NotEmpty()) {
385  return static_cast<TSeqPos>(m_QueryVector->Size());
386  } else {
387  return static_cast<TSeqPos>(m_TSeqLocVector->size());
388  }
389 }
390 
391 void
394  objects::ENa_strand strand_opt,
395  BlastQueryInfo** qinfo)
396 {
397  SetupQueryInfo_OMF(CBlastQuerySourceOM(queries, prog), prog, strand_opt, qinfo);
398 }
399 
400 void
402  BlastQueryInfo* qinfo,
403  BLAST_SequenceBlk** seqblk,
405  objects::ENa_strand strand_opt,
406  TSearchMessages& messages)
407 {
408  CBlastQuerySourceOM query_src(queries, prog);
409  SetupQueries_OMF(query_src, qinfo, seqblk, prog,
410  strand_opt, messages);
411 }
412 
413 void
416  vector<BLAST_SequenceBlk*>* seqblk_vec,
417  unsigned int* max_subjlen)
418 {
419  CBlastQuerySourceOM subj_src(subjects, prog);
420  SetupSubjects_OMF(subj_src, prog, seqblk_vec, max_subjlen);
421 }
422 
423 
424 static unsigned char ctable[16] = {0xFF, 0x00, 0x01, 0xFF, 0x02, 0xFF, 0xFF, 0xFF,
425  0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
426 
427 void s_Ncbi4naToNcbi2na(const string & ncbi4na, int base_length,
428  unsigned char * ncbi2na)
429 {
430  int inp_bytes = base_length;
431  CRandom random(base_length);
432 
433  for(int i = 0; i < inp_bytes; i++) {
434  // one input byte
435  unsigned char inp = ncbi4na[i];
436 
437  // represents 2 bases
438  unsigned char b = inp & 0xF;
439 
440  // compress each to 2 bits
441  unsigned char c = ctable[b];
442 
443  if (c != 0xFF) {
444  // No ambiguities, so we can do this the easy way.
445  ncbi2na[i] = c;
446 
447  } else {
448  if (b == 0 || b == 0x0F) {
449  //gap or N
450  ncbi2na[i] = random.GetRand() & 0x3;
451  }
452  else {
453 
454  int bitcount = ((b & 1) + ((b >> 1) & 1) +
455  ((b >> 2) & 1) + ((b >> 3) & 1));
456 
457  // 1-bit ambiguities here, indicate an error in this class.
458  _ASSERT(bitcount >= 2);
459  _ASSERT(bitcount <= 3);
460 
461  int pick = random.GetRand() % bitcount;
462 
463  for(int j = 0; j < 4; j++) {
464  // skip 0 bits in input.
465  if ((b & (1 << j)) == 0)
466  continue;
467 
468  // If the bitcount is zero, this is the bit we want.
469  if (! pick) {
470  ncbi2na[i] = j;
471  break;
472  }
473  // Else, decrement.
474  pick--;
475  }
476  }
477  }
478  }
479 
480 }
481 
482 /// Implementation of the IBlastSeqVector interface which obtains data from a
483 /// CSeq_loc and a CScope relying on the CSeqVector class
485 {
486 public:
487  CBlastSeqVectorOM(const CSeq_loc& seqloc, CScope& scope)
488  : m_SeqLoc(seqloc), m_Scope(scope), m_SeqVector(seqloc, scope)
489  {
491  }
492 
493  /** @inheritDoc */
494  virtual void SetCoding(CSeq_data::E_Choice coding) {
495  m_SeqVector.SetCoding(coding);
496  }
497 
498  /** @inheritDoc */
499  virtual Uint1 operator[] (TSeqPos pos) const { return m_SeqVector[pos]; }
500 
501  /** @inheritDoc */
502  virtual void GetStrandData(objects::ENa_strand strand, unsigned char* buf) {
503  x_FixStrand(strand);
504  for (CSeqVector_CI itr(m_SeqVector, strand); itr; ++itr) {
505  // treat gap '-' as 'N'
506  if (itr.IsInGap()) {
507  *buf++ = 0xf;
508  } else {
509  *buf++ = *itr;
510  }
511  }
512  }
513 
514  /** @inheritDoc */
516  SBlastSequence retval(size());
517  string ncbi4na = kEmptyStr;
519  s_Ncbi4naToNcbi2na(ncbi4na, size(), retval.data.get());
520  return retval;
521  }
522 
523 protected:
524  /** @inheritDoc */
525  virtual TSeqPos x_Size() const {
526  return m_SeqVector.size();
527  }
528  /** @inheritDoc */
529  virtual void x_SetPlusStrand() {
531  }
532  /** @inheritDoc */
533  virtual void x_SetMinusStrand() {
535  }
536  /** @inheritDoc
537  * @note for this class, this might be inefficient, please use
538  * GetStrandData with the appropriate strand
539  */
541  x_FixStrand(s);
543  if (s != m_Strand) {
546  }
547  }
548 
549  /// If the Seq-loc is on the minus strand and the user is
550  /// asking for the minus strand, we change the user's request
551  /// to the plus strand. If we did not do this, we would get
552  /// the plus strand (ie it would be reversed twice)
553  /// @param strand strand to handle [in|out]
554  void x_FixStrand(objects::ENa_strand& strand) const {
555  if (eNa_strand_minus == strand &&
557  strand = eNa_strand_plus;
558  }
559  }
560 
561 private:
565 };
566 
568 GetSequence(const objects::CSeq_loc& sl, EBlastEncoding encoding,
569  objects::CScope* scope,
570  objects::ENa_strand strand,
571  ESentinelType sentinel,
572  std::string* warnings)
573 {
574  // Retrieves the correct strand (plus or minus), but not both
575  CBlastSeqVectorOM sv = CBlastSeqVectorOM(sl, *scope);
576  return GetSequence_OMF(sv, encoding, strand, sentinel, warnings);
577 }
578 
579 
582 {
583  CRef<CPacked_seqint> retval;
584  if (sequences.empty()) {
585  return retval;
586  }
587 
588  retval.Reset(new CPacked_seqint);
589  ITERATE(TSeqLocVector, seq, sequences) {
590  const CSeq_id& id(sequence::GetId(*seq->seqloc, &*seq->scope));
592  if (seq->seqloc->IsWhole()) {
593  try {
594  range.Set(0, sequence::GetLength(*seq->seqloc, &*seq->scope));
595  } catch (const CException&) {
597  }
598  } else if (seq->seqloc->IsInt()) {
599  try {
600  range.SetFrom(sequence::GetStart(*seq->seqloc, &*seq->scope));
601  range.SetTo(sequence::GetStop(*seq->seqloc, &*seq->scope));
602  } catch (const CException&) {
604  }
605  } else {
606  NCBI_THROW(CBlastException, eNotSupported,
607  "Unsupported Seq-loc type used for query");
608  }
609  retval->AddInterval(id, range.GetFrom(), range.GetTo());
610  }
611  return retval;
612 }
613 
614 END_SCOPE(blast)
616 
617 /* @} */
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Definitions which are dependent on the NCBI C++ Object Manager.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
Declares class to encapsulate all BLAST options.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Definition: blast_program.c:60
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Definition: blast_program.c:43
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
Defines interface for retrieving sequence identifiers.
Declarations of auxiliary functions using IBlastSeqInfoSrc to retrieve ids and related sequence infor...
Internal auxiliary setup classes/functions for C++ BLAST APIs.
ncbi::TMaskedQueryRegions mask
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
CBioseq_Handle –.
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
Implements the object manager dependent version of the IBlastQuerySource.
Query Vector.
Definition: sseqloc.hpp:276
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
Definition: sseqloc.hpp:322
TMaskedQueryRegions GetMaskedRegions(size_type i) const
Get the masked regions for a query by number.
Definition: sseqloc.hpp:331
size_type Size() const
Returns the number of queries found in this query vector.
Definition: sseqloc.hpp:305
vector< value_type >::size_type size_type
size_type type definition
Definition: sseqloc.hpp:282
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
Definition: sseqloc.hpp:313
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Definition: sseqloc.hpp:367
Implementation of the IBlastSeqVector interface which obtains data from a CSeq_loc and a CScope relyi...
CRandom::
Definition: random_gen.hpp:66
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Thrown on an attempt to write unassigned data member.
Definition: exception.hpp:84
Lightweight wrapper around sequence data which provides a CSeqVector-like interface to the data.
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static int base_length[29]
Calls sym dust lib in algo/dustmask and returns CSeq_locs for use by BLAST.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
CRef< objects::CPacked_seqint > TSeqLocVector2Packed_seqint(const TSeqLocVector &sequences)
Converts a TSeqLocVector into a CPacked_seqint.
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(int i) const
Return the CSeq_loc associated with a sequence.
virtual const objects::CSeq_id * GetSeqId(int index) const
Return the sequence identifier associated with a sequence.
int GetWindowMaskerTaxId() const
Returns the tax id used for the windowmasker database to use, if set via SetWindowMaskerTaxId (otherw...
void SetupQueries_OMF(IBlastQuerySource &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
ObjMgr Free version of SetupQueries.
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
virtual TSeqPos GetLength(int i) const
Return the length of a sequence.
virtual void GetStrandData(objects::ENa_strand strand, unsigned char *buf)
@inheritDoc
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
SBlastSequence GetSequence_OMF(IBlastSeqVector &sv, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0)
Object manager free version of GetSequence.
TMaskedQueryRegions PackedSeqLocToMaskedQueryRegions(CConstRef< objects::CSeq_loc > sloc, EBlastProgramType program, bool assume_both_strands=false)
Auxiliary function to convert a Seq-loc describing masked query regions to a TMaskedQueryRegions obje...
int GetDustFilteringLevel() const
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
TSeqLocVector * m_TSeqLocVector
Reference to input TSeqLocVector (or NULL if not used)
virtual string GetTitle(int index) const
Return the title of a sequence.
virtual SBlastSequence GetBlastSequence(int i, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, string *warnings=0) const
Return the sequence data for a sequence.
CBlastQuerySourceOM(TSeqLocVector &v, EBlastProgramType prog)
Constructor which takes a TSeqLocVector.
objects::ENa_strand m_Strand
Maintains the state of the strand currently saved by the implementation of this class.
CBlastSeqVectorOM(const CSeq_loc &seqloc, CScope &scope)
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
virtual void SetCoding(CSeq_data::E_Choice coding)
@inheritDoc
virtual SBlastSequence GetCompressedPlusStrand()
@inheritDoc
CRef< CBlastQueryVector > m_QueryVector
Reference to input CBlastQueryVector (or empty if not used)
virtual Uint1 operator[](TSeqPos pos) const
@inheritDoc
virtual TMaskedQueryRegions GetMaskedRegions(int i)
Return the filtered (masked) regions for a sequence.
const CBlastOptions * m_Options
BLAST algorithm options.
virtual TSeqPos x_Size() const
@inheritDoc
const CSeq_loc & m_SeqLoc
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void x_SetStrand(ENa_strand s)
@inheritDoc
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
void x_AutoDetectGeneticCodes(void)
Tries to extract the genetic code using the CScope, if it succeeds, it supersedes what's specified in...
ESentinelType
Allows specification of whether sentinel bytes should be used or not.
Definition: blast_setup.hpp:93
bool GetDustFiltering() const
void Blast_FindWindowMaskerLoc(CBlastQueryVector &query, const CBlastOptions *opts)
Find Window Masker filtered locations using a BlastOptions.
virtual Uint4 GetGeneticCodeId(int index) const
Retrieve the genetic code associated with a sequence.
int GetQueryGeneticCode() const
void x_FixStrand(objects::ENa_strand &strand) const
If the Seq-loc is on the minus strand and the user is asking for the minus strand,...
CRef< objects::CSeq_loc > MaskedQueryRegionsToPackedSeqLoc(const TMaskedQueryRegions &sloc)
Interface to build a CSeq-loc from a TMaskedQueryRegion; note that conversion in this dire...
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
static unsigned char ctable[16]
virtual objects::ENa_strand GetStrand(int i) const
Return strand for a sequence.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
int GetDustFilteringLinker() const
void Blast_FindDustFilterLoc(TSeqLocVector &queries, const CBlastNucleotideOptionsHandle *nucl_handle)
Finds dust locations for a given set of sequences by calling the symmetric dust lib.
Definition: dust_filter.cpp:60
const char * GetWindowMaskerDatabase() const
Return the name of the windowmasker database to use.
void x_CalculateMasks()
Performs filtering on the query sequences to calculate the masked locations.
bool GetRepeatFiltering() const
Returns true if repeat filtering is on.
bool m_CalculatedMasks
this flag allows for lazy initialization of the masking locations
void SetupQueryInfo_OMF(const IBlastQuerySource &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
ObjMgr Free version of SetupQueryInfo.
const char * GetRepeatFilteringDB() const
Returns the name of the repeat filtering database to use.
TSeqPos size() const
Returns the length of the sequence data (in the case of nucleotides, only one strand)
int GetDustFilteringWindow() const
EBlastProgramType m_Program
BLAST program variable.
virtual CConstRef< objects::CSeq_loc > GetMask(int i)
Return the filtered (masked) regions for a sequence.
virtual TSeqPos Size() const
Return the number of elements in the sequence container.
bool m_OwnTSeqLocVector
flag to determine whether or not the member above should be deleted in the destructor
virtual void x_SetMinusStrand()
@inheritDoc
virtual ~CBlastQuerySourceOM()
dtor which determines if the internal pointer to its data should be deleted or not.
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
virtual void x_SetPlusStrand()
@inheritDoc
void s_Ncbi4naToNcbi2na(const string &ncbi4na, int base_length, unsigned char *ncbi2na)
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const TDescr & GetDescr(void) const
bool CanGetDescr(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
ENa_strand GetStrand(void) const
Definition: seq_vector.hpp:336
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void SetCoding(TCoding coding)
const_iterator begin(void) const
Definition: seq_vector.hpp:298
const_iterator end(void) const
Definition: seq_vector.hpp:305
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
char * buf
int i
static char * prog
Definition: mdb_load.c:33
range(_Ty, _Ty) -> range< _Ty >
T max(T x_, T y_)
C++ implementation of repeats filtering for C++ BLAST.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to hold a sequence.
Definition: blast_def.h:242
The query related information.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
static string query
#define _ASSERT
Blast wrappers for WindowMasker filtering.
Modified on Wed Apr 24 14:11:16 2024 by modify_doxy.py rev. 669887