NCBI C++ ToolKit
blast_setup.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_setup.hpp 75049 2016-10-17 19:00:07Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_setup.hpp
31  * Internal auxiliary setup classes/functions for C++ BLAST APIs.
32  * These facilities are free of any dependencies on the NCBI C++ object
33  * manager.
34  */
35 
36 #ifndef ALGO_BLAST_API___BLAST_SETUP__HPP
37 #define ALGO_BLAST_API___BLAST_SETUP__HPP
38 
43 
44 // Object includes
47 #include <objects/seq/Seq_data.hpp>
48 
49 /** @addtogroup AlgoBlast
50  *
51  * @{
52  */
53 
55 
56 BEGIN_SCOPE(blast)
57 class CBlastOptions;
58 
59 /// Structure to store sequence data and its length for use in the CORE
60 /// of BLAST (it's a malloc'ed array of Uint1 and its length)
61 /// FIXME: do not confuse with blast_seg.c's SSequence
63  // AutoPtr<Uint1, CDeleter<Uint1> > == TAutoUint1Ptr
64  TAutoUint1Ptr data; /**< Sequence data */
65  TSeqPos length; /**< Length of the buffer above (not
66  necessarily sequence length!) */
67 
68  /** Default constructor */
70  : data(NULL), length(0) {}
71 
72  /** Allocates a sequence buffer of the specified length
73  * @param buf_len number of bytes to allocate [in]
74  */
76  : data((Uint1*)calloc(buf_len, sizeof(Uint1))), length(buf_len)
77  {
78  if ( !data ) {
79  NCBI_THROW(CBlastSystemException, eOutOfMemory,
80  "Failed to allocate " + NStr::IntToString(buf_len) + " bytes");
81  }
82  }
83 
84  /** Parametrized constructor
85  * @param d buffer containing sequence data [in]
86  * @param l length of buffer above [in]
87  */
89  : data(d), length(l) {}
90 };
91 
92 /// Allows specification of whether sentinel bytes should be used or not
94  eSentinels, ///< Use sentinel bytes
95  eNoSentinels ///< Do not use sentinel bytes
96 };
97 
98 /// Lightweight wrapper around an indexed sequence container. These sequences
99 /// are then used to set up internal BLAST data structures for sequence data
101 {
102 public:
103  /// Our no-op virtual destructor
104  virtual ~IBlastQuerySource() {}
105 
106  /// Return strand for a sequence
107  /// @param index index of the sequence in the sequence container [in]
108  virtual objects::ENa_strand GetStrand(int index) const = 0;
109 
110  /// Return the number of elements in the sequence container
111  virtual TSeqPos Size() const = 0;
112 
113  /// Tells whether the sequence container holds no elements (Size() is 0)
114  bool Empty() const { return 0 == Size(); }
115 
116  /// Return the filtered (masked) regions for a sequence
117  /// @param index index of the sequence in the sequence container [in]
118  virtual CConstRef<objects::CSeq_loc> GetMask(int index) = 0;
119 
120  /// Return the filtered (masked) regions for a sequence
121  /// @param index index of the sequence in the sequence container [in]
122  virtual TMaskedQueryRegions GetMaskedRegions(int index) = 0;
123 
124  /// Return the CSeq_loc associated with a sequence
125  /// @param index index of the sequence in the sequence container [in]
126  virtual CConstRef<objects::CSeq_loc> GetSeqLoc(int index) const = 0;
127 
128  /// Return the sequence identifier associated with a sequence
129  /// @param index index of the sequence in the sequence container [in]
130  virtual const objects::CSeq_id* GetSeqId(int index) const = 0;
131 
132  /// Retrieve the genetic code associated with a sequence
133  /// @param index index of the sequence in the sequence container [in]
134  virtual Uint4 GetGeneticCodeId(int index) const = 0;
135 
136  /// Return the sequence data for a sequence
137  /// @param index index of the sequence in the sequence container [in]
138  /// @param encoding desired encoding [in]
139  /// @param strand strand to fetch [in]
140  /// @param sentinel specifies to use or not to use sentinel bytes around
141  /// sequence data. Note that this is ignored for proteins, as in the
142  /// CORE of BLAST, proteins always have sentinel bytes [in]
143  /// @param warnings if not NULL, warnings will be returned in this string
144  /// [in|out]
145  /// @return SBlastSequence structure containing sequence data requested
146  virtual SBlastSequence
147  GetBlastSequence(int index, EBlastEncoding encoding,
148  objects::ENa_strand strand, ESentinelType sentinel,
149  std::string* warnings = 0) const = 0;
150 
151  /// Return the length of a sequence
152  /// @param index index of the sequence in the sequence container [in]
153  virtual TSeqPos GetLength(int index) const = 0;
154 
155  /// Return the title of a sequence
156  /// @param index index of the sequence in the sequence container [in]
157  /// @return the sequence title or kEmptyStr if not available
158  virtual string GetTitle(int index) const = 0;
159 
160  /// Is this sequence followed by a mate (for mapping short reads)
161  NCBI_DEPRECATED virtual bool IsFirstOfAPair(int index) const = 0;
162 
163  /// Get segment information (for mapping paired short reads)
164  virtual int GetSegmentInfo(int index) const = 0;
165 };
166 
167 /// Choose between a Seq-loc specified query strand and the strand obtained
168 /// from the CBlastOptions
169 /// @param query_seqloc Seq-loc corresponding to a given query sequence [in]
170 /// @param program program type from the CORE's point of view [in]
171 /// @param strand_option strand as specified by the BLAST options [in]
174 BlastSetup_GetStrand(const objects::CSeq_loc& query_seqloc,
175  EBlastProgramType program,
176  objects::ENa_strand strand_option);
177 
178 /// Lightweight wrapper around sequence data which provides a CSeqVector-like
179 /// interface to the data
181 public:
182  /// Our no-op virtual destructor
183  virtual ~IBlastSeqVector() {}
184 
185  /// Sets the encoding for the sequence data.
186  /// Two encodings are really necessary: ncbistdaa and ncbi4na, both use 1
187  /// byte per residue/base
188  virtual void SetCoding(objects::CSeq_data::E_Choice coding) = 0;
189  /// Reports the length of the sequence data as given by x_Size() (for
190  /// nucleotides this is the length of a single strand)
191  /// @throws CBlastException (eInvalidArgument) if that length is 0
192  TSeqPos size() const {
193  const TSeqPos num_residues = x_Size();
194  if (0 == num_residues) {
195  NCBI_THROW(CBlastException, eInvalidArgument,
196  "Sequence contains no data");
197  }
198  return num_residues;
199  }
200  /// Allows index-based access to the sequence data
201  virtual Uint1 operator[] (TSeqPos pos) const = 0;
202 
203  /// Copies the data of one strand into a caller-provided buffer in one call
204  /// @param strand strand to fetch; forward selects plus, otherwise minus [in]
205  /// @param buf caller-allocated destination with enough capacity for the
206  /// entire sequence data (x_Size() bytes are written) [out]
207  /// @note default implementation copies one residue at a time via operator[]
208  virtual void GetStrandData(objects::ENa_strand strand,
209  unsigned char* buf) {
210  if ( !objects::IsForward(strand) ) {
211  x_SetMinusStrand();
212  } else {
213  x_SetPlusStrand();
214  }
215  for (TSeqPos i = 0, num_residues = x_Size(); i != num_residues; ++i) {
216  buf[i] = (*this)[i];
217  }
218  }
219 
220  /// For nucleotide sequences this instructs the implementation (through
221  /// x_SetPlusStrand()) to use the plus strand, then records it in m_Strand
222  void SetPlusStrand() {
223  x_SetPlusStrand();
224  m_Strand = objects::eNa_strand_plus;
225  }
226  /// For nucleotide sequences this instructs the implementation (through
227  /// x_SetMinusStrand()) to use the minus strand, then records it in m_Strand
228  void SetMinusStrand() {
229  x_SetMinusStrand();
230  m_Strand = objects::eNa_strand_minus;
231  }
232  /// Accessor for the strand currently set
234  return m_Strand;
235  }
236  /// Returns the compressed nucleotide data for the plus strand, still
237  /// occupying one base per byte.
239 
240 protected:
241  /// Method which retrieves the size of the sequence vector, as described in
242  /// the size() method above
243  virtual TSeqPos x_Size() const = 0;
244  /// Method which does the work for setting the plus strand of the
245  /// nucleotide sequence data
246  virtual void x_SetPlusStrand() = 0;
247  /// Method which does the work for setting the minus strand of the
248  /// nucleotide sequence data
249  virtual void x_SetMinusStrand() = 0;
250 
251  /// Maintains the state of the strand currently saved by the implementation
252  /// of this class
254 };
255 
256 /** ObjMgr Free version of SetupQueryInfo.
257  * NB: effective length will be assigned inside the engine.
258  * @param queries Vector of query locations [in]
259  * @param prog program type from the CORE's point of view [in]
260  * @param strand_opt Unless the strand option is set to single strand, the
261  * actual CSeq_locs in the TSeqLocVector dictate which strand to use
262  * during the search [in]
263  * @param qinfo Allocated query info structure [out]
264  */
266 void
269  objects::ENa_strand strand_opt,
270  BlastQueryInfo** qinfo);
271 
272 /// ObjMgr Free version of SetupQueries.
273 /// @param queries vector of blast::SSeqLoc structures [in]
274 /// @param qinfo BlastQueryInfo structure to obtain context information [in]
275 /// @param seqblk Structure to save sequence data, allocated in this
276 /// function [out]
277 /// @param messages object to save warnings/errors for all queries [out]
278 /// @param prog program type from the CORE's point of view [in]
279 /// @param strand_opt Unless the strand option is set to single strand, the
280 /// actual CSeq_locs in the TSeqLocVector dictate which strand to use
281 /// during the search [in]
283 void
285  BlastQueryInfo* qinfo,
286  BLAST_SequenceBlk** seqblk,
288  objects::ENa_strand strand_opt,
289  TSearchMessages& messages);
290 
291 /** Object manager free version of SetupSubjects
292  * @param subjects Vector of subject locations [in]
293  * @param program BLAST program [in]
294  * @param seqblk_vec Vector of subject sequence data structures [out]
295  * @param max_subjlen Maximal length of the subject sequences [out]
296  */
298 void
300  EBlastProgramType program,
301  vector<BLAST_SequenceBlk*>* seqblk_vec,
302  unsigned int* max_subjlen);
303 
304 /** Object manager free version of GetSequence
305  */
309  objects::ENa_strand strand,
310  ESentinelType sentinel,
311  std::string* warnings = 0);
312 
313 /** Calculates the length of the buffer to allocate given the desired encoding,
314  * strand (if applicable) and use of sentinel bytes around sequence.
315  * @param sequence_length Length of the sequence [in]
316  * @param encoding Desired encoding for calculation (supported encodings are
317  * listed in GetSequence()) [in]
318  * @param strand Which strand to use for calculation [in]
319  * @param sentinel Whether or not to include sentinels in the calculation.
320  * The same criteria as in GetSequence() apply [in]
321  * @return Length of the buffer to allocate to contain original sequence of
322  * length sequence_length for given encoding and parameter constraints.
323  * If the sequence_length is 0, the return value will be 0 too
324  * @throws CBlastException in case of unsupported encoding
325  */
327 TSeqPos
328 CalculateSeqBufferLength(TSeqPos sequence_length, EBlastEncoding encoding,
329  objects::ENa_strand strand =
331  ESentinelType sentinel = eSentinels)
333 
334 /// Compresses the sequence data passed in to the function from 1 base per byte
335 /// to 4 bases per byte
336 /// @param source input sequence data in ncbi2na format, with ambiguities
337 /// randomized [in]
338 /// @return compressed version of the input
339 /// @throws CBlastException in case of memory allocation failure
340 /// @todo use CSeqConvert::Pack?
343 
344 /** Convenience function to centralize the knowledge of which sentinel bytes we
345  * use for supported encodings. Note that only eBlastEncodingProtein,
346  * eBlastEncodingNucleotide, and eBlastEncodingNcbi4na support sentinel bytes,
347  * any other values for encoding will cause an exception to be thrown.
348  * @param encoding Encoding for which a sentinel byte is needed [in]
349  * @return sentinel byte
350  * @throws CBlastException in case of unsupported encoding
351  */
354 
355 /** Returns the path (including a trailing path separator) to the location
356  * where the BLAST database can be found.
357  * @param dbname Database to search for
358  * @param is_prot true if this is a protein database
359  */
361 string
362 FindBlastDbPath(const char* dbname, bool is_prot);
363 
364 /** Returns the number of contexts for a given BLAST program
365  * @sa BLAST_GetNumberOfContexts
366  * @param p program
367  */
369 unsigned int
371 
372 
373 /// Returns the encoding for the sequence data used in BLAST for the query
374 /// @param program program type [in]
375 /// @throws CBlastException in case of unsupported program
379 
380 /// Returns the encoding for the sequence data used in BLAST2Sequences for
381 /// the subject
382 /// @param program program type [in]
383 /// @throws CBlastException in case of unsupported program
387 
388 /// Wrapper around SetupQueries
389 /// @param queries interface to obtain query data [in]
390 /// @param options BLAST algorithm options [in]
391 /// @param query_info BlastQueryInfo structure [in|out]
392 /// @param messages error/warning messages are returned here [in|out]
396  const CBlastOptions* options,
397  BlastQueryInfo* query_info,
398  TSearchMessages& messages);
399 
400 /// Wrapper around SetupQueryInfo
401 /// @param queries interface to obtain query data [in]
402 /// @param options BLAST algorithm options [in]
405 SafeSetupQueryInfo(const IBlastQuerySource& queries,
406  const CBlastOptions* options);
407 
408 
409 /// Returns the path to a specified matrix.
410 /// This is the implementation of the GET_MATRIX_PATH callback.
411 ///
412 /// @param matrix_name matrix name (e.g., BLOSUM62) [in]
413 /// @param is_prot matrix is for proteins if TRUE [in]
414 /// @return path to matrix, should be deallocated by user.
416 char*
417 BlastFindMatrixPath(const char* matrix_name, Boolean is_prot);
418 
419 /// Collection of BlastSeqLoc lists for filtering processing.
420 ///
421 /// This class acts as a container for frame values and collections of
422 /// BlastSeqLoc objects used by the blast filtering processing code.
423 /// The support for filtering of blastx searches adds complexity and
424 /// creates more opportunities for errors to occur. This class was
425 /// designed to handle some of that complexity, and guard against some
426 /// of those possible errors.
427 
429 public:
430  /// Data type for frame value, however inputs to methods use "int"
431  /// instead of this type for readability and brevity.
433 
434  /// Construct container for frame values and BlastSeqLocs for the
435  /// specified search program.
436  /// @param program The type of search being done.
438 
439  /// Construct container for frame values and BlastSeqLocs from a
440  /// TMaskedQueryRegions vector.
441  /// @param program Search program value used [in]
442  /// @param mqr MaskedQueryRegions to convert [in]
444  const TMaskedQueryRegions & mqr);
445 
446  /// Destructor; frees any BlastSeqLoc lists not released by the
447  /// caller.
449 
450  /// Add a masked interval to the specified frame.
451  ///
452  /// The specified interval of the specified frame is masked. This
453  /// creates a BlastSeqLoc object inside this container for that
454  /// frame, which will be freed at destruction time unless the
455  /// client code calls Release() for that frame.
456  ///
457  /// @param intv The interval to mask.
458  /// @param frame The specific frame, expressed as a value from ETranslationFrame, on which this interval falls.
459  void AddSeqLoc(const objects::CSeq_interval & intv, int frame);
460 
461  /// Access the BlastSeqLocs for a given frame.
462  ///
463  /// A pointer is returned to the list of BlastSeqLocs associated
464  /// with a given frame.
465  /// @param frame The specific frame, expressed as a value from ETranslationFrame, whose BlastSeqLoc list is requested.
466  BlastSeqLoc ** operator[](int frame);
467 
468  /// Release the BlastSeqLocs for a given frame.
469  ///
470  /// The given frame is cleared (the data removed) without freeing
471  /// the associated objects. The calling code takes responsibility
472  /// for freeing the associated list of objects.
473  /// @param frame The specific frame, expressed as a value from ETranslationFrame, whose BlastSeqLoc list is released.
474  void Release(int frame);
475 
476  /// Check whether the query is multiframe for this type of search.
477  bool QueryHasMultipleFrames() const;
478 
479  /// Returns the list of frame values for which this object
480  /// contains masking information.
481  const set<ETranslationFrame>& ListFrames();
482 
483  /// Returns true if this object contains any masking information.
484  bool Empty();
485 
486  /// Adjusts all stored masks from nucleotide to protein offsets.
487  ///
488  /// Values stored here must be converted to protein offsets after
489  /// a certain stage of processing. This method only has an effect
490  /// for types of searches that need this service (which are those
491  /// searches where the query sequence is translated.) Additional
492  /// calls to this method will have no effect.
493  ///
494  /// @param dna_length The query length in nucleotide bases.
495  void UseProteinCoords(TSeqPos dna_length);
496  /// Returns what BLAST_GetNumberOfContexts() reports for m_Program
497  size_t GetNumFrames() const {
498  return static_cast<size_t>(BLAST_GetNumberOfContexts(m_Program));
499  }
500 private:
501  /// Prevent copy construction.
503 
504  /// Prevent assignment.
506 
507  /// Verify the specified frame value.
508  void x_VerifyFrame(int frame);
509 
510  /// Returns true if this program needs coordinate translation.
511  bool x_NeedsTrans();
512 
513  /// The type of search being done.
515 
516  /// Frame and BlastSeqLoc* info type.
518 
519  /// Frame and BlastSeqLoc* data.
521  /// Frame and tail of BlastSeqLoc* linked list (to speed up appending)
523 
524  /// Frames for masked locations
526 
527  /// True if this object's masked regions store DNA coordinates
528  /// that will later be translated into protein coordinates.
530 };
531 
532 
533 END_SCOPE(blast)
535 
536 /* @} */
537 
538 #endif /* ALGO_BLAST_API___BLAST_SETUP__HPP */
bool IsForward(ENa_strand s)
Definition: Na_strand.hpp:68
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Declares the BLAST exception class.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
The structures and functions in blast_options.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Definitions of special type used in BLAST.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
Definition: blast_util.c:1373
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
Collection of BlastSeqLoc lists for filtering processing.
Defines system exceptions that occurred while running BLAST.
CObject –.
Definition: ncbiobj.hpp:180
Lightweight wrapper around an indexed sequence container.
Lightweight wrapper around sequence data which provides a CSeqVector-like interface to the data.
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
char data[12]
Definition: iconv.c:80
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
Definition: blast_setup.hpp:65
virtual TMaskedQueryRegions GetMaskedRegions(int index)=0
Return the filtered (masked) regions for a sequence.
bool Empty() const
Returns true if the container is empty, else false.
virtual CConstRef< objects::CSeq_loc > GetMask(int index)=0
Return the filtered (masked) regions for a sequence.
void SetupQueries_OMF(IBlastQuerySource &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
ObjMgr Free version of SetupQueries.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
void SetMinusStrand()
For nucleotide sequences this instructs the implementation to convert its representation to be that o...
virtual bool IsFirstOfAPair(int index) const =0
Is this sequence followed by a mate (for mapping short reads)
SBlastSequence GetSequence_OMF(IBlastSeqVector &sv, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0)
Object manager free version of GetSequence.
virtual Uint4 GetGeneticCodeId(int index) const =0
Retrieve the genetic code associated with a sequence.
void SetupSubjects_OMF(IBlastQuerySource &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Object manager free version of SetupSubjects.
bool m_TranslateCoords
True if this object's masked regions store DNA coordinates that will later be translated into protein...
CBlastQueryFilteredFrames(CBlastQueryFilteredFrames &f)
Prevent copy construction.
EBlastEncoding GetQueryEncoding(EBlastProgramType program)
Returns the encoding for the sequence data used in BLAST for the query.
map< ETranslationFrame, BlastSeqLoc * > TFrameSet
Frame and BlastSeqLoc* info type.
objects::ENa_strand GetStrand() const
Accessor for the strand currently set.
objects::ENa_strand m_Strand
Maintains the state of the strand currently saved by the implementation of this class.
TSeqPos CalculateSeqBufferLength(TSeqPos sequence_length, EBlastEncoding encoding, objects::ENa_strand strand=objects::eNa_strand_unknown, ESentinelType sentinel=eSentinels) THROWS((CBlastException))
Calculates the length of the buffer to allocate given the desired encoding, strand (if applicable) an...
EBlastProgramType m_Program
The type of search being done.
virtual TSeqPos x_Size() const =0
Method which retrieves the size of the sequence vector, as described in the size() method above.
SBlastSequence(Uint1 *d, TSeqPos l)
Parametrized constructor.
Definition: blast_setup.hpp:88
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(int index) const =0
Return the CSeq_loc associated with a sequence.
virtual const objects::CSeq_id * GetSeqId(int index) const =0
Return the sequence identifier associated with a sequence.
set< ETranslationFrame > m_Frames
Frames for masked locations.
virtual string GetTitle(int index) const =0
Return the title of a sequence.
virtual void x_SetMinusStrand()=0
Method which does the work for setting the minus strand of the nucleotide sequence data.
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
virtual SBlastSequence GetCompressedPlusStrand()=0
Returns the compressed nucleotide data for the plus strand, still occupying one base per byte.
ESentinelType
Allows specification of whether sentinel bytes should be used or not.
Definition: blast_setup.hpp:93
virtual ~IBlastSeqVector()
Our no-op virtual destructor.
objects::ENa_strand BlastSetup_GetStrand(const objects::CSeq_loc &query_seqloc, EBlastProgramType program, objects::ENa_strand strand_option)
Choose between a Seq-loc specified query strand and the strand obtained from the CBlastOptions.
CSeqLocInfo::ETranslationFrame ETranslationFrame
Data type for frame value, however inputs to methods use "int" instead of this type for readability a...
size_t GetNumFrames() const
virtual ~IBlastQuerySource()
Our no-op virtual destructor.
virtual void GetStrandData(objects::ENa_strand strand, unsigned char *buf)
Retrieve strand data in one chunk.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
virtual objects::ENa_strand GetStrand(int index) const =0
Return strand for a sequence.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
virtual TSeqPos GetLength(int index) const =0
Return the length of a sequence.
virtual SBlastSequence GetBlastSequence(int index, EBlastEncoding encoding, objects::ENa_strand strand, ESentinelType sentinel, std::string *warnings=0) const =0
Return the sequence data for a sequence.
string FindBlastDbPath(const char *dbname, bool is_prot)
Returns the path (including a trailing path separator) to the location where the BLAST database can b...
virtual void SetCoding(objects::CSeq_data::E_Choice coding)=0
Sets the encoding for the sequence data.
EBlastEncoding GetSubjectEncoding(EBlastProgramType program)
Returns the encoding for the sequence data used in BLAST2Sequences for the subject.
void SetPlusStrand()
For nucleotide sequences this instructs the implementation to convert its representation to be that o...
SBlastSequence(TSeqPos buf_len)
Allocates a sequence buffer of the specified length.
Definition: blast_setup.hpp:75
SBlastSequence CompressNcbi2na(const SBlastSequence &source)
Compresses the sequence data passed in to the function from 1 base per byte to 4 bases per byte.
SBlastSequence()
Default constructor.
Definition: blast_setup.hpp:69
BlastQueryInfo * SafeSetupQueryInfo(const IBlastQuerySource &queries, const CBlastOptions *options)
Wrapper around SetupQueryInfo.
virtual TSeqPos Size() const =0
Return the number of elements in the sequence container.
void SetupQueryInfo_OMF(const IBlastQuerySource &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
ObjMgr Free version of SetupQueryInfo.
virtual int GetSegmentInfo(int index) const =0
Get segment information (for mapping paired short reads)
CBlastQueryFilteredFrames & operator=(CBlastQueryFilteredFrames &f)
Prevent assignment.
TFrameSet m_Seqlocs
Frame and BlastSeqLoc* data.
BLAST_SequenceBlk * SafeSetupQueries(IBlastQuerySource &queries, const CBlastOptions *options, BlastQueryInfo *query_info, TSearchMessages &messages)
Wrapper around SetupQueries.
TSeqPos size() const
Returns the length of the sequence data (in the case of nucleotides, only one strand)
Uint1 GetSentinelByte(EBlastEncoding encoding) THROWS((CBlastException))
Convenience function to centralize the knowledge of which sentinel bytes we use for supported encodin...
virtual void x_SetPlusStrand()=0
Method which does the work for setting the plus strand of the nucleotide sequence data.
TFrameSet m_SeqlocTails
Frame and tail of BlastSeqLoc* linked list (to speed up appending)
@ eNoSentinels
Do not use sentinel bytes.
Definition: blast_setup.hpp:95
@ eSentinels
Use sentinel bytes.
Definition: blast_setup.hpp:94
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define THROWS(x)
Definition: ncbiexpt.hpp:75
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
#define NCBI_DEPRECATED
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
char * buf
static char * prog
Definition: mdb_load.c:33
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Definition: pointer.h:1149
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
Structure to hold a sequence.
Definition: blast_def.h:242
The query related information.
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
voidp calloc(uInt items, uInt size)
Modified on Wed Apr 17 13:09:39 2024 by modify_doxy.py rev. 669887