NCBI C++ ToolKit
magicblast.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: magicblast.hpp 96450 2022-04-01 12:54:34Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Greg Boratyn
27  *
28  */
29 
30 /// @file magicblast.hpp
31 /// Declares CMagicBlast, the C++ API for the BLAST RNA-Seq mapping engine.
32 
33 #ifndef ALGO_BLAST_API___MAGICBLAST__HPP
34 #define ALGO_BLAST_API___MAGICBLAST__HPP
35 
43 
44 /** @addtogroup AlgoBlast
45  *
46  * @{
47  */
48 
50 BEGIN_SCOPE(blast)
51 
52 // Forward declarations
53 class IQueryFactory;
55 
56 /// BLAST RNA-Seq mapper: maps short reads (queries) to a genome given as a BLAST database
58 {
59 public:
60 
61 public:
62  /// Constructor to map short reads as queries to a genome as BLAST database
63  /// @param query_factory
64  /// Short read sequences to map [in]
65  /// @param blastdb
66  /// Adapter to the BLAST database [in]
67  /// @param options
68  /// MAGIC-BLAST options [in]
69  CMagicBlast(CRef<IQueryFactory> query_factory,
70  CRef<CLocalDbAdapter> blastdb,
72 
73  /// Destructor
75 
76  /// Run the RNA-Seq mapping
78 
79  CRef<CMagicBlastResultSet> RunEx(void); ///< Returns a CMagicBlastResultSet (presumably also runs the mapping - TODO confirm)
80 
82  {return m_Messages;} // hands back the stored warning and error messages (m_Messages)
83 
84 
85 protected:
86  /// Copy constructor, declared in the protected section to prohibit copying
87  CMagicBlast(const CMagicBlast& rhs);
88  /// Prohibit assignment operator
90 
91  /// Perform sanity checks on input arguments
92  void x_Validate(void);
93  /// NOTE(review): undocumented; presumably runs the search and returns a status code - confirm
94  int x_Run(void);
95 
96  /// Takes BLAST mapping results and returns a Seq-align-set (presumably the converted alignments - TODO confirm)
97  CRef<CSeq_align_set> x_BuildSeqAlignSet(
98  const BlastMappingResults* results);
99  /// Takes BLAST mapping results and returns a CMagicBlastResultSet (presumably one entry per read or pair - TODO confirm)
100  CRef<CMagicBlastResultSet> x_BuildResultSet(
101  const BlastMappingResults* results);
102 
103 
104  /// Create results: static helper that returns a Seq-align-set for an HSP chain
105  static CRef<CSeq_align_set> x_CreateSeqAlignSet(const HSPChain* results,
106  CRef<ILocalQueryData> qdata,
107  CRef<IBlastSeqInfoSrc> seqinfo_src,
108  const BlastQueryInfo* query_info,
109  bool btop_splice_signals);
110 
111 private:
112  /// Queries
114 
115  /// Reference to a BLAST subject/database object
117 
118  /// Options to configure the search
120 
121  /// Object that runs BLAST search
123 
124  /// Internal data structures
126 
127  /// Warning and error messages
129 
130  /// Should BTOP strings be formatted with splice signals
132 };
133 
134 
135 /// Magic-BLAST results for a single query/read or a pair of reads
137 {
138 public:
139 
140  /// Information flags about mapping results (single-bit values, tested with bitwise AND)
141  typedef enum {
142  /// Read is unaligned
143  fUnaligned = 1,
144 
145  /// Read did not pass quality filtering
146  fFiltered = 1 << 1
147  } EResultsInfo;
148 
149  typedef int TResultsInfo; ///< Integer holding EResultsInfo flags
150 
151  /// Ordering of alignments (pair configuration), the argument type of SortAlignments()
152  typedef enum {
153  eFwRevFirst = 0,
154  eRevFwFirst
155  } EOrdering;
156 
157 
158  /// Constructor for a pair
160  CRef<CSeq_align_set> aligns,
161  const TMaskedQueryRegions* query_mask = NULL,
162  const TMaskedQueryRegions* mate_mask = NULL,
163  int query_length = 0,
164  int mate_length = 0);
165 
166 
167  /// Constructor for a single read
169  CRef<CSeq_align_set> aligns,
170  const TMaskedQueryRegions* query_mask = NULL,
171  int query_length = 0);
172 
173  /// Get alignments (read-only reference)
174  CConstRef<CSeq_align_set> GetSeqAlign(void) const {return m_Aligns;}
175 
176  /// Get non-const alignments
177  CRef<CSeq_align_set> SetSeqAlign(void) {return m_Aligns;}
178 
179  /// Are alignments computed for paired reads
180  bool IsPaired(void) const {return m_Paired;}
181 
182  /// Is the aligned pair concordant?
183  bool IsConcordant(void) const {return m_Concordant;}
184 
185  /// Get alignment flags for the query
186  TResultsInfo GetFirstInfo(void) const {return m_FirstInfo;}
187 
188  /// Get alignment flags for the mate
189  TResultsInfo GetLastInfo(void) const {return m_LastInfo;}
190 
191  /// Get query sequence id
192  const CSeq_id& GetQueryId(void) const {return *m_QueryId;}
193 
194  /// Get sequence id of the first segment of a paired read (same as GetQueryId())
195  const CSeq_id& GetFirstId(void) const {return GetQueryId();}
196 
197  /// Get sequence id of the last sequence of a paired read
198  const CSeq_id& GetLastId(void) const {return *m_MateId;}
199 
200  /// Is the query aligned (true when the fUnaligned flag is not set in the query flags)
201  bool FirstAligned(void) const {return (m_FirstInfo & fUnaligned) == 0;}
202 
203  /// Is the mate aligned (true when the fUnaligned flag is not set in the mate flags)
204  bool LastAligned(void) const {return (m_LastInfo & fUnaligned) == 0;}
205 
206  /// Sort alignments by selected criteria (pair configuration)
207  void SortAlignments(EOrdering order);
208 
209 private:
210  void x_SetInfo(int first_length,
211  const TMaskedQueryRegions* first_masks,
212  int last_length = 0,
213  const TMaskedQueryRegions* last_masks = NULL); // NOTE(review): undocumented; presumably sets the query/mate alignment flags from lengths and masks - confirm
214 
215 private:
216  /// Query id
218 
219  /// Mate id if results are for paired reads
221 
222  /// Alignments for a single or a pair of reads
224 
225  /// True if results are for paired reads
226  bool m_Paired;
227 
228  /// True if the results are for a concordant pair
230 
231  /// Alignment flags for the query
233 
234  /// Alignment flags for the mate
236 };
237 
238 
239 /// Results of Magic-BLAST mapping: an STL-style container of CMagicBlastResults references
241 {
242 public:
243 
244  /// data type contained by this container
246 
247  /// size_type type definition
248  typedef vector<value_type>::size_type size_type;
249 
250  /// const_iterator type definition
251  typedef vector<value_type>::const_iterator const_iterator;
252 
253  /// iterator type definition
254  typedef vector<value_type>::iterator iterator;
255 
256  /// Create an empty results set
258 
259  /// Get all results as a single Seq-align-set object
260  /// @param no_discordant Report only pairs aligned concordantly [in]
261  CRef<CSeq_align_set> GetFlatResults(bool no_discordant = false);
262 
263  /// Get number of results, provided to facilitate STL-style iteration
264  size_type size() const {return m_Results.size();}
265 
266  /// Is the container empty
267  bool empty() const {return size() == 0;}
268 
269  /// Returns const iterator to the beginning of the container,
270  /// provided to facilitate STL-style iteration
271  const_iterator begin() const {return m_Results.begin();}
272 
273  /// Returns const iterator to the end of the container
274  const_iterator end() const {return m_Results.end();}
275 
276  /// Returns iterator to the beginning of the container
277  iterator begin() {return m_Results.begin();}
278 
279  /// Returns iterator to the end of the container
280  iterator end() {return m_Results.end();}
281 
282  /// Clear all results
283  void clear() {m_Results.clear();}
284 
285  /// Reserve memory for a number of result elements
286  void reserve(size_t num) {m_Results.reserve(num);}
287 
288  /// Add results to the end of the container
290  {m_Results.push_back(element);}
291 
292 private:
295 
296  vector< CRef<CMagicBlastResults> > m_Results; ///< Underlying storage; every container method above forwards to this vector
297 };
298 
299 
300 END_SCOPE(blast)
302 
303 /* @} */
304 
305 #endif /* ALGO_BLAST_API___MAGICBLAST__HPP */
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Definition of classes which constitute the results of running a BLAST search.
Results of Magic-BLAST mapping.
Definition: magicblast.hpp:241
Magic-BLAST results for a single query/read or a pair of reads.
Definition: magicblast.hpp:137
BLAST RNA-Seq mapper.
Definition: magicblast.hpp:58
CObject –.
Definition: ncbiobj.hpp:180
Class that supports setting the number of threads to use with a given algorithm.
Source of query sequence data for BLAST Provides an interface for search classes to retrieve sequence...
Definition: query_data.hpp:147
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
vector< CRef< CMagicBlastResults > > m_Results
Definition: magicblast.hpp:296
CMagicBlast(const CMagicBlast &rhs)
Prohibit copy constructor.
~CMagicBlast()
Destructor.
Definition: magicblast.hpp:74
CRef< CLocalDbAdapter > m_LocalDbAdapter
Reference to a BLAST subject/database object.
Definition: magicblast.hpp:116
bool m_Concordant
True if the results are for a concordant pair.
Definition: magicblast.hpp:229
CRef< IQueryFactory > m_Queries
Queries.
Definition: magicblast.hpp:113
size_type size() const
Get number of results, provided to facilitate STL-style iteration.
Definition: magicblast.hpp:264
bool LastAligned(void) const
Is the mate aligned.
Definition: magicblast.hpp:204
bool m_Paired
True if results are for paired reads.
Definition: magicblast.hpp:226
CRef< SInternalData > m_InternalData
Internal data structures.
Definition: magicblast.hpp:125
vector< value_type >::size_type size_type
size_type type definition
Definition: magicblast.hpp:248
const CSeq_id & GetFirstId(void) const
Get sequence id of the first segment of a paired read.
Definition: magicblast.hpp:195
CMagicBlast & operator=(const CMagicBlast &rhs)
Prohibit assignment operator.
void push_back(CMagicBlastResultSet::value_type &element)
Add results to the end of the container.
Definition: magicblast.hpp:289
CConstRef< CSeq_align_set > GetSeqAlign(void) const
Get alignments.
Definition: magicblast.hpp:174
bool IsConcordant(void) const
Is the aligned pair concordant?
Definition: magicblast.hpp:183
TSearchMessages m_Messages
Warning and error messages.
Definition: magicblast.hpp:128
void clear()
Clear all results.
Definition: magicblast.hpp:283
bool FirstAligned(void) const
Is the query aligned.
Definition: magicblast.hpp:201
const_iterator end() const
Returns const iterator to the end of the container.
Definition: magicblast.hpp:274
const_iterator begin() const
Returns const iterator to the beginning of the container, provided to facilitate STL-style iteration.
Definition: magicblast.hpp:271
CConstRef< CSeq_id > m_QueryId
Query id.
Definition: magicblast.hpp:217
bool empty() const
Is the container empty.
Definition: magicblast.hpp:267
iterator end()
Returns iterator to the end of the container.
Definition: magicblast.hpp:280
bool m_BtopSpliceSignals
Should BTOP strings be formatted with splice signals.
Definition: magicblast.hpp:131
CMagicBlastResultSet & operator=(const CMagicBlastResultSet &)
TSearchMessages GetSearchMessages(void) const
Definition: magicblast.hpp:81
bool IsPaired(void) const
Are alignments computed for paired reads.
Definition: magicblast.hpp:180
void reserve(size_t num)
Reserve memory for a number of result elements.
Definition: magicblast.hpp:286
CConstRef< CSeq_id > m_MateId
Mate id if results are for paired reads.
Definition: magicblast.hpp:220
TResultsInfo GetLastInfo(void) const
Get alignment flags for the mate.
Definition: magicblast.hpp:189
CMagicBlastResultSet(const CMagicBlastResultSet &)
TResultsInfo GetFirstInfo(void) const
Get alignment flags for the query.
Definition: magicblast.hpp:186
CRef< CMagicBlastResults > value_type
data type contained by this container
Definition: magicblast.hpp:245
iterator begin()
Returns iterator to the beginning of the container.
Definition: magicblast.hpp:277
TResultsInfo m_FirstInfo
Alignment flags for the query.
Definition: magicblast.hpp:232
vector< value_type >::iterator iterator
iterator type definition
Definition: magicblast.hpp:254
const CSeq_id & GetQueryId(void) const
Get query sequence id.
Definition: magicblast.hpp:192
CMagicBlastResultSet(void)
Create an empty results set.
Definition: magicblast.hpp:257
CRef< CSeq_align_set > m_Aligns
Alignments for a single or a pair of reads.
Definition: magicblast.hpp:223
CRef< CSeq_align_set > SetSeqAlign(void)
Get non-const alignments.
Definition: magicblast.hpp:177
CRef< CBlastPrelimSearch > m_PrelimSearch
Object that runs BLAST search.
Definition: magicblast.hpp:122
vector< value_type >::const_iterator const_iterator
const_iterator type definition
Definition: magicblast.hpp:251
CRef< CBlastOptions > m_Options
Options to configure the search.
Definition: magicblast.hpp:119
const CSeq_id & GetLastId(void) const
Get sequence id of the last sequence of a paired read.
Definition: magicblast.hpp:198
TResultsInfo m_LastInfo
Alignment flags for the mate.
Definition: magicblast.hpp:235
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
void Run(void)
Enter the main loop.
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
const struct ncbi::grid::netcache::search::fields::SIZE size
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Structure that contains BLAST mapping results.
Definition: spliced_hits.h:91
The query related information.
A chain of HSPs: spliced alignment.
Definition: spliced_hits.h:60
Modified on Sun Apr 14 05:26:23 2024 by modify_doxy.py rev. 669887