NCBI C++ ToolKit
sseqloc.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sseqloc.hpp 51718 2011-10-26 16:01:44Z maning $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file sseqloc.hpp
31  * Definition of SSeqLoc structure
32  */
33 
34 #ifndef ALGO_BLAST_API___SSEQLOC__HPP
35 #define ALGO_BLAST_API___SSEQLOC__HPP
36 
37 #include <corelib/ncbistd.hpp>
38 #include <objmgr/scope.hpp>
42 
44 BEGIN_SCOPE(blast)
45 
46 /// Structure to represent a single sequence to be fed to BLAST
47 struct SSeqLoc {
48  /// Seq-loc describing the sequence to use as query/subject to BLAST
49  /// The types of Seq-loc currently supported are: whole and seq-interval
51 
52  /// Scope where the sequence referenced can be found by the toolkit's
53  /// object manager
55 
56  /// Seq-loc describing regions to mask in the seqloc field
57  /// Acceptable types of Seq-loc are Seq-interval and Packed-int
58  /// @sa ignore_strand_in_mask
60 
61  /// This member dictates how the strand in the mask member is interpreted.
62  /// If true, it means that the Seq-loc in mask is assumed to be on the plus
63  /// strand AND that the complement of this should also be applied (i.e.:
64  /// the strand specification of the mask member will be ignored). If it's
65  /// false, then the strand specification of the mask member will be obeyed
66  /// and only those regions on specific strands will be masked.
67  /// @note the default value of this field is true
68  /// @sa mask
70 
71  /// Genetic code id if this sequence should be translated.
72  /// @note BLAST_GENETIC_CODE is the default, even though the sequence might
73  /// not need to be translated (i.e.: program type determines whether this
74  /// is used or not). The sentinel value to indicate that this field is not
75  /// applicable is numeric_limits<Uint4>::max().
77 
78  /// Default constructor: empty seqloc, scope and mask; strand in mask is
    /// ignored and the genetic code is BLAST_GENETIC_CODE
80  : seqloc(), scope(), mask(), ignore_strand_in_mask(true),
81  genetic_code_id(BLAST_GENETIC_CODE) {}
82 
83  /// Parameterized constructor (pointer arguments, no mask)
84  /// @param sl Sequence location [in]
85  /// @param s Scope to retrieve sl [in]
86  SSeqLoc(const objects::CSeq_loc* sl, objects::CScope* s)
87  : seqloc(sl), scope(s), mask(0), ignore_strand_in_mask(true),
88  genetic_code_id(BLAST_GENETIC_CODE) {}
89 
90  /// Parameterized constructor (reference arguments, no mask)
91  /// @param sl Sequence location [in]
92  /// @param s Scope to retrieve sl [in]
93  SSeqLoc(const objects::CSeq_loc& sl, objects::CScope& s)
94  : seqloc(&sl), scope(&s), mask(0), ignore_strand_in_mask(true),
95  genetic_code_id(BLAST_GENETIC_CODE) {}
96 
97  /// Parameterized constructor (pointer arguments, with mask)
98  /// @param sl Sequence location [in]
99  /// @param s Scope to retrieve sl [in]
100  /// @param m Masking location(s) applicable to sl [in]
101  /// @param ignore_mask_strand Ignore the strand specified in m? [in]
     /// @note If m is non-NULL and ignore_mask_strand is true, ResetStrand()
     /// is called on the stored mask.
102  SSeqLoc(const objects::CSeq_loc* sl, objects::CScope* s,
103  objects::CSeq_loc* m, bool ignore_mask_strand = true)
104  : seqloc(sl), scope(s), mask(m),
105  ignore_strand_in_mask(ignore_mask_strand),
106  genetic_code_id(BLAST_GENETIC_CODE) {
107  if (m != NULL && ignore_strand_in_mask) {
108  mask->ResetStrand();
109  }
110  }
111 
112  /// Parameterized constructor (reference arguments, with mask)
113  /// @param sl Sequence location [in]
114  /// @param s Scope to retrieve sl [in]
115  /// @param m Masking location(s) applicable to sl [in]
116  /// @param ignore_mask_strand Ignore the strand specified in m? [in]
     /// @note If ignore_mask_strand is true, ResetStrand() is called on the
     /// stored mask.
117  SSeqLoc(const objects::CSeq_loc& sl, objects::CScope& s,
118  objects::CSeq_loc& m, bool ignore_mask_strand = true)
119  : seqloc(&sl), scope(&s), mask(&m),
120  ignore_strand_in_mask(ignore_mask_strand),
121  genetic_code_id(BLAST_GENETIC_CODE) {
122  if (ignore_strand_in_mask) {
123  mask->ResetStrand();
124  }
125  }
126 };
127 
128 /// Vector of SSeqLoc entries, each representing a single sequence to be fed to BLAST
129 typedef vector<SSeqLoc> TSeqLocVector;
130 
131 /// Convert a TSeqLocVector to a CBioseq_set
132 /// @param input TSeqLocVector to convert [in]
133 /// @return CBioseq_set with CBioseqs from the input, or NULL if input is empty
137 
138 
139 /// Search Query
140 ///
141 /// This class represents the data relevant to one query in a blast
142 /// search. The types of Seq-loc currently supported are "whole" and
143 /// "int". The scope is expected to contain this Seq-loc, and the
144 /// mask represents the regions of this query that are disabled for
145 /// this search, or for some frames of this search, via one of several
146 /// algorithms, or that are specified by the user as masked regions.
147 class CBlastSearchQuery : public CObject {
148 public:
149  /// Constructor
150  ///
151  /// Build a CBlastSearchQuery object with no masking locations assigned
152  ///
153  /// @param sl The query itself.
154  /// @param sc The scope containing the query.
     /// @throws CBlastException if the Seq-loc is neither whole nor int
     /// (raised by x_Validate).
155  CBlastSearchQuery(const objects::CSeq_loc & sl,
156  objects::CScope & sc)
158  {
159  x_Validate();
160  }
161 
162  /// Constructor
163  ///
164  /// Build a CBlastSearchQuery object.
165  ///
166  /// @param sl The query itself.
167  /// @param sc The scope containing the query.
168  /// @param m Regions of the query that are masked.
     /// @throws CBlastException if the Seq-loc is neither whole nor int
     /// (raised by x_Validate).
169  CBlastSearchQuery(const objects::CSeq_loc & sl,
170  objects::CScope & sc,
172  : seqloc(& sl), scope(& sc), mask(m),
174  {
175  x_Validate();
176  }
177 
178  /// Default constructor
179  ///
180  /// This is necessary in order to add this type to a std::vector.
182 
183  /// Get the query Seq-loc.
184  /// @return The Seq-loc representing the query
186  return seqloc;
187  }
188 
189  /// Get the query Seq-id.
190  /// @return The Seq-id representing the query (obtained via seqloc->GetId())
192  return CConstRef<objects::CSeq_id>(seqloc->GetId());
193  }
194 
195  /// Get the query CScope.
196  /// @return The CScope containing the query
198  return scope;
199  }
200 
201  /// Set the genetic code id
     /// @param gc_id Genetic code id to store in this query
202  void SetGeneticCodeId(Uint4 gc_id) {
203  genetic_code_id = gc_id;
204  }
205 
206  /// Get the genetic code id
208  return genetic_code_id;
209  }
210 
211  /// Get the masked query regions.
212  ///
213  /// The masked regions of the query, or of some frames or strands of the
214  /// query, are returned.
215  ///
216  /// @return The masked regions of the query.
218  return mask;
219  }
220 
221  /// Set the masked query regions.
222  ///
223  /// The indicated set of masked regions is applied to this query,
224  /// replacing any existing masked regions.
225  ///
226  /// @param mqr The set of regions to mask.
228  mask = mqr;
229  }
230 
231  /// Mask a region of this query.
232  ///
233  /// The CSeqLocInfo object is added to the list of masked regions
234  /// of this query.
235  ///
236  /// @param sli A CSeqLocInfo indicating the region to mask.
238  {
239  mask.push_back(sli);
240  }
241 
242  /// Get the length of the sequence represented by this object
243  TSeqPos GetLength() const {
245  }
246 private:
247  /// The Seq-loc representing the query.
249 
250  /// This scope contains the query.
252 
253  /// These regions of the query are masked.
255 
256  /// Genetic code id if this sequence should be translated.
257  /// If its value is numeric_limits<Uint4>::max(), it means that it's not
258  /// applicable
260 
261  /// Currently we only support whole or int. Throw exception otherwise
     /// @throws CBlastException (eInvalidArgument) when seqloc is neither
     /// whole nor int.
262  void x_Validate() {
263  if (seqloc->IsWhole() || seqloc->IsInt()) return;
264  NCBI_THROW(CBlastException, eInvalidArgument,
265  "Only whole or int typed seq_loc is supported for CBlastQueryVector");
266  }
267 };
268 
269 
270 /// Query Vector
271 ///
272 /// This class represents the data relevant to all queries in a blast
273 /// search. The queries are represented as CBlastSearchQuery objects.
274 /// Each contains a Seq-loc, scope, and a list of filtered regions.
275 
276 class CBlastQueryVector : public CObject {
277 public:
278  /// Data type contained by this container
280 
281  /// size_type type definition
282  typedef vector<value_type>::size_type size_type;
283 
284  /// const_iterator type definition
285  typedef vector<value_type>::const_iterator const_iterator;
286 
287  /// Add a query to the set.
288  ///
289  /// The CBlastSearchQuery is added to the list of queries for this
290  /// search.
291  ///
292  /// @param q A query to add to the set.
294  {
295  m_Queries.push_back(q);
296  }
297 
298  /// Returns true if this query vector is empty.
299  bool Empty() const
300  {
301  return m_Queries.empty();
302  }
303 
304  /// Returns the number of queries found in this query vector.
305  size_type Size() const
306  {
307  return m_Queries.size();
308  }
309 
310  /// Get the query Seq-loc for a query by index.
311  /// @param i The index of a query; asserted to be less than Size().
312  /// @return The Seq-loc representing the query.
314  {
315  _ASSERT(i < m_Queries.size());
316  return m_Queries[i]->GetQuerySeqLoc();
317  }
318 
319  /// Get the scope containing a query by index.
320  /// @param i The index of a query; asserted to be less than Size().
321  /// @return The CScope containing the query.
323  {
324  _ASSERT(i < m_Queries.size());
325  return m_Queries[i]->GetScope();
326  }
327 
328  /// Get the masked regions for a query by index.
329  /// @param i The index of a query; asserted to be less than Size().
330  /// @return The masked (filtered) regions of that query.
332  {
333  _ASSERT(i < m_Queries.size());
334  return m_Queries[i]->GetMaskedRegions();
335  }
336 
337  /// Convenience method to get a CSeq_loc representing the masking locations
338  /// @param i The index of a query.
339  /// @return The masked (filtered) regions of that query, as a Seq-loc.
340  /// @throws CBlastException in case of errors in conversion
342  {
345  }
346 
347  /// Assign a list of masked regions to one query.
348  /// @param i The index of the query; asserted to be less than Size().
349  /// @param mqr The masked regions for this query.
351  {
352  _ASSERT(i < m_Queries.size());
353  m_Queries[i]->SetMaskedRegions(mqr);
354  }
355 
356  /// Add a masked region to the set for a query.
357  /// @param i The index of the query.
358  /// @param sli The masked region to add.
     /// NOTE(review): unlike the other indexed accessors in this class, this
     /// method does not _ASSERT(i < m_Queries.size()) before indexing;
     /// confirm whether the omission is intentional.
360  {
361  m_Queries[i]->AddMask(sli);
362  }
363 
364  /// Get the CBlastSearchQuery object at index i
365  /// @param i The index of a query; asserted to be less than Size().
368  {
369  _ASSERT(i < m_Queries.size());
370  return m_Queries[i];
371  }
372 
373  /// Get the CBlastSearchQuery object at index i (forwards to
     /// GetBlastSearchQuery)
374  /// @param i The index of a query.
377  {
378  return GetBlastSearchQuery(i);
379  }
380 
381  /// Identical to Size, provided to facilitate STL-style iteration
382  size_type size() const { return Size(); }
383 
384  /// Returns const_iterator to beginning of container, provided to
385  /// facilitate STL-style iteration
386  const_iterator begin() const { return m_Queries.begin(); }
387 
388  /// Returns const_iterator to end of container, provided to
389  /// facilitate STL-style iteration
390  const_iterator end() const { return m_Queries.end(); }
391 
392  /// Clears the contents of this object
393  void clear() { m_Queries.clear(); }
394 
395  /// Add a value to the back of this container
396  /// @param element element to add [in]
397  void push_back(const value_type& element) { m_Queries.push_back(element); }
398 
399 private:
400  /// The set of queries used for a search.
401  vector< CRef<CBlastSearchQuery> > m_Queries;
402 };
403 
404 
405 END_SCOPE(blast)
407 
408 #endif /* ALGO_BLAST_API___SSEQLOC__HPP */
409 
410 
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
ncbi::TMaskedQueryRegions mask
Defines BLAST error codes (user errors included)
Query Vector.
Definition: sseqloc.hpp:276
bool Empty() const
Returns true if this query vector is empty.
Definition: sseqloc.hpp:299
const_iterator end() const
Returns const_iterator to end of container, provided to facilitate STL-style iteration.
Definition: sseqloc.hpp:390
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
Definition: sseqloc.hpp:293
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
Definition: sseqloc.hpp:322
TMaskedQueryRegions GetMaskedRegions(size_type i) const
Get the masked regions for a query by number.
Definition: sseqloc.hpp:331
void clear()
Clears the contents of this object.
Definition: sseqloc.hpp:393
const_iterator begin() const
Returns const_iterator to beginning of container, provided to facilitate STL-style iteration.
Definition: sseqloc.hpp:386
size_type Size() const
Returns the number of queries found in this query vector.
Definition: sseqloc.hpp:305
vector< value_type >::size_type size_type
size_type type definition
Definition: sseqloc.hpp:282
size_type size() const
Identical to Size, provided to facilitate STL-style iteration.
Definition: sseqloc.hpp:382
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
Definition: sseqloc.hpp:313
void SetMaskedRegions(size_type i, TMaskedQueryRegions mqr)
Assign a list of masked regions to one query.
Definition: sseqloc.hpp:350
vector< CRef< CBlastSearchQuery > > m_Queries
The set of queries used for a search.
Definition: sseqloc.hpp:401
void AddMask(size_type i, CRef< CSeqLocInfo > sli)
Add a masked region to the set for a query.
Definition: sseqloc.hpp:359
vector< value_type >::const_iterator const_iterator
const_iterator type definition
Definition: sseqloc.hpp:285
CRef< objects::CSeq_loc > GetMasks(size_type i) const
Convenience method to get a CSeq_loc representing the masking locations.
Definition: sseqloc.hpp:341
void push_back(const value_type &element)
Add a value to the back of this container.
Definition: sseqloc.hpp:397
CRef< CBlastSearchQuery > value_type
Definition: sseqloc.hpp:279
CRef< CBlastSearchQuery > operator[](size_type i) const
Get the CBlastSearchQuery object at index i.
Definition: sseqloc.hpp:376
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Definition: sseqloc.hpp:367
Search Query.
Definition: sseqloc.hpp:147
void x_Validate()
Currently we only support whole or int. Throw exception otherwise.
Definition: sseqloc.hpp:262
CBlastSearchQuery()
Default constructor.
Definition: sseqloc.hpp:181
void SetGeneticCodeId(Uint4 gc_id)
Set the genetic code id.
Definition: sseqloc.hpp:202
Uint4 genetic_code_id
Genetic code id if this sequence should be translated.
Definition: sseqloc.hpp:259
CRef< objects::CScope > scope
This scope contains the query.
Definition: sseqloc.hpp:251
TMaskedQueryRegions mask
These regions of the query are masked.
Definition: sseqloc.hpp:254
CConstRef< objects::CSeq_loc > seqloc
The Seq-loc representing the query.
Definition: sseqloc.hpp:248
CConstRef< objects::CSeq_id > GetQueryId() const
Get the query Seq-id.
Definition: sseqloc.hpp:191
void SetMaskedRegions(TMaskedQueryRegions mqr)
Set the masked query regions.
Definition: sseqloc.hpp:227
CBlastSearchQuery(const objects::CSeq_loc &sl, objects::CScope &sc, TMaskedQueryRegions m)
Constructor.
Definition: sseqloc.hpp:169
TSeqPos GetLength() const
Get the length of the sequence represented by this object.
Definition: sseqloc.hpp:243
CBlastSearchQuery(const objects::CSeq_loc &sl, objects::CScope &sc)
Constructor.
Definition: sseqloc.hpp:155
CRef< objects::CScope > GetScope() const
Get the query CScope.
Definition: sseqloc.hpp:197
Uint4 GetGeneticCodeId() const
Get the genetic code id.
Definition: sseqloc.hpp:207
TMaskedQueryRegions GetMaskedRegions() const
Get the masked query regions.
Definition: sseqloc.hpp:217
void AddMask(CRef< CSeqLocInfo > sli)
Mask a region of this query.
Definition: sseqloc.hpp:237
CConstRef< objects::CSeq_loc > GetQuerySeqLoc() const
Get the query Seq-loc.
Definition: sseqloc.hpp:185
CObject –.
Definition: ncbiobj.hpp:180
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define true
Definition: bool.h:35
CRef< objects::CBioseq_set > TSeqLocVector2Bioseqs(const TSeqLocVector &input)
Convert a TSeqLocVector to a CBioseq_set.
Definition: blast_aux.cpp:1070
CRef< objects::CSeq_loc > MaskedQueryRegionsToPackedSeqLoc(const TMaskedQueryRegions &sloc)
Interface to build a CSeq-loc from a TMaskedQueryRegion; note that conversion in this dire...
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static int input()
int i
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
bool ignore_strand_in_mask
This member dictates how the strand in the mask member is interpreted.
Definition: sseqloc.hpp:69
Uint4 genetic_code_id
Genetic code id if this sequence should be translated.
Definition: sseqloc.hpp:76
SSeqLoc()
Default constructor.
Definition: sseqloc.hpp:79
SSeqLoc(const objects::CSeq_loc *sl, objects::CScope *s, objects::CSeq_loc *m, bool ignore_mask_strand=true)
Parameterized constructor.
Definition: sseqloc.hpp:102
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
Definition: sseqloc.hpp:50
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
Definition: sseqloc.hpp:54
SSeqLoc(const objects::CSeq_loc *sl, objects::CScope *s)
Parameterized constructor.
Definition: sseqloc.hpp:86
CRef< objects::CSeq_loc > mask
Seq-loc describing regions to mask in the seqloc field Acceptable types of Seq-loc are Seq-interval a...
Definition: sseqloc.hpp:59
SSeqLoc(const objects::CSeq_loc &sl, objects::CScope &s)
Parameterized constructor.
Definition: sseqloc.hpp:93
SSeqLoc(const objects::CSeq_loc &sl, objects::CScope &s, objects::CSeq_loc &m, bool ignore_mask_strand=true)
Parameterized constructor.
Definition: sseqloc.hpp:117
#define _ASSERT
Modified on Fri Sep 20 14:57:06 2024 by modify_doxy.py rev. 669887