NCBI C++ ToolKit
seedtop.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_BLAST_API___SEEDTOP__HPP
2 #define ALGO_BLAST_API___SEEDTOP__HPP
3 
4 /* $Id: seedtop.hpp 55434 2012-08-16 15:39:47Z maning $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Ning Ma
30  *
31  */
32 
33 /// @file seedtop.hpp
34 /// Declares the CSeedTop class.
35 
36 
37 /** @addtogroup AlgoBlast
38  *
39  * @{
40  */
41 
42 #include <corelib/ncbistd.hpp>
46 
48 BEGIN_SCOPE(blast)
49 
50 struct SPatternUnit {
53  size_t at_least;
54  size_t at_most;
55  bool is_x;
56  SPatternUnit(const string unit) {
57  size_t tail_start = 0;
58  bool parse_failed = false;
59  is_x = false;
60  switch(unit[0]) {
61  case '[':
62  tail_start = unit.find(']');
63  if (tail_start == string::npos){
64  parse_failed = true;
65  break;
66  }
67  tail_start++;
68  allowed_letters = string(unit, 1, tail_start - 2);
69  break;
70  case '{':
71  tail_start = unit.find('}');
72  if (tail_start == string::npos){
73  parse_failed = true;
74  break;
75  }
76  tail_start++;
77  disallowed_letters = string(unit, 1, tail_start - 2);
78  break;
79  case 'X':
80  tail_start = 1;
81  is_x = true;
82  break;
83  default:
84  if (unit[0] > 'Z' || unit[0] < 'A'){
85  parse_failed = true;
86  break;
87  }
88  tail_start = 1;
89  allowed_letters = string(unit, 0, 1);
90  break;
91  }
92 
93  if (parse_failed) {
94  NCBI_THROW(CBlastException, eInvalidArgument, "Can not parse pattern file");
95  }
96 
97  // parse the (x,y) part
98  if (tail_start >= unit.size()) {
99  at_least = 1;
100  at_most = 2;
101  } else {
102  if (unit[tail_start] != '(' || unit[unit.size()-1] != ')') {
103  NCBI_THROW(CBlastException, eInvalidArgument, "Can not parse pattern file");
104  }
105  try {
106  string rep(unit, tail_start + 1, unit.size()-2-tail_start);
107  size_t pos_comma = rep.find(',');
108  if (pos_comma == rep.npos) {
109  at_least = NStr::StringToUInt(rep);
110  at_most = at_least + 1;
111  } else if (pos_comma == rep.size() -1) {
112  at_least = NStr::StringToUInt(string(rep, 0, pos_comma));
113  at_most = rep.npos;
114  } else {
115  at_least = NStr::StringToUInt(string(rep, 0, pos_comma));
116  at_most = NStr::StringToUInt(string(rep,
117  pos_comma + 1, rep.size()-1-pos_comma)) + 1;
118  }
119  } catch (...) {
120  NCBI_THROW(CBlastException, eInvalidArgument, "Can not parse pattern file");
121  }
122  }
123  }
124  bool test(Uint1 letter) {
125  if (allowed_letters != "") {
126  return (allowed_letters.find(letter) != allowed_letters.npos);
127  } else {
128  return (disallowed_letters.find(letter) == disallowed_letters.npos);
129  }
130  }
131 };
132 
134 public:
135  // the return type for seedtop search
136  // a vector of results (matches) as seq_loc on each subject
137  // the results will be sorted first by subject oid (if multiple subject
138  // sequences or database is supplied during construction), then by the first
139  // position of the match
140  typedef vector < CConstRef <CSeq_loc> > TSeedTopResults;
141 
142  // constructor
143  CSeedTop(const string & pattern); // seedtop pattern
144 
145  // search a database or a set of subject sequences
147 
148  // search a bioseq
150 
151 private:
152  const static EBlastProgramType m_Program = eBlastTypePhiBlastp;
153  string m_Pattern;
156  vector< struct SPatternUnit > m_Units;
157 
158  void x_ParsePattern();
159  void x_MakeLookupTable();
160  void x_MakeScoreBlk();
161  // parsing the result into a list of ranges
162  void x_GetPatternRanges(vector<int> &pos,
163  Uint4 off,
164  Uint1 *seq,
165  Uint4 len,
166  vector<vector<int> > &ranges);
167 };
168 
169 END_SCOPE(blast)
171 
172 
173 /* @} */
174 
175 
176 #endif /* ALGO_BLAST_API___SEEDTOP__HPP */
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypePhiBlastp
Definition: blast_program.h:86
Defines interface for retrieving sequence identifiers.
CBioseq_Handle –.
Defines BLAST error codes (user errors included)
Wrapper class for BlastScoreBlk .
Definition: blast_aux.hpp:333
Wrapper class for LookupTableWrap .
Definition: blast_aux.hpp:315
CObject –.
Definition: ncbiobj.hpp:180
Include a standard set of the NCBI C++ Toolkit most basic headers.
size_t at_least
Definition: seedtop.hpp:53
string disallowed_letters
Definition: seedtop.hpp:52
CBlastScoreBlk m_ScoreBlk
Definition: seedtop.hpp:155
CLookupTableWrap m_Lookup
Definition: seedtop.hpp:154
size_t at_most
Definition: seedtop.hpp:54
string m_Pattern
Definition: seedtop.hpp:153
bool test(Uint1 letter)
Definition: seedtop.hpp:124
vector< CConstRef< CSeq_loc > > TSeedTopResults
Definition: seedtop.hpp:140
vector< struct SPatternUnit > m_Units
Definition: seedtop.hpp:156
SPatternUnit(const string unit)
Definition: seedtop.hpp:56
string allowed_letters
Definition: seedtop.hpp:51
string
Definition: cgiapp.hpp:690
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
void Run(void)
Enter the main loop.
int len
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
static Uint4 letter(char c)
Modified on Fri Sep 20 14:57:58 2024 by modify_doxy.py rev. 669887