NCBI C++ ToolKit
nucprot.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * $Id: nucprot.hpp 71656 2016-03-21 20:10:21Z kiryutin $
3 *
4 * =========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * =========================================================================
27 *
28 * Author: Boris Kiryutin
29 *
30 * =========================================================================
31 */
32 
33 #ifndef NUCPROT_H
34 #define NUCPROT_H
35 
36 #include <corelib/ncbi_limits.hpp>
37 
38 #include <algorithm>
39 #include <sstream>
40 
41 #include "NSeq.hpp"
42 #include "PSeq.hpp"
43 #include "BackAlignInfo.hpp"
44 #include "scoring.hpp"
45 
47 
49 
51 BEGIN_SCOPE(prosplign)
52 
53 
// Mode codes for gaps, frameshift gaps and splices, as annotated per enumerator.
// The disabled BMode struct further down describes its wmode field as "one of EWMode".
// NOTE(review): eD has no comment in the original; presumably the diagonal (no-gap) step — confirm in nucprot.cpp.
54 enum EWMode {
55  eD,
56  eV, // V gap
57  eH, // H gap
58  eH1, // H gap (frameshift) of 3*N + 1 length
59  eH2, // H gap (frameshift) of 3*N + 2 length
60  eH3, // H gap of 3*N length
61  eFV, // V gap with frameshift
62  eFH, // H gap with frameshift
63  eS // splice
64 };
65 
66 /*
67 struct BMode {//info for back tracking
68  int wmode; //one of EWMode
69  int dmode, hmode, vmode, fhmode, fvmode;
70  int dslen, hslen, vslen, fhslen, fvslen;//splice length if ?mode shows splice
71  int wslen;
72 };
73 */
74 
// Back-tracking record for the one-stage fast version; AlignFNog in this header takes a CTBackAlignInfo<CBMode>.
// NOTE(review): the comment below names BackAlignFNog, but the cross-reference index of this page lists
// BackAlignNog(CTBackAlignInfo<CBMode>&, CAli&) — the name in the comment may be stale; confirm.
75 struct CBMode {// back tracking for one stage fast version (AlignFNog, BackAlignFNog)
76  int wmode; // keeps the w, h1-3 and v modes
77  int wlen, vlen, h1len, h2len, h3len; // splice lengths, one per mode kept in wmode
78 };
79 
// Translates nucleotide triplets to amino-acid characters.
// NOTE(review): listing lines 94, 106, 108, 117 and 118 are absent from this extract, so the bodies of
// TranslateTriplet(char,char,char) and TranslateStartTriplet(char,char,char) and two private members are
// not visible here. The page's cross-reference index lists `const CTrans_table & m_trans_table` (line 117)
// and `bool m_allow_alt_starts` (line 118) — restore them from the repository before compiling.
80 class CTranslationTable : public CObject {
81 public:
// gcode is presumably a genetic-code identifier — confirm against nucprot.cpp.
82  CTranslationTable(int gcode, bool allow_alt_starts);
83 
// Conversions between nucleotide characters and the integer codes used by nuc2a (defined in nucprot.cpp).
84  static int CharToNuc(char c);
85  static char NucToChar(int n);
86 
// Table lookup at index nuc1*64 + nuc2*8 + nuc3 in aa_table. No range check is made on the codes.
87  inline char nuc2a(int nuc1, int nuc2, int nuc3) const
88  {
89  return aa_table[nuc1*(8*8)+nuc2*8+nuc3]; // only 5 values per position are needed; stride 8 is used for speed
90  }
91 
// Body (listing line 94) is missing from this extract.
92  char TranslateTriplet(char n1, char n2, char n3) const
93  {
95  }
96 
// Convenience overload: forwards triplet[0], triplet[1], triplet[2]; the string length is not checked.
97  char TranslateTriplet(const string& triplet) const
98  {
99  return TranslateTriplet(triplet[0],triplet[1],triplet[2]);
100  }
101 
102 
// Branches on m_allow_alt_starts; both return statements (listing lines 106 and 108) are missing from this extract.
103  char TranslateStartTriplet(char n1, char n2, char n3) const
104  {
105  if(m_allow_alt_starts) {
107  }
109  }
110 
// Convenience overload: forwards triplet[0], triplet[1], triplet[2]; the string length is not checked.
111  char TranslateStartTriplet(const string& triplet) const
112  {
113  return TranslateStartTriplet(triplet[0],triplet[1],triplet[2]);
114  }
115 
116 private:
// Lookup table read by nuc2a; 8*8*8 entries to match the stride-8 index.
119  char aa_table[8*8*8];
120 };
121 
122 /// Substitution Matrix for Scoring Amino-Acid Alignments
// NOTE(review): listing lines 123, 133 and 138 are absent from this extract (the class header among them).
// The page's cross-reference index lists `const CTranslationTable & GetTranslationTable() const` (line 133)
// and `CConstRef< CTranslationTable > m_trans_table` (line 138).
124 public:
125  CSubstMatrix(const string& name, int scaling);
126 
127  void SetTranslationTable(const CTranslationTable* trans_table);
128 
// Score of amino acid `amin` against the residue that m_trans_table->nuc2a gives for the triplet (nuc1, nuc2, nuc3).
// NOTE(review): int(amin) is negative where char is signed and amin > 127, which would index before the
// array — confirm callers only pass 7-bit characters.
129  inline int MultScore(int nuc1, int nuc2, int nuc3, char amin) const { return scaled_subst_matrix[int(amin)][int(m_trans_table->nuc2a(nuc1, nuc2, nuc3))]; }
130 
// Direct lookup scaled_subst_matrix[amin1][amin2]; the same signed-char caveat applies.
131  inline int ScaledScore(char amin1, char amin2) const { return scaled_subst_matrix[int(amin1)][int(amin2)]; }
132 
134 
135  string m_alphabet;
136 private:
// 256 x 256 table indexed by character code in both dimensions.
137  int scaled_subst_matrix[256][256];
139 };
140 
141 
142 /* * fast score access * */
// NOTE(review): listing lines 143, 161 and 162 are absent from this extract (the class header among them).
// The page's cross-reference index lists CFastIScore(const CFastIScore &) and operator=(const CFastIScore &)
// without a definition location — presumably declared private to forbid copying; confirm.
144 {
145 public:
146  void SetAmin(char amin, const CSubstMatrix& matrix);// call before GetScore() and/or GetScore(int n1, int n2, int n3)
147  void Init(const CNSeq& seq, const CSubstMatrix& matrix);// call before GetScore()
// Advances m_pos by one element first, then returns the value it points at; no bounds check is made here.
148  inline int GetScore() { return *++m_pos; }
// Reads m_gpos at the base-5 index n1*25 + n2*5 + n3 (unlike the stride-8 index in CTranslationTable::nuc2a).
149  inline int GetScore(int n1, int n2, int n3) const { return m_gpos[n1*25+n2*5+n3]; }
// Sets m_size to 0 and m_init to false and gives m_scores one element; m_pos and m_gpos are not set here.
150  CFastIScore() : m_size(0), m_init(false) { m_scores.resize(1); }
151 private:
152  vector<int> m_scores;
153  int *m_pos;
154  int m_size;
155 
156  int *m_gpos;
157  vector<int> m_gscores;
158  void Init(const CSubstMatrix& matrix);
159  bool m_init;
160 
163 };
164 
165 
// FASTA readers; implementations are outside this header.
// NOTE(review): presumably is_nuc selects nucleotide vs. protein input and id receives the sequence
// identifier — confirm against the implementation.
166 void ReadFasta(vector<char>& pseq, istream& ifs, bool is_nuc, string& id);
167 
168 // reads one time only
// NOTE(review): the note above does not say what is read only once (the file or one record) — confirm.
169 void ReadNucFa(CNSeq& seq, const string& fname, string& id);
170 void ReadProtFa(PSEQ& seq, const string& fname, string& id);
171 
172 class CAli;
173 class CAlignInfo;
175 
176 
177 // interface to interrupt calculations by user request from another thread
// NOTE(review): listing lines 178, 180, 183-184, 202 and 206-208 are absent from this extract (class header,
// constructor signature and data members among them). The page's cross-reference index lists
// CProSplignInterrupt(void) (line 180), CAtomicCounter m_Interrupt (206),
// TInterruptFnPtr m_InterruptFnPtr (207) and void * m_InterruptData (208).
179 public:
// Constructor body: resets the interrupt counter to 0 (the lines at 183-184 are not visible).
181  {
182  m_Interrupt.Set(0);
185  }
// Requests an interrupt by adding 1 to the counter.
186  void Interrupt(void) {
187  m_Interrupt.Add(1);
188  }
189 
// Callback type: receives the opaque callback_data pointer; CheckUserInterrupt treats a true return as an interrupt.
190  typedef bool(* TInterruptFnPtr) (void *callback_data);
191 
// Throws CProSplignException (eUserInterrupt) when the counter is non-zero, or when a callback is set
// and it returns true for m_InterruptData. Returns normally otherwise.
192  inline void CheckUserInterrupt(void) const
193  {
194  if(m_Interrupt.Get()) NCBI_THROW(CProSplignException, eUserInterrupt, "Interrupted by user" );
195  if( ( m_InterruptFnPtr != NULL ) && m_InterruptFnPtr(m_InterruptData) ) NCBI_THROW(CProSplignException, eUserInterrupt, "Interrupted by user" );
196 
197  }
198 
// Stores the callback pointer. NOTE(review): listing line 202 is missing — presumably it stores `data`
// in m_InterruptData; confirm.
199  void SetInterruptCallback( TInterruptFnPtr prg_callback, void* data)
200  {
201  m_InterruptFnPtr = prg_callback;
203  }
204 
205 private:
209 };
210 
211 
212 
// Alignment entry points; every int-returning routine takes a CProSplignInterrupt as its first argument.
// NOTE(review): the int return values are presumably alignment scores — confirm in nucprot.cpp.
213 // ***** versions without gap/frameshift penalty at the beginning/end
214 // fast variant of FrAlignNog1
215 int FrAlignFNog1(const CProSplignInterrupt& interrupt, CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix, bool left_gap = false, bool right_gap = false);
216 // **** back tracking, good for FrAlignNog, FrAlignNog1, FrAlign and FrAlignFNog1
217 void FrBackAlign(CBackAlignInfo& bi, CAli& ali);
218 
219 // ***** versions without gap/frameshift penalty at the beginning/end ONE STAGE, FAST
220 int AlignFNog(const CProSplignInterrupt& interrupt, CTBackAlignInfo<CBMode>& bi, const PSEQ& pseq, const CNSeq& nseq, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix);
// NOTE(review): listing line 221 is absent from this extract; the page's cross-reference index lists
// void BackAlignNog(CTBackAlignInfo< CBMode > &bi, CAli &ali), which is not declared in the lines visible here.
222 
223 // ***** versions without gap/frameshift penalty at the beginning/end FAST
224 int FindFGapIntronNog(const CProSplignInterrupt& interrupt, vector<pair<int, int> >& igi/*to return end gap/intron set*/,
225  const PSEQ& pseq, const CNSeq& nseq, bool& left_gap, bool& right_gap, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix);
226 
227 // *** mixed gap penalty (OLD, aka version 2)
228 int FindIGapIntrons(const CProSplignInterrupt& interrupt, vector<pair<int, int> >& igi/*to return end gap/intron set*/, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
229  int f/*frameshift opening cost*/, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix);
230 int FrAlign(const CProSplignInterrupt& interrupt, CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
231  int f/*frameshift opening cost*/, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix);
232 
233 
234 
235 // DEPRECATED:
236 // ***** versions without gap/frameshift penalty at the beginning/end
237 //pag f2
238 //int FrAlignNog(CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
239 // int f/*frameshift opening cost*/);
240 //aka version 3, see page 14
241 //int FrAlignNog1(CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
242 // int f/*frameshift opening cost*/);
243 
244 
245 END_SCOPE(prosplign)
247 
248 #endif //NUCPROT_H
vector< char > PSEQ
Definition: PSeq.hpp:48
Definition: Ali.hpp:60
CAtomicCounter –.
Definition: ncbicntr.hpp:71
vector< int > m_scores
Definition: nucprot.hpp:152
int GetScore()
Definition: nucprot.hpp:148
int * m_gpos
Definition: nucprot.hpp:156
void SetAmin(char amin, const CSubstMatrix &matrix)
Definition: nucprot.cpp:138
void Init(const CNSeq &seq, const CSubstMatrix &matrix)
Definition: nucprot.cpp:123
bool m_init
Definition: nucprot.hpp:159
vector< int > m_gscores
Definition: nucprot.hpp:157
int * m_pos
Definition: nucprot.hpp:153
CFastIScore & operator=(const CFastIScore &)
CFastIScore(const CFastIScore &)
int GetScore(int n1, int n2, int n3) const
Definition: nucprot.hpp:149
Definition: NSeq.hpp:52
CObject –.
Definition: ncbiobj.hpp:180
void CheckUserInterrupt(void) const
Definition: nucprot.hpp:192
CAtomicCounter m_Interrupt
Definition: nucprot.hpp:206
CProSplignInterrupt(void)
Definition: nucprot.hpp:180
void * m_InterruptData
Definition: nucprot.hpp:208
bool(* TInterruptFnPtr)(void *callback_data)
Definition: nucprot.hpp:190
void Interrupt(void)
Definition: nucprot.hpp:186
void SetInterruptCallback(TInterruptFnPtr prg_callback, void *data)
Definition: nucprot.hpp:199
TInterruptFnPtr m_InterruptFnPtr
Definition: nucprot.hpp:207
Substitution Matrix for Scoring Amino-Acid Alignments.
Definition: nucprot.hpp:123
CConstRef< CTranslationTable > m_trans_table
Definition: nucprot.hpp:138
void SetTranslationTable(const CTranslationTable *trans_table)
Definition: nucprot.cpp:91
const CTranslationTable & GetTranslationTable() const
Definition: nucprot.hpp:133
CSubstMatrix(const string &name, int scaling)
Definition: nucprot.cpp:54
int scaled_subst_matrix[256][256]
Definition: nucprot.hpp:137
int MultScore(int nuc1, int nuc2, int nuc3, char amin) const
Definition: nucprot.hpp:129
string m_alphabet
Definition: nucprot.hpp:135
int ScaledScore(char amin1, char amin2) const
Definition: nucprot.hpp:131
char GetStartResidue(int state) const
char GetCodonResidue(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
char TranslateStartTriplet(char n1, char n2, char n3) const
Definition: nucprot.hpp:103
static char NucToChar(int n)
Definition: nucprot.cpp:114
char aa_table[8 *8 *8]
Definition: nucprot.hpp:119
bool m_allow_alt_starts
Definition: nucprot.hpp:118
char TranslateTriplet(char n1, char n2, char n3) const
Definition: nucprot.hpp:92
char TranslateStartTriplet(const string &triplet) const
Definition: nucprot.hpp:111
CTranslationTable(int gcode, bool allow_alt_starts)
Definition: nucprot.cpp:96
char nuc2a(int nuc1, int nuc2, int nuc3) const
Definition: nucprot.hpp:87
char TranslateTriplet(const string &triplet) const
Definition: nucprot.hpp:97
const CTrans_table & m_trans_table
Definition: nucprot.hpp:117
static int CharToNuc(char c)
Definition: nucprot.cpp:106
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
#define NULL
Definition: ncbistd.hpp:225
void Set(TValue new_value) THROWS_NONE
Set atomic counter value.
Definition: ncbicntr.hpp:185
TValue Add(int delta) THROWS_NONE
Atomically add value (=delta), and return new counter value.
Definition: ncbicntr.hpp:278
TValue Get(void) const THROWS_NONE
Get atomic counter value.
Definition: ncbicntr.hpp:168
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
yy_size_t n
void ReadNucFa(CNSeq &seq, const string &fname, string &id)
EWMode
Definition: nucprot.hpp:54
@ eH2
Definition: nucprot.hpp:59
@ eH
Definition: nucprot.hpp:57
@ eFV
Definition: nucprot.hpp:61
@ eV
Definition: nucprot.hpp:56
@ eH3
Definition: nucprot.hpp:60
@ eS
Definition: nucprot.hpp:63
@ eH1
Definition: nucprot.hpp:58
@ eD
Definition: nucprot.hpp:55
@ eFH
Definition: nucprot.hpp:62
void FrBackAlign(CBackAlignInfo &bi, CAli &ali)
Definition: nucprot.cpp:684
void ReadFasta(vector< char > &pseq, istream &ifs, bool is_nuc, string &id)
int FindIGapIntrons(const CProSplignInterrupt &interrupt, vector< pair< int, int > > &igi, const PSEQ &pseq, const CNSeq &nseq, int g, int e, int f, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:508
int FindFGapIntronNog(const CProSplignInterrupt &interrupt, vector< pair< int, int > > &igi, const PSEQ &pseq, const CNSeq &nseq, bool &left_gap, bool &right_gap, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:260
int FrAlign(const CProSplignInterrupt &interrupt, CBackAlignInfo &bi, const PSEQ &pseq, const CNSeq &nseq, int g, int e, int f, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:171
void BackAlignNog(CTBackAlignInfo< CBMode > &bi, CAli &ali)
Definition: nucprot.cpp:1143
int AlignFNog(const CProSplignInterrupt &interrupt, CTBackAlignInfo< CBMode > &bi, const PSEQ &pseq, const CNSeq &nseq, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:937
void ReadProtFa(PSEQ &seq, const string &fname, string &id)
int FrAlignFNog1(const CProSplignInterrupt &interrupt, CBackAlignInfo &bi, const PSEQ &pseq, const CNSeq &nseq, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix, bool left_gap=false, bool right_gap=false)
Definition: nucprot.cpp:774
int wlen
Definition: nucprot.hpp:77
int h2len
Definition: nucprot.hpp:77
int h1len
Definition: nucprot.hpp:77
int vlen
Definition: nucprot.hpp:77
int h3len
Definition: nucprot.hpp:77
int wmode
Definition: nucprot.hpp:76
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Fri Sep 20 14:57:24 2024 by modify_doxy.py rev. 669887