NCBI C++ ToolKit
gnomon_seq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gnomon_seq.cpp 101798 2024-02-13 17:18:22Z souvorov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software / database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software / database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alexandre Souvorov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "gnomon_seq.hpp"
36 
38 BEGIN_SCOPE(gnomon)
39 
// Lookup table of one-letter amino-acid codes; '*' marks stop codons and 'X' is used as a filler entry.
// NOTE(review): presumably indexed by a base-5 code of three residues (A,C,G,T,N) - confirm against the users of this table.
41 const char *const k_aa_table = "KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX*Y*YXSSSSS*CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
42 
43 void Convert(const CResidueVec& src, CEResidueVec& dst)
44 {
45  size_t len = src.size();
46  dst.clear();
47  dst.reserve(len);
48  for(size_t i = 0; i < len; ++i)
49  dst.push_back( fromACGT(src[i]) );
50 }
51 
52 void Convert(const CResidueVec& src, CDoubleStrandSeq& dst)
53 {
54  Convert(src,dst[ePlus]);
55  ReverseComplement(dst[ePlus],dst[eMinus]);
56 }
57 
58 void Convert(const CEResidueVec& src, CResidueVec& dst)
59 {
60  size_t len = src.size();
61  dst.clear();
62  dst.reserve(len);
63  for(size_t i = 0; i < len; ++i)
64  dst.push_back( toACGT(src[i]) );
65 }
66 
68 {
69  TSignedSeqPos len = (TSignedSeqPos)src.size();
70  dst.clear();
71  dst.reserve(len);
72  for(TSignedSeqPos i = len-1; i >= 0; --i)
73  dst.push_back(k_toMinus[(int)src[i]]);
74 }
75 
76 const TResidue codons[4][4] = {"ATG", "TAA", "TAG", "TGA"};
77 const TResidue rev_codons[4][4] = {"CAT", "TTA", "CTA", "TCA"};
78 static const EResidue s_ecodons0 [3] = { enA, enT, enG };
79 static const EResidue s_ecodons1 [3] = { enT, enA, enA };
80 static const EResidue s_ecodons2 [3] = { enT, enA, enG };
81 static const EResidue s_ecodons3 [3] = { enT, enG, enA };
82 static const EResidue s_ecodons0r[3] = { enC, enA, enT };
83 static const EResidue s_ecodons1r[3] = { enT, enT, enA };
84 static const EResidue s_ecodons2r[3] = { enC, enT, enA };
85 static const EResidue s_ecodons3r[3] = { enT, enC, enA };
88 
89 template <typename Res>
90 class res_traits {
91 public:
92  static Res _fromACGT(TResidue x)
93  { return x; }
94  static const Res* _codons(int i) { return codons[i]; }
95  static const Res* _rev_codons(int i) { return rev_codons[i]; }
96 };
97 
98 template<>
100 public:
102  { return fromACGT(x); }
103  static const EResidue* _codons(int i) { return ecodons[i]; }
104  static const EResidue* _rev_codons(int i) { return rev_ecodons[i]; }
105 };
106 
107 template <class Res>
108 bool IsStartCodon(const Res * seq, int strand) // seq points to A for both strands
109 {
110  const Res * start_codon;
111  if(strand == ePlus)
112  start_codon = res_traits<Res>::_codons(0);
113  else {
114  start_codon = res_traits<Res>::_rev_codons(0);
115  seq -= 2;
116  }
117  return equal(start_codon,start_codon+3,seq);
118 }
119 
120 template bool IsStartCodon<EResidue>(const EResidue * seq, int strand);
121 template bool IsStartCodon<TResidue>(const TResidue * seq, int strand);
122 
123 template <class Res>
124 bool IsStopCodon(const Res * seq, int strand) // seq points to T for both strands
125 {
126  if(strand == ePlus) {
127  if (*seq != res_traits<Res>::_codons(1)[0]) // T
128  return false;
129  ++seq;
130  for (int i = 1; i <= 3; ++i)
132  return true;
133  return false;
134  } else {
135  if (*seq != res_traits<Res>::_rev_codons(1)[2]) // A
136  return false;
137  seq -= 2;
138  for (int i = 1; i <= 3; ++i)
140  return true;
141  return false;
142  }
143 }
144 template bool IsStopCodon<EResidue>(const EResidue * seq, int strand);
145 template bool IsStopCodon<TResidue>(const TResidue * seq, int strand);
146 
147 
148 void FindAllCodonInstances(TIVec positions[], const EResidue codon[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
149 {
150  for (CEResidueVec::const_iterator pos = mrna.begin()+search_region.GetFrom(); (pos = search(pos,mrna.end(),codon,codon+3)) < mrna.begin()+search_region.GetTo(); ++pos) {
151  int l = (int)(pos-mrna.begin());
152  int frame = l%3;
153  if (fixed_frame==-1 || fixed_frame==frame)
154  positions[frame].push_back(l);
155  }
156 }
157 
158 void FindAllStarts(TIVec starts[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
159 {
160  FindAllCodonInstances(starts, ecodons[0], mrna, search_region, fixed_frame);
161 }
162 
163 void FindAllStops(TIVec stops[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
164 {
165  for (int i=1; i <=3; ++i)
166  FindAllCodonInstances(stops, ecodons[i], mrna, search_region, fixed_frame);
167  for (int f = 0; f < 3; ++f)
168  sort(stops[f].begin(), stops[f].end());
169 }
170 
171 
172 bool Partial5pCodonIsStop(const CEResidueVec& seq_strand, int start, int frame) {
173  if(frame == 0) // no partial codon
174  return false;
175 
176  int codon_start = start+frame-3;
177  if(codon_start >= 0 && IsStopCodon(&seq_strand[codon_start]))
178  return true;
179 
180  return false;
181 }
182 
// Collects candidate start and stop positions for the coding region of a model.
// All positions pushed into starts[]/stops[] are coordinates on mrna, bucketed by position%3
// (negative "fake" entries i-3 are stored in bucket i).
//   model      - gene model supplying strand, reading frame, protein reading frame and start
//   contig_seq - strand sequence used only for the partial 5' codon check
//   mrna       - edited transcript sequence that is scanned for codons
//   mrnamap    - map between original coordinates and mrna coordinates
//   starts     - output, candidate starts per frame
//   stops      - output, candidate stops per frame
//   frame      - output, reading_frame_start%3 when the model has a reading frame, otherwise -1
//   obeystart  - when true and the model has a start, only that start is recorded (no search)
183 void FindStartsStops(const CGeneModel& model, const CEResidueVec& contig_seq, const CEResidueVec& mrna, const CAlignMap& mrnamap, TIVec starts[3], TIVec stops[3], int& frame, bool obeystart)
184 {
185  int left_cds_limit = -1;
186  int reading_frame_start = (int)mrna.size();
187  int reading_frame_stop = (int)mrna.size();
188  int right_cds_limit = (int)mrna.size();
189  frame = -1;
190  EStrand strand = model.Strand();
191 
192  if (!model.ReadingFrame().Empty()) {
193  // left_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetFrom());
194  // _ASSERT(left_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetFrom() == TSignedSeqRange::GetWholeFrom());
195  // right_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetTo());
196  // _ASSERT(right_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetTo() == TSignedSeqRange::GetWholeTo());
197 
198  // if(strand == eMinus) {
199  // std::swap(left_cds_limit,right_cds_limit);
200  // }
201  // if(right_cds_limit < 0) right_cds_limit = mrna.size();
202 
     // reading frame of the model expressed in mrna coordinates
203  TSignedSeqRange rf = mrnamap.MapRangeOrigToEdited(model.ReadingFrame(),true);
204  reading_frame_start = rf.GetFrom();
205  _ASSERT(reading_frame_start >= 0);
206  reading_frame_stop = rf.GetTo();
207  _ASSERT(reading_frame_stop >= 0);
208 
     // a start codon at the very beginning of mrna is excluded from the reading frame (if something remains after it)
209  if (reading_frame_start == 0 && IsStartCodon(&mrna[reading_frame_start]) && reading_frame_start+3 < reading_frame_stop)
210  reading_frame_start += 3;
211 
212  _ASSERT( -1 <= left_cds_limit && left_cds_limit <= reading_frame_start );
213  _ASSERT( 0 <= reading_frame_start && reading_frame_start <= reading_frame_stop && reading_frame_stop < int(mrna.size()) );
214  _ASSERT( reading_frame_stop <= right_cds_limit && right_cds_limit <= int(mrna.size()) );
215 
216  frame = reading_frame_start%3;
217 
     // left_cds_limit is still -1 here because the code computing it above is commented out
218  if (left_cds_limit<0) {
219  if (reading_frame_start >= 3) {
220  FindAllStops(stops,mrna,TSignedSeqRange(0,reading_frame_start),frame); // 5' inframe stops
221  }
222 
223  if (stops[frame].size()>0)
224  left_cds_limit = stops[frame].back()+3; // earliest start of CDS
225  } else {
226  FindAllStops(stops,mrna,TSignedSeqRange(0,left_cds_limit),frame);
227  }
228 
229  reading_frame_start = reading_frame_stop-5; // allow starts inside reading frame if not protein
230  if(model.GetCdsInfo().ProtReadingFrame().NotEmpty()) {
231  TSignedSeqRange protrf = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().ProtReadingFrame(),true);
232  reading_frame_start = min(protrf.GetFrom(),reading_frame_start);
233  }
234  }
235 
236  if (left_cds_limit<0) { // don't know frame or no upstream stop
237  TSignedSeqPos model_start = mrnamap.MapEditedToOrig(0);
238 
239  if(Include(model.GetCdsInfo().ProtReadingFrame(),model_start) && reading_frame_start < 3) {
240  starts[0].push_back(-3); // proteins are scored no matter what
241  } else {
     // on the minus strand the position is mirrored to index into the strand sequence
242  if(strand == eMinus)
243  model_start = (TSignedSeqPos)contig_seq.size()-1-model_start;
244  for (int i = 0; i<3; ++i) {
245  if (frame == -1 || frame == i) {
246 
247  if(Partial5pCodonIsStop(contig_seq,model_start,i))
248  stops[i].push_back(i-3); // stop to limit MaxCds
249  else
250  starts[i].push_back(i-3); // fake start(s) for open cds
251 
252 
253  /*
254  if (UpstreamStartOrSplice(contig_seq,model_start,i))
255  starts[i].push_back(i-3);
256  else
257  stops[i].push_back(i-3); // bogus stop to limit MaxCds
258  */
259  }
260  }
261  }
262 
263  left_cds_limit = 0;
264  }
265 
     // NOTE(review): starts[frame] is indexed with frame == -1 if a model has a start but an empty reading frame - presumably impossible, confirm
266  if(obeystart && model.HasStart()) {
267  TSignedSeqRange start = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().Start(),false);
268  starts[frame].push_back(start.GetFrom());
269  } else if(reading_frame_start-left_cds_limit >= 3) {
270  FindAllStarts(starts,mrna,TSignedSeqRange(left_cds_limit,reading_frame_start-1),frame); // 5' starts or all starts
271  }
272 
273  if (frame==-1) {
274  FindAllStops(stops,mrna,TSignedSeqRange(0,(TSignedSeqPos)mrna.size()-1),frame); // all stops
275  } else if (right_cds_limit - reading_frame_stop >= 3) {
276  FindAllStops(stops,mrna,TSignedSeqRange(reading_frame_stop+1,right_cds_limit),frame); // inframe 3' stops
277  }
278 
     // right_cds_limit is never lowered in the active code, so one fake stop per frame is appended at the mrna end
279  if (int(mrna.size()) <= right_cds_limit) { // fake stops for partials
280  stops[mrna.size()%3].push_back((int)mrna.size());
281  stops[(mrna.size()-1)%3].push_back((int)mrna.size()-1);
282  stops[(mrna.size()-2)%3].push_back((int)mrna.size()-2);
283  }
284 
285 }
286 
/// Find the closest stop located strictly before a given position.
/// @param stops  stop positions in ascending order
/// @param start  reference position
/// @param stop   receives the largest element of stops that is < start; untouched on failure
/// @return true when such a stop exists
bool FindUpstreamStop(const vector<int>& stops, int start, int& stop)
{
    // first element that is not smaller than start; everything before it is upstream
    const vector<int>::const_iterator first_not_before = lower_bound(stops.begin(), stops.end(), start);

    if(first_not_before == stops.begin())
        return false;

    stop = *(first_not_before-1);
    return true;
}
297 
/// Find the first start located at or after a given position.
/// @param starts  start positions in ascending order
/// @param stop    reference position
/// @param start   receives the smallest element of starts that is >= stop; untouched on failure
/// @return true when such a start exists
bool FindFirstStart(const vector<int>& starts, int stop, int& start) {
    const auto candidate = lower_bound(starts.begin(), starts.end(), stop);
    if(candidate == starts.end())
        return false;

    start = *candidate;
    return true;
}
307 
308 /*
309 TInDels CAlignMap::GetAllCorrections() const {
310 
311  TInDels indels = GetInDels(false);
312  for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
313  const string& mism = m_edited_ranges[range].GetMismatch();
314  if(!mism.empty()) {
315  int len = m_orig_ranges[range].GetTo()-m_orig_ranges[range].GetFrom()+1;
316  _ASSERT(len == (int)mism.size());
317  indels.push_back(CInDelInfo(m_orig_ranges[range].GetFrom(), len, CInDelInfo::eMism, mism));
318  }
319  }
320  sort(indels.begin(), indels.end());
321 
322  return indels;
323 }
324 */
325 
326 void PushInDel(TInDels& indels, bool fs_only, TSignedSeqPos p, int len, CInDelInfo::EType type, const string& seq = "") {
327  if(!fs_only || len%3 != 0)
328  indels.push_back(CInDelInfo(p, len, type, seq));
329 }
330 
331 /*
332 TInDels CAlignMap::GetInDels(bool fs_only) const {
333  TInDels indels;
334 
335  if(m_orig_ranges.front().GetTypeFrom() != eGgap) {
336  if(m_orig_ranges.front().GetExtraFrom() > 0) { // everything starts from insertion
337  int len = m_orig_ranges.front().GetExtraFrom();
338  TSignedSeqPos p = m_orig_ranges.front().GetFrom()-len;
339  PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
340  }
341  if(m_edited_ranges.front().GetExtraFrom() > 0) { // everything starts from deletion
342  int len = m_edited_ranges.front().GetExtraFrom();
343  string seq = m_edited_ranges.front().GetExtraSeqFrom();
344  TSignedSeqPos p = m_orig_ranges.front().GetFrom();
345  PushInDel(indels, fs_only, p, len,CInDelInfo::eDel, seq);
346  }
347  }
348 
349  for(unsigned int range = 1; range < m_orig_ranges.size(); ++range) {
350  if(m_orig_ranges[range].GetTypeFrom() == eGgap)
351  continue;
352 
353  if(fs_only) {
354  int len = m_orig_ranges[range].GetExtraFrom()-m_edited_ranges[range].GetExtraFrom();
355  if(m_orig_ranges[range].GetTypeFrom() != eInDel)
356  len += m_orig_ranges[range-1].GetExtraTo()-m_edited_ranges[range-1].GetExtraTo();
357  if(len%3 == 0)
358  continue;
359  }
360 
361  if(m_orig_ranges[range-1].GetTypeTo() != eInDel) {
362  if(m_orig_ranges[range-1].GetExtraTo() > 0) {
363  int len = m_orig_ranges[range-1].GetExtraTo();
364  TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+1;
365  PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
366  }
367  if(m_edited_ranges[range-1].GetExtraTo() > 0) {
368  int len = m_edited_ranges[range-1].GetExtraTo();
369  string seq = m_edited_ranges[range-1].GetExtraSeqTo();
370  TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+m_orig_ranges[range-1].GetExtraTo()+1; // if there is insertion+deletion (mismatch) this is correct position
371  PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
372  }
373  }
374 
375  if(m_orig_ranges[range].GetExtraFrom() > 0) { // insertion on the left side
376  int len = m_orig_ranges[range].GetExtraFrom();
377  TSignedSeqPos p = m_orig_ranges[range].GetFrom()-len;
378  PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
379  }
380  if(m_edited_ranges[range].GetExtraFrom() > 0) { // deletion on the left side
381  int len = m_edited_ranges[range].GetExtraFrom();
382  string seq = m_edited_ranges[range].GetExtraSeqFrom();
383  TSignedSeqPos p = m_orig_ranges[range].GetFrom(); // if there is insertion+deletion (mismatch) this is correct position
384  PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
385  }
386  }
387 
388  if(m_orig_ranges.back().GetTypeTo() != eGgap) {
389  if(m_orig_ranges.back().GetExtraTo() > 0) { // everything ends by insertion
390  int len = m_orig_ranges.back().GetExtraTo();
391  TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
392  PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
393  }
394  if(m_edited_ranges.back().GetExtraTo() > 0) { // everything ends by deletion
395  int len = m_edited_ranges.back().GetExtraTo();
396  string seq = m_edited_ranges.back().GetExtraSeqTo();
397  TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
398  PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
399  }
400  }
401 
402  return indels;
403 }
404 */
405 
406 void CAlignMap::InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string& mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite,
407  EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq)
408 {
409  _ASSERT(len > 0);
410  _ASSERT(m_orig_ranges.empty() || (orig_start > m_orig_ranges.back().GetTo() && edited_start > m_edited_ranges.back().GetTo()));
411 
412  CAlignMap::SMapRangeEdge orig_from(orig_start, left_orige, left_type);
413  CAlignMap::SMapRangeEdge orig_to(orig_start+len-1, right_orige, right_type);
414  m_orig_ranges.push_back(SMapRange(orig_from, orig_to, kEmptyStr));
415 
416  CAlignMap::SMapRangeEdge edited_from(edited_start, left_edite, left_type, left_edit_extra_seq);
417  _ASSERT((int)left_edit_extra_seq.length() == 0 || (int)left_edit_extra_seq.length() == left_edite);
418  CAlignMap::SMapRangeEdge edited_to(edited_start+len-1, right_edite, right_type, right_edit_extra_seq);
419  _ASSERT((int)right_edit_extra_seq.length() == 0 || (int)right_edit_extra_seq.length() == right_edite);
420  m_edited_ranges.push_back(SMapRange(edited_from, edited_to, mism));
421 }
422 
// Splits the original interval [orig_a, orig_b] at the indels/mismatches listed in [fsi_begin, fsi_end)
// and appends the resulting pieces with InsertOneToOneRange().
//   edit_a          - edited coordinate corresponding to orig_a
//   type_a, type_b  - edge types used for the outer left/right edges; inner edges get eInDel
//   gseq_a, gseq_b  - extra edited sequence attached to the outer left/right edges
// Returns the edited coordinate reached after the last inserted piece.
423 TSignedSeqPos CAlignMap::InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin,
424  TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b)
425 {
     // skip the indels located before this interval
426  TInDels::const_iterator fsi = fsi_begin;
427  for( ;fsi != fsi_end && fsi->Loc() < orig_a; ++fsi ) {
428  _ASSERT( !fsi->IntersectingWith(orig_a,orig_b) );
429  }
430 
431  TSignedSeqPos left_orige = 0;
432  TSignedSeqPos left_edite = (TSignedSeqPos)gseq_a.length();
433  string left_edit_extra_seq = gseq_a;
434  string mism;
435 
     // indels sitting exactly at the left end: insertions shift orig_a, deletions shift edit_a
436  for( ;fsi != fsi_end && fsi->Loc() == orig_a && !fsi->IsMismatch(); ++fsi ) { // first left end
437  if(fsi->IsInsertion()) {
438  _ASSERT(type_a != eBoundary);
439  orig_a += fsi->Len();
440  left_orige += fsi->Len();
441  } else {
442  edit_a += fsi->Len();
443  left_edite += fsi->Len();
444  left_edit_extra_seq += fsi->GetInDelV();
445  }
446  }
     // concatenate contiguous mismatches starting at orig_a into one string
447  for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
448  mism += fsi->GetInDelV();
449 
     // one piece per iteration, as long as the next indel ends inside the interval
450  while(fsi != fsi_end && fsi->InDelEnd() <= orig_b+1) {
     // piece length: up to the next indel, or the mismatch run if one was collected
451  TSignedSeqPos len = (mism.empty() ? fsi->Loc()-orig_a : (TSignedSeqPos)mism.size());
452  _ASSERT(len > 0 && orig_a+len-1 <= orig_b);
453 
454  TSignedSeqPos bb = orig_a+len;
455  TSignedSeqPos right_orige = 0;
456  TSignedSeqPos right_edite = 0;
457  string right_edit_extra_seq;
458  for( ;fsi != fsi_end && fsi->Loc() == bb && !fsi->IsMismatch(); ++fsi ) { // right end
459  if (fsi->IsInsertion()) {
460  right_orige += fsi->Len();
461  bb += fsi->Len();
462  } else {
463  right_edite += fsi->Len();
464  right_edit_extra_seq += fsi->GetInDelV();
465  }
466  }
467 
468  TSignedSeqPos next_orig_a = orig_a+len+right_orige;
469  EEdgeType tb = eInDel;
     // this piece reaches the end of the interval: use the outer right edge type and sequence
470  if(next_orig_a > orig_b) {
471  right_edit_extra_seq += gseq_b;
472  right_edite += gseq_b.length();
473  tb = type_b;
474  }
475  InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, right_orige, right_edite, type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
476 
     // the right edge of this piece becomes the left edge of the next one
477  orig_a = next_orig_a;
478  edit_a += len+right_edite;
479  type_a = eInDel;
480  left_orige = right_orige;
481  left_edite = right_edite;
482  left_edit_extra_seq = right_edit_extra_seq;
483  mism.clear();
484  for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
485  mism += fsi->GetInDelV();
486  }
487 
     // a mismatch run left over after the loop gets its own piece
     // NOTE(review): right_edite is passed as gseq_b.length() even when tb stays eInDel and the extra sequence is empty - confirm this is intended
488  if(!mism.empty()) {
489  TSignedSeqPos len = (TSignedSeqPos)mism.size();
490  string right_edit_extra_seq;
491  EEdgeType tb = eInDel;
492  if(orig_a+len > orig_b) {
493  right_edit_extra_seq = gseq_b;
494  tb = type_b;
495  }
496  InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, (TSignedSeqPos)gseq_b.length(), type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
497  orig_a += len;
498  edit_a += len;
499  type_a = eInDel;
500  left_orige = 0;
501  left_edite = 0;
502  left_edit_extra_seq.clear();
503  mism.clear();
504  }
505 
     // remainder of the interval without indels
506  if(orig_a <= orig_b) {
507  int len = orig_b-orig_a+1;
508  _ASSERT(len > 0);
509  InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, (TSignedSeqPos)gseq_b.length(), type_a, type_b, left_edit_extra_seq, gseq_b);
510  edit_a += len;
511  }
512 
513  return edit_a;
514 }
515 
// Builds the map from exons given together with their coordinates on the transcript.
//   exons            - exons of the model; an exon with empty Limits() is treated as a gap filler carrying only m_seq
//   transcript_exons - transcript range of every exon (same count as exons)
//   indels           - indels/mismatches applied inside the exons
//   orientation      - stored in m_orientation; decides which transcript exon supplies the first edited coordinate
//   target_len       - stored in m_target_len
516 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int target_len ) : m_orientation(orientation), m_target_len(target_len) {
517 
     // debug-only consistency check: exon lengths, transcript lengths and indel lengths must balance to zero
518 #ifdef _DEBUG
519  _ASSERT(transcript_exons.size() == exons.size());
520  _ASSERT(transcript_exons.size() == 1 || (orientation == ePlus && transcript_exons.front().GetFrom() < transcript_exons.back().GetFrom()) ||
521  (orientation == eMinus && transcript_exons.front().GetFrom() > transcript_exons.back().GetFrom()));
522  TSignedSeqPos diff = 0;
523  for(unsigned int i = 0; i < exons.size(); ++i) {
524  TSignedSeqPos exonlen = (exons[i].Limits().Empty()) ? (TSignedSeqPos)exons[i].m_seq.length() : exons[i].Limits().GetLength();
525  diff += exonlen-(transcript_exons[i].GetTo()-transcript_exons[i].GetFrom()+1);
526  }
527  ITERATE(TInDels, f, indels) {
528  if(!f->IsMismatch())
529  diff += (f->IsDeletion()) ? f->Len() : -f->Len();
530  }
531  _ASSERT(diff == 0);
532 #endif
533 
534  m_orig_ranges.reserve(exons.size()+indels.size());
535  m_edited_ranges.reserve(exons.size()+indels.size());
536 
     // running edited coordinate of the next range to insert
537  TSignedSeqPos estart = (m_orientation == ePlus) ? transcript_exons.front().GetFrom() : transcript_exons.back().GetFrom();
538  for(unsigned int i = 0; i < exons.size(); ++i) {
539  if(exons[i].Limits().Empty()) {
     // a gap filler is never adjacent to another gap filler
540  _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
541  _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
542  } else {
543  EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
544  EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
545  string gseq_a;
546  string gseq_b;
547  if(i > 0 && exons[i-1].Limits().Empty()) { // prev exon is Ggap
548  type_a = eGgap;
549  gseq_a = exons[i-1].m_seq;
550  estart += gseq_a.length();
551  }
552  if(i < exons.size()-1 && exons[i+1].Limits().Empty()) { // next exon is Ggap
553  type_b = eGgap;
554  gseq_b = exons[i+1].m_seq;
555  }
556  if(m_orientation == eMinus) {
557  ReverseComplement(gseq_a.begin(),gseq_a.end());
558  ReverseComplement(gseq_b.begin(),gseq_b.end());
559  }
560  estart = InsertIndelRangesForInterval(exons[i].GetFrom(), exons[i].GetTo(), estart, indels.begin(), indels.end(), type_a, type_b, gseq_a, gseq_b);
561  }
562 
     // advance by the distance between consecutive transcript exons
563  if(i != exons.size()-1) {
564  if(m_orientation == ePlus) {
565  estart += transcript_exons[i+1].GetFrom()-transcript_exons[i].GetTo()-1;
566  } else {
567  estart += transcript_exons[i].GetFrom()-transcript_exons[i+1].GetTo()-1;
568  }
569  }
570  }
571 }
572 
// Builds the map from exons clipped to the range lim; edited coordinates start at 0.
//   exons    - exons of the model; an exon with empty Limits() is treated as a gap filler carrying only m_seq
//   indels   - indels/mismatches applied inside the exons
//   strand   - stored in m_orientation
//   lim      - exons entirely outside lim are ignored, exons crossing it are clipped and get an eBoundary edge
//   holelen  - added to the edited coordinate between two exons not joined by splices on both sides
//   polyalen - added to the target length after the last edited range
// NOTE(review): m_target_len is assigned only when at least one range was inserted - confirm it has a default elsewhere.
573 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const TInDels& indels, EStrand strand, TSignedSeqRange lim, int holelen, int polyalen) : m_orientation(strand) {
574 
575  TInDels::const_iterator fsi_begin = indels.begin();
576  TInDels::const_iterator fsi_end = indels.end();
577 
578  m_orig_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
579  m_edited_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
580 
581  TSignedSeqPos estart = 0;
582  for(unsigned int i = 0; i < exons.size(); ++i) {
583  if(exons[i].Limits().Empty()) {
     // a gap filler is never adjacent to another gap filler
584  _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
585  _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
586  } else {
587  TSignedSeqPos start = exons[i].GetFrom();
588  TSignedSeqPos stop = exons[i].GetTo();
589  EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
590  EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
591  string gseq_a;
592  string gseq_b;
593  if(i > 0 && exons[i-1].Limits().Empty()) { // prev exon is Ggap
594  type_a = eGgap;
595  gseq_a = exons[i-1].m_seq;
596  estart += gseq_a.length();
597  }
598  if(i < exons.size()-1 && exons[i+1].Limits().Empty()) { // next exon is Ggap
599  type_b = eGgap;
600  gseq_b = exons[i+1].m_seq;
601  }
602  if(m_orientation == eMinus) {
603  ReverseComplement(gseq_a.begin(),gseq_a.end());
604  ReverseComplement(gseq_b.begin(),gseq_b.end());
605  }
606 
     // exon entirely before lim is skipped; the first exon entirely after lim ends the loop
607  if(stop < lim.GetFrom()) continue;
608  if(lim.GetTo() < start) break;
609 
     // clip the exon to lim; a clipped end becomes a plain boundary
610  if(lim.GetFrom() >= start) {
611  start = lim.GetFrom();
612  type_a = eBoundary;
613  }
614  if(lim.GetTo() <= stop) {
615  stop = lim.GetTo();
616  type_b = eBoundary;
617  }
618 
619  estart = InsertIndelRangesForInterval(start, stop, estart, fsi_begin, fsi_end, type_a, type_b, gseq_a, gseq_b);
620  if(i != exons.size()-1 && (!exons[i+1].m_fsplice || !exons[i].m_ssplice))
621  estart += holelen;
622  }
623  }
624 
625  if(!m_edited_ranges.empty())
626  m_target_len = m_edited_ranges.back().GetExtendedTo()+1+polyalen;
627 
628  _ASSERT(m_edited_ranges.size() == m_orig_ranges.size());
629 }
630 
// Assembles the edited sequence from the original one according to the stored ranges.
//   original_sequence - indexed with original coordinates
//   edited_sequence   - output; cleared first
//   includeholes      - when true, the parts not covered by ranges (before the first range, between ranges
//                       ending with eBoundary, after the last range) are filled with 'N'; when false, the
//                       extra sequence stored at the outer edges is used instead and holes are dropped
// For eMinus orientation the assembled sequence is reverse-complemented at the end.
631 template <class In, class Out>
632 void CAlignMap::EditedSequence(const In& original_sequence, Out& edited_sequence, bool includeholes) const
633 {
634  edited_sequence.clear();
635 
     // leading part: 'N' padding or the extra sequence of the first range
636  string s;
637  if(includeholes) {
638  int l = (m_orientation == ePlus) ? m_edited_ranges.front().GetFrom() : CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1;
639  s.insert(s.end(), l, 'N');
640  } else {
641  s = m_edited_ranges.front().GetExtraSeqFrom();
642  }
643  ITERATE(string, i, s)
644  edited_sequence.push_back(res_traits<typename Out::value_type>::_fromACGT(*i));
645 
646  for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
     // a range with a mismatch string contributes that string instead of the original residues
647  string seq = m_edited_ranges[range].GetMismatch();
648 
649  if(seq.empty()) {
650  int a = m_orig_ranges[range].GetFrom();
651  int b = m_orig_ranges[range].GetTo()+1;
652  for(int i = a; i < b; ++i)
653  edited_sequence.push_back(original_sequence[i]);
654  // edited_sequence.insert(edited_sequence.end(),original_sequence.begin()+a, original_sequence.begin()+b);
655  }
656 
     // what follows the range: hole padding, extra sequence of adjacent edges, or the trailing part
657  if(range < (int)m_orig_ranges.size()-1) {
658  if(m_edited_ranges[range].GetTypeTo() == eBoundary) {
659  if(includeholes) {
660  int l = m_edited_ranges[range+1].GetFrom()-m_edited_ranges[range].GetTo()-1; // missed part
661  seq.insert(seq.end(), l, 'N');
662  }
663  } else if(m_edited_ranges[range].GetTypeTo() == eSplice) {
664  seq += m_edited_ranges[range].GetExtraSeqTo() + m_edited_ranges[range+1].GetExtraSeqFrom(); // indels from two exon ends
665  } else {
666  seq += m_edited_ranges[range].GetExtraSeqTo(); // indel inside exon or Ggap
667  }
668  } else {
669  if(includeholes) {
670  int l = (m_orientation == ePlus) ? CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1 : m_edited_ranges.front().GetFrom();
671  seq.insert(seq.end(), l, 'N');
672  } else {
673  seq += m_edited_ranges.back().GetExtraSeqTo();
674  }
675  }
676  ITERATE(string, i, seq)
677  edited_sequence.push_back(res_traits<typename Out::value_type>::_fromACGT(*i));
678  }
679 
680  if(m_orientation == eMinus)
681  ReverseComplement(edited_sequence.begin(), edited_sequence.end());
682 }
683 
684 int CAlignMap::FindLowerRange(const vector<CAlignMap::SMapRange>& a, TSignedSeqPos p) {
685  int num = (int)(lower_bound(a.begin(), a.end(), CAlignMap::SMapRange(p+1, p+1, kEmptyStr))-a.begin()-1);
686  return num;
687 }
688 
// Explicit instantiations of CAlignMap::EditedSequence for the input/output container pairs listed below.
689 template
690 void CAlignMap::EditedSequence<CResidueVec,CResidueVec>(const CResidueVec& original_sequence, CResidueVec& edited_sequence, bool includeholes) const;
691 template
692 void CAlignMap::EditedSequence<CEResidueVec,CEResidueVec>(const CEResidueVec& original_sequence, CEResidueVec& edited_sequence, bool includeholes) const;
693 template
694 void CAlignMap::EditedSequence<string,string>(const string& original_sequence, string& edited_sequence, bool includeholes) const;
695 template
696 void CAlignMap::EditedSequence<CAlignCollapser::CPartialString,string>(const CAlignCollapser::CPartialString& original_sequence, string& edited_sequence, bool includeholes) const;
697 
699 
700  if(m_orientation == eMinus) {
701  int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
702  TSignedSeqPos left = edited_range.GetTo();
703  TSignedSeqPos right = edited_range.GetFrom();
704 
705  if(left == TSignedSeqRange::GetWholeTo()) {
707  } else {
708  left = offset-left;
709  }
710 
711  if(right == TSignedSeqRange::GetWholeFrom()) {
712  right = TSignedSeqRange::GetWholeTo();
713  } else {
714  right = offset-right;
715  }
716 
717  edited_range = TSignedSeqRange(left, right);
718  }
719 
720  _ASSERT(edited_range.NotEmpty() && Include(TSignedSeqRange(m_edited_ranges.front().GetExtendedFrom(),m_edited_ranges.back().GetExtendedTo()), edited_range));
721 
722  TSignedSeqPos a = edited_range.GetFrom();
723  int numa = FindLowerRange(m_edited_ranges, a);
724 
725  if(numa < 0 || a > m_edited_ranges[numa].GetTo()) { // a was insertion on the genome, moved to first projectable point
726  ++numa;
727  if((int)m_edited_ranges.size() == numa)
728  return TSignedSeqRange::GetEmpty();
729  a = m_edited_ranges[numa].GetFrom();
730  }
731 
732  TSignedSeqPos b = edited_range.GetTo();
733  int numb = FindLowerRange(m_edited_ranges, b);
734 
735  if(b > m_edited_ranges[numb].GetTo()) { // a was insertion on the genome, moved to first projectable point
736  b = m_edited_ranges[numb].GetTo();
737  }
738 
739  if(m_orientation == eMinus) {
740  int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
741  TSignedSeqPos left = b;
742  TSignedSeqPos right = a;
743 
744  if(left == TSignedSeqRange::GetWholeTo()) {
746  } else {
747  left = offset-left;
748  }
749 
750  if(right == TSignedSeqRange::GetWholeFrom()) {
751  right = TSignedSeqRange::GetWholeTo();
752  } else {
753  right = offset-right;
754  }
755 
756  a = left;
757  b = right;
758  }
759 
760  return TSignedSeqRange(a, b);
761 }
762 
763 
// Shrinks a range given in original coordinates so that both ends fall inside stored original ranges.
//   orig_range     - must be non-empty and lie within the extended limits of the stored original ranges
//   snap_to_codons - when true, the left end is additionally moved right and the right end moved left
//                    until the corresponding edited position satisfies the %3 conditions tested below
// Returns an empty range when an end runs out of ranges while being moved.
764 TSignedSeqRange CAlignMap::ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons) const {
765 
766  _ASSERT(orig_range.NotEmpty() && Include(TSignedSeqRange(m_orig_ranges.front().GetExtendedFrom(),m_orig_ranges.back().GetExtendedTo()), orig_range));
767 
768  TSignedSeqPos a = orig_range.GetFrom();
769  int numa = FindLowerRange(m_orig_ranges, a);
770 
771  if(numa < 0 || a > m_orig_ranges[numa].GetTo()) { // a was insertion on the genome, moved to first projectable point
772  ++numa;
773  if((int)m_orig_ranges.size() == numa)
774  return TSignedSeqRange::GetEmpty();
775  a = m_orig_ranges[numa].GetFrom();
776  }
777  if(snap_to_codons) {
778  bool snapped = false;
779  while(!snapped) {
     // edited position of a; mirrored for the minus orientation
780  TSignedSeqPos tp = m_edited_ranges[numa].GetFrom()+a-m_orig_ranges[numa].GetFrom();
781  if(m_orientation == eMinus)
782  tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
783  if((m_orientation == ePlus && tp%3 == 0) || (m_orientation == eMinus && tp%3 == 2)) {
784  snapped = true;
785  } else { // not a codon boundary
786  if(a < m_orig_ranges[numa].GetTo()) { // can move in this interval
787  ++a;
788  } else { // moved to next interval
789  ++numa;
790  if((int)m_orig_ranges.size() == numa)
791  return TSignedSeqRange::GetEmpty();
792  a = m_orig_ranges[numa].GetFrom();
793  }
794  }
795  }
796  }
797 
798 
799  TSignedSeqPos b = orig_range.GetTo();
800  int numb = FindLowerRange(m_orig_ranges, b);
801 
802  if(b > m_orig_ranges[numb].GetTo()) { // b was insertion on the genome, moved back to last projectable point
803  b = m_orig_ranges[numb].GetTo();
804  }
805  if(snap_to_codons) {
806  bool snapped = false;
807  while(!snapped) {
     // edited position of b; mirrored for the minus orientation
808  TSignedSeqPos tp = m_edited_ranges[numb].GetFrom()+b-m_orig_ranges[numb].GetFrom();
809  if(m_orientation == eMinus)
810  tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
811  if((m_orientation == ePlus && tp%3 == 2) || (m_orientation == eMinus && tp%3==0)) {
812  snapped = true;
813  } else { // not a codon boundary
814  if(b > m_orig_ranges[numb].GetFrom()) { // can move in this interval
815  --b;
816  } else { // moved to previous interval
817  --numb;
818  if(numb < 0)
819  return TSignedSeqRange::GetEmpty();
820  b = m_orig_ranges[numb].GetTo();
821  }
822  }
823  }
824  }
825 
826  return TSignedSeqRange(a, b);
827 }
828 
830  orig_pos = MapOrigToEdited(orig_pos);
831  if(orig_pos < 0)
832  return orig_pos;
833  if(m_orientation == ePlus)
834  orig_pos += len;
835  else
836  orig_pos -= len;
837  orig_pos = MapEditedToOrig(orig_pos);
838  return orig_pos;
839 }
840 
841 
842 TSignedSeqPos CAlignMap::MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode) {
843  if(p < a.front().GetExtendedFrom() || p > a.back().GetExtendedTo()) return -1;
844 
845  if(p < a.front().GetFrom()) {
846  if(move_mode == eLeftEnd && b.front().GetTypeFrom() != eGgap) {
847  return b.front().GetExtendedFrom();
848  } else {
849  return -1;
850  }
851  }
852 
853  if(p > a.back().GetTo()) {
854  if(move_mode == eRightEnd && b.back().GetTypeTo() != eGgap) {
855  return b.back().GetExtendedTo();
856  } else {
857  return -1;
858  }
859  }
860 
861  int num = FindLowerRange(a, p); // range a[num] exists and its start <= p
862  // if a[num+1] exists all points are > p
863  if(p > a[num].GetTo()) { // between ranges (insertion or intron in a), num+1 exists
864  if(a[num].GetTypeTo() == eGgap)
865  return -1;
866 
867  switch(move_mode) {
868  case eLeftEnd:
869  return b[num+1].GetExtendedFrom();
870  case eRightEnd:
871  return b[num].GetExtendedTo();
872  default:
873  return -1;
874  }
875  } else if(p == a[num].GetTo()) {
876  if(move_mode == eRightEnd && b[num].GetTypeTo() != eGgap) {
877  return b[num].GetExtendedTo();
878  } else if(p == a[num].GetFrom() && move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) { // one base interval
879  return b[num].GetExtendedFrom();
880  } else {
881  return b[num].GetTo();
882  }
883  } else if(p == a[num].GetFrom()) {
884  if(move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) {
885  return b[num].GetExtendedFrom();
886  } else {
887  return b[num].GetFrom();
888  }
889  } else {
890  return b[num].GetFrom()+p-a[num].GetFrom();
891  }
892 }
893 
896 
897  if(m_orientation == eMinus && p >= 0) {
898  p = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-p;
899  }
900  return p;
901 }
902 
904  if(m_orientation == eMinus) {
905  edited_pos = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-edited_pos;
906  }
907 
908  return MapAtoB(m_edited_ranges, m_orig_ranges, edited_pos, eSinglePoint);
909 }
910 
911 
912 TSignedSeqRange CAlignMap::MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend) {
913 
914  if(r.Empty()) return TSignedSeqRange::GetEmpty();
915 
916  TSignedSeqPos left;
917  if(r.GetFrom() == TSignedSeqRange::GetWholeFrom()) {
919  } else {
920  left = MapAtoB(a, b, r.GetFrom(), lend);
921  if(left < 0)
922  return TSignedSeqRange::GetEmpty();
923  }
924  TSignedSeqPos right;
925  if(r.GetTo() == TSignedSeqRange::GetWholeTo()) {
926  right = TSignedSeqRange::GetWholeTo();
927  } else {
928  right = MapAtoB(a, b, r.GetTo(), rend);
929  if(right < 0)
930  return TSignedSeqRange::GetEmpty();
931  }
932 
933  _ASSERT(right >= left);
934 
935  return TSignedSeqRange(left, right);
936 }
937 
939 
940  if(orig_range.Empty()) return TSignedSeqRange::GetEmpty();
941 
942  TSignedSeqRange er = MapRangeAtoB(m_orig_ranges, m_edited_ranges, orig_range, lend, rend);
943 
944  if(er.Empty() || m_orientation == ePlus)
945  return er;
946 
947  int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
948  TSignedSeqPos left = er.GetTo();
949  TSignedSeqPos right = er.GetFrom();
950 
951  if(left == TSignedSeqRange::GetWholeTo()) {
953  } else {
954  left = offset-left;
955  }
956 
957  if(right == TSignedSeqRange::GetWholeFrom()) {
958  right = TSignedSeqRange::GetWholeTo();
959  } else {
960  right = offset-right;
961  }
962 
963  return TSignedSeqRange(left, right);
964 }
965 
967 
968  if(edited_range.Empty()) return TSignedSeqRange::GetEmpty();
969 
970  if(m_orientation == eMinus) {
971  int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
972  TSignedSeqPos left = edited_range.GetTo();
973  TSignedSeqPos right = edited_range.GetFrom();
974 
975  if(left == TSignedSeqRange::GetWholeTo()) {
977  } else {
978  left = offset-left;
979  }
980 
981  if(right == TSignedSeqRange::GetWholeFrom()) {
982  right = TSignedSeqRange::GetWholeTo();
983  } else {
984  right = offset-right;
985  }
986 
987  edited_range = TSignedSeqRange(left, right);
988  }
989 
990  return MapRangeAtoB(m_edited_ranges, m_orig_ranges, edited_range, withextras);
991 }
992 
994 
995  int len = MapRangeOrigToEdited(ab, lend, rend).GetLength();
996 
997  for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
998  if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i].GetFrom()))
999  len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
1000  }
1001 
1002  return len;
1003 }
1004 
1005 int CAlignMap::FShiftedLen(TSignedSeqRange ab, bool withextras) const {
1006 
1007  int len = MapRangeOrigToEdited(ab, withextras).GetLength();
1008 
1009  for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
1010  if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i-1].GetTo()) && Include(ab,m_orig_ranges[i].GetFrom()))
1011  len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
1012  }
1013 
1014  return len;
1015 }
1016 
1017 
1018 
1019 END_SCOPE(gnomon)
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
Definition: gnomon_seq.cpp:829
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
Definition: gnomon_seq.cpp:684
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
Definition: gnomon_seq.cpp:698
int TargetLen() const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
Definition: gnomon_seq.cpp:406
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
Definition: gnomon_seq.cpp:912
vector< SMapRange > m_edited_ranges
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
Definition: gnomon_seq.cpp:423
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
Definition: gnomon_seq.cpp:966
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
Definition: gnomon_seq.cpp:894
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
Definition: gnomon_seq.cpp:632
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
Definition: gnomon_seq.cpp:903
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
Definition: gnomon_seq.cpp:764
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:993
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
Definition: gnomon_seq.cpp:842
EStrand m_orientation
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:938
vector< SMapRange > m_orig_ranges
TSignedSeqRange Start() const
TSignedSeqRange ProtReadingFrame() const
TSignedSeqRange ReadingFrame() const
const CCDSInfo & GetCdsInfo() const
vector< CModelExon > TExons
bool HasStart() const
EStrand Strand() const
static EResidue _fromACGT(TResidue x)
Definition: gnomon_seq.cpp:101
static const EResidue * _rev_codons(int i)
Definition: gnomon_seq.cpp:104
static const EResidue * _codons(int i)
Definition: gnomon_seq.cpp:103
static const Res * _rev_codons(int i)
Definition: gnomon_seq.cpp:95
static const Res * _codons(int i)
Definition: gnomon_seq.cpp:94
static Res _fromACGT(TResidue x)
Definition: gnomon_seq.cpp:92
static ulg bb
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
int offset
Definition: replacements.h:160
static FILE * f
Definition: readconf.c:23
vector< TResidue > CResidueVec
vector< int > TIVec
@ enN
@ enC
@ enG
@ enT
@ enA
EStrand
@ eMinus
@ ePlus
bool Include(TSignedSeqRange big, TSignedSeqRange small)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool IsStopCodon(const Res *seq, int strand)
Definition: gnomon_seq.cpp:124
bool FindFirstStart(const vector< int > &starts, int stop, int &start)
Definition: gnomon_seq.cpp:298
bool FindUpstreamStop(const vector< int > &stops, int start, int &stop)
Definition: gnomon_seq.cpp:287
static const EResidue s_ecodons2[3]
Definition: gnomon_seq.cpp:80
const TResidue rev_codons[4][4]
Definition: gnomon_seq.cpp:77
const TResidue codons[4][4]
Definition: gnomon_seq.cpp:76
void FindAllStops(TIVec stops[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
Definition: gnomon_seq.cpp:163
const char *const k_aa_table
Definition: gnomon_seq.cpp:41
void FindStartsStops(const CGeneModel &model, const CEResidueVec &contig_seq, const CEResidueVec &mrna, const CAlignMap &mrnamap, TIVec starts[3], TIVec stops[3], int &frame, bool obeystart)
Definition: gnomon_seq.cpp:183
const EResidue k_toMinus[5]
Definition: gnomon_seq.cpp:40
void FindAllCodonInstances(TIVec positions[], const EResidue codon[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
Definition: gnomon_seq.cpp:148
const EResidue * ecodons[4]
Definition: gnomon_seq.cpp:86
static const EResidue s_ecodons0r[3]
Definition: gnomon_seq.cpp:82
static const EResidue s_ecodons1[3]
Definition: gnomon_seq.cpp:79
static const EResidue s_ecodons2r[3]
Definition: gnomon_seq.cpp:84
template bool IsStartCodon< TResidue >(const TResidue *seq, int strand)
const EResidue * rev_ecodons[4]
Definition: gnomon_seq.cpp:87
template bool IsStartCodon< EResidue >(const EResidue *seq, int strand)
void ReverseComplement(const CEResidueVec &src, CEResidueVec &dst)
Definition: gnomon_seq.cpp:67
bool IsStartCodon(const Res *seq, int strand)
Definition: gnomon_seq.cpp:108
template bool IsStopCodon< EResidue >(const EResidue *seq, int strand)
void Convert(const CResidueVec &src, CEResidueVec &dst)
Definition: gnomon_seq.cpp:43
template bool IsStopCodon< TResidue >(const TResidue *seq, int strand)
static const EResidue s_ecodons3r[3]
Definition: gnomon_seq.cpp:85
bool Partial5pCodonIsStop(const CEResidueVec &seq_strand, int start, int frame)
Definition: gnomon_seq.cpp:172
static const EResidue s_ecodons3[3]
Definition: gnomon_seq.cpp:81
void PushInDel(TInDels &indels, bool fs_only, TSignedSeqPos p, int len, CInDelInfo::EType type, const string &seq="")
Definition: gnomon_seq.cpp:326
void FindAllStarts(TIVec starts[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
Definition: gnomon_seq.cpp:158
static const EResidue s_ecodons0[3]
Definition: gnomon_seq.cpp:78
static const EResidue s_ecodons1r[3]
Definition: gnomon_seq.cpp:83
TResidue toACGT(EResidue c)
Definition: gnomon_seq.hpp:81
EResidue fromACGT(TResidue c)
Definition: gnomon_seq.hpp:59
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
position_type GetLength(void) const
Definition: range.hpp:158
bool NotEmpty(void) const
Definition: range.hpp:152
static TThisType GetEmpty(void)
Definition: range.hpp:306
bool Empty(void) const
Definition: range.hpp:148
static position_type GetWholeFrom(void)
Definition: range.hpp:256
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
static position_type GetWholeTo(void)
Definition: range.hpp:264
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void Out(T t, int w, CNcbiOstream &to=cout)
Definition: parse.cpp:467
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: type.c:6
#define _ASSERT
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:57:52 2024 by modify_doxy.py rev. 669887