NCBI C++ ToolKit
xcript_tests.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: xcript_tests.cpp 79404 2017-09-07 17:37:11Z astashya $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio, Josh Cherry
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 #include <corelib/ncbitime.hpp>
35 #include <objects/general/Date.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/MolInfo.hpp>
41 #include <objects/seq/Seq_inst.hpp>
42 #include <objects/seq/Seqdesc.hpp>
44 #include <objects/seq/Seq_hist.hpp>
47 #include <serial/iterator.hpp>
50 #include <objmgr/util/sequence.hpp>
51 #include <objmgr/seqdesc_ci.hpp>
52 #include <objmgr/feat_ci.hpp>
57 #include <objmgr/seq_vector.hpp>
58 #include <algo/gnomon/gnomon.hpp>
59 #include <algo/sequence/orf.hpp>
60 #include <map>
61 
64 USING_SCOPE(gnomon);
65 
66 
68  const CSeqTestContext* ctx) const
69 {
70  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
71  if (id && ctx) {
72  CBioseq_Handle handle = ctx->GetScope().GetBioseqHandle(*id);
73  return handle.CanGetInst_Mol() && handle.GetInst_Mol() == CSeq_inst::eMol_rna;
74  }
75  return false;
76 }
77 
78 
81  const CSeqTestContext* ctx)
82 {
84  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
85  if ( !id || !ctx ) {
86  return ref;
87  }
88 
89  ref.Reset(new CSeq_test_result_set());
90 
92  ref->Set().push_back(result);
93 
94  SAnnotSelector sel;
96  sel.SetResolveDepth(0);
97  CSeq_loc loc;
98  loc.SetWhole().Assign(*id);
99  CFeat_CI feat_iter(ctx->GetScope(), loc, sel);
100 
101  result->SetOutput_data()
102  .AddField("count", (int) feat_iter.GetSize());
103  return ref;
104 }
105 
106 
107 // A simplistic Kozak strength. Best is RNNXXXG, where
108 // XXX is the start codon. This is "strong". Having just
109 // R or just G is "moderate", and having neither is "weak".
111 {
112  eNone, // used to indicate that no start has been seen
115  eStrong
116 };
117 
118 
119 // vec must be set to IUPAC coding
121 {
122  int score = eWeak;
123  if (pos >= 3 &&
124  (vec[pos - 3] == 'A' || vec[pos - 3] == 'G')) {
125  ++score;
126  }
127  if (vec.size() > pos + 3 &&
128  vec[pos + 3] == 'G') {
129  ++score;
130  }
131  return EKozakStrength(score);
132 }
133 
134 
136 {
137  switch (strength) {
138  default:
139  case eNone:
140  return "none";
141 
142  case eWeak:
143  return "weak";
144 
145  case eModerate:
146  return "moderate";
147 
148  case eStrong:
149  return "strong";
150  }
151 }
152 
153 
154 // Return a SeqVector describing the coding region, in the
155 // correct orientation, plus the upstream region.
156 // Put the length of the upstream region in upstream_length
158  const CSeqTestContext* ctx,
159  TSeqPos& upstream_length)
160 {
161  CScope& scope = ctx->GetScope();
162  const CSeq_loc& first_cds_loc
163  = CSeq_loc_CI(feat_iter->GetLocation()).GetEmbeddingSeq_loc();
164  CRef<CSeq_loc> upstr(new CSeq_loc);
165  const CSeq_id& id = sequence::GetId(first_cds_loc, 0);
166  upstr->SetInt().SetId().Assign(id);
167  if (sequence::GetStrand(first_cds_loc) == eNa_strand_minus) {
168  upstr->SetInt().SetStrand(eNa_strand_minus);
169  upstr->SetInt().SetFrom(sequence::GetStop(first_cds_loc, 0) + 1);
170  upstr->SetInt().SetTo(sequence::GetLength(id, &scope) - 1);
171  } else {
172  upstr->SetInt().SetFrom(0);
173  upstr->SetInt().SetTo(sequence::GetStart(first_cds_loc, 0) - 1);
174  }
175  CSeq_loc loc;
176  loc.SetMix().AddSeqLoc(*upstr);
177  loc.SetMix().AddSeqLoc(feat_iter->GetLocation());
178  CSeqVector vec(loc, scope);
179  upstream_length = sequence::GetLength(*upstr, 0);
180  return vec;
181 }
182 
183 
184 // If set, return genetic code. Otherwise return
185 // "standard" genetic code.
187 {
188  if (cdr.CanGetCode()) {
189  return CConstRef<CGenetic_code>(&cdr.GetCode());
190  }
191  CRef<CGenetic_code> standard(new CGenetic_code);
193  code_id->SetId(1);
194  standard->Set().push_back(code_id);
195  return standard;
196 }
197 
198 
199 static void s_CdsFlags(const CSeq_id& id, const CSeqTestContext* ctx,
200  CFeat_CI feat_iter, CSeq_test_result& result)
201 {
202  result.SetOutput_data()
203  .AddField("is_partial",
204  feat_iter->IsSetPartial() && feat_iter->GetPartial());
205  result.SetOutput_data()
206  .AddField("is_pseudo",
207  feat_iter->IsSetPseudo() && feat_iter->GetPseudo());
208  result.SetOutput_data()
209  .AddField("is_except",
210  feat_iter->IsSetExcept() && feat_iter->GetExcept());
211 }
212 
213 
216  const CSeqTestContext* ctx)
217 {
218  return x_TestAllCdregions(obj, ctx, "cds_flags", s_CdsFlags);
219 }
220 
221 
// Look for ATG codons upstream of, and in frame with, the start of a coding
// region, and report the one with the best Kozak strength.
//
// Output fields written to `result`:
//   inframe_upstream_start_exists               - whether any such ATG exists
//   inframe_upstream_start_best_kozak_strength  - only when one exists
//   nearest_best_upstream_start_distance        - only when one exists
//
// NOTE(review): `result` is used in the body but the parameter line that
// declares it is absent from this listing (a line of the signature is
// missing between `feat_iter,` and `{`).
222 static void s_InframeUpstreamStart(const CSeq_id& id,
223  const CSeqTestContext* ctx,
224  CFeat_CI feat_iter,
226 {
    // vec = upstream region followed by the CDS; upstream_length is the
    // length of the upstream part, so the CDS starts at vec[upstream_length].
227  TSeqPos upstream_length;
228  CSeqVector vec =
229  s_GetCdregionPlusUpstream(feat_iter, ctx, upstream_length);
230  vec.SetIupacCoding();
231 
232  EKozakStrength strength;
233  EKozakStrength best_strength = eNone;
234  TSeqPos pos_nearest_best_start = 0; // initialize to avoid compiler warning
    // Walk backwards from the codon immediately before the CDS, one codon
    // (3 bases) at a time, so every position visited is in frame.
    // The comparison below is strict, so among equally strong starts the
    // first one seen (the one nearest the CDS) is the one kept.
    // NOTE(review): upstream_length is a TSeqPos converted to int here;
    // presumably upstream regions shorter than 3 bases end up with a
    // negative i and skip the loop - confirm.
235  for (int i = upstream_length - 3; i >= 0; i -= 3) {
236  if (vec[i] == 'A' && vec[i + 1] == 'T' && vec[i + 2] == 'G') {
237  strength = s_GetKozakStrength(vec, i);
238  if (strength > best_strength) {
239  best_strength = strength;
240  pos_nearest_best_start = i;
241  }
242  }
243  }
244  result.SetOutput_data()
245  .AddField("inframe_upstream_start_exists", best_strength != eNone);
246  if (best_strength != eNone) {
247  result.SetOutput_data()
248  .AddField("inframe_upstream_start_best_kozak_strength",
249  s_KozakStrengthToString(best_strength));
    // Distance is counted in bases between the end of the upstream ATG
    // and the start of the CDS (0 means the ATG is the adjacent codon).
250  result.SetOutput_data()
251  .AddField("nearest_best_upstream_start_distance",
252  int(upstream_length - pos_nearest_best_start - 3));
253  }
254 }
255 
256 
259  const CSeqTestContext* ctx)
260 {
261  return x_TestAllCdregions(obj, ctx, "inframe_upstream_start",
263 }
264 
265 
// Look for the nearest stop codon upstream of, and in frame with, the start
// of a coding region.
//
// Output fields written to `result`:
//   inframe_upstream_stop_exists            - always
//   nearest_inframe_upstream_stop_distance  - only when a stop was found
//
// NOTE(review): `tbl` is used below but its declaration is not among the
// visible lines of this listing; only the tail of the statement that
// presumably initializes it from s_GetCode(...) is shown.
266 static void s_InframeUpstreamStop(const CSeq_id& id,
267  const CSeqTestContext* ctx,
268  CFeat_CI feat_iter, CSeq_test_result& result)
269 {
271  s_GetCode(feat_iter->GetData().GetCdregion());
273 
    // vec = upstream region followed by the CDS; the CDS starts at
    // vec[upstream_length].
274  TSeqPos upstream_length;
275  CSeqVector vec =
276  s_GetCdregionPlusUpstream(feat_iter, ctx, upstream_length);
277  vec.SetIupacCoding();
278 
    // Step backwards codon by codon from the codon just before the CDS;
    // the first stop encountered is therefore the nearest one.
279  for (int i = upstream_length - 3; i >= 0; i -= 3) {
280  if (tbl.IsOrfStop(tbl.SetCodonState(vec[i], vec[i + 1],
281  vec[i + 2]))) {
282  result.SetOutput_data()
283  .AddField("inframe_upstream_stop_exists",
284  true);
    // Bases between the end of the stop codon and the CDS start.
285  result.SetOutput_data()
286  .AddField("nearest_inframe_upstream_stop_distance",
287  int(upstream_length - i - 3));
288  return;
289  }
290  }
    // Reached only when no in-frame stop was found upstream.
291  result.SetOutput_data()
292  .AddField("inframe_upstream_stop_exists",
293  false);
294 }
295 
296 
299  const CSeqTestContext* ctx)
300 {
301  return x_TestAllCdregions(obj, ctx, "inframe_upstream_stop",
303 }
304 
305 
306 static void s_CodingPropensity(const CSeq_id& id, const CSeqTestContext* ctx,
307  CFeat_CI feat_iter, CSeq_test_result& result)
308 {
309  //creating CHMMParameters object from file is expensive, so
310  //created objects are cached in a static map.
311  //It is reasonable to expect that the number of parameter files
312  //per program is small, and so the cache size does not need
313  //to be limited.
314 
315  static std::map<string, CRef<CHMMParameters> > s_hmmparams_cache;
316  DEFINE_STATIC_FAST_MUTEX(map_mutex);
317 
318  CRef<CHMMParameters> hmm_params;
319 
320  if (!ctx->HasKey("gnomon_model_file")) {
321  return;
322  }
323 
324 
325  string model_file_name = (*ctx)["gnomon_model_file"];
326 
327  {{
328  CFastMutexGuard guard(map_mutex); //released when goes out of scope 4 lines below
329  if(s_hmmparams_cache.find(model_file_name) == s_hmmparams_cache.end()) {
330  CNcbiIfstream model_file(model_file_name.c_str());
331  s_hmmparams_cache[model_file_name] = CRef<CHMMParameters>(new CHMMParameters(model_file));
332  }
333  }}
334 
335  hmm_params = s_hmmparams_cache[model_file_name];
336 
337 
338  const CSeq_loc& cds = feat_iter->GetLocation();
339 
340  int gccontent=0;
341  double score = CCodingPropensity::GetScore(hmm_params, cds,
342  ctx->GetScope(), &gccontent);
343 
344  // Record results
345  result.SetOutput_data()
346  .AddField("model_file", model_file_name);
347  result.SetOutput_data()
348  .AddField("model_percent_gc", gccontent);
349  result.SetOutput_data()
350  .AddField("score", max(score, -1e100));
351 
352 }
353 
354 
357  const CSeqTestContext* ctx)
358 {
359  return x_TestAllCdregions(obj, ctx, "coding_propensity",
361 }
362 
363 
366  const CSeqTestContext* ctx)
367 {
369  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
370  if ( !id || !ctx ) {
371  return ref;
372  }
373 
374  ref.Reset(new CSeq_test_result_set());
375 
376  CRef<CSeq_test_result> result = x_SkeletalTestResult("transcript_length");
377  ref->Set().push_back(result);
378 
379  int len = ctx->GetScope()
380  .GetBioseqHandle(dynamic_cast<const CSeq_id&>(obj)).GetInst_Length();
381  result->SetOutput_data()
382  .AddField("length", len);
383  return ref;
384 }
385 
386 
387 static void s_CdsLength(const CSeq_id& id, const CSeqTestContext* ctx,
388  CFeat_CI feat_iter, CSeq_test_result& result)
389 {
390  result.SetOutput_data()
391  .AddField("length",
392  (int)sequence::GetLength(feat_iter->GetLocation(), 0));
393 }
394 
395 
398  const CSeqTestContext* ctx)
399 {
400  return x_TestAllCdregions(obj, ctx, "cds_length", s_CdsLength);
401 }
402 
403 
404 static void s_Utrs(const CSeq_id& id, const CSeqTestContext* ctx,
405  CFeat_CI feat_iter, CSeq_test_result& result)
406 {
407  const CSeq_loc& loc = feat_iter->GetLocation();
408  TSeqPos cds_from = sequence::GetStart(loc, 0);
409  TSeqPos cds_to = sequence::GetStop(loc, 0);
410  int xcript_len = ctx->GetScope().GetBioseqHandle(id).GetInst_Length();
411  result.SetOutput_data().AddField("length_5_prime_utr", (int) cds_from);
412  result.SetOutput_data().AddField("length_3_prime_utr",
413  (int) (xcript_len - cds_to - 1));
414 }
415 
416 
419  const CSeqTestContext* ctx)
420 {
421  return x_TestAllCdregions(obj, ctx, "utrs", s_Utrs);
422 }
423 
424 
// Examine the first codon of a coding region.
//
// Output fields written to `result`:
//   is_start       - whether the genetic-code table accepts the codon as a
//                    start
//   first_codon    - the three bases themselves
//   kozak_strength - Kozak strength of the context around the CDS start
//
// NOTE(review): `tbl` is used below but its declaration is not among the
// visible lines of this listing.
425 static void s_CdsStartCodon(const CSeq_id& id, const CSeqTestContext* ctx,
426  CFeat_CI feat_iter, CSeq_test_result& result)
427 {
429  s_GetCode(feat_iter->GetData().GetCdregion());
431 
    // vec = upstream region followed by the CDS, so the first codon of the
    // CDS occupies [upstream_length, upstream_length + 3).
432  TSeqPos upstream_length;
433  CSeqVector vec =
434  s_GetCdregionPlusUpstream(feat_iter, ctx, upstream_length);
435  vec.SetIupacCoding();
436 
437  string seq;
438  vec.GetSeqData(upstream_length, upstream_length + 3, seq);
    // NOTE(review): seq[0..2] are read unconditionally below; this assumes
    // the CDS is at least 3 bases long - TODO confirm.
439  // is this an officially sanctioned start?
440  result.SetOutput_data()
441  .AddField("is_start",
442  tbl.IsOrfStart(tbl.SetCodonState(seq[0], seq[1], seq[2])));
443  // record it, whatever it is
444  result.SetOutput_data()
445  .AddField("first_codon", seq);
446 
447  result.SetOutput_data()
448  .AddField("kozak_strength",
449  s_KozakStrengthToString(s_GetKozakStrength(vec, upstream_length)));
450 }
451 
452 
455  const CSeqTestContext* ctx)
456 {
457  return x_TestAllCdregions(obj, ctx, "cds_start_codon", s_CdsStartCodon);
458 }
459 
460 
// Check whether the last three bases of a coding region form a stop codon
// according to the genetic-code table, and record the answer as "is_stop".
//
// NOTE(review): `tbl` is used below but its declaration is not among the
// visible lines of this listing.
461 static void s_CdsStopCodon(const CSeq_id& id, const CSeqTestContext* ctx,
462  CFeat_CI feat_iter, CSeq_test_result& result)
463 {
465  s_GetCode(feat_iter->GetData().GetCdregion());
467 
468  CSeqVector vec(feat_iter->GetLocation(), ctx->GetScope());
469  vec.SetIupacCoding();
470  string seq;
    // Fetch the final codon, [size - 3, size).
    // NOTE(review): assumes the CDS is at least 3 bases long; if size() is
    // an unsigned type, `vec.size() - 3` would wrap for a shorter CDS, and
    // seq[0..2] are read unconditionally below - TODO confirm/guard.
471  vec.GetSeqData(vec.size() - 3, vec.size(), seq);
472  result.SetOutput_data()
473  .AddField("is_stop",
474  tbl.IsOrfStop(tbl.SetCodonState(seq[0], seq[1], seq[2])));
475 }
476 
477 
480  const CSeqTestContext* ctx)
481 {
482  return x_TestAllCdregions(obj, ctx, "cds_stop_codon", s_CdsStopCodon);
483 }
484 
485 
486 // Determine the position in a cds of the start of a Code-break
// Returns sequence::LocationOffset() of the Code-break's location relative
// to the feature's location.
// NOTE(review): the line carrying the remaining argument(s) of the
// LocationOffset call is absent from this listing, so the call below is
// shown truncated; `scope` is presumably passed there - confirm against the
// original file.
487 inline TSeqPos CodeBreakPosInCds(const CCode_break& code_break,
488  const CSeq_feat& feat, CScope& scope)
489 {
490  return sequence::LocationOffset(feat.GetLocation(), code_break.GetLoc(),
492 }
493 
494 
495 // Determine whether a Code-break is a selenocysteine
// Decide, from the amino-acid choice stored in a Code-break, whether it
// denotes selenocysteine.  Each encoding is compared against a fixed code;
// any other choice yields false.
// NOTE(review): the `case` labels of this switch are absent from this
// listing; each return below presumably sits under the label matching the
// getter it calls (Ncbieaa / Ncbi8aa / Ncbistdaa) - confirm.
496 static bool s_IsSelenocysteine(const CCode_break& code_break)
497 {
498  switch (code_break.GetAa().Which()) {
    // 85 is the ASCII code of 'U'.
500  return code_break.GetAa().GetNcbieaa() == 85;
    // 24 is presumably the selenocysteine code in these two alphabets -
    // TODO confirm against the alphabet tables.
502  return code_break.GetAa().GetNcbi8aa() == 24;
504  return code_break.GetAa().GetNcbistdaa() == 24;
506  default:
507  return false;
508  }
509 }
510 
511 
512 // Determine whether a position in a CDS feature is the beginning
513 // of a selenocysteine codon (according to Code-break's)
514 static bool s_IsSelenocysteine(TSeqPos pos_in_cds, CFeat_CI feat_iter, CScope& scope)
515 {
516  const CSeq_feat& feat = feat_iter->GetOriginalFeature();
517  if (!feat.GetData().GetCdregion().IsSetCode_break()) {
518  return false;
519  }
520  ITERATE (CCdregion::TCode_break, code_break,
521  feat.GetData().GetCdregion().GetCode_break ()) {
522  if (CodeBreakPosInCds(**code_break, feat, scope) == pos_in_cds
523  && s_IsSelenocysteine(**code_break)) {
524  return true;
525  }
526  }
527  return false;
528 }
529 
530 
// Scan a coding region for stop codons that occur before its final codon.
//
// Output fields written to `result`:
//   has_premature_stop_codon               - any internal stop at all
//   first_premature_stop_position          - position of the first one
//   has_premature_stop_codon_not_sec       - any internal stop that is not
//                                            an annotated selenocysteine
//   first_premature_stop_position_not_sec  - position of the first such stop
//
// NOTE(review): `tbl` is used below but its declaration is not among the
// visible lines of this listing; the `case` labels of the frame switch are
// missing as well.
531 static void s_PrematureStopCodon(const CSeq_id& id, const CSeqTestContext* ctx,
532  CFeat_CI feat_iter, CSeq_test_result& result)
533 {
535  s_GetCode(feat_iter->GetData().GetCdregion());
537 
538  CSeqVector vec(feat_iter->GetLocation(), ctx->GetScope());
539  vec.SetIupacCoding();
540 
    // Translate the feature's reading frame into an offset of 0, 1 or 2
    // bases at which codon scanning begins.
541  TSeqPos start_translating;
542  switch (feat_iter->GetData().GetCdregion().GetFrame()) {
545  start_translating = 0;
546  break;
548  start_translating = 1;
549  break;
551  start_translating = 2;
552  break;
553  default:
554  // should never happen, but handle it to avoid compiler warning
555  start_translating = kInvalidSeqPos;
556  break;
557  }
558 
559  bool premature_stop_found = false;
    // The bound `vec.size() - 3` keeps the last three bases out of the scan.
    // NOTE(review): if size() is unsigned, `vec.size() - 3` wraps when the
    // CDS is shorter than 3 bases, which would let the loop read past the
    // end of vec - TODO confirm/guard.
560  for (TSeqPos i = start_translating; i < vec.size() - 3; i += 3) {
561  if (tbl.IsOrfStop(tbl.SetCodonState(vec[i], vec[i + 1],
562  vec[i + 2]))) {
    // The "any stop" fields are written once, for the first stop found.
563  if (!premature_stop_found) {
564  result.SetOutput_data()
565  .AddField("has_premature_stop_codon", true);
566  result.SetOutput_data()
567  .AddField("first_premature_stop_position",
568  static_cast<int>(i));
569  premature_stop_found = true;
570  }
571  // determine whether it's an annotated selenocysteine
572  if (!s_IsSelenocysteine(i, feat_iter, ctx->GetScope())) {
573  result.SetOutput_data()
574  .AddField("has_premature_stop_codon_not_sec", true);
575  result.SetOutput_data()
576  .AddField("first_premature_stop_position_not_sec",
577  static_cast<int>(i));
    // Nothing further to learn once a non-selenocysteine stop is found.
578  return;
579  }
580  }
581  }
582 
    // Reached when every internal stop (if any) was a selenocysteine.
583  result.SetOutput_data()
584  .AddField("has_premature_stop_codon_not_sec", false);
585  if (!premature_stop_found) {
586  result.SetOutput_data()
587  .AddField("has_premature_stop_codon", false);
588  }
589 }
590 
591 
594  const CSeqTestContext* ctx)
595 {
596  return x_TestAllCdregions(obj, ctx, "premature_stop_codon",
598 }
599 
600 
601 // Walk the replace history to find the latest revision of a sequence
603 {
605  if ( !latest ) {
606  latest.Reset(&id);
607  }
608  return latest;
609 }
610 
611 
// Translate a coding region and compare the translation, position by
// position, with the annotated protein product and with the latest version
// of that product as returned by s_FindLatest().
//
// Output fields written to `result`:
//   length_translation                   - always
//   length_annotated_prot_prod           - when a product is annotated
//   fraction_identity                    - ditto
//   fraction_identity_updated_prot_prod  - ditto
//   length_updated_prot_prod             - ditto
//   prot_prod_updated                    - ditto
//   updated_prod_id                      - ditto
//
// NOTE(review): `result` is used in the body but the parameter line that
// declares it is absent from this listing.
612 static void s_CompareProtProdToTrans(const CSeq_id& id,
613  const CSeqTestContext* ctx,
614  CFeat_CI feat_iter,
616 {
617  string translation;
618  CSeqTranslator::Translate(feat_iter->GetOriginalFeature(), ctx->GetScope(),
619  translation, false /* include_stop */);
620  result.SetOutput_data().AddField("length_translation",
621  int(translation.size()));
622 
623  if (!feat_iter->GetOriginalFeature().CanGetProduct()) {
624  // can't do comparison if there's no product annotated
625  return;
626  }
627 
628  const CSeq_loc& prod_loc = feat_iter->GetOriginalFeature().GetProduct();
629  const CSeq_id& prod_id = sequence::GetId(prod_loc, 0);
630  CSeqVector prod_vec(prod_loc, ctx->GetScope());
631  prod_vec.SetIupacCoding();
632 
    // Count matching residues over the common prefix; no alignment is
    // performed, positions are compared index for index.
633  TSeqPos ident_count = 0;
634  for (TSeqPos i = 0;
635  i < min(prod_vec.size(), (TSeqPos)translation.size()); ++i) {
636  if (prod_vec[i] == translation[i]) {
637  ++ident_count;
638  }
639  }
640 
641  result.SetOutput_data().AddField("length_annotated_prot_prod",
642  int(prod_vec.size()));
    // Identity is normalized by the longer of the two sequences.
    // NOTE(review): if both sequences are empty the divisor is 0 - TODO
    // confirm this cannot happen or guard it.
643  result.SetOutput_data()
644  .AddField("fraction_identity",
645  double(ident_count)
646  / max(prod_vec.size(), (TSeqPos)translation.size()));
647 
648  CConstRef<CSeq_id> updated_id = s_FindLatest(prod_id, ctx->GetScope());
649  if (updated_id->Equals(prod_id)) {
    // Product is already the latest version: reuse the numbers above.
650  result.SetOutput_data()
651  .AddField("fraction_identity_updated_prot_prod",
652  double(ident_count)
653  / max(prod_vec.size(), (TSeqPos)translation.size()));
654  result.SetOutput_data().AddField("length_updated_prot_prod",
655  int(prod_vec.size()));
656  } else {
    // Repeat the comparison against the updated product sequence.
657  CBioseq_Handle updated_prod_hand
658  = ctx->GetScope().GetBioseqHandle(*updated_id);
659  CSeqVector updated_prod_vec = updated_prod_hand.GetSeqVector();
660  updated_prod_vec.SetIupacCoding();
    // This ident_count shadows the outer variable of the same name.
661  TSeqPos ident_count = 0;
662  for (TSeqPos i = 0;
663  i < min(updated_prod_vec.size(), (TSeqPos)translation.size());
664  ++i) {
665  if (updated_prod_vec[i] == translation[i]) {
666  ++ident_count;
667  }
668  }
669  result.SetOutput_data()
670  .AddField("fraction_identity_updated_prot_prod",
671  double(ident_count)
672  / max(updated_prod_vec.size(),
673  (TSeqPos)translation.size()));
674  result.SetOutput_data().AddField("length_updated_prot_prod",
675  int(updated_prod_vec.size()));
676  }
677  result.SetOutput_data()
678  .AddField("prot_prod_updated", !updated_id->Equals(prod_id));
679  result.SetOutput_data()
680  .AddField("updated_prod_id", updated_id->AsFastaString());
681 }
682 
683 
686  const CSeqTestContext* ctx)
687 {
688  return x_TestAllCdregions(obj, ctx, "compare_prot_prod_to_trans",
690 }
691 
692 
695  const CSeqTestContext* ctx)
696 {
    // NOTE(review): the first lines of this definition (return type and
    // function name), the declaration of `ref` and the statement creating
    // `result` are absent from this listing.
    //
    // Visible behavior: for a Seq-id object with a context, examine the 3'
    // end of the sequence and record
    //   trailing_a_count - run of 'A' at the very end
    //   tail_length      - length of an A-rich tail, mismatches tolerated
    //   signal_pos / is_canonical_pas - a hexamer signal found in the
    //                      window before the tail (only when one is found)
698  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
699  if ( !id || !ctx ) {
700  return ref;
701  }
702 
703  ref.Reset(new CSeq_test_result_set());
704 
706  ref->Set().push_back(result);
707 
708  CBioseq_Handle xcript_hand = ctx->GetScope().GetBioseqHandle(*id);
709  CSeqVector vec = xcript_hand.GetSeqVector();
710  vec.SetIupacCoding();
711 
712  //compute trailing a-count
713  {{
714  int pos(0);
    // NOTE(review): the loop condition is `pos > 0`, so vec[0] is never
    // examined; a sequence consisting only of 'A' reports size - 1.
715  for(pos = vec.size() - 1; pos > 0; --pos) {
716  if (vec[pos] != 'A') {
717  break;
718  }
719  }
720  result->SetOutput_data().AddField("trailing_a_count",
721  int(vec.size() - pos - 1));
722  }}
723 
724 
725  int tail_length(0);
726  //compute tail length allowing for mismatches.
727  //Note: there's similar logic for computing genomic polya priming in alignment tests
728  {{
    // Scoring: +1 per 'A', -4 per other base, scanning from the last base
    // towards the start; scanning stops once the running score has fallen
    // x_dropoff or more below the best score seen.
729  static const int w_match = 1;
730  static const int w_mismatch = -4;
731  static const int x_dropoff = 15;
732 
733  size_t best_pos = NPOS;
734  int best_score = 0;
735  int curr_score = 0;
736 
    // NOTE(review): curr_pos is a size_t initialized to vec.size() - 1;
    // for an empty sequence that expression would wrap - assumes a
    // non-empty sequence, TODO confirm.  As above, position 0 is never
    // scored because of `curr_pos > 0`.
737  for(size_t curr_pos = vec.size() - 1;
738  curr_pos > 0 && curr_score + x_dropoff > best_score;
739  --curr_pos)
740  {
741  curr_score += vec[curr_pos] == 'A' ? w_match : w_mismatch;
    // `>=` lets a later (further upstream) position with an equal score
    // replace the current best, which favors the longer tail.
742  if(curr_score >= best_score) {
743  best_score = curr_score;
744  best_pos = curr_pos;
745  }
746  }
747  tail_length = (best_pos == NPOS) ? 0 : vec.size() - best_pos;
748  result->SetOutput_data().AddField("tail_length", tail_length);
749  }}
750 
751 
752  //find signal
753  {{
    // Candidate hexamers in priority order; the loop below stops at the
    // first pattern that occurs at all, so an earlier pattern wins over a
    // later one regardless of where each occurs in the window.
754  static string patterns[] = {
755  "AATAAA",
756  "ATTAAA",
757  "AGTAAA",
758  "TATAAA",
759  "CATAAA",
760  "GATAAA",
761  "AATATA",
762  "AATACA",
763  "AATAGA",
764  "ACTAAA",
765  "AAGAAA",
766  "AATGAA"
767  };
768 
769  size_t window = 50; //search within 50 bases upstream of polya-site
    // NOTE(review): end_pos is computed in size_t; assumes
    // tail_length < vec.size() so the subtraction cannot wrap - TODO confirm.
770  size_t end_pos = vec.size() - 1 - tail_length;
771  size_t begin_pos = end_pos > window ? end_pos - window : 0;
772 
773  string seq;
774  vec.GetSeqData(begin_pos, end_pos, seq);
775 
    // 12 is the number of entries in patterns[] above.
776  for(int ii = 0; ii < 12; ii++) {
777  size_t pos = NStr::Find(seq, patterns[ii], NStr::eCase, NStr::eReverseSearch);
778  if(pos != NPOS) {
    // pos is relative to the window; adding begin_pos converts it to a
    // position on the whole sequence.
779  result->SetOutput_data().AddField("signal_pos", static_cast<int>(pos + begin_pos));
780  result->SetOutput_data().AddField("is_canonical_pas", (ii <= 1)); //AATAAA or ATTAAA
781  break;
782  }
783  }
784  }}
785 
786  return ref;
787 }
788 
789 
791 {
792  TSeqPos cds_start(kInvalidSeqPos);
793  for(CFeat_CI ci(bsh, SAnnotSelector(CSeqFeatData::e_Cdregion)); ci; ++ci) {
794  cds_start = ci->GetLocation().GetStart(eExtreme_Positional);
795  break;
796  }
797  if(cds_start == kInvalidSeqPos) {
798  return;
799  }
800 
801  COrf::TLocVec overlapping_uorfs, upstream_uorfs;
802  COrf::FindStrongKozakUOrfs(bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac), cds_start, overlapping_uorfs, upstream_uorfs);
803  result.SetOutput_data().AddField("overlapping_strong_uorfs", (int)overlapping_uorfs.size());
804  result.SetOutput_data().AddField("upstream_strong_uorfs", (int)upstream_uorfs.size());
805 }
806 
809  const CSeqTestContext* ctx)
810 {
    // NOTE(review): the first lines of this definition (return type and
    // function name), the declaration of `ref` and the statement creating
    // `result` are absent from this listing.
    //
    // Visible behavior: for a Seq-id object with a context, find ORFs on
    // the sequence twice - first with the default start rules, then
    // restricted to ATG starts - and record for each pass the longest
    // forward-strand ORF, its end position and the longest ORF on either
    // strand; finally add the strong-Kozak uORF counts.
812  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
813  if ( !id || !ctx ) {
814  return ref;
815  }
816 
817  ref.Reset(new CSeq_test_result_set());
818 
820  ref->Set().push_back(result);
821 
822  CBioseq_Handle xcript_hand = ctx->GetScope().GetBioseqHandle(*id);
823  CSeqVector vec = xcript_hand.GetSeqVector();
824  vec.SetIupacCoding();
825 
826  // Look for ORFs starting with any sense codon
827  vector<CRef<CSeq_loc> > orfs;
828  COrf::FindOrfs(vec, orfs);
829  TSeqPos max_orf_length_forward = 0;
830  TSeqPos max_orf_length_either = 0;
831  TSeqPos largest_forward_orf_end = 0; // initialized to avoid comp. warning
832  ITERATE (vector<CRef<CSeq_loc> >, orf, orfs) {
833  TSeqPos orf_length = sequence::GetLength(**orf, 0);
834  max_orf_length_either = max(max_orf_length_either, orf_length);
    // Any strand other than minus is treated as forward.
835  if ((*orf)->GetInt().GetStrand() != eNa_strand_minus) {
836  if (orf_length > max_orf_length_forward) {
837  max_orf_length_forward = orf_length;
838  largest_forward_orf_end = (*orf)->GetInt().GetTo();
839  }
    // NOTE(review): redundant - the `if` just above has already raised
    // max_orf_length_forward whenever orf_length is larger.
840  max_orf_length_forward = max(max_orf_length_forward, orf_length);
841  }
842  }
843 
844  result->SetOutput_data().AddField("max_orf_length_forward_strand",
845  int(max_orf_length_forward));
846  result->SetOutput_data().AddField("largest_forward_orf_end_pos",
847  int(largest_forward_orf_end));
848  result->SetOutput_data().AddField("max_orf_length_either_strand",
849  int(max_orf_length_either));
850 
851  // Look for ORFs starting with ATG
852  orfs.clear();
853  vector<string> allowable_starts;
854  allowable_starts.push_back("ATG");
855  COrf::FindOrfs(vec, orfs, 3, 1, allowable_starts);
    // NOTE(review): the two length maxima are reset here but
    // largest_forward_orf_end is not.  If the ATG pass finds no
    // forward-strand ORF, "largest_forward_atg_orf_end_pos" below reports
    // the end position left over from the first pass - looks unintended,
    // TODO confirm and reset it to 0 here.
856  max_orf_length_forward = 0;
857  max_orf_length_either = 0;
858  ITERATE (vector<CRef<CSeq_loc> >, orf, orfs) {
859  TSeqPos orf_length = sequence::GetLength(**orf, 0);
860  max_orf_length_either = max(max_orf_length_either, orf_length);
861  if ((*orf)->GetInt().GetStrand() != eNa_strand_minus) {
862  if (orf_length > max_orf_length_forward) {
863  max_orf_length_forward = orf_length;
864  largest_forward_orf_end = (*orf)->GetInt().GetTo();
865  }
    // Redundant for the same reason as in the first pass.
866  max_orf_length_forward = max(max_orf_length_forward, orf_length);
867  }
868  }
869 
870  result->SetOutput_data().AddField("max_atg_orf_length_forward_strand",
871  int(max_orf_length_forward));
872  result->SetOutput_data().AddField("largest_forward_atg_orf_end_pos",
873  int(largest_forward_orf_end));
874  result->SetOutput_data().AddField("max_atg_orf_length_either_strand",
875  int(max_orf_length_either));
876 
    // Adds "overlapping_strong_uorfs" / "upstream_strong_uorfs" when the
    // sequence carries a coding-region feature.
877  TestStrongKozakUorfs(xcript_hand, *result);
878 
879  return ref;
880 }
881 
882 
883 static void s_Code_break(const CSeq_id& id, const CSeqTestContext* ctx,
884  CFeat_CI feat_iter, CSeq_test_result& result)
885 {
886  int count, not_start_not_sec_count;
887  if (feat_iter->GetData().GetCdregion().IsSetCode_break()) {
888  count = feat_iter->GetData().GetCdregion().GetCode_break().size();
889  not_start_not_sec_count = 0;
890  ITERATE (CCdregion::TCode_break, code_break,
891  feat_iter->GetData().GetCdregion().GetCode_break()) {
892  TSeqPos pos = CodeBreakPosInCds(**code_break,
893  feat_iter->GetOriginalFeature(),
894  ctx->GetScope());
895  if (pos != 0 && !s_IsSelenocysteine(**code_break)) {
896  ++not_start_not_sec_count;
897  }
898  }
899  } else {
900  count = 0;
901  not_start_not_sec_count = 0;
902  }
903 
904  result.SetOutput_data()
905  .AddField("code_break_count", count);
906  result.SetOutput_data()
907  .AddField("code_break_not_start_not_sec_count",
908  not_start_not_sec_count);
909 }
910 
911 
914  const CSeqTestContext* ctx)
915 {
916  return x_TestAllCdregions(obj, ctx, "code_break",
917  s_Code_break);
918 }
919 
920 
// Measure how far the open reading frame of a coding region could be
// extended upstream, separately for each Kozak strength, and count ATG
// triplets in the upstream region.
//
// Output fields written to `result`:
//   max_extension_weak_kozak / _moderate_kozak / _strong_kozak
//   upstream_utr_atg_count
//
// NOTE(review): `result` is used in the body but the parameter line that
// declares it is absent from this listing.
921 static void s_OrfExtension(const CSeq_id& id,
922  const CSeqTestContext* ctx,
923  CFeat_CI feat_iter,
925 {
    // vec = upstream region followed by the CDS; the CDS starts at
    // vec[upstream_length].
926  TSeqPos upstream_length;
927  CSeqVector vec =
928  s_GetCdregionPlusUpstream(feat_iter, ctx, upstream_length);
929  vec.SetIupacCoding();
930 
931  EKozakStrength strength;
    // One slot per strength value, indexed by the enum.  Every slot starts
    // at upstream_length so that "no ATG found" yields an extension of 0.
932  vector<int> starts(eStrong + 1, upstream_length);
933  string codon;
    // Walk upstream in frame until a stop codon is hit.  Each ATG
    // overwrites the slot for its strength, so a slot ends up holding the
    // ATG of that strength farthest from the CDS.
934  for (int i = upstream_length - 3; i >= 0; i -= 3) {
935  vec.GetSeqData(i, i + 3, codon);
936  if (codon == "ATG") {
937  strength = s_GetKozakStrength(vec, i);
938  starts[strength] = i;
939  }
    // NOTE(review): the stop codons are hard-coded here rather than looked
    // up in the feature's genetic-code table as other tests in this file
    // do - confirm this is intended.
940  if (codon == "TAA" || codon == "TAG" || codon == "TGA") {
941  break;
942  }
943  }
944 
945  // MSS-59
946  // Count the total number of 'ATG' triplets found in the 5'UTR of a
947  // transcript, in frames 1, 2, and 3.
948  int upstream_utr_atg_count(0);
    // Step of 1 covers every frame; starting at upstream_length - 3 limits
    // the count to triplets lying entirely within the upstream region.
949  for (int i = upstream_length - 3; i >= 0; i -= 1) {
950  vec.GetSeqData(i, i + 3, codon);
951  if (codon == "ATG") {
952  upstream_utr_atg_count++;
953  }
954  }
955 
956  result.SetOutput_data()
957  .AddField("max_extension_weak_kozak",
958  static_cast<int>(upstream_length - starts[eWeak]));
959  result.SetOutput_data()
960  .AddField("max_extension_moderate_kozak",
961  static_cast<int>(upstream_length - starts[eModerate]));
962  result.SetOutput_data()
963  .AddField("max_extension_strong_kozak",
964  static_cast<int>(upstream_length - starts[eStrong]));
965  result.SetOutput_data()
966  .AddField("upstream_utr_atg_count",
967  upstream_utr_atg_count);
968 }
969 
970 
973  const CSeqTestContext* ctx)
974 {
975  return x_TestAllCdregions(obj, ctx, "orf_extension",
977 }
978 
979 
981 {
982  CSeqVector vec_copy(vec);
983  vec_copy.SetIupacCoding();
984  string seq;
985  vec_copy.GetSeqData(0, vec_copy.size(), seq);
986 
987  CSeq_data in_seq, out_seq;
988  in_seq.SetIupacna().Set(seq);
989  vector<TSeqPos> out_indices;
990 
991  return CSeqportUtil::GetAmbigs(in_seq, &out_seq, &out_indices);
992 }
993 
994 
995 static void s_CdsCountAmbiguities(const CSeq_id& id,
996  const CSeqTestContext* ctx,
997  CFeat_CI feat_iter, CSeq_test_result& result)
998 {
999  CSeqVector vec(feat_iter->GetLocation(), ctx->GetScope());
1000  result.SetOutput_data()
1001  .AddField("cds_ambiguity_count",
1002  static_cast<int>(s_CountAmbiguities(vec)));
1003 }
1004 
1005 
1008  const CSeqTestContext* ctx)
1009 {
1011  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
1012  if ( !id || !ctx ) {
1013  return rv;
1014  }
1015 
1016  // count for each coding region
1017  rv = x_TestAllCdregions(obj, ctx, "count_ambiguities",
1019 
1020  // count for entire transcript
1021  if (!rv) {
1022  rv.Reset(new CSeq_test_result_set());
1023  }
1024  CBioseq_Handle hand = ctx->GetScope().GetBioseqHandle(*id);
1025  CSeqVector vec = hand.GetSeqVector();
1026  CRef<CSeq_test_result> result = x_SkeletalTestResult("count_ambiguities");
1027  rv->Set().push_back(result);
1028  result->SetOutput_data()
1029  .AddField("ambiguity_count",
1030  static_cast<int>(s_CountAmbiguities(vec)));
1031  return rv;
1032 }
1033 
1034 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CBioseq_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
CCode_break –.
Definition: Code_break.hpp:66
static double GetScore(CConstRef< CHMMParameters > hmm_params, const objects::CSeq_loc &cds, objects::CScope &scope, int *const gccontent, double *const startscore=0)
CConstRef –.
Definition: ncbiobj.hpp:1266
CFeat_CI –.
Definition: feat_ci.hpp:64
static const CTrans_table & GetTransTable(int id)
HMM model parameters just create it and pass to a Gnomon engine.
Definition: gnomon.hpp:55
static void FindStrongKozakUOrfs(const objects::CSeqVector &seq, TSeqPos cds_start, TLocVec &overlap_results, TLocVec &non_overlap_results, unsigned int min_length_bp=3, unsigned int non_overlap_min_length_bp=105, int genetic_code=1, size_t max_seq_gap=k_default_max_seq_gap)
Specifically find ORFS with a strong Kozak signal that are upstream of cds_start.
Definition: orf.cpp:383
vector< CRef< objects::CSeq_loc > > TLocVec
Definition: orf.hpp:55
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
Definition: orf.cpp:336
CScope –.
Definition: scope.hpp:92
CSeqTestContext defines any contextual information that a derived class might need.
Definition: seqtest.hpp:52
CRef< objects::CSeq_test_result > x_SkeletalTestResult(const string &test_name)
Create a Seq-test-result with some fields filled in, including a name for this test,...
Definition: seqtest.cpp:54
CRef< objects::CSeq_test_result_set > x_TestAllCdregions(const CSerialObject &obj, const CSeqTestContext *ctx, const string &test_name, TCdregionTester cdregion_tester)
Given a Seq-id and a context, analyze all coding regions by calling a supplied function.
Definition: seqtest.cpp:69
CSeqVector –.
Definition: seq_vector.hpp:65
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_test_result_set –.
CSeq_test_result –.
static TSeqPos GetAmbigs(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, CSeq_data::E_Choice to_code=CSeq_data::e_Ncbi2na, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Base class for all serializable objects.
Definition: serialbase.hpp:150
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
bool CanTest(const CSerialObject &obj, const CSeqTestContext *ctx) const
Test to see whether the given object *can* be used in this test.
bool IsOrfStart(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
bool IsOrfStop(int state) const
CS_CONTEXT * ctx
Definition: t0006.c:12
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
Definition: Seq_loc.cpp:2573
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
Returns (TSeqPos)-1 if the locations don't overlap.
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
CConstRef< CSeq_id > FindLatestSequence(const CSeq_id &id, CScope &scope)
Given a seq-id check its replace history and try to find the latest revision.
Definition: sequence.cpp:763
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
bool IsSetExcept(void) const
bool GetExcept(void) const
bool GetPseudo(void) const
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetPseudo(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetInst_Mol(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
bool GetPartial(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
Definition: ncbimtx.hpp:496
Tdata & Set(void)
Assign a value to data member.
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
const TLoc & GetLoc(void) const
Get the Loc member data.
Tdata & Set(void)
Assign a value to data member.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TCode & GetCode(void) const
Get the Code member data.
Definition: Cdregion_.hpp:712
const TCdregion & GetCdregion(void) const
Get the variant data.
const TAa & GetAa(void) const
Get the Aa member data.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
Definition: Cdregion_.hpp:706
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
TNcbistdaa GetNcbistdaa(void) const
Get the variant data.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetCode_break(void) const
Individual exceptions. Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
@ e_Ncbi8aa
NCBI8aa code.
@ e_not_set
No variant selected.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
TIupacna & SetIupacna(void)
Select the variant.
Definition: Seq_data_.hpp:517
int i
int len
Defines: CTimeFormat - storage class for time format.
T max(T x_, T y_)
T min(T x_, T y_)
static patstr * patterns
Definition: pcregrep.c:259
SAnnotSelector –.
Definition: inftrees.h:24
else result
Definition: token2.c:20
static CConstRef< CSeq_id > s_FindLatest(const CSeq_id &id, CScope &scope)
USING_SCOPE(objects)
static void s_CdsStopCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsFlags(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_InframeUpstreamStart(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsLength(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsCountAmbiguities(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static CSeqVector s_GetCdregionPlusUpstream(CFeat_CI feat_iter, const CSeqTestContext *ctx, TSeqPos &upstream_length)
string s_KozakStrengthToString(EKozakStrength strength)
static void s_CompareProtProdToTrans(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_OrfExtension(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_InframeUpstreamStop(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
CConstRef< CGenetic_code > s_GetCode(const CCdregion &cdr)
static void s_Code_break(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
EKozakStrength s_GetKozakStrength(const CSeqVector &vec, TSeqPos pos)
static bool s_IsSelenocysteine(const CCode_break &code_break)
static void s_CdsStartCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_Utrs(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_PrematureStopCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CodingPropensity(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
EKozakStrength
@ eModerate
@ eNone
@ eWeak
@ eStrong
static TSeqPos s_CountAmbiguities(const CSeqVector &vec)
TSeqPos CodeBreakPosInCds(const CCode_break &code_break, const CSeq_feat &feat, CScope &scope)
void TestStrongKozakUorfs(const CBioseq_Handle bsh, CSeq_test_result &result)
Modified on Thu Jun 13 17:32:05 2024 by modify_doxy.py rev. 669887