NCBI C++ ToolKit
translation_problems.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: translation_problems.cpp 101247 2023-11-20 15:20:49Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * detecting translation problems
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
38 
39 #include <objmgr/seq_vector.hpp>
40 #include <objmgr/util/sequence.hpp>
46 
49 BEGIN_SCOPE(validator)
50 using namespace sequence;
51 
52 
54 {
// NOTE(review): the header line for this body is not present in this listing; presumably
// this is the CCDSTranslationProblems constructor - confirm against the original file.
// All state initialisation is delegated to x_Reset().
55  x_Reset();
56 }
57 
58 
60 {
61  m_ProblemFlags = 0;
62  m_RaggedLength = 0;
63  m_TransLen = 0;
64  m_ProtLen = 0;
65  m_HasDashXStart = false;
66  m_TranslStart = 0;
67  m_InternalStopCodons = 0;
68  m_TranslTerminalX = 0;
69  m_ProdTerminalX = 0;
70  m_UnableToTranslate = false;
71  m_AltStart = false;
72  m_UnparsedTranslExcept = false;
73  m_NumNonsenseIntrons = 0;
74  m_HasException = false;
75 }
76 
77 
79  const CSeq_feat& feat,
80  CBioseq_Handle loc_handle,
81  CBioseq_Handle prot_handle,
82  bool ignore_exceptions,
83  bool far_fetch_cds,
84  bool standalone_annot,
85  bool single_seq,
86  bool is_gpipe,
87  bool is_genomic,
88  bool is_refseq,
89  bool is_nt_or_ng_or_nw,
90  bool is_nc,
91  bool has_accession,
92  CScope* scope)
93 {
94  x_Reset();
95  // bail if not CDS
96  if (!feat.GetData().IsCdregion()) {
97  return;
98  }
99 
100  // do not validate for pseudo gene
101  if (feat.IsSetQual()) {
102  for (auto it = feat.GetQual().begin(); it != feat.GetQual().end(); it++) {
103  if ((*it)->IsSetQual() && NStr::EqualNocase((*it)->GetQual(), "pseudo")) {
104  return;
105  }
106  }
107  }
108 
109  bool has_errors = false, unclassified_except = false,
110  mismatch_except = false, frameshift_except = false,
111  rearrange_except = false, product_replaced = false,
112  mixed_population = false, low_quality = false,
113  report_errors = true, other_than_mismatch = false,
114  rna_editing = false, transcript_or_proteomic = false;
115  string farstr;
116 
117  if (!ignore_exceptions &&
118  feat.IsSetExcept() && feat.GetExcept() &&
119  feat.IsSetExcept_text()) {
120  const string& except_text = feat.GetExcept_text();
121  report_errors = ReportTranslationErrors(except_text);
122  x_GetExceptionFlags(
123  except_text,
124  unclassified_except,
125  mismatch_except,
126  frameshift_except,
127  rearrange_except,
128  product_replaced,
129  mixed_population,
130  low_quality,
131  rna_editing,
132  transcript_or_proteomic);
133  }
134 
135  m_HasException = !report_errors;
136 
137  // check frame
138  m_ProblemFlags |= x_CheckCDSFrame(feat, scope);
139 
140  // check for unparsed transl_except
141  if (feat.IsSetQual()) {
142  for (auto it = feat.GetQual().begin(); it != feat.GetQual().end(); it++) {
143  if ((*it)->IsSetQual() && NStr::Equal((*it)->GetQual(), "transl_except")) {
144  m_UnparsedTranslExcept = true;
145  }
146  }
147  }
148 
149  string transl_prot; // translated protein
150  bool got_stop = false;
151 
152  try {
153  transl_prot = TranslateCodingRegionForValidation(feat, *scope, m_AltStart);
154  } catch (CException&) {
155  m_UnableToTranslate = true;
156  }
157 
158  if (NStr::EndsWith(transl_prot, "*")) {
159  got_stop = true;
160  }
161 
162  if (HasBadStartCodon(feat.GetLocation(), transl_prot)) {
163  m_TranslStart = transl_prot.c_str()[0];
164  m_ProblemFlags |= eCDSTranslationProblem_BadStart;
165  }
166 
167  bool no_product = true;
168 
169  if (!m_UnableToTranslate) {
170 
171  // check for code break not on a codon
172  m_TranslExceptProblems = x_GetTranslExceptProblems(feat, loc_handle, scope, is_refseq);
173 
174  m_NumNonsenseIntrons = x_CountNonsenseIntrons(feat, scope);
175  if (x_ProteinHasTooManyXs(transl_prot)) {
176  m_ProblemFlags |= eCDSTranslationProblem_TooManyX;
177  }
178 
179  m_InternalStopCodons = CountInternalStopCodons(transl_prot);
180  if (m_InternalStopCodons > 5) {
181  // stop checking if too many stop codons
182  return;
183  }
184 
185  // get protein sequence
186 
187  if (!prot_handle) {
188  const CSeq_id* protid = nullptr;
189  try {
190  protid = &GetId(feat.GetProduct(), scope);
191  } catch (CException&) {}
192  if (protid && far_fetch_cds && feat.IsSetProduct()) {
193  m_ProblemFlags |= eCDSTranslationProblem_UnableToFetch;
194  } else if (protid && (!far_fetch_cds || feat.IsSetProduct())) {
195  // don't report missing protein sequence
196  } else if (!standalone_annot && transl_prot.length() > 6) {
197  if (!is_nt_or_ng_or_nw && (!is_nc || !single_seq)) {
198  m_ProblemFlags |= eCDSTranslationProblem_NoProtein;
199  }
200  }
201  }
202 
203  bool show_stop = true;
204 
205  if (prot_handle && prot_handle.IsAa()) {
206  no_product = false;
207 
208  CSeqVector prot_vec = prot_handle.GetSeqVector();
210 
211 
212  CalculateEffectiveTranslationLengths(transl_prot, prot_vec, m_TransLen, m_ProtLen);
213 
214  if (m_TransLen == m_ProtLen || has_accession) { // could be identical
215  if (prot_vec.size() > 0 && transl_prot.length() > 0 &&
216  prot_vec[0] != transl_prot[0]) {
217  bool no_beg, no_end;
218  FeatureHasEnds(feat, scope, no_beg, no_end);
219  if (feat.IsSetPartial() && feat.GetPartial() && (!no_beg) && (!no_end)) {
220  m_ProblemFlags |= eCDSTranslationProblem_ShouldStartPartial;
221  } else if (transl_prot[0] == '-' || transl_prot[0] == 'X') {
222  m_HasDashXStart = true;
223  }
224  }
225  m_Mismatches = x_GetTranslationMismatches(feat, prot_vec, transl_prot, has_accession);
226  }
227 
228  if (feat.CanGetPartial() && feat.GetPartial() &&
229  m_Mismatches.size() == 0) {
230  bool no_beg, no_end;
231  FeatureHasEnds(feat, scope, no_beg, no_end);
232  if (!no_beg && !no_end) {
233  if (report_errors) {
234  if (is_gpipe && is_genomic) {
235  // suppress in gpipe genomic
236  } else {
237  if (!got_stop) {
238  m_ProblemFlags |= eCDSTranslationProblem_ShouldBePartialButIsnt;
239  } else {
240  m_ProblemFlags |= eCDSTranslationProblem_ShouldNotBePartialButIs;
241  }
242  }
243  }
244  show_stop = false;
245  }
246  }
247 
248  if (!transl_prot.empty()) {
249  // look for discrepancy in number of terminal Xs between product and translation
250  m_TranslTerminalX = x_CountTerminalXs(transl_prot, (got_stop && (transl_prot.length() == prot_vec.size() + 1)));
251  m_ProdTerminalX = x_CountTerminalXs(prot_vec);
252 
253  }
254  }
255 
256  x_GetCdTransErrors(feat, prot_handle, show_stop, got_stop, scope);
257  }
258 
259  if (x_JustifiesException()) {
260  has_errors = true;
261  other_than_mismatch = true;
262  } else if (m_Mismatches.size() > 0) {
263  has_errors = true;
264  }
265 
266  if (!report_errors && !no_product) {
267  if (!has_errors) {
268  if (!frameshift_except && !rearrange_except && !mixed_population && !low_quality) {
269  m_ProblemFlags |= eCDSTranslationProblem_UnnecessaryException;
270  }
271  } else if (unclassified_except && !other_than_mismatch) {
272  if (m_Mismatches.size() * 50 <= m_ProtLen) {
273  m_ProblemFlags |= eCDSTranslationProblem_ErroneousException;
274  }
275  } else if (!product_replaced && !transcript_or_proteomic && !rna_editing) {
276  m_ProblemFlags |= eCDSTranslationProblem_UnqualifiedException;
277  }
278  }
279 
280 }
281 
282 
283 void CCDSTranslationProblems::x_GetCdTransErrors(const CSeq_feat& feat, CBioseq_Handle product, bool show_stop, bool got_stop, CScope* scope)
284 {
285  bool no_beg, no_end;
286  FeatureHasEnds(feat, product ? &(product.GetScope()) : scope, no_beg, no_end);
287  if (show_stop) {
288  if (!got_stop && !no_end) {
289  m_ProblemFlags |= eCDSTranslationProblem_NoStop;
290  } else if (got_stop && no_end) {
291  m_ProblemFlags |= eCDSTranslationProblem_StopPartial;
292  } else if (got_stop && !no_end) {
293  m_RaggedLength = x_CheckForRaggedEnd(feat, scope);
294  }
295  }
296 }
297 
299  const CSeq_feat& feat,
300  const CSeq_id& id,
301  const CCdregion& cdr,
302  TSeqPos start,
303  TSeqPos stop,
304  ENa_strand strand,
305  CScope *scope
306 )
307 
308 {
309  bool nonsense_intron = false;
310  CRef<CSeq_feat> tmp_cds (new CSeq_feat());
311 
312  tmp_cds->SetLocation().SetInt().SetFrom(start);
313  tmp_cds->SetLocation().SetInt().SetTo(stop);
314  tmp_cds->SetLocation().SetInt().SetStrand(strand);
315  tmp_cds->SetLocation().SetInt().SetId().Assign(id);
316 
317  tmp_cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
318  tmp_cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
319  tmp_cds->SetData().SetCdregion();
320  if ( cdr.IsSetCode()) {
321  tmp_cds->SetData().SetCdregion().SetCode().Assign(cdr.GetCode());
322  }
323 
324  bool alt_start = false;
325  try {
326  string transl_prot = TranslateCodingRegionForValidation(*tmp_cds, *scope, alt_start);
327 
328  if (NStr::Equal(transl_prot, "*")) {
329  nonsense_intron = true;
330  }
331  } catch (CException&) {
332  }
333  return nonsense_intron;
334 }
335 
336 
338 {
339  TSeqPos last_start = 0, last_stop = 0, start, stop;
340 
341  if (!feat.IsSetData() && !feat.GetData().IsCdregion()) {
342  return 0;
343  }
344  if (feat.IsSetExcept() || feat.IsSetExcept_text()) return 0;
345  const CCdregion& cdr = feat.GetData().GetCdregion();
346  if (cdr.IsSetCode_break()) return 0;
347 
348  size_t count = 0;
349  const CSeq_loc& loc = feat.GetLocation();
350 
351  CSeq_loc_CI prev;
352  for (CSeq_loc_CI curr(loc); curr; ++curr) {
353  start = curr.GetRange().GetFrom();
354  stop = curr.GetRange().GetTo();
355  if (prev && curr && IsSameBioseq(curr.GetSeq_id(), prev.GetSeq_id(), scope)) {
356  ENa_strand strand = curr.GetStrand();
357  if (strand == eNa_strand_minus) {
358  if (last_start - stop == 4) {
359  if (x_IsThreeBaseNonsense(feat, curr.GetSeq_id(), cdr, stop + 1, last_start - 1, strand, scope)) {
360  count++;
361  }
362  }
363  } else {
364  if (start - last_stop == 4) {
365  if (x_IsThreeBaseNonsense(feat, curr.GetSeq_id(), cdr, last_stop + 1, start - 1, strand, scope)) {
366  count++;
367  }
368  }
369  }
370  }
371  last_start = start;
372  last_stop = stop;
373  prev = curr;
374  }
375 
376  if (count > 0 && sequence::IsPseudo(feat, *scope)) return 0;
377  else return count;
378 }
379 
380 
381 vector<CRef<CSeq_loc> > CCDSTranslationProblems::GetNonsenseIntrons(const CSeq_feat& feat, CScope& scope)
382 {
383  vector<CRef<CSeq_loc> > intron_locs;
384 
385  TSeqPos last_start = 0, last_stop = 0, start, stop;
386 
387  if (!feat.IsSetData() && !feat.GetData().IsCdregion()) {
388  return intron_locs;
389  }
390  if (feat.IsSetExcept() || feat.IsSetExcept_text()) return intron_locs;
391  const CCdregion& cdr = feat.GetData().GetCdregion();
392  if (cdr.IsSetCode_break()) return intron_locs;
393 
394  const CSeq_loc& loc = feat.GetLocation();
395 
396  CSeq_loc_CI prev;
397  for (CSeq_loc_CI curr(loc); curr; ++curr) {
398  start = curr.GetRange().GetFrom();
399  stop = curr.GetRange().GetTo();
400  if (prev && curr && IsSameBioseq(curr.GetSeq_id(), prev.GetSeq_id(), &scope)) {
401  ENa_strand strand = curr.GetStrand();
402  if (strand == eNa_strand_minus) {
403  if (last_start - stop == 4) {
404  if (x_IsThreeBaseNonsense(feat, curr.GetSeq_id(), cdr, stop + 1, last_start - 1, strand, &scope)) {
405  CRef<CSeq_id> id(new CSeq_id());
406  id->Assign(curr.GetSeq_id());
407  CRef<CSeq_loc> intron_loc(new CSeq_loc(*id, stop + 1, last_start - 1, strand));
408  intron_locs.push_back(intron_loc);
409  }
410  }
411  } else {
412  if (start - last_stop == 4) {
413  if (x_IsThreeBaseNonsense(feat, curr.GetSeq_id(), cdr, last_stop + 1, start - 1, strand, &scope)) {
414  CRef<CSeq_id> id(new CSeq_id());
415  id->Assign(curr.GetSeq_id());
416  CRef<CSeq_loc> intron_loc(new CSeq_loc(*id, last_stop + 1, start - 1, strand));
417  intron_locs.push_back(intron_loc);
418  }
419  }
420  }
421  }
422  last_start = start;
423  last_stop = stop;
424  prev = curr;
425  }
426 
427  if (intron_locs.size() > 0 && sequence::IsPseudo(feat, scope)) {
428  intron_locs.clear();
429  }
430  return intron_locs;
431 
432 }
433 
434 
435 bool CCDSTranslationProblems::x_ProteinHasTooManyXs(const string& transl_prot)
436 {
437  size_t num_x = 0, num_nonx = 0;
438 
439  ITERATE(string, it, transl_prot) {
440  if (*it == 'X') {
441  num_x++;
442  } else {
443  num_nonx++;
444  }
445  }
446 
447  // report too many Xs
448  if (num_x > num_nonx) {
449  return true;
450  } else {
451  return false;
452  }
453 }
454 
455 
457  const string& except_text,
458  bool& unclassified_except,
459  bool& mismatch_except,
460  bool& frameshift_except,
461  bool& rearrange_except,
462  bool& product_replaced,
463  bool& mixed_population,
464  bool& low_quality,
465  bool& rna_editing,
466  bool& transcript_or_proteomic)
467 {
468  if (NStr::FindNoCase(except_text, "unclassified translation discrepancy") != NPOS) {
469  unclassified_except = true;
470  }
471  if (NStr::FindNoCase(except_text, "mismatches in translation") != NPOS) {
472  mismatch_except = true;
473  }
474  if (NStr::FindNoCase(except_text, "artificial frameshift") != NPOS) {
475  frameshift_except = true;
476  }
477  if (NStr::FindNoCase(except_text, "rearrangement required for product") != NPOS) {
478  rearrange_except = true;
479  }
480  if (NStr::FindNoCase(except_text, "translated product replaced") != NPOS) {
481  product_replaced = true;
482  }
483  if (NStr::FindNoCase(except_text, "heterogeneous population sequenced") != NPOS) {
484  mixed_population = true;
485  }
486  if (NStr::FindNoCase(except_text, "low-quality sequence region") != NPOS) {
487  low_quality = true;
488  }
489  if (NStr::FindNoCase (except_text, "RNA editing") != NPOS) {
490  rna_editing = true;
491  }
492  if (NStr::FindNoCase(except_text, "annotated by transcript or proteomic data") != NPOS) {
493  transcript_or_proteomic = true;
494  }
495 }
496 
497 
499 {
500  size_t rval = 0;
501 
502  const CCdregion& cdregion = feat.GetData().GetCdregion();
503  const CSeq_loc& location = feat.GetLocation();
504  unsigned int part_loc = SeqLocPartialCheck(location, scope);
505  string comment_text;
506  if (feat.IsSetComment()) {
507  comment_text = feat.GetComment();
508  }
509 
510  // check frame
511  if (cdregion.IsSetFrame()
512  && (cdregion.GetFrame() == CCdregion::eFrame_two || cdregion.GetFrame() == CCdregion::eFrame_three)) {
513  // coding region should be 5' partial
514  if (!(part_loc & eSeqlocPartial_Start)) {
515  rval |= eCDSTranslationProblem_FrameNotPartial;
516  } else if ((part_loc & eSeqlocPartial_Start) && !x_Is5AtEndSpliceSiteOrGap(location, *scope)) {
518  || NStr::Find(comment_text, "coding region disrupted by sequencing gap") != string::npos) {
519  // suppress
520  } else {
521  rval |= eCDSTranslationProblem_FrameNotConsensus;
522  }
523  }
524  }
525  return rval;
526 }
527 
528 
530 {
// NOTE(review): the header line for this body is not present in this listing; it is called
// without arguments as x_JustifiesException() and its result is used as a boolean.
//
// Returns true when the recorded state contains any problem other than a plain residue
// mismatch: one of the frame / stop / start / missing-protein flags below, a difference
// between protein and translation length, an internal stop codon, a ragged end, a '-'/'X'
// start, or a failed translation.
531  if (m_ProblemFlags & eCDSTranslationProblem_FrameNotPartial ||
532  m_ProblemFlags & eCDSTranslationProblem_FrameNotConsensus ||
533  m_ProblemFlags & eCDSTranslationProblem_NoStop ||
534  m_ProblemFlags & eCDSTranslationProblem_StopPartial ||
535  m_ProblemFlags & eCDSTranslationProblem_PastStop ||
536  m_ProblemFlags & eCDSTranslationProblem_ShouldStartPartial ||
537  m_ProblemFlags & eCDSTranslationProblem_BadStart ||
538  m_ProblemFlags & eCDSTranslationProblem_NoProtein ||
539  m_ProtLen != m_TransLen ||
540  m_InternalStopCodons > 0 ||
541  m_RaggedLength > 0 || m_HasDashXStart ||
542  m_UnableToTranslate) {
543  return true;
// Otherwise defer to the overload that inspects the recorded transl_except problems.
544  } else if (x_JustifiesException(m_TranslExceptProblems)) {
545  return true;
546  } else {
547  return false;
548  }
549 }
550 
551 
552 size_t CCDSTranslationProblems::x_CountTerminalXs(const string& transl_prot, bool skip_stop)
553 {
554  // look for discrepancy in number of terminal Xs between product and translation
555  size_t transl_terminal_x = 0;
556  size_t i = transl_prot.length() - 1;
557  if (i > 0 && transl_prot[i] == '*' && skip_stop) {
558  i--;
559  }
560  while (i > 0) {
561  if (transl_prot[i] == 'X') {
562  transl_terminal_x++;
563  } else {
564  break;
565  }
566  i--;
567  }
568  if (i == 0 && transl_prot[0] == 'X') {
569  transl_terminal_x++;
570  }
571  return transl_terminal_x;
572 }
573 
575 {
576  size_t prod_terminal_x = 0;
577  TSeqPos prod_len = prot_vec.size() - 1;
578  while (prod_len > 0 && prot_vec[prod_len] == 'X') {
579  prod_terminal_x++;
580  prod_len--;
581  }
582  if (prod_len == 0 && prot_vec[prod_len] == 'X') {
583  prod_terminal_x++;
584  }
585  return prod_terminal_x;
586 }
587 
588 
590 CCDSTranslationProblems::x_GetTranslationMismatches(const CSeq_feat& feat, const CSeqVector& prot_vec, const string& transl_prot, bool has_accession)
591 {
592  TTranslationMismatches mismatches;
593  size_t prot_len;
594  size_t len;
595 
596  CalculateEffectiveTranslationLengths(transl_prot, prot_vec, len, prot_len);
597 
598  if (len == prot_len || has_accession) { // could be identical
599  if (len > prot_len) {
600  len = prot_len;
601  }
602  for (TSeqPos i = 0; i < len; ++i) {
603  CSeqVectorTypes::TResidue p_res = prot_vec[i];
604  CSeqVectorTypes::TResidue t_res = transl_prot[i];
605 
606  if (t_res != p_res) {
607  if (i == 0) {
608  bool no_beg, no_end;
609  FeatureHasEnds(feat, &(prot_vec.GetScope()), no_beg, no_end);
610  if (feat.IsSetPartial() && feat.GetPartial() && (!no_beg) && (!no_end)) {
611  } else if (t_res == '-') {
612  } else {
613  mismatches.push_back({ p_res, t_res, i });
614  }
615  } else {
616  mismatches.push_back({ p_res, t_res, i });
617  }
618  }
619  }
620  }
621  return mismatches;
622 }
623 
624 
625 static bool x_LeuCUGstart(const CSeq_feat& feat)
626 {
627  if (! feat.IsSetExcept())
628  return false;
629  if (! feat.IsSetExcept_text())
630  return false;
631  const string& except_text = feat.GetExcept_text();
632  if (NStr::FindNoCase(except_text, "translation initiation by tRNA-Leu at CUG codon") == NPOS) return false;
633  if (feat.IsSetQual()) {
634  for (auto it = feat.GetQual().begin(); it != feat.GetQual().end(); it++) {
635  const CGb_qual& qual = **it;
636  if (qual.IsSetQual() && NStr::Compare(qual.GetQual(), "experiment") == 0) return true;
637  }
638  }
639  return false;
640 }
641 
642 
645 (const CSeq_feat& feat, CBioseq_Handle loc_handle, CScope* scope, bool is_refseq)
646 {
647  TTranslExceptProblems problems;
648 
649  if (!feat.IsSetData() || !feat.GetData().IsCdregion() || !feat.GetData().GetCdregion().IsSetCode_break()) {
650  return problems;
651  }
652 
653  TSeqPos len = GetLength(feat.GetLocation(), scope);
654  bool alt_start = false;
655 
656  // need to translate a version of the coding region without the code breaks,
657  // to see if the code breaks are necessary
658  const CCdregion& cdregion = feat.GetData().GetCdregion();
659  CRef<CSeq_feat> tmp_cds(new CSeq_feat());
660  tmp_cds->SetLocation().Assign(feat.GetLocation());
661  tmp_cds->SetData().SetCdregion();
662  if (cdregion.IsSetFrame()) {
663  tmp_cds->SetData().SetCdregion().SetFrame(cdregion.GetFrame());
664  }
665  if (cdregion.IsSetCode()) {
666  tmp_cds->SetData().SetCdregion().SetCode().Assign(cdregion.GetCode());
667  }
668 
669  // now, will use tmp_cds to translate individual code breaks;
670  tmp_cds->SetData().SetCdregion().ResetFrame();
671 
672  const CSeq_loc& loc = feat.GetLocation();
673 
674  for (auto cbr = cdregion.GetCode_break().begin(); cbr != cdregion.GetCode_break().end(); cbr++) {
675  if (!(*cbr)->IsSetLoc()) {
676  continue;
677  }
678  // if the code break is outside the coding region, skip
679  ECompare comp = Compare((*cbr)->GetLoc(), loc,
680  scope, fCompareOverlapping);
681  if ((comp != eContained) && (comp != eSame)) {
682  continue;
683  }
684 
685  TSeqPos codon_length = GetLength((*cbr)->GetLoc(), scope);
686  TSeqPos from = LocationOffset(loc, (*cbr)->GetLoc(),
687  eOffset_FromStart, scope);
688 
689  TSeqPos from_end = LocationOffset(loc, (*cbr)->GetLoc(),
690  eOffset_FromEnd, scope);
691 
692  TSeqPos to = from + codon_length - 1;
693 
694  // check for code break not on a codon
695  if (codon_length == 3 ||
696  ((codon_length == 1 || codon_length == 2) && to == len - 1)) {
697  size_t start_pos;
698  switch (cdregion.GetFrame()) {
700  start_pos = 1;
701  break;
703  start_pos = 2;
704  break;
705  default:
706  start_pos = 0;
707  break;
708  }
709  if ((from % 3) != start_pos) {
710  problems.push_back({ eTranslExceptPhase, 0, 0 });
711  }
712  }
713  if ((*cbr)->IsSetAa() && (*cbr)->IsSetLoc()) {
714  tmp_cds->SetLocation().Assign((*cbr)->GetLoc());
715  tmp_cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
716  tmp_cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
717  string cb_trans;
718  try {
720  *tmp_cds, *scope, cb_trans,
721  true, // include stop codons
722  false, // do not remove trailing X/B/Z
723  &alt_start);
724  } catch (CException&) {
725  }
726  size_t prot_pos = from / 3;
727 
728  unsigned char ex = 0;
729  vector<char> seqData;
730  string str;
731  bool not_set = false;
732 
733  switch ((*cbr)->GetAa().Which()) {
735  str = (*cbr)->GetAa().GetNcbi8aa();
737  ex = seqData[0];
738  break;
740  str = (*cbr)->GetAa().GetNcbi8aa();
742  ex = seqData[0];
743  break;
745  seqData.push_back((*cbr)->GetAa().GetNcbieaa());
746  ex = seqData[0];
747  break;
748  default:
749  // do nothing, code break wasn't actually set
750  not_set = true;
751  break;
752  }
753 
754  if (!not_set) {
755  string except_char;
756  except_char += ex;
757 
758  //At the beginning of the CDS
759  if (prot_pos == 0 && ex != 'M') {
760  if (prot_pos == 0 && ex == 'L' && x_LeuCUGstart(feat) && is_refseq) {
761  /* do not warn on explicitly documented unusual translation initiation at CUG without initiator tRNA-Met */
762  } else if ((!feat.IsSetPartial()) || (!feat.GetPartial())) {
763  problems.push_back({ eTranslExceptSuspicious, ex, prot_pos });
764  }
765  }
766 
767  // Anywhere in CDS, where exception has no effect
768  if (from_end > 0) {
769  if (from_end < 2 && NStr::Equal(except_char, "*")) {
770  // this is a necessary terminal transl_except
771  } else if (NStr::EqualNocase(cb_trans, except_char)) {
772  if (prot_pos == 0 && ex == 'L' && x_LeuCUGstart(feat) && is_refseq) {
773  /* do not warn on explicitly documented unusual translation initiation at CUG without initiator tRNA-Met */
774  } else {
775  problems.push_back({ eTranslExceptUnnecessary, ex, prot_pos });
776  }
777  }
778  } else if (!NStr::Equal(except_char, "*"))
779  {
780  if (NStr::Equal(cb_trans, except_char) ||
781  !loc.IsPartialStop(eExtreme_Biological))
782  {
783  const CGenetic_code *gcode;
784  CBioseq_Handle bsh;
785  CSeqVector vec;
786  TSeqPos from1;
787  string p;
788  string q;
789  bool altst;
790 
792  altst = false;
793  from1 = (*cbr)->GetLoc().GetStart(eExtreme_Biological);
794  vec.GetSeqData(from1, from1 + 3, q);
795 
796  if (cdregion.CanGetCode())
797  gcode = &cdregion.GetCode();
798  else
799  gcode = nullptr;
800 
803  gcode, &altst);
804 
805  if (!NStr::Equal(except_char, "*")) {
806  problems.push_back({ eTranslExceptUnexpected, ex, prot_pos });
807  }
808  }
809  } else
810  {
811  /*bsv
812  Stop codon here. Needs a check for abutting tRNA feature.
813  bsv*/
814  }
815  }
816  }
817  }
818  return problems;
819 }
820 
821 
823 {
// NOTE(review): the header line for this body is not present in this listing; it is called
// as x_JustifiesException(m_TranslExceptProblems), i.e. with a list of transl_except problems.
//
// Returns true as soon as one entry has problem type eTranslExceptPhase; every other
// problem type is ignored. Returns false for an empty list.
824  for (auto it = problems.begin(); it != problems.end(); it++) {
825  if (it->problem == eTranslExceptPhase) {
826  return true;
827  }
828  }
829  return false;
830 }
831 
832 
834 {
835  CSeq_loc_CI loc_it(loc);
836  if (!loc_it) {
837  return false;
838  }
839  CConstRef<CSeq_loc> rng = loc_it.GetRangeAsSeq_loc();
840  if (!rng) {
841  return false;
842  }
843 
845  const CBioseq_Handle & bsh = scope.GetBioseqHandle(*rng);
846  if (!bsh) {
847  return false;
848  }
849 
850  ENa_strand strand = rng->GetStrand();
851  if (strand == eNa_strand_minus) {
852  TSeqPos seq_len = bsh.GetBioseqLength();
853  if (end < seq_len - 1) {
855  if (vec.IsInGap(end + 1)) {
856  if (vec.IsInGap (end)) {
857  // not ok - location overlaps gap
858  return false;
859  } else {
860  // ok, location abuts gap
861  }
862  } else if (end < seq_len - 2 && IsResidue (vec[end + 1]) && ConsistentWithC(vec[end + 1])
863  && IsResidue (vec[end + 2]) && ConsistentWithT(vec[end + 2])) {
864  // it's ok, it's abutting the reverse complement of AG
865  } else {
866  return false;
867  }
868  } else {
869  // it's ok, location endpoint is at the 3' end
870  }
871  } else {
872  if (end > 0 && end < bsh.GetBioseqLength()) {
874  if (vec.IsInGap(end - 1)) {
875  if (vec.IsInGap (end)) {
876  // not ok - location overlaps gap
877  return false;
878  } else {
879  // ok, location abuts gap
880  }
881  } else {
882  if (end > 1 && IsResidue (vec[end - 1]) && ConsistentWithG(vec[end - 1])
883  && IsResidue(vec[end - 2]) && ConsistentWithA(vec[end - 2])) {
884  //it's ok, it's abutting "AG"
885  } else {
886  return false;
887  }
888  }
889  } else {
890  // it's ok, location endpoint is at the 5' end
891  }
892  }
893  return true;
894 }
895 
896 
898 {
899  int ragged = 0;
900  if (!feat.IsSetData() || !feat.GetData().IsCdregion() || !feat.IsSetLocation()) {
901  return 0;
902  }
903  unsigned int part_loc = SeqLocPartialCheck(feat.GetLocation(), scope);
904  if (feat.IsSetProduct()) {
905  unsigned int part_prod = SeqLocPartialCheck(feat.GetProduct(), scope);
906  if ((part_loc & eSeqlocPartial_Stop) ||
907  (part_prod & eSeqlocPartial_Stop)) {
908  // not ragged
909  } else {
910  // complete stop, so check for ragged end
911  ragged = x_CheckForRaggedEnd(feat.GetLocation(), feat.GetData().GetCdregion(), scope);
912  }
913  }
914  return ragged;
915 }
916 
917 
919  const CSeq_loc& loc,
920  const CCdregion& cdregion,
921  CScope* scope)
922 {
923  size_t len = GetLength(loc, scope);
924  if (cdregion.GetFrame() > CCdregion::eFrame_one) {
925  len -= cdregion.GetFrame() - 1;
926  }
927 
928  int ragged = len % 3;
929  if (ragged > 0) {
930  len = GetLength(loc, scope);
931  size_t last_pos = 0;
932 
934  if (cdregion.IsSetCode_break()) {
935  for (auto cbr = cdregion.GetCode_break().begin(); cbr != cdregion.GetCode_break().end(); cbr++) {
936  SRelLoc rl(loc, (*cbr)->GetLoc(), scope);
937  ITERATE(SRelLoc::TRanges, rit, rl.m_Ranges) {
938  if ((*rit)->GetTo() > last_pos) {
939  last_pos = (*rit)->GetTo();
940  }
941  }
942  }
943  }
944 
945  // allowing a partial codon at the end
946  TSeqPos codon_length = range.GetLength();
947  if ((codon_length == 0 || codon_length == 1) &&
948  last_pos == len - 1) {
949  ragged = 0;
950  }
951  }
952  return ragged;
953 }
954 
955 
956 typedef enum {
963 static const char* const sc_BypassMrnaTransCheckText[] = {
964  "RNA editing",
965  "adjusted for low-quality genome",
966  "annotated by transcript or proteomic data",
967  "artificial frameshift",
968  "reasons given in citation",
969  "transcribed product replaced",
970  "unclassified transcription discrepancy",
971 };
974 
975 
976 size_t InterpretMrnaException(const string& except_text)
977 {
978  size_t rval = 0;
979 
980  ITERATE(TBypassMrnaTransCheckSet, it, sc_BypassMrnaTransCheck) {
981  if (NStr::FindNoCase(except_text, *it) != NPOS) {
982  rval |= eMRNAExcept_Biological;
983  }
984  }
985  if (NStr::FindNoCase(except_text, "RNA editing") != NPOS) {
986  rval |= eMRNAExcept_RNAEditing;
987  }
988  if (NStr::FindNoCase(except_text, "unclassified transcription discrepancy") != NPOS) {
989  rval |= eMRNAExcept_Unclassified;
990  }
991  if (NStr::FindNoCase(except_text, "mismatches in transcription") != NPOS) {
992  rval |= eMRNAExcept_Mismatch;
993  }
994  if (NStr::FindNoCase(except_text, "transcribed product replaced") != NPOS) {
996  }
997  return rval;
998 }
999 
1001  const CSeq_feat& feat,
1002  size_t& mismatches,
1003  bool ignore_exceptions,
1006  bool far_fetch,
1007  bool is_gpipe,
1008  bool is_genomic,
1009  CScope* scope)
1010 {
1011  size_t rval = 0;
1012  mismatches = 0;
1013  if (feat.CanGetPseudo() && feat.GetPseudo()) {
1014  return rval;
1015  }
1016  if (!feat.CanGetProduct()) {
1017  return rval;
1018  }
1019 
1020  size_t exception_flags = 0;
1021 
1022  if (!ignore_exceptions &&
1023  feat.CanGetExcept() && feat.GetExcept() &&
1024  feat.CanGetExcept_text()) {
1025  exception_flags = InterpretMrnaException(feat.GetExcept_text());
1026  }
1027  bool has_errors = false, other_than_mismatch = false;
1028  bool report_errors = !(exception_flags & eMRNAExcept_Biological) || (exception_flags & eMRNAExcept_Mismatch);
1029 
1030  CConstRef<CSeq_id> product_id;
1031  try {
1032  product_id.Reset(&GetId(feat.GetProduct(), scope));
1033  } catch (CException&) {
1034  }
1035  if (!product_id) {
1036  return rval;
1037  }
1038 
1039  if (!nuc) {
1040  if (exception_flags & eMRNAExcept_Unclassified) {
1041  rval |= eMRNAProblem_TransFail;
1042  }
1043  return rval;
1044  }
1045 
1046  size_t total = 0;
1047 
1048  // note - exception will be thrown when creating CSeqVector if wrong type set for bioseq seq-data
1049  try {
1050  if (nuc) {
1051 
1052  if (!rna) {
1053  if (far_fetch) {
1055  }
1056  return rval;
1057  }
1058 
1059  _ASSERT(nuc && rna);
1060 
1061  CSeqVector nuc_vec(feat.GetLocation(), *scope,
1064 
1065  TSeqPos nuc_len = nuc_vec.size();
1066  TSeqPos rna_len = rna_vec.size();
1067 
1068  if (nuc_len != rna_len) {
1069  has_errors = true;
1070  other_than_mismatch = true;
1071  if (nuc_len < rna_len) {
1072  size_t count_a = 0, count_no_a = 0;
1073  // count 'A's in the tail
1074  for (CSeqVector_CI iter(rna_vec, nuc_len); iter; ++iter) {
1075  if ((*iter == 'A') || (*iter == 'a')) {
1076  ++count_a;
1077  } else {
1078  ++count_no_a;
1079  }
1080  }
1081  if (count_a < (19 * count_no_a)) { // less then 5%
1082  if (report_errors || (exception_flags & eMRNAExcept_RNAEditing)) {
1084  }
1085  } else if (count_a > 0 && count_no_a == 0) {
1086  has_errors = true;
1087  other_than_mismatch = true;
1088  if (report_errors || (exception_flags & eMRNAExcept_RNAEditing)) {
1089  if (is_gpipe && is_genomic) {
1090  // suppress
1091  } else {
1092  rval |= eMRNAProblem_PolyATail100;
1093  }
1094  }
1095  } else {
1096  if (report_errors) {
1097  rval |= eMRNAProblem_PolyATail95;
1098  }
1099  }
1100  // allow base-by-base comparison on common length
1101  rna_len = nuc_len = min(nuc_len, rna_len);
1102 
1103  } else {
1104  if (report_errors) {
1106  }
1107  }
1108  }
1109 
1110  if (rna_len == nuc_len && nuc_len > 0) {
1111  CSeqVector_CI nuc_ci(nuc_vec);
1112  CSeqVector_CI rna_ci(rna_vec);
1113 
1114  // compare content of common length
1115  while ((nuc_ci && rna_ci) && (nuc_ci.GetPos() < nuc_len)) {
1116  if (*nuc_ci != *rna_ci) {
1117  ++mismatches;
1118  }
1119  ++nuc_ci;
1120  ++rna_ci;
1121  ++total;
1122  }
1123  if (mismatches > 0) {
1124  has_errors = true;
1125  if (report_errors && !(exception_flags & eMRNAExcept_Mismatch)) {
1126  rval |= eMRNAProblem_Mismatch;
1127  }
1128  }
1129  }
1130  }
1131  } catch (const CException&) {
1132  rval |= eMRNAProblem_TransFail;
1133  } catch (const std::exception&) {
1134  }
1135 
1136  if (!report_errors) {
1137  if (!has_errors) {
1139  } else if ((exception_flags & eMRNAExcept_Unclassified) && !other_than_mismatch) {
1140  if (mismatches * 50 <= total) {
1142  }
1143  } else if (exception_flags & eMRNAExcept_ProductReplaced) {
1145  }
1146  }
1147  return rval;
1148 }
1149 
1150 
1151 END_SCOPE(validator)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CBioseq_Handle –.
vector< STranslExceptProblem > TTranslExceptProblems
static size_t x_CountTerminalXs(const string &transl_prot, bool skip_stop)
void CalculateTranslationProblems(const CSeq_feat &feat, CBioseq_Handle loc_handle, CBioseq_Handle prot_handle, bool ignore_exceptions, bool far_fetch_cds, bool standalone_annot, bool single_seq, bool is_gpipe, bool is_genomic, bool is_refseq, bool is_nt_or_ng_or_nw, bool is_nc, bool has_accession, CScope *scope)
static vector< CRef< CSeq_loc > > GetNonsenseIntrons(const CSeq_feat &feat, CScope &scope)
static bool x_ProteinHasTooManyXs(const string &transl_prot)
vector< STranslationMismatch > TTranslationMismatches
static TTranslationMismatches x_GetTranslationMismatches(const CSeq_feat &feat, const CSeqVector &prot_vec, const string &transl_prot, bool has_accession)
static int x_CheckForRaggedEnd(const CSeq_feat &feat, CScope *scope)
void x_GetCdTransErrors(const CSeq_feat &feat, CBioseq_Handle product, bool show_stop, bool got_stop, CScope *scope)
static bool x_IsThreeBaseNonsense(const CSeq_feat &feat, const CSeq_id &id, const CCdregion &cdr, TSeqPos start, TSeqPos stop, ENa_strand strand, CScope *scope)
static size_t x_CheckCDSFrame(const CSeq_feat &feat, CScope *scope)
static bool x_Is5AtEndSpliceSiteOrGap(const CSeq_loc &loc, CScope &scope)
TTranslExceptProblems x_GetTranslExceptProblems(const CSeq_feat &feat, CBioseq_Handle loc_handle, CScope *scope, bool is_refseq)
static void x_GetExceptionFlags(const string &except_text, bool &unclassified_except, bool &mismatch_except, bool &frameshift_except, bool &rearrange_except, bool &product_replaced, bool &mixed_population, bool &low_quality, bool &rna_editing, bool &transcript_or_proteomic)
static size_t x_CountNonsenseIntrons(const CSeq_feat &feat, CScope *scope)
CCdregion –.
Definition: Cdregion.hpp:66
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
CScope –.
Definition: scope.hpp:92
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@ e_Ncbieaa
Definition: sequtil.hpp:57
@ e_Ncbi8aa
Definition: sequtil.hpp:56
@ e_Ncbistdaa
Definition: sequtil.hpp:58
CSeqVector –.
Definition: seq_vector.hpp:65
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
static const char location[]
Definition: config.c:97
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
ECompare
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ eSeqlocPartial_Nostart
@ eSeqlocPartial_Stop
@ eSeqlocPartial_Start
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eOffset_FromEnd
relative to end of location
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
Definition: sequence.cpp:1428
vector< CRef< TRange > > TRanges
Definition: sequence.hpp:1132
TRanges m_Ranges
Definition: sequence.hpp:1149
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
Definition: sequence.hpp:984
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetPos(void) const
unsigned char TResidue
bool IsInGap(TSeqPos pos) const
True if the sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...
Definition: seq_vector.hpp:277
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void SetCoding(TCoding coding)
CScope & GetScope(void) const
Definition: seq_vector.hpp:330
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
static TThisType GetEmpty(void)
Definition: range.hpp:306
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5296
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5383
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsSetData(void) const
the specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
qualifiers. Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
bool IsSetCode(void) const
genetic code used. Check if a value has been assigned to Code data member.
Definition: Cdregion_.hpp:700
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE. Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
bool CanGetPartial(void) const
Check if it is safe to call GetPartial method.
Definition: Seq_feat_.hpp:949
const TCode & GetCode(void) const
Get the Code member data.
Definition: Cdregion_.hpp:712
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TCdregion & GetCdregion(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
Definition: Seq_feat_.hpp:1399
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
Definition: Gb_qual_.hpp:200
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
Definition: Seq_feat_.hpp:996
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
Definition: Cdregion_.hpp:706
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Gb_qual_.hpp:212
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetCode_break(void) const
individual exceptions. Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
bool CanGetPseudo(void) const
Check if it is safe to call GetPseudo method.
Definition: Seq_feat_.hpp:1352
bool IsSetLocation(void) const
feature made from. Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
@ e_Ncbi8aa
NCBI8aa code.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
int i
int len
range(_Ty, _Ty) -> range< _Ty >
T min(T x_, T y_)
bool ConsistentWithT(Char ch)
Definition: utilities.cpp:2886
bool IsResidue(unsigned char residue)
Definition: utilities.hpp:88
void CalculateEffectiveTranslationLengths(const string &transl_prot, const CSeqVector &prot_vec, size_t &len, size_t &prot_len)
Definition: utilities.cpp:2597
bool ConsistentWithA(Char ch)
Definition: utilities.cpp:2871
bool ReportTranslationErrors(const string &except_text)
Definition: utilities.cpp:2341
bool ConsistentWithC(Char ch)
Definition: utilities.cpp:2876
size_t CountInternalStopCodons(const string &transl_prot)
Definition: utilities.cpp:2393
bool ConsistentWithG(Char ch)
Definition: utilities.cpp:2881
void FeatureHasEnds(const CSeq_feat &feat, CScope *scope, bool &no_beg, bool &no_end)
Definition: utilities.cpp:2546
bool s_PartialAtGapOrNs(CScope *scope, const CSeq_loc &loc, unsigned int tag, bool only_gap=false)
Definition: utilities.cpp:1176
bool HasBadStartCodon(const CSeq_feat &feat, CScope &scope, bool ignore_exceptions)
Definition: utilities.cpp:2362
string TranslateCodingRegionForValidation(const CSeq_feat &feat, CScope &scope, bool &alt_start)
Definition: utilities.cpp:2266
static const char * str(char *buf, int n)
Definition: stats.c:84
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
Definition: sequence.hpp:1124
#define _ASSERT
@ eMRNAExcept_Mismatch
@ eMRNAExcept_Unclassified
@ eMRNAExcept_Biological
@ eMRNAExcept_RNAEditing
@ eMRNAExcept_ProductReplaced
CStaticArraySet< const char *, PCase_CStr > TBypassMrnaTransCheckSet
static bool x_LeuCUGstart(const CSeq_feat &feat)
DEFINE_STATIC_ARRAY_MAP(TBypassMrnaTransCheckSet, sc_BypassMrnaTransCheck, sc_BypassMrnaTransCheckText)
static const char *const sc_BypassMrnaTransCheckText[]
size_t InterpretMrnaException(const string &except_text)
size_t GetMRNATranslationProblems(const CSeq_feat &feat, size_t &mismatches, bool ignore_exceptions, CBioseq_Handle nuc, CBioseq_Handle rna, bool far_fetch, bool is_gpipe, bool is_genomic, CScope *scope)
@ eMRNAProblem_UnnecessaryException
@ eMRNAProblem_UnableToFetch
@ eMRNAProblem_TranscriptLenLess
@ eMRNAProblem_PolyATail95
@ eMRNAProblem_TranscriptLenMore
@ eMRNAProblem_TransFail
@ eMRNAProblem_ProductReplaced
@ eMRNAProblem_Mismatch
@ eMRNAProblem_ErroneousException
@ eMRNAProblem_PolyATail100
Modified on Sat Dec 09 04:45:32 2023 by modify_doxy.py rev. 669887