33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
41 #include <serial/serialbase.hpp>
43 #include <objmgr/bioseq_handle.hpp>
45 #include <objmgr/seqdesc_ci.hpp>
46 #include <objmgr/seq_vector.hpp>
47 #include <objmgr/scope.hpp>
48 #include <objmgr/util/sequence.hpp>
49 #include <objmgr/util/feature.hpp>
60 #include <objects/seq/MolInfo.hpp>
61 #include <objects/seq/Bioseq.hpp>
66 #include <string>
71 BEGIN_SCOPE(validator)
72 using namespace sequence;
76 CSingleFeatValidator(feat, scope, imp) {
78  if (m_Gene) {
80  } else {
81  m_GeneIsPseudo = false;
82  }
83 }
87 {
88  if (!m_Feat.IsSetComment()) {
89  return;
90  }
92  const string& comment = m_Feat.GetComment();
93  if (NStr::Find(comment, "ambiguity in stop codon") != NPOS
96  if (stop_codon_loc) {
97  TSeqPos len = sequence::GetLength(*stop_codon_loc, &m_Scope);
98  CSeqVector vec(*stop_codon_loc, m_Scope, CBioseq_Handle::eCoding_Iupac);
99  string seq_string;
100  vec.GetSeqData(0, len - 1, seq_string);
101  bool found_ambig = false;
102  string::iterator it = seq_string.begin();
103  while (it != seq_string.end() && !found_ambig) {
104  if (*it != 'A' && *it != 'T' && *it != 'C' && *it != 'G' && *it != 'U') {
105  found_ambig = true;
106  }
107  ++it;
108  }
109  if (!found_ambig) {
111  "Feature comment indicates ambiguity in stop codon "
112  "but no ambiguities are present in stop codon.", m_Feat);
113  }
114  }
115  }
117  // look for EC number in comment
119  // suppress if protein has EC numbers
120  bool suppress = false;
121  if (m_ProductBioseq) {
123  if (prot_feat && prot_feat->GetData().GetProt().IsSetEc()) {
124  suppress = true;
125  }
126  }
127  if (!suppress) {
129  "Apparent EC number in CDS comment");
130  }
131  }
133 }
137 {
140  NStr::FindNoCase(text, "RNA editing") != NPOS) {
142  "CDS has both RNA editing /exception and /transl_except qualifiers");
143  }
144 }
148 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId())
// Classify the sequence IDs of a bioseq into accession categories.
//
// All four output flags are first reset to false. Nothing else happens when
// bsh is not a valid handle. Otherwise the IDs are examined one by one; only
// IDs whose type is Embl, Ddbj, Other or Genbank are considered, and flags are
// OR-ed in so a match on any ID is kept:
//   is_nt - `info` equals CSeq_id::eAcc_refseq_contig
//   is_ng - `info` equals CSeq_id::eAcc_refseq_genomic
//
// NOTE(review): the loop header that defines `it`, the statement that defines
// `info`, and the statements that update is_nw / is_nc are not visible in this
// excerpt - confirm their exact conditions against the full file.
150 static
151 void s_LocIdType(CBioseq_Handle bsh, bool& is_nt, bool& is_ng, bool& is_nw, bool& is_nc)
152 {
153  is_nt = is_ng = is_nw = is_nc = false;
154  if (bsh) {
156  CSeq_id_Handle sid = *it;
157  switch (sid.Which()) {
// Only these four ID types are inspected; every other type is ignored.
158  case NCBI_SEQID(Embl):
159  case NCBI_SEQID(Ddbj):
160  case NCBI_SEQID(Other):
161  case NCBI_SEQID(Genbank):
162  {
164  is_nt |= (info == CSeq_id::eAcc_refseq_contig);
165  is_ng |= (info == CSeq_id::eAcc_refseq_genomic);
168  break;
169  }
170  default:
171  break;
172  }
173  }
174  }
175 }
177 static
178 void s_LocIdType(const CSeq_loc& loc, CScope& scope, const CSeq_entry& tse,
179  bool& is_nt, bool& is_ng, bool& is_nw, bool& is_nc)
180 {
181  is_nt = is_ng = is_nw = is_nc = false;
182  if (!IsOneBioseq(loc, &scope)) {
183  return;
184  }
185  const CSeq_id& id = GetId(loc, &scope);
186  try {
187  CBioseq_Handle bsh = scope.GetBioseqHandleFromTSE(id, tse);
188  if (bsh) {
189  s_LocIdType(bsh, is_nt, is_ng, is_nw, is_nc);
190  }
191  } catch (CException&) {
192  }
193 }
197 {
198  CCDSTranslationProblems problems;
199  bool is_nt, is_ng, is_nw, is_nc;
200  s_LocIdType(m_LocationBioseq, is_nt, is_ng, is_nw, is_nc);
203  m_Feat,
209  m_Imp.IsStandaloneAnnot() ? false : m_Imp.GetTSE().IsSeq(),
210  m_Imp.IsGpipe(),
211  m_Imp.IsGenomic(),
212  m_Imp.IsRefSeq(),
213  (is_nt || is_ng || is_nw),
214  is_nc,
215  (m_Imp.IsRefSeq() || m_Imp.IsGED() || m_Imp.IsTPE()),
216  &m_Scope);
217  if (!problems.UnableToTranslate() && !problems.HasException()) {
219  }
224  }
225  }
227  x_ReportTranslationProblems(problems);
228 }
// Look up a genetic code id by its name.
//
// Returns the id of the first entry whose name equals code_name, compared
// case-insensitively (NStr::EqualNocase). Returns 255 when no entry matches.
//
// NOTE(review): the loop header that defines `it` (and therefore the
// collection being searched) is not visible in this excerpt.
231 int GetGcodeForName(const string& code_name)
232 {
235  if (NStr::EqualNocase((*it)->GetName(), code_name)) {
236  return (*it)->GetId();
237  }
238  }
// Sentinel result for "no genetic code with that name".
239  return 255;
240 }
244 {
245  int gc = 0;
246  if (cdr.IsSetCode()) {
248  if ((*it)->IsId()) {
249  gc = (*it)->GetId();
250  } else if ((*it)->IsName()) {
251  gc = GetGcodeForName((*it)->GetName());
252  }
253  if (gc != 0) break;
254  }
255  }
256  return gc;
257 }
// Build the message text for a coding region that has internal stop codons.
//
// internal_stop_count - number of internal stops, printed at the start
// bad_start           - when true, the message also mentions the start codon
// transl_start        - first translated character; '-' makes the start codon
//                       "illegal", anything else makes it "ambiguous"
//
// The message always ends with "Genetic code [<gc>]".
// NOTE(review): the statement that defines `gc` is not visible in this
// excerpt; presumably it is derived from feat - confirm.
260 string GetInternalStopErrorMessage(const CSeq_feat& feat, size_t internal_stop_count, bool bad_start, char transl_start)
261 {
263  string gccode = NStr::IntToString(gc);
265  string error_message;
266  if (bad_start) {
267  bool got_dash = transl_start == '-';
268  string codon_desc = got_dash ? "illegal" : "ambiguous";
269  error_message = NStr::SizetToString(internal_stop_count) +
270  " internal stops (and " + codon_desc + " start codon). Genetic code [" + gccode + "]";
271  } else {
272  error_message = NStr::SizetToString(internal_stop_count) +
273  " internal stops. Genetic code [" + gccode + "]";
274  }
275  return error_message;
276 }
// Build the internal-stop message text directly from a translated protein.
//
// The stop count comes from CountInternalStopCodons(transl_prot). The start
// codon is mentioned only when HasBadStartCodon(feat.GetLocation(),
// transl_prot) is true; it is then described as "illegal" if the first
// translated character is '-', otherwise as "ambiguous".
//
// The message always ends with "Genetic code [<gc>]".
// NOTE(review): the statement that defines `gc` is not visible in this
// excerpt; presumably it is derived from feat - confirm.
279 string GetInternalStopErrorMessage(const CSeq_feat& feat, const string& transl_prot)
280 {
281  size_t internal_stop_count = CountInternalStopCodons(transl_prot);
284  string gccode = NStr::IntToString(gc);
286  string error_message;
287  if (HasBadStartCodon(feat.GetLocation(), transl_prot)) {
288  bool got_dash = transl_prot[0] == '-';
289  string codon_desc = got_dash ? "illegal" : "ambiguous";
290  error_message = NStr::SizetToString(internal_stop_count) +
291  " internal stops (and " + codon_desc + " start codon). Genetic code [" + gccode + "]";
292  } else {
293  error_message = NStr::SizetToString(internal_stop_count) +
294  " internal stops. Genetic code [" + gccode + "]";
295  }
296  return error_message;
297 }
// Build the message text for a problematic start codon.
//
// first_char          - first translated character; '-' selects the wording
//                       "Illegal" / "Probably", anything else selects
//                       "Ambiguous" / "Possibly"
// internal_stop_count - when greater than zero the count is included and the
//                       message suggests a wrong genetic code; when zero the
//                       message suggests a wrong genetic code or that the
//                       protein should be partial
//
// NOTE(review): the statement that defines `gc` is not visible in this
// excerpt; presumably it is derived from feat - confirm.
300 string GetStartCodonErrorMessage(const CSeq_feat& feat, const char first_char, size_t internal_stop_count)
301 {
302  bool got_dash = first_char == '-';
303  string codon_desc = got_dash ? "Illegal" : "Ambiguous";
304  string p_word = got_dash ? "Probably" : "Possibly";
307  string gccode = NStr::IntToString(gc);
309  string error_message;
311  if (internal_stop_count > 0) {
312  error_message = codon_desc + " start codon (and " +
313  NStr::SizetToString(internal_stop_count) +
314  " internal stops). " + p_word + " wrong genetic code [" +
315  gccode + "]";
316  } else {
317  error_message = codon_desc + " start codon used. Wrong genetic code [" +
318  gccode + "] or protein should be partial";
319  }
320  return error_message;
321 }
324 string GetStartCodonErrorMessage(const CSeq_feat& feat, const string& transl_prot)
325 {
326  size_t internal_stop_count = CountInternalStopCodons(transl_prot);
328  return GetStartCodonErrorMessage(feat, transl_prot[0], internal_stop_count);
329 }
333 {
334  size_t problem_flags = problems.GetTranslationProblemFlags();
336  string label;
337  const CSeq_id* protid = &GetId(m_Feat.GetProduct(), &m_Scope);
338  protid->GetLabel(&label);
339  EDiagSev sev = eDiag_Error;
340  if (protid->IsGeneral() && protid->GetGeneral().IsSetDb() &&
341  (NStr::EqualNocase(protid->GetGeneral().GetDb(), "ti") ||
342  NStr::EqualNocase(protid->GetGeneral().GetDb(), "SRA"))) {
343  sev = eDiag_Warning;
344  }
346  "Unable to fetch CDS product '" + label + "'");
347  }
349  if (!problems.HasException() && (problem_flags & CCDSTranslationProblems::eCDSTranslationProblem_NoProtein)) {
350  bool is_nt, is_ng, is_nw, is_nc;
351  s_LocIdType(m_Feat.GetLocation(), m_Scope, m_Imp.GetTSE(), is_nt, is_ng, is_nw, is_nc);
352  EDiagSev sev = eDiag_Error;
354  sev = eDiag_Warning;
355  }
356  if (is_nc) {
357  sev = eDiag_Warning;
358  }
360  "No protein Bioseq given");
361  }
363  bool unclassified_except = false;
364  if (m_Feat.IsSetExcept_text() && NStr::FindNoCase(m_Feat.GetExcept_text(), "unclassified translation discrepancy") != NPOS) {
365  unclassified_except = true;
366  }
370  if (!problems.HasException() && problems.HasUnparsedTranslExcept()) {
371  if (problems.GetInternalStopCodons() == 0 && problems.GetTranslationMismatches().size() == 0) {
373  "Unparsed transl_except qual (but protein is okay). Skipped");
374  } else {
376  "Unparsed transl_except qual. Skipped");
377  }
378  }
381  for (size_t i = 0; i < problems.GetNumNonsenseIntrons(); i++) {
382  EDiagSev sev = eDiag_Critical;
383  if (m_Imp.IsEmbl() || m_Imp.IsDdbj()) {
384  sev = eDiag_Error;
385  }
386  PostErr(sev, eErr_SEQ_FEAT_IntronIsStopCodon, "Triplet intron encodes stop codon");
387  }
390  PostErr(eDiag_Info, eErr_SEQ_FEAT_CDShasTooManyXs, "CDS translation consists of more than 50% X residues");
391  }
393  if (problems.UnableToTranslate()) {
394  if (!problems.HasException()) {
396  "Unable to translate");
397  }
398  }
400  if (!problems.UnableToTranslate() && !problems.AltStart() &&
402  NStr::Find(m_Feat.GetExcept_text(), "alternative start codon") != string::npos &&
406  "Unnecessary alternative start codon exception");
407  }
409  if ((!problems.HasException() || unclassified_except) && problems.GetInternalStopCodons() > 0) {
410  if (unclassified_except && m_Imp.IsGpipe()) {
411  // suppress if gpipe genomic
412  } else {
413  EDiagSev stop_sev = unclassified_except ? eDiag_Warning : eDiag_Error;
414  if (!m_Imp.IsRefSeq() && m_Imp.IsGI() && m_Imp.IsGED()) {
415  stop_sev = eDiag_Critical;
416  }
421  problems.GetTranslStartCharacter()));
422  }
423  }
425  if (!problems.HasException()) {
427  if (!unclassified_except && (problem_flags & CCDSTranslationProblems::eCDSTranslationProblem_BadStart)) {
428  string start_err_msg = GetStartCodonErrorMessage(m_Feat, problems.GetTranslStartCharacter(), problems.GetInternalStopCodons());
430  start_err_msg);
431  }
435  "Suspicious CDS location - reading frame > 1 but not 5' partial");
436  }
439  EDiagSev sev = eDiag_Warning;
441  {
442  sev = eDiag_Error;
443  }
445  "Suspicious CDS location - reading frame > 1 and not at consensus splice site");
446  }
450  "Missing stop codon");
451  }
454  "Got stop codon, but 3'end is labeled partial");
455  }
458  "Start of location should probably be partial");
459  }
460  if (problems.GetRaggedLength() > 0) {
462  "Coding region extends " + NStr::IntToString(problems.GetRaggedLength()) +
463  " base(s) past stop codon");
464  }
465  }
467  if (!problems.UnableToTranslate() && problems.GetProtLen() > 1.2 * problems.GetTransLen()) {
468  if ((!m_Feat.IsSetExcept_text()) || NStr::Find(m_Feat.GetExcept_text(), "annotated by transcript or proteomic data") == string::npos) {
469  string msg = "Protein product length [" + NStr::SizetToString(problems.GetProtLen()) +
470  "] is more than 120% of the ";
471  if (m_ProductIsFar) {
472  msg += "(far) ";
473  }
474  msg += "translation length [" + NStr::SizetToString(problems.GetTransLen()) + "]";
476  }
477  }
480  bool rna_editing = false;
481  if (m_Feat.IsSetExcept_text() && NStr::FindNoCase(m_Feat.GetExcept_text(), "RNA editing") != NPOS) {
482  rna_editing = true;
483  }
484  if (problems.GetProtLen() != problems.GetTransLen() &&
485  (!problems.HasException() ||
486  (rna_editing &&
487  (problems.GetProtLen() < problems.GetTransLen() - 1 || problems.GetProtLen() > problems.GetTransLen())))) {
488  string msg = "Given protein length [" + NStr::SizetToString(problems.GetProtLen()) +
489  "] does not match ";
490  if (m_ProductIsFar) {
491  msg += "(far) ";
492  }
493  msg += "translation length [" +
494  NStr::SizetToString(problems.GetTransLen()) + "]";
496  if (rna_editing) {
497  msg += " (RNA editing present)";
498  }
499  PostErr(rna_editing ? eDiag_Warning : eDiag_Error,
501  }
503  bool mismatch_except = false;
504  if (m_Feat.IsSetExcept_text() && NStr::FindNoCase(m_Feat.GetExcept_text(), "mismatches in translation") != NPOS) {
505  mismatch_except = true;
506  }
508  if (!problems.HasException() && !mismatch_except) {
510  }
512  if (problems.GetTranslTerminalX() != problems.GetProdTerminalX()) {
514  "Terminal X count for CDS translation (" + NStr::SizetToString(problems.GetTranslTerminalX())
515  + ") and protein product sequence (" + NStr::SizetToString(problems.GetProdTerminalX())
516  + ") are not equal");
517  }
521  "End of location should probably be partial");
522  }
525  "This SeqFeat should not be partial");
526  }
530  "CDS has exception but passes translation test");
531  }
535  "CDS has unclassified exception but only difference is "
536  + NStr::SizetToString(problems.GetTranslationMismatches().size()) + " mismatches out of "
537  + NStr::SizetToString(problems.GetProtLen()) + " residues");
538  }
542  "CDS has unnecessary translated product replaced exception");
543  }
545 }
549 {
550  string result;
552  CSeq_point pnt;
553  pnt.SetPoint(pos);
556  try {
557  pnt.SetId().Assign(GetId(m_Feat.GetProduct(), &m_Scope));
558  } catch (const CObjmgrUtilException&) {}
560  CSeq_loc tmp;
561  tmp.SetPnt(pnt);
566  return result;
567 }
571 {
572  string nuclocstr;
574  size_t num_mismatches = mismatches.size();
576  if (num_mismatches > 10) {
577  // report total number of mismatches and the details of the
578  // first and last.
579  nuclocstr = MapToNTCoords(mismatches.front().pos);
580  string msg =
581  NStr::SizetToString(mismatches.size()) + " mismatches found. " +
582  "First mismatch at " + NStr::IntToString(mismatches.front().pos + 1) +
583  ", residue in protein [";
584  msg += mismatches.front().prot_res;
585  msg += "] != translation [";
586  msg += mismatches.front().transl_res;
587  msg += "]";
588  if (!nuclocstr.empty()) {
589  msg += " at " + nuclocstr;
590  }
591  nuclocstr = MapToNTCoords(mismatches.back().pos);
592  msg +=
593  ". Last mismatch at " + NStr::IntToString(mismatches.back().pos + 1) +
594  ", residue in protein [";
595  msg += mismatches.back().prot_res;
596  msg += "] != translation [";
597  msg += mismatches.back().transl_res;
598  msg += "]";
599  if (!nuclocstr.empty()) {
600  msg += " at " + nuclocstr;
601  }
602  int gc = 0;
604  // We assume that the id is set for all Genetic_code
605  gc = m_Feat.GetData().GetCdregion().GetCode().GetId();
606  }
607  string gccode = NStr::IntToString(gc);
609  msg += ". Genetic code [" + gccode + "]";
611  } else {
612  // report individual mismatches
613  for (size_t i = 0; i < mismatches.size(); ++i) {
614  nuclocstr = MapToNTCoords(mismatches[i].pos);
615  if (mismatches[i].pos == 0 && mismatches[i].transl_res == '-') {
616  // skip - dash is expected to differ
617  num_mismatches--;
618  } else {
619  EDiagSev sev = eDiag_Error;
620  if (mismatches[i].prot_res == 'X' &&
621  (mismatches[i].transl_res == 'B' || mismatches[i].transl_res == 'Z' || mismatches[i].transl_res == 'J')) {
622  sev = eDiag_Warning;
623  }
624  string msg;
625  if (m_ProductIsFar) {
626  msg += "(far) ";
627  }
628  msg += "Residue " + NStr::IntToString(mismatches[i].pos + 1) +
629  " in protein [";
630  msg += mismatches[i].prot_res;
631  msg += "] != translation [";
632  msg += mismatches[i].transl_res;
633  msg += "]";
634  if (!nuclocstr.empty()) {
635  msg += " at " + nuclocstr;
636  }
638  }
639  }
640  }
641 }
645 {
646  for (auto it = problems.begin(); it != problems.end(); it++) {
647  string msg;
648  switch (it->problem) {
650  if (!has_exception) {
652  "transl_except qual out of frame.");
653  }
654  break;
656  msg = "Suspicious transl_except ";
657  msg += it->ex;
658  msg += " at first codon of complete CDS";
660  break;
662  msg = "Unnecessary transl_except ";
663  msg += it->ex;
664  msg += " at position ";
665  msg += NStr::SizetToString(it->prot_pos + 1);
667  msg);
668  break;
670  msg = "Unexpected transl_except ";
671  msg += it->ex;
672  msg += +" at position " + NStr::SizetToString(it->prot_pos + 1)
673  + " just past end of protein";
676  msg);
677  break;
678  }
679  }
680 }
684 {
685  const CCdregion& cds = m_Feat.GetData().GetCdregion();
686  const CSeq_loc& feat_loc = m_Feat.GetLocation();
687  const CCode_break* prev_cbr = nullptr;
690  const CCode_break& cbr = **it;
691  const CSeq_loc& cbr_loc = cbr.GetLoc();
692  ECompare comp = Compare(cbr_loc, feat_loc, &m_Scope, fCompareOverlapping);
693  if ( ((comp != eContained) && (comp != eSame)) || cbr_loc.IsNull() || cbr_loc.IsEmpty()) {
695  "Code-break location not in coding region");
696  } else if (m_Feat.IsSetProduct()) {
697  if (cbr_loc.GetStop(eExtreme_Biological) == feat_loc.GetStop(eExtreme_Biological)) {
698  // terminal exception - don't bother checking, can't be mapped
699  } else {
700  if (SeqLocCheck(cbr_loc, &m_Scope) == eSeqLocCheck_error) {
701  string lbl = GetValidatorLocationLabel(cbr_loc, m_Scope);
703  "Code-break: SeqLoc [" + lbl + "] out of range");
704  } else {
705  int frame = 0;
706  CRef<CSeq_loc> p_loc = SourceToProduct(m_Feat, cbr_loc, fS2P_AllowTer, &m_Scope, &frame);
707  if (!p_loc || p_loc->IsNull() || frame != 1) {
709  "Code-break location not in coding region - may be frame problem");
710  }
711  }
712  }
713  }
714  if (cbr_loc.IsPartialStart(eExtreme_Biological) ||
715  cbr_loc.IsPartialStop(eExtreme_Biological)) {
717  "Translation exception locations should not be partial");
718  }
719  if ( prev_cbr ) {
720  if ( Compare(cbr_loc, prev_cbr->GetLoc(), &m_Scope, fCompareOverlapping) == eSame ) {
721  string msg = "Multiple code-breaks at same location ";
722  string str = GetValidatorLocationLabel (cbr_loc, m_Scope);
723  if ( !str.empty() ) {
724  msg += "[" + str + "]";
725  }
727  msg);
728  }
729  }
730  prev_cbr = &cbr;
731  }
732 }
736 {
739  bool feat_is_pseudo = s_IsPseudo(m_Feat);
740  bool pseudo = feat_is_pseudo || m_GeneIsPseudo;
742  x_ValidateQuals();
745  const CCdregion& cdregion = m_Feat.GetData().GetCdregion();
746  if (cdregion.IsSetOrf() && cdregion.GetOrf() &&
747  m_Feat.IsSetProduct()) {
749  "An ORF coding region should not have a product");
750  }
752  if (pseudo) {
753  if (m_Feat.IsSetProduct()) {
754  if (feat_is_pseudo) {
756  "A pseudo coding region should not have a product");
757  } else if (m_GeneIsPseudo) {
759  "A coding region overlapped by a pseudogene should not have a product");
760  } else {
762  "A pseudo coding region should not have a product");
763  }
764  }
765  } else {
769  }
776  if (x_IsProductMisplaced()) {
777  if (m_Imp.IsSmallGenomeSet()) {
779  "Protein product not packaged in nuc-prot set with nucleotide in small genome set");
780  } else {
782  "Protein product not packaged in nuc-prot set with nucleotide");
783  }
784  }
786  bool conflict = cdregion.IsSetConflict() && cdregion.GetConflict();
787  if ( !pseudo && !conflict ) {
788  x_ValidateTrans();
789  ValidateSplice(false, false);
790  }
792  if (conflict) {
794  }
801 }
805 {
807  const CGb_qual& qual = **it;
808  if (qual.CanGetQual()) {
809  const string& key = qual.GetQual();
810  if (NStr::EqualNocase(key, "exception")) {
811  if (!m_Feat.IsSetExcept()) {
813  "Exception flag should be set in coding region");
814  }
815  } else if (NStr::EqualNocase(key, "codon")) {
817  "Use the proper genetic code, if available, "
818  "or set transl_excepts on specific codons");
819  } else if (NStr::EqualNocase(key, "protein_id")) {
821  "protein_id should not be a gbqual on a CDS feature");
822  } else if (NStr::EqualNocase(key, "gene_synonym")) {
824  "gene_synonym should not be a gbqual on a CDS feature");
825  } else if (NStr::EqualNocase(key, "transcript_id")) {
827  "transcript_id should not be a gbqual on a CDS feature");
828  } else if (NStr::EqualNocase(key, "codon_start")) {
829  const CCdregion& cdregion = m_Feat.GetData().GetCdregion();
830  if (cdregion.IsSetFrame() && cdregion.GetFrame() != CCdregion::eFrame_not_set) {
832  "conflicting codon_start values");
833  } else {
835  "codon_start value should be 1, 2, or 3");
836  }
837  }
838  }
839  }
840 }
844 {
845  if (!m_GeneIsPseudo && !s_IsPseudo(m_Feat)) {
846  return true;
847  } else {
848  return false;
849  }
850 }
// Display names for plastid-type genome values, looked up by position.
// The table is indexed directly with an integer genome value
// (s_PlastidTxt[genome]) when the genetic code conflict message is built, so
// the position of every entry matters. Slots without a plastid name are empty.
//
// NOTE(review): the table has exactly 20 slots (valid indices 0..19) and the
// lookup is unchecked - confirm that every genome value that reaches it is
// below 20.
853 const string s_PlastidTxt[20] = {
854  "",
855  "",
856  "chloroplast", // index 2
857  "chromoplast", // index 3
858  "",
859  "",
860  "plastid", // index 6
861  "",
862  "",
863  "",
864  "",
865  "",
866  "cyanelle", // index 12
867  "",
868  "",
869  "",
870  "apicoplast", // index 16
871  "leucoplast", // index 17
872  "proplastid", // index 18
873  "",
874 };
878 {
879  if ( genome == CBioSource::eGenome_chloroplast ||
881  genome == CBioSource::eGenome_plastid ||
882  genome == CBioSource::eGenome_cyanelle ||
883  genome == CBioSource::eGenome_apicoplast ||
884  genome == CBioSource::eGenome_leucoplast ||
885  genome == CBioSource::eGenome_proplastid ||
887  return true;
888  }
890  return false;
891 }
894 static bool IsGeneticCodeValid(int gcode)
895 {
896  bool ret = false;
897  if (gcode > 0) {
899  try {
900  const CTrans_table& tbl = CGen_code_table::GetTransTable(gcode);
901  (void)tbl; // suppress unused-variable warning
902  ret = true;
903  }
904  catch (CException&) {
905  }
906  }
908  return ret;
909 }
// Determine the genetic code implied by a BioSource.
//
// Returns 0 when the source has no org / orgname, when the selected code is
// not set, or when an exception is thrown while reading the source (both
// CException and std::exception are swallowed on purpose).
// Otherwise the result depends on `genome`:
//   - one group of genome values uses the orgname's mgcode, if set
//   - another group uses the orgname's pgcode when it is set and non-zero,
//     and falls back to 11 otherwise
//   - all remaining values use the orgname's gcode, if set
//
// NOTE(review): the statement that defines `genome` and the `case` labels of
// both groups are not visible in this excerpt - confirm which genome values
// belong to which group.
912 static int s_GetStrictGenCode(const CBioSource& src)
913 {
914  int gencode = 0;
916  try {
919  if ( src.IsSetOrg() && src.GetOrg().IsSetOrgname() ) {
920  const COrgName& orn = src.GetOrg().GetOrgname();
922  switch ( genome ) {
926  // bacteria and plant organelle code
927  if (orn.IsSetMgcode()) {
928  gencode = orn.GetMgcode();
929  }
930  break;
938  if (orn.IsSetPgcode() && orn.GetPgcode() != 0) {
939  gencode = orn.GetPgcode();
940  } else {
941  // bacteria and plant plastids are code 11.
942  gencode = 11;
943  }
944  break;
945  default:
946  if (orn.IsSetGcode()) {
947  gencode = orn.GetGcode();
948  }
949  break;
950  }
951  }
// Any failure while reading the source leaves gencode at its last value.
952  } catch (const CException& ) {
953  } catch (const std::exception& ) {
954  }
955  return gencode;
956 }
960 {
961  if (!m_LocationBioseq) {
962  return;
963  }
964  int cdsgencode = 0;
966  const CCdregion& cdregion = m_Feat.GetData().GetCdregion();
968  if (cdregion.CanGetCode()) {
969  cdsgencode = cdregion.GetCode().GetId();
971  if (!IsGeneticCodeValid(cdsgencode)) {
973  "A coding region contains invalid genetic code [" + NStr::IntToString(cdsgencode) + "]");
974  }
975  }
978  if (diter) {
979  const CBioSource& src = diter->GetSource();
980  int biopgencode = s_GetStrictGenCode(src);
982  if (biopgencode != cdsgencode
983  && (!m_Feat.IsSetExcept()
985  || NStr::Find(m_Feat.GetExcept_text(), "genetic code exception") == string::npos)) {
986  int genome = 0;
988  if (src.CanGetGenome()) {
989  genome = src.GetGenome();
990  }
992  if (IsPlastid(genome)) {
994  "Genetic code conflict between CDS (code " +
995  NStr::IntToString(cdsgencode) +
996  ") and BioSource.genome biological context (" +
997  s_PlastidTxt[genome] + ") (uses code 11)");
998  } else {
1000  "Genetic code conflict between CDS (code " +
1001  NStr::IntToString(cdsgencode) +
1002  ") and BioSource (code " +
1003  NStr::IntToString(biopgencode) + ")");
1004  }
1005  }
1006  }
1007 }
1011 {
1014  // for coding regions, internal exons should not be 15 or less bp long
1015  int num_short_exons = 0;
1016  string message;
1017  CSeq_loc_CI it(m_Feat.GetLocation());
1018  if (it) {
1019  // note - do not want to warn for first or last exon
1020  ++it;
1021  size_t prev_len = 16;
1022  size_t prev_start = 0;
1023  size_t prev_stop = 0;
1024  while (it) {
1025  if (prev_len <= 15) {
1026  num_short_exons++;
1027  if (!message.empty()) {
1028  message += ", ";
1029  }
1030  message += NStr::NumericToString(prev_start + 1)
1031  + "-" + NStr::NumericToString(prev_stop + 1);
1032  }
1033  prev_len = it.GetRange().GetLength();
1034  prev_start = it.GetRange().GetFrom();
1035  prev_stop = it.GetRange().GetTo();
1036  ++it;
1037  }
1038  }
1039  if (num_short_exons > 1) {
1041  "Coding region has multiple internal exons that are too short at positions " + message);
1042  } else if (num_short_exons == 1) {
1044  "Internal coding region exon is too short at position " + message);
1045  }
1046 }
1050 {
1051  if (x_HasGoodParent()) {
1052  return;
1053  }
1055  const CSeq_loc& loc = m_Feat.GetLocation();
1058  loc,
1061  m_Scope);
1062  if (!mrna) {
1063  return;
1064  }
1066  mrna = GetBestOverlappingFeat(
1067  loc,
1070  m_Scope);
1071  if (mrna) {
1072  return;
1073  }
1075  mrna = GetBestOverlappingFeat(
1076  loc,
1079  m_Scope);
1080  if (!mrna) {
1081  return;
1082  }
1084  bool pseudo = s_IsPseudo(m_Feat) || m_GeneIsPseudo;
1087  if (pseudo) {
1089  }
1091  mrna = GetBestOverlappingFeat(
1092  loc,
1095  m_Scope);
1097  EDiagSev sev = eDiag_Warning;
1098  if (pseudo) {
1099  sev = eDiag_Info;
1100  }
1101  if (mrna) {
1102  // ribosomal slippage exception suppresses CDSmRNArange warning
1103  bool supress = false;
1105  if (m_Feat.CanGetExcept_text()) {
1107  if (NStr::FindNoCase(text, "ribosomal slippage") != NPOS
1108  || NStr::FindNoCase(text, "trans-splicing") != NPOS) {
1109  supress = true;
1110  }
1111  }
1112  if (!supress) {
1113  PostErr(sev, err_type,
1114  "mRNA contains CDS but internal intron-exon boundaries "
1115  "do not match");
1116  }
1117  } else {
1118  PostErr(sev, err_type,
1119  "mRNA overlaps or contains CDS but does not completely "
1120  "contain intervals");
1121  }
1122 }
1126 {
1128  CSeq_feat_Handle fh;
1129  try {
1130  // will fail if location is bad
1132  } catch (CException&) {
1133  return false;
1134  }
1136  static const list<CSeqFeatData::ESubtype> parent_types = {
1141  };
1143  CRef<feature::CFeatTree> feat_tree;
1144  if (m_Imp.IsHugeFileMode()) {
1145  feat_tree = Ref(new feature::CFeatTree());
1146  CMappedFeat mappedFeat(fh);
1147  for (auto parent_type : parent_types) {
1148  feat_tree->AddFeaturesFor(mappedFeat, parent_type);
1149  }
1150  }
1151  else feat_tree = m_Imp.GetGeneCache().GetFeatTreeFromCache(m_Feat, m_Scope);
1152  if (!feat_tree) {
1153  return false;
1154  }
1156  for (auto parent_type : parent_types) {
1157  CMappedFeat parent = feat_tree->GetParent(fh, parent_type);
1158  if (parent) {
1160  parent.GetLocation(),
1161  &m_Scope,
1164  return true;
1165  }
1166  }
1167  }
1168  return false;
1169 }
1172 // VR-619
1173 // for an mRNA / CDS pair where both have far products
1174 // (which is only true for genomic RefSeqs with instantiated mRNA products),
1175 // please check that the pair found by CFeatTree corresponds to the nuc-prot pair in ID
1176 // (i.e., the CDS product is annotated on the mRNA product).
1178 {
1179  // if coding region doesn't have a far product, nothing to check
1180  if (!m_ProductIsFar) {
1181  return;
1182  }
1183  // no point if not far-fetching
1184  if (!m_Imp.IsRemoteFetch()) {
1185  return;
1186  }
1187  if (!m_Feat.GetData().IsCdregion() || !m_Feat.IsSetProduct()) {
1188  return;
1189  }
1190  if (!m_Imp.IsRefSeq()) {
1191  return;
1192  }
1193  const CSeq_id * cds_sid = m_Feat.GetProduct().GetId();
1194  if (!cds_sid) {
1195  return;
1196  }
1198  if (!feat_tree) {
1199  return;
1200  }
1202  if (!fh) {
1203  return;
1204  }
1205  CMappedFeat mrna = feat_tree->GetParent(fh, CSeqFeatData::eSubtype_mRNA);
1206  if (!mrna || !mrna.IsSetProduct()) {
1207  // no mRNA or no mRNA product
1208  return;
1209  }
1210  const CSeq_id * mrna_sid = mrna.GetProduct().GetId();
1211  if (!mrna_sid) {
1212  return;
1213  }
1215  if (!m_Imp.IsFarSequence(*mrna_sid)) {
1216  // mRNA product is not far
1217  return;
1218  }
1219  auto mrna_prod = m_Scope.GetBioseqHandle(*mrna_sid);
1220  if (!mrna_prod) {
1221  // can't be fetched, will be reported elsewhere
1222  return;
1223  }
1224  CSeq_entry_Handle far_mrna_nps =
1225  mrna_prod.GetExactComplexityLevel(CBioseq_set::eClass_nuc_prot);
1226  if (!far_mrna_nps) {
1227  PostErr(eDiag_Error, eErr_SEQ_FEAT_CDSmRNAmismatch, "no Far mRNA nuc-prot-set");
1228  } else {
1229  CBioseq_Handle cds_prod = m_Scope.GetBioseqHandleFromTSE(*cds_sid, far_mrna_nps);
1230  if (!cds_prod) {
1231  PostErr(eDiag_Error, eErr_SEQ_FEAT_CDSmRNAmismatch, "Far CDS product and far mRNA product are not packaged together");
1232  m_Imp.PostErr(eDiag_Error, eErr_SEQ_FEAT_CDSmRNAmismatch, "Far CDS product and far mRNA product are not packaged together", *(mrna.GetSeq_feat()));
1233  }
1234  }
1235 }
1239 {
1240  try {
1241  if (!m_Feat.GetData().IsCdregion() || !m_Feat.CanGetProduct()) {
1242  return;
1243  }
1246  if (!prot) {
1247  return;
1248  }
1250  if (!nuc) {
1251  return;
1252  }
1253  // check for self-referential CDS feature
1254  if (nuc == prot) {
1255  return;
1256  }
1258  const CGene_ref* cds_ref = nullptr;
1260  // map from cds product to nucleotide
1261  const string prev = GetDiagFilter(eDiagFilter_Post);
1262  SetDiagFilter(eDiagFilter_All, "!(1305.28,31)");
1266  for (CFeat_CI it(prot, CSeqFeatData::e_Prot); it; ++it) {
1267  CSeq_feat_Handle curr = it->GetSeq_feat_Handle();
1268  CSeqFeatData::ESubtype subtype = curr.GetFeatSubtype();
1270  if (subtype != CSeqFeatData::eSubtype_preprotein &&
1275  continue;
1276  }
1278  // see if already has gene xref
1279  if (curr.GetGeneXref()) {
1280  continue;
1281  }
1283  if (! cds_ref) {
1284  // wait until first mat_peptide found to avoid expensive computation on CDS /gene qualifier
1286  if (cgene && cgene->CanGetData() && cgene->GetData().IsGene()) {
1287  const CGene_ref& cgref = cgene->GetData().GetGene();
1288  cds_ref = &cgref;
1289  } else {
1290  // if CDS does not have overlapping gene, bail out of function
1291  return;
1292  }
1293  }
1295  const CSeq_loc& loc = curr.GetLocation();
1296  // map prot location to nuc location
1297  CRef<CSeq_loc> nloc(prot_to_cds.Map(loc));
1298  if (! nloc) {
1299  continue;
1300  }
1302  const CGene_ref* pep_ref = nullptr;
1304  if (pgene && pgene->CanGetData() && pgene->GetData().IsGene()) {
1305  const CGene_ref& pgref = pgene->GetData().GetGene();
1306  pep_ref = &pgref;
1307  }
1309  if (! cds_ref || ! pep_ref) {
1310  continue;
1311  }
1312  if (cds_ref->IsSetLocus_tag() && pep_ref->IsSetLocus_tag()) {
1313  if (cds_ref->GetLocus_tag() == pep_ref->GetLocus_tag()) {
1314  continue;
1315  }
1316  } else if (cds_ref->IsSetLocus() && pep_ref->IsSetLocus()) {
1317  if (cds_ref->GetLocus() == pep_ref->GetLocus()) {
1318  continue;
1319  }
1320  }
1322  if (pgene) {
1324  const CSeq_loc& gloc = pgene->GetLocation();
1326  if (sequence::Compare(*nloc, gloc, nullptr /* scope */, sequence::fCompareOverlapping) == sequence::eSame) {
1329  "Peptide under CDS matches small Gene");
1330  }
1331  }
1332  }
1333  } catch (const CException& ) {
1334  }
1335 }
1339 {
1341  return;
1342  }
1345  if (!sd) {
1346  return;
1347  }
1348  const CMolInfo& molinfo = sd->GetMolinfo();
1350  const CSeq_loc& loc = m_Feat.GetLocation();
1351  bool partial5 = loc.IsPartialStart(eExtreme_Biological);
1352  bool partial3 = loc.IsPartialStop(eExtreme_Biological);
1354  if (molinfo.CanGetCompleteness()) {
1355  switch (molinfo.GetCompleteness()) {
1357  break;
1360  if (partial5 || partial3) {
1362  "CDS is partial but protein is complete");
1363  }
1364  break;
1367  break;
1370  if (!partial5) {
1372  "CDS is 5' complete but protein is NH2 partial");
1373  }
1374  if (partial3) {
1375  EDiagSev sev = eDiag_Error;
1376  if (x_CDS3primePartialTest())
1377  {
1378  sev = eDiag_Warning;
1379  }
1381  "CDS is 3' partial but protein is NH2 partial");
1382  }
1383  break;
1386  if (!partial3) {
1388  "CDS is 3' complete but protein is CO2 partial");
1389  }
1390  if (partial5) {
1391  EDiagSev sev = eDiag_Error;
1392  if (x_CDS5primePartialTest())
1393  {
1394  sev = eDiag_Warning;
1395  }
1397  "CDS is 5' partial but protein is CO2 partial");
1398  }
1399  break;
1402  if (partial5 && partial3) {
1403  } else if (partial5) {
1404  EDiagSev sev = eDiag_Error;
1405  if (x_CDS5primePartialTest())
1406  {
1407  sev = eDiag_Warning;
1408  }
1410  "CDS is 5' partial but protein has neither end");
1411  } else if (partial3) {
1412  EDiagSev sev = eDiag_Error;
1413  if (x_CDS3primePartialTest()) {
1414  sev = eDiag_Warning;
1415  }
1418  "CDS is 3' partial but protein has neither end");
1419  } else {
1421  "CDS is complete but protein has neither end");
1422  }
1423  break;
1426  break;
1429  break;
1432  break;
1434  default:
1435  break;
1436  }
1437  }
1438 }
// Exception-text phrases treated as a "biological exception": the bypass
// check that follows this table returns true when the feature's exception
// text contains one of the phrases of sc_BypassCdsPartialCheck
// (case-insensitive substring search).
//
// NOTE(review): the statements that build sc_BypassCdsPartialCheck are not
// visible in this excerpt; presumably it is built from this array. The entries
// are listed in ascending order, which that set may rely on - keep the order
// when adding phrases and confirm.
1441 static const char* const sc_BypassCdsPartialCheckText[] = {
1442  "RNA editing",
1443  "annotated by transcript or proteomic data",
1444  "artificial frameshift",
1445  "mismatches in translation",
1446  "rearrangement required for product",
1447  "reasons given in citation",
1448  "translated product replaced",
1449  "unclassified translation discrepancy"
1450 };
1455 {
      // Returns true as soon as the feature's exception text contains any
      // phrase from sc_BypassCdsPartialCheck (case-insensitive substring
      // match); returns false when no phrase matches.
1457  const string& except_text = m_Feat.GetExcept_text();
1458  ITERATE(TBypassCdsPartialCheckSet, it, sc_BypassCdsPartialCheck) {
1459  if (NStr::FindNoCase(except_text, *it) != NPOS) {
1460  return true; // biological exception
1461  }
1462  }
1463  }
1464  return false;
1465 }
1469 {
      // Tests whether the last interval of the feature location reaches the
      // edge of the sequence:
      //   - minus strand: the interval starts at position 0;
      //   - otherwise:    the interval ends at Inst_Length - 1 of
      //                   m_LocationBioseq.
      // Returns false for an empty location, when m_LocationBioseq is not
      // available on the non-minus path, or when the edge is not reached.
1470  CSeq_loc_CI last;
      // Advance through every interval so that 'last' ends up on the final one.
1471  for (CSeq_loc_CI sl_iter(m_Feat.GetLocation()); sl_iter; ++sl_iter) {
1472  last = sl_iter;
1473  }
1475  if (last) {
1476  if (last.GetStrand() == eNa_strand_minus) {
1477  if (last.GetRange().GetFrom() == 0) {
1478  return true;
1479  }
1480  } else {
      // The sequence length is needed here, so a missing bioseq means "no".
1481  if (!m_LocationBioseq) {
1482  return false;
1483  }
1484  if (last.GetRange().GetTo() == m_LocationBioseq.GetInst_Length() - 1) {
1485  return true;
1486  }
1487  }
1488  }
1489  return false;
1490 }
1494 {
      // Tests whether the first interval of the feature location reaches the
      // edge of the sequence:
      //   - minus strand: the interval ends at Inst_Length - 1 of
      //                   m_LocationBioseq;
      //   - otherwise:    the interval starts at position 0.
      // Returns false for an empty location, when m_LocationBioseq is not
      // available on the minus-strand path, or when the edge is not reached.
1495  CSeq_loc_CI first(m_Feat.GetLocation());
1497  if (first) {
1498  if (first.GetStrand() == eNa_strand_minus) {
      // The sequence length is needed here, so a missing bioseq means "no".
1499  if (!m_LocationBioseq) {
1500  return false;
1501  }
1502  if (first.GetRange().GetTo() == m_LocationBioseq.GetInst_Length() - 1) {
1503  return true;
1504  }
1505  } else {
1506  if (first.GetRange().GetFrom() == 0) {
1507  return true;
1508  }
1509  }
1510  }
1511  return false;
1512 }
1516 {
      // Overall result: after the early exits below, returns !found_match,
      // where found_match is set once a location interval's bioseq sits in
      // the same nuc-prot set entry as prod_nps.
1517  // don't calculate if no product or if ORF flag is set
1518  if (!m_Feat.IsSetProduct() ||
1520  return false;
1521  }
1522  // don't calculate if feature is pseudo
1523  if (s_IsPseudo(m_Feat) || m_GeneIsPseudo) {
1524  return false;
1525  }
1526  if (!m_ProductBioseq) {
1527  return false;
1528  } else if (m_ProductIsFar) {
1530  return true;
1531  } else {
1532  return false;
1533  }
1534  }
1536  bool found_match = false;
1538  CSeq_entry_Handle prod_nps =
      // A null prod_nps handle is reported as misplaced.
1540  if (!prod_nps) {
1541  return true;
1542  }
      // Examine each interval of the location; stop at the first one whose
      // bioseq shares the nuc-prot set with the product.
1544  for (CSeq_loc_CI loc_i(m_Feat.GetLocation()); loc_i; ++loc_i) {
1545  const CSeq_id& sid = loc_i.GetSeq_id();
      // RefSeq ("other") ids whose accession starts with "NT_" are exempt.
1546  if (sid.IsOther() && sid.GetOther().IsSetAccession() && NStr::StartsWith(sid.GetOther().GetAccession(), "NT_")) {
1547  return false;
1548  }
1549  CBioseq_Handle nuc = m_Scope.GetBioseqHandle(loc_i.GetSeq_id());
1550  if (nuc) {
1552  // we don't report this for NT records
1553  return false;
1554  }
      // Despite its name, 'wgs' holds the enclosing gen-prod-set entry, if any.
1555  CSeq_entry_Handle wgs = nuc.GetExactComplexityLevel(CBioseq_set::eClass_gen_prod_set);
1556  if (wgs) {
1557  // we don't report this for gen-prod-sets
1558  return false;
1559  }
1561  CSeq_entry_Handle nuc_nps =
1562  nuc.GetExactComplexityLevel(CBioseq_set::eClass_nuc_prot);
1564  if (prod_nps == nuc_nps) {
1565  found_match = true;
1566  break;
1567  }
1568  }
1569  }
1570  return !found_match;
1571 }
1574 void CCdregionValidator::x_AddToIntronList(vector<CCdregionValidator::TShortIntron>& shortlist, TSeqPos last_start, TSeqPos last_stop, TSeqPos this_start, TSeqPos this_stop)
1575 {
1576  if (abs ((int)this_start - (int)last_stop) < 11) {
1577  shortlist.push_back(TShortIntron(last_stop, this_start));
1578  } else if (abs ((int)this_stop - (int)last_start) < 11) {
1579  shortlist.push_back(TShortIntron(last_start, this_stop));
1580  }
1581 }
1584 vector<CCdregionValidator::TShortIntron> CCdregionValidator::x_GetShortIntrons(const CSeq_loc& loc, CScope* scope)
1585 {
1586  vector<CCdregionValidator::TShortIntron> shortlist;
1588  CSeq_loc_CI li(loc);
1590  TSeqPos last_start = li.GetRange().GetFrom();
1591  TSeqPos last_stop = li.GetRange().GetTo();
1592  CRef<CSeq_id> last_id(new CSeq_id());
1593  last_id->Assign(li.GetSeq_id());
1595  ++li;
1596  while (li) {
1597  TSeqPos this_start = li.GetRange().GetFrom();
1598  TSeqPos this_stop = li.GetRange().GetTo();
1599  if (abs ((int)this_start - (int)last_stop) < 11 || abs ((int)this_stop - (int)last_start) < 11) {
1600  if (li.GetSeq_id().Equals(*last_id)) {
1601  // definitely same bioseq, definitely report
1602  x_AddToIntronList(shortlist, last_start, last_stop, this_start, this_stop);
1603  } else if (scope) {
1604  // only report if definitely on same bioseq
1605  CBioseq_Handle last_bsh = scope->GetBioseqHandle(*last_id);
1606  if (last_bsh) {
1607  for (auto id_it : last_bsh.GetId()) {
1608  if (id_it.GetSeqId()->Equals(li.GetSeq_id())) {
1609  x_AddToIntronList(shortlist, last_start, last_stop, this_start, this_stop);
1610  break;
1611  }
1612  }
1613  }
1614  }
1615  }
1616  last_start = this_start;
1617  last_stop = this_stop;
1618  last_id->Assign(li.GetSeq_id());
1619  ++li;
1620  }
1621  return shortlist;
1622 }
1626 {
      // Renders the interval as "<first + 1>-<second + 1>".
      // NOTE(review): the +1 presumably converts 0-based positions to the
      // 1-based form shown to users - confirm.
1627  return NStr::NumericToString(interval.first + 1) + "-"
1628  + NStr::NumericToString(interval.second + 1);
1629 }
1633 {
      // Builds the "Introns at positions ... should be at least 10 nt long"
      // message from the short introns of the feature location.  Nothing is
      // produced when the feature has an exception set, when there are no
      // short introns, or when any nonsense introns are found.
1634  if (m_Feat.IsSetExcept()) {
1635  return;
1636  }
1638  string message;
1640  vector<TShortIntron> shortlist = x_GetShortIntrons(m_Feat.GetLocation(), &m_Scope);
1641  if (shortlist.size() == 0) {
1642  return;
1643  }
1645  // only report if no nonsense introns
1646  vector<CRef<CSeq_loc> > nonsense_introns = CCDSTranslationProblems::GetNonsenseIntrons(m_Feat, m_Scope);
1647  if (nonsense_introns.size() > 0) {
1648  return;
1649  }
      // Join the intervals: "a", "a and b", or a comma-separated list that
      // ends with " and <last>".
1651  if (shortlist.size() == 1) {
1652  message = x_FormatIntronInterval(shortlist.front());
1653  } else if (shortlist.size() == 2) {
1654  message = x_FormatIntronInterval(shortlist.front())
1655  + " and " +
1656  x_FormatIntronInterval(shortlist.back());
1657  } else {
      // NOTE(review): the loop stops at size() - 2, so the second-to-last
      // interval (index size() - 2) is never added to the message; the ", "
      // suffix followed by " and " also yields a double space.  Looks like
      // the bound should be size() - 1 - confirm and fix.
1658  for (size_t i = 0; i < shortlist.size() - 2; i++) {
1659  message += x_FormatIntronInterval(shortlist[i]) + ", ";
1660  }
1661  message += " and " + x_FormatIntronInterval(shortlist.back());
1662  }
1664  "Introns at positions " + message + " should be at least 10 nt long");
1665 }
1668 // non-pseudo CDS must have product
1670 {
      // Ends with the "Expected CDS product absent" message unless one of
      // the bail-out conditions below applies.
1671  // bail if product exists
1672  if ( m_Feat.IsSetProduct() ) {
1673  return;
1674  }
1675  // bail if location has just stop
      // "just stop" here means: 5' partial, 3' complete, and at most 5
      // bases long.
1676  if ( m_Feat.IsSetLocation() ) {
1677  const CSeq_loc& loc = m_Feat.GetLocation();
1678  if ( loc.IsPartialStart(eExtreme_Biological) && !loc.IsPartialStop(eExtreme_Biological) ) {
1679  if ( GetLength(loc, &m_Scope) <= 5 ) {
1680  return;
1681  }
1682  }
1683  }
1684  // suppress in case of the appropriate exception
1685  if ( m_Feat.IsSetExcept() && m_Feat.IsSetExcept_text() &&
1688  "rearrangement required for product") != NPOS ) {
1689  return;
1690  }
1691  }
1693  // non-pseudo CDS must have /product
1695  "Expected CDS product absent");
1696 }
1700 {
      // Compares the translation of the coding region with the product
      // sequence and picks one of the two "conflict flag" messages below.
      // Does nothing when there is no product bioseq.
1701  if (!m_ProductBioseq) {
1702  return;
1703  }
1704  // translate the coding region
1705  string transl_prot;
1706  try {
1708  false, // do not include stop codons
1709  false); // do not remove trailing X/B/Z
      // NOTE(review): a runtime_error is swallowed silently here; presumably
      // transl_prot then stays empty and the "should not be set" branch
      // below is taken - confirm this is intended.
1711  } catch ( const runtime_error& ) {
1712  }
      // Read the product sequence in Ncbieaa coding.
1715  prot_vec.SetCoding(CSeq_data::e_Ncbieaa);
1717  string prot_seq;
1718  prot_vec.GetSeqData(0, prot_vec.size(), prot_seq);
      // Either sequence being empty is treated like an exact match.
1720  if ( transl_prot.empty() || prot_seq.empty() || NStr::Equal(transl_prot, prot_seq) ) {
1722  "Coding region conflict flag should not be set");
1723  } else {
1725  "Coding region conflict flag is set");
1726  }
1727 }
1731 {
      // Checks the product of this CDS: handles an unavailable or far
      // product Bioseq first, and otherwise looks at whether the feature
      // held in 'sfp' is this feature or a different one.
1732  if ( !m_Feat.IsSetProduct() ) {
1733  return;
1734  }
1736  const CCdregion& cdr = m_Feat.GetData().GetCdregion();
      // NOTE(review): this tests whether the ORF field can be read, not its
      // value - confirm that is the intended condition.
1737  if ( cdr.CanGetOrf() ) {
1738  return;
1739  }
1741  if ( !m_ProductBioseq || m_ProductIsFar ) {
      // sid stays null when the product id cannot be determined.
1742  const CSeq_id* sid = nullptr;
1743  try {
1744  sid = &(GetId(m_Feat.GetProduct(), &m_Scope));
1745  } catch (const CObjmgrUtilException&) {}
1746  if (m_Imp.RequireLocalProduct(sid)) {
1748  "Unable to find product Bioseq from CDS feature");
1749  }
1750  return;
1751  }
1754  if ( !sfp ) {
1755  return;
1756  }
      // Pointer comparison: sfp is a different feature object than m_Feat.
1758  if ( &m_Feat != sfp ) {
1759  // if genomic product set, with one cds on contig and one on cdna,
1760  // do not report.
1761  if ( m_Imp.IsGPS() ) {
1762  // feature packaging test will do final contig vs. cdna check
1764  if ( m_LocationBioseq != sfh ) {
1765  return;
1766  }
1767  }
1769  "Same product Bioseq from multiple CDS features");
1770  }
1771 }
1775 {
      // Compares the partialness of the coding region location with that of
      // the protein feature location.  Both the product and the location
      // bioseq must be available, and a protein feature ('prot') must exist.
1776  if (!m_ProductBioseq || !m_LocationBioseq) {
1777  return;
1778  }
1781  return;
1782  }
1784  if (!prot) {
1785  return;
1786  }
      // A mismatch in partial ends between the two locations is a conflict.
1787  if (!PartialsSame(m_Feat.GetLocation(), prot->GetLocation())) {
1789  "Coding region and protein feature partials conflict");
1790  }
1791 }
1795 {
      // True when position 'pos' of 'vec' lies in a gap or holds the
      // residue 'N'; false otherwise.  The gap test is evaluated first, so
      // vec[pos] is only read for non-gap positions.
1796  if (vec.IsInGap(pos) || vec[pos] == 'N') {
1797  return true;
1798  } else {
1799  return false;
1800  }
1801 }
// Reports a mismatch when the coding region is partial at an end where the
// parent feature's location is complete.
//
// parent_loc   location of the parent feature
// parent_name  label for the parent used at the start of the message
//              (e.g. "mRNA")
//
// For each end (5' then 3'): if the CDS is partial and the parent is not,
// PartialProblemMismatch5Prime / PartialProblemMismatch3Prime is posted,
// unless gap checking is enabled and the position just outside the CDS end
// is a gap or an N.  Severity is Warning, lowered to Info when the gene
// locus is "orf1ab" (case-insensitive).
1804 void CCdregionValidator::x_ValidateParentPartialness(const CSeq_loc& parent_loc, const string& parent_name)
1805 {
1806  if (!m_LocationBioseq) {
1807  return;
1808  }
1810  bool check_gaps = false;
1813  check_gaps = true;
1814  }
      // NOTE(review): has_abutting_gap is shared by the 5' and 3' checks and
      // is not reset in between, so a gap found at the 5' end can also
      // suppress the 3' report when the 3' position is out of range -
      // confirm whether that is intended.
1816  bool has_abutting_gap = false;
1817  bool is_minus_strand = m_Feat.GetLocation().IsSetStrand() && m_Feat.GetLocation().GetStrand() == eNa_strand_minus;
      // 5' end: CDS partial, parent complete.
1819  if (m_Feat.GetLocation().IsPartialStart(eExtreme_Biological) && !parent_loc.IsPartialStart(eExtreme_Biological)) {
1821  if (check_gaps) {
      // Look one position outside the biological start.
1824  pos = is_minus_strand ? start + 1 : start - 1;
1826  if (pos < m_LocationBioseq.GetBioseqLength()) {
1827  has_abutting_gap = x_CheckPosNOrGap(pos, seq_vec);
1828  }
1829  }
1831  if (!has_abutting_gap) {
1832  EDiagSev sev = eDiag_Warning;
1834  if (gene && gene->GetData().GetGene().IsSetLocus()) {
1835  string locus = gene->GetData().GetGene().GetLocus();
1836  if ( NStr::EqualNocase (locus, "orf1ab") ) {
1837  sev = eDiag_Info;
1838  }
1839  }
1840  PostErr(sev, eErr_SEQ_FEAT_PartialProblemMismatch5Prime, parent_name + " should not be 5' complete if coding region is 5' partial");
1841  }
1842  }
      // 3' end: CDS partial, parent complete.
1843  if (m_Feat.GetLocation().IsPartialStop(eExtreme_Biological) && !parent_loc.IsPartialStop(eExtreme_Biological)) {
1845  if (check_gaps) {
      // Look one position outside the biological stop.
1849  pos = is_minus_strand ? stop - 1 : stop + 1;
1851  if (pos < m_LocationBioseq.GetBioseqLength()) {
1852  has_abutting_gap = x_CheckPosNOrGap(pos, seq_vec);
1853  }
1854  }
1856  if (!has_abutting_gap) {
1857  EDiagSev sev = eDiag_Warning;
1859  if (gene && gene->GetData().GetGene().IsSetLocus()) {
1860  string locus = gene->GetData().GetGene().GetLocus();
1861  if ( NStr::EqualNocase (locus, "orf1ab") ) {
1862  sev = eDiag_Info;
1863  }
1864  }
1865  PostErr(sev, eErr_SEQ_FEAT_PartialProblemMismatch3Prime, parent_name + " should not be 3' complete if coding region is 3' partial");
1866  }
1867  }
1868 }
1872 {
      // Runs the parent-partialness check against the mRNA, but only when a
      // gene is present, an mRNA was found, and contained_mrna holds exactly
      // one entry.
1873  if (!m_Gene) {
1874  return;
1875  }
1879  if (mrna) {
1880  TFeatScores contained_mrna;
1883  if (contained_mrna.size() == 1) {
1884  // messy for alternate splicing, so only check if there is only one
1885  x_ValidateParentPartialness(mrna->GetLocation(), "mRNA");
1886  }
1887  }
1888 }
1891 END_SCOPE(validator)
