NCBI C++ ToolKit
validerror_feat.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validerror_feat.cpp 101299 2023-11-28 18:18:38Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko......
27  *
28  * File Description:
29  * validation of Seq_feat
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
41 
42 #include <serial/serialbase.hpp>
43 
44 #include <objmgr/bioseq_handle.hpp>
46 #include <objmgr/feat_ci.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
48 #include <objmgr/seq_vector.hpp>
49 #include <objmgr/scope.hpp>
50 #include <objmgr/util/sequence.hpp>
51 #include <objmgr/util/feature.hpp>
52 
66 
71 
74 
75 #include <objects/seq/MolInfo.hpp>
76 #include <objects/seq/Bioseq.hpp>
78 
79 #include <objects/pub/Pub.hpp>
80 #include <objects/pub/Pub_set.hpp>
81 
83 
85 
87 
88 #include <util/static_set.hpp>
90 #include <util/sgml_entity.hpp>
91 
92 #include <algorithm>
93 #include <string>
94 
95 
98 BEGIN_SCOPE(validator)
99 using namespace sequence;
100 
101 
102 // =============================================================================
103 // Public
104 // =============================================================================
105 
106 
108  CValidError_base(imp)
109 {
110 }
111 
112 
114 {
115 }
116 
117 
119 {
120  if (!m_TSE || m_Imp.ShouldSubdivide()) {
121  m_GeneCache.Clear();
122  m_SeqCache.Clear();
123  m_TSE = seh;
124  }
125 }
126 
127 
129 {
131  if (!m_TSE) {
132  return empty;
133  }
135  m_Scope, loc, m_TSE.GetTSE_Handle());
136 }
137 
139 {
140  try {
141 
142 // ValidateSeqFeatData(feat.GetData(), feat);
143 
144  unique_ptr<CSingleFeatValidator> fval(FeatValidatorFactory(feat, *m_Scope, m_Imp));
145  if (fval) {
146  fval->Validate();
147  }
148 
149  } catch (const exception& e) {
151  string("Exception while validating feature. EXCEPTION: ") +
152  e.what(), feat);
153  }
154 }
155 
156 
158  const CSeq_feat& feat)
159 {
161 
162  ValidateSeqFeatXref(feat);
163 
164 }
165 
166 
168 {
169  bool rval = false;
170  // check for CDSonMinusStrandTranscribedRNA
171  if (feat.IsSetData()
172  && feat.GetData().IsCdregion()
173  && feat.IsSetLocation()
174  && feat.GetLocation().GetStrand() == eNa_strand_minus) {
176  if ( bsh ) {
178  if (di
179  && di->GetMolinfo().IsSetTech()
181  && di->GetMolinfo().IsSetBiomol()
184  "Coding region on TSA transcribed RNA should not be on the minus strand", feat);
185  rval = true;
186  }
187  }
188  }
189  return rval;
190 }
191 
192 
194 {
195  CSeqFeatData::E_Choice ftype = feat.GetData().Which();
196 
197  if (seq.IsAa()) {
198  // protein
199  switch (ftype) {
201  case CSeqFeatData::e_Rna:
205  "Invalid feature for a protein Bioseq.", feat);
206  break;
207  default:
208  break;
209  }
210  } else {
211  // nucleotide
212  if (ftype == CSeqFeatData::e_Prot || ftype == CSeqFeatData::e_Psec_str) {
214  "Invalid feature for a nucleotide Bioseq.", feat);
215  }
216  if (feat.IsSetData() && feat.GetData().IsProt() && feat.GetData().GetProt().IsSetProcessed()) {
217  CProt_ref::TProcessed processed = feat.GetData().GetProt().GetProcessed();
218  if (processed == CProt_ref::eProcessed_mature
221  || processed == CProt_ref::eProcessed_preprotein) {
222  PostErr (m_Imp.IsRefSeq() ? eDiag_Error : eDiag_Warning,
224  "Peptide processing feature should be remapped to the appropriate protein bioseq",
225  feat);
226  }
227  }
228  }
229 
230  // check for CDSonMinusStrandTranscribedRNA
231  GetTSACDSOnMinusStrandErrors(feat, seq);
232 }
233 
234 
235 // =============================================================================
236 // Private
237 // =============================================================================
238 
239 
240 // private member functions:
241 
242 //#define TEST_LONGTIME
243 
244 // in Ncbistdaa order
245 /*
246 Return values are:
247  0: no problem - Accession is in proper format
248 -1: Accession did not start with a letter (or two letters)
249 -2: Accession did not contain five numbers (or six numbers after 2 letters)
250 -3: the original Accession number to be validated was NULL
251 -4: the original Accession number is too long (>16)
252 */
253 
254 static int ValidateAccessionFormat (string accession)
255 {
257  return -1;
258  } else {
259  return 0;
260  }
261 }
262 
263 
264 bool CValidError_feat::GetPrefixAndAccessionFromInferenceAccession (string inf_accession, string &prefix, string &accession)
265 {
266  size_t pos1 = NStr::Find (inf_accession, ":");
267  size_t pos2 = NStr::Find (inf_accession, "|");
268  size_t pos = string::npos;
269 
270  if (pos1 < pos2) {
271  pos = pos1;
272  } else {
273  pos = pos2;
274  }
275  if (pos == string::npos) {
276  return false;
277  } else {
278  prefix = inf_accession.substr(0, pos);
280  accession = inf_accession.substr(pos + 1);
281  NStr::TruncateSpacesInPlace (accession);
282  return true;
283  }
284 }
285 
286 
/// Reports whether the first three characters of str form a recognised
/// SRA-style accession prefix.
///
/// The prefix is accepted when
///   - character 0 is one of 'S', 'E', 'D',
///   - character 1 is 'R', and
///   - character 2 is one of 'A', 'P', 'X', 'R', 'S', 'Z'.
/// The comparison is case-sensitive; anything after the third character is
/// ignored.
///
/// @param str  Candidate accession (or prefix) text.
/// @return     true if str has at least three characters and they match the
///             pattern above, false otherwise.
bool s_IsSraPrefix (string str)

{
    if (str.length() < 3) {
        return false;
    }

    const bool first_ok  = (str[0] == 'S' || str[0] == 'E' || str[0] == 'D');
    const bool second_ok = (str[1] == 'R');

    bool third_ok = false;
    switch (str[2]) {
    case 'A':
    case 'P':
    case 'X':
    case 'R':
    case 'S':
    case 'Z':
        third_ok = true;
        break;
    default:
        break;
    }

    return first_ok && second_ok && third_ok;
}
301 
302 
304 
/// Reports whether str is made up only of ASCII decimal digits and periods,
/// with a period allowed neither as the first nor as the last character.
///
/// Empty and whitespace-only strings are rejected.
///
/// @param str  Text to examine (e.g. the numeric tail of an accession).
/// @return     true if str is non-empty, neither starts nor ends with '.',
///             and every character is '0'..'9' or '.'; false otherwise.
bool s_IsAllDigitsOrPeriods (string str)
{
    // A blank string, or one with a leading/trailing period, never qualifies.
    if (str.empty() || str.front() == '.' || str.back() == '.') {
        return false;
    }

    for (const char ch : str) {
        const bool is_digit = (ch >= '0' && ch <= '9');
        if (!is_digit && ch != '.') {
            // Any other character (including whitespace) disqualifies str.
            return false;
        }
    }
    return true;
}
318 
319 
320 CValidError_feat::EInferenceValidCode CValidError_feat::ValidateInferenceAccession (string accession, bool fetch_accession, bool is_similar_to, CScope* scope)
321 {
322  if (NStr::IsBlank (accession)) {
324  }
325 
327 
328  string prefix, remainder;
329  if (GetPrefixAndAccessionFromInferenceAccession (accession, prefix, remainder)) {
330  bool is_insd = false, is_refseq = false, is_blast = false;
331 
332  if (NStr::EqualNocase (prefix, "INSD")) {
333  is_insd = true;
334  } else if (NStr::EqualNocase (prefix, "RefSeq")) {
335  is_refseq = true;
336  } else if (NStr::StartsWith (prefix, "BLAST", NStr::eNocase)) {
337  is_blast = true;
338  }
339  if (is_insd || is_refseq) {
340  if (remainder.length() > 3) {
341  if (remainder.c_str()[2] == '_') {
342  if (is_insd) {
344  }
345  } else {
346  if (is_refseq) {
348  }
349  }
350  }
351  if (s_IsSraPrefix (remainder) && s_IsAllDigitsOrPeriods(remainder.substr(3))) {
352  // SRA
353  } else if (NStr::StartsWith(remainder, "MAP_") && s_IsAllDigitsOrPeriods(remainder.substr(4))) {
354  } else {
355  string ver;
356  int acc_code = ValidateAccessionFormat (remainder);
357  if (acc_code == 0) {
358  //-5: missing version number
359  //-6: bad version number
360  size_t dot_pos = NStr::Find (remainder, ".");
361  if (dot_pos == string::npos || NStr::IsBlank(remainder.substr(dot_pos + 1))) {
362  acc_code = -5;
363  } else {
364  const string& cps = remainder.substr(dot_pos + 1);
365  const char *cp = cps.c_str();
366  while (*cp != 0 && isdigit (*cp)) {
367  ++cp;
368  }
369  if (*cp != 0) {
370  acc_code = -6;
371  }
372  }
373  }
374 
375  if (acc_code == -5 || acc_code == -6) {
377  } else if (acc_code != 0) {
379  } else if (fetch_accession) {
380  // Test to see if accession is public
381  if (!IsSequenceFetchable(remainder, scope)) {
383  }
384  }
385  }
386  } else if (is_blast && is_similar_to) {
388  } else if (is_similar_to) {
390  // recognized database
391  } else {
393  }
394  }
395  if (NStr::Find (remainder, " ") != string::npos) {
397  }
398  } else {
400  }
401 
402  return rsult;
403 }
404 
405 
406 vector<string> CValidError_feat::GetAccessionsFromInferenceString (string inference, string &prefix, string &remainder, bool &same_species)
407 {
408  vector<string> accessions;
409 
410  accessions.clear();
411  CInferencePrefixList::GetPrefixAndRemainder (inference, prefix, remainder);
412  if (NStr::IsBlank (prefix)) {
413  return accessions;
414  }
415 
416  same_species = false;
417 
418  if (NStr::StartsWith (remainder, "(same species)", NStr::eNocase)) {
419  same_species = true;
420  remainder = remainder.substr(14);
421  NStr::TruncateSpacesInPlace (remainder);
422  }
423 
424  if (NStr::StartsWith (remainder, ":")) {
425  remainder = remainder.substr (1);
426  NStr::TruncateSpacesInPlace (remainder);
427  } else if (NStr::IsBlank (remainder)) {
428  return accessions;
429  } else {
430  prefix = "";
431  }
432 
433  if (NStr::IsBlank (remainder)) {
434  return accessions;
435  }
436 
437  if (NStr::Equal(prefix, "similar to sequence")
438  || NStr::Equal(prefix, "similar to AA sequence")
439  || NStr::Equal(prefix, "similar to DNA sequence")
440  || NStr::Equal(prefix, "similar to RNA sequence")
441  || NStr::Equal(prefix, "similar to RNA sequence, mRNA")
442  || NStr::Equal(prefix, "similar to RNA sequence, EST")
443  || NStr::Equal(prefix, "similar to RNA sequence, other RNA")) {
444  NStr::Split(remainder, ",", accessions, 0);
445  } else if (NStr::Equal(prefix, "alignment")) {
446  NStr::Split(remainder, ",", accessions, 0);
447  }
448  return accessions;
449 }
450 
451 
453 {
454  if (NStr::IsBlank (inference)) {
456  }
457 
458  string prefix, remainder;
459  bool same_species = false;
460 
461  vector<string> accessions = GetAccessionsFromInferenceString (inference, prefix, remainder, same_species);
462 
463  if (NStr::IsBlank (prefix)) {
465  }
466 
467  if (NStr::IsBlank (remainder)) {
469  }
470 
472  bool is_similar_to = NStr::StartsWith (prefix, "similar to");
473  if (same_species && !is_similar_to) {
475  }
476 
477  if (rsult == eInferenceValidCode_valid) {
478  for (size_t i = 0; i < accessions.size(); i++) {
479  NStr::TruncateSpacesInPlace (accessions[i]);
480  rsult = ValidateInferenceAccession (accessions[i], fetch_accession, is_similar_to, scope);
481  if (rsult != eInferenceValidCode_valid) {
482  break;
483  }
484  }
485  }
486  if (rsult == eInferenceValidCode_valid) {
487  int num_spaces = 0;
488  FOR_EACH_CHAR_IN_STRING(str_itr, remainder) {
489  const char& ch = *str_itr;
490  if (ch == ' ') {
491  num_spaces++;
492  }
493  }
494  if (num_spaces > 3) {
496  } else if (num_spaces > 0){
498  }
499  }
500  return rsult;
501 }
502 
503 
504 //LCOV_EXCL_START
505 //not used by asn_validate but may be needed by other applications
507 {
508  if (!feat.IsSetData() || !feat.GetData().IsCdregion()
509  || !feat.IsSetLocation()) {
510  return false;
511  }
512 
513  const CSeq_loc& loc = feat.GetLocation();
514  bool found_short = false;
515 
516  CSeq_loc_CI li(loc);
517 
518  TSeqPos last_start = li.GetRange().GetFrom();
519  TSeqPos last_stop = li.GetRange().GetTo();
520  CRef<CSeq_id> last_id(new CSeq_id());
521  last_id->Assign(li.GetSeq_id());
522 
523  ++li;
524  while (li && !found_short) {
525  TSeqPos this_start = li.GetRange().GetFrom();
526  TSeqPos this_stop = li.GetRange().GetTo();
527  if (abs ((int)this_start - (int)last_stop) < 11 || abs ((int)this_stop - (int)last_start) < 11) {
528  if (li.GetSeq_id().Equals(*last_id)) {
529  // definitely same bioseq, definitely report
530  found_short = true;
531  } else if (m_Scope) {
532  // only report if definitely on same bioseq
533  CBioseq_Handle last_bsh = m_Scope->GetBioseqHandle(*last_id);
534  if (last_bsh) {
535  for (auto id_it : last_bsh.GetId()) {
536  if (id_it.GetSeqId()->Equals(li.GetSeq_id())) {
537  found_short = true;
538  break;
539  }
540  }
541  }
542  }
543  }
544  last_start = this_start;
545  last_stop = this_stop;
546  last_id->Assign(li.GetSeq_id());
547  ++li;
548  }
549 
550  return found_short;
551 }
552 
553 
555 {
556  const CGene_ref* grp = feat.GetGeneXref();
557  if ( grp && CSingleFeatValidator::s_IsPseudo(*grp)) {
558  return true;
559  }
560 
561  // check overlapping gene
563  if ( overlap ) {
564  return CSingleFeatValidator::s_IsPseudo(*overlap);
565  }
566 
567  return false;
568 }
569 
570 
572 {
573  if (!feat.IsSetData()
575  || !feat.IsSetLocation()
576  || feat.IsSetPseudo()
577  || IsOverlappingGenePseudo(feat)) {
578  return false;
579  }
580 
581  const CSeq_loc& loc = feat.GetLocation();
582  bool is_short = false;
583 
584  if (! m_Imp.IsIndexerVersion()) {
585  CBioseq_Handle bsh = x_GetCachedBsh(loc);
586  if (!bsh || IsOrganelle(bsh)) return is_short;
587  }
588 
589  if (GetLength(loc, m_Scope) < 11) {
590  bool partial_left = loc.IsPartialStart(eExtreme_Positional);
591  bool partial_right = loc.IsPartialStop(eExtreme_Positional);
592 
593  CBioseq_Handle bsh;
594  if (partial_left && loc.GetStart(eExtreme_Positional) == 0) {
595  // partial at beginning of sequence, ok
596  } else if (partial_right &&
597  (bsh = x_GetCachedBsh(loc)) &&
598  loc.GetStop(eExtreme_Positional) == (bsh.GetBioseqLength() - 1))
599  {
600  // partial at end of sequence
601  } else {
602  is_short = true;
603  }
604  }
605  return is_short;
606 }
607 //LCOV_EXCL_STOP
608 
609 
610 bool
612  const CSeq_feat& feat1,
613  const CSeq_feat& feat2,
614  CSeqFeatData::ESubtype subtype1,
615  CSeqFeatData::ESubtype subtype2)
616 {
617  if (!feat1.IsSetData() || !feat2.IsSetData()) {
618  return false;
619  } else if (feat1.GetData().GetSubtype() == subtype1 && feat2.GetData().GetSubtype() == subtype2) {
620  return true;
621  } else if (feat1.GetData().GetSubtype() == subtype2 && feat2.GetData().GetSubtype() == subtype1) {
622  return true;
623  } else {
624  return false;
625  }
626 }
627 
628 
629 bool GeneXrefConflicts(const CSeq_feat& feat, const CSeq_feat& gene)
630 {
632  string label;
633  if ((*it)->IsSetData() && (*it)->GetData().IsGene()
634  && !CSingleFeatValidator::s_GeneRefsAreEquivalent((*it)->GetData().GetGene(), gene.GetData().GetGene(), label)) {
635  return true;
636  }
637  }
638  return false;
639 }
640 
641 
642 // does feat have an xref to a feature other than the one specified by id with the same subtype
644  const CSeq_feat& feat,
645  const CFeat_id& id,
646  CSeqFeatData::ESubtype subtype)
647 {
648  if (!feat.IsSetXref()) {
649  return false;
650  }
651  ITERATE(CSeq_feat::TXref, it, feat.GetXref()) {
652  if ((*it)->IsSetId()) {
653  if ((*it)->GetId().Equals(id)) {
654  // match
655  } else if ((*it)->GetId().IsLocal()) {
656  const CTSE_Handle::TFeatureId& x_id = (*it)->GetId().GetLocal();
658  if (!far_feats.empty()) {
659  return true;
660  }
661  }
662  }
663  }
664  return false;
665 }
666 
667 
669 {
670  if (!feat.IsSetId()) {
672  "Cross-referenced feature does not link reciprocally",
673  feat);
674  } else if (far_feat.HasSeqFeatXref(feat.GetId())) {
675  const bool is_cds_mrna = FeaturePairIsTwoTypes(feat, far_feat,
677  const bool is_gene_mrna = FeaturePairIsTwoTypes(feat, far_feat,
679  const bool is_gene_cdregion = FeaturePairIsTwoTypes(feat, far_feat,
681  if (is_cds_mrna ||
682  is_gene_mrna ||
683  is_gene_cdregion) {
684  if (feat.GetData().IsCdregion() && far_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) {
685  ECompare comp = Compare(feat.GetLocation(), far_feat.GetLocation(),
687  if ((comp != eContained) && (comp != eSame)) {
689  "CDS not contained within cross-referenced mRNA", feat);
690  }
691  }
692  if (far_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_gene) {
693  // make sure feature does not have conflicting gene xref
694  if (GeneXrefConflicts(feat, far_feat)) {
696  "Feature gene xref does not match Feature ID cross-referenced gene feature",
697  feat);
698  }
699  }
700  } else if (CSeqFeatData::ProhibitXref(feat.GetData().GetSubtype(), far_feat.GetData().GetSubtype())) {
701  string label1 = feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
702  string label2 = far_feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
704  "Cross-references are not between CDS and mRNA pair or between a gene and a CDS or mRNA ("
705  + label1 + "," + label2 + ")",
706  feat);
707  } else if (!CSeqFeatData::AllowXref(feat.GetData().GetSubtype(), far_feat.GetData().GetSubtype())) {
708  string label1 = feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
709  string label2 = far_feat.GetData().GetKey(CSeqFeatData::eVocabulary_genbank);
711  "Cross-references are not between CDS and mRNA pair or between a gene and a CDS or mRNA ("
712  + label1 + "," + label2 + ")",
713  feat);
714  }
715  } else if (x_HasNonReciprocalXref(far_feat, feat.GetId(), feat.GetData().GetSubtype())) {
717  "Cross-referenced feature does not link reciprocally",
718  feat);
719  } else {
720  if (feat.GetData().IsCdregion() && far_feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_mRNA) {
721  ECompare comp = Compare(feat.GetLocation(), far_feat.GetLocation(),
723  if ((comp != eContained) && (comp != eSame)) {
725  "CDS not contained within cross-referenced mRNA", feat);
726  }
727  }
728  if (far_feat.IsSetXref()) {
730  "Cross-referenced feature does not link reciprocally",
731  feat);
732  } else if (!far_feat.GetData().IsGene()) {
734  "Cross-referenced feature does not have its own cross-reference", feat);
735  }
736  }
737 }
738 
739 
740 bool s_HasId(const CSeq_feat& feat, const CSeqFeatXref::TId::TLocal& id)
741 {
742  if (!feat.IsSetId() && feat.GetId().IsLocal()) {
743  return false;
744  }
745  return feat.GetId().GetLocal().Equals(id);
746 }
747 
748 
750 {
751  if (!feat.IsSetXref()) {
752  return;
753  }
754  for (auto it = feat.GetXref().begin(); it != feat.GetXref().end(); it++) {
755  ValidateSeqFeatXref(**it, feat);
756  }
757 }
758 
759 
761 {
762  if (!m_Imp.IsStandaloneAnnot() && !m_TSE) {
763  return;
764  }
765  if (!xref.IsSetId() && !xref.IsSetData()) {
767  "SeqFeatXref with no id or data field", feat);
768  } else if (xref.IsSetId()) {
769  if (xref.GetId().IsLocal()) {
770  vector<CConstRef<CSeq_feat> > far_feats;
771  if (m_Imp.IsStandaloneAnnot()) {
772  for (auto it = m_Imp.GetSeqAnnot()->GetData().GetFtable().begin(); it != m_Imp.GetSeqAnnot()->GetData().GetFtable().end(); it++) {
773  if (s_HasId(**it, xref.GetId().GetLocal())) {
774  far_feats.push_back(*it);
775  }
776  }
777  } else {
779  for (auto it = far_handles.begin(); it != far_handles.end(); it++) {
780  far_feats.push_back(it->GetSeq_feat());
781  }
782  }
783  if (far_feats.empty()) {
785  "Cross-referenced feature cannot be found",
786  feat);
787  } else {
788  for (auto ff = far_feats.begin(); ff != far_feats.end(); ff++) {
789  ValidateOneFeatXrefPair(feat, **ff);
790  if (xref.IsSetData()) {
791  // Check that feature with ID matches data
792  if (xref.GetData().Which() != (*ff)->GetData().Which()) {
794  "SeqFeatXref contains both id and data, data type conflicts with data on feature with id",
795  feat);
796  }
797  }
798  }
799  }
800  } else {
802  "Cross-referenced feature cannot be found",
803  feat);
804  }
805  }
806  if (xref.IsSetData() && xref.GetData().IsGene() && feat.GetData().IsGene()) {
808  "Gene feature has gene cross-reference",
809  feat);
810  }
811 }
812 
813 
814 END_SCOPE(validator)
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
@ eErr_SEQ_FEAT_InvalidFeatureForNucleotide
@ eErr_SEQ_FEAT_InvalidFeatureForProtein
@ eErr_SEQ_FEAT_SeqFeatXrefFeatureMissing
@ eErr_SEQ_FEAT_InvalidForType
@ eErr_SEQ_FEAT_UnnecessaryGeneXref
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_CDSonMinusStrandTranscribedRNA
@ eErr_SEQ_FEAT_SeqFeatXrefProblem
@ eErr_SEQ_FEAT_SeqFeatXrefNotReciprocal
@ eErr_SEQ_FEAT_CDSmRNAXrefLocationProblem
CBioseq_Handle –.
bool IsAa(void) const
Definition: Bioseq.cpp:350
CFeat_id –.
Definition: Feat_id.hpp:66
static bool IsLegalInferenceDatabase(const string &db)
Definition: Gb_qual.cpp:784
void Clear()
Definition: gene_cache.hpp:89
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
Definition: gene_cache.cpp:106
static void GetPrefixAndRemainder(const string &inference, string &prefix, string &remainder)
Definition: Gb_qual.cpp:381
CScope –.
Definition: scope.hpp:92
static bool ProhibitXref(CSeqFeatData::ESubtype subtype1, CSeqFeatData::ESubtype subtype2)
ESubtype GetSubtype(void) const
string GetKey(EVocabulary vocab=eVocabulary_full) const
static bool AllowXref(CSeqFeatData::ESubtype subtype1, CSeqFeatData::ESubtype subtype2)
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool HasSeqFeatXref(const CSeqFeatXref::TId &id) const
Definition: Seq_feat.cpp:265
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
Definition: Seq_feat.cpp:181
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool s_IsPseudo(const CSeq_feat &feat)
static bool s_GeneRefsAreEquivalent(const CGene_ref &g1, const CGene_ref &g2, string &label)
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
vector< CSeq_feat_Handle > TSeq_feat_Handles
Definition: tse_handle.hpp:167
CSeq_entry_Handle m_TSE
void ValidateSeqFeatXref(const CSeq_feat &feat)
~CValidError_feat() override
bool x_HasNonReciprocalXref(const CSeq_feat &feat, const CFeat_id &id, CSeqFeatData::ESubtype subtype)
CBioseq_Handle x_GetCachedBsh(const CSeq_loc &loc)
void ValidateSeqFeatContext(const CSeq_feat &feat, const CBioseq &seq)
static EInferenceValidCode ValidateInferenceAccession(string accession, bool fetch_accession, bool is_similar_to, CScope *scope=nullptr)
CValidError_feat(CValidError_imp &imp)
void ValidateOneFeatXrefPair(const CSeq_feat &feat, const CSeq_feat &far_feat)
bool IsIntronShort(const CSeq_feat &feat)
void SetTSE(CSeq_entry_Handle seh)
bool GetTSACDSOnMinusStrandErrors(const CSeq_feat &feat, const CBioseq &seq)
bool IsOverlappingGenePseudo(const CSeq_feat &feat)
static bool GetPrefixAndAccessionFromInferenceAccession(string inf_accession, string &prefix, string &accession)
bool DoesCDSHaveShortIntrons(const CSeq_feat &feat)
@ eInferenceValidCode_bad_accession_version
@ eInferenceValidCode_same_species_misused
@ eInferenceValidCode_accession_version_not_public
@ eInferenceValidCode_unrecognized_database
void ValidateSeqFeat(const CSeq_feat &feat)
void x_ValidateSeqFeatExceptXref(const CSeq_feat &feat)
static EInferenceValidCode ValidateInference(string inference, bool fetch_accession, CScope *scope=nullptr)
static vector< string > GetAccessionsFromInferenceString(string inference, string &prefix, string &remainder, bool &same_species)
CBioseq_Handle GetBioseqHandleFromLocation(CScope *scope, const CSeq_loc &loc, const CTSE_Handle &tse)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * str(char *buf, int n)
Definition: stats.c:84
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
Definition: ncbidiag.hpp:655
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
@ eAcc_unknown
Definition: Seq_id.hpp:322
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
ECompare
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TSeqPos GetBioseqLength(void) const
const CTSE_Handle & GetTSE_Handle(void) const
const TId & GetId(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
static const char label[]
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
EProcessed
processing status
Definition: Prot_ref_.hpp:95
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
Definition: Prot_ref_.hpp:513
@ eProcessed_signal_peptide
Definition: Prot_ref_.hpp:99
@ eProcessed_transit_peptide
Definition: Prot_ref_.hpp:100
const TData & GetData(void) const
Get the Data member data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
const TId & GetId(void) const
Get the Id member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TGene & GetGene(void) const
Get the variant data.
bool IsSetId(void) const
the feature copied Check if a value has been assigned to Id data member.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ e_Txinit
transcription initiation
@ e_Rsite
restriction site (for maps really)
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:472
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
Definition: MolInfo_.hpp:113
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
int i
constexpr bool empty(list< Ts... >) noexcept
#define abs(a)
Definition: ncbi_heapmgr.c:130
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
bool IsOrganelle(int genome)
Definition: utilities.cpp:2831
bool IsSequenceFetchable(const CSeq_id &id, CScope *scope=nullptr)
Definition: utilities.cpp:2714
static const char * prefix[]
Definition: pcregrep.c:405
#define FOR_EACH_SEQFEATXREF_ON_SEQFEAT(Itr, Var)
FOR_EACH_SEQFEATXREF_ON_SEQFEAT EDIT_EACH_SEQFEATXREF_ON_SEQFEAT.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
CSingleFeatValidator * FeatValidatorFactory(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool s_IsSraPrefix(string str)
static int ValidateAccessionFormat(string accession)
bool GeneXrefConflicts(const CSeq_feat &feat, const CSeq_feat &gene)
bool s_IsAllDigitsOrPeriods(string str)
bool s_HasId(const CSeq_feat &feat, const CSeqFeatXref::TId::TLocal &id)
bool FeaturePairIsTwoTypes(const CSeq_feat &feat1, const CSeq_feat &feat2, CSeqFeatData::ESubtype subtype1, CSeqFeatData::ESubtype subtype2)
Modified on Fri May 24 14:51:34 2024 by modify_doxy.py rev. 669887