NCBI C++ ToolKit
validatorp.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validatorp.cpp 102121 2024-04-03 21:59:37Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko, Mati Shomrat, ....
27  *
28  * File Description:
29  * Implementation of private parts of the validator
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbiapp.hpp>
38 
48 
49 #include <serial/iterator.hpp>
50 #include <serial/enumvalues.hpp>
51 
55 
57 
60 
61 #include <objects/seq/Bioseq.hpp>
63 #include <objects/seq/Seqdesc.hpp>
65 #include <objects/seq/Pubdesc.hpp>
66 #include <objects/seq/MolInfo.hpp>
73 
78 
80 
83 
84 #include <objmgr/bioseq_ci.hpp>
85 #include <objmgr/seqdesc_ci.hpp>
86 #include <objmgr/graph_ci.hpp>
87 #include <objmgr/seq_annot_ci.hpp>
88 #include <objmgr/util/feature.hpp>
89 #include <objmgr/util/sequence.hpp>
90 
91 #include <objmgr/feat_ci.hpp>
92 #include <objmgr/align_ci.hpp>
93 #include <objmgr/seq_vector.hpp>
94 #include <objmgr/scope.hpp>
95 
96 #include <objects/pub/Pub.hpp>
98 
110 #include <objects/biblio/Title.hpp>
112 #include <objects/biblio/Affil.hpp>
115 #include <objects/taxon3/taxon3.hpp>
117 
124 
125 #include <objtools/error_codes.hpp>
131 #include <util/sgml_entity.hpp>
132 #include <util/line_reader.hpp>
133 #include <util/util_misc.hpp>
134 #include <util/static_set.hpp>
135 
136 #include <algorithm>
137 
138 
139 #include <serial/iterator.hpp>
140 
141 #define NCBI_USE_ERRCODE_X Objtools_Validator
142 
145 BEGIN_SCOPE(validator)
146 using namespace sequence;
147 
148 namespace {
149  // avoid creating a PQuickStringLess for every comparison
150  PQuickStringLess s_QuickStringLess;
151 };
152 
153 
154 // =============================================================================
155 // CValidError_imp Public
156 // =============================================================================
157 
163 
167 
169 (CObjectManager& objmgr,
170  shared_ptr<SValidatorContext> pContext,
171  CValidError* errs,
172  Uint4 options) :
173  m_ObjMgr{&objmgr},
174  m_ErrRepository{errs},
175  m_pContext{pContext}
176 {
177  x_Init(options);
178 }
179 
181 {
182  SetOptions(options);
183  Reset();
184 
186 }
187 
188 // Destructor
190 {
191 }
192 
193 
195 {
196  // if (!m_pContext) {
197  // m_pContext = make_shared<SValidatorContext>();
198  // }
200  return *m_pContext;
201 }
202 
203 
205 {
207  return *m_pContext;
208 }
209 
210 
212 {
213  const auto& context = GetContext();
214  return context.PreprocessHugeFile ||
215  context.PostprocessHugeFile;
216 }
217 
218 
219 bool CValidError_imp::IsHugeSet(const CBioseq_set& bioseqSet) const
220 {
221  if (bioseqSet.IsSetClass()) {
222  return IsHugeSet(bioseqSet.GetClass());
223  }
224  return false;
225 }
226 
227 
229 {
230  return edit::CHugeAsnReader::IsHugeSet(setClass);
231 }
232 
233 
234 bool CValidError_imp::IsFarSequence(const CSeq_id& id) // const
235 {
236  if (IsHugeFileMode() && GetContext().IsIdInBlob) {
237  return !GetContext().IsIdInBlob(id);
238  }
239 
240  _ASSERT(m_Scope);
241  if (GetBioseqHandleFromTSE(id)) {
242  return false;
243  }
244  return true;
245 }
246 
247 
249 {
250  if (m_Scope) {
252  }
253  return CBioseq_Handle();
254 }
255 
256 
258 {
259  if (!IsHugeFileMode()) {
260  return GetBioseqHandleFromTSE(id);
261  }
262  // Huge-file mode
263  if (!IsFarSequence(id)) {
264  return m_Scope->GetBioseqHandle(id);
265  }
266  return CBioseq_Handle();
267 }
268 
269 
271 {
272  m_NonASCII = (options & CValidator::eVal_non_ascii) != 0;
275  m_ValidateExons = (options & CValidator::eVal_val_exons) != 0;
276  m_OvlPepErr = (options & CValidator::eVal_ovl_pep_err) != 0;
279  m_RemoteFetch = (options & CValidator::eVal_remote_fetch) != 0;
285  m_UseEntrez = (options & CValidator::eVal_use_entrez) != 0;
300 }
301 
302 
303 //LCOV_EXCL_START
304 //not used by asnvalidate
306 {
307  m_ErrRepository = errors;
308 }
309 //LCOV_EXCL_STOP
310 
311 
313 {
314  m_Scope = nullptr;
315  m_TSE = nullptr;
316  m_IsStandaloneAnnot = false;
317  m_SeqAnnot.Reset();
318 
319  m_pEntryInfo.reset(new CValidatorEntryInfo());
320 
321  m_IsNC = false;
322  m_IsNG = false;
323  m_IsNM = false;
324  m_IsNP = false;
325  m_IsNR = false;
326  m_IsNZ = false;
327  m_IsNS = false;
328  m_IsNT = false;
329  m_IsNW = false;
330  m_IsWP = false;
331  m_IsXR = false;
332 
333  m_PrgCallback = nullptr;
334  m_NumAlign = 0;
335  m_NumAnnot = 0;
336  m_NumBioseq = 0;
337  m_NumBioseq_set = 0;
339  m_NumDesc = 0;
340  m_NumDescr = 0;
341  m_NumFeat = 0;
342  m_NumGraph = 0;
346  m_NumGenes = 0;
347  m_NumGeneXrefs = 0;
350  m_NumPseudo = 0;
351  m_NumPseudogene = 0;
352  m_FarFetchFailure = false;
353  m_IsTbl2Asn = false;
354 }
355 
356 
357 // Error post methods
359 (EDiagSev sv,
360  EErrType et,
361  const string& msg,
362  const CSerialObject& obj)
363 {
364  const CTypeInfo* type_info = obj.GetThisTypeInfo();
365  if (type_info == CSeqdesc::GetTypeInfo()) {
366  const CSeqdesc* desc = dynamic_cast < const CSeqdesc* > (&obj);
367  ERR_POST_X(1, Warning << "Seqdesc validation error using default context.");
368  PostErr (sv, et, msg, GetTSE(), *desc);
369  } else if (type_info == CSeq_feat::GetTypeInfo()) {
370  const CSeq_feat* feat = dynamic_cast < const CSeq_feat* > (&obj);
371  PostErr (sv, et, msg, *feat);
372  } else if (type_info == CBioseq::GetTypeInfo()) {
373  const CBioseq* seq = dynamic_cast < const CBioseq* > (&obj);
374  PostErr (sv, et, msg, *seq);
375  } else if (type_info == CBioseq_set::GetTypeInfo()) {
376  const CBioseq_set* set = dynamic_cast < const CBioseq_set* > (&obj);
377  PostErr (sv, et, msg, *set);
378  } else if (type_info == CSeq_annot::GetTypeInfo()) {
379  const CSeq_annot* annot = dynamic_cast < const CSeq_annot* > (&obj);
380  PostErr (sv, et, msg, *annot);
381  } else if (type_info == CSeq_graph::GetTypeInfo()) {
382  const CSeq_graph* graph = dynamic_cast < const CSeq_graph* > (&obj);
383  PostErr (sv, et, msg, *graph);
384  } else if (type_info == CSeq_align::GetTypeInfo()) {
385  const CSeq_align* align = dynamic_cast < const CSeq_align* > (&obj);
386  PostErr (sv, et, msg, *align);
387  } else if (type_info == CSeq_entry::GetTypeInfo()) {
388  const CSeq_entry* entry = dynamic_cast < const CSeq_entry* > (&obj);
389  PostErr (sv, et, msg, *entry);
390  } else if (type_info == CBioSource::GetTypeInfo()) {
391  const CBioSource* src = dynamic_cast < const CBioSource* > (&obj);
392  PostErr (sv, et, msg, *src);
393  } else if (type_info == COrg_ref::GetTypeInfo()) {
394  const COrg_ref* org = dynamic_cast < const COrg_ref* > (&obj);
395  PostErr (sv, et, msg, *org);
396  } else if (type_info == CPubdesc::GetTypeInfo()) {
397  const CPubdesc* pd = dynamic_cast < const CPubdesc* > (&obj);
398  PostErr (sv, et, msg, *pd);
399  } else if (type_info == CSeq_submit::GetTypeInfo()) {
400  const CSeq_submit* ss = dynamic_cast < const CSeq_submit* > (&obj);
401  PostErr (sv, et, msg, *ss);
402  } else {
403  ERR_POST_X(1, Warning << "Unknown data type in PostErr.");
404  }
405 }
406 
407 
408 /*
409 void CValidError_imp::PostErr
410 (EDiagSev sv,
411  EErrType et,
412  const string& msg,
413  TDesc ds)
414 {
415  // Append Descriptor label
416  string desc = "DESCRIPTOR: ";
417  ds.GetLabel (&desc, CSeqdesc::eBoth);
418  desc += ", NO Descriptor Context";
419  m_ErrRepository->AddValidErrItem(sv, et, msg, desc, ds, *m_Scope);
420 }
421 */
422 
423 static const EErrType sc_ValidGenomeRaise[] = {
581 };
582 
584 
590 };
591 
593 
594 
597 };
598 
600 
601 
603  EErrType et
604 )
605 
606 {
607  if (sc_GenomeRaiseExceptEmblDdbjRefSeqArray.find(et) != sc_GenomeRaiseExceptEmblDdbjRefSeqArray.end()) {
608  if (IsEmbl() || IsDdbj() || IsRefSeq()) {
609  return false;
610  } else {
611  return true;
612  }
613  }
614  if (sc_GenomeRaiseExceptEmblDdbjArray.find(et) != sc_GenomeRaiseExceptEmblDdbjArray.end()) {
615  if (IsEmbl() || IsDdbj()) {
616  return false;
617  } else {
618  return true;
619  }
620  }
621  if (sc_GenomeRaiseArray.find (et) != sc_GenomeRaiseArray.end()) {
622  return true;
623  }
624  return false;
625 }
626 
628 (EDiagSev sv,
629  EErrType et,
630  const string& msg,
631  TFeat ft)
632 {
634 
635  // Adjust severity
637  sv = eDiag_Error;
638  }
639 
640  item->SetSev(sv);
641  item->SetErrIndex(et);
642  item->SetMsg(msg);
643  item->SetObject(ft);
644 
645  if (GenerateGoldenFile()) {
647  return;
648  }
649 
650  string content_label = CValidErrorFormat::GetFeatureContentLabel(ft, m_Scope);
651  item->SetObj_content(content_label);
652 
653  string feature_id = CValidErrorFormat::GetFeatureIdLabel(ft);
654  if (!NStr::IsBlank(feature_id)) {
655  item->SetFeatureId(feature_id);
656  }
657 
659  if (!NStr::IsBlank(bioseq_label)) {
660  item->SetBioseq(bioseq_label);
661  }
662 
663  // Calculate sequence offset
664  TSeqPos offset = 0;
665  string location;
666  if (ft.IsSetLocation()) {
669  if (!NStr::IsBlank(loc_label)) {
670  item->SetLocation(loc_label);
671  }
672  item->SetSeqOffset(offset);
673  }
674 
675 
677  if (!NStr::IsBlank(product_label)) {
678  item->SetProduct_loc(product_label);
679  }
680 
681  int version = 0;
682  string accession;
683  if (m_Scope) {
684  accession = GetAccessionFromObjects(&ft, nullptr, *m_Scope, &version);
685  }
686  item->SetAccession(accession);
687  if (version > 0) {
688  item->SetAccnver(accession + "." + NStr::IntToString(version));
689  item->SetVersion(version);
690  } else {
691  item->SetAccnver(accession);
692  }
693 
694  if (ft.IsSetData()) {
695  if (ft.GetData().IsGene()) {
696  if (ft.GetData().GetGene().IsSetLocus_tag() &&
698  item->SetLocus_tag(ft.GetData().GetGene().GetLocus_tag());
699  }
700  } else {
701  if (m_CollectLocusTags) {
702  // TODO: this should be part of post-processing
704  if (gene && gene->GetData().GetGene().IsSetLocus_tag() &&
705  !NStr::IsBlank(gene->GetData().GetGene().GetLocus_tag())) {
706  item->SetLocus_tag(gene->GetData().GetGene().GetLocus_tag());
707  }
708  }
709  }
710  }
711 
712  item->SetFeatureObjDescFromFields();
714 }
715 
716 
718 (EDiagSev sv,
719  EErrType et,
720  const string& msg,
721  TBioseq sq)
722 {
723  // Adjust severity
725  sv = eDiag_Error;
726  }
727 
728  if (GenerateGoldenFile()) {
729  m_ErrRepository->AddValidErrItem(sv, et, msg);
730  return;
731  }
732 
733  // Append bioseq label
734  string desc;
736  int version = 0;
737  const string& accession = GetAccessionFromBioseq(sq, &version);
738  // GetAccessionFromObjects(&sq, nullptr, *m_Scope, &version);
739  x_AddValidErrItem(sv, et, msg, desc, sq, accession, version);
740 }
741 
742 
744 (EDiagSev sv,
745  EErrType et,
746  const string& msg,
747  TSet st)
748 {
749  // Adjust severity
751  sv = eDiag_Error;
752  }
753 
754  if (GenerateGoldenFile()) {
755  m_ErrRepository->AddValidErrItem(sv, et, msg);
756  return;
757  }
758 
759  // Append Bioseq_set label
760 
761  const auto isSetClass = st.IsSetClass();
762 
763  if (isSetClass && GetContext().PreprocessHugeFile) {
764  if (auto setClass = st.GetClass(); IsHugeSet(setClass)) {
765  string desc =
767  x_AddValidErrItem(sv, et, msg, desc, st, GetContext().HugeSetId, 0);
768  return;
769  }
770  }
771 
772  int version = 0;
773  const string& accession = GetAccessionFromBioseqSet(st, &version);
774  //string desc = CValidErrorFormat::GetBioseqSetLabel(st, m_SuppressContext);
775  string desc = CValidErrorFormat::GetBioseqSetLabel(accession,
776  isSetClass ? st.GetClass() : CBioseq_set::eClass_not_set,
777  isSetClass ? m_SuppressContext : true);
778  x_AddValidErrItem(sv, et, msg, desc, st, accession, version);
779 }
780 
781 
783 (EDiagSev sv,
784  EErrType et,
785  const string& msg,
786  TEntry ctx,
787  TDesc ds)
788 {
789  // Adjust severity
791  sv = eDiag_Error;
792  }
793 
794  if (GenerateGoldenFile()) {
795  m_ErrRepository->AddValidErrItem(sv, et, msg);
796  return;
797  }
798 
799 
800  if (GetContext().PreprocessHugeFile &&
801  ctx.IsSet() && ctx.GetSet().IsSetClass()) {
802  if (auto setClass = ctx.GetSet().GetClass(); IsHugeSet(setClass)) {
803  string desc{"DESCRIPTOR: "};
804  desc += CValidErrorFormat::GetDescriptorContent(ds) + " ";
805  desc += "BIOSEQ-SET: ";
806  if (!m_SuppressContext) {
807  if (setClass == CBioseq_set::eClass_genbank) {
808  desc += "genbank: ";
809  }
810  else {
811  desc += "wgs-set: ";
812  }
813  }
814  desc += GetContext().HugeSetId;
815  m_ErrRepository->AddValidErrItem(sv, et, msg, desc, ds, GetContext().HugeSetId, 0);
816  return;
817  }
818  }
819 
820  // Append Descriptor label
822  int version = 0;
823  const string& accession = GetAccessionFromObjects(&ds, &ctx, *m_Scope, &version);
824  m_ErrRepository->AddValidErrItem(sv, et, msg, desc, ds, ctx, accession, version);
825 }
826 
827 
828 //void CValidError_imp::PostErr
829 //(EDiagSev sv,
830 // EErrType et,
831 // const string& msg,
832 // TBioseq sq,
833 // TDesc ds)
834 //{
835 // // Append Descriptor label
836 // string desc("DESCRIPTOR: ");
837 // ds.GetLabel(&desc, CSeqdesc::eBoth);
838 //
839 // s_AppendBioseqLabel(desc, sq, m_SuppressContext);
840 // m_ErrRepository->AddValidErrItem(sv, et, msg, desc, ds, *m_Scope);
841 // //PostErr(sv, et, msg, sq);
842 //}
843 
844 
845 //void CValidError_imp::PostErr
846 //(EDiagSev sv,
847 // EErrType et,
848 // const string& msg,
849 // TSet st,
850 // TDesc ds)
851 //{
852 // // Append Descriptor label
853 // string desc = " DESCRIPTOR: ";
854 // ds.GetLabel(&desc, CSeqdesc::eBoth);
855 // s_AppendSetLabel(desc, st, m_SuppressContext);
856 // m_ErrRepository->AddValidErrItem(sv, et, msg, desc, st, *m_Scope);
857 //
858 //}
859 
860 
862 (EDiagSev sv,
863  EErrType et,
864  const string& msg,
865  TAnnot an)
866 {
867  // Adjust severity
869  sv = eDiag_Error;
870  }
871 
872  if (GenerateGoldenFile()) {
873  m_ErrRepository->AddValidErrItem(sv, et, msg);
874  return;
875  }
876 
877  // Append Annotation label
878  string desc = "ANNOTATION: ";
879 
880  // !!! need to decide on the message
881 
882  int version = 0;
883  const string& accession = GetAccessionFromObjects(&an, nullptr, *m_Scope, &version);
884  x_AddValidErrItem(sv, et, msg, desc, an, accession, version);
885 }
886 
887 
889 (EDiagSev sv,
890  EErrType et,
891  const string& msg,
892  TGraph graph)
893 {
894  // Adjust severity
896  sv = eDiag_Error;
897  }
898 
899  if (GenerateGoldenFile()) {
900  m_ErrRepository->AddValidErrItem(sv, et, msg);
901  return;
902  }
903 
904  // Append Graph label
905  string desc = "GRAPH: ";
906  if (graph.IsSetTitle()) {
907  desc += graph.GetTitle();
908  } else {
909  desc += "<Unnamed>";
910  }
911  desc += " ";
912  graph.GetLoc().GetLabel(&desc);
913 
914  int version = 0;
915  const string& accession = GetAccessionFromObjects(&graph, nullptr, *m_Scope, &version);
916  x_AddValidErrItem(sv, et, msg, desc, graph, accession, version);
917 }
918 
919 
921 (EDiagSev sv,
922  EErrType et,
923  const string& msg,
924  TBioseq sq,
925  TGraph graph)
926 {
927  // Adjust severity
929  sv = eDiag_Error;
930  }
931 
932  if (GenerateGoldenFile()) {
933  m_ErrRepository->AddValidErrItem(sv, et, msg);
934  return;
935  }
936 
937  // Append Graph label
938  string desc("GRAPH: ");
939  if ( graph.IsSetTitle() ) {
940  desc += graph.GetTitle();
941  } else {
942  desc += "<Unnamed>";
943  }
944  desc += " ";
945  graph.GetLoc().GetLabel(&desc);
947  int version = 0;
948  const string& accession = GetAccessionFromObjects(&graph, nullptr, *m_Scope, &version);
949  x_AddValidErrItem(sv, et, msg, desc, graph, accession, version);
950 }
951 
952 
954 (EDiagSev sv,
955  EErrType et,
956  const string& msg,
957  TAlign align)
958 {
959  // Adjust severity
961  sv = eDiag_Error;
962  }
963 
964  if (GenerateGoldenFile()) {
965  m_ErrRepository->AddValidErrItem(sv, et, msg);
966  return;
967  }
968 
970  if (id) {
972  if (bsh) {
973  PostErr(sv, et, msg, *(bsh.GetCompleteBioseq()));
974  return;
975  }
976  }
977 
978  // Can't get bioseq for reporting, use other Alignment label
979  string desc = "ALIGNMENT: ";
980  if (align.IsSetType()) {
981  desc += align.ENUM_METHOD_NAME(EType)()->FindName(align.GetType(), true);
982  }
983  try {
984  CSeq_align::TDim dim = align.GetDim();
985  desc += ", dim=" + NStr::NumericToString(dim);
986  } catch ( const CUnassignedMember& ) {
987  desc += ", dim=UNASSIGNED";
988  }
989 
990  if (align.IsSetSegs()) {
991  desc += " SEGS: ";
992  desc += align.GetSegs().SelectionName(align.GetSegs().Which());
993  }
994 
995  int version = 0;
996  const string& accession = GetAccessionFromObjects(&align, nullptr, *m_Scope, &version);
997  x_AddValidErrItem(sv, et, msg, desc, align, accession, version);
998 }
999 
1000 
1002 (EDiagSev sv,
1003  EErrType et,
1004  const string& msg,
1005  TEntry entry)
1006 {
1007  // Adjust severity
1008  if (m_genomeSubmission && RaiseGenomeSeverity(et) && sv < eDiag_Error) {
1009  sv = eDiag_Error;
1010  }
1011 
1012  if (GenerateGoldenFile()) {
1013  m_ErrRepository->AddValidErrItem(sv, et, msg);
1014  return;
1015  }
1016 
1017  if (entry.IsSeq()) {
1018  PostErr(sv, et, msg, entry.GetSeq());
1019  } else if (entry.IsSet()) {
1020  PostErr(sv, et, msg, entry.GetSet());
1021  } else {
1022  string desc = "SEQ-ENTRY: ";
1023  entry.GetLabel(&desc, CSeq_entry::eContent);
1024 
1025  int version = 0;
1026  const string& accession = GetAccessionFromObjects(&entry, nullptr, *m_Scope, &version);
1027  x_AddValidErrItem(sv, et, msg, desc, entry, accession, version);
1028  }
1029 }
1030 
1031 
1033 (EDiagSev sv,
1034  EErrType et,
1035  const string& msg,
1036  const CBioSource& src)
1037 {
1038  // Adjust severity
1039  if (m_genomeSubmission && RaiseGenomeSeverity(et) && sv < eDiag_Error) {
1040  sv = eDiag_Error;
1041  }
1042 
1043  if (GenerateGoldenFile()) {
1044  m_ErrRepository->AddValidErrItem(sv, et, msg);
1045  return;
1046  }
1047 
1048  string desc = "BioSource: ";
1049  x_AddValidErrItem(sv, et, msg, desc, src, "", 0);
1050 }
1051 
1052 
1054 (EDiagSev sv,
1055  EErrType et,
1056  const string& msg,
1057  const COrg_ref& org)
1058 {
1059  // Adjust severity
1060  if (m_genomeSubmission && RaiseGenomeSeverity(et) && sv < eDiag_Error) {
1061  sv = eDiag_Error;
1062  }
1063 
1064  if (GenerateGoldenFile()) {
1065  m_ErrRepository->AddValidErrItem(sv, et, msg);
1066  return;
1067  }
1068 
1069  string desc = "Org-ref: ";
1070  x_AddValidErrItem(sv, et, msg, desc, org, "", 0);
1071 }
1072 
1073 
1075 (EDiagSev sv,
1076  EErrType et,
1077  const string& msg,
1078  const CPubdesc& pd)
1079 {
1080  // Adjust severity
1081  if (m_genomeSubmission && RaiseGenomeSeverity(et) && sv < eDiag_Error) {
1082  sv = eDiag_Error;
1083  }
1084 
1085  if (GenerateGoldenFile()) {
1086  m_ErrRepository->AddValidErrItem(sv, et, msg);
1087  return;
1088  }
1089 
1090  string desc = "Pubdesc: ";
1091  x_AddValidErrItem(sv, et, msg, desc, pd, "", 0);
1092 }
1093 
1094 
1096 (EDiagSev sv,
1097  EErrType et,
1098  const string& msg,
1099  const CSeq_submit& ss)
1100 {
1101  // Adjust severity
1102  if (m_genomeSubmission && RaiseGenomeSeverity(et) && sv < eDiag_Error) {
1103  sv = eDiag_Error;
1104  }
1105 
1106  if (GenerateGoldenFile()) {
1107  m_ErrRepository->AddValidErrItem(sv, et, msg);
1108  return;
1109  }
1110 
1111  string desc = "Seq-submit: ";
1112  x_AddValidErrItem(sv, et, msg, desc, ss, "", 0);
1113 }
1114 
1115 
1117  EDiagSev sev,
1118  EErrType type,
1119  const string& msg,
1120  const string& desc,
1121  const CSerialObject& obj,
1122  const string& accession,
1123  const int version)
1124 {
1125  if (IsHugeFileMode()) {
1126  m_ErrRepository->AddValidErrItem(sev, type, msg, desc, accession, version);
1127  return;
1128  }
1129  m_ErrRepository->AddValidErrItem(sev, type, msg, desc, obj, accession, version);
1130 }
1131 
1132 
1134 (EDiagSev sv,
1135  EErrType et,
1136  const string& msg,
1137  const CSerialObject& obj,
1138  const CSeq_entry *ctx)
1139 {
1140  if (!ctx) {
1141  PostErr (sv, et, msg, obj);
1142  } else if (obj.GetThisTypeInfo() == CSeqdesc::GetTypeInfo()) {
1143  PostErr(sv, et, msg, *ctx, *(dynamic_cast <const CSeqdesc*> (&obj)));
1144  } else {
1145  PostErr(sv, et, msg, obj);
1146  }
1147 
1148 }
1149 
1150 
1152 (EDiagSev sv,
1153  const string& msg,
1154  int flags,
1155  const CSerialObject& obj,
1156  const CSeq_entry *ctx)
1157 {
1158  string reasons = GetDateErrorDescription(flags);
1159 
1160  NStr::TruncateSpacesInPlace (reasons);
1161  reasons = msg + " - " + reasons;
1162 
1163  PostObjErr (sv, eErr_GENERIC_BadDate, reasons, obj, ctx);
1164 }
1165 
1166 
1168 (const CSeq_entry& se,
1169  const CCit_sub* cs,
1170  CScope* scope)
1171 {
1172  CSeq_entry_Handle seh;
1173  try {
1174  seh = scope->GetSeq_entryHandle(se);
1175  } catch (const CException& ) { ; }
1176  if (! seh) {
1177  seh = scope->AddTopLevelSeqEntry(se);
1178  if (!seh) {
1179  return false;
1180  }
1181  }
1182 
1183  return Validate(seh, cs);
1184 }
1185 
1186 static bool s_IsPhage(const COrg_ref& org)
1187 {
1188  if (org.IsSetDivision() && NStr::Equal(org.GetDivision(), "PHG")) {
1189  return true;
1190  } else {
1191  return false;
1192  }
1193 }
1194 
1195 
1197 {
1198  bool has_mult = false;
1199  int first_id = 0;
1200  int phage_id = 0;
1201 
1202  for (CBioseq_CI bi(seh); bi; ++bi) {
1203  for (CSeqdesc_CI desc_ci(*bi, CSeqdesc::e_Source);
1204  desc_ci && !has_mult;
1205  ++desc_ci) {
1206  if (desc_ci->GetSource().IsSetOrg()) {
1207  const COrg_ref& org = desc_ci->GetSource().GetOrg();
1208  if (org.IsSetDb()) {
1209  ITERATE(COrg_ref::TDb, it, org.GetDb()) {
1210  if ((*it)->IsSetDb() && NStr::EqualNocase((*it)->GetDb(), "taxon") &&
1211  (*it)->IsSetTag() && (*it)->GetTag().IsId()) {
1212  int this_id = (*it)->GetTag().GetId();
1213  if (this_id > 0) {
1214  if (s_IsPhage(org)) {
1215  phage_id = this_id;
1216  } else if (first_id == 0) {
1217  first_id = this_id;
1218  } else if (first_id != this_id) {
1219  has_mult = true;
1220  }
1221  }
1222  }
1223  }
1224  }
1225  }
1226  }
1227  }
1228  if (has_mult || (phage_id > 0 && first_id > 0)) {
1230  "There are multiple taxonIDs in this RefSeq record.",
1231  *m_TSE);
1232  }
1233 }
1234 
1235 
1237 {
1238  return *m_pEntryInfo;
1239 }
1240 
1241 
1243 {
1244  if (!m_pEntryInfo) {
1245  m_pEntryInfo.reset(new CValidatorEntryInfo());
1246  }
1247 
1248  return *m_pEntryInfo;
1249 }
1250 
1251 
1253 (const CSeq_entry_Handle& seh,
1254  const CCit_sub* cs)
1255 {
1256  _ASSERT(seh);
1257 
1258  if ( m_PrgCallback ) {
1260  if ( m_PrgCallback(&m_PrgInfo) ) {
1261  return false;
1262  }
1263  }
1264 
1265  // Check that CSeq_entry has data
1266  if (seh.Which() == CSeq_entry::e_not_set) {
1267  ERR_POST_X(2, Warning << "Seq_entry not set");
1268  return false;
1269  }
1270 
1271  Setup(seh);
1272 
1273  // Seq-submit has submission citation
1274  if (cs) {
1275  x_SetEntryInfo().SetNoPubs(false);
1277  }
1278 
1279  // Get first CBioseq object pointer for PostErr below.
1281  if (!seq) {
1283  "No Bioseqs in this entire record.", seh.GetCompleteSeq_entry()->GetSet());
1284  return true;
1285  }
1286 
1287  // If m_NonASCII is true, then this flag was set by the caller
1288  // of validate to indicate that a non ascii character had been
1289  // read from a file being used to create a CSeq_entry, that the
1290  // error had been corrected, but that the error needs to be reported
1291  // by Validate. Note, Validate is not doing anything other than
1292  // reporting an error if m_NonASCII is true;
1293  if (m_NonASCII) {
1295  "Non-ascii chars in input ASN.1 strings", *seq);
1296  // Only report the error once
1297  m_NonASCII = false;
1298  }
1299 
1300  // Iterate thru components of record and validate each
1301 
1302  // also want to know if we have gi
1303  bool has_gi = false;
1304  // also want to know if there are any nucleotide sequences
1305  bool has_nucleotide_sequence = false;
1306 
1308  bi && (!IsINSDInSep() || !has_gi || !has_nucleotide_sequence);
1309  ++bi) {
1310  FOR_EACH_SEQID_ON_BIOSEQ (it, *(bi->GetCompleteBioseq())) {
1311  if ((*it)->IsGi()) {
1312  has_gi = true;
1313  }
1314  }
1315  if (bi->IsSetInst_Mol() && bi->IsNa()) {
1316  has_nucleotide_sequence = true;
1317  }
1318  }
1319 
1320  if (IsINSDInSep() && m_pEntryInfo->IsRefSeq()) {
1321  // NOTE: We use m_IsRefSeq to indicate the actual presence of RefSeq IDs in
1322  // the record, rather than IsRefSeq(), which indicates *either* RefSeq IDs are
1323  // present *OR* the refseq flag has been used
1325  "INSD and RefSeq records should not be present in the same set", *m_TSE);
1326  }
1327 
1328 #if 0
1329  // disabled for now
1330  // look for long IDs that would collide if truncated at 30 characters
1331  vector<string> id_strings;
1333  bi;
1334  ++bi) {
1335  FOR_EACH_SEQID_ON_BIOSEQ (it, *(bi->GetCompleteBioseq())) {
1336  if (!IsNCBIFILESeqId(**it)) {
1337  string label;
1338  (*it)->GetLabel(&label);
1339  id_strings.push_back(label);
1340  }
1341  }
1342  }
1343  stable_sort (id_strings.begin(), id_strings.end());
1344  for (vector<string>::iterator id_str_it = id_strings.begin();
1345  id_str_it != id_strings.end();
1346  ++id_str_it) {
1347  string pattern = (*id_str_it).substr(0, 30);
1348  string first_id = *id_str_it;
1349  vector<string>::iterator cmp_it = id_str_it;
1350  ++cmp_it;
1351  while (cmp_it != id_strings.end() && NStr::StartsWith(*cmp_it, pattern)) {
1352  CRef<CSeq_id> id(new CSeq_id(*cmp_it));
1355  "First 30 characters of " + first_id + " and " +
1356  *cmp_it + " are identical", *(bsh.GetCompleteBioseq()));
1357  ++id_str_it;
1358  ++cmp_it;
1359  }
1360  }
1361 #endif
1362 
1363  // look for colliding feature IDs
1364  vector < int > feature_ids;
1365  for (CFeat_CI fi(GetTSEH()); fi; ++fi) {
1366  const CSeq_feat& sf = fi->GetOriginalFeature();
1367  if (sf.IsSetId() && sf.GetId().IsLocal() && sf.GetId().GetLocal().IsId()) {
1368  feature_ids.push_back(sf.GetId().GetLocal().GetId());
1369  }
1370  }
1371 
1372  if (feature_ids.size() > 0) {
1373  const CTSE_Handle& tse = seh.GetTSE_Handle ();
1374  stable_sort (feature_ids.begin(), feature_ids.end());
1375  vector <int>::iterator it = feature_ids.begin();
1376  int id = *it;
1377  ++it;
1378  while (it != feature_ids.end()) {
1379  if (*it == id) {
1380  vector<CSeq_feat_Handle> handles = tse.GetFeaturesWithId(CSeqFeatData::e_not_set, id);
1381  ITERATE( vector<CSeq_feat_Handle>, feat_it, handles ) {
1383  "Colliding feature ID " + NStr::NumericToString (id), *(feat_it->GetSeq_feat()));
1384  }
1385  while (it != feature_ids.end() && *it == id) {
1386  ++it;
1387  }
1388  if (it != feature_ids.end()) {
1389  id = *it;
1390  ++it;
1391  }
1392  } else {
1393  id = *it;
1394  ++it;
1395  }
1396  }
1397  }
1398 
1399  // look for mixed gps and non-gps sets
1400  bool has_nongps = false;
1401  bool has_gps = false;
1402 
1403  for (CTypeConstIterator<CBioseq_set> si(*m_TSE); si && (!has_nongps || !has_gps); ++si) {
1404  if (si->IsSetClass()) {
1405  if (si->GetClass() == CBioseq_set::eClass_mut_set
1406  || si->GetClass() == CBioseq_set::eClass_pop_set
1407  || si->GetClass() == CBioseq_set::eClass_phy_set
1408  || si->GetClass() == CBioseq_set::eClass_eco_set
1409  || si->GetClass() == CBioseq_set::eClass_wgs_set
1410  || si->GetClass() == CBioseq_set::eClass_small_genome_set) {
1411  has_nongps = true;
1412  } else if (si->GetClass() == CBioseq_set::eClass_gen_prod_set) {
1413  has_gps = true;
1414  }
1415  }
1416  }
1417 
1418  if (has_nongps && has_gps) {
1420  "Genomic product set and mut/pop/phy/eco set records should not be present in the same set",
1421  *m_TSE);
1422  }
1423 
1424  // count inference accessions - if there are too many, temporarily disable inference checking
1425  bool old_inference_acc_check = m_ValidateInferenceAccessions;
1427  size_t num_inferences = 0, num_accessions = 0;
1428  CFeat_CI feat_inf(seh);
1429  while (feat_inf) {
1430  FOR_EACH_GBQUAL_ON_FEATURE (qual, *feat_inf) {
1431  if ((*qual)->IsSetQual() && (*qual)->IsSetVal() && NStr::Equal((*qual)->GetQual(), "inference")) {
1432  num_inferences++;
1433  string prefix, remainder;
1434  bool same_species;
1435  vector<string> accessions = CValidError_feat::GetAccessionsFromInferenceString ((*qual)->GetVal(), prefix, remainder, same_species);
1436  for (size_t i = 0; i < accessions.size(); i++) {
1437  NStr::TruncateSpacesInPlace (accessions[i]);
1438  string acc_prefix, accession;
1439  if (CValidError_feat::GetPrefixAndAccessionFromInferenceAccession (accessions[i], acc_prefix, accession)) {
1440  if (NStr::EqualNocase (acc_prefix, "INSD") || NStr::EqualNocase (acc_prefix, "RefSeq")) {
1441  num_accessions++;
1442  }
1443  }
1444  }
1445  }
1446  }
1447  ++feat_inf;
1448  }
1449  if (/* num_inferences > 1000 || */ num_accessions > 1000) {
1450  // warn about too many inferences
1452  "Skipping validation of " + NStr::SizetToString (num_inferences) + " /inference qualifiers with "
1453  + NStr::SizetToString (num_accessions) + " accessions",
1454  *m_TSE);
1455 
1456  // disable inference checking
1458  }
1459  }
1460 
1461  // validate the main data
1462  if (seh.IsSeq()) {
1463  const CBioseq& seq2 = seh.GetCompleteSeq_entry()->GetSeq();
1464  CValidError_bioseq bioseq_validator(*this);
1465  try {
1466  bioseq_validator.ValidateBioseq(seq2);
1467  } catch ( const exception& e ) {
1469  string("Exception while validating bioseq. EXCEPTION: ") +
1470  e.what(), seq2);
1471  return true;
1472  }
1473  } else if (seh.IsSet()) {
1474  const CBioseq_set& set = seh.GetCompleteSeq_entry()->GetSet();
1475  CValidError_bioseqset bioseqset_validator(*this);
1476 
1477  try {
1478  bioseqset_validator.ValidateBioseqSet(set);
1479 
1480  } catch ( const exception& e ) {
1482  string("Exception while validating bioseq set. EXCEPTION: ") +
1483  e.what(), set);
1484  return true;
1485  }
1486  }
1487 
1488  // put flag for validating inference accessions back to original value
1489  m_ValidateInferenceAccessions = old_inference_acc_check;
1490 
1491  // validation from data collected during previous step
1492 
1493  if (!GetContext().PreprocessHugeFile) {
1494  if ( m_NumTpaWithHistory > 0 &&
1495  m_NumTpaWithoutHistory > 0 ) {
1497  "There are " +
1499  " TPAs with history and " +
1501  " without history in this record.", *seq);
1502  }
1503  if ( m_NumTpaWithoutHistory > 0 && has_gi) {
1505  "There are " +
1507  " TPAs without history in this record, but the record has a gi number assignment.", *m_TSE);
1508  }
1509  }
1510 
1511  if (IsIndexerVersion() && DoesAnyProteinHaveGeneralID() && !IsRefSeq() && has_nucleotide_sequence) {
1512  call_once(SetContext().ProteinHaveGeneralIDOnceFlag,
1513  [](CValidError_imp* imp, CSeq_entry_Handle seh2) {
1515  "INDEXER_ONLY - Protein bioseqs have general seq-id.",
1516  *(seh2.GetCompleteSeq_entry()));
1517  }, this, seh);
1518  }
1519 
1520  ReportMissingPubs(*m_TSE, cs);
1522 
1523  if (m_NumMisplacedFeatures > 1) {
1525  "There are " + NStr::SizetToString (m_NumMisplacedFeatures) + " mispackaged features in this record.",
1526  *(seh.GetCompleteSeq_entry()));
1527  } else if (m_NumMisplacedFeatures == 1) {
1529  "There is 1 mispackaged feature in this record.",
1530  *(seh.GetCompleteSeq_entry()));
1531  }
1532  if (m_NumSmallGenomeSetMisplaced > 1) {
1534  "There are " + NStr::SizetToString (m_NumSmallGenomeSetMisplaced) + " mispackaged features in this small genome set record.",
1535  *(seh.GetCompleteSeq_entry()));
1536  } else if (m_NumSmallGenomeSetMisplaced == 1) {
1538  "There is 1 mispackaged feature in this small genome set record.",
1539  *(seh.GetCompleteSeq_entry()));
1540  }
1541  if ( !GetContext().PreprocessHugeFile ) {
1542  if ( m_NumGenes == 0 && m_NumGeneXrefs > 0 ) {
1544  "There are " + NStr::SizetToString(m_NumGeneXrefs) +
1545  " gene xrefs and no gene features in this record.", *m_TSE);
1546  }
1547  }
1548  ValidateCitations (seh);
1549 
1550 
1551  if ( m_NumMisplacedGraphs > 0 ) {
1554  string("There ") + ((m_NumMisplacedGraphs > 1) ? "are " : "is ") + num +
1555  " mispackaged graph" + ((m_NumMisplacedGraphs > 1) ? "s" : "") + " in this record.",
1556  *m_TSE);
1557  }
1558 
1559  if ( IsRefSeq() && ! IsWP() ) {
1561  }
1562 
1563 
1566  if (!GetContext().PreprocessHugeFile) {
1568  }
1569 
1570  if (m_FarFetchFailure) {
1572  "Far fetch failures caused some validator tests to be bypassed",
1573  *m_TSE);
1574  }
1575 
1576  if (m_DoTaxLookup) {
1578  }
1579 
1580  // validate cit-sub
1581  if (cs) {
1583  }
1584 
1585  // optional barcode tests
1586  if (m_DoBarcodeTests) {
1587  x_DoBarcodeTests(seh);
1588  }
1589  return true;
1590 }
1591 
1592 
1594 {
 // Checks one submit block (`block`); every report is attached to `ss`.
 // NOTE(review): the signature line and the opening line(s) of each reporting
 // call are missing from this listing. Presumably this is the body of
 // ValidateSubmitBlock, which other code in this file invokes as
 // ValidateSubmitBlock(ss.GetSub(), ss) -- confirm against the full file.
 //
 // The hup flag is on although the release date already lies in the past.
1595  if (block.IsSetHup() && block.GetHup() && block.IsSetReldate() &&
1596  IsDateInPast(block.GetReldate())) {
1598  "Record release date has already passed", ss);
1599  }
1600 
 // Contact person: check a standard-form affiliation, then the name parts.
1601  if (block.IsSetContact() && block.GetContact().IsSetContact()) {
1602  const CAuthor& author = block.GetContact().GetContact();
1603  if (author.IsSetAffil() && author.GetAffil().IsStd()) {
1604  ValidateAffil(author.GetAffil().GetStd(), ss, nullptr);
1605  }
1606  const CPerson_id& pid = author.GetName();
1607  if (pid.IsName()) {
1608  const CName_std& nstd = pid.GetName();
1609  string first = "";
1610  string last = "";
1611  if (nstd.IsSetLast()) {
1612  last = nstd.GetLast();
 // (the test deciding that the last name is bad is on lines missing here)
1615  "Bad last name '" + last + "'", ss);
1616  }
1617  }
1618  if (nstd.IsSetFirst()) {
1619  first = nstd.GetFirst();
 // (the test deciding that the first name is bad is on lines missing here)
1622  "Bad first name '" + first + "'", ss);
1623  }
1624  }
 // Both fields literally read "first"/"last" (case-insensitive comparison).
1625  if (first != "" && last != "" && NStr::EqualNocase(last, "last") && NStr::EqualNocase(first, "first")) {
1627  "Bad first and last name", ss);
1628  }
1629  }
1630  }
 // Citation authors: the same name checks, applied to every entry of a
 // standard-form author list.
1631  if (block.IsSetCit()) {
1632  const CCit_sub& sub = block.GetCit();
1633  if (sub.IsSetAuthors()) {
1634  const CAuth_list& auth_list = sub.GetAuthors();
1635  const CAuth_list::TNames& names = auth_list.GetNames();
1636  if (names.IsStd()) {
1637  ITERATE ( CAuth_list::C_Names::TStd, name, names.GetStd() ) {
1638  if ( (*name)->GetName().IsName() ) {
1639  const CName_std& nstd = (*name)->GetName().GetName();
1640  string first = "";
1641  string last = "";
1642  if (nstd.IsSetLast()) {
1643  last = nstd.GetLast();
1646  "Bad last name '" + last + "'", ss);
1647  }
1648  }
1649  if (nstd.IsSetFirst()) {
1650  first = nstd.GetFirst();
1653  "Bad first name '" + first + "'", ss);
1654  }
1655  }
1656  if (first != "" && last != "" && NStr::EqualNocase(last, "last") && NStr::EqualNocase(first, "first")) {
1658  "Bad first and last name", ss);
1659  }
1660  }
1661  }
1662  }
1663  }
1664  }
1665 }
1666 
1667 
1669  const CSeq_submit& ss, CScope* scope)
1670 {
 // Validates a Seq-submit: its submit block first, then each Seq-entry it holds.
 // NOTE(review): the first signature line and several statement lines are
 // missing from this listing (the braces below do not balance as shown);
 // the comments describe only what is visible.

1671  // Check that ss is type e_Entrys
1672  if ( ss.GetData().Which() != CSeq_submit::C_Data::e_Entrys ) {
1673  return;
1674  }
1675 
 // In huge-file mode the submit block is validated through call_once on the
 // context's SubmitBlockOnceFlag; otherwise it is validated directly.
1677  if (ss.IsSetSub()) {
1678  if (IsHugeFileMode()) {
1679  call_once(SetContext().SubmitBlockOnceFlag,
1680  [this, &ss](){ ValidateSubmitBlock(ss.GetSub(), ss); });
1681  }
1682  else {
1683  ValidateSubmitBlock(ss.GetSub(), ss);
1684  }
1685  }
1686 
1687  // Get CCit_sub pointer
 // NOTE(review): ss.GetSub() is used here without the ss.IsSetSub() guard that
 // the statements just above and below apply -- confirm `sub` is always set.
1688  const CCit_sub* cs = &ss.GetSub().GetCit();
1689 
 // Submissions whose tool name starts with "Geneious" get special handling on
 // a line that is missing from this listing.
1690  if (ss.IsSetSub() && ss.GetSub().IsSetTool() && NStr::StartsWith(ss.GetSub().GetTool(), "Geneious")) {
1692  }
1693 
1694  // Just loop thru CSeq_entrys
1695  FOR_EACH_SEQENTRY_ON_SEQSUBMIT (se_itr, ss) {
1696  const CSeq_entry& se = **se_itr;
1697  if(se.IsSet())
1698  {
1699  const CBioseq_set &set = se.GetSet();
 // A wgs-set inside a Seq-submit is reported. One visible path goes through
 // call_once on WgsSetInSeqSubmitOnceFlag, the other reports directly; the
 // condition choosing between them is on a missing line.
1700  if(set.IsSetClass() &&
1701  set.GetClass() == CBioseq_set::eClass_wgs_set)
1702  {
1704  CSeq_entry_Handle seh;
1705  seh = scope->GetSeq_entryHandle(se);
1706  Setup(seh);
1707  call_once(SetContext().WgsSetInSeqSubmitOnceFlag,
1708  [this, seh]() {
1710  "File was created as a wgs-set, but should be a batch submission instead.",
1711  seh.GetCompleteSeq_entry()->GetSet());
1712  });
1713  } else {
1714  CSeq_entry_Handle seh;
1715  seh = scope->GetSeq_entryHandle(se);
1716  Setup(seh);
1718  "File was created as a wgs-set, but should be a batch submission instead.",
1719  seh.GetCompleteSeq_entry()->GetSet());
1720  }
1721  }
1722  }
 // Full validation of this entry, passing the submission's Cit-sub along.
1723  Validate (se, cs, scope);
1724  }
1725 }
1726 
1727 
1729  const CSeq_annot_Handle& sah)
1730 {
 // Validates a Seq-annot handle: the annotation as a whole, then each item in it
 // with the validator matching the annotation's content type.
 // NOTE(review): the first signature line, the `case` labels of the switch and
 // the lines between the end of the switch and the closing brace are missing
 // from this listing.
1731  Setup(sah);
1732 
1733  // Iterate thru components of record and validate each
1734 
1735  CValidError_annot annot_validator(*this);
1736  annot_validator.ValidateSeqAnnot(sah);
1737 
1738  switch (sah.Which()) {
 // features: validate every original feature of the annotation
1740  {
1741  CValidError_feat feat_validator(*this);
1742  for (CFeat_CI fi (sah); fi; ++fi) {
1743  const CSeq_feat& sf = fi->GetOriginalFeature();
1744  feat_validator.ValidateSeqFeat(sf);
1745  }
1746  }
1747  break;
1748 
 // alignments: only when IsValidateAlignments() is true; `order` is a
 // 1-based running index handed to the alignment validator
1750  {
1751  if (IsValidateAlignments()) {
1752  CValidError_align align_validator(*this);
1753  int order = 1;
1754  for (CAlign_CI ai(sah); ai; ++ai) {
1755  const CSeq_align& sa = ai.GetOriginalSeq_align();
1756  align_validator.ValidateSeqAlign(sa, order++);
1757  }
1758  }
1759  }
1760  break;
1761 
 // graphs: validate every original graph of the annotation
1763  {
1764  CValidError_graph graph_validator(*this);
1765  // for (CTypeConstIterator <CSeq_graph> gi (sa); gi; ++gi) {
1766  for (CGraph_CI gi(sah); gi; ++gi) {
1767  const CSeq_graph& sg = gi->GetOriginalGraph();
1768  graph_validator.ValidateSeqGraph(sg);
1769  }
1770  }
1771  break;
 // any other content type: nothing further to check
1772  default:
1773  break;
1774  }
1778 }
1779 
1780 
// Validates a single feature outside the context of a full record.
// `scope`, when non-null, replaces the working scope for the duration of the
// call; if no scope is available at all a temporary one is created.
// NOTE(review): listing lines 1797, 1803 and 1808 are missing (among them the
// declaration of `empty` and the body of the IsSetOrg() branch).
1781 void CValidError_imp::Validate(const CSeq_feat& feat, CScope* scope)
1782 {
1783  // automatically restores m_Scope to its old value when we leave
1784  // the function
1785  CScopeRestorer scopeRestorer( m_Scope );
1786 
1787  if( scope ) {
1788  m_Scope.Reset(scope);
1789  }
1790  if (!m_Scope) {
1791  // set up a temporary local scope if there is no scope set already
1792  m_Scope.Reset(new CScope(*m_ObjMgr));
1793  }
1794 
 // The feature validator is pointed at the working scope and at `empty` as
 // its TSE before the feature itself is validated.
1795  CValidError_feat feat_validator(*this);
1796  feat_validator.SetScope(*m_Scope);
1798  feat_validator.SetTSE(empty);
1799  feat_validator.ValidateSeqFeat(feat);
 // A BioSource feature whose source has an Org-ref gets an additional check
 // (the call itself is on a missing line).
1800  if (feat.IsSetData() && feat.GetData().IsBiosrc()) {
1801  const CBioSource& src = feat.GetData().GetBiosrc();
1802  if (src.IsSetOrg()) {
1804  }
1805  }
 // Text-level scans over the whole feature.
1806  FindEmbeddedScript(feat);
1807  FindNonAsciiText(feat);
1809 }
1810 
1811 
1813 {
 // Validates a stand-alone BioSource `src`, optionally within `scope`.
 // NOTE(review): the signature line and listing lines 1828 and 1832 are missing;
 // presumably this is the CBioSource overload of Validate -- confirm.
1814  // automatically restores m_Scope to its old value when we leave
1815  // the function
1816  CScopeRestorer scopeRestorer( m_Scope );
1817 
1818  if( scope ) {
1819  m_Scope.Reset(scope);
1820  }
1821  if (!m_Scope) {
1822  // set up a temporary local scope if there is no scope set already
1823  m_Scope.Reset(new CScope(*m_ObjMgr));
1824  }
1825 
 // `src` is passed both as the object to check and as the object to report on.
1826  ValidateBioSource(src, src);
 // extra check when an Org-ref is present (the call is on a missing line)
1827  if (src.IsSetOrg()) {
1829  }
1830  FindEmbeddedScript(src);
1831  FindNonAsciiText(src);
1833 }
1834 
1835 
1836 void CValidError_imp::Validate(const CPubdesc& pubdesc, CScope* scope)
1837 {
1838  // automatically restores m_Scope to its old value when we leave
1839  // the function
1840  CScopeRestorer scopeRestorer( m_Scope );
1841 
1842  if( scope ) {
1843  m_Scope.Reset(scope);
1844  }
1845  if (!m_Scope) {
1846  // set up a temporary local scope if there is no scope set already
1847  m_Scope.Reset(new CScope(*m_ObjMgr));
1848  }
1849 
1850  ValidatePubdesc(pubdesc, pubdesc);
1851  FindEmbeddedScript(pubdesc);
1852  FindNonAsciiText(pubdesc);
1853  FindCollidingSerialNumbers(pubdesc);
1854 }
1855 
1857 {
 // Validates a single descriptor `desc` in a freshly created scope.
 // NOTE(review): the signature line and listing line 1860 are missing.
 // NOTE(review): unlike the sibling Validate overloads in this file, no
 // CScopeRestorer is visible here, so m_Scope appears to stay replaced after
 // the call returns -- confirm this is intended.
1858  CValidError_desc seqdesc_validator(*this);
1859  m_Scope.Reset(new CScope(*m_ObjMgr));
1861  seqdesc_validator.ValidateSeqDesc(desc,ctx);
1862 }
1863 
1864 
1867  void* user_data)
1868 {
 // Stores the progress callback and the caller's opaque pointer; nothing is
 // invoked here. (The first lines of the signature are missing from this listing.)
1869  m_PrgCallback = callback;
1870  m_PrgInfo.m_UserData = user_data;
1871 }
1872 
1873 
1875 (const CDbtag& xref,
1876  const CSerialObject& obj,
1877  bool biosource,
1878  const CSeq_entry *ctx)
1879 {
 // Checks one db_xref (`xref`) and reports problems against `obj` / `ctx`.
 // NOTE(review): the line naming the function, the computation of `flags`, most
 // `if` conditions and every reporting-call opener are missing from this
 // listing; only the message texts and a few flag tests remain visible.
1880  bool refseq_or_gps = IsRefSeq() || IsGPS();
1882  refseq_or_gps);
1883 
 // database name, or the empty string when none is set
1884  const string& db = xref.IsSetDb() ? xref.GetDb() : kEmptyStr;
1885 
1888  "dbxref value " + xref.GetTag().GetStr() + " has SGML",
1889  obj, ctx);
1890  }
1893  "dbxref value " + xref.GetTag().GetStr() + " contains space character",
1894  obj, ctx);
1895  }
1896  if (flags & CValidator::eDbHasSgml) {
1898  "dbxref database " + db + " has SGML",
1899  obj, ctx);
1900  }
1901 
 // dbv: the tag rendered as text; isStr records that it was stored as a string
 // (an integer id is converted with NumericToString).
1902  bool isStr = false;
1903  string dbv;
1904  if (xref.IsSetTag() && xref.GetTag().IsStr()) {
1905  dbv = xref.GetTag().GetStr();
1906  isStr = true;
1907  } else if (xref.IsSetTag() && xref.GetTag().IsId()) {
1908  dbv = NStr::NumericToString(xref.GetTag().GetId());
1909  }
1910 
1913  "Illegal db_xref type " + db + " (" + dbv + ")", obj, ctx);
1914  }
1916  // capitalization is bad
 // GetDBFlags supplies the correctly capitalized database name for the message.
1917  bool refseq_db = false, src_db = false;
1918  string correct_caps;
1919  xref.GetDBFlags(refseq_db, src_db, correct_caps);
1920  string message = "Illegal db_xref type " + db + " (" + dbv + "), legal capitalization is " + correct_caps;
1922  message += ", but should not be used on an OrgRef";
1923  } else if (flags & CValidator::eOnlyForSource) {
1924  message += ", but should only be used on an OrgRef";
1925  }
1926 
1928  } else {
1932  "RefSeq-specific db_xref type " + db + " (" + dbv + ") should not be used on a non-RefSeq OrgRef",
1933  obj, ctx);
1934  } else {
1936  "db_xref type " + db + " (" + dbv + ") is only legal for RefSeq",
1937  obj, ctx);
1938  }
1939  } else if (flags & CValidator::eNotForSource) {
1942  "RefSeq-specific db_xref type " + db + " (" + dbv + ") should not be used on an OrgRef",
1943  obj, ctx);
1944  } else {
1946  "db_xref type " + db + " (" + dbv + ") should not be used on an OrgRef",
1947  obj, ctx);
1948  }
1949  } else if (flags & CValidator::eOnlyForSource) {
1951  "db_xref type " + db + " (" + dbv + ") should only be used on an OrgRef",
1952  obj, ctx);
1953  }
1954  }
1955 
 // A GeneID whose tag was stored as a string rather than an integer.
1956  if (isStr && db == "GeneID") {
1958  "db_xref type " + db + " (" + dbv + ") is required to be an integer",
1959  obj, ctx);
1960  }
1961 }
1962 
1963 
1965 (TDbtags& xref_list,
1966  const CSerialObject& obj,
1967  bool biosource,
1968  const CSeq_entry *ctx)
1969 {
 // Validates every db_xref in `xref_list` with the single-xref overload and, when
 // `biosource` is true, also flags a database name that repeats.
 // (The line naming the function and the reporting-call opener are missing
 // from this listing.)
 //
 // last_db holds the database of the previous entry that had one set, so only a
 // repeat relative to that entry is detected; the comparison ignores case.
1970  string last_db;
1971 
1972  ITERATE( TDbtags, xref, xref_list) {
1973  if (biosource
1974  && (*xref)->IsSetDb()) {
1975  if (!NStr::IsBlank(last_db)
1976  && NStr::EqualNocase((*xref)->GetDb(), last_db)) {
1978  "BioSource uses db " + last_db + " multiple times",
1979  obj, ctx);
1980  }
1981  last_db = (*xref)->GetDb();
1982  }
1983  ValidateDbxref(**xref, obj, biosource, ctx);
1984  }
1985 }
1986 
1987 
1989 (const CPacked_seqint& packed_int,
1990  SLocCheck& lc,
1991  const CSerialObject& obj)
1992 {
 // Runs the interval check on every interval of a packed-int location, updating
 // the running state in `lc`. (The line naming the function and listing line
 // 1997 are missing from this listing.)
1993  ITERATE(CPacked_seqint::Tdata, it, packed_int.Get()) {
1994  lc.int_cur = (*it);
 // &= : once any interval fails, lc.chk stays false for the rest of the loop
1995  lc.chk &= x_CheckSeqInt(lc.id_cur, lc.int_cur, lc.strand_cur);
1996 
1998 
 // the current id / strand / interval become "previous" for the next round
1999  lc.id_prv = lc.id_cur;
2000  lc.strand_prv = lc.strand_cur;
2001  lc.int_prv = lc.int_cur;
2002  }
2003 }
2004 
2005 
2007  CConstRef<CSeq_id>& id_cur,
2008  const CSeq_interval* int_cur,
2009  ENa_strand& strand_cur)
2010 {
 // Extracts strand and id from the interval `int_cur` into the two output
 // references and returns the result of IsValid() for the interval in m_Scope.
 // A missing strand is reported as eNa_strand_unknown.
 // (The first signature line is missing from this listing.)
2011  strand_cur = int_cur->IsSetStrand() ?
2012  int_cur->GetStrand() : eNa_strand_unknown;
2013  id_cur = &int_cur->GetId();
2014  bool chk = IsValid(*int_cur, m_Scope);
2015  return chk;
2016 }
2017 
2018 
2020 {
 // Applies the single-interval fuzz check to each interval of `packed_int`.
 // (The signature line is missing from this listing.)
2021  ITERATE(CPacked_seqint::Tdata, it, packed_int.Get()) {
2022  x_ReportInvalidFuzz(**it, obj);
2023  }
2024 }
2025 
2026 
// Message texts for fuzz-related reports.
// First pair: space-to-left / space-to-right fuzz at the ends of a non-circular sequence.
2027 static const string kSpaceLeftFirst = "Should not specify 'space to left' at first position of non-circular sequence";
2028 static const string kSpaceRightLast = "Should not specify 'space to right' at last position of non-circular sequence";
2029 
// Second pair: circle fuzz away from the ends of a circular sequence.
2030 static const string kSpaceLeftCircle = "Should not specify 'circle to left' except at first position of circular sequence";
2031 static const string kSpaceRightCircle = "Should not specify 'circle to right' except at last position of circular sequence";
2032 
2034 {
 // Checks the limit-type fuzz on both ends of `interval` and reports invalid
 // combinations against `obj`.
 // NOTE(review): the signature line, the declarations of fuzz_from / fuzz_to /
 // top, and every reporting-call opener are missing from this listing.
2037  bool has_fuzz_from = false;
2038  bool has_fuzz_to = false;
2039 
 // Only fuzz of the "lim" kind is considered; other kinds are ignored.
2040  if (interval.IsSetFuzz_from() && interval.GetFuzz_from().IsLim()) {
2041  fuzz_from = interval.GetFuzz_from().GetLim();
2042  has_fuzz_from = true;
2043  }
2044  if (interval.IsSetFuzz_to() && interval.GetFuzz_to().IsLim()) {
2045  fuzz_to = interval.GetFuzz_to().GetLim();
2046  has_fuzz_to = true;
2047  }
2048  if (! has_fuzz_from && ! has_fuzz_to) {
2049  return;
2050  }
2051 
2052  // check for invalid fuzz on both ends of Interval
2053  if (has_fuzz_from && has_fuzz_to && fuzz_from == fuzz_to) {
2054  if (fuzz_from == CInt_fuzz::eLim_tl) {
2057  "Should not specify 'space to left' for both ends of interval", obj);
2058  }
2059  else if (fuzz_from == CInt_fuzz::eLim_tr) {
2062  "Should not specify 'space to right' for both ends of interval", obj);
2063  }
2064  else if (fuzz_from == CInt_fuzz::eLim_circle) {
2067  "Should not specify 'origin of circle' for both ends of interval", obj);
2068  }
2069  }
2070 
 // The remaining checks need the sequence itself; skip them if it cannot be
 // resolved in the current scope.
2071  CBioseq_Handle bsh = m_Scope->GetBioseqHandle(interval.GetId());
2072  if (! bsh) {
2073  return;
2074  }
2075 
2077  if (bsh.IsSetInst_Topology()) {
2078  top = bsh.GetInst_Topology();
2079  }
2080 
 // NOTE(review): fuzz_from / fuzz_to are compared below even when the matching
 // has_fuzz_* flag is false; this relies on their initial values, which are
 // set on lines missing here -- confirm.
2081  if (top != CSeq_inst::eTopology_circular) {
2082 
2083  // VR-15
2084  // look for space to left at beginning of sequence or space to right at end
2085  if (fuzz_from == CInt_fuzz::eLim_tl && interval.IsSetFrom() && interval.GetFrom() == 0) {
2087  }
2088  if (fuzz_to == CInt_fuzz::eLim_tr && interval.IsSetTo() && interval.GetTo() == bsh.GetBioseqLength() - 1) {
2090  }
2091 
2092  } else if (fuzz_from == CInt_fuzz::eLim_circle || fuzz_to == CInt_fuzz::eLim_circle) {
2093 
 // A feature whose exception text mentions "ribosomal slippage"
 // (case-insensitive) is exempt from the circle-fuzz position checks.
2094  if (obj.GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
2095  const CSeq_feat* sfp = dynamic_cast<const CSeq_feat*>(&obj);
2096  if (sfp && sfp->IsSetExcept() && sfp->CanGetExcept_text() && NStr::FindNoCase(sfp->GetExcept_text(), "ribosomal slippage") != NPOS) {
2097  return;
2098  }
2099  }
2100 
2101  // VR-832
 // circle fuzz on `from` away from position 0, or on `to` away from the
 // last position
2102  if (fuzz_from == CInt_fuzz::eLim_circle && interval.IsSetFrom() && interval.GetFrom() != 0) {
2104  }
2105  if (fuzz_to == CInt_fuzz::eLim_circle && interval.IsSetTo() && interval.GetTo() != bsh.GetBioseqLength() - 1) {
2107  }
2108  }
2109 }
2110 
2111 
2113 {
 // Checks limit-type fuzz on a single-point location.
 // NOTE(review): the signature line, the condition on listing line 2124 and both
 // reporting calls are missing from this listing.
2114  // VR-15
 // Nothing to do unless the point has tl or tr fuzz and both id and position.
2115  if (!point.IsSetFuzz() || !point.GetFuzz().IsLim() ||
2116  (point.GetFuzz().GetLim() != CInt_fuzz::eLim_tl && point.GetFuzz().GetLim() != CInt_fuzz::eLim_tr) ||
2117  !point.IsSetId() || !point.IsSetPoint()) {
2118  return;
2119  }
2120  CBioseq_Handle bsh = m_Scope->GetBioseqHandle(point.GetId());
2121  if (!bsh) {
2122  return;
2123  }
2125  return;
2126  }
 // tl fuzz at the very first position
2127  if (point.GetPoint() == 0 && point.GetFuzz().GetLim() == CInt_fuzz::eLim_tl) {
2129  }
 // NOTE(review): unlike the test above, this one does not look at the fuzz
 // value, so a point with tl fuzz at the last position also enters this
 // branch. Presumably an `== CInt_fuzz::eLim_tr` test was intended -- confirm.
2130  if (point.GetPoint() == bsh.GetBioseqLength() - 1) {
2132  }
2133 }
2134 
2135 
// Walks the parts of `loc` and runs the matching fuzz check on each interval,
// packed-interval and point part; other part types are ignored.
// NOTE(review): the declaration of the iterator `lit` and the `case` label for
// the packed-int branch are missing from this listing.
2136 void CValidError_imp::x_ReportInvalidFuzz(const CSeq_loc& loc, const CSerialObject& obj)
2137 {
2139  for (; lit; ++lit) {
2140  CSeq_loc::E_Choice loc_choice = lit->Which();
2141  switch (loc_choice) {
2142  case CSeq_loc::e_Int:
2143  x_ReportInvalidFuzz(lit->GetInt(), obj);
2144  break;
2146  x_ReportInvalidFuzz(lit->GetPacked_int(), obj);
2147  break;
2148  case CSeq_loc::e_Pnt:
2149  x_ReportInvalidFuzz(lit->GetPnt(), obj);
2150  break;
2151  default:
2152  break;
2153  }
2154  }
2155 }
2156 
2157 
// Returns how many of the parts visited by the iterator `lit` are of the "mix"
// choice.
// NOTE(review): the declaration of `lit` is missing from this listing, so which
// parts it visits (and at what depth) cannot be told from here.
2158 unsigned int s_CountMix(const CSeq_loc& loc)
2159 {
2160  unsigned int num_mix = 0;
2162  for (; lit; ++lit) {
2163  if (lit->IsMix()) {
2164  num_mix++;
2165  }
2166  }
2167  return num_mix;
2168 }
2169 
2170 
2172 {
 // Resets every field of the location-check state `lc` to its starting value:
 // check passes, no strand anomalies seen, no current/previous id or interval,
 // unknown strands; `prefix` is stored for later use in messages.
 // (The signature line is missing from this listing.)
2173  lc.chk = true;
2174  lc.unmarked_strand = false;
2175  lc.mixed_strand = false;
2176  lc.has_other = false;
2177  lc.has_not_other = false;
2178  lc.id_cur = nullptr;
2179  lc.id_prv = nullptr;
2180  lc.int_cur = nullptr;
2181  lc.int_prv = nullptr;
2182  lc.strand_cur = eNa_strand_unknown;
2183  lc.strand_prv = eNa_strand_unknown;
2184  lc.prefix = prefix;
2185 }
2186 
2188 {
 // Compares the current strand in `lc` with the previous one and records strand
 // anomalies. (The signature line is missing from this listing.)
 //
 // A change is only considered when neither strand is "other" and both ids refer
 // to the same bioseq. plus <-> unknown counts as "unmarked"; any other
 // difference counts as "mixed".
2189  if (lc.strand_prv != eNa_strand_other &&
2190  lc.strand_cur != eNa_strand_other) {
2191  if (lc.id_cur && lc.id_prv &&
2192  IsSameBioseq(*lc.id_cur, *lc.id_prv, m_Scope)) {
2193  if (lc.strand_prv != lc.strand_cur) {
2194  if ((lc.strand_prv == eNa_strand_plus &&
2195  lc.strand_cur == eNa_strand_unknown) ||
2196  (lc.strand_prv == eNa_strand_unknown &&
2197  lc.strand_cur == eNa_strand_plus)) {
2198  lc.unmarked_strand = true;
2199  } else {
2200  lc.mixed_strand = true;
2201  }
2202  }
2203  }
2204  }
 // Track whether "other" and explicit plus/minus strands both occur; an
 // unknown strand sets neither flag.
2205  if (lc.strand_cur == eNa_strand_other) {
2206  lc.has_other = true;
2207  } else if (lc.strand_cur == eNa_strand_minus || lc.strand_cur == eNa_strand_plus) {
2208  lc.has_not_other = true;
2209  }
2210 
2211 }
2212 
// Checks one location (recursing into the members of a mix), updating the
// running state in `lc` and reporting an out-of-range location against `obj`.
// `lowerSev` asks for Error rather than Critical severity on the range report;
// for a plain interval it is only honoured when the interval straddles the end
// of the sequence (see below).
// NOTE(review): the `case` labels for the packed-point and packed-int branches,
// listing lines 2263 and 2283, and both reporting-call openers are missing
// from this listing.
2213 void CValidError_imp::x_CheckLoc(const CSeq_loc& loc, const CSerialObject& obj, SLocCheck& lc, bool lowerSev)
2214 {
2215  try {
2216  switch (loc.Which()) {
2217  case CSeq_loc::e_Int:
2218  lc.int_cur = &loc.GetInt();
2219  lc.chk = x_CheckSeqInt(lc.id_cur, lc.int_cur, lc.strand_cur);
2220  if (lc.strand_cur == eNa_strand_other) {
2221  lc.has_other = true;
2222  }
 // invalid interval and the caller asked for reduced severity
2223  if ((!lc.chk) && lowerSev) {
2224  TSeqPos length = GetLength(loc.GetInt().GetId(), m_Scope);
2225  TSeqPos fr = loc.GetInt().GetFrom();
2226  TSeqPos to = loc.GetInt().GetTo();
2227  if (fr < length && to >= length) {
2228  // RefSeq variation feature with dbSNP xref and interval flanking the length is ERROR
2229  } else {
2230  // otherwise keep severity at REJECT
2231  lowerSev = false;
2232  }
2233  }
2234  break;
2235  case CSeq_loc::e_Pnt:
2236  lc.strand_cur = loc.GetPnt().IsSetStrand() ?
2237  loc.GetPnt().GetStrand() : eNa_strand_unknown;
2238  if (lc.strand_cur == eNa_strand_other) {
2239  lc.has_other = true;
2240  }
2241  lc.id_cur = &loc.GetPnt().GetId();
2242  lc.chk = IsValid(loc.GetPnt(), m_Scope);
 // a point is not an interval: clear the "previous interval"
2243  lc.int_prv = nullptr;
2244  break;
 // (packed-point branch; its `case` label is missing from this listing)
2246  lc.strand_cur = loc.GetPacked_pnt().IsSetStrand() ?
2247  loc.GetPacked_pnt().GetStrand() : eNa_strand_unknown;
2248  if (lc.strand_cur == eNa_strand_other) {
2249  lc.has_other = true;
2250  }
2251  lc.id_cur = &loc.GetPacked_pnt().GetId();
2252  lc.chk = IsValid(loc.GetPacked_pnt(), m_Scope);
2253  lc.int_prv = nullptr;
2254  break;
 // (packed-int branch; its `case` label is missing from this listing)
2256  x_CheckPackedInt(loc.GetPacked_int(), lc, obj);
2257  break;
2258  case CSeq_loc::e_Null:
2259  break;
2260  case CSeq_loc::e_Mix:
 // recurse into each member, sharing the same running state
2261  for (auto l : loc.GetMix().Get()) {
2262  x_CheckLoc(*l, obj, lc, lowerSev);
2264  }
2265  break;
 // every other choice is treated as strand "other" with no id
2266  default:
2267  lc.strand_cur = eNa_strand_other;
2268  lc.id_cur = nullptr;
2269  lc.int_prv = nullptr;
2270  break;
2271  }
 // Out-of-range report: Critical by default, Error when lowerSev survived.
2272  if (!lc.chk) {
2273  string lbl = GetValidatorLocationLabel (loc, *m_Scope);
2274  EDiagSev sev = eDiag_Critical;
2275  if (lowerSev) {
2276  sev = eDiag_Error;
2277  }
2279  lc.prefix + ": SeqLoc [" + lbl + "] out of range", obj);
2280  }
2281 
 // A null location does not advance the previous strand / id.
2282  if (loc.Which() != CSeq_loc::e_Null) {
2284 
2285  lc.strand_prv = lc.strand_cur;
2286  lc.id_prv = lc.id_cur;
2287  }
2288  } catch( const exception& e ) {
 // Any exception is turned into a report naming the location, and the state
 // is set as for an unrecognised location type.
2289  string label = GetValidatorLocationLabel(loc, *m_Scope);
2291  "Exception caught while validating location " +
2292  label + ". Exception: " + e.what(), obj);
2293 
2294  lc.strand_cur = eNa_strand_other;
2295  lc.id_cur = nullptr;
2296  lc.int_prv = nullptr;
2297  }
2298 }
2299 
2301 (const CSeq_loc& loc,
2302  const CBioseq_Handle& seq,
2303  bool report_abutting,
2304  const string& prefix,
2305  const CSerialObject& obj,
2306  bool lowerSev)
2307 {
 // Validates the location `loc` and reports problems against `obj`.
 // `prefix` starts most messages; `seq`, when valid, supplies topology and
 // representation; `report_abutting` enables the adjacent-interval report;
 // `lowerSev` is forwarded to x_CheckLoc.
 // NOTE(review): the line naming the function, several conditions and every
 // reporting-call opener are missing from this listing; the comments describe
 // only what is visible.
2308  SLocCheck lc;
2309 
2311 
2312  x_CheckLoc(loc, obj, lc, lowerSev);
2313 
 // strand "other" mixed with plus/minus, or "other" used in a location at all
2314  if (lc.has_other && lc.has_not_other) {
2315  string label = GetValidatorLocationLabel(loc, *m_Scope);
2317  prefix + ": Inconsistent use of other strand SeqLoc [" + label + "]", obj);
2318  } else if (lc.has_other && NStr::Equal(prefix, "Location")) {
2321  "Strand 'other' in location", obj);
2322  }
2323 
2324  x_ReportInvalidFuzz(loc, obj);
2325 
2329  "Duplicate exons in location", obj);
2330  }
2331 
2332  if (s_CountMix(loc) > 1) {
2333  string label;
2334  loc.GetLabel(&label);
2336  prefix + ": SeqLoc [" + label + "] has nested SEQLOC_MIX elements",
2337  obj);
2338  }
2339 
2340  // Warn if different parts of a seq-loc refer to the same bioseq using
2341  // different id types (i.e. gi and accession)
2342  ValidateSeqLocIds(loc, obj);
2343 
 // The remaining checks depend on whether `obj` is a feature and on its
 // exception text.
2344  bool trans_splice = false;
2345  bool circular_rna = false;
2346  bool exception = false;
2347  const CSeq_feat* sfp = nullptr;
2348  if (obj.GetThisTypeInfo() == CSeq_feat::GetTypeInfo()) {
2349  sfp = dynamic_cast<const CSeq_feat*>(&obj);
2350  }
2351  if (sfp) {
2352  // primer_bind intervals MAY be on opposite strands
2354  lc.mixed_strand = false;
2355  lc.unmarked_strand = false;
2356  }
2357 
2358  exception = sfp->IsSetExcept() ? sfp->GetExcept() : false;
2359  if (exception && sfp->CanGetExcept_text()) {
2360  if (NStr::FindNoCase(sfp->GetExcept_text(), "trans-splicing") != NPOS) {
2361  // trans splicing exception turns off both mixed_strand and
2362  // out_of_order messages
2363  trans_splice = true;
2364  } else if (NStr::FindNoCase(sfp->GetExcept_text(), "circular RNA") != NPOS) {
2365  // circular RNA exception turns off out_of_order message
2366  circular_rna = true;
2367  }
2368  }
2369  }
2370 
 // Adjacent intervals: Warning when the feature carries an exception, Error
 // otherwise (part of the condition is on a missing line).
2371  string loc_lbl;
2372  if (report_abutting && (!sfp || !CSeqFeatData::AllowAdjacentIntervals(sfp->GetData().GetSubtype())) &&
2374  loc_lbl = GetValidatorLocationLabel(loc, *m_Scope);
2375 
2376  EDiagSev sev = exception ? eDiag_Warning : eDiag_Error;
2378  prefix + ": Adjacent intervals in SeqLoc [" +
2379  loc_lbl + "]", obj);
2380  }
2381 
 // Trans-spliced location (not a product): only require more than one
 // interval, then stop -- the strand and order checks below are skipped.
2382  if (trans_splice && !NStr::Equal(prefix, "Product")) {
2383  CSeq_loc_CI li(loc);
2384  ++li;
2385  if (!li) {
2386  PostErr(eDiag_Warning, eErr_SEQ_FEAT_BadTranssplicedInterval, "Trans-spliced feature should have multiple intervals", obj);
2387  }
2388  return;
2389  }
2390 
2391  bool ordered = true;
2392  bool circular = false;
2393  if ( seq &&
2394  seq.IsSetInst() && seq.GetInst().IsSetTopology() &&
2396  circular = true;
2397  }
 // The order check (on a missing line) is attempted only for non-circular
 // sequences and feature subtypes that require biological order.
2398  try {
2399  if (m_Scope && (!sfp || CSeqFeatData::RequireLocationIntervalsInBiologicalOrder(sfp->GetData().GetSubtype())) && !circular) {
2401  }
2402  } catch ( const CException& ex) {
2403  string label;
2404  loc.GetLabel(&label);
2406  "Exception caught while validating location " +
2407  label + ". Exception: " + ex.what(), obj);
2408  }
2409 
 // Report strand / order problems found so far and stop.
2410  if (lc.mixed_strand || lc.unmarked_strand || !ordered) {
2411  if (loc_lbl.empty()) {
2412  loc_lbl = GetValidatorLocationLabel(loc, *m_Scope);
2413  }
2414  if (lc.mixed_strand) {
2415  if (IsSmallGenomeSet()) {
2417  prefix + ": Mixed strands in SeqLoc ["
2418  + loc_lbl + "] in small genome set - set trans-splicing exception if appropriate", obj);
2419  } else {
 // Error by default; Warning for Geneious records and pseudo features
2420  EDiagSev sev = eDiag_Error;
2421  if (IsGeneious() || (sfp && sequence::IsPseudo(*sfp, *m_Scope))) {
2422  sev = eDiag_Warning;
2423  }
2425  prefix + ": Mixed strands in SeqLoc ["
2426  + loc_lbl + "]", obj);
2427  }
2428  } else if (lc.unmarked_strand) {
2430  prefix + ": Mixed plus and unknown strands in SeqLoc ["
2431  + loc_lbl + "]", obj);
2432  }
2433  if (!ordered && !circular_rna) {
2434  if (IsSmallGenomeSet()) {
2436  prefix + ": Intervals out of order in SeqLoc [" +
2437  loc_lbl + "]", obj);
2438  } else {
2440  prefix + ": Intervals out of order in SeqLoc [" +
2441  loc_lbl + "]", obj);
2442  }
2443  }
2444  return;
2445  }
2446 
 // Everything below applies only when `seq` is absent, has no repr set, or is
 // a segmented bioseq.
2447  if ( seq &&
2448  seq.IsSetInst_Repr() &&
2449  seq.GetInst_Repr() != CSeq_inst::eRepr_seg ) {
2450  return;
2451  }
2452 
2453  // Check for intervals out of order on segmented Bioseq
2454  if ( seq && BadSeqLocSortOrder(seq, loc) && !circular_rna ) {
2455  if (loc_lbl.empty()) {
2456  loc.GetLabel(&loc_lbl);
2457  }
 // NOTE(review): this message lacks the ": " separator after `prefix` that
 // every other message in this function has -- probably unintended; confirm.
2459  prefix + "Intervals out of order in SeqLoc [" +
2460  loc_lbl + "]", obj);
2461  }
2462 
2463  // Check for mixed strand on segmented Bioseq
2464  if ( IsMixedStrands(loc) ) {
2465  if (loc_lbl.empty()) {
2466  loc.GetLabel(&loc_lbl);
2467  }
2469  prefix + ": Mixed strands in SeqLoc [" +
2470  loc_lbl + "]", obj);
2471  }
2472 }
2473 
2474 
2476 {
 // Remembers `seq` in m_BioseqWithNoSource unless SeqIsPatent(seq) is true.
 // (The signature line is missing from this listing.)
2477  if (!SeqIsPatent(seq)) {
2478  m_BioseqWithNoSource.push_back(CConstRef<CBioseq>(&seq));
2479  }
2480 }
2481 
2482 
2484 {
 // Reports a protein without a product name, unless SeqIsPatent(seq) is true.
 // (The signature line and the reporting-call opener are missing from this listing.)
2485  if (!SeqIsPatent (seq)) {
2487  "The product name is missing from this protein.", *(seq.GetCompleteBioseq()));
2488  }
2489 }
2490 
2491 
2493 {
 // Returns true when `seq` has a MolInfo descriptor with tech == wgs and the id
 // scan below ends with an "other" id seen and no gi seen.
 // (The signature line is missing from this listing.)
2494  bool wgs = false;
2495 
2496  FOR_EACH_DESCRIPTOR_ON_BIOSEQ (it, seq) {
2497  if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2498  && (*it)->GetMolinfo().GetTech() == CMolInfo::eTech_wgs) {
2499  wgs = true;
2500  break;
2501  }
2502  }
2503  if (!wgs) {
2504  return false;
2505  }
2506 
2507  bool is_other = false;
2508  bool has_gi = false;
2509 
 // NOTE(review): both branches break, so the scan stops at the first id that
 // is either "other" or a gi and the result depends on id order (an "other" id
 // listed before a gi yields true) -- confirm this is intended.
2510  FOR_EACH_SEQID_ON_BIOSEQ (it, seq) {
2511  if ((*it)->IsOther()) {
2512  is_other = true;
2513  break;
2514  } else if ((*it)->IsGi()) {
2515  has_gi = true;
2516  break;
2517  }
2518  }
2519  if (!is_other || has_gi) {
2520  return false;
2521  }
2522 
2523  return true;
2524 }
2525 
2526 
2528 {
 // Returns true when `seq` has a MolInfo descriptor with tech == tsa and the id
 // scan below ends with an "other" id seen and no gi seen.
 // (The signature line is missing from this listing.)
2529  bool tsa = false;
2530 
2531  FOR_EACH_DESCRIPTOR_ON_BIOSEQ (it, seq) {
2532  if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2533  && (*it)->GetMolinfo().GetTech() == CMolInfo::eTech_tsa) {
2534  tsa = true;
2535  break;
2536  }
2537  }
2538  if (!tsa) {
2539  return false;
2540  }
2541 
2542  bool is_other = false;
2543  bool has_gi = false;
2544 
 // NOTE(review): both branches break, so the scan stops at the first id that
 // is either "other" or a gi and the result depends on id order -- confirm.
2545  FOR_EACH_SEQID_ON_BIOSEQ (it, seq) {
2546  if ((*it)->IsOther()) {
2547  is_other = true;
2548  break;
2549  } else if ((*it)->IsGi()) {
2550  has_gi = true;
2551  break;
2552  }
2553  }
2554  if (!is_other || has_gi) {
2555  return false;
2556  }
2557 
2558  return true;
2559 }
2560 
2561 
2563 {
 // Reports missing source information for the entry `se` and for every bioseq
 // collected in m_BioseqWithNoSource.
 // (The signature line and both reporting-call openers are missing from this listing.)
 //
 // Huge-file preprocessing: a record without any BioSource (and not patent /
 // PDB according to the context) returns without reporting.
2564  if (GetContext().PreprocessHugeFile) {
2565  if (m_pEntryInfo->IsNoBioSource() && !GetContext().IsPatent && !GetContext().IsPDB) {
2566  return;
2567  }
2568  }
 // Otherwise the record-level report is made; the per-bioseq loop below then
 // runs only when PostprocessHugeFile is set.
2569  else if (m_pEntryInfo->IsNoBioSource() && !m_pEntryInfo->IsPatent() && !m_pEntryInfo->IsPDB()) {
2571  "No source information included on this record.", se);
2572 
2573  if (!GetContext().PostprocessHugeFile) {
2574  return;
2575  }
2576  }
2577 
2578  size_t num_no_source = m_BioseqWithNoSource.size();
2579 
2580  for ( size_t i = 0; i < num_no_source; ++i ) {
2582  "No organism name included in the source. Other qualifiers may exist.",
2583  *(m_BioseqWithNoSource[i]));
2584  }
2585 }
2586 
2587 
2589 {
 // Returns the first feature found by a by-product search for `bsh`, or an
 // empty reference when `bsh` is invalid or nothing is found.
 // NOTE(review): the signature line, the declarations of the selector `sel`
 // and the continuation of the selector chain in the first branch are missing
 // from this listing; presumably this is the handle overload of
 // GetmRNAGivenProduct that the next function calls -- confirm.
2590  CConstRef<CSeq_feat> feat;
2591 
2593 
2594  if ( bsh ) {
2595  if ( IsNT() && m_TSE ) {
2596  // In case of a NT bioseq limit the search to features packaged on the
2597  // NT (we assume features have been pulled from the segments to the NT).
2599  sel.SetByProduct()
2601  CFeat_CI fi(bsh, sel);
2602  if ( fi ) {
2603  // return the first one (should be the one packaged on the
2604  // nuc-prot set).
2605  feat.Reset(&(fi->GetOriginalFeature()));
2606  }
2607  } else {
2609  sel.SetByProduct();
2610  CFeat_CI fi(bsh, sel);
2611  if ( fi ) {
2612  // return the first one (should be the one packaged on the
2613  // nuc-prot set).
2614  feat.Reset(&(fi->GetOriginalFeature()));
2615  }
2616  }
2617  }
2618 
2619  return feat;
2620 }
2621 
2622 
2624 {
 // Forwards to the handle-based GetmRNAGivenProduct; `bsh` is obtained on a
 // line that, like the signature, is missing from this listing.
2626  return GetmRNAGivenProduct(bsh);
2627 }
2628 
2629 
2631 {
 // Returns the first feature found by a by-product search for `bsh`, or an
 // empty reference when `bsh` is invalid or nothing is found.
 // NOTE(review): the signature line and the declarations of the selector `sel`
 // are missing from this listing.
2632  CConstRef<CSeq_feat> feat;
2633  if ( bsh ) {
2634  // In case of a NT bioseq limit the search to features packaged on the
2635  // NT (we assume features have been pulled from the segments to the NT).
2636  CSeq_entry_Handle limit;
2637  if ( IsNT() && m_TSE ) {
2638  limit = m_Scope->GetSeq_entryHandle(*m_TSE);
2639  }
2640 
 // With a valid limit handle the search is restricted to that TSE;
 // otherwise it is unrestricted.
2641  if (limit) {
2643  sel.SetByProduct() .SetLimitTSE(limit);
2644  CFeat_CI fi(bsh, sel);
2645  if ( fi ) {
2646  // return the first one (should be the one packaged on the
2647  // nuc-prot set).
2648  feat.Reset(&(fi->GetOriginalFeature()));
2649  }
2650  } else {
2652  sel.SetByProduct();
2653  CFeat_CI fi(bsh, sel);
2654  if ( fi ) {
2655  // return the first one (should be the one packaged on the
2656  // nuc-prot set).
2657  feat.Reset(&(fi->GetOriginalFeature()));
2658  }
2659  }
2660  }
2661 
2662  return feat;
2663 }
2664 
2665 
2667 (const CBioseq& seq,
2668  CBioseq_set::EClass clss)
2669 {
 // Walks up the parent entries of `seq` and returns the nearest one that is a
 // set of class `clss`; returns nullptr when the chain ends without a match.
 // (The line naming the function is missing from this listing.)
2670  const CSeq_entry* parent = nullptr;
2671  for ( parent = seq.GetParentEntry();
2672  parent;
2673  parent = parent->GetParentEntry() ) {
2674  if ( parent->IsSet() ) {
2675  const CBioseq_set& set = parent->GetSet();
2676  if ( set.IsSetClass() && set.GetClass() == clss ) {
2677  break;
2678  }
2679  }
2680  }
2681  return parent;
2682 }
2683 
2684 
2685 bool CValidError_imp::IsSerialNumberInComment(const string& comment)
2686 {
2687  size_t pos = comment.find('[', 0);
2688  while ( pos != string::npos ) {
2689  ++pos;
2690  bool okay = true;
2691  if ( isdigit((unsigned char) comment[pos]) ) {
2692  // skip if first character after bracket is 0
2693  if (comment[pos] == '0') {
2694  okay = false;
2695  }
2696  while ( isdigit((unsigned char) comment[pos]) ) {
2697  ++pos;
2698  }
2699  if ( comment[pos] == ']' && okay ) {
2700  return true;
2701  }
2702  }
2703 
2704  pos = comment.find('[', pos);
2705  }
2706  return false;
2707 }
2708 
2709 
2711 {
 // Returns false in the three situations listed below, true otherwise.
 // (The signature line is missing from this listing.)
2712  // okay to have far RefSeq product, but only if genomic product set
2713  if ( sid && sid->IsOther() ) {
2714  if ( IsGPS() ) {
2715  return false;
2716  }
2717  }
2718  // or just a bioseq
2719  if ( GetTSE().IsSeq() ) {
2720  return false;
2721  }
2722 
2723  // or in a standalone Seq-annot
2724  if (IsStandaloneAnnot() ) {
2725  return false;
2726  }
2727  return true;
2728 }
2729 
2730 
2732  vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
2733  vector<string>& published_labels, vector<string>& unpublished_labels)
2734 {
 // Collects labels from every pub descriptor on `se` into the output vectors
 // via CCleanup::GetPubdescLabels, then recurses into each member entry when
 // `se` is a set. (The first signature line is missing from this listing.)
2735  FOR_EACH_SEQDESC_ON_SEQENTRY (it, se) {
2736  if ((*it)->IsPub()) {
2737  CCleanup::GetPubdescLabels ((*it)->GetPub(), pmids, muids, serials, published_labels, unpublished_labels);
2738  }
2739  }
2740 
2741  if (se.IsSet()) {
2742  FOR_EACH_SEQENTRY_ON_SEQSET (it, se.GetSet()) {
2743  s_CollectPubDescriptorLabels (**it, pmids, muids, serials, published_labels, unpublished_labels);
2744  }
2745  }
2746 }
2747 
2748 
2750 {
2751  vector<TEntrezId> pmids;
2752  vector<TEntrezId> muids;
2753  vector<int> serials;
2754  vector<string> published_labels;
2755  vector<string> unpublished_labels;
2756 
2757  // collect labels for pubs on record
2758  s_CollectPubDescriptorLabels (*(seh.GetCompleteSeq_entry()), pmids, muids, serials, published_labels, unpublished_labels);
2759 
2761  while (feat) {
2762  CCleanup::GetPubdescLabels (feat->GetData().GetPub(), pmids, muids, serials, published_labels, unpublished_labels);
2763  ++feat;
2764  }
2765 
2766  // now examine citations to determine whether they match a pub on the record
2767  CFeat_CI f (seh);
2768  while (f) {
2769  if (f->IsSetCit() && f->GetCit().IsPub()) {
2770  ITERATE (CPub_set::TPub, cit_it, f->GetCit().GetPub()) {
2771  bool found = false;
2772 
2773  if ((*cit_it)->IsPmid()) {
2774  vector<TEntrezId>::iterator it = pmids.begin();
2775  while (it != pmids.end() && !found) {
2776  if (*it == (*cit_it)->GetPmid()) {
2777  found = true;
2778  }
2779  ++it;
2780  }
2781  if (!found) {
2783  "Citation on feature refers to uid ["
2784  + NStr::NumericToString((*cit_it)->GetPmid().Get())
2785  + "] not on a publication in the record",
2786  f->GetOriginalFeature());
2787  }
2788  } else if ((*cit_it)->IsMuid()) {
2789  vector<TEntrezId>::iterator it = muids.begin();
2790  while (it != muids.end() && !found) {
2791  if (*it == (*cit_it)->GetMuid()) {
2792  found = true;
2793  }
2794  ++it;
2795  }
2796  if (!found) {
2798  "Citation on feature refers to uid ["
2799  + NStr::NumericToString((*cit_it)->GetMuid())
2800  + "] not on a publication in the record",
2801  f->GetOriginalFeature());
2802  }
2803  } else if ((*cit_it)->IsEquiv()) {
2804  continue;
2805  } else {
2806  string label;
2807  (*cit_it)->GetLabel(&label, CPub::eContent, CPub::fLabel_Unique);
2808 
2809  if (NStr::EndsWith (label, ">")) {
2810  label = label.substr(0, label.length() - 2);
2811  }
2812  if(NStr::EndsWith (label, "|")) {
2813  label = label.substr(0, label.length() - 1);
2814  }
2815  if (NStr::EndsWith (label, " ")) {
2816  label = label.substr(0, label.length() - 1);
2817  }
2818  size_t len = label.length();
2819  vector<string>::iterator unpub_it = unpublished_labels.begin();
2820  while (unpub_it != unpublished_labels.end() && !found) {
2821  size_t it_len =(*unpub_it).length();
2822  if (NStr::EqualNocase (*unpub_it, 0, it_len > len ? len : it_len, label)) {
2823  found = true;
2824  }
2825  ++unpub_it;
2826  }
2827  vector<string>::iterator pub_it = published_labels.begin();
2828 
2829  while (pub_it != published_labels.end() && !found) {
2830  size_t it_len =(*pub_it).length();
2831  if (NStr::EqualNocase (*pub_it, 0, it_len > len ? len : it_len, label)) {
2833  "Citation on feature needs to be updated to published uid",
2834  f->GetOriginalFeature());
2835  found = true;
2836  }
2837  ++pub_it;
2838  }
2839  if (!found) {
2841  "Citation on feature refers to a publication not in the record",
2842  f->GetOriginalFeature());
2843  }
2844  }
2845  }
2846  }
2847  ++f;
2848  }
2849 }
2850 
2851 
2852 // =============================================================================
2853 // Private
2854 // =============================================================================
2855 
2856 
2857 
2859 {
2861  for( ; it; ++it) {
2862  const string& str = *it;
2863  FOR_EACH_CHAR_IN_STRING(c_it, str) {
2864  const char& ch = *c_it;
2865  unsigned char chu = ch;
2866  if (ch > 127 || (ch < 32 && ch != '\t' && ch != '\r' && ch != '\n')) {
2868  "Non-ASCII character '" + NStr::NumericToString(chu) + "' found in item", obj);
2869  break;
2870  }
2871  }
2872  }
2873 }
2874 
2875 
2877 {
2878  class CScriptTagTextFsm : public CTextFsm<int>
2879  {
2880  public:
2881  CScriptTagTextFsm() {
2882  const char * script_tags[] = {
2883  "<script", "<object", "<applet", "<embed", "<form",
2884  "javascript:", "vbscript:"};
2885  ITERATE_0_IDX(idx, ArraySize(script_tags)) {
2886  AddWord(script_tags[idx], true);
2887  }
2888  Prime();
2889  }
2890 
2891  // Returns true if the given string matches any of the strings
2892  // in the fsm anywhere.
2893  bool DoesStrHaveFsmHits(const string &str) {
2894  int state = GetInitialState();
2895  ITERATE(string, str_it, str) {
2896  state = GetNextState(state, *str_it);
2897  if( IsMatchFound(state) ) {
2898  return true;
2899  }
2900  }
2901 
2902  return false;
2903  }
2904  };
2905  static CScriptTagTextFsm s_ScriptTagFsm;
2906 
2907 
2909  for( ; it; ++it) {
2910  if (s_ScriptTagFsm.DoesStrHaveFsmHits(*it)) {
2912  "Script tag found in item", obj);
2913  return;
2914  }
2915 }
2916 }
2917 
2918 
2919 bool CValidError_imp::IsMixedStrands(const CSeq_loc& loc)
2920 {
2921  if ( SeqLocCheck(loc, m_Scope) == eSeqLocCheck_warning ) {
2922  return false;
2923  }
2924 
2925  CSeq_loc_CI curr(loc);
2926  if ( !curr ) {
2927  return false;
2928  }
2929  CSeq_loc_CI prev = curr;
2930  ++curr;
2931 
2932  while ( curr ) {
2933  ENa_strand curr_strand = curr.GetStrand();
2934  ENa_strand prev_strand = prev.GetStrand();
2935 
2936  if ( (prev_strand == eNa_strand_minus &&
2937  curr_strand != eNa_strand_minus) ||
2938  (prev_strand != eNa_strand_minus &&
2939  curr_strand == eNa_strand_minus) ) {
2940  return true;
2941  }
2942 
2943  prev = curr;
2944  ++curr;
2945  }
2946 
2947  return false;
2948 }
2949 
2950 
2951 static bool s_SeqLocHasGI (const CSeq_loc& loc)
2952 {
2953  bool rval = false;
2954 
2955  for ( CSeq_loc_CI it(loc); it && !rval; ++it ) {
2956  if (it.GetSeq_id().IsGi()) {
2957  rval = true;
2958  }
2959  }
2960  return rval;
2961 }
2962 
2963 
2965 {
2966  m_TSEH = seh;
2968  m_GeneCache.Clear();
2969 }
2970 
2971 
2973 {
2975  return true;
2976  } else {
2977  return false;
2978  }
2979 }
2980 
2981 
2983 {
2984  if (se.IsSeq()) {
2985  return 1;
2986  } else if (!se.IsSet()) {
2987  return 0;
2988  }
2989  if (se.GetSet().IsSetClass()) {
2992  return 1;
2993  }
2994  }
2995  size_t count = 0;
2996  if (se.GetSet().IsSetSeq_set()) {
2997  for (auto it = se.GetSet().GetSeq_set().begin(); it != se.GetSet().GetSeq_set().end(); it++) {
2998  count += s_CountTopSetSiblings(**it);
2999  }
3000  }
3001  return count;
3002 }
3003 
3004 
3006 {
3007  // "Save" the Seq-entry
3008  SetTSE(seh);
3009 
3012 
3013  // If no Pubs/BioSource in CSeq_entry, post only one error
3014  if (GetContext().PreprocessHugeFile) {
3015  x_SetEntryInfo().SetNoPubs(GetContext().NoPubsFound);
3016  x_SetEntryInfo().SetNoCitSubPubs(GetContext().NoCitSubsFound);
3017  x_SetEntryInfo().SetNoBioSource(GetContext().NoBioSource);
3018  } else {
3020  x_SetEntryInfo().SetNoPubs(!pub);
3021  while (pub && !pub->IsSub()) {
3022  ++pub;
3023  }
3027  }
3028 
3029 
3030  // Look for genomic product set
3032  if (si->IsSetClass ()) {
3033  if (si->GetClass () == CBioseq_set::eClass_gen_prod_set) {
3034  x_SetEntryInfo().SetGPS();
3035  }
3036  if (si->GetClass () == CBioseq_set::eClass_small_genome_set) {
3038  }
3039  }
3040  }
3041 
3042  // Examine all Seq-ids on Bioseqs
3043  for (CTypeConstIterator <CBioseq> bi (*m_TSE); bi; ++bi) {
3044  FOR_EACH_SEQID_ON_BIOSEQ (sid_itr, *bi) {
3045  const CSeq_id& sid = **sid_itr;
3046  const CTextseq_id* tsid = sid.GetTextseq_Id();
3047  CSeq_id::E_Choice typ = sid.Which();
3048  switch (typ) {
3049  case CSeq_id::e_not_set:
3050  break;
3051  case CSeq_id::e_Local:
3052  break;
3053  case CSeq_id::e_Gibbsq:
3054  break;
3055  case CSeq_id::e_Gibbmt:
3056  break;
3057  case CSeq_id::e_Giim:
3058  break;
3059  case CSeq_id::e_Genbank:
3062  x_SetEntryInfo().SetGED();
3063  break;
3064  case CSeq_id::e_Embl:
3066  x_SetEntryInfo().SetGED();
3067  x_SetEntryInfo().SetEmbl();
3068  break;
3069  case CSeq_id::e_Pir:
3070  break;
3071  case CSeq_id::e_Swissprot:
3072  break;
3073  case CSeq_id::e_Patent:
3075  break;
3076  case CSeq_id::e_Other:
3078  // and do RefSeq subclasses up front as well
3079  if (sid.GetOther().IsSetAccession()) {
3080  string acc = sid.GetOther().GetAccession().substr(0, 3);
3081  if (acc == "NC_") {
3082  m_IsNC = true;
3083  } else if (acc == "NG_") {
3084  m_IsNG = true;
3085  } else if (acc == "NM_") {
3086  m_IsNM = true;
3087  } else if (acc == "NP_") {
3088  m_IsNP = true;
3089  } else if (acc == "NR_") {
3090  m_IsNR = true;
3091  } else if (acc == "NZ_") {
3092  m_IsNZ = true;
3093  } else if (acc == "NS_") {
3094  m_IsNS = true;
3095  } else if (acc == "NT_") {
3096  m_IsNT = true;
3097  } else if (acc == "NW_") {
3098  m_IsNW = true;
3099  } else if (acc == "WP_") {
3100  m_IsWP = true;
3101  } else if (acc == "XR_") {
3102  m_IsXR = true;
3103  }
3104  }
3105  break;
3106  case CSeq_id::e_General:
3107  if ((*bi).IsAa() && !sid.GetGeneral().IsSkippable()) {
3109  }
3110  break;
3111  case CSeq_id::e_Gi:
3112  x_SetEntryInfo().SetGI();
3114  break;
3115  case CSeq_id::e_Ddbj:
3117  x_SetEntryInfo().SetGED();
3118  x_SetEntryInfo().SetDdbj();
3119  break;
3120  case CSeq_id::e_Prf:
3121  break;
3122  case CSeq_id::e_Pdb:
3123  x_SetEntryInfo().SetPDB();
3124  break;
3125  case CSeq_id::e_Tpg:
3127  break;
3128  case CSeq_id::e_Tpe:
3129  x_SetEntryInfo().SetTPE();
3131  break;
3132  case CSeq_id::e_Tpd:
3134  break;
3135  case CSeq_id::e_Gpipe:
3137  break;
3138  default:
3139  break;
3140  }
3141  if ( tsid && tsid->IsSetAccession() && tsid->IsSetVersion() && tsid->GetVersion() >= 1 ) {
3143  }
3144  if (typ != CSeq_id::e_Local && typ != CSeq_id::e_General) {
3146  }
3147  }
3148  }
3149 
3150  // search all source descriptors for genomic source
3151  for (CSeqdesc_CI desc_ci (seh, CSeqdesc::e_Source);
3152  desc_ci && !m_pEntryInfo->IsGenomic();
3153  ++desc_ci) {
3154  if (desc_ci->GetSource().IsSetGenome()
3155  && desc_ci->GetSource().GetGenome() == CBioSource::eGenome_genomic) {
3157  }
3158  }
3159 
3160  // search genome build and annotation pipeline user object descriptors
3161  for (CSeqdesc_CI desc_ci (seh, CSeqdesc::e_User);
3162  desc_ci && !m_pEntryInfo->IsGpipe();
3163  ++desc_ci) {
3164  if ( desc_ci->GetUser().IsSetType() ) {
3165  const CUser_object& obj = desc_ci->GetUser();
3166  const CObject_id& oi = obj.GetType();
3167  if ( ! oi.IsStr() ) continue;
3168  if ( NStr::CompareNocase(oi.GetStr(), "GenomeBuild") == 0 ) {
3170  } else if ( NStr::CompareNocase(oi.GetStr(), "StructuredComment") == 0 ) {
3171  ITERATE (CUser_object::TData, field, obj.GetData()) {
3172  if ((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {
3173  if (NStr::EqualNocase((*field)->GetLabel().GetStr(), "Annotation Pipeline")) {
3174  if (NStr::EqualNocase((*field)->GetData().GetStr(), "NCBI eukaryotic genome annotation pipeline")) {
3176  }
3177  }
3178  }
3179  }
3180  }
3181  }
3182  }
3183 
3184  // examine features for location gi, product gi, and locus tag
3185  for (CFeat_CI feat_ci (seh);
3187  ++feat_ci) {
3188  if (s_SeqLocHasGI(feat_ci->GetLocation())) {
3190  }
3191  if (feat_ci->IsSetProduct() && s_SeqLocHasGI(feat_ci->GetProduct())) {
3193  }
3194  if (feat_ci->IsSetData() && feat_ci->GetData().IsGene()
3195  && feat_ci->GetData().GetGene().IsSetLocus_tag()
3196  && !NStr::IsBlank (feat_ci->GetData().GetGene().GetLocus_tag())) {
3198  }
3199  }
3200 
3201  if ( m_PrgCallback ) {
3202  m_NumAlign = 0;
3203  for (CTypeConstIterator<CSeq_align> i(*m_TSE); i; ++i) {
3204  m_NumAlign++;
3205  }
3206  m_NumAnnot = 0;
3207  for (CTypeConstIterator<CSeq_annot> i(*m_TSE); i; ++i) {
3208  m_NumAnnot++;
3209  }
3210  m_NumBioseq = 0;
3211  for (CTypeConstIterator<CBioseq> i(*m_TSE); i; ++i) {
3212  m_NumBioseq++;
3213  }
3214  m_NumBioseq_set = 0;
3215  for (CTypeConstIterator<CBioseq_set> i(*m_TSE); i; ++i) {
3216  m_NumBioseq_set++;
3217  }
3218  m_NumDesc = 0;
3219  for (CTypeConstIterator<CSeqdesc> i(*m_TSE); i; ++i) {
3220  m_NumDesc++;
3221  }
3222  m_NumDescr = 0;
3223  for (CTypeConstIterator<CSeq_descr> i(*m_TSE); i; ++i) {
3224  m_NumDescr++;
3225  }
3226  m_NumFeat = 0;
3227  for (CTypeConstIterator<CSeq_feat> i(*m_TSE); i; ++i) {
3228  m_NumFeat++;
3229  }
3230  m_NumGraph = 0;
3231  for (CTypeConstIterator<CSeq_graph> i(*m_TSE); i; ++i) {
3232  m_NumGraph++;
3233  }
3236  m_NumGraph;
3237  }
3238 
3239  if (CNcbiApplication::Instance()->GetProgramDisplayName() == "table2asn") {
3240  m_IsTbl2Asn = true;
3241  }
3242 }
3243 
3244 
3246 {
3247  m_Scope.Reset(new CScope(*m_ObjMgr));
3248  m_Scope->AddTopLevelSeqEntry(*const_cast<CSeq_entry*>(&se));
3249  m_Scope->AddDefaults();
3250 }
3251 
3252 
3254 {
3255  m_IsStandaloneAnnot = true;
3256  if (! m_Scope) {
3257  m_Scope.Reset(& sah.GetScope());
3258  }
3260  m_TSE.Reset(new CSeq_entry); // set a dummy Seq-entry
3262 }
3263 
3264 
3266 {
3267  m_Scope.Reset(new CScope(*m_ObjMgr));
3268  CRef<CSeq_entry> tmp_entry(new CSeq_entry());
3269  tmp_entry->SetSeq().Assign(seq);
3270  m_TSE.Reset(tmp_entry);
3272  Setup(m_TSEH);
3273  return m_TSEH;
3274 }
3275 
3276 
3278 (const CSeq_loc& loc,
3279  const CSerialObject& obj)
3280 {
3281  for ( CSeq_loc_CI lit(loc); lit; ++lit ) {
3282  const CSeq_id& id1 = lit.GetSeq_id();
3283  CSeq_loc_CI lit2 = lit;
3284  for ( ++lit2; lit2; ++lit2 ) {
3285  const CSeq_id& id2 = lit2.GetSeq_id();
3286  if ( IsSameBioseq(id1, id2, m_Scope) && !id1.Match(id2) ) {
3289  "Two ids refer to the same bioseq but are of "
3290  "different type", obj);
3291  }
3292  }
3293  if (IsTemporary(id1)) {
3295  "Feature locations should not use Seq-ids that will be stripped during ID load", obj);
3296  }
3297  }
3300  "Feature location intervals should all be on the same sequence", obj);
3301  }
3302 }
3303 
3304 
3306 {
3307  return validator::IsInOrganelleSmallGenomeSet(id, scope);
3308 }
3309 
3310 
3311 // all ids in a location should point to the same sequence, unless the sequences are
3312 // in an organelle small genome set
3313 bool CValidError_imp::BadMultipleSequenceLocation(const CSeq_loc& loc, CScope& scope)
3314 {
3315  return validator::BadMultipleSequenceLocation(loc, scope);
3316 }
3317 
3318 
// Decides whether `loc` should be treated as a far-fetch failure.
// The result is true only when both halves of the condition hold. The first
// half is on a line that is not visible in this view (TODO confirm what it
// tests); the second half is IsFarLocation(loc, GetTSEH()).
// In every other case the function returns false.
3319 bool CValidError_imp::x_IsFarFetchFailure (const CSeq_loc& loc)
3320 {
3322  && IsFarLocation(loc, GetTSEH())) {
3323  return true;
3324  } else {
3325  return false;
3326  }
3327 }
3328 
3329 
3330 //LCOV_EXCL_START
3331 // not used by asnvalidate, used by external programs
3333 {
3334  bool rval = false;
3335  Setup(se);
3336  CValidError_bioseq bioseq_validator(*this);
3338  while (bi) {
3339  rval |= bioseq_validator.GetTSANStretchErrors(*(bi->GetCompleteBioseq()));
3340  ++bi;
3341  }
3342  return rval;
3343 }
3344 
3345 
3347 {
3348  CSeq_entry_Handle seh = Setup(seq);
3349  CValidError_bioseq bioseq_validator(*this);
3350  return bioseq_validator.GetTSANStretchErrors(*(seh.GetSeq().GetCompleteBioseq()));
3351 }
3352 
3353 
3355 {
3356  bool rval = false;
3357  Setup(se);
3358  CValidError_feat feat_validator(*this);
3359  CFeat_CI fi(se);
3360  while (fi) {
3361  CBioseq_Handle bsh = se.GetScope().GetBioseqHandle(fi->GetLocation());
3362  if (bsh) {
3363  rval |= feat_validator.GetTSACDSOnMinusStrandErrors(*(fi->GetSeq_feat()), *(bsh.GetCompleteBioseq()));
3364  }
3365  ++fi;
3366  }
3367 
3368  return rval;
3369 }
3370 
3371 
3373 {
3374  CSeq_entry_Handle seh = Setup(seq);
3375  CValidError_feat feat_validator(*this);
3376  return feat_validator.GetTSACDSOnMinusStrandErrors(f, *(seh.GetSeq().GetCompleteBioseq()));
3377 }
3378 
3379 
3381 {
3382  bool rval = false;
3383  Setup(se);
3384  CValidError_bioseq bioseq_validator(*this);
3386  while (bi) {
3387  rval |= bioseq_validator.GetTSAConflictingBiomolTechErrors(*(bi->GetCompleteBioseq()));
3388  ++bi;
3389  }
3390  return rval;
3391 }
3392 
3393 
3395 {
3396  CSeq_entry_Handle seh = Setup(seq);
3397  CValidError_bioseq bioseq_validator(*this);
3398  return bioseq_validator.GetTSAConflictingBiomolTechErrors(*(seh.GetSeq().GetCompleteBioseq()));
3399 }
3400 //LCOV_EXCL_STOP
3401 
3402 const string kTooShort = "Too Short";
3403 const string kMissingPrimers = "Missing Primers";
3404 const string kMissingCountry = "Missing Country";
3405 const string kMissingVoucher = "Missing Voucher";
3406 const string kBadCollectionDate = "Bad Collection Date";
3407 const string kTooManyNs = "Too Many Ns";
3408 const string kMissingOrderAssignment = "Missing Order Assignment";
3409 const string kLowTrace = "Low Trace";
3410 const string kFrameShift = "Frame Shift";
3411 const string kStructuredVoucher = "Structured Voucher";
3412 
// ADD_BARCODE_ERR(TestName)
//
// Reports one failed barcode check and records it in the running summary.
// Expects the following names to be in scope at the point of use:
//   PostErr                         - callable taking (severity, code, text, object)
//   sq                              - the object the warning is posted against
//   msg                             - std::string summary being accumulated
//   eErr_GENERIC_Barcode<TestName>  - error code for this check
//   k<TestName>                     - label text for this check
//
// Effect: posts an eDiag_Warning whose text is k<TestName>, then appends
// k<TestName> to `msg`, preceded by "," when `msg` already has content.
//
// The expansion is one brace-enclosed block so that all three statements stay
// together even when the macro is the sole body of an unbraced `if`.
// It can be written with or without a trailing semicolon, so call sites of
// the form `{ ADD_BARCODE_ERR(X) }` keep working unchanged.
#define ADD_BARCODE_ERR(TestName) \
    { \
        PostErr(eDiag_Warning, eErr_GENERIC_Barcode##TestName, k##TestName, sq); \
        if (!msg.empty()) { \
            msg += ","; \
        } \
        msg += k##TestName; \
    }
3419 
3421 {
3422  TBarcodeResults results = GetBarcodeValues(seh);
3423  for (auto r : results) {
3424  const CBioseq& sq = *(r.bsh.GetCompleteBioseq());
3425  if (BarcodeTestFails(r)){
3426  string msg;
3427  if (r.length) {
3428  ADD_BARCODE_ERR(TooShort)
3429  }
3430  if (r.primers) {
3431  ADD_BARCODE_ERR(MissingPrimers)
3432  }
3433  if (r.country) {
3434  ADD_BARCODE_ERR(MissingCountry)
3435  }
3436  if (r.voucher) {
3437  ADD_BARCODE_ERR(MissingVoucher)
3438  }
3439  if (!r.percent_n.empty()) {
3441  if (!msg.empty()) {
3442  msg += ",";
3443  }
3444  msg += kTooManyNs + ":" + r.percent_n;
3445  }
3446  if (r.collection_date) {
3447  ADD_BARCODE_ERR(BadCollectionDate)
3448  }
3449  if (r.order_assignment) {
3450  ADD_BARCODE_ERR(MissingOrderAssignment)
3451  }
3452  if (r.low_trace) {
3453  ADD_BARCODE_ERR(LowTrace)
3454  }
3455  if (r.frame_shift) {
3456  ADD_BARCODE_ERR(FrameShift)
3457  }
3458  if (!r.structured_voucher) {
3459  ADD_BARCODE_ERR(StructuredVoucher)
3460  }
3461  PostErr(eDiag_Info, eErr_GENERIC_BarcodeTestFails, "FAIL (" + msg + ")", sq);
3462  } else {
3464  }
3465  }
3466 }
3467 
3468 
3472 bool CValidError_imp::IsGPS() const { return GetEntryInfo().IsGPS(); }
3473 bool CValidError_imp::IsGED() const { return GetEntryInfo().IsGED(); }
3474 bool CValidError_imp::IsPDB() const { return GetEntryInfo().IsPDB(); }
3477 bool CValidError_imp::IsEmbl() const { return GetEntryInfo().IsEmbl(); }
3478 bool CValidError_imp::IsDdbj() const { return GetEntryInfo().IsDdbj(); }
3479 bool CValidError_imp::IsTPE() const { return GetEntryInfo().IsTPE(); }
3480 bool CValidError_imp::IsNC() const { return m_IsNC; }
3481 bool CValidError_imp::IsNG() const { return m_IsNG; }
3482 bool CValidError_imp::IsNM() const { return m_IsNM; }
3483 bool CValidError_imp::IsNP() const { return m_IsNP; }
3484 bool CValidError_imp::IsNR() const { return m_IsNR; }
3485 bool CValidError_imp::IsNS() const { return m_IsNS; }
3486 bool CValidError_imp::IsNT() const { return m_IsNT; }
3487 bool CValidError_imp::IsNW() const { return m_IsNW; }
3488 bool CValidError_imp::IsNZ() const { return m_IsNZ; }
3489 bool CValidError_imp::IsWP() const { return m_IsWP; }
3490 bool CValidError_imp::IsXR() const { return m_IsXR; }
3491 bool CValidError_imp::IsGI() const { return GetEntryInfo().IsGI(); }
3493 bool CValidError_imp::IsGpipe() const { return GetEntryInfo().IsGpipe(); }
3506 
3507 
3508 
3509 // =============================================================================
3510 // CValidError_base Implementation
3511 // =============================================================================
3512 
3513 
3515  m_Imp(imp), m_Scope(imp.GetScope())
3516 {
3517 }
3518 
3519 
3521 {
3522 }
3523 
3524 
3526 (EDiagSev sv,
3527  EErrType et,
3528  const string& msg,
3529  const CSerialObject& obj)
3530 {
3531  m_Imp.PostErr(sv, et, msg, obj);
3532 }
3533 
3534 
3535 //void CValidError_base::PostErr
3536 //(EDiagSev sv,
3537 // EErrType et,
3538 // const string& msg,
3539 // TDesc ds)
3540 //{
3541 // m_Imp.PostErr(sv, et, msg, ds);
3542 //}
3543 
3544 
3546 (EDiagSev sv,
3547  EErrType et,
3548  const string& msg,
3549  const CSeq_feat& ft)
3550 {
3551  m_Imp.PostErr(sv, et, msg, ft);
3552 }
3553 
3554 
3556 (EDiagSev sv,
3557  EErrType et,
3558  const string& msg,
3559  const CBioseq& sq)
3560 {
3561  m_Imp.PostErr(sv, et, msg, sq);
3562 }
3563 
3564 
3566 (EDiagSev sv,
3567  EErrType et,
3568  const string& msg,
3569  const CSeq_entry& ctx,
3570  const CSeqdesc& ds)
3571 {
3572  m_Imp.PostErr(sv, et, msg, ctx, ds);
3573 }
3574 
3575 
3577 (EDiagSev sv,
3578  EErrType et,
3579  const string& msg,
3580  const CBioseq_set& set)
3581 {
3582  m_Imp.PostErr(sv, et, msg, set);
3583 }
3584 
3585 
3587 (EDiagSev sv,
3588  EErrType et,
3589  const string& msg,
3590  const CSeq_annot& annot)
3591 {
3592  m_Imp.PostErr(sv, et, msg, annot);
3593 }
3594 
3596 (EDiagSev sv,
3597  EErrType et,
3598  const string& msg,
3599  const CSeq_graph& graph)
3600 {
3601  m_Imp.PostErr(sv, et, msg, graph);
3602 }
3603 
3604 
3606 (EDiagSev sv,
3607  EErrType et,
3608  const string& msg,
3609  const CBioseq& sq,
3610  const CSeq_graph& graph)
3611 {
3612  m_Imp.PostErr(sv, et, msg, sq, graph);
3613 }
3614 
3615 
3617 (EDiagSev sv,
3618  EErrType et,
3619  const string& msg,
3620  const CSeq_align& align)
3621 {
3622  m_Imp.PostErr(sv, et, msg, align);
3623 }
3624 
3625 
3627 (EDiagSev sv,
3628  EErrType et,
3629  const string& msg,
3630  const CSeq_entry& entry)
3631 {
3632  m_Imp.PostErr(sv, et, msg, entry);
3633 }
3634 
// Returns a reference to the CCacheImpl obtained from m_Imp.GetCache().
// Pure forwarding: no state is read or changed here beyond that call.
// (The line carrying the function name is not visible in this view.)
3635 CCacheImpl&
3637 {
3638  return m_Imp.GetCache();
3639 }
3640 
3641 
3643 {
3644  CSeq_entry_Handle parent = seh.GetParentEntry();
3645  if (!parent || !parent.IsSet()) {
3646  return false;
3647  }
3649  if (!pset) {
3650  return false;
3651  }
3652  if (pset->IsSetSeq_set() && pset->GetSeq_set().size() > 10) {
3653  return true;
3654  } else {
3655  return s_HasTopSetSiblings(parent);
3656  }
3657 }
3658 
3659 
3661 {
3662  CSeq_entry_Handle appropriate_parent;
3663 
3664  CSeq_entry_Handle np;
3665  CSeq_entry_Handle gps;
3666  if (seh.IsSet() && seh.GetSet().IsSetClass()) {
3667  if (seh.GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
3668  np = seh;
3669  } else if (s_IsGoodTopSetClass(seh.GetSet().GetClass())) {
3670  gps = seh;
3671  }
3672  } else if (seh.IsSeq()) {
3674  if (p && p.IsSet() && p.GetSet().IsSetClass()) {
3676  np = p;
3677  } else if (s_IsGoodTopSetClass(p.GetSet().GetClass())) {
3678  gps = p;
3679  }
3680  }
3681  }
3682  if (gps) {
3683  appropriate_parent = gps;
3684  } else if (np) {
3686  if (gp && gp.IsSet() && gp.GetSet().IsSetClass() &&
3688  appropriate_parent = gp;
3689  } else {
3690  appropriate_parent = np;
3691  }
3692  } else {
3693  appropriate_parent = seh;
3694  }
3695  return appropriate_parent;
3696 }
3697 
3698 
3701  CConstRef<CPubdesc> pub)
3702 {
3703  // first, try to receive from cache
3705  m_pubdescCache.find(pub);
3706  if( find_iter != m_pubdescCache.end() ) {
3707  return *find_iter->second;
3708  }
3709 
3710  CRef<CPubdescInfo> pInfo(new CPubdescInfo);
3712  *pub, pInfo->m_pmids, pInfo->m_muids,
3713  pInfo->m_serials, pInfo->m_published_labels,
3714  pInfo->m_unpublished_labels);
3715  m_pubdescCache[pub] = pInfo;
3716  return *pInfo;
3717 }
3718 
// Ordering comparison between this key and `rhs` (both SFeatKey).
// Fields are compared in priority order and the first one that differs
// decides the result:
//   1. feat_type
//   2. feat_subtype
//   3. bioseq_h
// When the first two fields are equal the result is bioseq_h < rhs.bioseq_h,
// which is false for fully equal keys.
// (The line carrying the function name is not visible in this view;
// presumably this is the less-than operator - TODO confirm.)
3719 bool
3721  const SFeatKey & rhs) const
3722 {
3723  if( feat_type != rhs.feat_type ) {
3724  return feat_type < rhs.feat_type;
3725  } else if( feat_subtype != rhs.feat_subtype ) {
3726  return feat_subtype < rhs.feat_subtype;
3727  } else {
3728  return bioseq_h < rhs.bioseq_h;
3729  }
3730 }
3731 
// Equality comparison between this key and `rhs` (both SFeatKey).
// True only when feat_type, feat_subtype and bioseq_h all compare equal.
// (The line carrying the function name is not visible in this view;
// presumably this is the equality operator - TODO confirm.)
3732 bool
3734  const SFeatKey & rhs) const
3735 {
3736  return (feat_type == rhs.feat_type) &&
3737  (feat_subtype == rhs.feat_subtype) && (bioseq_h == rhs.bioseq_h);
3738 }
3739 
// Looks up the features stored in m_featCache under `featKey`, filling the
// cache for featKey.bioseq_h on first use.
//
// Steps:
//   1. If `featKey` is already a key of m_featCache, return its value.
//   2. Otherwise look for the per-bioseq marker key `bioseq_check_key`
//      (its constructor arguments are on a line not visible in this view -
//      presumably the "any type / any subtype" key for this bioseq, TODO
//      confirm). If the marker exists, the bioseq was loaded earlier and
//      simply has nothing under `featKey`: return kEmptyFeatValue.
//   3. Otherwise iterate every feature on the bioseq once and append each
//      one under four keys: exact (type, subtype), any-type, any-subtype
//      and any-type-and-any-subtype.
//   4. Touch the marker key so later calls take the step-2 shortcut, then
//      return m_featCache[featKey].
//
// Returns a reference to a value stored in m_featCache, or to
// kEmptyFeatValue in the step-2 case.
3740 const CCacheImpl::TFeatValue &
3742  const CCacheImpl::SFeatKey & featKey)
3743 {
3744  // check common case where already in the cache
3745  TFeatCache::iterator find_iter = m_featCache.find(featKey);
3746  if( find_iter != m_featCache.end() ) {
3747  return find_iter->second;
3748  }
3749 
3750  // check if bioseq already processed, but had no entry requested above
3751  SFeatKey bioseq_check_key(
3753  TFeatCache::const_iterator bioseq_find_iter =
3754  m_featCache.find(bioseq_check_key);
3755  if( bioseq_find_iter != m_featCache.end() ) {
3756  // bioseq was already processed,
3757  // it just happened to not have an entry here
3758  return kEmptyFeatValue;
3759  }
3760 
3761  // bioseq never added to cache, so calculate that now
3762 
3763  // to avoid expensive constructions of CFeat_CI's,
3764  // we iterate through all the feats on
3765  // the bioseq and load them into the cache.
3766  CFeat_CI feat_ci(featKey.bioseq_h);
3767  for( ; feat_ci; ++feat_ci ) {
3768  SFeatKey inner_feat_key(
3769  feat_ci->GetFeatType(), feat_ci->GetFeatSubtype(), featKey.bioseq_h);
3770 
// exact (type, subtype, bioseq) entry
3771  m_featCache[inner_feat_key].push_back(*feat_ci);
3772 
3773  // also add "don't care" entries for partial searches
3774  // (e.g. if caller just wants to search on type but not on
3775  // subtype they can set subtype to kAnyFeatSubtype)
3776  SFeatKey any_type_key = inner_feat_key;
3777  any_type_key.feat_type = kAnyFeatType;
3778  m_featCache[any_type_key].push_back(*feat_ci);
3779 
3780  SFeatKey any_subtype_key = inner_feat_key;
3781  any_subtype_key.feat_subtype = kAnyFeatSubtype;
3782  m_featCache[any_subtype_key].push_back(*feat_ci);
3783 
3784  // for when the caller wants all feats on a bioseq
3785  SFeatKey any_type_or_subtype_key = inner_feat_key;
3786  any_type_or_subtype_key.feat_type = kAnyFeatType;
3787  any_type_or_subtype_key.feat_subtype = kAnyFeatSubtype;
3788  m_featCache[any_type_or_subtype_key].push_back(*feat_ci);
3789  }
3790 
3791  // in case a bioseq has no features, we add a dummy key just to
3792  // remember that so we don't use CFeat_CI again on the same bioseq
3793  m_featCache[bioseq_check_key]; // gets default val
3794 
// operator[] is used on purpose: if nothing was stored under featKey
// above, this yields a default-valued entry instead of failing
3795  return m_featCache[featKey];
3796 }
3797 
3800  const vector<SFeatKey> &featKeys)
3801 {
3802  if( featKeys.empty() ) {
3803  return new TFeatValue;
3804  }
3805 
3806  // all featKeys must have the same bioseq
3807  const CBioseq_Handle & bioseq_h = featKeys[0].bioseq_h;
3808  ITERATE(vector<SFeatKey>, feat_it, featKeys) {
3809  if( feat_it->bioseq_h != bioseq_h ) {
3810  throw runtime_error("GetFeatFromCacheMulti must be called with only 1 bioseq in its args");
3811  }
3812  }
3813 
3814  // set prevents dups
3815  set<TFeatValue::value_type> set_of_feats;
3816 
3817  // combine the answers from every key into the set
3818  ITERATE(vector<SFeatKey>, key_it, featKeys ) {
3819  const TFeatValue & feat_value = GetFeatFromCache(*key_it);
3820  copy(BEGIN_COMMA_END(feat_value), inserter(
3821  set_of_feats, set_of_feats.begin()));
3822  }
3823 
3824  // go through every feature on the bioseq and remember any that match what's in the set
3825  // (The purpose of this step is to return the feats in the same
3826  // order they were on the original bioseq. In the future, we may
3827  // consider adding a flag to avoid sorting for time purposes).
3828  AutoPtr<TFeatValue> answer(new TFeatValue);
3829  SFeatKey all_feats_key(
3830  kAnyFeatType, kAnyFeatSubtype, bioseq_h);
3831  const TFeatValue & all_feats_vec = GetFeatFromCache(all_feats_key);
3832  ITERATE(TFeatValue, feat_it, all_feats_vec) {
3833  if( set_of_feats.find(*feat_it) != set_of_feats.end() ) {
3834  answer->push_back(*feat_it);
3835  }
3836  }
3837 
3838  return answer;
3839 }
3840 
3841 
3842 //LCOV_EXCL_START
3843 //not used
// Ordering comparison against `rhs` for a key made of m_eFeatKeyStr,
// m_bioseq and m_feat_str (presumably SFeatStrKey - the header line with the
// name and parameter list is not visible in this view, TODO confirm).
// The first differing field decides, in this order:
//   1. m_eFeatKeyStr
//   2. m_bioseq
//   3. m_feat_str, compared with s_QuickStringLess
3844 bool
3846 {
3847  if( m_eFeatKeyStr != rhs.m_eFeatKeyStr ) {
3848  return m_eFeatKeyStr < rhs.m_eFeatKeyStr;
3849  }
3850  if( m_bioseq != rhs.m_bioseq ) {
3851  return m_bioseq < rhs.m_bioseq;
3852  }
3853  return s_QuickStringLess(m_feat_str, rhs.m_feat_str);
3854 }
3855 
3856 
// Equality comparison against `rhs` for a key made of m_eFeatKeyStr,
// m_bioseq and m_feat_str (presumably SFeatStrKey - the header line with the
// name and parameter list is not visible in this view, TODO confirm).
// Returns false as soon as m_eFeatKeyStr or m_bioseq differ; otherwise the
// result is whether the two m_feat_str values are equal.
3857 bool
3859 {
3860  if( m_eFeatKeyStr != rhs.m_eFeatKeyStr ) {
3861  return false;
3862  }
3863  if( m_bioseq != rhs.m_bioseq ) {
3864  return false;
3865  }
3866  return (m_feat_str == rhs.m_feat_str);
3867 }
3868 
3869 
// Returns the features stored in m_featStrKeyToFeatsCache under
// `feat_str_key`, or kEmptyFeatValue when there is no such entry.
//
// Parameters:
//   feat_str_key - key to look up; its m_bioseq member is also used as the
//                  fallback source of features and of the TSE handle.
//   tse_arg      - TSE to index; when it tests false, the TSE of
//                  feat_str_key.m_bioseq is used instead.
// At least one of the two must be valid (checked with _ASSERT).
//
// The block that fills the cache is guarded by a condition on a line that is
// not visible in this view (per the comment below, "if empty"). It walks
// features selected by `sel` (declared on another missing line) and, for each
// one, stores it under a label key and a locus-tag key. Each key is stored
// for the bioseq resolved from the feature's location and, when that bioseq
// handle is valid, again with m_bioseq set to kAnyBioseq.
3870 const CCacheImpl::TFeatValue &
3872  const SFeatStrKey & feat_str_key, const CTSE_Handle & tse_arg)
3873 {
3874  const CBioseq_Handle & search_bsh = feat_str_key.m_bioseq;
3875 
3876  // caller must give us something to work with
3877  _ASSERT(search_bsh || tse_arg);
3878 
3879  const CTSE_Handle & tse = (tse_arg ? tse_arg : search_bsh.GetTSE_Handle());
3880 
3881  // load cache if empty
3883  // (for now just indexes genes, but more may be added in the future)
3885  AutoPtr<CFeat_CI> p_gene_ci;
3886  // if we have TSE, get all features on it; otherwise, just get
3887  // the features from the bioseq
3888  if( tse ) {
3889  p_gene_ci.reset(new CFeat_CI(tse, sel));
3890  } else {
3891  p_gene_ci.reset(new CFeat_CI(search_bsh, sel));
3892  }
3893  CFeat_CI & gene_ci = *p_gene_ci; // for convenience
3894 
3895  for( ; gene_ci; ++gene_ci ) {
// NOTE(review): tse.GetScope() is called here even on the path where
// `tse` tested false above - confirm that is safe or unreachable
3896  CBioseq_Handle bsh = tse.GetScope().GetBioseqHandle(gene_ci->GetLocation());
3897  string label;
3898  const CGene_ref & gene_ref = gene_ci->GetData().GetGene();
3899 
3900  // for each one, add an entry for using given Bioseq and the
3901  // kAnyBioseq (so users can search on any bioseq)
3902  gene_ref.GetLabel(&label);
3903  SFeatStrKey label_key(eFeatKeyStr_Label, bsh, label);
3904  m_featStrKeyToFeatsCache[label_key].push_back(*gene_ci);
3905  if( bsh ) {
3906  label_key.m_bioseq = kAnyBioseq;
3907  m_featStrKeyToFeatsCache[label_key].push_back(*gene_ci);
3908  }
3909 
// a gene without a locus tag is indexed under the empty string
3910  const string & locus_tag = (
3911  gene_ref.IsSetLocus_tag() ? gene_ref.GetLocus_tag() :
3912  kEmptyStr);
3913  SFeatStrKey locus_tag_key(eFeatKeyStr_LocusTag, bsh, locus_tag);
3914  m_featStrKeyToFeatsCache[locus_tag_key].push_back(*gene_ci);
3915  if( bsh ) {
3916  locus_tag_key.m_bioseq = kAnyBioseq;
3917  m_featStrKeyToFeatsCache[locus_tag_key].push_back(*gene_ci);
3918  }
3919  }
3920  }
3921 
3922  // get from cache, if possible
3924  m_featStrKeyToFeatsCache.find(feat_str_key);
3925  if( find_iter != m_featStrKeyToFeatsCache.end() ) {
3926  return find_iter->second;
3927  } else {
3928  // nothing found
3929  return kEmptyFeatValue;
3930  }
3931 }
3932 
3933 
3936  const CCacheImpl::TFeatToBioseqKey & feat_to_bioseq_key,
3937  const CTSE_Handle & tse)
3938 {
3939  // load cache if empty
3940  if( m_featToBioseqCache.empty() ) {
3941  CBioseq_CI bioseq_ci(tse);
3942  for( ; bioseq_ci; ++bioseq_ci ) {
3943  CFeat_CI feat_ci(*bioseq_ci);
3944  for( ; feat_ci; ++feat_ci ) {
3945  m_featToBioseqCache[*feat_ci].insert(*bioseq_ci);
3946  }
3947  }
3948  }
3949 
3950  // we're being given the map to a feature, so we should've loaded
3951  // at least one feature when we loaded the cache
3953 
3954  // load from the cache
3956  m_featToBioseqCache.find(feat_to_bioseq_key);
3957  if( find_iter != m_featToBioseqCache.end() ) {
3958  return find_iter->second;
3959  } else {
3960  const static TFeatToBioseqValue kEmptyFeatToBioseqCache;
3961  return kEmptyFeatToBioseqCache;
3962  }
3963 }
3964 //LCOV_EXCL_STOP
3965 
3969  const CTSE_Handle & tse)
3970 {
3971  _ASSERT(tse);
3972 
3973  // load cache if empty
3974  if( m_IdToBioseqCache.empty() ) {
3975  CBioseq_CI bioseq_ci(tse);
3976  for( ; bioseq_ci; ++bioseq_ci ) {
3977  const CBioseq_Handle::TId & ids = bioseq_ci->GetId();
3978  ITERATE(CBioseq_Handle::TId, id_it, ids) {
3979  m_IdToBioseqCache[id_it->GetSeqId()] = *bioseq_ci;
3980  }
3981  }
3982  }
3983 
3984  // there should be at least one Bioseq otherwise there wouldn't
3985  // be anything to validate.
3987 
3989  if( find_iter != m_IdToBioseqCache.end() ) {
3990  return find_iter->second;
3991  } else {
3992  static const TIdToBioseqValue s_EmptyResult;
3993  return s_EmptyResult;
3994  }
3995 }
3996 
3999  CScope *scope, const CSeq_loc& loc, const CTSE_Handle & tse)
4000 {
4001  _ASSERT(scope || tse);
4002  if( ! tse || (!tse.GetTopLevelEntry().IsSet() && !tse.GetTopLevelEntry().IsSeq())) {
4003  // fall back on old style
4004  return BioseqHandleFromLocation(scope, loc);
4005  }
4006 
4007 
4008  for ( CSeq_loc_CI citer (loc); citer; ++citer) {
4009  CConstRef<CSeq_id> id(&citer.GetSeq_id());
4010  const TIdToBioseqValue & bioseq = GetIdToBioseq(id, tse);
4011  if( bioseq ) {
4012  return bioseq;
4013  }
4014  }
4015 
4016  // nothing found, so fall back on old style if possible
4017  if( scope ) {
4018  return BioseqHandleFromLocation(scope, loc);
4019  } else {
4020  return kEmptyBioseqHandle;
4021  }
4022 }
4023 
4024 
4026 {
4028  m_featCache.clear();
4032 }
4033 
4034 
4035 
4036 
4037 
4038 END_SCOPE(validator)
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
EErrType
@ eErr_SEQ_FEAT_WrongQualOnImpFeat
@ eErr_SEQ_DESCR_ObsoleteSourceQual
@ eErr_SEQ_DESCR_ObsoleteSourceLocation
@ eErr_SEQ_INST_FarFetchFailure
@ eErr_SEQ_FEAT_WholeLocation
@ eErr_SEQ_INST_ShortSeq
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_FEAT_EcNumberProblem
@ eErr_SEQ_FEAT_DuplicateAnticodonInterval
@ eErr_SEQ_INST_CompleteGenomeHasGaps
@ eErr_SEQ_FEAT_CDShasTooManyXs
@ eErr_SEQ_FEAT_TranslExceptPhase
@ eErr_SEQ_FEAT_MinusStrandProtein
@ eErr_SEQ_INST_CompleteTitleProblem
@ eErr_SEQ_PKG_EmptySet
@ eErr_SEQ_DESCR_UnwantedCompleteFlag
@ eErr_SEQ_FEAT_GeneXrefWithoutLocus
@ eErr_SEQ_FEAT_BadLocation
@ eErr_SEQ_FEAT_GenesInconsistent
@ eErr_SEQ_INST_HighNContentStretch
@ eErr_SEQ_PKG_NoBioseqFound
@ eErr_SEQ_FEAT_PseudoRnaHasProduct
@ eErr_SEQ_DESCR_InconsistentBioSources
@ eErr_GENERIC_PastReleaseDate
@ eErr_SEQ_DESCR_BioSourceDbTagConflict
@ eErr_SEQ_FEAT_UnknownImpFeatQual
@ eErr_SEQ_FEAT_DuplicateExonInterval
@ eErr_GENERIC_UnnecessaryPubEquiv
@ eErr_SEQ_DESCR_BioSourceOnProtein
@ eErr_SEQ_DESCR_LatLonRange
@ eErr_SEQ_FEAT_UnnecessaryTranslExcept
@ eErr_SEQ_GRAPH_GraphBioseqId
@ eErr_SEQ_FEAT_MixedStrand
@ eErr_SEQ_FEAT_BadRRNAcomponentOrder
@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence
@ eErr_SEQ_FEAT_BadGeneOntologyFormat
@ eErr_SEQ_DESCR_LatLonCountry
@ eErr_SEQ_PKG_NucProtSetHasTitle
@ eErr_SEQ_FEAT_IllegalDbXref
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_BadAnticodonAA
@ eErr_SEQ_FEAT_MissingCDSproduct
@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap
@ eErr_SEQ_FEAT_TranslExceptAndRnaEditing
@ eErr_GENERIC_BarcodeTooManyNs
@ eErr_SEQ_PKG_BioseqSetClassNotSet
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_FEAT_MissingProteinName
@ eErr_SEQ_DESCR_BadPCRPrimerSequence
@ eErr_SEQ_FEAT_GeneXrefWithoutGene
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_PKG_MissingSetTitle
@ eErr_SEQ_FEAT_InvalidQualifierValue
@ eErr_SEQ_FEAT_GeneOntologyTermMissingGOID
@ eErr_SEQ_FEAT_ProtRefHasNoData
@ eErr_SEQ_GRAPH_GraphSeqLocLen
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_LatLonValue
@ eErr_SEQ_FEAT_TransLen
@ eErr_SEQ_FEAT_FeatureCitationProblem
@ eErr_SEQ_DESCR_IdenticalInstitutionCode
@ eErr_SEQ_PKG_ImproperlyNestedSets
@ eErr_SEQ_INST_UnknownLengthGapNot100
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_MultipleProtRefs
@ eErr_SEQ_FEAT_MultipleEquivPublications
@ eErr_SEQ_PKG_SeqSubmitWithWgsSet
@ eErr_SEQ_PKG_InconsistentMoltypeSet
@ eErr_SEQ_INST_ConflictingBiomolTech
@ eErr_SEQ_FEAT_MissingQualOnImpFeat
@ eErr_SEQ_PKG_INSDRefSeqPackaging
@ eErr_SEQ_FEAT_LocusCollidesWithLocusTag
@ eErr_SEQ_PKG_GPSnonGPSPackaging
@ eErr_SEQ_DESCR_BadCollectionDate
@ eErr_SEQ_FEAT_MultipleEquivBioSources
@ eErr_SEQ_FEAT_CDSwithNoMRNAOverlap
@ eErr_SEQ_DESCR_BadInstitutionCode
@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame
@ eErr_SEQ_FEAT_ProteinNameHasPMID
@ eErr_SEQ_FEAT_RepeatRegionNeedsNote
@ eErr_SEQ_DESCR_BadAltitude
@ eErr_SEQ_FEAT_GeneXrefStrandProblem
@ eErr_SEQ_FEAT_MissingTrnaAA
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs
@ eErr_SEQ_FEAT_CollidingFeatureIDs
@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID
@ eErr_SEQ_FEAT_OrfCdsHasProduct
@ eErr_SEQ_FEAT_ImproperBondLocation
@ eErr_SEQ_PKG_GraphPackagingProblem
@ eErr_SEQ_INST_OverlappingDeltaRange
@ eErr_SEQ_FEAT_BadTranssplicedInterval
@ eErr_SEQ_INST_SeqLocLength
@ eErr_SEQ_DESCR_MultipleTaxonIDs
@ eErr_SEQ_DESCR_BadKeyword
@ eErr_SEQ_FEAT_UnknownImpFeatKey
@ eErr_SEQ_DESCR_Inconsistent
@ eErr_SEQ_PKG_ArchaicFeatureLocation
@ eErr_GENERIC_BadDate
@ eErr_GENERIC_BarcodeTestFails
@ eErr_SEQ_FEAT_NestedSeqLocMix
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_UnknownFeatureQual
@ eErr_SEQ_DESCR_MultipleChromosomes
@ eErr_SEQ_FEAT_Range
@ eErr_SEQ_FEAT_InconsistentGeneOntologyTermAndId
@ eErr_SEQ_PKG_MisplacedMolInfo
@ eErr_GENERIC_EmbeddedScript
@ eErr_GENERIC_BarcodeTestPasses
@ eErr_SEQ_GRAPH_GraphAbove
@ eErr_SEQ_FEAT_FeatureInsideGap
@ eErr_SEQ_FEAT_DifferntIdTypesInSeqLoc
@ eErr_SEQ_FEAT_BadFullLengthFeature
@ eErr_SEQ_FEAT_RNAtype0
@ eErr_SEQ_FEAT_BadCharInAuthorName
@ eErr_SEQ_FEAT_FarLocation
@ eErr_SEQ_INST_BadHTGSeq
@ eErr_SEQ_FEAT_InvalidFuzz
@ eErr_SEQ_FEAT_InvalidInferenceValue
@ eErr_SEQ_FEAT_GeneXrefNeeded
@ eErr_SEQ_INST_UnexpectedIdentifierChange
@ eErr_SEQ_FEAT_InconsistentRRNAstrands
@ eErr_SEQ_PKG_ArchaicFeatureProduct
@ eErr_SEQ_DESCR_MultipleSourceQualifiers
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlap
@ eErr_SEQ_FEAT_BadTrailingCharacter
@ eErr_SEQ_DESCR_WrongVoucherType
@ eErr_SEQ_INST_ProteinsHaveGeneralID
@ eErr_SEQ_GRAPH_GraphOutOfOrder
@ eErr_SEQ_FEAT_BadInternalCharacter
@ eErr_SEQ_DESCR_NoSourceDescriptor
@ eErr_SEQ_DESCR_BadCollectionCode
@ eErr_SEQ_FEAT_BadProteinName
@ eErr_SEQ_FEAT_FeatureProductInconsistency
@ eErr_GENERIC_PublicationInconsistency
@ eErr_GENERIC_BadSubmissionAuthorName
@ eErr_GENERIC_CollidingSerialNumbers
@ eErr_SEQ_PKG_ComponentMissingTitle
@ eErr_SEQ_DESCR_DBLinkMissingUserObject
@ eErr_SEQ_PKG_InternalGenBankSet
@ eErr_SEQ_DESCR_BioSourceMissing
@ eErr_SEQ_FEAT_BadAnticodonCodon
@ eErr_SEQ_FEAT_BadTrailingHyphen
@ eErr_SEQ_FEAT_OldLocusTagMismtach
@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource
@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS
@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct
@ eErr_SEQ_FEAT_ConflictFlagSet
@ eErr_SEQ_FEAT_StrandOther
@ eErr_SEQ_PKG_FeaturePackagingProblem
@ eErr_SEQ_DESCR_MultipleNames
@ eErr_SEQ_INST_BadSeqIdFormat
@ eErr_SEQ_PKG_GenomicProductPackagingProblem
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_BadEcNumberFormat
@ eErr_SEQ_FEAT_CDSproductPackagingProblem
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_INST_InternalNsInSeqRaw
@ eErr_SEQ_DESCR_BadOrgMod
@ eErr_SEQ_INST_TerminalNs
@ eErr_SEQ_DESCR_BadOrganelleLocation
@ eErr_SEQ_FEAT_NoNameForProtein
@ eErr_SEQ_FEAT_RptUnitRangeProblem
@ eErr_SEQ_FEAT_SeqLocOrder
@ eErr_SEQ_DESCR_TaxonomyIsSpeciesProblem
@ eErr_SEQ_FEAT_CDSmRNAXrefLocationProblem
@ eErr_SEQ_PKG_SingleItemSet
@ eErr_SEQ_DESCR_BioSourceNeedsChromosome
@ eErr_SEQ_FEAT_VectorContamination
@ eErr_SEQ_FEAT_AbuttingIntervals
@ eErr_SEQ_FEAT_CDSrange
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_DESCR_BioSourceInconsistency
@ eErr_SEQ_FEAT_OnlyGeneXrefs
@ eErr_SEQ_FEAT_TranslExcept
@ eErr_SEQ_INST_InternalGapsInSeqRaw
@ eErr_SEQ_FEAT_GeneRefHasNoData
@ eErr_SEQ_INST_DuplicateSegmentReferences
@ eErr_SEQ_FEAT_TooManyInferenceAccessions
@ eErr_SEQ_FEAT_TerminalXDiscrepancy
@ eErr_SEQ_FEAT_MiscFeatureNeedsNote
@ eErr_SEQ_DESCR_CollidingPublications
@ eErr_SEQ_FEAT_GenomeSetMixedStrand
@ eErr_SEQ_FEAT_BadCharInAuthorLastName
@ eErr_SEQ_FEAT_HypotheticalProteinMismatch
@ eErr_SEQ_INST_TpaAssemblyProblem
@ eErr_SEQ_FEAT_MissingGeneXref
AutoPtr –.
Definition: ncbimisc.hpp:401
CAlign_CI –.
Definition: align_ci.hpp:63
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CAuthor –.
Definition: Author.hpp:59
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CSeq_entry * GetParentEntry(void) const
Definition: Bioseq.hpp:174
static void GetPubdescLabels(const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
For Publication Citations Get labels for a pubdesc.
Definition: cleanup.cpp:3140
Definition: Dbtag.hpp:53
bool GetDBFlags(bool &is_refseq, bool &is_src, string &correct_caps) const
Definition: Dbtag.cpp:327
bool IsSkippable(void) const
Definition: Dbtag.cpp:281
CFeat_CI –.
Definition: feat_ci.hpp:64
void Clear()
Definition: gene_cache.hpp:89
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
Definition: gene_cache.cpp:106
void GetLabel(string *label) const
Definition: Gene_ref.cpp:57
CGraph_CI –.
Definition: graph_ci.hpp:234
CMappedFeat –.
Definition: mapped_feat.hpp:59
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CObjectManager –.
const string & GetDivision(void) const
Definition: Org_ref.cpp:164
bool IsSetDivision(void) const
Definition: Org_ref.cpp:159
@ eContent
Definition: Pub.hpp:66
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static bool RequireLocationIntervalsInBiologicalOrder(ESubtype subtype)
static bool AllowAdjacentIntervals(ESubtype subtype)
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
@ eContent
Definition: Seq_entry.hpp:93
void GetLabel(string *label, ELabelType type) const
Definition: Seq_entry.cpp:274
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CSubmit_block –.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
Definition: tse_handle.cpp:205
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
Definition: tse_handle.cpp:604
CScope & GetScope(void) const
Returns scope.
Definition: tse_handle.hpp:325
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Definition: typeinfo.hpp:76
Thrown on an attempt to write unassigned data member.
Definition: exception.hpp:84
static string GetFeatureBioseqLabel(const CSeq_feat &ft, CRef< CScope > scope, bool suppress_context)
static string GetDescriptorContent(const CSeqdesc &ds)
static string GetFeatureLocationLabel(const CSeq_feat &ft, CRef< CScope > scope, bool suppress_context)
static string GetFeatureProductLocLabel(const CSeq_feat &ft, CRef< CScope > scope, bool suppress_context)
static string GetDescriptorLabel(const CSeqdesc &ds, const CSeq_entry &ctx, CRef< CScope > scope, bool suppress_context)
static string GetFeatureContentLabel(const CSeq_feat &feat, CRef< CScope > scope)
static string GetFeatureIdLabel(const CSeq_feat &ft)
static string GetBioseqSetLabel(const CBioseq_set &st, CRef< CScope > scope, bool suppress_context)
void ValidateSeqAlign(const CSeq_align &align, int order=-1)
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
CCacheImpl & GetCache()
virtual ~CValidError_base()
static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)
CValidError_imp & m_Imp
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
CValidError_base(CValidError_imp &imp)
void ValidateBioseq(const CBioseq &seq)
bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)
bool GetTSANStretchErrors(const CBioseq &seq)
void ValidateBioseqSet(const CBioseq_set &seqset)
void ValidateSeqDesc(const CSeqdesc &desc, const CSeq_entry &ctx)
Validate descriptors as stand alone objects (no context)
void SetScope(CScope &scope)
void SetTSE(CSeq_entry_Handle seh)
bool GetTSACDSOnMinusStrandErrors(const CSeq_feat &feat, const CBioseq &seq)
static bool GetPrefixAndAccessionFromInferenceAccession(string inf_accession, string &prefix, string &accession)
void ValidateSeqFeat(const CSeq_feat &feat)
static vector< string > GetAccessionsFromInferenceString(string inference, string &prefix, string &remainder, bool &same_species)
void ValidateSeqGraph(const CSeq_graph &graph)
void x_ReportInvalidFuzz(const CPacked_seqint &packed_int, const CSerialObject &obj)
CRef< CObjectManager > m_ObjMgr
bool IsGED() const
void SetScope(const CSeq_entry &se)
void FindCollidingSerialNumbers(const CSerialObject &obj)
Definition: valid_pub.cpp:1323
const CSeq_entry_Handle & GetTSEH()
static bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
Definition: validatorp.cpp:359
static bool IsTSAIntermediate(const CBioseq &seq)
void x_CheckPackedInt(const CPacked_seqint &packed_int, SLocCheck &lc, const CSerialObject &obj)
static bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
bool IsNC() const
const CBioSourceKind & BioSourceKind() const
SIZE_TYPE m_NumPseudogene
bool IsNS() const
CRef< CScope > m_Scope
bool HasGiOrAccnVer() const
SIZE_TYPE m_NumTpaWithHistory
void SetTSE(const CSeq_entry_Handle &seh)
const SValidatorContext & GetContext() const
Definition: validatorp.cpp:204
CValidator::TProgressCallback m_PrgCallback
bool IsPDB() const
CValidError * m_ErrRepository
CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)
bool IsValidateAlignments() const
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id)
Definition: validatorp.cpp:248
void ValidateCitations(const CSeq_entry_Handle &seh)
bool DoesAnyFeatLocHaveGI() const
void FindNonAsciiText(const CSerialObject &obj)
void AddBioseqWithNoBiosource(const CBioseq &seq)
void ValidateSeqLocIds(const CSeq_loc &loc, const CSerialObject &obj)
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
void x_DoBarcodeTests(CSeq_entry_Handle seh)
CConstRef< CSeq_annot > m_SeqAnnot
bool IsNM() const
bool DoesAnyProductLocHaveGI() const
bool GetTSAConflictingBiomolTechErrors(const CSeq_entry_Handle &se)
void x_AddValidErrItem(EDiagSev sev, EErrType type, const string &msg, const string &desc, const CSerialObject &obj, const string &accession, const int version)
unique_ptr< CValidatorEntryInfo > m_pEntryInfo
SIZE_TYPE m_NumMisplacedGraphs
bool IsNT() const
bool IsGenbank() const
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsNZ() const
void Setup(const CSeq_entry_Handle &seh)
bool Validate(const CSeq_entry &se, const CCit_sub *cs=nullptr, CScope *scope=nullptr)
SIZE_TYPE m_NumTpaWithoutHistory
static bool IsWGSIntermediate(const CBioseq &seq)
CValidator::CProgressInfo m_PrgInfo
void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)
bool IsSerialNumberInComment(const string &comment)
bool IsGenomic() const
void ValidateTaxonomy(const CSeq_entry &se)
bool IsFarSequence(const CSeq_id &id)
Definition: validatorp.cpp:234
const CTSE_Handle & GetTSE_Handle()
void FindEmbeddedScript(const CSerialObject &obj)
bool IsHugeFileMode() const
Definition: validatorp.cpp:211
SIZE_TYPE m_NumSmallGenomeSetMisplaced
void ValidateCitSub(const CCit_sub &cs, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
Definition: valid_pub.cpp:1078
void SetOptions(Uint4 options)
Definition: validatorp.cpp:270
bool m_ValidateInferenceAccessions
void ValidateSubmitBlock(const CSubmit_block &block, const CSeq_submit &ss)
bool IsNoCitSubPubs() const
void SetErrorRepository(CValidError *errors)
Definition: validatorp.cpp:305
bool IsNP() const
vector< CConstRef< CBioseq > > m_BioseqWithNoSource
void ValidateAffil(const CAffil::TStd &std, const CSerialObject &obj, const CSeq_entry *ctx)
Definition: valid_pub.cpp:988
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
Definition: validatorp.cpp:257
bool IsRefSeq() const
bool IsGPS() const
bool IsINSDInSep() const
bool IsNG() const
const CSeq_entry * GetAncestor(const CBioseq &seq, CBioseq_set::EClass clss)
bool IsGeneious() const
SIZE_TYPE m_NumGeneXrefs
bool x_IsFarFetchFailure(const CSeq_loc &loc)
bool IsNoPubs() const
CValidError_imp(CObjectManager &objmgr, shared_ptr< SValidatorContext > pContext, CValidError *errors, Uint4 options=0)
Definition: validatorp.cpp:169
void PostBadDateError(EDiagSev sv, const string &msg, int flags, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void AddProtWithoutFullRef(const CBioseq_Handle &seq)
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
const CValidatorEntryInfo & GetEntryInfo() const
CSeq_entry_Handle m_TSEH
bool RaiseGenomeSeverity(EErrType et)
Definition: validatorp.cpp:602
SIZE_TYPE m_NumBioseq_set
bool RequireLocalProduct(const CSeq_id *sid) const
bool IsGI() const
bool IsGpipe() const
bool IsFarFetchCDSproducts() const
bool IsPatent() const
bool DoesAnyProteinHaveGeneralID() const
virtual ~CValidError_imp()
Definition: validatorp.cpp:189
void x_Init(Uint4 options)
Definition: validatorp.cpp:180
bool IsWP() const
void ReportMissingPubs(const CSeq_entry &se, const CCit_sub *cs)
Definition: valid_pub.cpp:1277
bool IsNoBioSource() const
bool IsMixedStrands(const CSeq_loc &loc)
bool IsLocalGeneralOnly() const
CBioSourceKind m_biosource_kind
CConstRef< CSeq_entry > m_TSE
CGeneCache m_GeneCache
void x_InitLocCheck(SLocCheck &lc, const string &prefix)
SValidatorContext & SetContext()
Definition: validatorp.cpp:194
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
bool IsEmbl() const
void SetProgressCallback(CValidator::TProgressCallback callback, void *user_data)
bool GetTSACDSOnMinusStrandErrors(const CSeq_entry_Handle &se)
void ValidateMultipleTaxIds(const CSeq_entry_Handle &seh)
bool IsHugeSet(const CBioseq_set &bioseqSet) const
Definition: validatorp.cpp:219
bool IsSeqSubmit() const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool GetTSANStretchErrors(const CSeq_entry_Handle &se)
bool IsXR() const
shared_ptr< SValidatorContext > m_pContext
bool x_CheckSeqInt(CConstRef< CSeq_id > &id_cur, const CSeq_interval *int_cur, ENa_strand &strand_cur)
bool DoesAnyGeneHaveLocusTag() const
bool IsTPE() const
void x_CheckLoc(const CSeq_loc &loc, const CSerialObject &obj, SLocCheck &lc, bool lowerSev=false)
bool IsNR() const
const CSeq_entry & GetTSE() const
bool IsFarFetchMRNAproducts() const
CValidatorEntryInfo & x_SetEntryInfo()
SIZE_TYPE m_NumMisplacedFeatures
CCacheImpl & GetCache()
bool IsDdbj() const
void x_CheckForStrandChange(SLocCheck &lc)
bool IsNW() const
void ReportMissingBiosource(const CSeq_entry &se)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
Definition: valid_pub.cpp:77
void AddValidErrItem(EDiagSev sev, unsigned int ec, const string &msg, const string &desc, const CSerialObject &obj, const string &acc, const int ver, const string &location=kEmptyStr, const int seq_offset=0)
Definition: ValidError.cpp:58
void SetNoBioSource(bool val=true)
Definition: entry_info.cpp:42
void SetPatent(bool val=true)
Definition: entry_info.cpp:46
void SetGpipe(bool val=true)
Definition: entry_info.cpp:52
bool IsDdbj() const
Definition: entry_info.cpp:75
void SetGenomic(bool val=true)
Definition: entry_info.cpp:55
bool IsGPS() const
Definition: entry_info.cpp:69
void SetProductLocHasGI(bool val=true)
Definition: entry_info.cpp:60
bool IsNoPubs() const
Definition: entry_info.cpp:66
bool DoesAnyGeneHaveLocusTag() const
Definition: entry_info.cpp:87
void SetPDB(bool val=true)
Definition: entry_info.cpp:45
bool DoesAnyProteinHaveGeneralID() const
Definition: entry_info.cpp:88
void SetProteinHasGeneralID(bool val=true)
Definition: entry_info.cpp:62
bool IsINSDInSep() const
Definition: entry_info.cpp:89
void SetGeneious(bool val=true)
Definition: entry_info.cpp:64
bool IsGeneious() const
Definition: entry_info.cpp:90
void SetNoCitSubPubs(bool val=true)
Definition: entry_info.cpp:41
bool IsEmbl() const
Definition: entry_info.cpp:74
void SetGeneHasLocusTag(bool val=true)
Definition: entry_info.cpp:61
bool IsNoCitSubPubs() const
Definition: entry_info.cpp:67
bool HasGiOrAccnVer() const
Definition: entry_info.cpp:80
void SetGI(bool val=true)
Definition: entry_info.cpp:51
void SetEmbl(bool val=true)
Definition: entry_info.cpp:48
bool IsLocalGeneralOnly() const
Definition: entry_info.cpp:79
bool IsPatent() const
Definition: entry_info.cpp:72
bool IsGpipe() const
Definition: entry_info.cpp:78
bool IsGenomic() const
Definition: entry_info.cpp:81
bool DoesAnyFeatLocHaveGI() const
Definition: entry_info.cpp:85
bool DoesAnyProductLocHaveGI() const
Definition: entry_info.cpp:86
void SetSeqSubmit(bool val=true)
Definition: entry_info.cpp:56
void SetRefSeq(bool val=true)
Definition: entry_info.cpp:47
void SetTPE(bool val=true)
Definition: entry_info.cpp:50
void SetFeatLocHasGI(bool val=true)
Definition: entry_info.cpp:59
bool IsSeqSubmit() const
Definition: entry_info.cpp:82
void SetGPS(bool val=true)
Definition: entry_info.cpp:43
void SetGenbank(bool val=true)
Definition: entry_info.cpp:58
void SetINSDInSep(bool val=true)
Definition: entry_info.cpp:63
void SetLocalGeneralOnly(bool val=true)
Definition: entry_info.cpp:53
void SetNoPubs(bool val=true)
Definition: entry_info.cpp:40
bool IsPDB() const
Definition: entry_info.cpp:71
void SetDdbj(bool val=true)
Definition: entry_info.cpp:49
bool IsGenbank() const
Definition: entry_info.cpp:84
void SetSmallGenomeSet(bool val=true)
Definition: entry_info.cpp:57
bool IsRefSeq() const
Definition: entry_info.cpp:73
bool IsTPE() const
Definition: entry_info.cpp:76
bool IsSmallGenomeSet() const
Definition: entry_info.cpp:83
void SetGED(bool val=true)
Definition: entry_info.cpp:44
void SetGiOrAccnVer(bool val=true)
Definition: entry_info.cpp:54
bool IsGI() const
Definition: entry_info.cpp:77
bool IsGED() const
Definition: entry_info.cpp:70
bool IsNoBioSource() const
Definition: entry_info.cpp:68
Cache various information for one validation run.
Definition: cache_impl.hpp:126
const TFeatValue & GetFeatFromCache(const SFeatKey &featKey)
TFeatStrKeyToFeatsCache m_featStrKeyToFeatsCache
Definition: cache_impl.hpp:255
AutoPtr< TFeatValue > GetFeatFromCacheMulti(const vector< SFeatKey > &featKeys)
const CPubdescInfo & GetPubdescToInfo(CConstRef< CPubdesc > pub)
TIdToBioseqCache m_IdToBioseqCache
Definition: cache_impl.hpp:261
static const CSeqFeatData::ESubtype kAnyFeatSubtype
Definition: cache_impl.hpp:168
TPubdescCache m_pubdescCache
Definition: cache_impl.hpp:249
CBioseq_Handle GetBioseqHandleFromLocation(CScope *scope, const CSeq_loc &loc, const CTSE_Handle &tse)
static const TFeatValue kEmptyFeatValue
Definition: cache_impl.hpp:245
static const CTSE_Handle kEmptyTSEHandle
Definition: cache_impl.hpp:237
static const CSeqFeatData::E_Choice kAnyFeatType
Definition: cache_impl.hpp:167
std::vector< CMappedFeat > TFeatValue
Definition: cache_impl.hpp:164
const TFeatToBioseqValue & GetBioseqsOfFeatCache(const TFeatToBioseqKey &feat_to_bioseq_key, const CTSE_Handle &tse)
const TFeatValue & GetFeatStrKeyToFeats(const SFeatStrKey &feat_str_key, const CTSE_Handle &tse)
TFeatToBioseqCache m_featToBioseqCache
Definition: cache_impl.hpp:258
static const CBioseq_Handle kAnyBioseq
Definition: cache_impl.hpp:203
const TIdToBioseqValue & GetIdToBioseq(const TIdToBioseqKey &key, const CTSE_Handle &tse)
static const CBioseq_Handle kEmptyBioseqHandle
Definition: cache_impl.hpp:244
@ eVal_remote_fetch
Definition: validator.hpp:86
@ eVal_do_barcode_tests
Definition: validator.hpp:100
@ eVal_indexer_version
Definition: validator.hpp:91
@ eVal_do_rubisco_test
Definition: validator.hpp:90
@ eVal_refseq_conventions
Definition: validator.hpp:101
@ eVal_no_context
Definition: validator.hpp:79
@ eVal_generate_golden_file
Definition: validator.hpp:103
@ eVal_collect_locus_tags
Definition: validator.hpp:102
@ eVal_validate_id_set
Definition: validator.hpp:85
@ eVal_ovl_pep_err
Definition: validator.hpp:82
@ eVal_inference_accns
Definition: validator.hpp:93
@ eVal_val_exons
Definition: validator.hpp:81
@ eVal_need_isojta
Definition: validator.hpp:84
@ eVal_use_entrez
Definition: validator.hpp:92
@ eVal_ignore_exceptions
Definition: validator.hpp:94
@ eVal_non_ascii
Definition: validator.hpp:78
@ eVal_report_splice_as_error
Definition: validator.hpp:95
@ eVal_genome_submission
Definition: validator.hpp:98
@ eVal_locus_tag_general_match
Definition: validator.hpp:89
@ eVal_val_align
Definition: validator.hpp:80
@ eVal_far_fetch_cds_products
Definition: validator.hpp:88
@ eVal_ignore_inferences
Definition: validator.hpp:105
@ eVal_do_tax_lookup
Definition: validator.hpp:99
@ eVal_latlon_check_state
Definition: validator.hpp:96
@ eVal_far_fetch_mrna_products
Definition: validator.hpp:87
@ eVal_latlon_ignore_water
Definition: validator.hpp:97
@ eVal_compare_vdjc_to_cds
Definition: validator.hpp:104
@ eVal_seqsubmit_parent
Definition: validator.hpp:83
static TDbxrefValidFlags IsValidDbxref(const CDbtag &xref, bool is_biosource, bool is_refseq_or_gps)
Definition: validator.cpp:617
int TDbxrefValidFlags
Definition: validator.hpp:266
bool(* TProgressCallback)(CProgressInfo *)
Definition: validator.hpp:249
@ eRefSeqNotForSource
Definition: validator.hpp:262
@ eBadCapitalization
Definition: validator.hpp:263
static bool DoesSeqLocContainDuplicateIntervals(const CSeq_loc &loc, CScope &scope)
Definition: validator.cpp:588
static bool DoesSeqLocContainAdjacentIntervals(const CSeq_loc &loc, CScope &scope)
Definition: validator.cpp:561
static bool IsSeqLocCorrectlyOrdered(const CSeq_loc &loc, CScope &scope)
Definition: validator.cpp:514
@ fLabel_Unique
Append a unique tag [V1].
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: set.hpp:45
const_iterator begin() const
Definition: set.hpp:135
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static uch flags
static const char si[8][64]
Definition: des.c:146
static void chk(int check, const char *fmt,...)
Definition: ct_dynamic.c:49
CS_CONTEXT * ctx
Definition: t0006.c:12
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static int lc
Definition: getdata.c:30
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
static const char location[]
Definition: config.c:97
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
Definition: ncbimisc.hpp:865
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
Definition: ncbidiag.hpp:655
@ eDiag_Critical
Critical error message.
Definition: ncbidiag.hpp:654
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages). The label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
bool BadSeqLocSortOrder(const CBioseq_Handle &bsh, const CSeq_loc &loc)
Returns true if the order of Seq_locs is bad, otherwise, false.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ eSeqLocCheck_warning
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether the given feature is pseudo, using the gene associated with the feature if necessary. Checks to...
Definition: sequence.cpp:1428
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
vector< CSeq_id_Handle > TId
CSeq_annot::C_Data::E_Choice Which(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
E_Choice Which(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
TSet GetSet(void) const
const CTSE_Handle & GetTSE_Handle(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
TSeq GetSeq(void) const
TInst_Topology GetInst_Topology(void) const
bool IsSetInst(void) const
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSet(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqFeatData::E_Choice GetFeatType(void) const
bool IsSetInst_Topology(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const TId & GetId(void) const
const TInst & GetInst(void) const
bool IsSeq(void) const
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location.
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
@ eLevel_All
Any bioseq.
Definition: bioseq_ci.hpp:73
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static const char label[]
const TAffil & GetAffil(void) const
Get the Affil member data.
Definition: Author_.hpp:476
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_sub_.hpp:357
bool IsSetAffil(void) const
Check if a value has been assigned to Affil data member.
Definition: Author_.hpp:464
bool IsSetAuthors(void) const
Not necessarily authors of the paper. Check if a value has been assigned to Authors data member.
Definition: Cit_sub_.hpp:345
const TName & GetName(void) const
Get the Name member data.
Definition: Author_.hpp:352
list< CRef< CAuthor > > TStd
Definition: Auth_list_.hpp:170
const TStd & GetStd(void) const
Get the variant data.
Definition: Affil_.cpp:214
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:478
bool IsStd(void) const
Check if variant Std is selected.
Definition: Affil_.hpp:1207
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:497
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:397
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
Definition: Gene_ref_.hpp:781
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:793
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
bool IsSetTag(void) const
Appropriate tag. Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
bool IsName(void) const
Check if variant Name is selected.
Definition: Person_id_.hpp:359
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
ELim
some limit value
Definition: Int_fuzz_.hpp:209
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
Definition: Name_std_.hpp:410
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
const TFirst & GetFirst(void) const
Get the First member data.
Definition: Name_std_.hpp:469
const TLast & GetLast(void) const
Get the Last member data.
Definition: Name_std_.hpp:422
const TName & GetName(void) const
Get the variant data.
Definition: Person_id_.cpp:137
bool IsSetFirst(void) const
Check if a value has been assigned to First data member.
Definition: Name_std_.hpp:457
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
@ eLim_circle
artificial break at origin of circle
Definition: Int_fuzz_.hpp:215
@ eLim_tl
space to left of position
Definition: Int_fuzz_.hpp:214
@ eLim_tr
space to right of position
Definition: Int_fuzz_.hpp:213
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:479
vector< CRef< CDbtag > > TDb
Definition: Org_ref_.hpp:101
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:491
list< CRef< CPub > > TPub
Definition: Pub_set_.hpp:159
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
Definition: Seq_align_.cpp:143
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: Seq_align_.hpp:790
TType GetType(void) const
Get the Type member data.
Definition: Seq_align_.hpp:809
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
Definition: Seq_feat_.hpp:1399
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TGene & GetGene(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
bool IsSetLocation(void) const
The location the feature was made from. Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
@ e_not_set
No variant selected.
@ e_Pub
publication applies to this seq
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
bool IsSetId(void) const
WARNING: this used to be optional. Check if a value has been assigned to Id data member.
Definition: Seq_point_.hpp:378
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetPoint(void) const
Check if a value has been assigned to Point data member.
Definition: Seq_point_.hpp:284
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
bool IsOther(void) const
Check if variant Other is selected.
Definition: Seq_id_.hpp:871
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TVersion GetVersion(void) const
Get the Version member data.
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
const TOther & GetOther(void) const
Get the variant data.
Definition: Seq_id_.cpp:347
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
TStrand GetStrand(void) const
Get the Strand member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
TTo GetTo(void) const
Get the To member data.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
E_Choice
Choice variants.
Definition: Seq_loc_.hpp:96
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
Definition: Seq_id_.hpp:113
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Prf
PRF SEQDB.
Definition: Seq_id_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
@ e_Int
from to
Definition: Seq_loc_.hpp:101
const TTitle & GetTitle(void) const
Get the Title member data.
Definition: Seq_graph_.hpp:775
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
bool IsSetTitle(void) const
Check if a value has been assigned to Title data member.
Definition: Seq_graph_.hpp:763
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_wgs_set
whole genome shotgun project
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
@ e_not_set
No variant selected.
Definition: Seq_entry_.hpp:88
TTopology GetTopology(void) const
Get the Topology member data.
Definition: Seq_inst_.hpp:733
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
bool IsSetTopology(void) const
Check if a value has been assigned to Topology data member.
Definition: Seq_inst_.hpp:708
@ eRepr_seg
segmented sequence
Definition: Seq_inst_.hpp:95
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ eTech_wgs
whole genome shotgun sequencing
Definition: MolInfo_.hpp:143
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_not_set
Not set (note carried over from the ASN.1 spec for molecule class: cDNA = RNA).
Definition: Seq_inst_.hpp:109
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
bool IsSetTool(void) const
Tool used to make submission. Check if a value has been assigned to Tool data member.
const TTool & GetTool(void) const
Get the Tool member data.
const TContact & GetContact(void) const
Get the Contact member data.
const TReldate & GetReldate(void) const
Get the Reldate member data.
const TCit & GetCit(void) const
Get the Cit member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetReldate(void) const
Release by date. Check if a value has been assigned to Reldate data member.
const TData & GetData(void) const
Get the Data member data.
bool IsSetHup(void) const
Hold until publish. Check if a value has been assigned to Hup data member.
THup GetHup(void) const
Get the Hup member data.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsSetSub(void) const
Check if a value has been assigned to Sub data member.
const TContact & GetContact(void) const
Get the Contact member data.
bool IsSetCit(void) const
Citation for this submission. Check if a value has been assigned to Cit data member.
bool IsSetContact(void) const
Who to contact. Check if a value has been assigned to Contact data member.
bool IsSetContact(void) const
WARNING: this will replace the above. Check if a value has been assigned to Contact data member.
Definition of all error codes used in objtools libraries.
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
static int version
Definition: mdb_load.c:29
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
CBioseq_Handle BioseqHandleFromLocation(CScope *m_Scope, const CSeq_loc &loc)
Definition: utilities.cpp:1277
bool IsDateInPast(const CDate &date)
Definition: utilities.cpp:826
string GetDateErrorDescription(int flags)
Definition: utilities.cpp:855
CConstRef< CSeq_id > GetReportableSeqIdForAlignment(const CSeq_align &align, CScope &scope)
Definition: utilities.cpp:399
void AppendBioseqLabel(string &str, const CBioseq &sq, bool supress_context)
Definition: utilities.cpp:1064
bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
Definition: utilities.cpp:3050
string GetAccessionFromBioseqSet(const CBioseq_set &bsst, int *version)
Definition: utilities.cpp:433
bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
Definition: utilities.cpp:3080
string GetValidatorLocationLabel(const CSeq_loc &loc, CScope &scope)
Definition: utilities.cpp:958
bool IsBadSubmissionLastName(const string &last)
Definition: utilities.cpp:3116
string GetAccessionFromBioseq(const CBioseq &bioseq, int *version)
Definition: utilities.cpp:427
bool SeqIsPatent(const CBioseq &seq)
Definition: utilities.cpp:1155
bool IsFarLocation(const CSeq_loc &loc, const CSeq_entry_Handle &seh)
Definition: utilities.cpp:159
const CSeq_feat::TDbxref TDbtags
Definition: utilities.hpp:199
bool IsTemporary(const CSeq_id &id)
Definition: utilities.cpp:2817
bool IsBadSubmissionFirstName(const string &first)
Definition: utilities.cpp:3102
string GetAccessionFromObjects(const CSerialObject *obj, const CSeq_entry *ctx, CScope &scope, int *version)
Definition: utilities.cpp:443
#define fi
static const char * prefix[]
Definition: pcregrep.c:405
#define FOR_EACH_DESCRIPTOR_ON_BIOSEQ
Definition: seq_macros.hpp:241
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
#define FOR_EACH_GBQUAL_ON_FEATURE
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FOR_EACH_SEQDESC_ON_SEQENTRY(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQENTRY EDIT_EACH_SEQDESC_ON_SEQENTRY.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
#define BEGIN_COMMA_END(container)
bool operator==(const SFeatKey &rhs) const
CSeqFeatData::E_Choice feat_type
Definition: cache_impl.hpp:157
bool operator<(const SFeatKey &rhs) const
CSeqFeatData::ESubtype feat_subtype
Definition: cache_impl.hpp:158
bool operator<(const SFeatStrKey &rhs) const
bool operator==(const SFeatStrKey &rhs) const
PQuickStringLess implements an ordering of strings that is more efficient than the usual lexicographical order.
Definition: ncbistr.hpp:5020
SAnnotSelector.
Definition: type.c:6
#define FOR_EACH_SEQENTRY_ON_SEQSUBMIT(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSUBMIT EDIT_EACH_SEQENTRY_ON_SEQSUBMIT.
#define _ASSERT
CScope & GetScope()
bool BarcodeTestFails(const SBarcode &b)
vector< SBarcode > TBarcodeResults
TBarcodeResults GetBarcodeValues(CSeq_entry_Handle seh)
const string kTooShort
const string kMissingPrimers
static bool s_IsPhage(const COrg_ref &org)
#define ADD_BARCODE_ERR(TestName)
const string kBadCollectionDate
const string kFrameShift
static const string kSpaceLeftFirst
const string kMissingCountry
static const string kSpaceLeftCircle
bool s_HasTopSetSiblings(CSeq_entry_Handle seh)
size_t s_CountTopSetSiblings(const CSeq_entry &se)
bool s_IsGoodTopSetClass(CBioseq_set::EClass set_class)
static bool s_SeqLocHasGI(const CSeq_loc &loc)
static const EErrType sc_ValidGenomeRaiseExceptEmblDdbj[]
Definition: validatorp.cpp:585
DEFINE_STATIC_ARRAY_MAP(CStaticArraySet< EErrType >, sc_GenomeRaiseArray, sc_ValidGenomeRaise)
static const string kSpaceRightLast
unsigned int s_CountMix(const CSeq_loc &loc)
static void s_CollectPubDescriptorLabels(const CSeq_entry &se, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
const string kMissingVoucher
const string kMissingOrderAssignment
static const EErrType sc_ValidGenomeRaise[]
Definition: validatorp.cpp:423
const string kLowTrace
static const string kSpaceRightCircle
const string kStructuredVoucher
static const EErrType sc_ValidGenomeRaiseExceptEmblDdbjRefSeq[]
Definition: validatorp.cpp:595
const string kTooManyNs
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Sun Apr 14 05:25:04 2024 by modify_doxy.py rev. 669887