NCBI C++ ToolKit
huge_file_validator.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: huge_file_validator.hpp 103011 2024-08-21 17:32:06Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Justin Foley
27  *
28  * File Description:
29  *
30  */
31 
32 #ifndef _HUGE_FILE_VALIDATOR_HPP_
33 #define _HUGE_FILE_VALIDATOR_HPP_
34 
37 #include <objects/seq/MolInfo.hpp>
38 
41 
42 class CValidError;
43 class IValidError;
44 
45 BEGIN_SCOPE(validator)
46 
47 struct SValidatorContext;
48 
// Exported free function: takes a huge-file ASN reader by const reference and
// returns a string by value.
// NOTE(review): presumably the ID string is derived from the reader's content;
// its exact form is defined in the implementation, not in this header.
49 string NCBI_VALIDATOR_EXPORT g_GetIdString(const edit::CHugeAsnReader& reader);
50 
51 
53 public:
// Aggregate of boolean flags, integer counters and containers. Every scalar
// member has an in-class default, and Clear() restores exactly those defaults.
// What each member records is decided by code outside this header.
// NOTE(review): this listing jumps from original line 69 to 73, so the
// declarations of pubSerialNumbers, conflictingSerialNumbers and biomols
// (all three are cleared in Clear()) are not shown here. The cross-reference
// index of this page lists biomols as set<CMolInfo::TBiomol>.
54  struct SGlobalInfo {
55  bool IsPatent = false;
56  bool IsPDB = false;
57  bool IsRefSeq = false;
58 
    // The three "No..." flags are the only booleans that default to true.
    // NOTE(review): presumably flipped to false once the corresponding item
    // is encountered - confirm against the code that fills this struct.
59  bool NoBioSource = true;
60  bool NoPubsFound = true;
61  bool NoCitSubsFound = true;
62  bool CurrIsGI = false;
63  bool CurrTpaAssembly = false;
    // Signed counters, all starting at zero.
64  int JustTpaAssembly = 0;
65  int TpaAssemblyHist = 0;
66  int TpaNoHistYesGI = 0;
67  int CumulativeInferenceCount = 0;
68  bool NotJustLocalOrGeneral = false;
69  bool HasRefSeq = false;
    // Unsigned counter, brace-initialised to zero.
73  unsigned int numMisplacedFeats {0};
74 
    // Returns the object to its freshly constructed state: each scalar is
    // assigned the same value as its in-class initialiser above, and clear()
    // is called on pubSerialNumbers, conflictingSerialNumbers and biomols.
    // Keep this in sync with the member list when adding a new member.
75  void Clear() {
76  IsPatent = false;
77  IsPDB = false;
78  IsRefSeq = false;
79 
80  NoBioSource = true;
81  NoPubsFound = true;
82  NoCitSubsFound = true;
83  CurrIsGI = false;
84  CurrTpaAssembly = false;
85  JustTpaAssembly = 0;
86  TpaAssemblyHist = 0;
87  TpaNoHistYesGI = 0;
88  CumulativeInferenceCount = 0;
89  NotJustLocalOrGeneral = false;
90  HasRefSeq = false;
91  pubSerialNumbers.clear();
92  conflictingSerialNumbers.clear();
93  biomols.clear();
94  numMisplacedFeats = 0;
95  }
96  };
97 
// Public aliases and member declarations of CHugeFileValidator.
// NOTE(review): TGlobalInfo is used below but is not declared in the lines
// shown (original line 98 is absent from this listing); presumably it is an
// alias for SGlobalInfo - confirm in the real header.

// Reader type the validator is constructed from.
99  using TReader = edit::CHugeAsnReader;
// Option word passed to the constructor.
// NOTE(review): presumably a bit mask; the meaning of its bits is not defined
// in this header.
101  using TOptions = unsigned int;
102 
// Constructs a validator from a reader (taken by const reference) and an
// option word.
103  CHugeFileValidator(const TReader& reader,
104  TOptions options);
106 
107 
// Const query on a Seq-id.
// NOTE(review): presumably true when `id` belongs to the data held by the
// reader - confirm in the implementation.
108  bool IsInBlob(const CSeq_id& id) const;
109 
// Takes `globalInfo` read-only and `context` by non-const reference, so
// `context` is the in/out parameter. Does not modify the validator (const).
110  void UpdateValidatorContext(const TGlobalInfo& globalInfo,
111  SValidatorContext& context) const;
112 
// Takes `globalInfo` read-only; `errors` is passed by non-const reference and
// is therefore the object that receives the results.
113  void ReportGlobalErrors(const TGlobalInfo& globalInfo,
114  IValidError& errors) const;
115 
// Same shape as ReportGlobalErrors, but driven by a read-only
// SValidatorContext instead of the global info.
116  void ReportPostErrors(const SValidatorContext& context, IValidError& errors) const;
117 
// `pErrors` is a non-const reference to a CRef, so the callee may modify the
// referenced error object or re-seat the CRef itself.
// NOTE(review): the role of `genbankSetId` is not visible in this header.
118  void PostprocessErrors(const TGlobalInfo& globalInfo,
119  const string& genbankSetId,
120  CRef<CValidError>& pErrors) const;
121 
// Static: callable without a validator instance. Both arguments are non-const
// references.
// NOTE(review): the second parameter is named like a data member
// (m_GlobalInfo) although it is an ordinary function parameter.
122  static void RegisterReaderHooks(CObjectIStream& objStream, SGlobalInfo& m_GlobalInfo);
123 
124 private:
125 
// Private reporting helpers. Each one is const and receives the error
// collection by non-const reference, so results are delivered through
// `errors`, not through the validator's own state.
// NOTE(review): presumably these are the individual checks behind
// ReportGlobalErrors - confirm in the implementation file.
126  void x_ReportMissingPubs(IValidError& errors) const;
127 
128  void x_ReportMissingCitSubs(bool hasRefSeqAccession, IValidError& errors) const;
129 
// Takes the set of serial numbers to report by const reference.
130  void x_ReportCollidingSerialNumbers(const set<int>& collidingNumbers,
131  IValidError& errors) const;
132 
133  void x_ReportMissingBioSources(IValidError& errors) const;
134 
135  void x_ReportConflictingBiomols(IValidError& errors) const;
136 
137 
138 
// String-producing helpers; both are const and return by value.
139  string x_GetIdString() const;
140 
141  string x_GetHugeSetLabel() const;
142 
// Declared mutable, so const member functions are allowed to assign it.
// NOTE(review): presumably a lazily filled cache for x_GetIdString() -
// confirm in the implementation file.
143  mutable unique_ptr<string> m_pIdString;
144 
147 };
148 
// Exported free function with the same signature shape as g_GetIdString:
// takes the reader by const reference and returns a string by value.
// NOTE(review): how this ID string differs from the one returned by
// g_GetIdString is not visible in this header.
149 string NCBI_VALIDATOR_EXPORT g_GetHugeSetIdString(const edit::CHugeAsnReader& reader);
150 
152  const string& genbankSetId,
153  CRef<CValidError>& pErrors);
154 
156  const string& genbankSetId,
157  list<CRef<CValidErrItem>>& errors);
158 
159 END_SCOPE(validator)
162 
163 #endif
User-defined methods of the data storage class.
unique_ptr< string > m_pIdString
void PostprocessErrors(const TGlobalInfo &globalInfo, const string &genbankSetId, CRef< CValidError > &pErrors) const
edit::CHugeAsnReader TReader
TReader::TBioseqInfo TBioseqInfo
CObjectIStream –.
Definition: objistr.hpp:93
CRef –.
Definition: ncbiobj.hpp:618
void clear()
Definition: set.hpp:153
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
CHugeAsnReader::TBioseqInfo TBioseqInfo
string g_GetIdString(const edit::CHugeAsnReader &reader)
void g_PostprocessErrors(const CHugeFileValidator::TGlobalInfo &globalInfo, const string &genbankSetId, CRef< CValidError > &pErrors)
string g_GetHugeSetIdString(const edit::CHugeAsnReader &reader)
set< CMolInfo::TBiomol > biomols
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Fri Sep 20 14:57:55 2024 by modify_doxy.py rev. 669887