NCBI C++ ToolKit
huge_file_validator.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: huge_file_validator.hpp 102515 2024-05-20 15:38:38Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Justin Foley
27  *
28  * File Description:
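29  *   Declarations for CHugeFileValidator, its nested SGlobalInfo struct, and related free functions in the validator scope.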
30  */
31 
32 #ifndef _HUGE_FILE_VALIDATOR_HPP_
33 #define _HUGE_FILE_VALIDATOR_HPP_
34 
37 #include <objects/seq/MolInfo.hpp>
38 
41 
42 class CValidError;
43 class IValidError;
44 
45 BEGIN_SCOPE(validator)
46 
47 struct SValidatorContext;
48 
// Free function exported via NCBI_VALIDATOR_EXPORT; takes the reader by const reference and returns a string by value.
// NOTE(review): how the id string is derived is not visible in this header -- check the implementation file.
49 string NCBI_VALIDATOR_EXPORT g_GetIdString(const edit::CHugeAsnReader& reader);
50 
51 
53 public:
// Aggregate of boolean flags and counters. Every member visible here has an in-class default,
// and Clear() puts each of them back to that same default.
54  struct SGlobalInfo {
55  bool IsPatent = false;
56  bool IsPDB = false;
57  bool IsRefSeq = false;
58 
// The three "No..." flags default to true (unlike the other flags, which default to false).
59  bool NoBioSource = true;
60  bool NoPubsFound = true;
61  bool NoCitSubsFound = true;
62  bool CurrIsGI = false;
63  bool CurrTpaAssembly = false;
64  int JustTpaAssembly = 0;
65  int TpaAssemblyHist = 0;
66  int TpaNoHistYesGI = 0;
// NOTE(review): listing lines 67-69 are absent from this page. Clear() below calls clear() on
// pubSerialNumbers, conflictingSerialNumbers and biomols, so their declarations presumably
// belong in this gap -- confirm against the real header.
70  unsigned int numMisplacedFeats {0};
71 
// Resets the struct for reuse: each scalar is assigned the same value as its in-class
// initializer above, and clear() is called on the three members noted in the gap.
72  void Clear() {
73  IsPatent = false;
74  IsPDB = false;
75  IsRefSeq = false;
76 
77  NoBioSource = true;
78  NoPubsFound = true;
79  NoCitSubsFound = true;
80  CurrIsGI = false;
81  CurrTpaAssembly = false;
82  JustTpaAssembly = 0;
83  TpaAssemblyHist = 0;
84  TpaNoHistYesGI = 0;
85  pubSerialNumbers.clear();
86  conflictingSerialNumbers.clear();
87  biomols.clear();
88  numMisplacedFeats = 0;
89  }
90  };
91 
// Member type aliases: TReader names edit::CHugeAsnReader, TOptions is an unsigned int.
// NOTE(review): listing lines 92 and 94 are absent from this page; the TGlobalInfo alias used by
// the declarations below is not visible here -- presumably it is declared in that gap, confirm.
93  using TReader = edit::CHugeAsnReader;
95  using TOptions = unsigned int;
96 
// Constructor: takes the reader by const reference and an options value.
97  CHugeFileValidator(const TReader& reader,
98  TOptions options);
100 
101 
// Const member taking a CSeq_id by const reference and returning bool.
// NOTE(review): judging by the name, presumably tests whether the id is part of the current blob -- confirm in the .cpp.
102  bool IsInBlob(const CSeq_id& id) const;
103 
// globalInfo is read-only input; context is a non-const reference, so it is the argument that can be modified.
104  void UpdateValidatorContext(const TGlobalInfo& globalInfo,
105  SValidatorContext& context) const;
106 
// globalInfo is read-only input; errors is passed by non-const reference.
107  void ReportGlobalErrors(const TGlobalInfo& globalInfo,
108  IValidError& errors) const;
109 
// context is read-only input; errors is passed by non-const reference.
110  void ReportPostErrors(const SValidatorContext& context, IValidError& errors) const;
111 
// globalInfo and genbankSetId are read-only inputs; pErrors is a non-const CRef reference.
112  void PostprocessErrors(const TGlobalInfo& globalInfo,
113  const string& genbankSetId,
114  CRef<CValidError>& pErrors) const;
115 
// Static, so it needs no validator instance; both arguments are non-const references.
// NOTE(review): the second parameter is named m_GlobalInfo although it is a parameter, not a data member.
116  static void RegisterReaderHooks(CObjectIStream& objStream, SGlobalInfo& m_GlobalInfo);
117 
118 private:
119 
// Private x_Report* helpers: all are const members that take the IValidError sink by non-const reference.
// NOTE(review): the conditions under which each one reports are not visible in this header -- see the implementation file.
120  void x_ReportMissingPubs(IValidError& errors) const;
121 
122  void x_ReportMissingCitSubs(bool hasRefSeqAccession, IValidError& errors) const;
123 
// collidingNumbers is a read-only set<int>.
124  void x_ReportCollidingSerialNumbers(const set<int>& collidingNumbers,
125  IValidError& errors) const;
126 
127  void x_ReportMissingBioSources(IValidError& errors) const;
128 
129  void x_ReportConflictingBiomols(IValidError& errors) const;
130 
131 
132 
// Const helpers returning a string by value.
133  string x_GetIdString() const;
134 
135  string x_GetHugeSetLabel() const;
136 
// Declared mutable, so const member functions are allowed to assign it.
// NOTE(review): presumably a lazily filled cache for x_GetIdString() -- confirm in the .cpp.
// NOTE(review): listing lines 139-140 are absent from this page; further members may be declared there.
137  mutable unique_ptr<string> m_pIdString;
138 
141 };
142 
// Free function exported via NCBI_VALIDATOR_EXPORT; takes the reader by const reference and returns a string by value.
// NOTE(review): how this differs from g_GetIdString is not visible in this header -- check the implementation file.
143 string NCBI_VALIDATOR_EXPORT g_GetHugeSetIdString(const edit::CHugeAsnReader& reader);
144 
146  const string& genbankSetId,
147  CRef<CValidError>& pErrors);
148 
150  const string& genbankSetId,
151  list<CRef<CValidErrItem>>& errors);
152 
153 END_SCOPE(validator)
156 
157 #endif
User-defined methods of the data storage class.
unique_ptr< string > m_pIdString
void PostprocessErrors(const TGlobalInfo &globalInfo, const string &genbankSetId, CRef< CValidError > &pErrors) const
edit::CHugeAsnReader TReader
TReader::TBioseqInfo TBioseqInfo
CObjectIStream –.
Definition: objistr.hpp:93
CRef –.
Definition: ncbiobj.hpp:618
void clear()
Definition: set.hpp:153
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
CHugeAsnReader::TBioseqInfo TBioseqInfo
string g_GetIdString(const edit::CHugeAsnReader &reader)
void g_PostprocessErrors(const CHugeFileValidator::TGlobalInfo &globalInfo, const string &genbankSetId, CRef< CValidError > &pErrors)
string g_GetHugeSetIdString(const edit::CHugeAsnReader &reader)
set< CMolInfo::TBiomol > biomols
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Thu Jun 13 17:32:50 2024 by modify_doxy.py rev. 669887