NCBI C++ ToolKit
srcchk.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: srcchk.cpp 98270 2022-10-20 13:56:31Z gotvyans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig, NCBI
27 *
28 * File Description:
29 * source qualifier generator application
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <common/ncbi_source_ver.h>
35 #include <corelib/ncbiapp.hpp>
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39 
42 #include <objmgr/scope.hpp>
43 
45 
49 
52 
53 // ============================================================================
55 // ============================================================================
// Declaration of the application class. The class header line is absent from
// this listing; main() instantiates the class as CSrcChkApp.
// NOTE(review): several declaration lines below have lost their first line
// (return type and method name) in this view; do not edit them blindly.
56 {
57 public:
// Registers the command-line argument descriptions.
58  void Init();
// Runs the application and returns the process exit status.
59  int Run();
60 
61 private:
// NOTE(review): first line of this declaration is missing from the listing.
63  const CArgs&);
// Reads sequence IDs from the ID file (or stdin) and writes a table for them.
64  bool xTryProcessIdFile(
65  const CArgs&);
// NOTE(review): first line of this declaration is missing from the listing.
67  const CArgs&);
// Fills the vector with the qualifier field names selected on the command line.
68  bool xGetDesiredFields(
69  const CArgs&,
70  vector<string>&);
// NOTE(review): first line of this declaration is missing from the listing.
72  const CArgs&);
// Writes one collected error to the given stream.
73  void xDumpError(
74  const ILineError&,
75  std::ostream&);
76 
77 private:
// NOTE(review): the data member declarations (listing lines 78-81) are missing
// from this view.
82 };
83 
84 // ----------------------------------------------------------------------------
86 // ----------------------------------------------------------------------------
// Builds the command-line argument descriptions and registers them with the
// application. Arguments declared here:
//   i, seq-entry           input selection
//   f, F, all-fields       which qualifier fields to report
//   list-supported-fields  print the supported field names
//   o                      output file
//   delim                  column delimiter, default is a tab
// NOTE(review): the signature line and several argument lines (the value type
// of i, seq-entry and o, and the middle argument of every SetDependency call)
// are missing from this listing. The dependency kind is presumably eExcludes;
// confirm against the real file.
87 {
88  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
89  arg_desc->SetUsageContext(
90  GetArguments().GetProgramBasename(),
91  "Extract Genbank source qualifiers",
92  false);
93 
94  // input
95  {{
96  arg_desc->AddOptionalKey("i", "IDsFile",
97  "IDs file name. Defaults to stdin",
99 
100  arg_desc->AddOptionalKey("seq-entry", "SeqEntryFile",
101  "File containing Seq-entry in ASN.1 format",
103 
// Dependency declared between seq-entry and i.
104  arg_desc->SetDependency("seq-entry",
106  "i");
107  }}
108 
109  // parameters
110  {{
111  arg_desc->AddOptionalKey("f", "FieldsList",
112  "List of fields", CArgDescriptions::eString );
113 
// Pairwise dependencies are declared among f, F and all-fields.
114  arg_desc->SetDependency("f",
116  "F");
117 
118  arg_desc->SetDependency("f",
120  "all-fields");
121 
122  arg_desc->AddOptionalKey("F", "FieldsFile",
123  "File of fields", CArgDescriptions::eInputFile );
124 
125  arg_desc->SetDependency("F",
127  "all-fields");
128 
129  arg_desc->AddFlag("all-fields", "List all fields");
130  }}
131 
132  {{
133  arg_desc->AddFlag("list-supported-fields",
134  "List in alphabetical order the fields that srcchk can display; ignore other arguments");
135  }}
136 
137  // output
138  {{
139  arg_desc->AddOptionalKey("o", "OutputFile",
140  "Output file name. Defaults to stdout",
142  }}
143  {{
144  // misc
145  arg_desc->AddDefaultKey("delim", "Delimiter",
146  "Column value delimiter", CArgDescriptions::eString, "\t");
147  }}
148 
149 
// release() gives up ownership of the descriptions before the pointer is passed
// to SetupArgDescriptions.
150  SetupArgDescriptions(arg_desc.release());
151 }
152 
153 // ----------------------------------------------------------------------------
155 // ----------------------------------------------------------------------------
// Application entry point after argument parsing.
// With list-supported-fields: prints CSrcWriter::sAllSrcCheckFields in sorted
// order, one per line, and returns 0 without further processing.
// Otherwise: creates the writer, tries Seq-entry input first and falls back to
// ID-file input, then dumps every collected error to cerr.
// Returns 1 if any error was collected, 0 otherwise.
156 {
157  const CArgs& args = GetArgs();
158 
159  if (args["list-supported-fields"]) {
// Sort a copy so the shared field list itself is left untouched.
160  vector<string> sortedFields = CSrcWriter::sAllSrcCheckFields;
161  sort(begin(sortedFields), end(sortedFields));
162  for (const auto& field : sortedFields) {
163  cout << field << "\n";
164  }
165  return 0;
166  }
167 
// NOTE(review): listing lines 168-174 are missing here. They presumably set up
// the object manager, the scope and m_pErrors, all of which are used below;
// confirm against the real file.
173 
175  m_pWriter.Reset(xInitWriter(args));
176  bool processed = xTryProcessSeqEntry(args);
177  if (!processed) {
178  processed = xTryProcessIdFile(args);
179  }
// NOTE(review): processed is not read after this point; the exit status is
// derived only from the number of collected errors.
180  size_t errorCount = m_pErrors->Count();
181  for (size_t pos=0; pos < errorCount; ++pos) {
182  xDumpError(m_pErrors->GetError(pos), cerr);
183  }
184  return (errorCount ? 1 : 0);
185 }
186 
187 // -----------------------------------------------------------------------------
189  const CArgs& args)
190 // -----------------------------------------------------------------------------
// Reads sequence IDs, one per line, from the file given by the i argument (or
// from cin when i is absent), pairs each ID with a bioseq handle and passes the
// whole list to the writer.
// Returns false when the output stream, the field list or the ID input cannot
// be set up, when no ID was read, or when WriteBioseqHandles reports failure;
// returns true otherwise.
// NOTE(review): the signature line and several interior lines (creation of pE,
// the handle lookup inside the try block, part of the catch block) are missing
// from this listing.
191 {
192  CNcbiOstream* pOs = xInitOutputStream(args);
193  if (0 == pOs) {
194  string error_msg = args["o"] ?
195  "Unable to open output file \"" + args["o"].AsString() + "\"." :
196  "Unable to write to stdout.";
// NOTE(review): the line that creates pE is missing from this view.
198  m_pErrors->PutError(*pE);
199  delete pE;
200  return false;
201  }
202 
203  const streamsize maxLineSize(100);
204  char line[maxLineSize];
205 
206  CSrcWriter::FIELDS desiredFields;
207  if (!xGetDesiredFields(args, desiredFields)) {
208  return false;
209  }
210 
211  CNcbiIstream* pIfstr = 0;
212  try {
213  pIfstr = args["i"] ? &args["i"].AsInputFile() : &cin;
214  }
215  catch(const std::exception&) {
216  string error_msg = args["i"] ?
217  "Unable to open ID file \"" + args["i"].AsString() + "\"." :
218  "Unable to read IDs from stdin.";
// NOTE(review): the line that creates pE is missing from this view.
220  m_pErrors->PutError(*pE);
221  delete pE;
222  return false;
223  }
224  vector<pair<string, CBioseq_Handle> > vecIdBsh;
// NOTE(review): the loop ends only on eof(). istream::getline sets failbit when
// a line does not fit in the 100-byte buffer, and a failed stream does not
// advance, so an over-long input line looks like it would make this loop spin
// forever. Confirm, and consider std::getline into a std::string with a check
// of the stream state instead.
225  while (!pIfstr->eof()) {
226  pIfstr->getline(line, maxLineSize);
// Blank lines and lines starting with # are skipped.
227  if (line[0] == 0 || line[0] == '#') {
228  continue;
229  }
230  string id(line);
232  try {
// NOTE(review): the lines that resolve id to bsh are missing from this view.
235  vecIdBsh.push_back(make_pair(id,bsh));
236  } catch (const CSeqIdException& e) {
// NOTE(review): the condition guarding this rethrow is missing from this view.
238  throw;
239  }
// An ID that cannot be parsed is logged and recorded with an empty handle, so
// it still occupies a slot in the list given to the writer.
241  string err_msg = "Malformatted ID \"" + id + "\"";
242  ERR_POST(err_msg);
243  vecIdBsh.push_back(make_pair(id,CBioseq_Handle()));
244  }
245  }
246 
247 
248  if (vecIdBsh.empty() || !m_pWriter->WriteBioseqHandles(vecIdBsh, desiredFields, *pOs, m_pErrors)) {
249  return false;
250  }
251  return true;
252 }
253 
254 // -----------------------------------------------------------------------------
256  const CArgs& args)
257 // -----------------------------------------------------------------------------
// Handles the seq-entry input mode: reads one Seq-entry in ASN.1 text format
// from the named file and passes it to the writer.
// Returns false when the seq-entry argument is absent, or when the output
// stream, the field list or the object stream cannot be set up. Returns true
// once reading has been attempted, even if the read failed; Run only falls
// back to ID-file processing when this function returns false.
// NOTE(review): the signature line and the lines that create pE are missing
// from this listing.
258 {
259  if (!args["seq-entry"]) {
260  return false;
261  }
262  CNcbiOstream* pOs = xInitOutputStream(args);
263  if (0 == pOs) {
264  string error_msg = args["o"] ?
265  "Unable to open output file \"" + args["o"].AsString() + "\"." :
266  "Unable to write to stdout.";
268  m_pErrors->PutError(*pE);
269  delete pE;
// NOTE(review): returning false here lets Run continue with ID-file
// processing even though seq-entry was requested; confirm this is intended.
270  return false;
271  }
272 
// NOTE(review): desiredFields is validated here but is not passed to
// WriteSeqEntry below; confirm whether the field selection should apply.
273  CSrcWriter::FIELDS desiredFields;
274  if (!xGetDesiredFields(args, desiredFields)) {
275  return false;
276  }
277 
// NOTE(review): infile points into the string returned by AsString(); this is
// only safe if AsString() returns a reference to storage that outlives this
// statement. Confirm against the CArgValue API.
278  const char* infile = args["seq-entry"].AsString().c_str();
// The stream is allocated here and handed to the object stream with
// eTakeOwnership.
// NOTE(review): the open state of the file stream is never checked; only the
// pointer returned by Open is tested. Confirm that a missing file is reported.
279  CNcbiIstream* pInputStream = new CNcbiIfstream(infile, ios::binary);
280  unique_ptr<CObjectIStream> pI(CObjectIStream::Open(eSerial_AsnText, *pInputStream, eTakeOwnership));
281  if (!pI) {
282  string msg("Unable to open Seq-entry file \"" + args["seq-entry"].AsString() + "\".");
284  m_pErrors->PutError(*pE);
285  delete pE;
286  return false;
287  }
288 
289  CRef<CSeq_entry> pSe(new CSeq_entry);
290  try {
291  pI->Read(ObjectInfo(*pSe));
292  }
293  catch (const CException&) {
294  string msg("Unable to process Seq-entry file \"" + args["seq-entry"].AsString() + "\".");
296  m_pErrors->PutError(*pE);
297  delete pE;
// True is returned on failure so that Run does not fall back to the ID file;
// the failure is carried by the error recorded above.
298  return true; //!!!
299  }
300 
// NOTE(review): the bool result of WriteSeqEntry is ignored.
301  m_pWriter->WriteSeqEntry(*pSe, *m_pScope, *pOs);
302  return true; //!!!
303 }
304 
305 // -----------------------------------------------------------------------------
307  const CArgs& args,
308  CSrcWriter::FIELDS& fields)
309 // -----------------------------------------------------------------------------
// Fills fields with the qualifier field names to report, chosen by the first
// matching argument in this order: all-fields, f (comma-separated list),
// F (file with one field per line), otherwise a default list.
// Returns the result of CSrcWriter::ValidateFields for f and F, false when the
// fields file cannot be opened, and true for all-fields and for the default.
// NOTE(review): the signature line, the all-fields assignment, the pE creation
// and the argument of the final assign are missing from this listing. The
// default is presumably CSrcWriter::sDefaultSrcCheckFields; confirm.
310 {
311  if (args["all-fields"]) {
313  return true;
314  }
315 
316  if (args["f"]) {
317  string fieldString = args["f"].AsString();
318  NStr::Split(fieldString, ",", fields);
319  return CSrcWriter::ValidateFields(fields, m_pErrors);
320  }
321  if (args["F"]) {
322  const streamsize maxLineSize(100);
323  char line[maxLineSize];
324  CNcbiIstream* pIfstr = 0;
325  try {
326  pIfstr = &args["F"].AsInputFile();
327  }
328  catch (const std::exception& e) {
331  "Unable to open fields file \"" + args["F"].AsString() + "\".");
332  m_pErrors->PutError(*pE);
333  delete pE;
334  return false;
335  }
// NOTE(review): the loop ends only on eof(). istream::getline sets failbit when
// a line does not fit in the 100-byte buffer, so an over-long line looks like
// it would make this loop spin forever. Confirm, and consider std::getline
// into a std::string instead.
336  while (!pIfstr->eof()) {
337  pIfstr->getline(line, maxLineSize);
// Blank lines and lines starting with # are skipped.
338  if (line[0] == 0 || line[0] == '#') {
339  continue;
340  }
341  string field(line);
// NOTE(review): listing line 342 is missing here; it presumably trims
// whitespace from field, which would explain the emptiness check that follows.
343  if (field.empty()) {
344  continue;
345  }
// NOTE(review): id and accession are filtered only on this path, not for the
// f argument above; confirm whether that difference is intended.
346  if (field == "id" || field == "accession") {
347  //handled implicitly
348  continue;
349  }
350  fields.push_back(field);
351  }
352  return CSrcWriter::ValidateFields(fields, m_pErrors);
353  }
354 
355 
356  fields.assign(
358  return true;
359 }
360 
361 // -----------------------------------------------------------------------------
363  const CArgs& args)
364 // -----------------------------------------------------------------------------
// Selects the stream that results are written to.
// Returns the address of cout when the o argument is absent, otherwise the
// address of the stream obtained from args[o].AsOutputFile(). Returns 0 when
// obtaining that stream throws a std::exception.
// The returned pointer is not owned by the caller: it refers either to cout or
// to a stream object returned by reference from the argument value.
// NOTE(review): the signature line is missing from this listing.
365 {
366  if (!args["o"]) {
367  return &cout;
368  }
369  try {
370  return &args["o"].AsOutputFile();
371  }
372  catch(const std::exception&) {
373  return 0;
374  }
375 }
376 
377 // ----------------------------------------------------------------------------
379  const CArgs& args)
380 // ----------------------------------------------------------------------------
// Creates the table writer and sets its column delimiter from the delim
// argument. Returns a raw pointer to the newly allocated writer; Run stores it
// through m_pWriter.Reset().
// NOTE(review): the signature line is missing from this listing, and the
// meaning of the 0 passed to the CSrcWriter constructor is not visible here.
381 {
382  CSrcWriter* pWriter = new CSrcWriter(0);
383  pWriter->SetDelimiter(args["delim"].AsString());
384  return pWriter;
385 }
386 
387 // ---------------------------------------------------------------------------
389  const ILineError& error,
390  std::ostream& out)
391 // ---------------------------------------------------------------------------
// Writes one error to out as a single line of the form
//   srcchk SEVERITY: MESSAGE
// followed by endl, which also flushes the stream.
// error: the error whose severity string and message are printed.
// out:   destination stream; Run passes cerr.
// NOTE(review): the signature line is missing from this listing.
392 {
393  out << "srcchk "
394  << error.SeverityStr().c_str()
395  << ": "
396  << error.ErrorMessage().c_str()
397  << endl;
398 }
399 
402 
403 // ===========================================================================
// Program entry point: constructs a temporary CSrcChkApp, runs it through
// AppMain with the raw command line, and returns the value AppMain returns.
404 int main(int argc, const char** argv)
405 // ===========================================================================
406 {
407  return CSrcChkApp().AppMain(argc, argv);
408 }
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
static void SetupObjectManager(const CArgs &args, objects::CObjectManager &obj_mgr, TLoaders loaders=fDefault)
Set up the standard object manager data loaders according to the arguments provided above.
size_t Count() const override
const ILineError & GetError(size_t uPos) const override
0-based error retrieval.
CScope –.
Definition: scope.hpp:92
CSeqIdException –.
Definition: Seq_id.hpp:1001
Definition: Seq_entry.hpp:56
bool xTryProcessSeqEntry(const CArgs &)
Definition: srcchk.cpp:255
bool xTryProcessIdFile(const CArgs &)
Definition: srcchk.cpp:188
CSrcWriter * xInitWriter(const CArgs &)
Definition: srcchk.cpp:378
CRef< CSrcWriter > m_pWriter
Definition: srcchk.cpp:80
CRef< CObjectManager > m_pObjMngr
Definition: srcchk.cpp:78
CNcbiOstream * xInitOutputStream(const CArgs &)
Definition: srcchk.cpp:362
void Init()
Initialize the application.
Definition: srcchk.cpp:85
int Run()
Run the application.
Definition: srcchk.cpp:154
CRef< CMessageListenerBase > m_pErrors
Definition: srcchk.cpp:81
bool xGetDesiredFields(const CArgs &, vector< string > &)
Definition: srcchk.cpp:306
CRef< CScope > m_pScope
Definition: srcchk.cpp:79
void xDumpError(const ILineError &, std::ostream &)
Definition: srcchk.cpp:388
static CSrcError * Create(ncbi::EDiagSev severity, const std::string &)
Used to generate tables showing qualifier-field entries occurring in the BioSources of instances of Bi...
Definition: src_writer.hpp:73
static bool ValidateFields(const FIELDS &fields, ILineErrorListener *=nullptr)
Verify that each string in fields is a valid qualifier name.
static const FIELDS sAllSrcCheckFields
All possible fields processed by srcchk application, in their canonical order.
Definition: src_writer.hpp:179
static const FIELDS sDefaultSrcCheckFields
Default fields processed by srcchk application, in their canonical order.
Definition: src_writer.hpp:178
void SetDelimiter(const string &delimiter)
Set the column delimiter for the output table.
Definition: src_writer.hpp:111
vector< string > FIELDS
Definition: src_writer.hpp:78
virtual bool WriteBioseqHandles(const vector< pair< string, CBioseq_Handle > > &, const FIELDS &, CNcbiOstream &, ILineErrorListener *=nullptr)
Write a table of the specified qualifier-field entries found in the BioSources of a vector of Bioseqs...
Definition: src_writer.cpp:165
virtual bool WriteSeqEntry(const CSeq_entry &, CScope &, CNcbiOstream &, bool=false)
Write a table of all qualifier-field entries occurring in the BioSources for a given Seq-entry,...
Definition: src_writer.cpp:189
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
std::ofstream out("events_result.xml")
main entry point for tests
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:832
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1208
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eTakeOwnership
An object can take ownership of another.
Definition: ncbi_types.h:136
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eFormat
Contents not parsable as expected.
Definition: Seq_id.hpp:1006
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
Definition: objectinfo.hpp:762
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
Definition: ncbistr.cpp:3944
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
@ fNonAscii_Quote
Octal for all non-ASCII characters.
Definition: ncbistr.hpp:2735
void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)
Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...
constexpr auto sort(_Init &&init)
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
The Object manager core.
USING_SCOPE(objects)
int main(int argc, const char **argv)
Definition: srcchk.cpp:404
USING_NCBI_SCOPE
Definition: srcchk.cpp:401
Modified on Tue Jul 16 13:24:13 2024 by modify_doxy.py rev. 669887