NCBI C++ ToolKit
tableval.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tableval.cpp 94996 2021-09-27 14:11:38Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergiy Gotvyanskyy
27 *
28 * File Description:
29 * Validates tab-delimited files against ASN.1 datatypes, main application function
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistre.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbienv.hpp>
38 #include <corelib/ncbiargs.hpp>
39 #include <corelib/ncbi_mask.hpp>
40 
42 #include <connect/ncbi_util.h>
43 
44 #include <util/line_reader.hpp>
45 
47 #include "tab_table_reader.hpp"
48 #include "col_validator.hpp"
49 
50 #include <common/test_assert.h> /* This header must go last */
51 
52 using namespace ncbi;
53 using namespace objects;
54 
// Version string for the tableval application.
// NOTE(review): nothing in this listing reads this constant, and the
// constructor passes 1, 0 and a build number to SetVersion() instead --
// confirm whether the two are meant to agree.
55 const char * TABLEVAL_APP_VER = "10.0";
56 
57 /////////////////////////////////////////////////////////////////////////////
58 //
59 // Demo application
60 //
61 
62 
64 {
    // Body of the tableval application class: the virtual Init()/Run() entry
    // points, the helpers that process one stream, one file or one directory,
    // and the option values captured from the command line.
    // NOTE(review): the class header line and several data members used by the
    // member functions (for example m_logger, m_LogStream, m_output and the
    // boolean option flags) are absent from this listing.
65 public:
    // Initializes the members and sets the application version.
66  CTAbleValApp(void);
67 
    // Declares the command-line arguments accepted by the program.
68  virtual void Init(void);
    // Reads the parsed arguments, processes the requested input and returns
    // the process exit code.
69  virtual int Run (void);
70 
71 private:
72 
    // Connects the CONNECT library to the application registry.
73  void Setup(const CArgs& args);
74 
    // Processes an already opened input stream and writes the report to
    // `output` via GenerateOutput().
75  void ProcessOneFile(CNcbiIstream& input, CNcbiOstream* output);
    // Processes the file at path `current_file`; when no shared output stream
    // is configured it writes to a sibling file with the ".val" suffix.
76  void ProcessOneFile(const string& current_file);
    // Processes every entry of `directory` whose path matches `mask`,
    // descending into subdirectories when `recurse` is true. Always returns true.
77  bool ProcessOneDirectory(const CDir& directory, const CMask& mask, bool recurse);
78 
80 
81  //EDiagSev m_LowCutoff;
82  //EDiagSev m_HighCutoff;
83 
84  //CNcbiOstream* m_OutputStream;
86 
88 
    // Value of the -columns argument.
89  string m_columns_def;
    // Value of the -format argument ("tab" unless overridden).
92  string m_format;
    // Value of the -unique argument.
93  string m_unique_cols;
94 
102 };
103 
// Constructor (the signature line is not part of this listing).
// Member initializers: both stream pointers start out null and every boolean
// option starts out false.
105  m_LogStream(0), m_output(0),
106  m_comma_separated(false),
107  m_no_header(false),
108  m_skip_empty(false),
109  m_ignore_unknown_types(false)
110 {
    // The build number is NCBI_PRODUCTION_VER when the build defines that
    // macro, and 0 otherwise.
111  int build_num =
112 #ifdef NCBI_PRODUCTION_VER
113  NCBI_PRODUCTION_VER
114 #else
115  0
116 #endif
117  ;
118 
    // Register the application version from the values 1, 0 and build_num.
119  SetVersion(CVersionInfo(1, 0, build_num));
120 }
121 
122 
124 {
    // Init() body (the signature line is not part of this listing).
    // Builds the command-line description and hands it to the application:
    //   input/output selection : -p, -r, -i, -o, -x, -E
    //   parsing options        : -no-header, -skip-empty, -ignore-unknown, -comma, -format
    //   column constraints     : -columns, -required, -ignore, -unique, -discouraged, -require-one
    //   misc                   : -aliases, -logfile, -print-supported
    // NOTE(review): the closing line of the -p, -r, -i, -o and -unique
    // declarations (presumably the argument type) is absent from this listing.
125 
126  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
127 
128  // Prepare command line descriptions, inherit them from tbl2asn legacy application
129 
130  arg_desc->AddOptionalKey
131  ("p", "Directory", "Path to input files",
133 
134  arg_desc->AddOptionalKey
135  ("r", "Directory", "Path to results",
137 
138  arg_desc->AddOptionalKey
139  ("i", "InFile", "Single Input File",
141 
142  arg_desc->AddOptionalKey(
143  "o", "OutFile", "Single Output File",
145 
    // Suffix used by Run() to build the "*<suffix>" file mask for -p; defaults to ".tbl".
146  arg_desc->AddDefaultKey
147  ("x", "Suffix", "File suffix to match", CArgDescriptions::eString, ".tbl");
148 
149  arg_desc->AddFlag("E", "Recurse");
150 
151  arg_desc->AddFlag("no-header", "Start from the first row");
152  arg_desc->AddFlag("skip-empty", "Ignore all empty rows");
153  arg_desc->AddFlag("ignore-unknown", "Ignore all unknown types");
    // Output format defaults to "tab".
154  arg_desc->AddDefaultKey("format", "String", "Output type: tab, xml, text, html", CArgDescriptions::eString, "tab");
155 
156  arg_desc->AddFlag("comma", "Use comma separator instead of tabs");
157 
158  arg_desc->AddOptionalKey("columns", "String", "Comma separated columns definitions", CArgDescriptions::eString);
159 
160  arg_desc->AddOptionalKey("required", "String", "Comma separated required columns, use indices or names", CArgDescriptions::eString);
161 
162  arg_desc->AddOptionalKey("ignore", "String", "Comma separated columns to be ignored, use indices or names", CArgDescriptions::eString);
163 
164  arg_desc->AddOptionalKey("unique", "String",
165  "Comma separated columns needs to be unique in file, use indices or names",
167 
168  arg_desc->AddOptionalKey("aliases", "InFile", "Filename of data type aliases", CArgDescriptions::eInputFile);
169 
170  arg_desc->AddOptionalKey("discouraged", "String", "Comma separated list of discouraged types", CArgDescriptions::eString);
171 
    // -require-one may be given several times (fAllowMultiple); Run() reads
    // the values with GetStringList().
172  arg_desc->AddOptionalKey("require-one", "String", "Comma separated list of choice columns", CArgDescriptions::eString, CArgDescriptions::fAllowMultiple);
173 
174  arg_desc->AddOptionalKey("logfile", "LogFile", "Error Log File", CArgDescriptions::eOutputFile);
175 
176  arg_desc->AddFlag("print-supported", "Show supported data types");
177 
178  // Program description
179  string prog_description = "Validates tab delimited files against ASN.1 data types\n";
180  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
181  prog_description, false);
182 
183  // Pass argument descriptions to the application
    // release() gives up the unique_ptr's ownership; presumably
    // SetupArgDescriptions() takes it over -- confirm against its declaration.
184  SetupArgDescriptions(arg_desc.release());
185 }
186 
187 
189 {
    // Run() body (the signature line is not part of this listing).
    // Steps visible here: configure CONNECT, choose the log stream, create the
    // message listener, register aliases, copy the options into members, choose
    // the output destination, process the -p directory or the -i file, and
    // turn the collected messages into an exit code.
    // NOTE(review): several statements are absent from this listing (the
    // listing numbers skip 196, 208, 216, 221, 227, 232, 244, 267, 297, 306,
    // 309-310 and 316); comments below only describe what is visible.
190  const CArgs& args = GetArgs();
191 
192  Setup(args);
193 
    // Messages go to the -logfile stream when given, otherwise to NcbiCout.
194  m_LogStream = args["logfile"] ? &(args["logfile"].AsOutputFile()) : &NcbiCout;
195  m_logger.Reset(new CMessageListenerLenient());
197 
198  // note - the C Toolkit uses 0 for SEV_NONE, but the C++ Toolkit uses 0 for SEV_INFO
199  // adjust here to make the inputs to table2asn match tbl2asn expectations
200  //m_ReportLevel = args["R"].AsInteger() - 1;
201  //m_LowCutoff = static_cast<EDiagSev>(args["Q"].AsInteger() - 1);
202  //m_HighCutoff = static_cast<EDiagSev>(args["P"].AsInteger() - 1);
203 
    // A null stream is passed when -aliases is not given.
204  CTabDelimitedValidator::RegisterAliases(args["aliases"]?&args["aliases"].AsInputFile():0);
205 
    // -print-supported short-circuits: the function returns 0 before any input
    // is processed. NOTE(review): the statement that does the printing is not
    // in this listing.
206  if (args["print-supported"])
207  {
209  return 0;
210  }
211 
    // Copy the column-related options into members; each string member is
    // left untouched when its argument is not supplied.
212  m_comma_separated = args["comma"];
213  if (args["columns"])
214  {
215  m_columns_def = args["columns"].AsString();
217  }
218  if (args["required"])
219  {
220  m_required_cols = args["required"].AsString();
222  }
223 
224  if (args["ignore"])
225  {
226  m_ignored_cols = args["ignore"].AsString();
228  }
229  if (args["unique"])
230  {
231  m_unique_cols = args["unique"].AsString();
233  }
234 
235 
236  m_no_header = args["no-header"];
237  m_skip_empty = args["skip-empty"];
238  m_format = args["format"].AsString();
239  m_ignore_unknown_types = args["ignore-unknown"];
240 
241  if (args["discouraged"])
242  {
243  m_discouraged = args["discouraged"].AsString();
245  }
246 
247  if (args["require-one"])
248  {
249  m_require_one = args["require-one"].GetStringList();
250  }
251 
252  // Decide where output goes: one explicit output file (-o), otherwise a results directory (-r, or "." when -r is absent)
253  if (args["o"])
254  {
255  m_output = &args["o"].AsOutputFile();
256  }
257  else
258  {
259  if (args["r"])
260  {
261  m_ResultsDirectory = args["r"].AsString();
262  }
263  else
264  {
265  m_ResultsDirectory = ".";
266  }
268 
    // Outside a dry run, create the results directory when it does not exist
    // yet. The return value of Create() is not checked.
269  CDir outputdir(m_ResultsDirectory);
270  if (!IsDryRun())
271  if (!outputdir.Exists())
272  outputdir.Create();
273  }
274 
275  try
276  {
277  // Decide where input comes from: a directory (-p, optionally recursive with -E) or a single file (-i); -p takes precedence
278  if ( args["p"] )
279  {
280  CDir directory(args["p"].AsString());
    // NOTE(review): a -p path that does not exist is skipped without any
    // message in the visible code.
281  if (directory.Exists())
282  {
    // Match every path ending in the -x suffix.
283  CMaskFileName masks;
284  masks.Add("*" +args["x"].AsString());
285 
286  ProcessOneDirectory (directory, masks, args["E"].AsBoolean());
287  }
288  } else {
289  if (args["i"])
290  {
291  ProcessOneFile (args["i"].AsString());
292  }
293  }
294  }
    // A CException raised while processing is not rethrown here; its message
    // (e.GetMsg()) is the last argument of a call whose opening line is absent
    // from this listing -- presumably an error record handed to m_logger.
295  catch (CException& e)
296  {
298  "", 0, "", "", "",
299  e.GetMsg()));
300  }
301 
    // Exit code: 0 when the listener collected no messages at all.
302  if (m_logger->Count() == 0)
303  return 0;
304  else
305  {
307 
    // NOTE(review): the rest of this sum is absent from the listing; only the
    // eDiag_Critical count is visible.
308  int errors = m_logger->LevelCount(eDiag_Critical) +
311  // all errors reported as failure
312  if (errors > 0)
313  return 1;
314 
315  // only warnings reported as 2
    // NOTE(review): the condition guarding this return is absent from the
    // listing; presumably it tests the warning count.
317  return 2;
318 
319  // otherwise it's ok
320  return 0;
321  }
322 }
323 
325 {
    // Body of ProcessOneFile(CNcbiIstream& input, CNcbiOstream* output); the
    // signature line is not part of this listing.
    // NOTE(review): most statements of this function are absent here. What is
    // visible: an integer `flags` value is built, a choice is made on m_format
    // ("xml", "tab", "html" or "text"), a call whose last argument is
    // m_require_one is made, and the report is written with GenerateOutput().
326  int flags =
331 
    // One branch per supported -format value; the branch bodies are absent
    // from this listing.
332  if (m_format == "xml")
334  else
335  if (m_format == "tab")
337  else
338  if (m_format == "html")
340  else
341  if (m_format == "text")
343 
345 
347 
    // Tail of a multi-line call; presumably the ValidateInput() call that
    // receives the column options -- confirm against the full source.
349  m_require_one);
350 
    // Write the report to `output`; the second argument (no_headers) is false.
351  validator.GenerateOutput(output, false);
352 }
353 
// Processes the file at path `current_file`.
// When no shared output stream (m_output) is configured, the report goes to a
// new file named after the input with the text from its last '.' onward
// replaced by ".val". If anything throws, that locally created file is
// removed and the exception is rethrown.
// NOTE(review): m_ResultsDirectory is not consulted in the visible code, so
// the ".val" file is created next to the input file.
354 void CTAbleValApp::ProcessOneFile(const string& current_file)
355 {
356  CFile file(current_file);
357  if (!file.Exists())
358  {
    // NOTE(review): the start of the statement that reports the missing file
    // is absent from this listing; only its message argument is visible.
361  "File " + current_file + " does not exists"));
362  return;
363  }
364 
365  CNcbiOstream* output = 0;
366  unique_ptr<CNcbiOfstream> local_output;
367  CFile local_file;
368  try
369  {
    // NOTE(review): Run() tests IsDryRun(), but this line calls DryRun();
    // confirm which one is intended here.
370  if (!DryRun())
371  {
372  if (m_output == 0)
373  {
    // NOTE(review): rfind('.') searches the whole path, so a dot in a
    // directory name is matched when the file name itself has no extension.
374  string temp_file = current_file.substr(0, current_file.rfind('.')); // npos will signal to use the whole string
375  temp_file += ".val";
376  local_file.Reset(temp_file);
377  local_output.reset(new CNcbiOfstream(local_file.GetPath().c_str()));
378  output = local_output.get();
379  }
380  else
381  {
382  output = m_output;
383  }
384  }
385 
    // NOTE(review): the statement that consumes `input` and `output` is absent
    // from this listing; presumably it calls the stream-based ProcessOneFile().
386  CNcbiIfstream input(current_file.c_str());
388  //if (!IsDryRun())
389  //m_reader->WriteObject(*obj, *output);
390  }
391  catch(...)
392  {
393  // if something goes wrong, remove the partial output to avoid confusion
    // Only a file created by this call is removed; a shared m_output is left
    // alone. local_output has not been closed or reset at this point.
394  if (local_output.get())
395  {
396  local_file.Remove();
397  }
398  throw;
399  }
400 }
401 
// Walks the entries of `directory` once, in the order they are listed.
// Each non-directory entry whose path matches `mask` is passed to
// ProcessOneFile(); each directory entry is descended into when `recurse` is
// true and ignored otherwise. Always returns true; the result of nested calls
// is not inspected.
402 bool CTAbleValApp::ProcessOneDirectory(const CDir& directory, const CMask& mask, bool recurse)
403 {
    // NOTE(review): the declaration of `e` is absent from this listing;
    // presumably it is the entry list obtained from `directory`.
    // `entries` owns that list and frees it on return.
405  unique_ptr<CDir::TEntries> entries(e);
406 
407  for (CDir::TEntries::const_iterator it = e->begin(); it != e->end(); it++)
408  {
409  // files are processed directly; directories are visited recursively only when requested
410  if (!(*it)->IsDir())
411  {
    // The mask is matched against the entry's full path.
412  if (mask.Match((*it)->GetPath()))
413  {
414  ProcessOneFile((*it)->GetPath());
415  }
416  }
417  else
418  if (recurse)
419  {
420  ProcessOneDirectory(**it, mask, recurse);
421  }
422  }
423 
424  return true;
425 }
426 
// One-time setup performed at the start of Run().
// In the visible code the only effect is to hand the application's registry
// (GetConfig()) to the CONNECT library. `args` is inspected, but the `-r`
// branch contains nothing except comments.
// NOTE(review): the listing skips line 430 right after the first comment;
// presumably that statement sets up the CONNECT log -- confirm against the
// full source.
427 void CTAbleValApp::Setup(const CArgs& args)
428 {
429  // Setup application registry and logs for CONNECT library
    // The second argument, false, is REG_cxx2c's pass_ownership parameter.
431  CORE_SetREG(REG_cxx2c(&GetConfig(), false));
432  // Setup MT-safety for CONNECT library
433  // CORE_SetLOCK(MT_LOCK_cxx2c());
434 
435  // Create object manager
436  //m_ObjMgr = CObjectManager::GetInstance();
437  if ( args["r"] ) {
438  // Create GenBank data loader and register it with the OM.
439  // The last argument "eDefault" informs the OM that the loader must
440  // be included in scopes during the CScope::AddDefaults() call.
441  //CGBDataLoader::RegisterInObjectManager(*m_ObjMgr);
442  }
443 }
444 
445 /////////////////////////////////////////////////////////////////////////////
446 // MAIN
447 
// Program entry point: constructs a temporary CTAbleValApp and returns the
// value produced by its AppMain().
// eDS_Default selects the default diagnostics destination.
// NOTE(review): the two literal 0 arguments are presumably the environment
// pointer and the configuration file name -- confirm against the AppMain
// declaration.
448 int main(int argc, const char* argv[])
449 {
450  return CTAbleValApp().AppMain(argc, argv, 0, eDS_Default, 0);
451 }
452 
ncbi::TMaskedQueryRegions mask
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
void PrintSupported(CNcbiOstream &out_stream) const
static CColumnValidatorRegistry & GetInstance()
CDir –.
Definition: ncbifile.hpp:1696
CFile –.
Definition: ncbifile.hpp:1605
static CLineError * Create(EProblem eProblem, EDiagSev eSeverity, const std::string &strSeqId, unsigned int uLine, const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const std::string &strErrorMessage=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
Definition: line_error.cpp:42
CMaskFileName –.
Definition: ncbi_mask.hpp:107
CMask –.
Definition: ncbi_mask.hpp:59
virtual void SetProgressOstream(CNcbiOstream *pProgressOstrm, ENcbiOwnership eNcbiOwnership=eNoOwnership)
This sets the stream to which progress messages are written.
size_t LevelCount(EDiagSev eSev) override
Returns the number of errors seen so far at the given severity.
size_t Count() const override
string m_ignored_cols
Definition: tableval.cpp:91
bool m_no_header
Definition: tableval.cpp:97
CNcbiOstream * m_LogStream
Definition: tableval.cpp:85
virtual int Run(void)
Definition: tableval.cpp:188
string m_discouraged
Definition: tableval.cpp:100
string m_required_cols
Definition: tableval.cpp:90
void ProcessOneFile(CNcbiIstream &input, CNcbiOstream *output)
Definition: tableval.cpp:324
void Setup(const CArgs &args)
Definition: tableval.cpp:427
CNcbiOstream * m_output
Definition: tableval.cpp:95
CRef< CMessageListenerBase > m_logger
Definition: tableval.cpp:79
virtual void Init(void)
Definition: tableval.cpp:123
string m_columns_def
Definition: tableval.cpp:89
CTAbleValApp(void)
Definition: tableval.cpp:104
string m_format
Definition: tableval.cpp:92
CArgValue::TStringArray m_require_one
Definition: tableval.cpp:101
bool m_comma_separated
Definition: tableval.cpp:96
string m_ResultsDirectory
Definition: tableval.cpp:87
bool m_skip_empty
Definition: tableval.cpp:98
string m_unique_cols
Definition: tableval.cpp:93
bool m_ignore_unknown_types
Definition: tableval.cpp:99
bool ProcessOneDirectory(const CDir &directory, const CMask &mask, bool recurse)
Definition: tableval.cpp:402
static void RegisterAliases(CNcbiIstream *in_stream)
void ValidateInput(ILineReader &reader, const string &default_columns, const string &required, const string &ignored, const string &unique, const string &discouraged, const vector< string > &require_one)
void GenerateOutput(CNcbiOstream *out_stream, bool no_headers)
CVersionInfo –.
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
Definition: line_error.hpp:105
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
#define false
Definition: bool.h:36
static void Init(void)
Definition: cursor6.c:76
static SQLCHAR output[256]
Definition: print.c:5
#define CNcbiApplication
vector< string > TStringArray
Some values types can contain several value lists.
Definition: ncbiargs.hpp:293
@ fAllowMultiple
Repeated key arguments are legal (use with AddKey)
Definition: ncbiargs.hpp:635
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
Definition: ncbidiag.hpp:655
@ eDiag_Critical
Critical error message.
Definition: ncbidiag.hpp:654
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
Definition: ncbifile.cpp:2595
static string AddTrailingPathSeparator(const string &path)
Add trailing path separator, if needed.
Definition: ncbifile.cpp:455
virtual bool Exists(void) const
Check if directory "dirname" exists.
Definition: ncbifile.hpp:4066
void Reset(const string &path)
Reset path string.
Definition: ncbifile.cpp:298
bool Create(TCreateFlags flags=fCreate_Default) const
Create the directory using "dirname" passed in the constructor.
Definition: ncbifile.cpp:4071
list< TEntry > TEntries
Definition: ncbifile.hpp:1751
TEntries * GetEntriesPtr(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
Definition: ncbifile.cpp:3856
const string & GetPath(void) const
Get entry path.
Definition: ncbifile.hpp:3911
@ fIgnoreRecursive
Suppress "self recursive" elements (the directories "." and "..").
Definition: ncbifile.hpp:1756
@ fCreateObjects
Create appropriate subclasses of CDirEntry (CFile,CDir,...), not just CDirEntry objects.
Definition: ncbifile.hpp:1759
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCout
Definition: ncbistre.hpp:543
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
void Run(void)
Enter the main loop.
LOG LOG_cxx2c(void)
Create LOG on top of C++ Toolkit CNcbiDiag.
void CORE_SetREG(REG rg)
Set the registry (no registry if "rg" is passed zero) – to be used by the core internals.
Definition: ncbi_util.c:696
REG REG_cxx2c(IRWRegistry *reg, bool pass_ownership=false)
Convert a C++ Toolkit registry object to a REG registry.
void CORE_SetLOG(LOG lg)
Set the log handle (no logging if "lg" is passed zero) – to be used by the core internals (CORE LOG).
Definition: ncbi_util.c:123
void Add(const string &mask)
Add an inclusion mask.
Definition: ncbi_mask.hpp:67
FILE * file
static int input()
Lightweight interface for getting lines of data with minimal memory copying.
Magic spell ;-) needed for some weird compilers... very empiric.
Classes to match a string against a set of masks.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Definition: ncbiapp_api.hpp:54
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
int main(int argc, const char *argv[])
Definition: tableval.cpp:448
const char * TABLEVAL_APP_VER
Definition: tableval.cpp:55
static wxAcceleratorEntry entries[3]
Modified on Wed Sep 04 14:59:13 2024 by modify_doxy.py rev. 669887