NCBI C++ ToolKit
unit_test_srcwriter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_srcwriter.cpp 93574 2021-04-30 16:19:19Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Justin Foley, NCBI
27 *
28 * File Description:
29 * src writer unit test.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <corelib/ncbi_system.hpp>
37 #include <corelib/ncbiapp.hpp>
38 #include <corelib/ncbifile.hpp>
40 #include <serial/serial.hpp>
41 #include <serial/objistr.hpp>
42 
44 #include <objmgr/scope.hpp>
46 #include <dbapi/driver/drivers.hpp>
47 
51 #include "error_logger.hpp"
52 #include <objmgr/bioseq_ci.hpp>
53 
54 
55 #include <cstdio>
56 
57 // This header must be included before all Boost.Test headers if there are any
58 #include <corelib/test_boost.hpp>
59 
60 
63 
64 
65 // ============================================================================
66 // Customization data:
// File-name suffixes and the default fixture directory used by this test.
// Fixture files are named <test>.<objtype>.<suffix>; the suffix selects the role.
67 const string extInput("input"); // suffix of a test's input file (see sUpdateCase path building)
68 const string extOutput("output"); // suffix of the expected writer output file
69 const string extErrors("errors"); // suffix of the expected error-log file
70 const string extKeep("new"); // appended to the expected file's path when a differing result is kept; names ending in it are skipped by the directory scan
71 const string dirTestFiles("srcwriter_test_cases"); // passed as the last argument when registering the "test-dir" option
72 // ============================================================================
73 
// Per-test record: the fixture files that belong to one test name plus the
// object type parsed from their file names.
74 struct STestInfo {
75  CFile mInFile; // file whose name ends in the input suffix
// NOTE(review): mOutFile and mErrorFile are read and assigned elsewhere in
// this file, but their declarations (embedded listing lines 76-77) are not
// present in this extract.
78  string mObjType; // second dot-separated piece of the file name ("seqentry", "ids" or "srcchk")
79 };
80 
81 typedef string TTestName;
83 
84 
86 public:
88  TTestNameToInfoMap * pTestNameToInfoMap,
89  const string& extInput,
90  const string& extOutput,
91  const string& extErrors)
92  : m_pTestNameToInfoMap(pTestNameToInfoMap),
96  { }
97 
98 
// Callback applied to one directory entry: classifies a fixture file named
// <test>.<objtype>.<filetype> and stores it in the map entry for <test>.
// Non-files, "*.txt" files, dot-files and names ending in extKeep are ignored.
// Any other name that does not split into exactly three non-empty pieces, or
// that carries an unknown object type or file type, fails the running test.
99  void operator()( const CDirEntry & dirEntry ) {
100 
 // directories and other non-file entries are not fixtures
101  if( ! dirEntry.IsFile() ) {
102  return;
103  }
104  CFile file(dirEntry);
105  string name = file.GetName();
 // skip text notes and hidden files
106  if (NStr::EndsWith(name, ".txt") || NStr::StartsWith(name, ".")) {
107  return;
108  }
 // skip files ending in extKeep; sRunTest writes such copies when diffs are kept
109  if (NStr::EndsWith(name, extKeep)) {
110  return;
111  }
112 
113  // extract info from the file name
 // NOTE(review): sFileName repeats the GetName() call already stored in 'name'.
114  const string sFileName = file.GetName();
115  list<string> fileNamePieces;
116  NStr::Split( sFileName, ".", fileNamePieces, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
 // expected layout: <test name>.<object type>.<file type>
117  BOOST_REQUIRE(fileNamePieces.size() == 3);
118 
119  string sTestName = fileNamePieces.front();
120  BOOST_REQUIRE(!sTestName.empty());
121  string sObjType = *(next(fileNamePieces.begin()));
122  BOOST_REQUIRE(!sObjType.empty());
123  string sFileType = fileNamePieces.back();
124  BOOST_REQUIRE(!sFileType.empty());
125 
 // look up the record for this test name in the caller-owned map
126  STestInfo & test_info_to_load =
127  (*m_pTestNameToInfoMap)[sTestName];
128 
129  // Figure out which test to perform
130  if (sObjType == "seqentry" || sObjType == "ids" || sObjType == "srcchk") {
131  test_info_to_load.mObjType = sObjType;
132  } else {
133  BOOST_FAIL("Unknown object type " << sObjType << ".");
134  }
135  // figure out what type of file we have and set appropriately
 // each BOOST_REQUIRE below rejects a second file of the same role for one test
136  if (sFileType == mExtInput) {
137  BOOST_REQUIRE( test_info_to_load.mInFile.GetPath().empty() );
138  test_info_to_load.mInFile = file;
139  }
140  else if (sFileType == mExtOutput) {
141  BOOST_REQUIRE( test_info_to_load.mOutFile.GetPath().empty() );
142  test_info_to_load.mOutFile = file;
143  }
144  else if (sFileType == mExtErrors) {
145  BOOST_REQUIRE( test_info_to_load.mErrorFile.GetPath().empty() );
146  test_info_to_load.mErrorFile = file;
147  }
148 
149  else {
 // NOTE(review): the message prints the whole file name, not sFileType.
150  BOOST_FAIL("Unknown file type " << sFileName << ".");
151  }
152  }
153 
154 private:
155  // raw pointer because we do NOT own this
157  string mExtInput;
158  string mExtOutput;
159  string mExtErrors;
160 };
161 
162 
163 // ----------------------------------------------------------------------------
// Regenerates the expected output file of one test case by running CSrcWriter
// over the case's input file.
//   test_cases_dir  directory holding the fixture files
//   test_name       "<base>.<objtype>"; the fixture paths are built as
//                   test_name + "." + suffix, and the part after "." picks
//                   the writer call to make
// Fails the running test when the input file does not exist.
164 void sUpdateCase(CDir& test_cases_dir, const string& test_name)
165 // ----------------------------------------------------------------------------
166 {
167  string input = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extInput);
168  string output = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extOutput);
169  string errors = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extErrors);
170  if (!CFile(input).Exists()) {
171  BOOST_FAIL("input file " << input << " does not exist.");
172  }
 // test_base is filled in but not used afterwards; only test_type is consulted
173  string test_base, test_type;
174  NStr::SplitInTwo(test_name, ".", test_base, test_type);
175  cerr << "Creating new test case from " << input << " ..." << endl;
176 
 // constructed with the errors-file path; presumably captures diagnostics
 // there - confirm in error_logger.hpp
177  CErrorLogger logger(errors);
178 
179  //get a scope
 // NOTE(review): the declaration of pObjMngr (embedded listing line 180) is
 // missing from this extract.
181  CGBDataLoader::RegisterInObjectManager(*pObjMngr).GetLoader()->SetAlwaysLoadExternal(false);
182  CRef<CScope> pScope(new CScope(*pObjMngr));
183  pScope->AddDefaults();
184 
185  //open the input as an ASN.1 text object stream
186  CNcbiIfstream ifstr(input.c_str());
187  unique_ptr<CObjectIStream> pI(CObjectIStream::Open(eSerial_AsnText, ifstr));
188 
 // the expected-output file is opened for writing before the type is checked
189  CNcbiOfstream ofstr(output.c_str());
190 
 //get a writer object emitting tab-separated columns
191  CSrcWriter writer(0);
192  writer.SetDelimiter("\t");
193 
 // NOTE(review): only "seqentry" and "srcchk" are handled; for any other
 // type (including "ids", which the directory scan accepts) nothing is
 // written to ofstr. The writers' return values are not checked.
194  if (test_type == "seqentry") {
 // a single Seq-entry is read and written directly
195  CRef<CSeq_entry> pEntry(new CSeq_entry);
196  *pI >> *pEntry;
197  writer.WriteSeqEntry(*pEntry, *pScope, ofstr);
198  } else if (test_type == "srcchk") {
 // read every Seq-entry in the stream, add it to the scope and collect
 // all of its Bioseq handles (paired with an empty id string)
199  vector<pair<string, CBioseq_Handle> > vecIdBsh;
200  while (!pI->EndOfData()) {
201  CRef<CSeq_entry> pEntry(new CSeq_entry);
202  *pI >> *pEntry;
203  CSeq_entry_Handle handle = pScope->AddTopLevelSeqEntry(*pEntry);
204  for (CBioseq_CI bci(handle); bci; ++bci) {
205  vecIdBsh.push_back(make_pair("",*bci));
206  }
207  }
208  writer.WriteBioseqHandles(vecIdBsh, CSrcWriter::sAllSrcCheckFields, ofstr);
209  }
210 
211  ofstr.flush();
212  ifstr.close();
213  ofstr.close();
214 
215  cerr << " Produced new srcwriter file " << output << "." << endl;
216  cerr << " ... Done." << endl;
217 }
218 
219 
220 // ----------------------------------------------------------------------------
// Regenerates every test case found in test_cases_dir: collects the fixture
// files into a name -> STestInfo map, then calls sUpdateCase once per entry
// with "<test name>.<object type>".
221 void sUpdateAll(CDir& test_cases_dir)
222 // ----------------------------------------------------------------------------
223 {
224  const vector<string> kEmptyStringVec;
225  TTestNameToInfoMap testNameToInfoMap;
226  CTestNameToInfoMapLoader testInfoLoader(
227  &testNameToInfoMap, extInput, extOutput, extErrors);
 // NOTE(review): the lines that open and close this call (embedded listing
 // lines 228 and 233) are missing from this extract; the arguments below
 // look like a directory scan that applies testInfoLoader to each entry -
 // confirm against the full source.
229  test_cases_dir,
230  kEmptyStringVec,
231  kEmptyStringVec,
232  testInfoLoader,
234 
235  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
 // rebuild the "<base>.<objtype>" name that sUpdateCase expects
236  const string & sName = name_to_info_it->first +
237  "." + name_to_info_it->second.mObjType;
238  sUpdateCase(test_cases_dir, sName);
239  }
240 }
241 
242 
243 // ----------------------------------------------------------------------------
// Runs one test case: writes the CSrcWriter output for testInfo.mInFile into
// a temporary file and compares it, ignoring whitespace, with the expected
// output file; then compares the temporary log file with the expected errors
// file. Mismatches are reported through BOOST_ERROR.
//   sTestName  name used in the failure messages
//   testInfo   input / expected output / expected errors files and object type
//   keep       when true, a differing result is copied next to the expected
//              file with the extKeep suffix before the temporary is removed
244 void sRunTest(const string &sTestName, const STestInfo & testInfo, bool keep)
245 // ----------------------------------------------------------------------------
246 {
247  cerr << "Testing " << testInfo.mInFile.GetName() << " against " <<
248  testInfo.mOutFile.GetName() << " and " <<
249  testInfo.mErrorFile.GetName() << endl;
250 
 // the logger is given a temporary file name; that file is later compared
 // with the expected errors file
251  string logName = CDirEntry::GetTmpName();
252  CErrorLogger logger(logName);
253 
254  // get a scope
 // NOTE(review): the declaration of pObjMngr (embedded listing line 255) is
 // missing from this extract.
256  CGBDataLoader::RegisterInObjectManager(*pObjMngr).GetLoader()->SetAlwaysLoadExternal(false);
257  CRef<CScope> pScope(new CScope(*pObjMngr));
258  pScope->AddDefaults();
259 
260  // get a writer object
261  CSrcWriter writer(0);
262  writer.SetDelimiter("\t");
263 
 // open the input as an ASN.1 text object stream
264  CNcbiIfstream ifstr(testInfo.mInFile.GetPath().c_str());
265  unique_ptr<CObjectIStream> pI(CObjectIStream::Open(eSerial_AsnText, ifstr));
266  // create a temporary result file
267  string resultName = CDirEntry::GetTmpName();
268  CNcbiOfstream ofstr(resultName.c_str());
269 
 // NOTE(review): as in sUpdateCase, object type "ids" matches neither branch,
 // so the result file stays empty for such tests.
270  if (testInfo.mObjType == "seqentry") {
271  CRef<CSeq_entry> pEntry(new CSeq_entry);
272  *pI >> *pEntry;
273  writer.WriteSeqEntry(*pEntry, *pScope, ofstr);
274  } else if (testInfo.mObjType == "srcchk") {
 // collect the Bioseq handles of every Seq-entry in the stream
275  vector<pair<string, CBioseq_Handle> > vecIdBsh;
276  while (!pI->EndOfData()) {
277  CRef<CSeq_entry> pEntry(new CSeq_entry);
278  *pI >> *pEntry;
279  CSeq_entry_Handle handle = pScope->AddTopLevelSeqEntry(*pEntry);
280  for (CBioseq_CI bci(handle); bci; ++bci) {
281  vecIdBsh.push_back(make_pair("",*bci));
282  }
283  }
284  writer.WriteBioseqHandles(vecIdBsh, CSrcWriter::sAllSrcCheckFields, ofstr);
285  }
286 
287  ofstr.flush();
288  ifstr.close();
289  ofstr.close();
290 
291 
 // first comparison: produced table vs. expected output
292  bool success = testInfo.mOutFile.CompareTextContents(resultName, CFile::eIgnoreWs);
293  if (!success) {
294  CDirEntry deResult = CDirEntry(resultName);
295  if (keep) {
 // NOTE(review): the continuation of this call (embedded listing line 297)
 // is missing from this extract.
296  deResult.Copy(testInfo.mOutFile.GetPath() + "." + extKeep,
298  }
299  deResult.Remove();
300  CDirEntry(logName).Remove();
 // NOTE(review): there is no return after this report. If BOOST_ERROR does
 // not abort the test (Boost.Test documents it as a non-fatal report),
 // execution continues below with the log file already removed, so the
 // error-file comparison would likely report a second failure - confirm.
301  BOOST_ERROR("Error: " << sTestName << " failed due to post processing diffs.");
302  }
303  // remove the temporary result file
304  CDirEntry(resultName).Remove();
305 
 // second comparison: produced log vs. expected errors
306  success = testInfo.mErrorFile.CompareTextContents(logName, CFile::eIgnoreWs);
307  CDirEntry deErrors = CDirEntry(logName);
308  if (!success && keep) {
309  deErrors.Copy(testInfo.mErrorFile.GetPath() + "." + extKeep);
310  }
 // the temporary log is removed whether or not the comparison succeeded
311  deErrors.Remove();
312  if (!success) {
313  BOOST_ERROR("Error: " << sTestName << " failed due to error handling diffs.");
314  }
315 };
316 
318 {
319 }
320 
// Registers the command-line options read by the test body: "test-dir",
// "update-case", "update-all" and "keep-diffs".
// NOTE(review): the line introducing this body (embedded listing line 321) is
// missing from this extract; it presumably is NCBITEST_INIT_CMDLINE(arg_descrs).
322 {
 // NOTE(review): one argument line of each AddDefaultKey call below (embedded
 // listing lines 325 and 329) is missing from this extract.
323  arg_descrs->AddDefaultKey("test-dir", "TEST_FILE_DIRECTORY",
324  "Set the root directory under which all test files can be found.",
326  dirTestFiles );
 // NOTE(review): the help text names ".asn and .error" files, while this file
 // uses the suffixes "input", "output" and "errors" - confirm the wording.
327  arg_descrs->AddDefaultKey("update-case", "UPDATE_CASE",
328  "Produce .asn and .error files from given name for new or updated test case.",
330  "" );
 // NOTE(review): the help text says "reader code" although this test drives
 // CSrcWriter - confirm the wording.
331  arg_descrs->AddFlag("update-all",
332  "Update all test cases to current reader code (dangerous).",
333  true );
334  arg_descrs->AddFlag("keep-diffs",
335  "Keep output files that are different from the expected.",
336  true );
337 }
338 
340 {
341 }
342 
343 
// Test driver. Depending on the command-line arguments it either regenerates
// fixtures (update-all, then update-case, each returning early) or scans the
// test directory, checks that every test has all three files, and runs each
// test through sRunTest.
// NOTE(review): the line introducing this body (embedded listing line 344) is
// missing from this extract; it presumably is BOOST_AUTO_TEST_CASE(RunTests).
345 {
346  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
347 
348  CDir test_cases_dir( args["test-dir"].AsDirectory() );
349  BOOST_REQUIRE_MESSAGE( test_cases_dir.IsDir(),
350  "Cannot find dir: " << test_cases_dir.GetPath() );
351 
 // update-all takes precedence over update-case
352  bool update_all = args["update-all"].AsBoolean();
353  if (update_all) {
354  sUpdateAll(test_cases_dir);
355  return;
356  }
357 
 // an empty update-case value means "no single case requested"
358  string update_case = args["update-case"].AsString();
359  if (!update_case.empty()) {
360  sUpdateCase(test_cases_dir, update_case);
361  return;
362  }
363 
364  const vector<string> kEmptyStringVec;
365  TTestNameToInfoMap testNameToInfoMap;
366  CTestNameToInfoMapLoader testInfoLoader(
367  &testNameToInfoMap, extInput, extOutput, extErrors);
 // NOTE(review): the lines that open and close this call (embedded listing
 // lines 368 and 373) are missing from this extract; the arguments look like
 // a directory scan that applies testInfoLoader to each entry - confirm.
369  test_cases_dir,
370  kEmptyStringVec,
371  kEmptyStringVec,
372  testInfoLoader,
374 
 // first pass: every test must have its input, output and errors files
375  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
376  const string &sName = name_to_info_it->first;
377  const STestInfo &testInfo = name_to_info_it->second;
378  cout << "Verifying: " << sName << endl;
379  BOOST_REQUIRE_MESSAGE( testInfo.mInFile.Exists(),
380  extInput + " file does not exist: " << testInfo.mInFile.GetPath() );
381  BOOST_REQUIRE_MESSAGE( testInfo.mOutFile.Exists(),
382  extOutput + " file does not exist: " << testInfo.mOutFile.GetPath() );
383  BOOST_REQUIRE_MESSAGE( testInfo.mErrorFile.Exists(),
384  extErrors + " file does not exist: " << testInfo.mErrorFile.GetPath() );
385  }
 // second pass: run each test, passing the keep-diffs flag through
386  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
387  const string & sName = name_to_info_it->first;
388  const STestInfo & testInfo = name_to_info_it->second;
389 
390  cout << "Running test: " << sName << endl;
391 
 // an exception escaping sRunTest is reported as a failed check
392  BOOST_CHECK_NO_THROW(sRunTest(sName, testInfo, args["keep-diffs"]));
393  }
394 }
395 
396 
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CDirEntry –.
Definition: ncbifile.hpp:262
CDir –.
Definition: ncbifile.hpp:1696
CFile –.
Definition: ncbifile.hpp:1605
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
Used to generate tables showing qualifier-field entries occurring in the BioSources of instances of Bi...
Definition: src_writer.hpp:73
static const FIELDS sAllSrcCheckFields
All possible fields processed by srcchk application, in their canonical order.
Definition: src_writer.hpp:179
void SetDelimiter(const string &delimiter)
Set the column delimiter for the output table.
Definition: src_writer.hpp:111
virtual bool WriteBioseqHandles(const vector< pair< string, CBioseq_Handle > > &, const FIELDS &, CNcbiOstream &, ILineErrorListener *=nullptr)
Write a table of the specified qualifier-field entries found in the BioSources of a vector of Bioseqs...
Definition: src_writer.cpp:165
virtual bool WriteSeqEntry(const CSeq_entry &, CScope &, CNcbiOstream &, bool=false)
Write a table of all qualifier-field entries occurring in the BioSources for a given Seq-entry,...
Definition: src_writer.cpp:189
TTestNameToInfoMap * m_pTestNameToInfoMap
void operator()(const CDirEntry &dirEntry)
CTestNameToInfoMapLoader(TTestNameToInfoMap *pTestNameToInfoMap, const string &extInput, const string &extOutput, const string &extErrors)
Definition: map.hpp:338
static void test_type(TDSSOCKET *tds, TDSCOLUMN *col)
Definition: all_types.c:18
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static SQLCHAR output[256]
Definition: print.c:5
static int RunTests(void)
Code to iterate through all tests to run.
Definition: testodbc.c:397
static char test_name[128]
Definition: utf8_2.c:34
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eDirectory
Name of file directory.
Definition: ncbiargs.hpp:598
void FindFilesInDir(const CDir &dir, const vector< string > &masks, const vector< string > &masks_subdir, TFindFunc &find_func, TFindFiles flags=fFF_Default)
Find files in the specified directory.
Definition: ncbifile.hpp:3023
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
Definition: ncbifile.cpp:2595
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
Definition: ncbifile.hpp:3947
bool CompareTextContents(const string &file, ECompareText mode, size_t buf_size=0) const
Compare files contents in text form.
Definition: ncbifile.cpp:3565
bool IsFile(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a file.
Definition: ncbifile.hpp:3941
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
Definition: ncbifile.cpp:2903
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
string GetName(void) const
Get the base entry name with extension (if any).
Definition: ncbifile.hpp:3917
const string & GetPath(void) const
Get entry path.
Definition: ncbifile.hpp:3911
virtual bool Copy(const string &new_path, TCopyFlags flags=fCF_Default, size_t buf_size=0) const
Copy the entry to a location specified by "new_path".
Definition: ncbifile.cpp:2428
virtual bool Exists(void) const
Check existence of file.
Definition: ncbifile.hpp:4039
@ fCF_Overwrite
The following flags define what to do when the destination entry already exists:
Definition: ncbifile.hpp:535
@ fFF_Recursive
descend into sub-dirs
Definition: ncbifile.hpp:3013
@ fFF_Default
default behavior
Definition: ncbifile.hpp:3015
@ eIgnoreWs
Definition: ncbifile.hpp:1669
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
TLoader * GetLoader(void) const
Get pointer to the loader.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5432
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5414
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3545
@ fSplit_Truncate
Definition: ncbistr.hpp:2503
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2500
FILE * file
static int input()
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
Utility stuff for more convenient using of Boost.Test library.
BOOST_AUTO_TEST_CASE(RunTests)
USING_SCOPE(objects)
const string extErrors("errors")
void sUpdateAll(CDir &test_cases_dir)
map< TTestName, STestInfo > TTestNameToInfoMap
const string extOutput("output")
void sUpdateCase(CDir &test_cases_dir, const string &test_name)
NCBITEST_AUTO_INIT()
const string dirTestFiles("srcwriter_test_cases")
USING_NCBI_SCOPE
const string extInput("input")
const string extKeep("new")
void sRunTest(const string &sTestName, const STestInfo &testInfo, bool keep)
NCBITEST_AUTO_FINI()
NCBITEST_INIT_CMDLINE(arg_descrs)
string TTestName
Modified on Fri Jul 19 17:12:19 2024 by modify_doxy.py rev. 669887