NCBI C++ ToolKit
unit_test_gff3flybasewriter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_gff3flybasewriter.cpp 93574 2021-04-30 16:19:19Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mostly Mike Kornbluh, NCBI.
27 * Customizations by Frank Ludwig, NCBI.
28 *
29 * File Description:
30 * GFF3 writer unit test.
31 *
32 * ===========================================================================
33 */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include <corelib/ncbi_system.hpp>
38 #include <corelib/ncbiapp.hpp>
39 #include <corelib/ncbifile.hpp>
41 #include <serial/serial.hpp>
42 #include <serial/objistr.hpp>
43 #include <serial/streamiter.hpp>
44 
46 #include <objmgr/scope.hpp>
47 #include <objmgr/seq_entry_ci.hpp>
48 #include <objmgr/bioseq_ci.hpp>
49 #include <objmgr/annot_ci.hpp>
51 #include <dbapi/driver/drivers.hpp>
52 
55 #include "error_logger.hpp"
56 
57 #include <cstdio>
58 
59 // This header must be included before all Boost.Test headers if there are any
60 #include <corelib/test_boost.hpp>
61 
64 
65 // ============================================================================
66 // Customization data:
// File-name extensions (the part after the '.') that identify the role of each
// file belonging to a test case, plus the default test-case directory.
// Extension of the input file read by sUpdateCase / sRunTest.
67 const string extInput("asn");
// Extension of the file whose Seq-entries are added to the scope.
68 const string extDatabase("database");
// Extension of the expected writer output.
69 const string extOutput("gff3");
// Extension of the expected error log.
70 const string extErrors("errors");
// Suffix appended to the expected file's path when a differing result is kept.
71 const string extKeep("new");
72 
// Default value of the "test-dir" command-line argument.
73 const string dirTestFiles("gff3flybasewriter_test_cases");
74 // !!!
75 // !!! Must also customize reader type in sRunTest !!!
// NOTE(review): in this file the concrete writer is constructed in sGetWriter,
// which sRunTest calls; the wording above may be inherited from a reader test.
76 // !!!
77 // ============================================================================
78 
// Per-test-case record: the files that make up one test plus the object type.
// NOTE(review): other code in this file also accesses mDatabaseFile, mOutFile
// and mErrorFile on this struct; their declarations are not visible in this
// listing.
79 struct STestInfo {
    // Input file of the test case (extension extInput).
80  CFile mInFile;
    // Kind of object the test operates on; the loader sets it to "align".
84  string mObjType;
85 };
// Test cases are keyed by the base name of their files.
86 typedef string TTestName;
88 
90 public:
92  TTestNameToInfoMap * pTestNameToInfoMap,
93  const string& extInput,
94  const string& extOutput,
95  const string& extErrors,
96  const string& extDatabase)
97  : m_pTestNameToInfoMap(pTestNameToInfoMap),
102  { }
103 
// Callback for one directory entry: classifies the entry by its file-name
// extension and stores it in the STestInfo registered under the file's base
// name in *m_pTestNameToInfoMap.
// Entries that are not files, names ending in ".txt", names starting with "."
// and names ending in extKeep are ignored. Any other name must split on "."
// into exactly two non-empty pieces (test name, file type), and the file type
// must equal one of the configured extensions; otherwise a Boost.Test
// requirement fails.
104  void operator()( const CDirEntry & dirEntry ) {
105 
106  if( ! dirEntry.IsFile() ) {
107  return;
108  }
109 
110  CFile file(dirEntry);
111  string name = file.GetName();
112  if (NStr::EndsWith(name, ".txt") || NStr::StartsWith(name, ".")) {
113  return;
114  }
    // Kept diff outputs (suffix extKeep) are not test inputs.
115  if (NStr::EndsWith(name, extKeep)) {
116  return;
117  }
118 
119  // extract info from the file name
120  const string sFileName = file.GetName();
121  vector<string> vecFileNamePieces;
122  NStr::Split( sFileName, ".", vecFileNamePieces );
123  BOOST_REQUIRE(vecFileNamePieces.size() == 2);
124 
125  string sTestName = vecFileNamePieces[0];
126  BOOST_REQUIRE(!sTestName.empty());
127  string sFileType = vecFileNamePieces[1];
128  BOOST_REQUIRE(!sFileType.empty());
129 
    // Look up the record for this test name (same value as sTestName).
130  STestInfo & test_info_to_load =
131  (*m_pTestNameToInfoMap)[vecFileNamePieces[0]];
    // The object type is set to "align" on every call.
132  test_info_to_load.mObjType = "align";
133 
134  // figure out what type of file we have and set appropriately
    // Each slot must still be empty: a second file with the same test name and
    // type fails the requirement instead of overwriting the first.
135  if (sFileType == mExtInput) {
136  BOOST_REQUIRE( test_info_to_load.mInFile.GetPath().empty() );
137  test_info_to_load.mInFile = file;
138  }
139  else if (sFileType == mExtDatabase) {
140  BOOST_REQUIRE( test_info_to_load.mDatabaseFile.GetPath().empty() );
141  test_info_to_load.mDatabaseFile = file;
142  }
143  else if (sFileType == mExtOutput) {
144  BOOST_REQUIRE( test_info_to_load.mOutFile.GetPath().empty() );
145  test_info_to_load.mOutFile = file;
146  }
147  else if (sFileType == mExtErrors) {
148  BOOST_REQUIRE( test_info_to_load.mErrorFile.GetPath().empty() );
149  test_info_to_load.mErrorFile = file;
150  }
151 
152  else {
153  BOOST_FAIL("Unknown file type " << sFileName << ".");
154  }
155  }
156 
157 private:
158  // raw pointer because we do NOT own this
    // NOTE(review): the member the comment above describes is not visible in
    // this listing; operator() dereferences it as m_pTestNameToInfoMap.
    // Extensions that operator() compares a file's type against to decide which
    // STestInfo slot the file fills.
160  string mExtInput;
161  string mExtOutput;
162  string mExtErrors;
163  string mExtDatabase;
164 };
165 
167 {
    // Returns a heap-allocated writer bound to the given scope and output
    // stream. The caller is responsible for destroying it (both callers in
    // this file do so once writing is finished).
168  return new CGff3FlybaseWriter(scope, ostr);
169 }
170 
// Regenerates the expected files of one test case.
//   test_cases_dir  directory containing the test-case files
//   test_name       base name of the test case (file name without extension)
// Reads alignments from "<test_name>.<extInput>", loads the Seq-entries found in
// "<test_name>.<extDatabase>" into a fresh scope, and writes the writer output to
// "<test_name>.<extOutput>". Fails the test (BOOST_FAIL) when the input file
// does not exist.
171 void sUpdateCase(CDir& test_cases_dir, const string& test_name)
172 {
173  string input = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extInput);
174  string database = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extDatabase);
175  string output = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extOutput);
176  string errors = CDir::ConcatPath( test_cases_dir.GetPath(), test_name + "." + extErrors);
177  if (!CFile(input).Exists()) {
178  BOOST_FAIL("input file " << input << " does not exist.");
179  }
180  //string test_base, test_type;
181  //NStr::SplitInTwo(test_name, ".", test_base, test_type);
182  cerr << "Creating new test case from " << input << " ..." << endl;
183 
    // NOTE(review): presumably routes logged errors to the errors file —
    // confirm against error_logger.hpp.
184  CErrorLogger logger(errors);
185 
186  //get a scope
    // NOTE(review): pObjMngr, used below, is declared on a line that is not
    // visible in this listing.
188  // CGBDataLoader::RegisterInObjectManager(*pObjMngr).GetLoader()->SetAlwaysLoadExternal(false);
189  // pScope->AddDefaults();
190 
191  //get a writer object
192  CNcbiIfstream ifstr(input.c_str(), ios::binary);
193  unique_ptr<CObjectIStream> pI(CObjectIStream::Open(eSerial_AsnText, ifstr));
194 
195  CNcbiIfstream dbstr(database.c_str(), ios::binary);
196  unique_ptr<CObjectIStream> pDB(CObjectIStream::Open(eSerial_AsnText, dbstr));
197  CRef<CScope> pScope(new CScope(*pObjMngr));
    // Every Seq-entry in the database file becomes a top-level entry of the scope.
198  {
199  CObjectIStreamIterator<CSeq_entry> entry_it(*pDB);
200  for (const auto& entry : entry_it) {
201  pScope->AddTopLevelSeqEntry(entry);
202  }
203  }
204 
205  CNcbiOfstream ofstr(output.c_str());
    // Own the writer through unique_ptr so it is destroyed even when reading or
    // writing an alignment throws.
206  unique_ptr<CGff3FlybaseWriter> pWriter(sGetWriter(*pScope, ofstr));
207 
    // The first alignment is read unconditionally; further ones are read until
    // the input is exhausted. Each alignment gets its own header and footer.
208  CRef<CSeq_align> pAlign(new CSeq_align);
209  *pI >> *pAlign;
210  pWriter->WriteHeader();
211  pWriter->WriteAlign(*pAlign);
212  pWriter->WriteFooter();
213  while (!pI->EndOfData()) {
214  *pI >> *pAlign;
215  pWriter->WriteHeader();
216  pWriter->WriteAlign(*pAlign);
217  pWriter->WriteFooter();
218  }
    // Destroy the writer before flushing and closing the stream it writes to,
    // exactly where the original code deleted it.
219  pWriter.reset();
220  ofstr.flush();
221 
222  ifstr.close();
223  ofstr.close();
224 
225  cerr << " Produced new flybase file " << output << "." << endl;
226  cerr << " ... Done." << endl;
227 }
228 
229 // ----------------------------------------------------------------------------
// Regenerates every test case of a directory: the test names are gathered into
// testNameToInfoMap through a CTestNameToInfoMapLoader, and sUpdateCase is then
// called once per name.
//   test_cases_dir  directory containing the test-case files
230 void sUpdateAll(CDir& test_cases_dir)
231 // ----------------------------------------------------------------------------
232 {
233  const vector<string> kEmptyStringVec;
234  TTestNameToInfoMap testNameToInfoMap;
235  CTestNameToInfoMapLoader testInfoLoader(
236  &testNameToInfoMap, extInput, extOutput, extErrors, extDatabase);
    // NOTE(review): the call that takes the arguments below is only partly
    // visible in this listing — presumably the directory scan that invokes
    // testInfoLoader for each entry; confirm against the full source.
238  test_cases_dir,
239  kEmptyStringVec,
240  kEmptyStringVec,
241  testInfoLoader,
243 
244  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
245  const string & sName = name_to_info_it->first;
246  // "." + name_to_info_it->second.mObjType;
247  sUpdateCase(test_cases_dir, sName);
248  }
249 }
250 
251 // ----------------------------------------------------------------------------
252 void sRunTest(const string &sTestName, const STestInfo & testInfo, bool keep)
253 // ----------------------------------------------------------------------------
254 {
255  cerr << "Testing " << testInfo.mInFile.GetName() << " and " <<
256  testInfo.mDatabaseFile.GetName() << " "
257  << " against " <<
258  testInfo.mOutFile.GetName() << " and " <<
259  testInfo.mErrorFile.GetName() << endl;
260 
261  string logName = CDirEntry::GetTmpName();
262  CErrorLogger logger(logName);
263 
264  //get a scope
266  // CGBDataLoader::RegisterInObjectManager(*pObjMngr).GetLoader()->SetAlwaysLoadExternal(false);
267  // pScope->AddDefaults();
268 
269  //get a writer object
270  CNcbiIfstream ifstr(testInfo.mInFile.GetPath().c_str(), ios::binary);
271  unique_ptr<CObjectIStream> pI(CObjectIStream::Open(eSerial_AsnText, ifstr));
272 
273  CNcbiIfstream dbstr(testInfo.mDatabaseFile.GetPath().c_str(), ios::binary);
274  unique_ptr<CObjectIStream> pDB(CObjectIStream::Open(eSerial_AsnText, dbstr));
275  CRef<CScope> pScope(new CScope(*pObjMngr));
276  {
277  CObjectIStreamIterator<CSeq_entry> entry_it(*pDB);
278  for (const auto& entry :entry_it) {
279  pScope->AddTopLevelSeqEntry(entry);
280  }
281  }
282  string resultName = CDirEntry::GetTmpName();
283  CNcbiOfstream ofstr(resultName.c_str());
284  CGff3FlybaseWriter* pWriter = sGetWriter(*pScope, ofstr);
285 
286  CRef<CSeq_align> pAlign(new CSeq_align);
287  *pI >> *pAlign;
288  pWriter->WriteHeader();
289  pWriter->WriteAlign(*pAlign);
290  pWriter->WriteFooter();
291  while (!pI->EndOfData()) {
292  *pI >> *pAlign;
293  pWriter->WriteHeader();
294  pWriter->WriteAlign(*pAlign);
295  pWriter->WriteFooter();
296  }
297  delete pWriter;
298  ofstr.flush();
299  ifstr.close();
300  ofstr.close();
301 
302  bool success = testInfo.mOutFile.CompareTextContents(resultName, CFile::eIgnoreWs);
303  if (!success) {
304  CDirEntry deResult = CDirEntry(resultName);
305  if (keep) {
306  deResult.Copy(testInfo.mOutFile.GetPath() + "." + extKeep,
308  }
309  deResult.Remove();
310  CDirEntry(logName).Remove();
311  BOOST_ERROR("Error: " << sTestName << " failed due to post processing diffs.");
312  }
313  CDirEntry(resultName).Remove();
314 
315  success = testInfo.mErrorFile.CompareTextContents(logName, CFile::eIgnoreWs);
316  CDirEntry deErrors = CDirEntry(logName);
317  if (!success && keep) {
318  deErrors.Copy(testInfo.mErrorFile.GetPath() + "." + extKeep);
319  }
320  deErrors.Remove();
321  if (!success) {
322  BOOST_ERROR("Error: " << sTestName << " failed due to error handling diffs.");
323  }
324 };
326 {
327 }
328 
330 {
    // Declares the command-line arguments of the test program:
    //   test-dir     directory holding the test-case files (default dirTestFiles)
    //   update-case  name of a single test case to regenerate (default: empty)
    //   update-all   flag: regenerate every test case
    //   keep-diffs   flag: keep result files that differ from the expected ones
    // The help texts name the files sUpdateCase actually produces (extOutput and
    // extErrors) and refer to the writer, since this is a writer test.
331  arg_descrs->AddDefaultKey("test-dir", "TEST_FILE_DIRECTORY",
332  "Set the root directory under which all test files can be found.",
334  dirTestFiles );
335  arg_descrs->AddDefaultKey("update-case", "UPDATE_CASE",
336  "Produce .gff3 and .errors files from given name for new or updated test case.",
338  "" );
339  arg_descrs->AddFlag("update-all",
340  "Update all test cases to current writer code (dangerous).",
341  true );
342  arg_descrs->AddFlag("keep-diffs",
343  "Keep output files that are different from the expected.",
344  true );
345 }
346 
348 {
349 }
350 
352 {
    // Entry point of the test case.
    //  1. Resolve the test directory from "test-dir"; it must be a directory.
    //  2. With "update-all" or a non-empty "update-case", regenerate the expected
    //     files and return without running any comparison.
    //  3. Otherwise collect all test cases, require that each one has its four
    //     files, then run every case through sRunTest.
353  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
354 
355  CDir test_cases_dir( args["test-dir"].AsDirectory() );
356  BOOST_REQUIRE_MESSAGE( test_cases_dir.IsDir(),
357  "Cannot find dir: " << test_cases_dir.GetPath() );
358 
359  bool update_all = args["update-all"].AsBoolean();
360  if (update_all) {
361  sUpdateAll(test_cases_dir);
362  return;
363  }
364 
365  string update_case = args["update-case"].AsString();
366  if (!update_case.empty()) {
367  sUpdateCase(test_cases_dir, update_case);
368  return;
369  }
370 
371  const vector<string> kEmptyStringVec;
372  TTestNameToInfoMap testNameToInfoMap;
373  CTestNameToInfoMapLoader testInfoLoader(
374  &testNameToInfoMap, extInput, extOutput, extErrors, extDatabase);
    // NOTE(review): the call that takes the arguments below is only partly
    // visible in this listing — presumably the directory scan that invokes
    // testInfoLoader for each entry; confirm against the full source.
376  test_cases_dir,
377  kEmptyStringVec,
378  kEmptyStringVec,
379  testInfoLoader,
381 
    // First pass: every test case must be complete before any test is run.
382  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
383  const string & sName = name_to_info_it->first;
384  const STestInfo & testInfo = name_to_info_it->second;
385  cout << "Verifying: " << sName << endl;
386  BOOST_REQUIRE_MESSAGE( testInfo.mInFile.Exists(),
387  extInput + " file does not exist: " << testInfo.mInFile.GetPath() );
    // Report the database file's own path (the input file's path was printed
    // here before, which pointed at the wrong file).
388  BOOST_REQUIRE_MESSAGE( testInfo.mDatabaseFile.Exists(),
389  extDatabase + " file does not exist: " << testInfo.mDatabaseFile.GetPath() );
390  BOOST_REQUIRE_MESSAGE( testInfo.mOutFile.Exists(),
391  extOutput + " file does not exist: " << testInfo.mOutFile.GetPath() );
392  BOOST_REQUIRE_MESSAGE( testInfo.mErrorFile.Exists(),
393  extErrors + " file does not exist: " << testInfo.mErrorFile.GetPath() );
394  }
    // Second pass: run each case; an exception escaping sRunTest is reported as
    // a check failure and the remaining cases still run.
395  ITERATE(TTestNameToInfoMap, name_to_info_it, testNameToInfoMap) {
396  const string & sName = name_to_info_it->first;
397  const STestInfo & testInfo = name_to_info_it->second;
398 
399  cout << "Running test: " << sName << endl;
400 
401  BOOST_CHECK_NO_THROW(sRunTest(sName, testInfo, args["keep-diffs"]));
402  }
403 }
CArgs –.
Definition: ncbiargs.hpp:379
CDirEntry –.
Definition: ncbifile.hpp:262
CDir –.
Definition: ncbifile.hpp:1695
CFile –.
Definition: ncbifile.hpp:1604
virtual bool WriteHeader() override
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CObjectIStreamIterator.
Definition: streamiter.hpp:140
CScope –.
Definition: scope.hpp:92
TTestNameToInfoMap * m_pTestNameToInfoMap
CTestNameToInfoMapLoader(TTestNameToInfoMap *pTestNameToInfoMap, const string &extInput, const string &extOutput, const string &extErrors, const string &extDatabase)
void operator()(const CDirEntry &dirEntry)
Definition: map.hpp:338
static SQLCHAR output[256]
Definition: print.c:5
static int RunTests(void)
Code to iterate through all tests to run.
Definition: testodbc.c:397
static char test_name[128]
Definition: utf8_2.c:34
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eDirectory
Name of file directory.
Definition: ncbiargs.hpp:598
void FindFilesInDir(const CDir &dir, const vector< string > &masks, const vector< string > &masks_subdir, TFindFunc &find_func, TFindFiles flags=fFF_Default)
Find files in the specified directory.
Definition: ncbifile.hpp:3022
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
Definition: ncbifile.cpp:2595
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
Definition: ncbifile.hpp:3946
bool CompareTextContents(const string &file, ECompareText mode, size_t buf_size=0) const
Compare files contents in text form.
Definition: ncbifile.cpp:3565
bool IsFile(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a file.
Definition: ncbifile.hpp:3940
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
Definition: ncbifile.cpp:2903
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
Definition: ncbifile.cpp:776
string GetName(void) const
Get the base entry name with extension (if any).
Definition: ncbifile.hpp:3916
const string & GetPath(void) const
Get entry path.
Definition: ncbifile.hpp:3910
virtual bool Copy(const string &new_path, TCopyFlags flags=fCF_Default, size_t buf_size=0) const
Copy the entry to a location specified by "new_path".
Definition: ncbifile.cpp:2428
virtual bool Exists(void) const
Check existence of file.
Definition: ncbifile.hpp:4038
@ fCF_Overwrite
The following flags define what to do when the destination entry already exists:
Definition: ncbifile.hpp:534
@ fFF_Recursive
descend into sub-dirs
Definition: ncbifile.hpp:3012
@ fFF_Default
default behavior
Definition: ncbifile.hpp:3014
@ eIgnoreWs
Definition: ncbifile.hpp:1668
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
use only Cassandra database for the lookups; yes: do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
FILE * file
static int input()
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
Utility stuff for more convenient using of Boost.Test library.
BOOST_AUTO_TEST_CASE(RunTests)
USING_SCOPE(objects)
const string extErrors("errors")
void sUpdateAll(CDir &test_cases_dir)
map< TTestName, STestInfo > TTestNameToInfoMap
const string extOutput("gff3")
void sUpdateCase(CDir &test_cases_dir, const string &test_name)
CGff3FlybaseWriter * sGetWriter(CScope &scope, CNcbiOstream &ostr)
const string extInput("asn")
const string dirTestFiles("gff3flybasewriter_test_cases")
const string extKeep("new")
const string extDatabase("database")
void sRunTest(const string &sTestName, const STestInfo &testInfo, bool keep)
NCBITEST_INIT_CMDLINE(arg_descrs)
Modified on Mon May 27 04:39:36 2024 by modify_doxy.py rev. 669887