NCBI C++ ToolKit
test_table_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: test_table_reader.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Bob Falk
27  *
28  * File Description
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <stdio.h>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbireg.hpp>
37 #include <corelib/ncbifile.hpp>
38 #include <algorithm>
39 
43 
48 
49 #include <common/test_assert.h>
50 
51 
54 
55 /////////////////////////////////
56 // Test application
57 //
58 
// NOTE(review): listing line 59 (the class-head for this body, presumably
// declaring CTestApplication) is absent from this extract - confirm against
// the repository copy.
60 {
61 public:
    // Registers the command-line argument descriptions for the test program.
62  void Init(void);
    // Processes each positional argument; see the definition for return codes.
63  int Run(void);
64 
    // Loads one table file, optionally transforms it, and writes it out as ASN.
    //   fname       - path of the table file to load
    //   parms_file  - ASN format-description file, or "" to load without one
    //   table_xform - requested transformation ("seqlocs", "features", or other)
    //   accession   - accession string, or "" when none was supplied
65  void LoadTable(const string& fname,
66  const string& parms_file,
67  const string& table_xform,
68  const string& accession);
69 };
70 
71 
// Body of the application initializer: builds the argument descriptions
// (three optional keys plus at least one positional argument) and registers them.
// NOTE(review): listing line 72 (the function signature) is absent from this
// extract, as are lines 75-77, 83, 87 and 91; the three AddOptionalKey calls
// below are therefore shown without their closing argument - confirm against
// the repository copy.
73 {
74  // Set err.-posting and tracing to maximum
78 
79  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions());
80 
    // -xform: which transformation to apply after the table is loaded.
81  arg_desc->AddOptionalKey("xform", "TransformationType",
82  "Desired transformation for table - none, seqlocs or features. Default: none",
84 
    // -acc: accession applied to columns that need id mapping.
85  arg_desc->AddOptionalKey("acc", "Accession",
86  "Optional accession, e.g. GCF_000001405.17, used to look up ids for chromosome fields",
88 
    // -table-format: ASN file describing the table format.
89  arg_desc->AddOptionalKey("table-format", "TableFormatDescription",
90  "ASN table format description for table(s) being imported",
92 
93  arg_desc->SetArgsType(CArgDescriptions::eRegularArgs);
    // At least one positional argument is required (minimum 1, maximum kMax_UInt).
94  arg_desc->AddExtra(1, kMax_UInt, "Files or Directories", CArgDescriptions::eString);
95 
    // release() passes the raw pointer on to SetupArgDescriptions.
96  SetupArgDescriptions(arg_desc.release());
97 }
98 
// Loads the table in 'fname', converts it to a seq-annot, optionally applies a
// transformation, and writes the result to "<fname><output_type>.asn".
//
//   fname       - path of the table file to load
//   parms_file  - ASN text file holding a CUser_object that describes the table
//                 format; when empty the table is loaded without one
//   table_xform - requested transformation; "seqlocs" and "features" are acted
//                 on, any other value leaves the converted table untransformed
//   accession   - when non-empty (and no parms_file is given), attached as the
//                 assembly accession to matching columns
//
// Failures are reported through LOG_POST; the function returns early when the
// table cannot be loaded and never propagates a CException to the caller.
//
// NOTE(review): listing lines 107, 120, 137 and 193 are absent from this
// extract. They presumably hold the declaration of 'ds', the condition that
// opens the block at listing line 121, the right-hand side of the data-type
// comparison, and the declaration of 'annot_table_ds' - confirm against the
// repository copy.
99 void CTestApplication::LoadTable(const string& fname,
100  const string& parms_file,
101  const string& table_xform,
102  const string& accession)
103 {
104  LOG_POST(Info << "Loading Table: " << fname);
105 
106  //CTableImportDataSource ds;
    // Starts as the command-line value; may be replaced below by the value
    // stored in the parameters file.
108  string transformation_type = table_xform;
109  bool success;
110 
111  try {
    // No format description supplied: load the file directly, then report
    // delimiters and guess column types.
112  if (parms_file == "") {
113  success = ds->LoadTable(fname);
114 
115  if (!success) {
116  LOG_POST(Error << "Error loading file: " << fname);
117  return;
118  }
119 
121  {
122  LOG_POST(Info << "Delimiters: ");
123  vector<char> delims = ds->GetDelimiterRules().GetDelimiters();
124  for (size_t i=0; i<delims.size(); ++i)
125  LOG_POST(Info << "\'" << delims[i] << "\', ");
126  LOG_POST(Info << "Merge Delimiters: " << ds->GetDelimiterRules().GetMergeDelimiters());
127 
128  // guess column types
129  CTableColumnTypeGuesser guesser(ds);
130  guesser.GuessColumns();
131 
132  // If an accession is provided, look for and add it to any fields
133  // that need id mapping (chromosome #'s)
134  if (accession != "") {
135  for (size_t i=0; i<ds->GetColumns().size(); ++i) {
136  if (ds->GetColumns()[i].GetDataType() ==
138  //cout << "Adding accession to col: " << i << " " << accession << endl;
139  CMapAssemblyParams ainfo;
140  ainfo.SetAssemblyAcc(accession);
141  ds->GetColumns()[i].SetAssembly(ainfo);
142  }
143  }
144 
145  }
146  }
147  else {
    // Only logged; execution continues on to the conversion step below.
148  LOG_POST(Error << "Could not find a delimiter for table: " << fname);
149  }
150  }
151  else {
    // A format description was supplied: read it and load the table with it.
152  CNcbiIfstream in_file(parms_file.c_str());
153  if (!in_file) {
154  LOG_POST(Error << "Could not open parameters file: " << parms_file);
155  return;
156  }
157 
158  CUser_object user_object;
159  LOG_POST(Info << "Loading Parameters file: " << parms_file);
160  in_file >> MSerial_AsnText >> user_object;
161 
162  // If the command line parameters specify an explicit transformation
163  // type use that instead of the type stored in the parameters file
164  if (table_xform == "" &&
165  user_object.HasField("conversion-type") &&
166  user_object.GetField("conversion-type").GetData().IsStr()) {
167  transformation_type = user_object.
168  GetField("conversion-type").GetData().GetStr();
169  }
170 
171  success = ds->LoadTable(fname, user_object);
172 
173  if (!success) {
174  LOG_POST(Error << "Error loading file: " << fname);
175  return;
176  }
177  else {
178  LOG_POST(Info << "Loaded table from: " << fname);
179  }
180  }
181  }
182  catch( CException& c) {
183  LOG_POST(Error <<"Error loading table: " << c.ReportAll());
184  return;
185  }
186 
    // Inserted between the input file name and ".asn"; stays empty when no
    // transformation succeeds.
187  string output_type = "";
188 
189  try {
190  // Convert table to asn and then (if table_xform is not 'none') transform
191  // it to the desired format. After (optional) transform write table out
192  // to a file with same name but of type ".asn".
194  ds->ConvertToSeqAnnot(annot_table_ds->GetContainer());
195 
196  string msg;
197  if (transformation_type == "seqlocs") {
198  vector<CTableAnnotDataSource::STableLocation> locations =
199  annot_table_ds->FindLocations(msg, false);
200 
201  if (locations.size() == 0) {
202  LOG_POST(Error << msg << " - Unable to create locations");
203  }
204  else {
    // One seq-loc per location found; location numbers passed on are 1-based.
    // NOTE(review): 'success' is assigned here but never inspected afterwards.
205  for (size_t i=0; i<locations.size(); ++i) {
206  if (!locations[i].m_IsRsid)
207  success = annot_table_ds->AddSeqLoc(locations[i], static_cast<int>(i+1), NULL);
208  else
209  success = annot_table_ds->AddSnpSeqLoc(locations[i], static_cast<int>(i+1), NULL);
210  }
211  output_type = ".locs";
212  }
213  }
214  else if (transformation_type == "features") {
215  vector<CTableAnnotDataSource::STableLocation> locations =
216  annot_table_ds->FindLocations(msg, false);
217 
218  // Any location can be a feature, but only 1. If we find more than 1
219  // we choose not to create any features.
220  if (locations.size() > 1) {
221  LOG_POST(Error << "Unable to create features - table has more than 1 location per row");
222  }
223  // unable to identify columns needed to create location info needed by features.
224  else if (locations.size() == 0) {
225  LOG_POST(Error << msg << " - Unable to create features");
226  }
227  else {
    // NOTE(review): 'success' is assigned here but never inspected afterwards.
228  if (!locations[0].m_IsRsid)
229  success = annot_table_ds->CreateFeature(locations[0], NULL);
230  else
231  success = annot_table_ds->CreateSnpFeature(locations[0], NULL);
232  output_type = ".feats";
233  }
234  }
235 
    // The table is written even when the requested transformation could not
    // be applied; in that case the name is simply "<fname>.asn".
236  string out_fname = fname + output_type + ".asn";
237 
238  annot_table_ds->WriteAsn(out_fname);
239  }
240  catch( CException& c) {
241  LOG_POST(Error <<"Error transforming and saving table: " << c.ReportAll());
242  }
243 
244 }
245 
246 
// Body of the application run step: reads the optional keys, then processes
// every positional argument in turn.
// Returns 1 when the unprocessed argument list is empty; otherwise returns 0,
// including after an exception has been caught and logged.
// NOTE(review): listing line 247 (the function signature) is absent from this
// extract - confirm against the repository copy.
248 {
249  try{
250 
251  const CNcbiArguments& args = GetArguments();
252 
253  if (args.Size() == 0)
254  return 1;
255 
256  string parms_file;
257  string table_xform = "";
258  string accession = "";
259 
260  LOG_POST(Info << "starting run");
261 
262  const CArgs& parsed_args = GetArgs();
263 
264  if (parsed_args["table-format"].HasValue()) {
265  parms_file = parsed_args["table-format"].AsString();
266  LOG_POST(Info << "Loading tables using format from asn file: " <<
267  parms_file);
268  }
269 
    // The transformation name is lower-cased before being passed on.
270  if (parsed_args["xform"].HasValue()) {
271  table_xform = parsed_args["xform"].AsString();
272  table_xform = NStr::ToLower(table_xform);
273  }
274 
275  if (parsed_args["acc"].HasValue()) {
276  accession = parsed_args["acc"].AsString();
277  }
278 
    // Positional arguments are addressed by index, from 1 through GetNExtra().
279  for (size_t idx = 1; idx<=parsed_args.GetNExtra(); ++idx) {
280  string arg = parsed_args[idx].AsString();
281 
282  // If it is a file name, read it, convert it, and write it out as asn.
283  // Otherwise print a message and skip it.
284 
285  CFile file(arg);
286 
287  // Ignore asn files since they are never tables and this program creates
288  // them as output (allow user to mix them into directory).
289  string ext = file.GetExt();
290  if (NStr::ToLower(ext) == ".asn") {
291  cout << "Skipping asn file: " << arg << endl;
292  LOG_POST(Info << "Skipping asn file: " << arg);
293  continue;
294  }
295 
296  if (file.IsFile()) {
297  LoadTable(arg, parms_file, table_xform, accession);
298  }
299  else {
    // Anything that is not a regular file is only reported, both on
    // standard output and in the log.
300  cout << "file: " << arg << " is not a file name" << endl;
301  LOG_POST(Error << "file: " << arg <<
302  " is not a file name.\n");
303  }
304  }
305  }
306  catch(CException& e) {
307  LOG_POST(Error << "Error: " << e.GetMsg());
308  }
309  catch(...) {
310  LOG_POST(Error << "Undefined Error");
311  }
312 
313 
314  return 0;
315 }
316 
317 
318 
// Program entry point: constructs the test application and hands control to
// AppMain, whose result is returned to the caller.
// No environment pointer is passed, and eDS_ToMemory is given as the
// diagnostic stream argument.
// The former trailing "return 0;" was unreachable (it followed the return
// below) and has been removed.
319 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
320 {
322  CTestApplication theTestApplication;
323 
324  return theTestApplication.AppMain(argc, argv, 0 /*envp*/, eDS_ToMemory);
327 }
328 
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CFile –.
Definition: ncbifile.hpp:1604
void SetAssemblyAcc(string value)
CNcbiArguments –.
Definition: ncbienv.hpp:236
bool AddSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table based on the specified columns.
CRef< CSeq_annot > GetContainer()
vector< STableLocation > FindLocations(string &msg, bool strand_required)
Find columns that can be combined to form locations based on column info. If no locations are found,...
bool CreateSnpFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a region feature for each snp using specified snp (rsid) column.
bool AddSnpSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table using the rsid (snp/variation) ids location from the snp db.
bool CreateFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a feature using specified columns.
void WriteAsn(const string &fname)
Write seqtable to file 'fname'.
CTableColumnTypeGuesser -.
void GuessColumns()
Update columns in data source with type info based on best-guesses.
const vector< char > & GetDelimiters() const
CTableImportDataSource -.
const CTableDelimiterRules & GetDelimiterRules() const
Get set all delimiter rules (for delimited tables)
vector< CTableImportColumn > & GetColumns()
return the array of column data
EFieldSeparatorType GetTableType() const
bool LoadTable(const wxString &fname, CUser_object &user_object)
void ConvertToSeqAnnot(CRef< CSeq_annot > annot_container)
Save data in table into annot_container.
int Run(void)
Run the application.
void LoadTable(const string &fname, const string &parms_file, const string &table_xform, const string &accession)
void Init(void)
Initialize the application.
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
size_t GetNExtra(void) const
Get the number of unnamed positional (a.k.a. extra) args.
Definition: ncbiargs.hpp:422
@ eRegularArgs
Regular application.
Definition: ncbiargs.hpp:560
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
#define NULL
Definition: ncbistd.hpp:225
void SetDiagPostFlag(EDiagPostFlag flag)
Set the specified flag (globally).
Definition: ncbidiag.cpp:6070
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
void SetDiagTrace(EDiagTrace how, EDiagTrace dflt=eDT_Default)
Set the diagnostic trace settings.
Definition: ncbidiag.cpp:6226
#define LOG_POST(message)
This macro is deprecated, and it is strongly recommended to move all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDPF_All
All flags (except for the "unusual" ones!)
Definition: ncbidiag.hpp:718
@ eDS_ToMemory
Keep in a temp.memory buffer, see FlushMessages()
Definition: ncbidiag.hpp:1785
@ eDT_Enable
Enable messages of severity "eDiag_Trace".
Definition: ncbidiag.hpp:1550
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
SIZE_TYPE Size(void) const
Get size (number) of arguments.
Definition: ncbienv.hpp:265
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
#define kMax_UInt
Definition: ncbi_limits.h:185
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
char TXChar
Definition: ncbistr.hpp:172
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TData & GetData(void) const
Get the Data member data.
FILE * file
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Process information in the NCBI Registry, including working with configuration files.
USING_SCOPE(objects)
int NcbiSys_main(int argc, ncbi::TXChar *argv[])
USING_NCBI_SCOPE
Modified on Wed May 01 14:25:19 2024 by modify_doxy.py rev. 669887