32 #include <ncbi_pch.hpp>
33 #include <stdio.h>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbireg.hpp>
37 #include <corelib/ncbifile.hpp>
38 #include <algorithm>
49 #include <common/test_assert.h>
55 /////////////////////////////////
56 // Test application
57 //
60 {
    // NOTE(review): the line that opens this class declaration is not present
    // in this listing. The out-of-class definition of LoadTable further down
    // is qualified as CTestApplication::LoadTable, so this is presumably the
    // body of CTestApplication -- confirm the class header and base class
    // against the full file.
61 public:
    // Declared here; the header of its definition is not visible in this
    // listing.
62  void Init(void);
    // Declared here; returns an int.
63  int Run(void);
    // Loads the table file `fname` (optionally driven by the parameters file
    // `parms_file`), applies the transformation named by `table_xform`, and
    // writes the result out as ASN. `accession` is attached to matching
    // columns when it is non-empty. See the definition for details.
65  void LoadTable(const string& fname,
66  const string& parms_file,
67  const string& table_xform,
68  const string& accession);
69 };
73 {
    // NOTE(review): the function header is not present in this listing. The
    // body builds a CArgDescriptions object and hands it to
    // SetupArgDescriptions, so this is presumably CTestApplication::Init --
    // confirm against the full file.
74  // Set err.-posting and tracing to maximum
    // NOTE(review): the statements introduced by the comment above are not
    // present in this listing.
79  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions());
    // Optional "xform" key: the transformation to apply to the table.
    // NOTE(review): the trailing arguments (and closing parenthesis) of each
    // AddOptionalKey call below are missing from this listing.
81  arg_desc->AddOptionalKey("xform", "TransformationType",
82  "Desired transformation for table - none, seqlocs or features. Default: none",
    // Optional "acc" key: an assembly accession for chromosome id lookup.
85  arg_desc->AddOptionalKey("acc", "Accession",
86  "Optional accession, e.g. GCF_000001405.17, used to look up ids for chromosome fields",
    // Optional "table-format" key: an ASN description of the table format.
89  arg_desc->AddOptionalKey("table-format", "TableFormatDescription",
90  "ASN table format description for table(s) being imported",
93  arg_desc->SetArgsType(CArgDescriptions::eRegularArgs);
    // Extra (positional) string arguments name the files or directories to
    // process. Presumably 1 is the mandatory count and kMax_UInt the optional
    // count -- TODO confirm against the AddExtra signature.
94  arg_desc->AddExtra(1, kMax_UInt, "Files or Directories", CArgDescriptions::eString);
    // release() gives up the unique_ptr's ownership; the raw pointer is passed
    // to SetupArgDescriptions.
96  SetupArgDescriptions(arg_desc.release());
97 }
/// Load one table file, optionally transform it, and write it out as ASN.
///
/// The table is loaded either directly from `fname` (when `parms_file` is
/// empty) or using a CUser_object read in ASN text form from `parms_file`.
/// The loaded data is then converted to a seq-annot, optionally turned into
/// seq-locs or features, and written to `fname + <output_type> + ".asn"`,
/// where <output_type> is ".locs", ".feats", or empty.
///
/// CException errors are caught inside this function and reported through
/// LOG_POST; nothing is returned to the caller.
///
/// NOTE(review): the declarations of `ds` and `annot_table_ds` used below are
/// not present in this listing (only a commented-out `ds` declaration is).
///
/// @param fname        Path of the table file to load; also the base of the
///                     output file name.
/// @param parms_file   Path of an ASN-text parameters file, or empty to load
///                     the table without one.
/// @param table_xform  Requested transformation; compared against "seqlocs"
///                     and "features". When empty and a parameters file is
///                     used, the file's "conversion-type" string field (if
///                     present) is used instead.
/// @param accession    Accession set on matching columns via
///                     CMapAssemblyParams::SetAssemblyAcc; empty to skip.
///                     Only consulted when no parameters file is given.
99 void CTestApplication::LoadTable(const string& fname,
100  const string& parms_file,
101  const string& table_xform,
102  const string& accession)
103 {
104  LOG_POST(Info << "Loading Table: " << fname);
106  //CTableImportDataSource ds;
    // Start from the command-line value; may be replaced below by the value
    // stored in the parameters file.
108  string transformation_type = table_xform;
109  bool success;
111  try {
    // No parameters file: load the raw table and derive its layout here.
112  if (parms_file == "") {
113  success = ds->LoadTable(fname);
115  if (!success) {
116  LOG_POST(Error << "Error loading file: " << fname);
117  return;
118  }
    // NOTE(review): the condition guarding the following block is not present
    // in this listing; its else-branch reports that no delimiter was found
    // for the table.
121  {
122  LOG_POST(Info << "Delimiters: ");
123  vector<char> delims = ds->GetDelimiterRules().GetDelimiters();
    // One log record per delimiter character.
124  for (size_t i=0; i<delims.size(); ++i)
125  LOG_POST(Info << "\'" << delims[i] << "\', ");
126  LOG_POST(Info << "Merge Delimiters: " << ds->GetDelimiterRules().GetMergeDelimiters());
128  // guess column types
129  CTableColumnTypeGuesser guesser(ds);
130  guesser.GuessColumns();
132  // If an accession is provided, look for and add it to any fields
133  // that need id mapping (chromosome #'s)
134  if (accession != "") {
135  for (size_t i=0; i<ds->GetColumns().size(); ++i) {
    // NOTE(review): the right-hand side of this comparison (and the opening
    // brace of the if) is not present in this listing.
136  if (ds->GetColumns()[i].GetDataType() ==
138  //cout << "Adding accession to col: " << i << " " << accession << endl;
139  CMapAssemblyParams ainfo;
140  ainfo.SetAssemblyAcc(accession);
141  ds->GetColumns()[i].SetAssembly(ainfo);
142  }
143  }
145  }
146  }
147  else {
148  LOG_POST(Error << "Could not find a delimiter for table: " << fname);
149  }
150  }
    // Parameters file given: read it and let it drive the table load.
151  else {
152  CNcbiIfstream in_file(parms_file.c_str());
153  if (!in_file) {
154  LOG_POST(Error << "Could not open parameters file: " << parms_file);
155  return;
156  }
158  CUser_object user_object;
159  LOG_POST(Info << "Loading Parameters file: " << parms_file);
    // The parameters file is read as ASN text.
160  in_file >> MSerial_AsnText >> user_object;
162  // If the command line parameters specify an explicit transformation
163  // type use that instead of the type stored in the parameters file
164  if (table_xform == "" &&
165  user_object.HasField("conversion-type") &&
166  user_object.GetField("conversion-type").GetData().IsStr()) {
167  transformation_type = user_object.
168  GetField("conversion-type").GetData().GetStr();
169  }
171  success = ds->LoadTable(fname, user_object);
173  if (!success) {
174  LOG_POST(Error << "Error loading file: " << fname);
175  return;
176  }
177  else {
178  LOG_POST(Info << "Loaded table from: " << fname);
179  }
180  }
181  }
    // Any CException raised while loading is logged and aborts processing of
    // this table.
182  catch( CException& c) {
183  LOG_POST(Error <<"Error loading table: " << c.ReportAll());
184  return;
185  }
    // Inserted into the output file name; stays empty unless a transformation
    // below succeeds in finding usable locations.
187  string output_type = "";
189  try {
190  // Convert table to asn and then (if table_xform is not 'none') transform
191  // it to the desired format. After (optional) transform write table out
192  // to a file with same name but of type ".asn".
194  ds->ConvertToSeqAnnot(annot_table_ds->GetContainer());
196  string msg;
197  if (transformation_type == "seqlocs") {
198  vector<CTableAnnotDataSource::STableLocation> locations =
199  annot_table_ds->FindLocations(msg, false);
201  if (locations.size() == 0) {
202  LOG_POST(Error << msg << " - Unable to create locations");
203  }
204  else {
    // Add one seq-loc per location found, numbered from 1; rsid locations go
    // through AddSnpSeqLoc instead of AddSeqLoc.
    // NOTE(review): `success` is assigned here but never checked in the
    // visible code, so a failed add is not reported.
205  for (size_t i=0; i<locations.size(); ++i) {
206  if (!locations[i].m_IsRsid)
207  success = annot_table_ds->AddSeqLoc(locations[i], static_cast<int>(i+1), NULL);
208  else
209  success = annot_table_ds->AddSnpSeqLoc(locations[i], static_cast<int>(i+1), NULL);
210  }
211  output_type = ".locs";
212  }
213  }
214  else if (transformation_type == "features") {
215  vector<CTableAnnotDataSource::STableLocation> locations =
216  annot_table_ds->FindLocations(msg, false);
218  // Any location can be a feature, but only 1. If we find more than 1
219  // we choose not to create any features.
220  if (locations.size() > 1) {
221  LOG_POST(Error << "Unable to create features - table has more than 1 location per row");
222  }
223  // unable to identify columns needed to create location info needed by features.
224  else if (locations.size() == 0) {
225  LOG_POST(Error << msg << " - Unable to create features");
226  }
227  else {
    // Exactly one location: build features from it.
    // NOTE(review): as above, `success` is assigned but not checked in the
    // visible code.
228  if (!locations[0].m_IsRsid)
229  success = annot_table_ds->CreateFeature(locations[0], NULL);
230  else
231  success = annot_table_ds->CreateSnpFeature(locations[0], NULL);
232  output_type = ".feats";
233  }
234  }
    // The output is written even when no transformation was applied; in that
    // case output_type is empty and the name is simply fname + ".asn".
236  string out_fname = fname + output_type + ".asn";
238  annot_table_ds->WriteAsn(out_fname);
239  }
    // Failures while converting or writing are logged and otherwise ignored.
240  catch( CException& c) {
241  LOG_POST(Error <<"Error transforming and saving table: " << c.ReportAll());
242  }
244 }
248 {
    // NOTE(review): the function header is not present in this listing. The
    // body reads the parsed arguments and calls LoadTable for each extra
    // argument, so this is presumably CTestApplication::Run -- confirm
    // against the full file.
    //
    // Returns 1 when the raw argument list is empty and 0 otherwise,
    // including when an exception was caught and logged below.
249  try{
251  const CNcbiArguments& args = GetArguments();
253  if (args.Size() == 0)
254  return 1;
    // Optional settings; each stays empty unless supplied on the command line.
256  string parms_file;
257  string table_xform = "";
258  string accession = "";
260  LOG_POST(Info << "starting run");
262  const CArgs& parsed_args = GetArgs();
264  if (parsed_args["table-format"].HasValue()) {
265  parms_file = parsed_args["table-format"].AsString();
266  LOG_POST(Info << "Loading tables using format from asn file: " <<
267  parms_file);
268  }
270  if (parsed_args["xform"].HasValue()) {
271  table_xform = parsed_args["xform"].AsString();
    // Lower-cased before use; LoadTable compares it against lower-case
    // literals.
272  table_xform = NStr::ToLower(table_xform);
273  }
275  if (parsed_args["acc"].HasValue()) {
276  accession = parsed_args["acc"].AsString();
277  }
    // Extra (positional) arguments are indexed from 1 through GetNExtra().
279  for (size_t idx = 1; idx<=parsed_args.GetNExtra(); ++idx) {
280  string arg = parsed_args[idx].AsString();
282  // If it is a file name, read it, convert it, and write it out as asn.
283  // Otherwise print a message and skip it.
285  CFile file(arg);
287  // Ignore asn files since they are never tables and this program creates
288  // them as output (allow user to mix them into directory).
289  string ext = file.GetExt();
290  if (NStr::ToLower(ext) == ".asn") {
291  cout << "Skipping asn file: " << arg << endl;
292  LOG_POST(Info << "Skipping asn file: " << arg);
293  continue;
294  }
296  if (file.IsFile()) {
297  LoadTable(arg, parms_file, table_xform, accession);
298  }
    // Anything that is not a regular file is reported on both stdout and the
    // log, then skipped.
299  else {
300  cout << "file: " << arg << " is not a file name" << endl;
301  LOG_POST(Error << "file: " << arg <<
302  " is not a file name.\n");
303  }
304  }
305  }
    // NOTE(review): both handlers only log; execution then falls through to
    // `return 0`, so callers cannot tell a failed run from a successful one.
306  catch(CException& e) {
307  LOG_POST(Error << "Error: " << e.GetMsg());
308  }
309  catch(...) {
310  LOG_POST(Error << "Undefined Error");
311  }
314  return 0;
315 }
/// Program entry point: constructs the test application and runs it.
///
/// @param argc  Number of command-line arguments.
/// @param argv  Command-line argument vector; forwarded to AppMain unchanged.
/// @return Whatever CTestApplication::AppMain returns.
319 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
320 {
321  // ----------------------------------------------------------
322  CTestApplication theTestApplication;
    // AppMain's result is the function's only exit path; 0 is passed for envp.
    // The former trailing `return 0;` could never execute and was removed.
324  return theTestApplication.AppMain(argc, argv, 0 /*envp*/, eDS_ToMemory);
327 }
