NCBI C++ ToolKit
gi2taxid.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gi2taxid.cpp 92128 2020-12-22 16:49:02Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
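28  * File Description:
29  *   Command-line tool that resolves the ids given by the "gi" and "file" arguments to GIs and prints each GI with its taxonomy id.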
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbireg.hpp>
38 
41 
44 
45 
// NOTE(review): the class header (listing line 46) is missing from this
// extract. main() instantiates CGi2TaxIdApp and calls AppMain() on it, so this
// is presumably the body of that application class -- confirm against the
// repository copy of the file.
47 {
48 public:
    /// Describes the command-line arguments accepted by the tool
    /// ("gi", "file" and "show_acc").
49  virtual void Init(void);
    /// Resolves every requested id to a GI, looks up its taxonomy id and
    /// prints one result line per resolved id.
50  virtual int Run (void);
51 
52 };
53 
54 
// Body of the application's Init(): builds the command-line argument
// descriptions and registers them with the application.
//
// NOTE(review): this extract is missing the signature (listing line 55) and
// two argument lines (listing lines 62 and 67). The page's cross-reference
// index lists CArgDescriptions::eInteger and CArgDescriptions::eInputFile, so
// those are presumably the missing value-type arguments of the "gi" and "file"
// keys -- confirm against the repository copy before editing.
56 {
57  // Prepare command line descriptions
58  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
59 
    // "gi": a key with the default value "0"; Run() treats 0 as "no gi given".
60  arg_desc->AddDefaultKey("gi", "GI",
61  "gi to test",
63  "0");
64 
    // "file": optional input file holding one gi or accession per line.
65  arg_desc->AddOptionalKey("file", "InputFile",
66  "Input file to test, one gi or accession per line",
68 
    // "show_acc": flag that makes Run() print the id string before the gi.
69  arg_desc->AddFlag("show_acc",
70  "Show the passed accession as well as the gi");
71 
72  // Pass argument descriptions to the application
73  //
    // release() gives up the unique_ptr's ownership so the raw pointer can be
    // handed over; presumably SetupArgDescriptions() takes ownership -- confirm.
74  SetupArgDescriptions(arg_desc.release());
75 }
76 
77 
// Body of the application's Run(): collects ids from the command line,
// resolves each one to a GI, looks up the taxonomy id for that GI and prints
// "<gi> <tax_id>" (preceded by the id string when "show_acc" is set) to stdout.
// Always returns 0, including when individual ids could not be resolved.
//
// NOTE(review): the signature (listing line 78) is missing from this extract.
79 {
80  const CArgs& args = GetArgs();
81 
    // Whether to echo the (sanitized) id string in front of each result line.
82  bool show = args["show_acc"];
83 
    // Gather the ids to process: the "gi" argument when it is non-zero (its
    // registered default is "0"), followed by every whitespace-separated token
    // read from the optional "file" argument.
84  vector<string> id_list;
85  if( args["gi"].AsInteger() ) {
86  id_list.push_back( args["gi"].AsString() );
87  }
88 
89  if (args["file"]) {
90  CNcbiIstream& istr = args["file"].AsInputFile();
91  string acc;
92  while (istr >> acc) {
93  id_list.push_back(acc);
94  }
95  }
96 
97  CID1Client id1_client;
98  CTaxon1 tax;
    // NOTE(review): CTaxon1::Init() returns bool (see the cross-reference
    // index); the result is ignored here, so an initialization failure is not
    // reported at this point.
99  tax.Init();
100 
    // Characters allowed in an id: letters, digits, '_', '.' and '|'.
101  static const char* sc_ValidChars =
102  "abcdefghijklmnopqrstuvwxyz"
103  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
104  "0123456789"
105  "_.|";
106 
107  ITERATE (vector<string>, iter, id_list) {
    // Sanitize: trim surrounding spaces, then cut the string at the first
    // character outside sc_ValidChars (erase(pos) drops everything from
    // pos to the end).
108  string id_str = *iter;
109  id_str = NStr::TruncateSpaces(id_str);
110  string::size_type pos = id_str.find_first_not_of(sc_ValidChars);
111  if (pos != string::npos) {
112  id_str.erase(pos);
113  }
    // Nothing usable is left after sanitizing: log the raw input and skip it.
114  if ( id_str.empty() ) {
115  LOG_POST(Info << "ignoring accession: " << *iter);
116  continue;
117  }
118 
119  // resolve the id to a gi
    // First try to read the string as a number; if that conversion throws
    // anything, treat the string as a sequence id and ask the ID1 client
    // for its gi instead.
120  TGi gi = ZERO_GI;
121  try {
122  gi = NStr::StringToNumeric<TGi>(id_str);
123  }
124  catch (...) {
125  try {
    // NOTE(review): this diagnostic goes to cout, the same stream
    // as the result lines printed below.
126  cout << "trying: " << *iter << " -> " << id_str << endl;
127  CSeq_id id(id_str);
128  gi = id1_client.AskGetgi(id);
129  } catch (CException&) {
    // A failed lookup is swallowed on purpose: gi stays ZERO_GI and
    // is reported by the check that follows.
130  // gi = 0;
131  }
132  }
133 
    // Neither route produced a gi: report the id and move on to the next.
134  if (gi == ZERO_GI) {
135  ERR_POST(Error << "don't know anything about accession/id: "
136  << id_str);
137  continue;
138  }
139 
    // NOTE(review): GetTaxId4GI() returns bool (see the cross-reference
    // index); the result is ignored. Presumably tax_id keeps ZERO_TAX_ID when
    // the lookup fails, and that value is then printed like any other --
    // confirm.
140  TTaxId tax_id = ZERO_TAX_ID;
141  tax.GetTaxId4GI(gi, tax_id);
142 
    // Result line: "[<id_str> ]<gi> <tax_id>".
143  if (show) {
144  cout << id_str << " ";
145  }
146  cout << gi << " " << tax_id << endl;
147  }
148 
149  return 0;
150 }
151 
152 
// Program entry point: constructs a temporary CGi2TaxIdApp and runs it through
// AppMain(), whose return value becomes the process exit code.
//
// Per the AppMain() signature in the cross-reference index, the trailing
// arguments are envp = 0, diag = eDS_Default (try the standard log file, use
// stderr on failure) and conf = 0.
153 int main(int argc, const char* argv[])
154 {
155  return CGi2TaxIdApp().AppMain(argc, argv, 0, eDS_Default, 0);
156 }
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
virtual int Run(void)
Run the application.
Definition: gi2taxid.cpp:78
virtual void Init(void)
Initialize the application.
Definition: gi2taxid.cpp:55
bool GetTaxId4GI(TGi gi, TTaxId &tax_id_out)
Definition: taxon1.cpp:1371
bool Init(void)
Definition: taxon1.cpp:101
USING_SCOPE(ncbi)
int main(int argc, const char *argv[])
Definition: gi2taxid.cpp:153
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ZERO_GI
Definition: ncbimisc.hpp:1088
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
virtual NCBI_NS_NCBI::TGi AskGetgi(const CSeq_id &req, TReply *reply=0)
Definition: id1_client_.cpp:98
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Process information in the NCBI Registry, including working with configuration files.
Modified on Mon Dec 11 02:35:19 2023 by modify_doxy.py rev. 669887