NCBI C++ ToolKit
objextract.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: objextract.cpp 92152 2020-12-22 17:11:12Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  * Simple command-line app to split an object given a set of read hooks
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 
38 #include <serial/iterator.hpp>
39 #include <serial/objectiter.hpp>
40 #include <serial/objhook.hpp>
41 #include <serial/objistr.hpp>
42 #include <serial/objostr.hpp>
43 #include <serial/serial.hpp>
44 
48 #include <objects/seq/Bioseq.hpp>
51 
54 
55 
56 /////////////////////////////////////////////////////////////////////////////
57 // CObjExtractApp::
58 
59 
// Body of this tool's application class: three private virtual hooks, each of
// which is defined further down in this file.
// NOTE(review): the class-header line (listing line 60) is missing from this
// extract -- presumably `class CObjExtractApp : public CNcbiApplication`;
// confirm against the original file.
61 {
62 private:
63  virtual void Init(void);  // sets up the command-line argument descriptions
64  virtual int Run(void);  // performs the extraction; its result is returned as an int
65  virtual void Exit(void);  // calls SetDiagStream(0)
66 };
67 
68 
69 /////////////////////////////////////////////////////////////////////////////
70 // Init: describe the command-line arguments accepted by the application
71 
72 
74 {
75  // Create command-line argument descriptions class
76  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
77 
78  // Specify USAGE context
79  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
80  "Object splitter/decontainerizer");
81 
82  arg_desc->AddDefaultKey("i", "InputFile",
83  "File to split",
85  "-");
86 
87  arg_desc->AddDefaultKey("ifmt", "InputFormat",
88  "Format of input file",
90  "asn");
91  arg_desc->SetConstraint("ifmt",
92  &(*new CArgAllow_Strings, "asn", "asnb", "xml"));
93 
94  arg_desc->AddDefaultKey("itype", "InputType",
95  "Type for input file",
97  "Seq-entry");
98  arg_desc->SetConstraint("itype",
99  &(*new CArgAllow_Strings,
100  "Seq-entry", "Bioseq", "Bioseq-set",
101  "Seq-annot", "Seq-align-set"));
102 
103  arg_desc->AddDefaultKey("o", "OutputFile",
104  "Output for decontainered/found objects",
106  "-");
107 
108  arg_desc->AddDefaultKey("ofmt", "OutputFormat",
109  "Format of onput file",
111  "asn");
112  arg_desc->SetConstraint("ofmt",
113  &(*new CArgAllow_Strings, "asn", "asnb", "xml"));
114 
115  arg_desc->AddDefaultKey("otype", "OutputType",
116  "Type for onput file",
118  "Seq-entry");
119  arg_desc->SetConstraint("otype",
120  &(*new CArgAllow_Strings,
121  "Seq-entry", "Bioseq", "Bioseq-set",
122  "Seq-annot", "Seq-align-set"));
123 
124  // Setup arg.descriptions for this application
125  SetupArgDescriptions(arg_desc.release());
126 }
127 
128 
129 ///
130 /// a simple read hook - when it finds its object, it reads it in and dumps it
131 /// to the stream
132 ///
// NOTE(review): several lines of this class are missing from this extract:
// the class header (listing line 133), the first line of each member-function
// signature (136, 143), the declaration of `obj` (147) and the data members
// (154-155). The cross-reference index for this file lists the signatures as
//   CReadHookWriter(const CTypeInfo *info, CObjectOStream &ostr)
//   void ReadObject(CObjectIStream &istr, const CObjectInfo &object)
// and the members as `const CTypeInfo * m_TypeInfo` and
// `CObjectOStream & m_Ostr`.
134 {
135 public:
// Constructor: stores the type info pointer and a reference to the output
// stream that ReadObject() writes to.
137  CObjectOStream& ostr)
138  : m_TypeInfo(info),
139  m_Ostr(ostr)
140  {
141  }
142 
// Hook body: read the object, write it out, then mark it as discardable.
144  const CObjectInfo& object)
145  {
// Step 1: let the default reader fill in `object` from the input stream.
146  DefaultRead(istr, object);
// Step 2: view the object that was just read as a CSerialObject (`obj`).
148  (static_cast<CSerialObject*>(object.GetObjectPtr()));
// Step 3: write it to the output stream using the object's own type info.
149  m_Ostr.Write(obj, obj->GetThisTypeInfo());
// Step 4: ask the input stream to discard the object that was just read.
150  istr.SetDiscardCurrObject();
151  }
152 
153 private:
156 };
157 
158 
159 ///
160 /// handler for GenBank release files
161 ///
// NOTE(review): the class header (listing line 162), both member-function
// signature lines (165, 170) and the data member (177) are missing from this
// extract. The cross-reference index for this file lists them as
//   CGbEntryHandler(CObjectOStream &ostr)
//   bool HandleSeqEntry(CRef< CSeq_entry > &entry)
//   CObjectOStream & m_Ostr
163 {
164 public:
// Constructor: keeps a reference to the output stream used by the handler.
166  : m_Ostr(ostr)
167  {
168  }
169 
// Per-entry callback: writes the entry to the output stream using the entry's
// own type info.
171  {
172  m_Ostr.Write(entry, entry->GetThisTypeInfo());
// Always reports true. NOTE(review): presumably this tells the release-file
// reader to keep going -- confirm against the handler interface.
173  return true;
174  }
175 
176 private:
178 };
179 
180 
181 
// Run(): reads the format and type arguments, opens one serial input stream and
// one serial output stream, then extracts objects of the output type from the
// input in one of two ways (release-file handler or global read hook).
// Returns 0.
// NOTE(review): the signature line (listing line 182) is missing from this
// extract; the cross-reference index lists it as `virtual int Run(void)`.
183 {
184  // Get arguments
185  const CArgs& args = GetArgs();
186 
187  ///
188  /// establish our types
189  ///
// The return values are ignored here.
// NOTE(review): presumably these calls make the types known to the
// lookup-by-name used further down -- confirm.
190  CSeq_entry::GetTypeInfo();
191  CBioseq::GetTypeInfo();
192  CBioseq_set::GetTypeInfo();
193  CSeq_annot::GetTypeInfo();
194  CSeq_align_set::GetTypeInfo();
195 
196  ///
197  /// get our args
198  ///
199  CNcbiIstream& istr = args["i"].AsInputFile();
200  CNcbiOstream& ostr = args["o"].AsOutputFile();
// Map the "ifmt" string onto a serial data format.
// NOTE(review): the declaration of `ifmt` (listing line 201) is missing from
// this extract.
202  {{
203  string str = args["ifmt"].AsString();
204  if (str == "asn") {
205  ifmt = eSerial_AsnText;
206  } else if (str == "asnb") {
207  ifmt = eSerial_AsnBinary;
208  } else if (str == "xml") {
209  ifmt = eSerial_Xml;
210  }
211  }}
// Input type: starts as Seq-entry and is replaced when `info` is non-null.
// NOTE(review): the line that defines `info` (listing line 215) is missing
// from this extract; the index mentions GetClassInfoByName(name).
212  const CTypeInfo* itype = CSeq_entry::GetTypeInfo();
213  {{
214  string str = args["itype"].AsString();
216  if (info) {
217  itype = info;
218  }
219  }}
220 
// Same mapping for the output format.
// NOTE(review): the declaration of `ofmt` (listing line 221) is missing from
// this extract.
222  {{
223  string str = args["ofmt"].AsString();
224  if (str == "asn") {
225  ofmt = eSerial_AsnText;
226  } else if (str == "asnb") {
227  ofmt = eSerial_AsnBinary;
228  } else if (str == "xml") {
229  ofmt = eSerial_Xml;
230  }
231  }}
// Output type: same scheme as the input type.
// NOTE(review): the line that defines `info` (listing line 235) is missing
// from this extract.
232  const CTypeInfo* otype = CSeq_entry::GetTypeInfo();
233  {{
234  string str = args["otype"].AsString();
236  if (info) {
237  otype = info;
238  }
239  }}
240 
241  ///
242  /// now, process!
243  ///
244  unique_ptr<CObjectIStream> obj_istr(CObjectIStream::Open(ifmt, istr));
245  unique_ptr<CObjectOStream> obj_ostr(CObjectOStream::Open(ofmt, ostr));
246 
247 
// Log which type is being extracted from which.
// NOTE(review): the opening of this statement (listing line 248) is missing
// from this extract; the index mentions LOG_POST and Error.
249  << "extracting " << otype->GetName()
250  << " from " << itype->GetName());
// Special case: Seq-entry objects out of a Bioseq-set input.
251  if (itype == CBioseq_set::GetTypeInfo() &&
252  otype == CSeq_entry::GetTypeInfo()) {
253  /// process as a GenBank Release File
// release() detaches the raw input stream from obj_istr before it is passed on.
// NOTE(review): presumably CGBReleaseFile takes ownership of it -- confirm.
254  CGBReleaseFile rf(*obj_istr.release());
255  rf.RegisterHandler(new CGbEntryHandler(*obj_ostr));
256  rf.Read();
257  } else {
// General case: install a read hook on the output type, then read one object
// of the input type; the hook writes each object of the output type it meets.
// The const_cast is needed because `otype` is a pointer to const.
258  const_cast<CTypeInfo*>(otype)
259  ->SetGlobalReadHook(new CReadHookWriter(otype, *obj_ostr));
260 
261  CRef<CSerialObject> obj(static_cast<CSerialObject*>(itype->Create()));
262  obj_istr->Read(obj, obj->GetThisTypeInfo());
263  }
264 
265  return 0;
266 }
267 
268 
269 /////////////////////////////////////////////////////////////////////////////
270 // Cleanup
271 
272 
// Exit(): passes a null stream pointer to SetDiagStream().
// NOTE(review): the signature line (listing line 273) is missing from this
// extract; the cross-reference index lists it as `virtual void Exit(void)`.
274 {
275  SetDiagStream(0);
276 }
277 
278 
279 /////////////////////////////////////////////////////////////////////////////
280 // MAIN
281 
282 
283 int main(int argc, const char* argv[])
284 {
285  // Execute main application function
286  return CObjExtractApp().AppMain(argc, argv, 0, eDS_Default, 0);
287 }
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Interface for handling Seq-entry objects.
CGBReleaseFile is a utility class to ease the processing of Genbank release files one Seq-entry at a ...
void RegisterHandler(ISeqEntryHandler *handler)
Register handler.
void Read(void)
Read the release file.
handler for GenBank release files
Definition: objextract.cpp:163
CGbEntryHandler(CObjectOStream &ostr)
Definition: objextract.cpp:165
bool HandleSeqEntry(CRef< CSeq_entry > &entry)
user code for handling a Seq-entry goes here.
Definition: objextract.cpp:170
CObjectOStream & m_Ostr
Definition: objextract.cpp:177
virtual void Exit(void)
Cleanup on application exit.
Definition: objextract.cpp:273
virtual void Init(void)
Initialize the application.
Definition: objextract.cpp:73
virtual int Run(void)
Run the application.
Definition: objextract.cpp:182
CObjectIStream –.
Definition: objistr.hpp:93
CObjectInfo –.
Definition: objectinfo.hpp:597
CObjectOStream –.
Definition: objostr.hpp:83
a simple read hook - when it finds its object, it reads it in and dumps it to the stream
Definition: objextract.cpp:134
void ReadObject(CObjectIStream &istr, const CObjectInfo &object)
This method will be called at the appropriate time when the object of requested type is to be read.
Definition: objextract.cpp:143
CReadHookWriter(const CTypeInfo *info, CObjectOStream &ostr)
Definition: objextract.cpp:136
CObjectOStream & m_Ostr
Definition: objextract.cpp:155
const CTypeInfo * m_TypeInfo
Definition: objextract.cpp:154
Read hook for a standalone object.
Definition: objhook.hpp:59
Base class for all serializable objects.
Definition: serialbase.hpp:150
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Definition: typeinfo.hpp:76
static const char * str(char *buf, int n)
Definition: stats.c:84
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:832
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1208
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:8086
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
void DefaultRead(CObjectIStream &in, const CObjectInfo &object)
Default read.
Definition: objhook.cpp:171
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
void Write(const CConstObjectInfo &object)
Definition: objostr.cpp:593
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
void SetDiscardCurrObject(bool discard=true)
Discard the object, which has been just read.
Definition: objistr.hpp:637
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
const string & GetName(void) const
Get name of this type.
Definition: typeinfo.cpp:249
static TTypeInfo GetClassInfoByName(const string &name)
Definition: classinfob.cpp:244
TObjectPtr Create(CObjectMemoryPool *memoryPool=0) const
Create object of this type on heap (can be deleted by operator delete)
static MDB_envinfo info
Definition: mdb_load.c:37
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
USING_SCOPE(objects)
int main(int argc, const char *argv[])
Definition: objextract.cpp:283
USING_NCBI_SCOPE
Definition: objextract.cpp:52
Modified on Fri Sep 20 14:57:31 2024 by modify_doxy.py rev. 669887