NCBI C++ ToolKit
hooks_highest_se_objs.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hooks_highest_se_objs.cpp 90014 2020-05-04 17:30:22Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: David McElhany
27 *
28 * File Description:
29 * Show how to process the highest level Seq-entry objects in a data stream,
30 * using serial hooks when necessary.
31 *
32 * This sample program simply outputs the label for each processed Seq-entry,
33 * but it is done in a way that could be easily extended to more sophisticated
34 * processing of the entire Seq-entry.
35 *
36 * Constraints for the sample program:
37 * - The input will be streamed from STDIN, as ASN.1 text.
38 * - The input data type must be specified in the command line. The
39 * supported input data types are Bioseq-set, Seq-entry, and Seq-submit.
40 * Note that hooks are not needed if the input type is Seq-entry.
41 * - The input data could be a single object, or multiple concatenated
42 * objects of a single type.
43 * - The output will be streamed to STDOUT.
44 *
45 * ===========================================================================
46 */
47 
48 #include <ncbi_pch.hpp>
49 
50 #include <corelib/ncbiapp.hpp>
51 #include <corelib/ncbiargs.hpp>
52 
56 
57 #include <serial/objectio.hpp>
58 #include <serial/objhook.hpp>
59 #include <serial/objistr.hpp>
60 #include <serial/serial.hpp>
61 
64 
65 
66 ///////////////////////////////////////////////////////////////////////////
67 // Static Functions
68 
69 static void s_Process(const CSeq_entry& entry)
70 {
71  // Fetch the Seq-entry's label and write it to STDOUT, one per line.
72  // Replace this body with custom processing of the Seq-entry as needed.
73  string label;
74  entry.GetLabel(&label, CSeq_entry::eBoth);
75  cout << label << endl;
76 }
77 
78 
79 ///////////////////////////////////////////////////////////////////////////
80 // Hook Classes
81 
82 // This class processes top-level Seq-entry's when skipping through a Bioseq-set.
83 
85 {
86 public:
87  virtual void SkipClassMember(CObjectIStream& stream,
88  const CObjectTypeInfoMI& passed_info)
89  {
90  // ASN.1 fragment being skipped:
91  // Bioseq-set ::= SEQUENCE {
92  // seq-set SEQUENCE OF Seq-entry
93  //
94  // The hooked item is the 'seq-set' class member of Bioseq-set, so
95  // what arrives here is the entire 'SEQUENCE OF Seq-entry' container,
96  // not a single Seq-entry.
97  //
98  // Walk the container one element at a time: read each element into
99  // a fresh local Seq-entry, then pass that Seq-entry to s_Process().
100  CIStreamContainerIterator elements(stream, passed_info.GetMemberType());
101  for ( ; elements; ++elements ) {
102  CSeq_entry seq_entry;
103  elements >> seq_entry;
104  s_Process(seq_entry);
105  }
106  }
107 };
108 
109 
110 // This class processes top-level Seq-entry's when skipping through a Seq-submit.
111 
113 {
114 public:
115  virtual void SkipChoiceVariant(CObjectIStream& stream,
116  const CObjectTypeInfoCV& passed_info)
117  {
118  // ASN.1 fragment being skipped:
119  // Seq-submit ::= SEQUENCE {
120  // data CHOICE {
121  // entrys SET OF Seq-entry
122  //
123  // The hooked item is the 'entrys' choice variant of Seq-submit.data,
124  // so what arrives here is the entire 'SET OF Seq-entry' container,
125  // not a single Seq-entry.
126  //
127  // Walk the container one element at a time: read each element into
128  // a fresh local Seq-entry, then pass that Seq-entry to s_Process().
129  CIStreamContainerIterator elements(stream, passed_info.GetVariantType());
130  for ( ; elements; ++elements ) {
131  CSeq_entry seq_entry;
132  elements >> seq_entry;
133  s_Process(seq_entry);
134  }
135  }
136 };
137 
138 
139 ///////////////////////////////////////////////////////////////////////////
140 // Main Application Functionality
141 
143 {
144  virtual void Init(void);
145  virtual int Run(void);
146 };
147 
148 
149 void CProcessHighestSeObjs::Init(void)
150 {
151  // Create command-line argument descriptions class
152  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
153 
154  // Specify USAGE context
155  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
156  "Bioseq-set info extractor");
157 
158  // Describe the "type" key (a string) and limit it to the supported types
159  arg_desc->AddKey("type", "InputType", "type of input data object",
160  CArgDescriptions::eString);
161  arg_desc->SetConstraint("type",
162  &(*new CArgAllow_Strings, "Bioseq-set", "Seq-entry", "Seq-submit"));
163 
164  // Release the descriptions from the unique_ptr to SetupArgDescriptions()
165  SetupArgDescriptions(arg_desc.release());
166 }
167 
168 
170 {
171  // Get object stream.
172  unique_ptr<CObjectIStream> in(CObjectIStream::Open(eSerial_AsnText, cin));
173 
174  // Set hooks. No hooks are necessary for Seq-entry because in that case
175  // all highest-level (Seq-entry) objects are read.
176  string type_str = GetArgs()["type"].AsString();
177  if (type_str == "Bioseq-set") {
178  in->SetPathSkipMemberHook("Bioseq-set.seq-set",
180  } else if (type_str == "Seq-submit") {
181  in->SetPathSkipVariantHook("Seq-submit.data.entrys",
183  }
184 
185  // Repeat processing for each concatenated object in input stream.
186  while ( ! in->EndOfData()) {
187  if (type_str == "Bioseq-set") {
188  in->Skip(CType<CBioseq_set>());
189  } else if (type_str == "Seq-submit") {
190  in->Skip(CType<CSeq_submit>());
191  } else if (type_str == "Seq-entry") {
192  // Just read and process each object - no skipping is needed.
193  CSeq_entry entry;
194  *in >> entry;
195  s_Process(entry);
196  }
197  }
198 
199  return 0;
200 }
201 
202 
203 int main(int argc, const char* argv[])
204 {
205  CProcessHighestSeObjs app;        // the sample application object
206  return app.AppMain(argc, argv);   // AppMain's result is the exit code
207 }
User-defined methods of the data storage class.
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
Reading (iterating through) elements of containers (SET OF, SEQUENCE OF).
Definition: objectio.hpp:164
CObjectIStream –.
Definition: objistr.hpp:93
CObjectTypeInfoCV –.
Definition: objectiter.hpp:477
CObjectTypeInfoMI –.
Definition: objectiter.hpp:246
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
Definition: Seq_entry.hpp:56
@ eBoth
Definition: Seq_entry.hpp:94
void GetLabel(string *label, ELabelType type) const
Definition: Seq_entry.cpp:274
Skip hook for a choice variant (CHOICE)
Definition: objhook.hpp:239
Skip hook for data member of a containing object (eg, SEQUENCE)
Definition: objhook.hpp:223
virtual void SkipClassMember(CObjectIStream &stream, const CObjectTypeInfoMI &passed_info)
virtual void SkipChoiceVariant(CObjectIStream &stream, const CObjectTypeInfoCV &passed_info)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
CObjectTypeInfo GetVariantType(void) const
Get variant data type.
CObjectTypeInfo GetMemberType(void) const
Get data type information.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static const char label[]
USING_SCOPE(objects)
static void s_Process(const CSeq_entry &entry)
int main(int argc, const char *argv[])
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
std::istream & in(std::istream &in_, double &x_)
Modified on Wed Apr 17 13:10:08 2024 by modify_doxy.py rev. 669887