NCBI C++ ToolKit
demo_align_cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: demo_align_cleanup.cpp 92516 2021-01-26 20:05:01Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
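29  *   Demo command-line application: reads alignments (Seq-align, Seq-align-set or Seq-annot), runs alignment cleanup on them, and writes the cleaned alignments as ASN.1.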
30  * Assumptions:
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbienv.hpp>
37 #include <corelib/ncbiargs.hpp>
38 #include <corelib/ncbifile.hpp>
39 
41 #include <objmgr/scope.hpp>
42 #include <objmgr/util/sequence.hpp>
43 
45 
46 #include <serial/serial.hpp>
47 #include <serial/objistr.hpp>
48 #include <serial/objostr.hpp>
49 
50 #include <util/stream_source.hpp>
51 
53 
56 
57 
59 {
 // NOTE(review): the class declaration line (listing line 58) is absent from
 // this rendering; main() instantiates this type as CAlignCleanupApplication.
60 public:
 // Initialize the application (registers the command-line arguments).
61  virtual void Init(void);
 // Run the application; the visible body returns 0 after writing its output.
62  virtual int Run(void);
 // Cleanup on application exit (the visible body is empty).
63  virtual void Exit(void);
64 };
65 
66 
68 {
 // Builds the command-line argument descriptions for this program and hands
 // them to SetupArgDescriptions() at the end.
 // NOTE(review): the function signature (listing line 67) and listing lines
 // 76, 80, 88, 94, 109 and 119 are absent from this rendering; do not treat
 // the calls below as complete.
69  // Create command-line argument descriptions class
70  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
71 
72  // Specify USAGE context
73  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
74  "Assembly alignment cleanup program");
75 
77 
 // "ifmt": which object type Run() reads from each input stream.
 // Defaults to "seq-align" and is restricted to the three values listed in
 // the constraint. The argument between the description and the default
 // (listing line 80) is missing here; the splign-direction call further down
 // passes CArgDescriptions::eString in that position.
78  arg_desc->AddDefaultKey("ifmt", "InputFormat",
79  "Format for input file",
81  "seq-align");
82  arg_desc->SetConstraint("ifmt",
83  &(*new CArgAllow_Strings,
84  "seq-align", "seq-align-set", "seq-annot"));
85 
 // "query-type": defaults to "auto"; Run() maps "genomic", "rna" and
 // "protein" onto CAdvancedAlignCleanup enum values. Listing line 88 (the
 // argument before the default) is missing here as well.
86  arg_desc->AddDefaultKey("query-type", "QueryType",
87  "Type of query sequences",
89  "auto");
90  arg_desc->SetConstraint("query-type",
91  &(*new CArgAllow_Strings,
92  "auto", "genomic", "rna", "protein"));
93 
95 
 // "splign-direction": string argument, default "sense", restricted to
 // "sense", "antisense" or "both".
96  arg_desc->AddDefaultKey
97  ("splign-direction", "SplignDirection", "Query direction for splign",
98  CArgDescriptions::eString, "sense");
99  arg_desc->SetConstraint("splign-direction",
100  &(*new CArgAllow_Strings,
101  "sense", "antisense", "both"));
102 
 // Boolean switch; Run() passes its value straight into Cleanup().
103  arg_desc->AddFlag("with-best-placement",
104  "Invoke best placement algorithm on alignments.");
105 
106 
 // "o": destination for the cleaned alignments; Run() opens it with
 // AsOutputFile(). "output" is registered as an alias for "o".
 // NOTE(review): the remaining AddDefaultKey arguments and closing
 // parenthesis (listing line 109) are missing from this rendering.
107  arg_desc->AddDefaultKey("o", "Output",
108  "Output cleaned alignments",
110  arg_desc->AddAlias("output", "o");
111 
 // Serialization switches: Run() uses "t" to select ASN.1 text for both
 // directions, otherwise "it"/"ot" select text per direction.
112  arg_desc->AddFlag("t",
113  "Both input and output data streams are "
114  "ASN.1 text, not binary.");
115  arg_desc->AddFlag("it", "Input data streams are ASN.1 text, not binary.");
116  arg_desc->AddFlag("ot", "Output data streams are ASN.1 text, not binary.");
117 
118  // standard gpipe app arguments
 // NOTE(review): the call this comment introduces (listing line 119) is
 // missing from this rendering.
120 
121  // Setup arg.descriptions for this application
 // release() gives up the unique_ptr's ownership and passes the raw pointer
 // on; presumably SetupArgDescriptions() takes ownership - confirm.
122  SetupArgDescriptions(arg_desc.release());
123 }
124 
125 
127 {
 // Reads every alignment from the input sources, runs the cleanup over the
 // collected list, writes each cleaned alignment to the "o" output, and
 // returns 0.
 // NOTE(review): the function signature (listing line 126) and several
 // declarations are absent from this rendering; each gap is flagged below.
128  const CArgs& args = GetArgs();
129 
130  // setup object manager
 // NOTE(review): listing lines 131-132 are missing; they must define the
 // `om` object that is dereferenced when the scope is created below.
133 
134  // setup scope
135  CRef<CScope> scope(new CScope(*om));
136  scope->AddDefaults();
137 
138  // text or binary
 // "t" forces ASN.1 text for both directions; otherwise "it" and "ot" choose
 // text independently and anything not flagged falls back to ASN.1 binary.
139  ESerialDataFormat input_fmt, output_fmt;
140  if(args["t"]) {
141  input_fmt = output_fmt = eSerial_AsnText;
142  } else {
143  input_fmt = args["it"] ? eSerial_AsnText : eSerial_AsnBinary;
144  output_fmt = args["ot"] ? eSerial_AsnText : eSerial_AsnBinary;
145  }
146 
 // NOTE(review): the declaration of query_type (listing line 147) is missing.
 // "auto" matches none of the branches below, so query_type keeps whatever
 // initial value that missing line assigns.
148  if (args["query-type"].AsString() == "genomic") {
149  query_type = CAdvancedAlignCleanup::eGenomic;
150  } else if (args["query-type"].AsString() == "rna") {
151  query_type = CAdvancedAlignCleanup::eRna;
152  } else if (args["query-type"].AsString() == "protein") {
153  query_type = CAdvancedAlignCleanup::eProtein;
154  }
155 
 // NOTE(review): the declaration of splign_dir (listing lines 156-157) and
 // the bodies of both branches (listing lines 159 and 161) are missing; only
 // the string comparisons survive. "both" matches neither branch.
158  if (args["splign-direction"].AsString() == "sense") {
160  } else if (args["splign-direction"].AsString() == "antisense") {
162  }
163 
164  //
165  // read alignments in, and collate by sequence pair
166  //
167  CSeq_align_set::Tdata input_aligns;
168  CSeq_align_set::Tdata cleaned_aligns;
 // Outer loop: one iteration per input stream supplied by CInputStreamSource.
169  for (CInputStreamSource stream_source(args); stream_source; ++stream_source)
170  {
171  unique_ptr<CObjectIStream> is(
172  CObjectIStream::Open(input_fmt, *stream_source));
 // Inner loop: keep decoding objects of the "ifmt" type until the stream
 // reports end of data, appending every alignment to input_aligns.
173  while (!is->EndOfData()) {
174  if (args["ifmt"].AsString() == "seq-align") {
175  CRef<CSeq_align> align(new CSeq_align);
176  *is >> *align;
177  input_aligns.push_back(align);
178  } else if (args["ifmt"].AsString() == "seq-align-set") {
 // NOTE(review): the declaration of `align` for this branch (listing line
 // 179) is missing; Get() is called on it, so it is not the CSeq_align above.
180  *is >> *align;
181  input_aligns.insert(input_aligns.end(),
182  align->Get().begin(), align->Get().end());
183  } else if (args["ifmt"].AsString() == "seq-annot") {
184  CRef<CSeq_annot> input_annot(new CSeq_annot);
185  *is >> *input_annot;
 // NOTE(review): GetAlign() is called without first checking that the annot
 // holds alignments - confirm how non-alignment annots behave here.
186  input_aligns.insert(input_aligns.end(),
187  input_annot->GetData().GetAlign().begin(),
188  input_annot->GetData().GetAlign().end());
189  }
190  }
191  }
192 
 // NOTE(review): the declaration of `cleanup` (listing line 193) is missing.
 // The cleanup object receives the parsed args and the scope, then fills
 // cleaned_aligns from input_aligns.
194  cleanup.SetParams(args);
195  cleanup.SetScope(scope);
196  cleanup.Cleanup(input_aligns, cleaned_aligns, query_type,
197  args["with-best-placement"], false, splign_dir);
198 
 // Each cleaned alignment is written as its own object, in output_fmt.
199  unique_ptr<CObjectOStream> os(
200  CObjectOStream::Open(output_fmt, args["o"].AsOutputFile()));
201  ITERATE (CSeq_align_set::Tdata, it, cleaned_aligns) {
202  *os << **it;
203  }
204 
205  return 0;
206 }
207 
208 
209 void
211 {
 // Exit hook: the body is empty, so no work is done on application exit.
 // NOTE(review): the line carrying the function name (listing line 210) is
 // absent from this rendering.
212 }
213 
214 
215 int
216 main(int argc, const char* argv[])
217 {
218  // Execute main application function
219  return CAlignCleanupApplication().AppMain(argc, argv);
220 }
static void SetupArgDescriptions(CArgDescriptions &arg_desc)
virtual void Init(void)
Initialize the application.
virtual void Exit(void)
Cleanup on application exit.
virtual int Run(void)
Run the application.
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
static void SetupObjectManager(const CArgs &args, objects::CObjectManager &obj_mgr, TLoaders loaders=fDefault)
Set up the standard object manager data loaders according to the arguments provided above.
static void AddArgumentDescriptions(CArgDescriptions &arg_desc, TLoaders loaders=fDefault)
Add a standard set of arguments used to configure the object manager.
class CInputStreamSource encapsulates details of how we supply applications with input data through s...
static void SetStandardInputArgs(CArgDescriptions &arg_desc, const string &prefix="input", const string &description="data to process", bool is_mandatory=false)
Supply a standard set of arguments via argument descriptions to an application.
CScope –.
Definition: scope.hpp:92
USING_SCOPE(ncbi)
int main(int argc, const char *argv[])
static void cleanup(void)
Definition: ct_dynamic.c:30
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
CRef< objects::CObjectManager > om
Modified on Tue Apr 30 06:43:08 2024 by modify_doxy.py rev. 669887