NCBI C++ ToolKit
vecscreen_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vecscreen_app.cpp 98214 2022-10-12 15:36:06Z boukn $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Christiam Camacho
27  *
28  */
29 
30 /** @file vecscreen_app.cpp
31  * VecScreen command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
41 #include "../blast/blast_app_util.hpp"
42 
43 #ifndef SKIP_DOXYGEN_PROCESSING
45 USING_SCOPE(blast);
47 #endif
48 
50 {
51 public:
52  /** @inheritDoc */
55  version->SetVersionInfo(new CVecScreenVersion());
57  }
58 private:
59  /** @inheritDoc */
60  virtual void Init();
61  /** @inheritDoc */
62  virtual int Run();
63 
64  /// This application's command line args
65 };
66 
68 {
70  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
71 
72  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
73  "Vector screening tool, version " +
75  arg_desc->SetCurrentGroup("Input query options");
76  arg_desc->AddDefaultKey(kArgQuery, "input_file",
77  "Input file name",
79 
80  arg_desc->SetCurrentGroup("BLAST database options");
81  arg_desc->AddDefaultKey(kArgDb, "dbname", "BLAST database name",
83 
84  arg_desc->SetCurrentGroup("Post-processing options");
85  arg_desc->AddDefaultKey("term-flex", "int", "terminal flexibility, in bases, default 25",
87 
88  arg_desc->SetCurrentGroup("Output configuration options");
89  arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name",
91 
92  //arg_desc->AddDefaultKey("outfmt", "format",
93  arg_desc->AddDefaultKey(kArgOutputFormat, "format",
94  "VecScreen results options:\n"
95  " 0 = Show alignments pairwise,\n"
96  " 1 = Do not show alignments, just contaminated range offsets,\n"
97  " 2 = Contaminated ranges, as a blast-tabular-like report,\n"
98  " 3 = Contaminated ranges, as a series of json objects,\n"
99  " 4 = Seq-align-set ASN Text,\n"
100  " 5 = Seq-align-set ASN Text, no post-processing\n",
103  arg_desc->SetConstraint(kArgOutputFormat,
105  // Produce Text output?
106  arg_desc->AddFlag("text_output", "Produce text output?", true);
107 
108  arg_desc->SetCurrentGroup("");
109  SetupArgDescriptions(arg_desc.release());
110 }
111 
113 {
114  int status = BLAST_EXIT_SUCCESS;
115 
116  try {
117 
118  // Allow the fasta reader to complain on invalid sequence input
120 
121  const bool kIsProtein(false);
122  /*** Process the command line arguments ***/
123  const CArgs& args = GetArgs();
124  const string kDbName(args[kArgDb].AsString());
126 
127  /*** Initialize the scope ***/
130  CBlastInputSourceConfig iconfig(dlconfig);
131  iconfig.SetQueryLocalIdMode();
132  CRef<CScope> scope = CBlastScopeSource(dlconfig).NewScope();
133 
134  /*** Initialize the input stream ***/
135  CBlastFastaInputSource fasta(args[kArgQuery].AsInputFile(), iconfig);
136  CBlastInput input(&fasta, 1);
137 
138  /*** Get the formatting options ***/
140  args[kArgOutputFormat].AsInteger();
141  const bool kHtmlOutput = !args["text_output"].AsBoolean();
142  const TSeqPos kTermFlex = args["term-flex"].AsInteger();
143 
144  /*** Process the input ***/
145  while ( !input.End() ) {
146  CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
147  _ASSERT(query_batch->Size() == 1);
148  CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
149  CVecscreenRun vs(CRef<CSeq_loc>(const_cast<CSeq_loc*>(&*query_batch->GetQuerySeqLoc(0))),
150  query_batch->GetScope(0), kDbName, kTermFlex);
151  CVecscreenRun::CFormatter vs_format(vs, *scope, kFmt, kHtmlOutput);
152  vs_format.FormatResults(args[kArgOutput].AsOutputFile(), opts_hndl);
153  }
154 
155  } CATCH_ALL(status)
156  return status;
157 }
158 
159 #ifndef SKIP_DOXYGEN_PROCESSING
/// Program entry point for the VecScreen command line application.
/// Constructs a temporary CVecScreenApp and hands control to its AppMain(),
/// forwarding argc/argv unchanged; 0 is passed for the environment pointer,
/// eDS_Default for the diagnostic stream and "" for the configuration name.
/// @return the value returned by AppMain(), which becomes the process exit status.
160 int main(int argc, const char* argv[] /*, const char* envp[]*/)
161 {
162  return CVecScreenApp().AppMain(argc, argv, 0, eDS_Default, "");
163 }
164 #endif /* SKIP_DOXYGEN_PROCESSING */
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
@ eVecScreen
Vector screening.
Definition: blast_types.hpp:72
Class to constrain the values of an argument to those in between the values specified in the constructor.
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Class representing a text file containing sequences in fasta format.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
void SetQueryLocalIdMode()
Append query-specific prefix codes to all generated local ids.
Generalized converter from an abstract source of biological sequence data to collections of blast input.
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
Definition: sseqloc.hpp:322
size_type Size() const
Returns the number of queries found in this query vector.
Definition: sseqloc.hpp:305
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
Definition: sseqloc.hpp:313
Class whose purpose is to create CScope objects which have data loaders added with different priorities.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
NCBI C++ Object Manager dependent implementation of IQueryFactory.
CRef –.
Definition: ncbiobj.hpp:618
virtual void Init()
@inheritDoc
virtual int Run()
@inheritDoc
CVecScreenApp()
@inheritDoc
The Vecscreen formatter.
void FormatResults(CNcbiOstream &out, CRef< blast::CBlastOptionsHandle > vs_opts)
Format the VecScreen results.
@ eEndValue
Sentinel value, not an actual output format.
This class runs vecscreen.
const string kArgOutput
Output file name.
const string kDfltArgQuery
Default value for query sequence input.
const string kArgQuery
Query sequence(s)
const string kArgDb
BLAST database name.
void Print(const CCompactSAMApplication::AlignInfo &ai)
const int kDfltArgOutputFormat
Default value for formatted output type.
const string kArgOutputFormat
Argument to select formatted output type.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1174
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
#define CVersion
static int input()
static int version
Definition: mdb_load.c:29
const string kIsProtein
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on them until this notice is removed.
Declares the CRemoteBlast class.
Configuration structure for the CBlastScopeSource.
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
@ eUseBlastDbDataLoader
Use the local BLAST database loader first, if this fails, use the remote BLAST database data loader.
#define _ASSERT
USING_SCOPE(blast)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
const string kDefaultVectorDb
const TSeqPos kDefaultTerminalFlexibility
Modified on Sat Jun 22 10:44:41 2024 by modify_doxy.py rev. 669887