49 #ifndef SKIP_DOXYGEN_PROCESSING
118 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
119 "Stand-alone BLAST formatter client, version "
122 arg_desc->SetCurrentGroup(
"Input options");
123 arg_desc->AddOptionalKey(
kArgRid,
"BLAST_RID",
"BLAST Request ID (RID)",
127 arg_desc->AddOptionalKey(
kArgArchive,
"ArchiveFile",
"File containing BLAST Archive format in ASN.1 (i.e.: output format 11)",
134 arg_desc->SetCurrentGroup(
"Output configuration options");
135 arg_desc->AddDefaultKey(
kArgOutput,
"output_file",
"Output file name",
138 arg_desc->SetCurrentGroup(
"Miscellaneous options");
140 "Should the query and subject defline(s) be parsed?",
true);
141 arg_desc->SetCurrentGroup(
"");
153 static bool first_time =
true;
173 throw runtime_error(
"PSSM has no query");
177 if ( !seq_entry.
IsSeq() ) {
178 throw runtime_error(
"Cannot have multiple queries in a PSSM");
200 if (b4_queries->
IsPssm()) {
206 _ASSERT( !(*seqloc)->GetId()->IsLocal() );
237 const list<CRef<CBioseq> > subjects =
243 const TSeqPos length = (*bioseq)->GetInst().GetLength();
245 retval.push_back(
SSeqLoc(sl, subj_scope));
251 retval.push_back(
SSeqLoc(*sl, subj_scope));
263 bool isRemote =
false;
274 if ( (subjects = db_args->GetSubjects(scope)) ) {
276 char* bl2seq_legacy = getenv(
"BL2SEQ_LEGACY");
316 const int kGenbankLoaderPriority = 99;
321 scope->AddDataLoader(genbank_loader, kGenbankLoaderPriority);
333 const string& kRid = args[
kArgRid].HasValue()
354 "SAM format is only applicable to blastn results" );
363 int filtering_algorithm = -1;
415 (!kRid.empty() || isRemoteLoader),
418 kTask ==
"megablast",
431 bool isPsiBlast = (
"psiblast" ==
kTask);
450 formatter.
WriteArchive(*query_factory, *opts_handle, *results);
495 #define EXIT_CODE__UNKNOWN_RID 1
496 #define EXIT_CODE__SEARCH_PENDING 2
497 #define EXIT_CODE__SEARCH_FAILED 3
512 "Invalid input format for BLAST Archive.");
525 "Invalid input format for BLAST Archive.");
532 const string kRid = args[
kArgRid].AsString();
545 cerr <<
"Unknown/invalid RID '" << kRid <<
"'." << endl;
554 cerr <<
"RID '" << kRid <<
"' is still pending." << endl;
559 cerr <<
"RID '" << kRid <<
"' has failed" << endl;
590 #ifndef SKIP_DOXYGEN_PROCESSING
591 int main(
int argc,
const char* argv[] )
Declares singleton objects to store the version and reference for the BLAST engine.
Data loader implementation that uses the blast databases remotely.
string RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Register the BLAST database data loader using the already initialized CSeqDB object.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
bool UseXInclude(const CFormattingArgs &f, const string &s)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
const CSeq_id * GetFirstId() const
bool IsProtein() const
Returns true if the database is protein.
size_t GetNumQueries() const
Retrieve the number of queries in this object.
Argument class to collect database/subject arguments.
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
@ eNucleotide
nucleotide database
@ eProtein
protein database
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
size_type size() const
Identical to Size, provided to facilitate STL-style iteration.
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
void AddDataLoaders(CRef< objects::CScope > scope)
Add the data loader configured in the object to the provided scope.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Argument class to collect debugging options.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method.
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
void SetPreopenConnection(bool preopen=true)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
API for Remote Blast Requests.
Search Results for All Queries.
void AddTaxIds(const set< TTaxId > &tax_ids)
Root class for all serialization exceptions.
Template class for iteration on objects of class C (non-modifiable version)
const string kArgOutput
Output file name.
const string kArgArchive
Argument to blast_formatter to request BLAST archive file name.
const string kTask
Task to perform.
const string kArgParseDeflines
Argument to specify if the query and subject sequences defline should be parsed.
const string kArgRid
Argument to blast_formatter to request RID.
void Print(const CCompactSAMApplication::AlignInfo &ai)
std::ofstream out("events_result.xml")
main entry point for tests
bool LoadFromArchive()
Loads next chunk of archive from file.
CRef< CBlastOptionsHandle > GetSearchOptions()
Get the search options used for this search.
string GetDatabaseName() const
Accessor for the database name.
CRef< objects::CPssmWithParameters > GetPSSM(void)
Get the PSSM produced by the search.
int GetDbGeneticCode() const
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
string GetErrors(void)
This returns a string containing any errors that were produced by the search.
set< TTaxId > & GetNegativeTaxidList()
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
ESearchStatus CheckStatus()
Returns the status of a previously submitted search/RID.
void SetVerbose(EDebugMode verb=eDebug)
Adjust the debugging level.
string GetTask() const
Returns the task used to create the remote search (if any)
bool IsProtein() const
Determine whether this database contains protein sequences or not.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
CRef< objects::CBlast4_queries > GetQueries()
Get the queries used for this search.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
int GetQueryGeneticCode() const
CRef< objects::CBlast4_database > GetDatabases()
Get the database used by the search.
bool GetSumStatisticsMode() const
Sum statistics options.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
bool GetMBIndexLoaded() const
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
CBlast4_subject::TSeq_loc_list GetSubjectSeqLocs()
set< TTaxId > & GetTaxidList()
bool IsErrMsgArchive(void)
Int4 GetDbFilteringAlgorithmId() const
Returns the filtering algorithm ID used in the database.
list< CRef< objects::CBioseq > > GetSubjectSequences()
Returns subject sequences if "bl2seq" mode used.
const char * GetMatrixName() const
bool IsDbSearch()
Return value states whether GetDatabases or GetSubjectSequences call should be used.
unsigned int GetPsiNumberOfIterations(void)
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eInvalidArgument
Invalid argument to some function/method (could be programmer error - prefer assertions in those case...
@ eStatus_Failed
Completed but failed, call GetErrors/GetErrorVector()
@ eStatus_Pending
Not completed yet.
@ eStatus_Unknown
Never submitted or purged from the system.
@ eStatus_Done
Completed successfully.
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
@ eDiag_Warning
Warning message.
@ eDiag_Critical
Critical error message.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)
Generic macro to re-throw an exception.
void Info(CExceptionArgs_Base &args)
static int BestRank(const CRef< CSeq_id > &id)
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
TLoader * GetLoader(void) const
Get pointer to the loader.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddScope(CScope &scope, TPriority pri=kPriority_Default)
Add the scope's datasources as a single group with the given priority All data sources (data loaders ...
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Start(void)
Start the timer.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
bool IsSeq_loc_list(void) const
Check if variant Seq_loc_list is selected.
const TName & GetName(void) const
Get the Name member data.
bool IsPssm(void) const
Check if variant Pssm is selected.
list< CRef< CSeq_loc > > TSeq_loc_list
const TBioseq_set & GetBioseq_set(void) const
Get the variant data.
bool IsBioseq_set(void) const
Check if variant Bioseq_set is selected.
const TSeq_loc_list & GetSeq_loc_list(void) const
Get the variant data.
const TPssm & GetPssm(void) const
Get the variant data.
list< CRef< CSeq_loc > > TSeq_loc_list
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Configuration structure for the CBlastScopeSource.
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
@ eUseNoDataLoaders
Do not add any data loaders.
Structure to represent a single sequence to be fed to BLAST.
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.