NCBI C++ ToolKit
Functions
blast_app_util.cpp File Reference

Utility functions for BLAST command line applications. More...

#include <ncbi_pch.hpp>
#include "blast_app_util.hpp"
#include <serial/serial.hpp>
#include <serial/objostr.hpp>
#include <objtools/data_loaders/blastdb/bdbloader.hpp>
#include <algo/blast/api/remote_blast.hpp>
#include <algo/blast/api/objmgr_query_data.hpp>
#include <algo/blast/api/blast_options_builder.hpp>
#include <algo/blast/api/search_strategy.hpp>
#include <algo/blast/blastinput/blast_input.hpp>
#include <algo/blast/blastinput/psiblast_args.hpp>
#include <algo/blast/blastinput/tblastn_args.hpp>
#include <algo/blast/blastinput/blast_scope_src.hpp>
#include <objmgr/util/sequence.hpp>
#include <objects/scoremat/Pssm.hpp>
#include <serial/typeinfo.hpp>
#include <objtools/data_loaders/blastdb/bdbloader_rmt.hpp>
#include <algo/blast/format/blast_format.hpp>
#include <objtools/align_format/format_flags.hpp>
+ Include dependency graph for blast_app_util.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.

Functions

 USING_SCOPE (objects)
 
 USING_SCOPE (blast)
 
CRef< blast::CRemoteBlast > InitializeRemoteBlast (CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
 Initializes a CRemoteBlast instance for usage by command line BLAST binaries. More...
 
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration (bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
 Initialize the data loader configuration for the query. More...
 
void InitializeSubject (CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
 Initializes the subject/database as well as its scope. More...
 
string RegisterOMDataLoader (CRef< CSeqDB > db_handle)
 Register the BLAST database data loader using the already initialized CSeqDB object. More...
 
static CRef< blast::CExportStrategy > s_InitializeExportStrategy (CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, const string &client_id, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
 
static void s_ExportSearchStrategy (CNcbiOstream *out, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > options_handle, CRef< blast::CBlastDatabaseArgs > db_args, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
 Real implementation of search strategy extraction. More...
 
static TSeqLocVector s_ConvertBioseqs2TSeqLocVector (const CBlast4_subject::TSequences &subjects)
 Converts a list of Bioseqs into a TSeqLocVector. More...
 
static void s_ImportPssm (const CBlast4_queries &queries, CRef< blast::CBlastOptionsHandle > opts_hndl, blast::CBlastAppArgs *cmdline_args)
 Import PSSM into the command line arguments object. More...
 
static void s_ImportQueries (const CBlast4_queries &queries, CRef< blast::CBlastOptionsHandle > opts_hndl, blast::CBlastAppArgs *cmdline_args)
 Import queries into command line arguments object. More...
 
static CRef< blast::CBlastDatabaseArgs > s_ImportDatabase (const CBlast4_subject &subj, CBlastOptionsBuilder &opts_builder, bool subject_is_protein, bool is_remote_search)
 Import the database and return it in a CBlastDatabaseArgs object. More...
 
static CRef< blast::CBlastDatabaseArgs > s_ImportSubjects (const CBlast4_subject &subj, bool subject_is_protein)
 Import the subject sequences into a CBlastDatabaseArgs object. More...
 
static void s_ImportSearchStrategy (CNcbiIstream *in, blast::CBlastAppArgs *cmdline_args, bool is_remote_search, bool override_query, bool override_subject)
 Imports search strategy, using CImportStrategy. More...
 
bool RecoverSearchStrategy (const CArgs &args, blast::CBlastAppArgs *cmdline_args)
 Recover search strategy from input file. More...
 
void SaveSearchStrategy (const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
 Save the search strategy corresponding to the current command line search. More...
 
static void s_ExtractSeqidsAndRanges (const blast::CSearchResultSet &results, CScope::TIds &ids, vector< TSeqRange > &ranges)
 Extracts the subject sequence IDs and ranges from the BLAST results. More...
 
static bool s_IsUsingRemoteBlastDbDataLoader ()
 Returns true if the remote BLAST DB data loader is being used. More...
 
static bool s_IsPrefetchFormat (blast::CFormattingArgs::EOutputFormat format_type)
 
static bool s_PreFetchSeqs (const blast::CSearchResultSet &results, blast::CFormattingArgs::EOutputFormat format_type)
 
void BlastFormatter_PreFetchSequenceData (const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
 This method optimizes the retrieval of sequence data to scope. More...
 
CRef< CBlastAncillaryData > ExtractPssmAncillaryData (const CPssmWithParameters &pssm)
 Auxiliary function to extract the ancillary data from the PSSM. More...
 
void CheckForFreqRatioFile (const string &rps_dbname, CRef< CBlastOptionsHandle > &opt_handle, bool isRpsblast)
 
bool IsIStreamEmpty (CNcbiIstream &in)
 
string GetCmdlineArgs (const CNcbiArguments &a)
 
bool UseXInclude (const CFormattingArgs &f, const string &s)
 
string GetSubjectFile (const CArgs &args)
 Get name of subject file. Parameter args: arguments class [in]. More...
 
void PrintErrorArchive (const CArgs &a, const list< CRef< CBlast4_error > > &msg)
 Function to print blast archive with only error messages (search failed) to output stream. More...
 
void QueryBatchCleanup ()
 Clean up formatter scope and release. More...
 
void LogQueryInfo (CBlastUsageReport &report, const CBlastInput &q_info)
 
void LogBlastOptions (blast::CBlastUsageReport &report, const CBlastOptions &opt)
 
void LogCmdOptions (blast::CBlastUsageReport &report, const CBlastAppArgs &args)
 
int GetMTByQueriesBatchSize (EProgram p, int num_threads, const string &task)
 
void MTByQueries_DBSize_Warning (const Int8 length_limit, bool is_db_protein)
 
void CheckMTByQueries_QuerySize (EProgram prog, int batch_size)
 

Detailed Description

Utility functions for BLAST command line applications.

Definition in file blast_app_util.cpp.

Function Documentation

◆ BlastFormatter_PreFetchSequenceData()

void BlastFormatter_PreFetchSequenceData ( const blast::CSearchResultSet &  results,
CRef< CScope >  scope,
blast::CFormattingArgs::EOutputFormat  format_type 
)

◆ CheckForFreqRatioFile()

void CheckForFreqRatioFile ( const string &  rps_dbname,
CRef< CBlastOptionsHandle > &  opt_handle,
bool  isRpsblast 
)

◆ CheckMTByQueries_QuerySize()

void CheckMTByQueries_QuerySize ( EProgram  prog,
int  batch_size 
)

◆ ExtractPssmAncillaryData()

CRef<CBlastAncillaryData> ExtractPssmAncillaryData ( const CPssmWithParameters &  pssm)

◆ GetCmdlineArgs()

string GetCmdlineArgs ( const CNcbiArguments &  a)

◆ GetMTByQueriesBatchSize()

int GetMTByQueriesBatchSize ( EProgram  p,
int  num_threads,
const string &  task 
)

◆ GetSubjectFile()

string GetSubjectFile ( const CArgs &  args)

Get name of subject file. Parameter args: arguments class [in].

Returns
Name of subject file.

Definition at line 898 of file blast_app_util.cpp.

References CArgs::Exist(), and kArgSubject.

Referenced by CBlastnNode::Main(), CBlastpNode::Main(), CBlastxNode::Main(), CTblastnNode::Main(), CTblastxApp::Run(), CBlastnApp::x_RunMTBySplitDB(), CBlastpApp::x_RunMTBySplitDB(), CBlastxApp::x_RunMTBySplitDB(), and CTblastnApp::x_RunMTBySplitDB().

◆ InitializeQueryDataLoaderConfiguration()

blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration ( bool  query_is_protein,
CRef< blast::CLocalDbAdapter >  db_adapter 
)

◆ InitializeRemoteBlast()

CRef<blast::CRemoteBlast> InitializeRemoteBlast ( CRef< blast::IQueryFactory >  queries,
CRef< blast::CBlastDatabaseArgs >  db_args,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
bool  verbose_output,
const string &  client_id = kEmptyStr,
CRef< objects::CPssmWithParameters >  pssm = CRef< objects::CPssmWithParameters >() 
)

Initializes a CRemoteBlast instance for usage by command line BLAST binaries.

Parameters
queries: query sequence(s) or NULL in case of PSSM input [in]
db_args: database/subject arguments [in]
opts_hndl: BLAST options handle [in]
verbose_output: set to true if CRemoteBlast should produce verbose output [in]
pssm: PSSM to use for single iteration remote PSI-BLAST
Exceptions
CInputException: in case of remote PSI-BL2SEQ, as it's not supported

Definition at line 84 of file blast_app_util.cpp.

References _ASSERT, CRef< C, Locker >::Empty(), kEmptyStr, NCBI_THROW, CRef< C, Locker >::NotEmpty(), CRef< C, Locker >::Reset(), CRemoteBlast::SetClientId(), and CRemoteBlast::SetVerbose().

Referenced by CPsiBlastApp::DoIterations(), CBlastnNode::Main(), CBlastpNode::Main(), CBlastxNode::Main(), CRPSBlastNode::Main(), CRPSTBlastnNode::Main(), CTblastnNode::Main(), CDeltaBlastApp::Run(), CTblastxApp::Run(), CRMBlastnApp::Run(), CBlastnApp::x_RunMTBySplitDB(), CBlastpApp::x_RunMTBySplitDB(), CBlastxApp::x_RunMTBySplitDB(), CRPSBlastApp::x_RunMTBySplitDB(), CRPSTBlastnApp::x_RunMTBySplitDB(), and CTblastnApp::x_RunMTBySplitDB().

◆ InitializeSubject()

void InitializeSubject ( CRef< blast::CBlastDatabaseArgs >  db_args,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
bool  is_remote_search,
CRef< blast::CLocalDbAdapter > &  db_adapter,
CRef< objects::CScope > &  scope 
)

◆ IsIStreamEmpty()

bool IsIStreamEmpty ( CNcbiIstream &  in)

◆ LogBlastOptions()

void LogBlastOptions ( blast::CBlastUsageReport &  report,
const CBlastOptions &  opt 
)

◆ LogCmdOptions()

void LogCmdOptions ( blast::CBlastUsageReport &  report,
const CBlastAppArgs &  args 
)

◆ LogQueryInfo()

void LogQueryInfo ( CBlastUsageReport &  report,
const CBlastInput &  q_info 
)

◆ MTByQueries_DBSize_Warning()

void MTByQueries_DBSize_Warning ( const Int8  length_limit,
bool  is_db_protein 
)

◆ PrintErrorArchive()

void PrintErrorArchive ( const CArgs &  a,
const list< CRef< CBlast4_error > > &  msg 
)

◆ QueryBatchCleanup()

void QueryBatchCleanup ( )

◆ RecoverSearchStrategy()

bool RecoverSearchStrategy ( const CArgs &  args,
blast::CBlastAppArgs *  cmdline_args 
)

◆ RegisterOMDataLoader()

string RegisterOMDataLoader ( CRef< CSeqDB >  db_handle)

Register the BLAST database data loader using the already initialized CSeqDB object.

Parameters
db_handle: properly initialized CSeqDB instance [in]
Returns
name of the BLAST database data loader (to be added to the CScope object)

Definition at line 246 of file blast_app_util.cpp.

References _TRACE, CObjectManager::eDefault, CObjectManager::GetInstance(), CBlastDbDataLoader::GetLoaderNameFromArgs(), CBlastDatabaseArgs::kSubjectsDataLoaderPriority, om, and CBlastDbDataLoader::RegisterInObjectManager().

Referenced by InitializeSubject(), CDeltaBlastApp::Run(), CSeedTopApp::Run(), and s_InitializeSubject().

◆ s_ConvertBioseqs2TSeqLocVector()

static TSeqLocVector s_ConvertBioseqs2TSeqLocVector ( const CBlast4_subject::TSequences &  subjects)
static

Converts a list of Bioseqs into a TSeqLocVector.

All Bioseqs are added to the same CScope object

Parameters
subjects: Bioseqs to convert

Definition at line 355 of file blast_app_util.cpp.

References CScope::AddBioseq(), CSeq_id::BestRank(), FindBestChoice(), CObjectManager::GetInstance(), and ITERATE.

Referenced by s_ImportSubjects().

◆ s_ExportSearchStrategy()

static void s_ExportSearchStrategy ( CNcbiOstream *  out,
CRef< blast::IQueryFactory >  queries,
CRef< blast::CBlastOptionsHandle >  options_handle,
CRef< blast::CBlastDatabaseArgs >  db_args,
CRef< objects::CPssmWithParameters >  pssm,
unsigned int  num_iters 
)
static

Real implementation of search strategy extraction.

Definition at line 319 of file blast_app_util.cpp.

References _ASSERT, CBlastException::eNotSupported, CException::GetErrCode(), kEmptyStr, NCBI_THROW, out(), and s_InitializeExportStrategy().

Referenced by SaveSearchStrategy().

◆ s_ExtractSeqidsAndRanges()

static void s_ExtractSeqidsAndRanges ( const blast::CSearchResultSet &  results,
CScope::TIds &  ids,
vector< TSeqRange > &  ranges 
)
static

Extracts the subject sequence IDs and ranges from the BLAST results.

Note
if this ever needs to be refactored for popular developer consumption, this function should operate on CSeq_align_set as opposed to blast::CSearchResultSet

Definition at line 659 of file blast_app_util.cpp.

References _ASSERT, eNa_strand_minus, eNa_strand_plus, CSeq_id_Handle::GetHandle(), ITERATE, r(), compile_time_bits::range(), result, CRange_Base::SetFrom(), and COpenRange< Position >::SetToOpen().

Referenced by BlastFormatter_PreFetchSequenceData().

◆ s_ImportDatabase()

static CRef<blast::CBlastDatabaseArgs> s_ImportDatabase ( const CBlast4_subject &  subj,
CBlastOptionsBuilder &  opts_builder,
bool  subject_is_protein,
bool  is_remote_search 
)
static

◆ s_ImportPssm()

static void s_ImportPssm ( const CBlast4_queries &  queries,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
blast::CBlastAppArgs *  cmdline_args 
)
static

Import PSSM into the command line arguments object.

Definition at line 372 of file blast_app_util.cpp.

References Blast_ProgramNameFromType(), CBlast4_queries_Base::GetPssm(), NCBI_THROW, NULL, CPsiBlastAppArgs::SetInputPssm(), and CTblastnAppArgs::SetInputPssm().

Referenced by s_ImportSearchStrategy().

◆ s_ImportQueries()

static void s_ImportQueries ( const CBlast4_queries &  queries,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
blast::CBlastAppArgs *  cmdline_args 
)
static

◆ s_ImportSearchStrategy()

static void s_ImportSearchStrategy ( CNcbiIstream *  in,
blast::CBlastAppArgs *  cmdline_args,
bool  is_remote_search,
bool  override_query,
bool  override_subject 
)
static

◆ s_ImportSubjects()

static CRef<blast::CBlastDatabaseArgs> s_ImportSubjects ( const CBlast4_subject &  subj,
bool  subject_is_protein 
)
static

◆ s_InitializeExportStrategy()

static CRef<blast::CExportStrategy> s_InitializeExportStrategy ( CRef< blast::IQueryFactory >  queries,
CRef< blast::CBlastDatabaseArgs >  db_args,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
const string &  client_id,
CRef< objects::CPssmWithParameters >  pssm,
unsigned int  num_iters 
)
static

◆ s_IsPrefetchFormat()

static bool s_IsPrefetchFormat ( blast::CFormattingArgs::EOutputFormat  format_type)
static

◆ s_IsUsingRemoteBlastDbDataLoader()

static bool s_IsUsingRemoteBlastDbDataLoader ( )
static

Returns true if the remote BLAST DB data loader is being used.

Definition at line 703 of file blast_app_util.cpp.

References CObjectManager::GetInstance(), CObjectManager::GetRegisteredNames(), ITERATE, and NStr::StartsWith().

Referenced by BlastFormatter_PreFetchSequenceData().

◆ s_PreFetchSeqs()

static bool s_PreFetchSeqs ( const blast::CSearchResultSet &  results,
blast::CFormattingArgs::EOutputFormat  format_type 
)
static

Definition at line 728 of file blast_app_util.cpp.

References i, s_IsPrefetchFormat(), and NStr::StringToInt().

Referenced by BlastFormatter_PreFetchSequenceData().

◆ SaveSearchStrategy()

void SaveSearchStrategy ( const CArgs &  args,
blast::CBlastAppArgs *  cmdline_args,
CRef< blast::IQueryFactory >  queries,
CRef< blast::CBlastOptionsHandle >  opts_hndl,
CRef< objects::CPssmWithParameters >  pssm,
unsigned int  num_iters 
)

◆ UseXInclude()

bool UseXInclude ( const CFormattingArgs &  f,
const string &  s 
)

◆ USING_SCOPE() [1/2]

USING_SCOPE ( blast  )

◆ USING_SCOPE() [2/2]

USING_SCOPE ( objects  )
Modified on Tue May 21 11:01:56 2024 by modify_doxy.py rev. 669887