62 CSeq_align_set::Tdata::const_iterator input_it = input_list.begin();
63 CSeq_align_set::Tdata::iterator final_it = final_list.begin();
65 while(input_it != input_list.end())
70 if(hit_count >= list_size)
72 final_list.erase(final_it, final_list.end());
76 if(final_it == final_list.end())
87 if(input_evalue == final_evalue)
95 if(input_evalue < final_evalue)
97 CSeq_align_set::Tdata::const_iterator start_input_it = input_it;
100 const CSeq_id & id_prev = (*input_it)->GetSeq_id(1);
102 if(input_it == input_list.end())
107 if(! id_prev.
Match((*input_it)->GetSeq_id(1)))
113 final_list.insert(final_it, start_input_it, input_it);
119 const CSeq_id & id_prev = (*final_it)->GetSeq_id(1);
122 if(final_it == final_list.end())
127 if(! id_prev.
Match((*final_it)->GetSeq_id(1)))
142 aggregate_search_result_set->
clear();
144 for(
unsigned int i=0;
i <
t[0]->GetNumQueries();
i++)
146 vector< CRef<CSearchResults> > thread_results;
148 const CSeq_id &
id = *(thread_results[0]->GetSeqId());
150 for(
unsigned int d=1; d < num_of_threads; d++)
152 thread_results.push_back ((*(
t[d]))[id]);
157 for(
unsigned int d=0; d< num_of_threads; d++)
159 if(thread_results[d]->HasAlignments())
164 align_set->
Set().insert(align_set->
Set().begin(),
165 thread_align_set->
Get().begin(),
166 thread_align_set->
Get().end());
173 aggregate_messages.
Combine(thread_results[d]->GetErrors());
177 thread_results[0]->GetMaskedQueryRegions(query_mask);
181 thread_results[0]->GetAncillaryData(),
183 aggregate_search_result_set->
push_back(aggregate_search_results);
187 return aggregate_search_result_set;
194 bool include_filtered_reads)
203 CVDBBlastUtil vdbUtil(isCSRA?csras:dbs,
true, isCSRA, include_filtered_reads);
208 CLocalBlast lcl_blast(query_factory, opt_handle, seqSrc, seqInfoSrc);
227 bool include_filtered_reads)
244 CPsiBlast psi_blast(pssm, db_adapter, psi_opts);
269 vector<string> & chunks,
273 vector<string> & chunks,
293 vector<string> & chunks,
295 bool include_filtered_reads):
296 m_chunks(chunks), m_include_filtered_reads(include_filtered_reads),
297 m_num_extensions(0), m_pssm(pssm)
305 vector<string> & chunks,
307 bool include_filtered_reads):
308 m_query_factory(query_factory), m_chunks(chunks),
309 m_include_filtered_reads(include_filtered_reads), m_num_extensions(0)
342 unsigned int num_of_chunks =
m_chunks.size();
343 vector<CRef<CSearchResultSet> >
results;
345 for(
unsigned int i=0;
i < num_of_chunks;
i++) {
368 bool include_filtered_reads):
369 m_query_vector(query_vector),
370 m_opt_handle(options),
371 m_total_num_seqs(local_vdb.total_num_seqs),
372 m_total_length(local_vdb.total_length),
373 m_chunks_for_thread(local_vdb.chunks_for_thread),
374 m_num_threads(local_vdb.chunks_for_thread.
size()),
376 m_include_filtered_reads(include_filtered_reads)
384 bool include_filtered_reads):
385 m_opt_handle(options),
386 m_total_num_seqs(local_vdb.total_num_seqs),
387 m_total_length(local_vdb.total_length),
388 m_chunks_for_thread(local_vdb.chunks_for_thread),
389 m_num_threads(local_vdb.chunks_for_thread.
size()),
391 m_include_filtered_reads(include_filtered_reads),
406 return(
a.length >
b.length);
411 vector<vector<SSortStruct> > & out_list, vector<Uint8> & acc_size)
415 for(
unsigned int i=0;
i <
in_list.size();
i++)
417 unsigned int min_index = 0;
418 for(
unsigned int j=1; j<num_threads; j++) {
419 if(acc_size[j] < acc_size[min_index])
422 acc_size[min_index] +=
in_list[
i].length;
423 out_list[min_index].push_back(
in_list[
i]);
430 vector<SSortStruct> filtered_list;
431 for(
unsigned int i = 0;
i <
in_list.size();
i++) {
435 filtered_list.push_back(
in_list[
i]);
442 const unsigned int dbs_per_chunk,
const string tag)
447 Uint8 num_seqs_count = 0;
449 unsigned int db_count = 0;
450 for(
unsigned int i=0;
i <
in_list.size();
i++) {
452 num_seqs_count +=
in_list[
i].num_seqs;
453 if(num_seqs_count > (
Uint8)
kMax_I4 || db_count >= dbs_per_chunk) {
454 chunks.push_back(dbs);
455 _TRACE(
"Chunk: " << dbs <<
" Num Seqs: " << num_seqs_count -
in_list[
i].num_seqs);
457 num_seqs_count =
in_list[
i].num_seqs;
468 chunks.push_back(dbs);
469 _TRACE(
"Chunk: " << dbs <<
" Num Seqs: " << num_seqs_count);
481 string max_dbs_env =
env.Get(
"VDB_MAX_DBS_PER_CHUNK");
487 if(max_dbs_per_chunk && max_dbs_per_chunk < dbs_per_chunk)
488 dbs_per_chunk = max_dbs_per_chunk;
490 return dbs_per_chunk;
500 vector<string>::iterator uq = std::unique(dbs.begin(), dbs.end());
501 dbs.erase(uq, dbs.end());
503 unsigned int num_dbs = dbs.size();
504 unsigned int num_threads = (num_dbs < threads) ? num_dbs : threads;
506 vector <SSortStruct> p,
r;
507 Uint8 total_length = 0;
508 Uint8 total_num_seqs = 0;
514 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \
515 shared(num_dbs, p, dbs) reduction(+ : total_length, total_num_seqs)
516 for(
unsigned int i=0;
i < num_dbs;
i++) {
518 p[
i].db_name = dbs[
i];
523 total_length += p[
i].length;
524 total_num_seqs += p[
i].num_seqs;
527 openmp_exception += e.
what();
531 else if(search_mode ==
eBoth) {
534 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \
535 shared(num_dbs, p,r, dbs) reduction(+ : total_length, total_num_seqs)
536 for(
unsigned int i=0;
i < num_dbs;
i++) {
538 p[
i].db_name = dbs[
i];
540 r[
i].db_name = dbs[
i];
550 total_length += (p[
i].length +
r[
i].length);
551 total_num_seqs += (p[
i].num_seqs +
r[
i].num_seqs);
554 openmp_exception += e.
what();
560 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \
561 shared(num_dbs, r, dbs) reduction(+ : total_length, total_num_seqs)
562 for(
unsigned int i=0;
i < num_dbs;
i++) {
565 r[
i].db_name = dbs[
i];
570 total_length +=
r[
i].length;
571 total_num_seqs +=
r[
i].num_seqs;
578 openmp_exception += e.
what();
593 num_dbs =
r.size() + p.size();
599 if(max_csra_thread != 0 && threads > max_csra_thread) {
600 threads = max_csra_thread;
602 num_threads = (num_dbs < threads) ? num_dbs : threads;
611 else if(total_num_seqs == 0){
613 string zero_seq_err =
"DB list contains no searchable seqs in sra_mode " +
NStr::IntToString(search_mode) +
".";
621 vector<Uint8> acc_size(num_threads, 0);
626 vector<vector<SSortStruct> > list_thread(num_threads);
633 for(
unsigned int t=0;
t < num_threads;
t++) {
639 vector<vector<SSortStruct> > list_thread(num_threads);
646 for(
unsigned int t=0;
t < num_threads;
t++) {
676 for(
unsigned int i=0;
i < rs.
size();
i++)
678 rs[
i].TrimSeqAlign(hit_list_size);
686 return (orig_size + 100);
688 else if(orig_size < 500)
690 return (orig_size + 75);
693 return (orig_size + 50);
712 if(num_chunks == 1) {
728 vector<CRef<CSearchResultSet> >
results;
729 for(
unsigned int i=0;
i < num_chunks;
i++)
770 for(
unsigned int i=0;
i <
orig.Size();
i++)
773 q->SetMaskedRegions(
orig[
i]->GetMaskedRegions());
774 q->SetGeneticCodeId(
orig[
i]->GetGeneticCodeId());
804 vector<CRef<CSearchResultSet> >
results;
805 vector<CRef<IQueryFactory> > query_factory;
806 vector<CRef<CPssmWithParameters> > pssm;
832 thread[
t]->Join(
reinterpret_cast<void**
> (&thread_results[
t]));
837 if(thread_results[
t] ==
NULL) {
841 results.push_back(thread_results[
t]->thread_result_set);
842 delete (thread_results[
t]);
Declares the CBlastNucleotideOptionsHandle class.
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
void push_back(const value_type &element)
Add a value to the back of this container.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
bool m_include_filtered_reads
CRef< CBlastQueryVector > m_query_vector
CRef< CBlastOptionsHandle > m_opt_handle
void x_AdjustDbSize(void)
CRef< objects::CPssmWithParameters > m_pssm
static string PreprocessDBs(CLocalVDBBlast::SLocalVDBStruct &local_vdb, const string db_names, unsigned int num_threads=kDisableThreadedSearch, ESRASearchMode seach_mode=eAligned)
void x_PrepareQuery(vector< CRef< IQueryFactory > > &qf_v)
void x_PreparePssm(vector< CRef< CPssmWithParameters > > &pssm)
CLocalVDBBlast(const CLocalVDBBlast &)
static const unsigned int kDisableThreadedSearch
CRef< CSearchResultSet > RunThreadedSearch()
CRef< CSearchResultSet > Run()
unsigned int m_num_threads
vector< vector< string > > & m_chunks_for_thread
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle to the protein-protein options to the BLAST algorithm.
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Search Results for All Queries.
Search Results for One Query.
Handle to the protein-translated nucleotide options to the BLAST algorithm.
BlastSeqSrc * GetSRASeqSrc()
Return the stored SRA BlastSeqSrc object.
static Uint4 GetMaxNumCSRAThread(void)
static void GetVDBStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false)
Function to get around the OID (blastseqsrc) limit, so that a number of sequences greater than Int4 can be returned.
static bool IsCSRA(const string &db_name)
CRef< blast::IBlastSeqInfoSrc > GetSRASeqInfoSrc()
Return the SRA BlastSeqInfoSrc object (create if none exists).
static void GetAllStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, Uint8 &ref_num_seqs, Uint8 &ref_length)
CVDBThread(const CVDBThread &)
void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CBlastOptionsHandle > m_opt_handle
CRef< CSearchResultSet > RunTandemSearches(void)
CRef< CPssmWithParameters > m_pssm
CVDBThread & operator=(const CVDBThread &)
CRef< IQueryFactory > m_query_factory
vector< string > m_chunks
bool m_include_filtered_reads
CVDBThread(CRef< IQueryFactory > query_factory, vector< string > &chunks, CRef< CBlastOptions > options, bool include_filtered_reads)
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
static bool DLIST_NAME() in_list(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
void SetHitlistSize(int s)
CRef< CSearchResultSet > Run()
Executes the search.
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
int GetHitlistSize() const
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetDbSeqNum(unsigned int n)
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
size_type size() const
Identical to GetNumResults, provided to facilitate STL-style iteration.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< CBlastOptions > Clone() const
Explicit deep copy of the Blast options object.
void push_back(value_type &element)
Add a value to the back of this container.
void Combine(const TQueryMessages &other)
Combine other messages with these.
void clear()
Clears the contents of this object.
Int4 GetNumExtensions()
Retrieve the number of extensions performed during the search.
Int8 GetEffectiveSearchSpace() const
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
#define MSerial_AsnBinary
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
Main class to perform a BLAST search on the local machine.
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines NCBI C++ exception handling.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Multi-threading – mutexes; rw-locks; semaphore.
Multi-threading – classes, functions, and features.
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declares the CPSIBlastOptionsHandle class.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Complete type definition of Blast Sequence Source ADT.
vector< vector< string > > chunks_for_thread
CRef< CSearchResultSet > thread_result_set
Declares the CTBlastnOptionsHandle class.
const string k_CSRA_CHUNK("CSRA_CHUNK: ")
CRef< CSearchResultSet > s_RunLocalVDBSearch(const string &dbs, CRef< IQueryFactory > query_factory, CRef< CBlastOptionsHandle > opt_handle, Int4 &num_extensions, bool include_filtered_reads)
void s_TrimResults(CSearchResultSet &rs, int hit_list_size)
static CRef< CSearchResultSet > s_CombineSearchSets(vector< CRef< CSearchResultSet > > &t, unsigned int num_of_threads, const int list_size)
static void s_DivideDBsForThread(unsigned int num_threads, vector< SSortStruct > &in_list, vector< vector< SSortStruct > > &out_list, vector< Uint8 > &acc_size)
static void s_GetChunksForThread(vector< SSortStruct > &in_list, vector< string > &chunks, const unsigned int dbs_per_chunk, const string tag)
static const unsigned int DEFAULT_MAX_DBS_PER_CHUNK
static const unsigned int DEFAULT_MAX_DBS_OPEN
static void s_RemoveNonCSRAEntry(vector< SSortStruct > &in_list)
static int s_GetModifiedHitlistSize(const int orig_size)
CRef< CBlastQueryVector > s_CloneBlastQueryVector(const CBlastQueryVector &orig)
static bool s_SortDbSize(const SSortStruct &a, const SSortStruct &b)
const string k_NOT_CSRA_DB("NOT_CSRA")
static unsigned int s_GetNumDbsPerChunk(unsigned int num_threads, unsigned int num_dbs)
CRef< CSearchResultSet > s_RunPsiVDBSearch(const string &dbs, CRef< CPssmWithParameters > pssm, CRef< CBlastOptionsHandle > opt_handle, bool include_filtered_reads)
static void s_MergeAlignSet(CSeq_align_set &final_set, const CSeq_align_set &input_set, const int list_size)
Declares the CLocalVDBBlast class.