NCBI C++ ToolKit
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: kblastthr.cpp 78714 2017-07-13 12:19:48Z madden $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Tom Madden
27  *
28  * File Description:
29  * Class to multi-thread (MT) KMER BLAST searches
30  *
31  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
48 #include <math.h>
60 #include "kblastthr.hpp"
64 USING_SCOPE(blast);
// Body of s_GetBlastQueryVector(TSeqLocVector &tsl_v), which the index of this
// listing gives as returning CRef< CBlastQueryVector >.
// Wraps every entry of tsl_v in a CBlastSearchQuery, adds each one to q_vec,
// and returns q_vec.
// NOTE(review): the signature line and the declarations of q_vec and blast_q
// (original lines 71, 73 and 76) are not present in this listing.
72 {
74  for(TSeqLocVector::iterator iter=tsl_v.begin(); iter!=tsl_v.end(); ++iter)
75  {
 // Build one search query from this entry's seqloc and scope.
77  blast_q.Reset(new CBlastSearchQuery(*(*iter).seqloc, *(*iter).scope));
78  q_vec->AddQuery(blast_q);
79  }
80  return q_vec;
81 }
// Body of the CBlastKmerThread destructor (listed in the index as
// `virtual ~CBlastKmerThread(void)`); the signature line is not present in
// this listing. The body executes no statements: its only content is a
// commented-out debug trace.
84 {
85  // cerr << "Thread number " << GetSelf() << " exiting\n";
86 }
88 /////////////////////////////////////////////////////////////////////////////
89 // Main program for CThread
// Body of the thread function (listed in the index as `virtual void * Main(void)`).
// Sets up a CBlastKmerSearch, then loops: fetch the next query batch from
// m_Input, run the search, and queue the results on m_FormattingThr under the
// batch number taken from globalBatchNumber. Returns NULL when the input is
// exhausted.
// NOTE(review): this listing omits several original lines (embedded numbers
// 91, 93, 97, 100-101, 105, 118, 129-130, 140, 144-146, 148-149 and 155 are
// absent), so the declarations of target_db, dlconfig and optsHndle and the
// tail of the CBlastFormat constructor call are not visible here.
92 {
94  // FIXME. Do not want this with GILIST: target_db->GetSeqDb()->SetNumberOfThreads(1, true);
95  CRef<CLocalDbAdapter> db_adapter(new CLocalDbAdapter(*target_db));
96  const bool kIsProtein = true;
98  CRef<CScope> scope(CBlastScopeSource(dlconfig).NewScope());
 // -1 until a batch number has been assigned inside the loop.
99  int batchNumber=-1;
102  // FIXME: should be done as part of clone.
 // Copy the threshold and minimum-hits settings from the member options
 // handle onto optsHndle.
103  optsHndle->SetThresh(m_BlastOptsHandle->GetThresh());
104  optsHndle->SetMinHits(m_BlastOptsHandle->GetMinHits());
106  CBlastKmerSearch blastSearch(optsHndle, db_adapter);
 // Apply the database's GI list as a search limit when it is set and non-empty.
107  const CRef< CSeqDBGiList > gilist = target_db->GetGiList();
108  if (gilist && gilist->NotEmpty())
109  blastSearch.SetGiListLimit(gilist);
 // Same for the negative GI list: its GIs are copied into a vector and used
 // to build a CSeqDBIdSet, whose GetNegativeList() result is what gets
 // passed to SetGiListLimit.
110  const CRef< CSeqDBGiList > neggilist = target_db->GetNegativeGiList();
111  if (neggilist && neggilist->NotEmpty())
112  {
113  vector<TGi> gis;
114  neggilist->GetGiList(gis);
115  CSeqDBIdSet idset(gis, CSeqDBIdSet::eGi, false);
116  blastSearch.SetGiListLimit(idset.GetNegativeList());
117  }
119 // CStopWatch watch;
120 // watch.Start();
121  bool isDone=false;
122  while (!isDone)
123  {
124  TSeqLocVector query_vector;
 // The input check, batch read and batch-number increment below happen after
 // acquiring globalGuard.
 // NOTE(review): no globalGuard.Unlock() is visible in this listing; original
 // lines 129-130 and 140 are missing. Confirm the mutex is released on both
 // the break path and the normal path.
125  globalGuard.Lock();
126  isDone = m_Input.End();
127  if (isDone)
128  {
131  break;
132  }
133  else
134  {
135  query_vector = m_Input.GetNextSeqLocBatch(*scope);
136  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(query_vector));
137  blastSearch.SetQuery(qf);
 // Take the current global batch number for this batch, then advance the counter.
138  batchNumber=globalBatchNumber++;
139  }
141  CRef<CSearchResultSet> blast_results = blastSearch.Run();
 // NOTE(review): this declaration shadows the db_adapter created before the
 // loop, so a new CLocalDbAdapter is constructed on every iteration.
142  CRef<CLocalDbAdapter> db_adapter(new CLocalDbAdapter(*target_db));
 // NOTE(review): the remaining constructor arguments (original lines 144-146
 // and 148-149) are not present in this listing.
143  CRef<CBlastFormat> formatter(new CBlastFormat(optsHndle->GetOptions(), *db_adapter,
147  false, false, BLAST_GENETIC_CODE, BLAST_GENETIC_CODE, false, false, -1,
 // Bundle the queries, results and formatter for this batch and queue them
 // on the formatting thread under batchNumber.
150  vector<SFormatResultValues> results_v;
151  CRef<CBlastQueryVector> q_vec = s_GetBlastQueryVector(query_vector);
152  results_v.push_back(SFormatResultValues(q_vec, blast_results, formatter));
153  m_FormattingThr->QueueResults(batchNumber, results_v);
154  }
156  return (void*) NULL;
157 }
Produce formatted blast output for command line applications.
Declares the CBlastAdvancedProteinOptionsHandle class.
Interface for reading SRA sequences into blast input.
Interface for converting sources of sequence data into blast sequence input.
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
Default genetic code for query and/or database.
Declares the CBlastProteinOptionsHandle class.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
void QueueResults(int batchNumber, vector< SFormatResultValues > results)
Queue results for printing.
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:936
This class formats the BLAST results for command line applications.
TSeqLocVector GetNextSeqLocBatch(CScope &scope)
Read and convert the next batch of sequences.
Definition: blast_input.cpp:98
bool End()
Determine if we have reached the end of the BLAST input.
Threading class for BlastKmer searches.
Definition: kblastapi.hpp:57
void SetGiListLimit(CRef< CSeqDBGiList > list)
Limits output by GILIST.
Definition: kblastapi.hpp:78
void SetQuery(CRef< IQueryFactory > queryFactory)
Sets the queries. Overrides any queries already set.
Definition: kblastapi.hpp:74
CRef< CSearchResultSet > Run(void)
Run a KMER and then BLAST search.
Definition: kblastapi.cpp:154
CNcbiOstream & m_OutFile
Definition: kblastthr.hpp:98
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
Definition: kblastthr.cpp:91
CRef< CBlastDatabaseArgs > m_DbArgs
Definition: kblastthr.hpp:92
virtual ~CBlastKmerThread(void)
Definition: kblastthr.cpp:83
CBlastAsyncFormatThread * m_FormattingThr
Definition: kblastthr.hpp:100
CBlastInput & m_Input
Definition: kblastthr.hpp:90
Boolean m_BelieveQuery
Definition: kblastthr.hpp:102
CRef< CFormattingArgs > m_FormattingArgs
Definition: kblastthr.hpp:96
CRef< CBlastpKmerOptionsHandle > m_BlastOptsHandle
Definition: kblastthr.hpp:94
Query Vector.
Definition: sseqloc.hpp:276
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
Definition: sseqloc.hpp:293
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
Search Query.
Definition: sseqloc.hpp:147
Handle to the KMER BLASTP options.
CFastMutex –.
Definition: ncbimtx.hpp:667
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated value output.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
void GetGiList(vector< TGi > &gis) const
Get the gi list.
bool NotEmpty() const
Return true if there are elements present.
SeqDB ID list for performing boolean set operations.
CRef< CSeqDBNegativeList > GetNegativeList()
Retrieve a negative GI list.
Operators to edit gaps in sequences.
const CRef< CSeqDBGiList > & GetNegativeGiList() const
Accessor for the negative gi list.
int GetCandidateSeqs() const
Gets the max number of candidate matches to process with BLAST.
int GetMinHits() const
Returns the number of hits to initiate calculation of Jaccard distance.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< CBlastOptions > Clone() const
Explicit deep copy of the Blast options object.
const CRef< CSeqDBGiList > & GetGiList() const
Accessor for the gi list.
void SetMinHits(int minhits=1)
Sets the number of hits to initiate calculation of Jaccard distance.
void SetCandidateSeqs(int candidates=1000)
Sets the max number of candidate matches to process with BLAST @candidates max number of target seque...
void SetThresh(double thresh=0.1)
Sets Threshold for Jaccard distance.
double GetThresh() const
Returns threshold for Jaccard distance (range: 0-1)
#define NULL
Definition: ncbistd.hpp:225
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
void Lock(void)
Acquire mutex for the current thread with no nesting checks.
void Unlock(void)
Release mutex with no owner or nesting checks.
int globalBatchNumber
Definition: kblastthr.cpp:67
int globalNumThreadsDone
Definition: kblastthr.cpp:68
CFastMutex globalGuard
Definition: kblastthr.cpp:66
Definition: kblastthr.cpp:62
CRef< CBlastQueryVector > s_GetBlastQueryVector(TSeqLocVector &tsl_v)
Definition: kblastthr.cpp:71
Main class to perform a BLAST search on the local machine.
const string kIsProtein
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
Defines a concrete strategy for the IBlastSeqInfoSrc interface for sequence identifiers retrieval fro...
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Configuration structure for the CBlastScopeSource.
@ eUseNoDataLoaders
Do not add any data loaders.
Contains query, results and CBlastFormat for one batch.
Uniform BLAST Search Interface.
Modified on Fri Feb 23 11:48:14 2024 by rev. 669887