NCBI C++ ToolKit
blastkmer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blastkmer.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Tom Madden
27  *
28  * File Description:
29  * BLAST-kmer searches
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
38 
40 BEGIN_SCOPE(blast)
41 
42 
44  CRef<CBlastKmerOptions> options,
45  CRef<CSeqDB> seqdb,
46  string kmerfile)
47  : m_QueryVector(query_vector),
48  m_Opts (options),
49  m_SeqDB(seqdb),
50  m_GIList(NULL)
51 {
52  if (kmerfile != "")
53  m_KmerFiles.push_back(kmerfile);
54  else
55  seqdb->FindVolumePaths(m_KmerFiles, false);
56 
57  if(options->Validate() == false)
58  NCBI_THROW(CException, eUnknown, "ERROR: kmer options validation failed");
59 
60 }
61 
64  const string& dbname)
65  : m_Opts (options),
66  m_GIList(NULL)
67 {
68  m_QueryVector.push_back(query);
69 
71 
73 
74  if(options->Validate() == false)
75  NCBI_THROW(CException, eUnknown, "ERROR: kmer options validation failed");
76 }
77 
78 void
79 CBlastKmer::x_ProcessQuery(const string& query_seq, SOneBlastKmerSearch& kmerSearch, const SBlastKmerParameters& kmerParams, uint32_t *a, uint32_t *b, vector < vector<int> >& kvector, vector<int> badMers)
80 {
81 
82  int num_bands = kmerParams.numHashes/kmerParams.rowsPerBand;
83 
84  int do_seg=0; // FIXME
85 
86  bool kmerFound = false;
87  if (kmerParams.version < 3)
88  kmerFound = minhash_query(query_seq, kmerSearch.queryHash, kmerParams.numHashes, a, b, do_seg, kmerParams.kmerNum, kmerParams.alphabetChoice, kmerParams.chunkSize);
89  else
90  kmerFound = minhash_query2(query_seq, kmerSearch.queryHash, kmerParams.kmerNum,
91  kmerParams.numHashes, kmerParams.alphabetChoice, badMers, kmerParams.chunkSize);
92 
93  if (!kmerFound)
94  NCBI_THROW(CException, eUnknown, "WARNING: No KMERs in query");
95 
96  if (kmerParams.version < 2)
97  get_LSH_hashes(kmerSearch.queryHash, kmerSearch.queryLSHHash, num_bands, kmerParams.rowsPerBand);
98  else if (kmerParams.version == 2) // 2 and 30 should be variables
99  get_LSH_hashes2(kmerSearch.queryHash, kmerSearch.queryLSHHash, kmerParams.rowsPerBand, kmerParams.samples, kvector);
100  else
101  get_LSH_hashes5(kmerSearch.queryHash, kmerSearch.queryLSHHash, kmerParams.numHashes, kmerParams.rowsPerBand);
102 }
103 
104 void
105 CBlastKmer::x_RunKmerFile(const vector < vector <uint32_t> >& query_hash, const vector < vector <uint32_t> >& query_LSH_hash, CMinHashFile& mhfile, TBlastKmerPrelimScoreVector& score_vector, BlastKmerStats& kmer_stats)
106 {
107 
108  int num_hashes = mhfile.GetNumHashes();
109 
110  // LSH parameters per http://infolab.stanford.edu/~ullman/mmds/ch3.pdf
111 
112  // populate LSH tables
113  uint64_t* lsh = mhfile.GetLSHArray();
114  int kmerVer = mhfile.GetVersion();
115 
116  vector< set<uint32_t > > candidates;
117  candidates.resize(query_hash.size());
118  get_LSH_match_from_hash(query_LSH_hash, lsh, candidates);
119  int minHits = m_Opts->GetMinHits();
120  if (minHits == 0)
121  { // Choose value based upon alphabet
122  if (mhfile.GetAlphabet() == 0)
123  minHits=1;
124  else
125  minHits=2;
126  }
127 
128  neighbor_query(query_hash,
129  lsh,
130  candidates,
131  mhfile,
132  num_hashes,
133  minHits,
134  m_Opts->GetThresh(),
135  score_vector,
136  kmer_stats,
137  kmerVer);
138 
139  kmer_stats.num_sequences = mhfile.GetNumSeqs();
140 
141  return;
142 }
143 
144 static void
145 s_GetQuerySequence(const TSeqLocVector& query_vector, string& query_seq, CRef<CSeq_id>& seqid, int queryNum)
146 {
147  query_seq.clear();
148  CSeqVector seqvect(*(query_vector[queryNum].seqloc), *(query_vector[queryNum].scope));
150  seqvect.GetSeqData(0, seqvect.size(), query_seq);
151  seqid.Reset(new CSeq_id());
152  seqid->Assign(*(query_vector[queryNum].seqloc->GetId()));
153 }
154 
155 static void
157 {
158  for(TBlastKmerPrelimScoreVector::iterator iter=score_vector.begin(); iter != score_vector.end(); ++iter)
159  {
160  (*iter).first += offset;
161  }
162 }
163 
/// Ordering predicate for sorting matches by descending score.
///
/// @return true if the score (second member) of `one` is strictly greater
///         than that of `two`; the first member is not consulted.
bool
s_SortFinalResults(const pair<uint32_t, double>& one, const pair<uint32_t, double>& two)
{
    const double lhsScore = one.second;
    const double rhsScore = two.second;
    return rhsScore < lhsScore;
}
169 
170 void
171 s_GetAllGis(vector<TGi>& retvalue, TBlastKmerPrelimScoreVector results, CRef<CSeqDB> seqdb)
172 {
173  for(TBlastKmerPrelimScoreVector::iterator itr=results.begin(); itr != results.end(); ++itr)
174  {
175  seqdb->GetGis((*itr).first, retvalue, true);
176  }
177 }
178 
180 CBlastKmer::x_SearchMultipleQueries(int firstQuery, int numQuery, const SBlastKmerParameters& kmerParams, uint32_t *a, uint32_t *b, vector < vector<int> >& kValues, vector<int> badMers)
181 {
182  TQueryMessages errs;
183  int numThreads = (int) GetNumberOfThreads();
184  int numFiles = static_cast<int>( m_KmerFiles.size());
185  if (numThreads > numQuery)
186  numThreads = numFiles;
187 
188  vector<SOneBlastKmerSearch> kmerSearchVector;
189  kmerSearchVector.reserve(numQuery);
190  for (int i=0; i<numQuery; i++)
191  {
192  SOneBlastKmerSearch kmerSearch(numFiles);
193  try {
194  string query_seq;
195  CRef<CSeq_id> qseqid;
196  s_GetQuerySequence(m_QueryVector, query_seq, qseqid, i+firstQuery);
197  if (query_seq.length() < static_cast<string::size_type>(kmerParams.kmerNum))
198  NCBI_THROW(CException, eUnknown, "WARNING: Query shorter than KMER length");
199 
200  kmerSearch.qSeqid = qseqid;
201  x_ProcessQuery(query_seq, kmerSearch, kmerParams, a, b, kValues, badMers);
202  } catch (const ncbi::CException& e) {
203  kmerSearch.status=1;
204  string msg = e.GetMsg();
205  kmerSearch.errDescription=msg;
206  if (msg.find("WARNING:") != std::string::npos)
207  kmerSearch.severity=eBlastSevWarning;
208  else
209  kmerSearch.severity=eBlastSevError;
210  } catch (const std::exception& e) {
211  kmerSearch.status=1;
212  kmerSearch.errDescription=string(e.what());
213  kmerSearch.severity=eBlastSevError;
214  } catch (...) {
215  kmerSearch.status=1;
216  kmerSearch.errDescription=string("Unknown error");
217  kmerSearch.severity=eBlastSevError;
218  }
219  kmerSearchVector.push_back(kmerSearch);
220  }
221 
222 #pragma omp parallel for num_threads(numThreads)
223 for(int index=0; index<numFiles; index++)
224 {
225  CMinHashFile mhfile(m_KmerFiles[index]);
226  for (int i=0; i<numQuery; i++)
227  {
228  SOneBlastKmerSearch& kmerSearch = kmerSearchVector[i];
229  if (kmerSearch.status)
230  continue;
231  x_RunKmerFile(kmerSearch.queryHash, kmerSearch.queryLSHHash, mhfile, kmerSearch.scoreVector[index], (kmerSearch.kmerStatsVector[index]));
232  }
233 }
234 
235 
236  CRef<CBlastKmerResultsSet> kmerResultSet(new CBlastKmerResultsSet());
237  for (int i=0; i<numQuery; i++)
238  {
239  CRef<CBlastKmerResults> kmerResults;
240  SOneBlastKmerSearch& kmerSearch = kmerSearchVector[i];
241  if (kmerSearch.status)
242  {
243  kmerResults.Reset(MakeEmptyResults(m_QueryVector, i+firstQuery, kmerSearch.errDescription, kmerSearch.severity));
244  kmerResultSet->push_back(kmerResults);
245  continue;
246  }
247 
248  BlastKmerStats kmer_stats;
249 
250  TBlastKmerPrelimScoreVector final_results;
251  size_t final_size=0; // Total elements needed for all vectors.
252  for (int index=0; index<numFiles; index++)
253  final_size += kmerSearch.scoreVector[index].size();
254  final_results.reserve(final_size);
255 
256  int offset=0;
257  for (int index=0; index<numFiles; index++)
258  {
259  TBlastKmerPrelimScoreVector score_vector = kmerSearch.scoreVector[index];
260  if (kmerParams.version > 1)
261  {
262  s_AdjustPrelimScoreVectorOID(score_vector, offset);
263  offset += kmerSearch.kmerStatsVector[index].num_sequences;
264  }
265  final_results.insert(final_results.end(), score_vector.begin(), score_vector.end());
266 
267  // Sum the statistics.
268  kmer_stats.hit_count += kmerSearch.kmerStatsVector[index].hit_count;
269  kmer_stats.jd_count += kmerSearch.kmerStatsVector[index].jd_count;
270  kmer_stats.oids_considered += kmerSearch.kmerStatsVector[index].oids_considered;
271  kmer_stats.jd_oid_count += kmerSearch.kmerStatsVector[index].jd_oid_count;
272  kmer_stats.total_matches += kmerSearch.kmerStatsVector[index].total_matches;
273  kmer_stats.num_sequences += kmerSearch.kmerStatsVector[index].num_sequences;
274  }
275 
276  // Sort by score
277  sort(final_results.begin(), final_results.end(), s_SortFinalResults);
278  if (m_Opts->GetNumTargetSeqs() > 0)
279  {
280  int vec_size = static_cast<int>( final_results.size() );
281  int num_matches = m_Opts->GetNumTargetSeqs();
282  if (vec_size > num_matches)
283  final_results.erase(final_results.begin()+num_matches, final_results.end());
284  }
285 
286  CRef<CSeqDB> seqdb;
287  if (m_GIList && !m_GIList.Empty())
288  {
289  vector<TGi> myGis;
290  s_GetAllGis(myGis, final_results, m_SeqDB);
291  CRef<CSeqDBGiList> intersect( new CIntersectionGiList(*m_GIList, myGis));
292  const string& dbname = m_SeqDB->GetDBNameList();
293  if (intersect->Size() > 0)
294  seqdb.Reset(new CSeqDB(dbname, CSeqDB::eProtein, intersect));
295  else // No matches that have proper GI
296  final_results.erase(final_results.begin(), final_results.end());
297  }
298  else if (m_NegGIList && !m_NegGIList.Empty())
299  {
300  vector<TGi> myGis;
301  s_GetAllGis(myGis, final_results, m_SeqDB);
302  CRef<CSeqDBGiList> intersect( new CIntersectionGiList(*m_NegGIList, myGis));
303  const string& dbname = m_SeqDB->GetDBNameList();
304  if (intersect->Size() > 0)
305  seqdb.Reset(new CSeqDB(dbname, CSeqDB::eProtein, intersect));
306  else // No matches that have proper GI
307  final_results.erase(final_results.begin(), final_results.end());
308  }
309  if (seqdb.NotEmpty())
310  kmerResults.Reset(new CBlastKmerResults(kmerSearch.qSeqid, final_results, kmer_stats, seqdb, errs));
311  else
312  kmerResults.Reset(new CBlastKmerResults(kmerSearch.qSeqid, final_results, kmer_stats, m_SeqDB, errs));
313  kmerResultSet->push_back(kmerResults);
314  }
315  return kmerResultSet;
316 }
317 
320  CRef<CBlastKmerResultsSet> retval = Run();
321  return retval;
322 }
323 
326  CMinHashFile mhfile(m_KmerFiles[0]);
327  int kmerVer = mhfile.GetVersion();
328  int num_hashes = mhfile.GetNumHashes();
329  int samples = mhfile.GetSegStatus();
330  int kmerNum = mhfile.GetKmerSize();
331  int alphabetChoice = mhfile.GetAlphabet();
332  uint32_t* random_nums = mhfile.GetRandomNumbers();
333  int rows_per_band = mhfile.GetRows();
334  vector<int> badMers;
335  mhfile.GetBadMers(badMers);
336 
337  // hash coefficients
338  vector<uint32_t> a(num_hashes);
339  vector<uint32_t> b(num_hashes);
340 
341  // obtain random coefficients for hashing
342  if (kmerVer == 3)
343  {
344  a[0] =random_nums[0];
345  b[0] =random_nums[1];
346  }
347  else
348  {
349  for(int i=0;i<num_hashes;i++)
350  a[i] = random_nums[i];
351  for(int i=0;i<num_hashes;i++)
352  b[i] = random_nums[i+num_hashes];
353  }
354 
355  vector < vector<int> > kValues;
356  if (kmerVer == 2)
357  {
358  int total=0;
359  unsigned char* kvaluesArray = mhfile.GetKValues();
360  for (int i=0; i<samples; i++)
361  {
362  vector<int> temp;
363  for (int j=0; j<rows_per_band; j++)
364  temp.push_back(kvaluesArray[total++]);
365  kValues.push_back(temp);
366  }
367  }
368 
369  SBlastKmerParameters kmerParams(num_hashes, rows_per_band, samples, kmerNum, alphabetChoice, kmerVer);
370  if (kmerVer > 2)
371  kmerParams.chunkSize = mhfile.GetChunkSize();
372 
373  int numQueries = static_cast<int>(m_QueryVector.size());
374 
375  CRef<CBlastKmerResultsSet> kmerResultsSet = x_SearchMultipleQueries(0, numQueries, kmerParams, a.data(), b.data(), kValues, badMers);
376 
377  return kmerResultsSet;
378 }
379 
380 END_SCOPE(blast)
382 
@ eBlastSevError
Definition: blast_message.h:58
@ eBlastSevWarning
Definition: blast_message.h:57
void s_GetAllGis(vector< TGi > &retvalue, TBlastKmerPrelimScoreVector results, CRef< CSeqDB > seqdb)
Definition: blastkmer.cpp:171
static void s_AdjustPrelimScoreVectorOID(TBlastKmerPrelimScoreVector &score_vector, int offset)
Definition: blastkmer.cpp:156
bool s_SortFinalResults(const pair< uint32_t, double > &one, const pair< uint32_t, double > &two)
Definition: blastkmer.cpp:165
static void s_GetQuerySequence(const TSeqLocVector &query_vector, string &query_seq, CRef< CSeq_id > &seqid, int queryNum)
Definition: blastkmer.cpp:145
CRef< CBlastKmerResults > MakeEmptyResults(TSeqLocVector &queryVector, int queryNum, const string &errMsg, EBlastSeverity severity=eBlastSevError)
Empty results (use on error)
vector< pair< uint32_t, double > > TBlastKmerPrelimScoreVector
Vector of pairs of database OIDs and scores.
void neighbor_query(const vector< vector< uint32_t > > &query_hash, const uint64_t *lsh, vector< set< uint32_t > > &candidates, CMinHashFile &mhfile, int num_hashes, int min_hits, double thresh, TBlastKmerPrelimScoreVector &score_vector, BlastKmerStats &kmer_stats, int kmerVersion)
void get_LSH_hashes(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int num_bands, int rows_per_band)
void get_LSH_hashes5(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int numHashes, int numRows)
Gets the LSH hash for one hash function.
bool minhash_query2(const string &query, vector< vector< uint32_t > > &seq_hash, int kmerNum, int numHashes, int alphabetChoice, vector< int > badMers, int chunkSize)
Hash the query for the minimum values.
void get_LSH_match_from_hash(const vector< vector< uint32_t > > &lsh_hash_vec, const uint64_t *lsh_array, vector< set< uint32_t > > &candidates)
bool minhash_query(const string &query, vector< vector< uint32_t > > &seq_hash, int num_hashes, uint32_t *a, uint32_t *b, int do_seg, int kmerNum, int alphabetChoice, int chunkSize)
void get_LSH_hashes2(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int num_k, int num_l, vector< vector< int > > &kValues)
Class of options for the KMER search.
int GetNumTargetSeqs() const
Gets the number of matches (subject sequences) to return.
double GetThresh() const
Get the threshold.
int GetMinHits() const
Get the number of LSH hits to initiate the calculation of the Jaccard distance.
bool Validate() const
Checks that options are valid.
This class holds one or more CBlastKmerResults.
This class represents the results for one KMER search (one query).
Class to perform a KMER-BLASTP search.
Definition: blastkmer.hpp:72
CRef< CBlastKmerOptions > m_Opts
Specifies values for some options (e.g., threshold)
Definition: blastkmer.hpp:161
CRef< CSeqDBNegativeList > m_NegGIList
Negative GIList to limit search by.
Definition: blastkmer.hpp:174
TSeqLocVector m_QueryVector
Holds the query seqloc and scope.
Definition: blastkmer.hpp:158
CRef< CBlastKmerResultsSet > Run()
Performs search on one or more queries.
Definition: blastkmer.cpp:325
void x_RunKmerFile(const vector< vector< uint32_t > > &query_hash, const vector< vector< uint32_t > > &query_LSH_hash, CMinHashFile &mhfile, TBlastKmerPrelimScoreVector &score_vector, BlastKmerStats &kmer_stats)
Search individual kmer file.
Definition: blastkmer.cpp:105
CRef< CBlastKmerResultsSet > x_SearchMultipleQueries(int firstQuery, int numQuery, const SBlastKmerParameters &kmerParams, uint32_t *a, uint32_t *b, vector< vector< int > > &kValues, vector< int > badMers)
Search multiple queries.
Definition: blastkmer.cpp:180
CRef< CSeqDBGiList > m_GIList
GIList to limit search by.
Definition: blastkmer.hpp:170
CRef< CBlastKmerResultsSet > RunSearches()
Definition: blastkmer.cpp:319
CBlastKmer(TSeqLocVector &query_vector, CRef< CBlastKmerOptions > options, CRef< CSeqDB > seqdb, string kmerfile=kEmptyStr)
Constructor Processes all proteins in TSeqLocVector.
Definition: blastkmer.cpp:43
CRef< CSeqDB > m_SeqDB
CSeqDB for BLAST db.
Definition: blastkmer.hpp:164
vector< string > m_KmerFiles
Name of the kmer files.
Definition: blastkmer.hpp:167
void x_ProcessQuery(const string &query_seq, SOneBlastKmerSearch &kmerSearch, const SBlastKmerParameters &kmerParams, uint32_t *a, uint32_t *b, vector< vector< int > > &kvalues, vector< int > badMers)
Preprocess query to sequence hashes.
Definition: blastkmer.cpp:79
GI list containing the intersection of two other lists of GIs.
Access data in Minhash files.
Definition: mhfile.hpp:108
void GetBadMers(vector< int > &badMers) const
Overrepresented KMERs.
Definition: mhfile.cpp:112
int GetNumHashes(void) const
Returns the number of values in an array of hashes (probably 32)
Definition: mhfile.hpp:118
uint64_t * GetLSHArray(void) const
Definition: mhfile.hpp:151
int GetVersion(void) const
Definition: mhfile.hpp:113
int GetNumSeqs(void) const
Definition: mhfile.hpp:115
int GetRows(void) const
Definition: mhfile.hpp:125
uint32_t * GetRandomNumbers(void) const
Definition: mhfile.cpp:102
int GetChunkSize(void) const
Get number of letters in a chunk (version 3 or higher)
Definition: mhfile.hpp:140
int GetKmerSize(void) const
Returns the length of the KMER.
Definition: mhfile.hpp:123
int GetSegStatus(void) const
Definition: mhfile.hpp:120
int GetAlphabet(void) const
One of two alphabets from Shiryev et al.
Definition: mhfile.hpp:131
unsigned char * GetKValues(void) const
LSH points for Buhler approach.
Definition: mhfile.cpp:128
CRef –.
Definition: ncbiobj.hpp:618
int Size() const
CSeqDB.
Definition: seqdb.hpp:161
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
Definition: seqdb.cpp:1040
void GetGis(int oid, vector< TGi > &gis, bool append=false) const
Gets a list of GIs for an OID.
Definition: seqdb.cpp:1070
const string & GetDBNameList() const
Get list of database names.
Definition: seqdb.cpp:760
@ eProtein
Definition: seqdb.hpp:174
CSeqVector –.
Definition: seq_vector.hpp:65
Class for the messages for an individual query sequence.
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void SetCoding(TCoding coding)
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
constexpr auto sort(_Init &&init)
unsigned int a
Definition: ncbi_localip.c:102
int offset
Definition: replacements.h:160
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
unsigned int uint32_t
Definition: stdint.h:126
unsigned __int64 uint64_t
Definition: stdint.h:136
Structure for ancillary data on KMER search.
int jd_count
How often was the Jaccard distance calculated.
int total_matches
How many matches returned.
int num_sequences
Number of database sequences considered (in this volume)
int oids_considered
How many OIDs were considered as candidates.
int hit_count
How many hits to the hash array were there?
int jd_oid_count
How many OIDs was the Jaccard distance calculated for.
int version
Version of index used (0 indicates default).
int chunkSize
size of a query chunk to process (default is 150).
int numHashes
Number of hash functions per signature.
int samples
Number of samples made of the query signature.
int rowsPerBand
Number of values sampled from signature.
int alphabetChoice
15 or 10 letter alphabet (0 for 15, 1 for 10).
int kmerNum
number of letters in KMER.
vector< TBlastKmerPrelimScoreVector > scoreVector
Scores for one query.
EBlastSeverity severity
Error or warning (only use if status is non-zero).
int status
Status of the query (0 is good, otherwise an error has occurred)
vector< vector< uint32_t > > queryLSHHash
LSH Hashes for one query (multiple chunks)
vector< BlastKmerStats > kmerStatsVector
Stats for one query.
vector< vector< uint32_t > > queryHash
Hashes for one query (multiple chunks)
CRef< CSeq_id > qSeqid
Seqid of the query.
string errDescription
Error description.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
static string query
Modified on Fri Dec 01 04:47:15 2023 by modify_doxy.py rev. 669887