NCBI C++ ToolKit
blast_dbindex.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_dbindex.cpp 102142 2024-04-09 11:57:30Z camacho $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksandr Morgulis
27 *
28 */
29 
30 /// @file blast_dbindex.cpp
31 /// Functionality for indexed databases
32 
33 #include <ncbi_pch.hpp>
34 #include <sstream>
35 #include <list>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbithr.hpp>
42 
45 
49 
51 
52 // Comment this out to continue with extensions.
53 // #define STOP_AFTER_PRESEARCH 1
54 
55 // Comment this to suppress index-related tracing
56 // #define TRACE_DBINDEX 1
57 
58 #ifdef TRACE_DBINDEX
59 # define IDX_TRACE(_m) { std::cerr << _m << std::endl; }
60 #else
61 # define IDX_TRACE(_m)
62 #endif
63 
64 /** @addtogroup AlgoBlast
65  *
66  * @{
67  */
68 
69 extern "C" {
70 
71 /** Get the seed search results for a given subject id and chunk number.
72 
73  @param idb_v [I] Database and index data.
74  @param oid_i [I] Subject id.
75  @param chunk_i [I] Chunk number.
76  @param init_hitlist [I/O] Results are returned here.
77 
78  @return Word size used for search.
79 */
80 static unsigned long s_MB_IdbGetResults(
81  Int4 oid_i, Int4 chunk_i,
82  BlastInitHitList * init_hitlist );
83 
84 static int s_MB_IdbCheckOid( Int4 oid, Int4 * last_vol_oid );
85 
86 static void s_MB_IdxEndSearchIndication( Int4 last_vol_id );
87 }
88 
90 BEGIN_SCOPE( blast )
91 
93 USING_SCOPE( ncbi::blastdbindex );
94 
/** Placeholder callback for setting the concurrency state.
    The argument is ignored and the body is empty.
    @sa DbIndexSetUsingThreadsFnType()
*/
static void NullSetUsingThreads( bool /* multiple_threads */ )
{
}
99 
/** Placeholder callback for setting the number of threads.
    The argument is ignored and the body is empty.
    @sa DbIndexSetNumThreadsFnType()
*/
static void NullSetNumThreads( size_t /* n_threads */ )
{
}
104 
105 /** No-op callback for setting query info. Used when index search is not enabled.
106  @sa DbIndexSetQueryInfoFnType()
107 */
108 static void NullSetQueryInfo(
109  LookupTableWrap * ,
111 
112 /** No-op callback to run indexed search. Used when index search is not enabled.
113  @sa DbIndexRunSearchFnType()
114 */
115 static void NullRunSearch(
116  BLAST_SequenceBlk * ,
118 
119 /** Global pointer to the appropriate callback to set the state of concurrency. */
121 
122 /** Global pointer to the appropriate callback to set the number of threads. */
124 
125 /** Global pointer to the appropriate callback to set query info, based
126  on whether or not index search is enabled.
127 */
129 
130 /** Global pointer to the appropriate callback to run indexed search, based
131  on whether or not index search is enabled.
132 */
134 
135 //------------------------------------------------------------------------------
136 /** This class is responsible for loading indices and doing the actual
137  seed search.
138 
139  It acts as a middle man between the blast engine and dbindex library.
140 */
141 class CIndexedDb : public CObject
142 {
143 protected:
144 
145  CRef< CBlastSeqLocWrap > locs_wrap_; /**< Current set of unmasked query locations. */
146 
147 public:
148 
149  static CRef< CIndexedDb > Index_Set_Instance; /**< Shared representation of
150  currently loaded index volumes. */
151 
152  /** Object destructor. */
153  virtual ~CIndexedDb();
154 
155  /** Check whether any results were reported for a given subject sequence.
156 
157  @param oid The subject sequence id
158  @param last_vol_id The volume id checked just before oid
159  @return 0 --- if oid was handled by indexed search but no seeds found;
160  1 --- if oid was handled by indexed search and seeds were found;
161  2 --- if oid was not handled by indexed search
162  */
163  virtual int CheckOid( Int4 oid, Int4 * last_vol_id ) = 0;
164 
165  /** Function used by threads to indicate that they are done with
166  iterating over the database sequences.
167 
168  @param last_vol_id the last accessed volume index
169  */
170  virtual void EndSearchIndication( Int4 last_vol_id ) = 0;
171 
172  /** Run preliminary indexed search functionality.
173 
174  @param queries Queries descriptor.
175  @param locs Unmasked intervals of queries.
176  @param lut_options Lookup table parameters, like target word size.
177  @param word_options Contains window size of two-hits based search.
178  */
179  virtual void DoPreSearch(
180  BLAST_SequenceBlk * queries,
181  LookupTableOptions * lut_options,
182  BlastInitialWordOptions * word_options ) = 0;
183 
184  /** Set the current set of unmasked query segments.
185  @param locs_wrap unmasked query segments
186  */
188  { locs_wrap_ = locs_wrap; }
189 
190  /** Return results corresponding to a given subject sequence and chunk.
191 
192  @param oid [I] The subject sequence id.
193  @param chunk [I] The chunk number.
194  @param init_hitlist [I/O] The results are returned here.
195 
196  @return Word size used for search.
197  */
198  virtual unsigned long GetResults(
199  CDbIndex::TSeqNum oid,
200  CDbIndex::TSeqNum chunk,
201  BlastInitHitList * init_hitlist ) const = 0;
202 
203  virtual int MinIndexWordSize() = 0;
204 };
205 
206 //------------------------------------------------------------------------------
207 /** Index wrapper for old style MegaBLAST indexing functionality.
208 */
210 {
211 private:
212 
213  /** Type used to represent collections of search result sets. */
214  typedef vector< CConstRef< CDbIndex::CSearchResults > > TResultSet;
215 
216  /** Type used to map loaded indices to subject ids. */
217  typedef vector< CDbIndex::TSeqNum > TSeqMap;
218 
219  /** Find an index corresponding to the given subject id.
220 
221  @param oid The subject sequence id.
222  @return Index of the corresponding index data in
223  \e this->indices_.
224  */
225  TSeqMap::size_type LocateIndex( CDbIndex::TSeqNum oid ) const
226  {
227  for( TSeqMap::size_type i = 0; i < seqmap_.size(); ++i ) {
228  if( seqmap_[i] > oid ) return i;
229  }
230 
231  assert( 0 );
232  return 0;
233  }
234 
235  TResultSet results_; /**< Set of result sets, one per loaded index. */
236  TSeqMap seqmap_; /**< For each element of \e indices_ with index i
237  seqmap_[i] contains one plus the last oid of
238  that database index. */
239 
240  vector< string > index_names_; /**< List of index volume names. */
241  CRef< CDbIndex > index_; /**< Currently loaded index */
242 
243 public:
244 
245  /** Object constructor.
246 
247  @param indexname A string that is a comma separated list of index
248  file prefix, number of threads, first and
249  last chunks of the index.
250  */
251  explicit CIndexedDb_Old( const string & indexname );
252 
253  /** Check whether any results were reported for a given subject sequence.
254 
255  @note Overrides CIndexedDb::CheckOid()
256 
257  @param oid The subject sequence id.
258  @return 0 --- if no seeds were found for oid;
259  1 --- if seeds were found for oid;
260  */
261  virtual int CheckOid( Int4 oid, Int4 * )
262  {
263  TSeqMap::size_type i = LocateIndex( oid );
265  if( i > 0 ) oid -= seqmap_[i-1];
266  return results->CheckResults( oid ) ? eHasResults : eNoResults;
267  }
268 
269  /** Not used */
270  virtual void EndSearchIndication( Int4 ) {}
271 
272  virtual int MinIndexWordSize();
273 
274 private:
275 
276  /** Invoke the seed search procedure on each of the loaded indices.
277 
278  Each search is run in a separate thread. The function waits until
279  all threads are complete before it returns.
280 
281  @param queries Queries descriptor.
282  @param locs Unmasked intervals of queries.
283  @param lut_options Lookup table parameters, like target word size.
284  @param word_options Contains window size of two-hits based search.
285  */
286  void PreSearch(
287  BLAST_SequenceBlk * queries, BlastSeqLoc * locs,
288  LookupTableOptions * lut_options,
289  BlastInitialWordOptions * word_options );
290 
291 public:
292 
293  /** Wrapper around PreSearch().
294 
295  Runs PreSearch() and then frees locs_wrap_.
296 
297  @note Overrides CIndexedDb::DoPreSearch().
298  */
299  virtual void DoPreSearch(
300  BLAST_SequenceBlk * queries,
301  LookupTableOptions * lut_options,
302  BlastInitialWordOptions * word_options )
303  {
304  PreSearch(
305  queries, locs_wrap_->getLocs(),
306  lut_options, word_options );
308  }
309 
310  /** Return results corresponding to a given subject sequence and chunk.
311 
312  @note Overrides CIndexedDb::GetResults().
313 
314  @param oid [I] The subject sequence id.
315  @param chunk [I] The chunk number.
316  @param init_hitlist [I/O] The results are returned here.
317 
318  @return Word size used for search.
319  */
320  virtual unsigned long GetResults(
321  CDbIndex::TSeqNum oid,
322  CDbIndex::TSeqNum chunk,
323  BlastInitHitList * init_hitlist ) const;
324 };
325 
326 //------------------------------------------------------------------------------
327 /** Index wrapper for new style MegaBLAST indexing functionality.
328 
329  Each leaf volume of the BLAST database is indexed separately (with
330  possibly multiple index volumes) or not indexed at all. For the database
331  volumes that are not indexed processing is delegated back to the default
332  BLAST search.
333 */
335 {
336 private:
337 
338  /** Alias for a vector of strings. */
339  typedef std::vector< std::string > TStrVec;
340 
341  /** Information about one leaf index volume. */
343  {
344  SIZE_TYPE start_oid; ///< OId of the first sequence of the volume.
345  SIZE_TYPE n_oids; ///< Number of sequences in the volume.
346  std::string name; ///< Fully qualified name of the volume.
347  bool has_index; ///< 'true' if the volume is indexed.
348 
349  /** Volumes are compared by their starting ordinal ids. */
350  friend bool operator<(
351  const SVolumeDescriptor & a, const SVolumeDescriptor & b )
352  {
353  return a.start_oid < b.start_oid;
354  }
355 
356  /** This is only used for debug tracing. Print out information
357  about the volume.
358  */
359  friend std::ostream & operator<<(
360  std::ostream & os, const SVolumeDescriptor & vd )
361  {
362  os << vd.name << '[' << vd.start_oid << ',' << vd.n_oids << ','
363  << vd.has_index << ']';
364  return os;
365  }
366  };
367 
368  /// List of leaf index volumes.
369  typedef std::vector< SVolumeDescriptor > TVolList;
370 
371  /// This type captures the seeds found by search of an index volume.
373 
374  /** Reference count for the volume results.
375 
376  Holds results for a given volume only while there is a search
377  thread potentially in need of those results.
378  */
379  struct SVolResults
380  {
381  SVolResults() : ref_count( 0 ) {}
382 
383  TVolResults res; ///< Seed set or null.
384  int ref_count; ///< How many threads still need the result set.
385  };
386 
387  /// List of reference counted result holders.
388  typedef std::vector< SVolResults > TResultsHolder;
389 
390  /** Generate a list of BLAST database names from a single string.
391 
392  @param db_spec string containing space separated list of names
393  @param db_names [out] resulting list of database names
394  */
395  static void ParseDBNames( const std::string db_spec, TStrVec & db_names );
396 
397  /** Generate a list of leaf database volumes from a list of
398  database names.
399 
400  @param db_names BLAST database names
401  @param db_vols [out] resulting list of leaf database volume names
402  */
403  static void EnumerateDbVolumes(
404  const TStrVec & db_names, TStrVec & db_vols );
405 
406  /** This is only used for debugging output. */
407  static void TraceNames( const TStrVec & names )
408  {
409 #ifdef TRACE_DBINDEX
410  ITERATE( TStrVec, i, names ) { IDX_TRACE( "\t" << *i ); }
411 #endif
412  }
413 
414  /** This is only used for debugging output. */
415  void TraceVolumes( void )
416  {
417 #ifdef TRACE_DBINDEX
418  ITERATE( TVolList, i, volumes_ ) { IDX_TRACE( "\t" << *i ); }
419 #endif
420  }
421 
422  /** Auxiliary function that returns the oid value
423  that is one more than the largest oid used so far.
424  */
425  SIZE_TYPE GetNextUnusedOID( void ) const;
426 
427  /** Update the seed sets, if necessary.
428 
429  If oid belongs to the volume at vol_idx, then does nothing.
430  Otherwise finds the index of the volume containing oid and
431  saves it in *vol_idx. Updates the reference counts of all
432  volumes between the old and new values of *vol_idx, releasing
433  the result sets if necessary. If the results for new volume
434  are not yet available, searches the new volume and stores
435  the results.
436 
437  @param oid ordinal id of the subject sequence
438  @param vol_idx [in/out] index of the volume containing ordinal
439  id of the sequence last accessed by this thread;
440  updated to the index of the volume containing
441  oid
442  */
443  void UpdateIndex( Int4 oid, Int4 * vol_idx );
444 
445  /* Add index volumes corresponding to the given database volume.
446 
447  If an index exists for the given database volume, adds information
448  about all corresponding index volumes to volumes_. Otherwise a single
449  entry is added to volumes_ with has_index flag set to 'false'.
450 
451  @param vol_name database volume name
452  @param idx_not_resolved [out] returns 'true' if the database volume
453  has no associated index
454  */
455  void AddIndexInfo( const std::string & vol_name, bool & idx_not_resolved );
456 
457  virtual int MinIndexWordSize();
458 
459  /** Find a volume containing the given subject ordinal id. */
460  TVolList::const_iterator FindVolume( SIZE_TYPE oid ) const
461  {
462  SVolumeDescriptor s = { oid };
463  TVolList::const_iterator r(
464  std::upper_bound( volumes_.begin(), volumes_.end(), s ) );
465  ASSERT( r != volumes_.begin() );
466  return --r;
467  }
468 
469  TVolList volumes_; ///< index volume descriptors
470  TResultsHolder results_holder_; ///< reference counted seed set holders
471  CFastMutex mtx_; ///< mutex used for thread sync
472  BLAST_SequenceBlk * queries_; ///< query data (from BLAST)
473  CDbIndex::SSearchOptions sopt_; ///< common search parameters
474  bool multiple_threads_; /**< flag indicating that multithreading
475  is in effect */
476  size_t n_threads_; ///< number of search threads running
477 
478 public:
479 
480  /** Object constructor.
481 
482  If all database indices were resolved successfully, then 'false' is
483  returned in partial; otherwise 'true' is returned.
484 
485  @param indexname MegaBLAST database name (can be a space separated
486  list of databases)
487  @param partial [O] returns 'true' if not all database indices were
488  resolved
489  */
490  explicit CIndexedDb_New( const string & indexname, bool & partial );
491 
492  /** Object destructor.
493  */
494  virtual ~CIndexedDb_New();
495 
496  /** Check whether any results were reported for a given subject sequence.
497 
498  @note Overrides CIndexedDb::CheckOid().
499 
500  @param oid The subject sequence id.
501  @param last_vol_id The volume id checked just before oid
502  @return 0 --- if oid was handled by indexed search but no seeds found;
503  1 --- if oid was handled by indexed search and seeds were found;
504  2 --- if oid was not handled by indexed search
505  */
506  virtual int CheckOid( Int4 oid, Int4 * last_vol_id );
507 
508  /** Function used by threads to indicate that they are done with
509  iterating over the database sequences.
510 
511  @param last_vol_id the last accessed volume index
512  */
513  virtual void EndSearchIndication( Int4 last_vol_id );
514 
515  /** Run preliminary indexed search functionality.
516 
517  @note Overrides CIndexedDb::DoPreSearch().
518 
519  @param queries Queries descriptor.
520  @param locs Unmasked intervals of queries.
521  @param lut_options Lookup table parameters, like target word size.
522  @param word_options Contains window size of two-hits based search.
523  */
524  virtual void DoPreSearch(
525  BLAST_SequenceBlk * queries,
526  LookupTableOptions * lut_options,
527  BlastInitialWordOptions * word_options );
528 
529  /** Return results corresponding to a given subject sequence and chunk.
530 
531  @note Overrides CIndexedDb::GetResults().
532 
533  @param oid [I] The subject sequence id.
534  @param chunk [I] The chunk number.
535  @param init_hitlist [I/O] The results are returned here.
536 
537  @return Word size used for search.
538  */
539  virtual unsigned long GetResults(
540  CDbIndex::TSeqNum oid,
541  CDbIndex::TSeqNum chunk,
542  BlastInitHitList * init_hitlist ) const;
543 
544  /** Set the concurrency status.
545 
546  @param multiple_threads 'true' if concurrent search is being performed;
547  'false' otherwise
548  */
549  void SetMultipleThreads( bool multiple_threads )
550  {
551  IDX_TRACE( "setting multiple threads to " <<
552  (multiple_threads ? "true" : "false") );
553  multiple_threads_ = multiple_threads;
554  if( multiple_threads_ ) n_threads_ = 0;
555  }
556 
557  /** Set the number of threads used for concurrent search.
558 
559  @param n_threads number of search threads.
560  */
561  void SetNumThreads( size_t n_threads )
562  {
563  ASSERT( n_threads > 1 );
564  IDX_TRACE( "setting number of search threads to " << n_threads );
565  n_threads_ = n_threads;
566  }
567 };
568 
569 //------------------------------------------------------------------------------
571 
572 //------------------------------------------------------------------------------
573 /// Run indexed search.
574 /// @param queries query data
575 /// @param lut_options lookup table parameters
576 /// @param word_options word parameters
577 static void IndexedDbRunSearch(
578  BLAST_SequenceBlk * queries,
579  LookupTableOptions * lut_options,
580  BlastInitialWordOptions * word_options )
581 {
582  CIndexedDb * idb( CIndexedDb::Index_Set_Instance.GetPointerOrNull() );
583  if( idb == 0 ) return;
584  idb->DoPreSearch( queries, lut_options, word_options );
585 }
586 
587 //------------------------------------------------------------------------------
588 /// Set state of concurrency in the index structure.
589 /// @param multiple_threads 'true' if multiple search threads are used;
590 /// 'false' otherwise
591 static void IndexedDbSetUsingThreads( bool multiple_threads )
592 {
593  CIndexedDb * idb( CIndexedDb::Index_Set_Instance.GetPointerOrNull() );
594  if( idb == 0 ) return;
595  CIndexedDb_New * idbn( dynamic_cast< CIndexedDb_New * >( idb ) );
596  if ( idbn == 0 ) return;
597  ASSERT( idbn != 0 );
598  idbn->SetMultipleThreads( multiple_threads );
599 }
600 
601 //------------------------------------------------------------------------------
602 /// Set the number of concurrent search threads in the index structure.
603 /// @param n_threads number of concurrent search threads.
604 static void IndexedDbSetNumThreads( size_t n_threads )
605 {
606  CIndexedDb * idb( CIndexedDb::Index_Set_Instance.GetPointerOrNull() );
607  if( idb == 0 ) return;
608  CIndexedDb_New * idbn( dynamic_cast< CIndexedDb_New * >( idb ) );
609  if ( idbn == 0 ) return;
610  ASSERT( idbn != 0 );
611  idbn->SetNumThreads( n_threads );
612 }
613 
614 //------------------------------------------------------------------------------
615 /// Set information about unmasked query segments.
616 /// @param lt_wrap lookup table information to update
617 /// @param locs_wrap set of unmasked query segments
619  LookupTableWrap * lt_wrap,
620  CRef< CBlastSeqLocWrap > locs_wrap )
621 {
622  CIndexedDb * idb( CIndexedDb::Index_Set_Instance.GetPointerOrNull() );
623  if( idb == 0 ) return;
624  lt_wrap->read_indexed_db = (void *)(&s_MB_IdbGetResults);
625  lt_wrap->check_index_oid = (void *)(&s_MB_IdbCheckOid);
627  idb->SetQueryInfo( locs_wrap );
628 }
629 
630 //------------------------------------------------------------------------------
632  const std::string db_spec, TStrVec & db_names )
633 {
634  static const char * SEP = " ";
635 
636  string::size_type pos( 0 ), pos1( 0 );
637 
638  while( pos1 != string::npos ) {
639  pos1 = db_spec.find_first_of( SEP, pos );
640  db_names.push_back( db_spec.substr( pos, pos1 - pos ) );
641  pos = pos1 + 1;
642  }
643 }
644 
645 //------------------------------------------------------------------------------
647  const TStrVec & db_names, TStrVec & db_vols )
648 {
649  CSeqDB db( db_names, CSeqDB::eNucleotide, 0, 0, false );
650  db.FindVolumePaths( db_vols, true );
651 }
652 
653 //------------------------------------------------------------------------------
655 {
656  if( !volumes_.empty() ) {
657  const SVolumeDescriptor & vd( *volumes_.rbegin() );
658  return vd.start_oid + vd.n_oids;
659  }
660  else return 0;
661 }
662 
663 //------------------------------------------------------------------------------
665  const std::string & vol_name, bool & partial )
666 {
667  bool idx_not_resolved( false );
668  CSeqDB db( vol_name, CSeqDB::eNucleotide, 0, 0, false );
669  size_t dbnseq( (size_t)db.GetNumOIDs() );
671 
672  try {
673  shdr.Reset( GetIndexSuperHeader( vol_name + ".shd" ) );
674  }
675  catch( CException & e ) {
676  ERR_POST(
677  Info << "index superheader for volume " << vol_name
678  << " was not loaded (" << e.what() << ")" );
679  idx_not_resolved = true;
680  }
681 
682  if( !idx_not_resolved && shdr->GetNumSeq() != dbnseq ) {
683  ERR_POST(
684  Error << "numbers of OIDs reported by the database and "
685  << "by the index do not match. Index for volume "
686  << vol_name << " will not be used" );
687  idx_not_resolved = true;
688  }
689 
690  if( !idx_not_resolved ) {
691  size_t curr_vols_size( volumes_.size() );
692  size_t total_idxvol_oids( 0 );
693 
694  for( size_t i( 0 ), e( shdr->GetNumVol() ); i < e; ++i ) {
697  vol_name, i ) ) );
698 
699  if( name.empty() ) {
700  ERR_POST(
701  Error << "index volume " << name
702  << " not resolved; index will not be used for "
703  << vol_name );
704  idx_not_resolved = true;
705  }
706 
707  if( !idx_not_resolved ) {
708  size_t idxvol_oids( GetIdxVolNumOIDs( name ) );
709 
710  if( idxvol_oids == 0 ) {
711  idx_not_resolved = true;
712  ERR_POST(
713  Error << "index volume " << name
714  << " reports no sequences; index will "
715  << "not be used for " << vol_name );
716  }
717  else {
718  SVolumeDescriptor vd = {
719  GetNextUnusedOID(), idxvol_oids, name, true };
720  volumes_.push_back( vd );
721  total_idxvol_oids += idxvol_oids;
722  }
723  }
724 
725  if( idx_not_resolved ) {
726  volumes_.resize( curr_vols_size );
727  break;
728  }
729  }
730 
731  if( !idx_not_resolved && dbnseq != total_idxvol_oids ) {
732  ERR_POST(
733  Error << "total of oids reported by index volumes ("
734  << total_idxvol_oids << ") does not match "
735  << "the number of oids reported by the superheader ("
736  << dbnseq << "); index will not be used for "
737  << vol_name );
738  volumes_.resize( curr_vols_size );
739  idx_not_resolved = true;
740  }
741  }
742 
743  partial = (partial || idx_not_resolved);
744 
745  if( idx_not_resolved ) {
746  SVolumeDescriptor vd = { GetNextUnusedOID(), dbnseq, vol_name, false };
747  volumes_.push_back( vd );
748  return;
749  }
750 }
751 
752 //------------------------------------------------------------------------------
753 CIndexedDb_New::CIndexedDb_New( const string & indexname, bool & partial )
754  : queries_( 0 ), multiple_threads_( false ), n_threads_( 1 )
755 {
756  // ENABLE_IDX_TRACE;
757  IDX_TRACE( "creating new style CIndexedDb object" );
758  partial = false;
759 
760  // Enumerate the databases.
761  //
762  IDX_TRACE( "db spec given: " << indexname );
763  TStrVec db_names;
764  ParseDBNames( indexname, db_names );
765  IDX_TRACE( "list of databases:" );
766  TraceNames( db_names );
767 
768  // Enumerate primitive database volumes.
769  //
770  TStrVec db_vol_names;
771  EnumerateDbVolumes( db_names, db_vol_names );
772  IDX_TRACE( "list of database volumes in order:" );
773  TraceNames( db_vol_names );
774 
775  // Populate volume information for each resolved database volume.
776  //
777  ITERATE( TStrVec, dbvi, db_vol_names ) { AddIndexInfo( *dbvi, partial ); }
778  IDX_TRACE( "final index volume list:" );
779  TraceVolumes();
780 
781  // Check if any volume has index. If not, do not use indexing.
782  //
783  {
784  bool has_index( false );
785 
786  ITERATE( TVolList, i, volumes_ )
787  {
788  if( i->has_index ) {
789  has_index = true;
790  break;
791  }
792  }
793 
794  if( !has_index ) {
795  NCBI_THROW( CDbIndex_Exception, eBadOption,
796  "no database volume has an index" );
797  }
798  }
799 
800  // Initialize the results contexts.
801  //
802  results_holder_.resize( volumes_.size() );
803 }
804 
805 //------------------------------------------------------------------------------
807 {
808  IDX_TRACE( "destroying new style CIndexedDb object" );
809 }
810 
811 //------------------------------------------------------------------------------
812 void CIndexedDb_New::UpdateIndex( Int4 oid, Int4 * vol_idx_p )
813 {
814  Int4 & vol_idx( *vol_idx_p );
815  Int4 new_vol_idx;
816  bool find_volume( true );
817 
818  if( vol_idx != LAST_VOL_IDX_INIT ) {
819  const SVolumeDescriptor & vd( volumes_[vol_idx] );
820  if( vd.start_oid + vd.n_oids > (SIZE_TYPE)oid ) find_volume = false;
821  }
822 
823  if( !find_volume ) return;
824  TVolList::const_iterator vi( FindVolume( oid ) );
825  new_vol_idx = vi - volumes_.begin();
826  if( !vi->has_index ) { vol_idx = new_vol_idx; return; }
827  CFastMutexGuard lock( mtx_ );
828  SVolResults & res( results_holder_[new_vol_idx] );
829  Int4 min_vol_idx( vol_idx == -1 ? 0 : vol_idx );
830 
831  if( res.ref_count <= 0 ) {
832  res.ref_count += n_threads_;
833  IDX_TRACE( "loading volume " << new_vol_idx << ": " << vi->name );
834  ASSERT( vi->has_index );
835  CRef< CDbIndex > index( CDbIndex::Load( vi->name ) );
836 
837  if( index == 0 ) {
838  std::ostringstream os;
839  os << "CIndexedDb: could not load index volume: " << vi->name;
840  NCBI_THROW( CIndexedDbException, eIndexInitError, os.str() );
841  }
842 
843  IDX_TRACE( "searching volume " << vi->name );
844  res.res = index->Search( queries_, locs_wrap_->getLocs(), sopt_ );
845  IDX_TRACE( "results loaded for " << vi->name );
846  }
847 
848  for( ; min_vol_idx < new_vol_idx; ++min_vol_idx ) {
849  if( --results_holder_[min_vol_idx].ref_count == 0 ) {
850  results_holder_[min_vol_idx].res.Reset( 0 );
851  IDX_TRACE( "unloaded results for volume " <<
852  volumes_[min_vol_idx].name );
853  }
854  }
855 
856  vol_idx = new_vol_idx;
857 }
858 
859 //------------------------------------------------------------------------------
860 int CIndexedDb_New::CheckOid( Int4 oid, Int4 * last_vol_idx )
861 {
862  if( *last_vol_idx == LAST_VOL_IDX_NULL ) {
863  TVolList::const_iterator vi( FindVolume( oid ) );
864  if( vi->has_index ) return eHasResults;
865  else return eNotIndexed;
866  }
867 
868  UpdateIndex( oid, last_vol_idx );
869  TVolList::const_iterator vi( volumes_.begin() + *last_vol_idx );
870  if( !vi->has_index ) return eNotIndexed;
871  oid -= vi->start_oid;
872  return results_holder_[*last_vol_idx].res->CheckResults( oid ) ?
874 }
875 
876 //------------------------------------------------------------------------------
878 {
879  CFastMutexGuard lock( mtx_ );
880  if( last_vol_idx == LAST_VOL_IDX_INIT ) last_vol_idx = 0;
881 
882  for( Int4 i( last_vol_idx ); i < (Int4)volumes_.size(); ++i ) {
883  if( --results_holder_[i].ref_count == 0 ) {
884  results_holder_[i].res.Reset( 0 );
885  IDX_TRACE( "unloaded results for volume " << volumes_[i].name );
886  }
887  }
888 }
889 
890 //------------------------------------------------------------------------------
892  BLAST_SequenceBlk * queries, LookupTableOptions * lut_options,
893  BlastInitialWordOptions * word_options )
894 {
895  queries_ = queries;
896  sopt_.word_size = lut_options->word_size;
897  sopt_.two_hits = word_options->window_size;
898  IDX_TRACE( "set word size to " << sopt_.word_size );
899  IDX_TRACE( "set two_hits to " << sopt_.two_hits );
900 }
901 
902 //------------------------------------------------------------------------------
905  BlastInitHitList * init_hitlist ) const
906 {
907  TVolList::const_iterator vi( FindVolume( oid ) );
908  ASSERT( vi->start_oid <= oid );
909  ASSERT( vi->start_oid + vi->n_oids > oid );
910  ASSERT( vi->has_index );
911  oid -= vi->start_oid;
912  BlastInitHitList * res( 0 );
913  const TVolResults & vr( results_holder_[vi - volumes_.begin()].res );
914  ASSERT( vr != 0 );
915 
916  if( (res = vr->GetResults( oid, chunk )) != 0 ) {
917  BlastInitHitListMove( init_hitlist, res );
918  return vr->GetWordSize();
919  }
920  else {
921  BlastInitHitListReset( init_hitlist );
922  return 0;
923  }
924 }
925 
926 static int s_GetMinimumSupportedWordSizeByIndex(const string& fname)
927 {
928  CMemoryFile index_map(fname);
929  SIndexHeader header = ReadIndexHeader< false >(index_map.GetPtr());
930  int rv = header.hkey_width_ + header.stride_ -1;
931  ERR_POST(Info << "Minimal supported word size in " << fname << " is " << rv);
932  return rv;
933 }
934 
936 {
937  int rv = 0;
938  if (volumes_.size() > 0){
940  }
941  return rv;
942 }
943 //------------------------------------------------------------------------------
944 CIndexedDb_Old::CIndexedDb_Old( const string & indexnames )
945 {
946  if( !indexnames.empty() ) {
947  vector< string > dbnames;
948  string::size_type start = 0, end = 0;
949 
950  // Interpret indexname as a space separated list of database names.
951  //
952  while( start != string::npos ) {
953  end = indexnames.find_first_of( " ", start );
954  dbnames.push_back( indexnames.substr( start, end - start ) );
955  start = indexnames.find_first_not_of( " ", end );
956  }
957 
958  std::sort( dbnames.begin(), dbnames.end(), &SeqDB_CompareVolume );
959 
960  for( vector< string >::const_iterator dbni = dbnames.begin();
961  dbni != dbnames.end(); ++dbni ) {
962  const string & indexname = *dbni;
963 
964  // Parse the indexname as a comma separated list
965  unsigned long start_vol = 0, stop_vol = 99;
966  start = 0;
967  end = indexname.find_first_of( ",", start );
968  string index_base = indexname.substr( start, end );
969  start = end + 1;
970 
971  if( start < indexname.length() && end != string::npos ) {
972  end = indexname.find_first_of( ",", start );
973  start = end + 1;
974 
975  if( start < indexname.length() && end != string::npos ) {
976  end = indexname.find_first_of( ",", start );
977  string start_vol_str =
978  indexname.substr( start, end - start );
979 
980  if( !start_vol_str.empty() ) {
981  start_vol = atoi( start_vol_str.c_str() );
982  }
983 
984  start = end + 1;
985 
986  if( start < indexname.length() && end != string::npos ) {
987  end = indexname.find_first_of( ",", start );
988  string stop_vol_str =
989  indexname.substr( start, end - start);
990 
991  if( !stop_vol_str.empty() ) {
992  stop_vol = atoi( stop_vol_str.c_str() );
993  }
994  }
995  }
996  }
997 
998  if( start_vol <= stop_vol ) {
999  long last_i = -1;
1000 
1001  for( long i = start_vol; (unsigned long)i <= stop_vol; ++i ) {
1002  ostringstream os;
1003  os << index_base << "." << setw( 2 ) << setfill( '0' )
1004  << i << ".idx";
1005  string name = SeqDB_ResolveDbPath( os.str() );
1006 
1007  if( !name.empty() ){
1008  if( i - last_i > 1 ) {
1009  for( long j = last_i + 1; j < i; ++j ) {
1010  ERR_POST( Error << "Index volume "
1011  << j << " not resolved." );
1012  }
1013  }
1014 
1015  index_names_.push_back( name );
1016  last_i = i;
1017  }
1018  }
1019  }
1020  }
1021  }
1022 
1023  if( index_names_.empty() ) {
1024  string msg("no index file specified or index '");
1025  msg += indexnames + "*' not found.";
1026  NCBI_THROW(CDbIndex_Exception, eBadOption, msg);
1027  }
1028 }
1029 
1030 //------------------------------------------------------------------------------
1032 {
1033 }
1034 
1035 //------------------------------------------------------------------------------
1037  BLAST_SequenceBlk * queries, BlastSeqLoc * locs,
1038  LookupTableOptions * lut_options ,
1039  BlastInitialWordOptions * word_options )
1040 {
      // Loads every index volume listed in index_names_, one after another,
      // and runs the seed search for the given queries/locations on each.
      // Throws CIndexedDbException (eIndexInitError) if a volume cannot be
      // loaded.
      //
      // NOTE(review): the declaration of sopt is not visible in this
      // listing; it receives the word size and the two-hit window taken
      // from the BLAST options.
1042  sopt.word_size = lut_options->word_size;
1043  sopt.two_hits = word_options->window_size;
1044 
1045  for( vector< string >::size_type v = 0;
1046  v < index_names_.size(); v += 1 ) {
1047  CRef< CDbIndex > index;
1048  string result;
1049 
      // A load failure is not propagated directly: the exception text is
      // kept so that it can be included in the error thrown below.
1050  try { index = CDbIndex::Load( index_names_[v] ); }
1051  catch( CException & e ) { result = e.what(); }
1052 
1053  if( index == 0 ) {
      // NOTE(review): there is no separator between "index" and the
      // volume name in the message assembled here.
1054  NCBI_THROW( CIndexedDbException, eIndexInitError,
1055  string( "CIndexedDb: could not load index" ) +
1056  index_names_[v] + ": " + result );
1057  }
1058 
      // index_ is overwritten on every iteration, so only the most recently
      // loaded volume stays referenced through it.
1059  index_ = index;
1060  results_.push_back( CConstRef< CDbIndex::CSearchResults >( null ) );
      // seqmap_ stores running totals: the previous total (0 for the first
      // volume) plus StopSeq() - StartSeq() of the volume just loaded.
1061  CDbIndex::TSeqNum s = seqmap_.empty() ? 0 : *seqmap_.rbegin();
1062  seqmap_.push_back( s + (index->StopSeq() - index->StartSeq()) );
      // NOTE(review): the declaration of results is not visible in this
      // listing; presumably it refers to the slot just appended to
      // results_ -- confirm against the full source.
1064  results = index_->Search( queries, locs, sopt );
1065  }
1066 }
1067 
1068 //------------------------------------------------------------------------------
1071  BlastInitHitList * init_hitlist ) const
1072 {
      // Fetches the seeds found for subject oid / chunk and hands them over
      // to init_hitlist. Returns the search word size when seeds exist and
      // 0 otherwise.
1073  BlastInitHitList * res = 0;
1074  TSeqMap::size_type i = LocateIndex( oid );
      // NOTE(review): the declaration of results is not visible in this
      // listing; presumably it is the result set belonging to index i.
      //
      // Make oid relative to index i by subtracting the cumulative sequence
      // count stored for the preceding index.
1076  if( i > 0 ) oid -= seqmap_[i-1];
1077 
1078  if( (res = results->GetResults( oid, chunk )) != 0 ) {
      // Seeds are available: move them into the caller's hit list.
1079  BlastInitHitListMove( init_hitlist, res );
1080  return results->GetWordSize();
1081  }else {
      // No seeds: reset the caller's hit list.
1082  BlastInitHitListReset( init_hitlist );
1083  return 0;
1084  }
1085 }
1086 /// Get the minimum acceptable word size to use with indexed search.
1087 /// @return the minimum acceptable word size
1089 {
      // The result defaults to 0 and is returned unchanged when
      // index_names_ holds no entries.
1090  int rv = 0;
1091  if (index_names_.size() > 0){
      // NOTE(review): the statement of this branch is not visible in this
      // listing; presumably it derives rv from one of the index files --
      // confirm against the full source.
1093  }
1094  return rv;
1095 }
1096 //------------------------------------------------------------------------------
1097 
1099 {
      // Validates word_size against the minimum word size of the index.
      // Returns an empty string when no problem is detected and a
      // description of the problem otherwise.
1100  string rv = kEmptyStr;
1101  if (CIndexedDb::Index_Set_Instance != 0 ) {
1102  try {
      // NOTE(review): min_ws is defined on a line that is not visible in
      // this listing; presumably it is obtained from the index instance.
1104  if (word_size < min_ws) {
      // NOTE(review): one statement of this branch is not visible in this
      // listing. The message says "greater than", although a word size
      // equal to min_ws passes the check above.
1106  rv = "MegaBLAST database index requires word size greater than ";
1107  rv += NStr::IntToString(min_ws);
1108  rv += ".";
1109  }
1110  }
1111  catch (CException & e) {
      // The exception itself is discarded; only a generic message is
      // returned.
1112  rv = "Failed to read index MegaBLAST db min word size.";
1113  }
1114 
1115  }
1116  else {
      // No index instance is set.
1117  rv = "Empty index db instance";
1118  }
1119  return rv;
1120 }
1121 
1123  const string & indexname, bool old_style, bool & partial, const int word_size )
1124 {
      // Tries to set up the index for indexname: first as a new style index
      // (unless old_style is set), then as an old style index. Returns the
      // outcome of s_CheckMinWordSize(word_size) once an index instance is
      // in place; otherwise returns "index allocation error" or the
      // collected texts of the exceptions raised by the failed attempts.
      //
      // NOTE(review): the declaration of result and the assignment targets
      // of the two new expressions below are not visible in this listing.
1126  partial = false;
1127 
1128  if( !old_style ) {
1129  try {
1130  ERR_POST( Info << "trying to load new style index at "
1131  << indexname );
1133  new CIndexedDb_New( indexname, partial ) );
1134 
1135  if( CIndexedDb::Index_Set_Instance != 0 ) {
1136  ERR_POST( Info << "new style index loaded" );
1137 
      // partial is passed by reference to the CIndexedDb_New constructor
      // above, which may have changed it.
1138  if( partial ) {
1139  ERR_POST( Info << "some volumes are not resolved" );
1140  }
1141  return s_CheckMinWordSize(word_size);
1142  }
1143  else return "index allocation error";
1144  }
1145  catch( CException & e ) {
      // Keep the failure text and fall through to the old style attempt.
1146  ERR_POST( Info << "new style index failed to load" );
1147  result = e.what();
1148  }
1149  }
1150 
1151 
      // Old style attempt: reached when old_style is set or when the new
      // style attempt above raised an exception.
1152  try{
1153  ERR_POST( Info << "trying to load old style index at "
1154  << indexname );
1156  new CIndexedDb_Old( indexname ) );
1157 
1158  if( CIndexedDb::Index_Set_Instance != 0 ) {
1159  ERR_POST( Info << "old style index loaded" );
1160  return s_CheckMinWordSize(word_size);
1161  }
1162  else return "index allocation error";
1163  }
1164  catch( CException & e ) {
      // Append this failure text, on a new line, to whatever the new style
      // attempt may have stored in result.
1165  ERR_POST( Info << "old style index failed to load" );
1166  result += "\n";
1167  result += e.what();
1168  }
1169 
1170  return result;
1171 }
1172 
1173 //------------------------------------------------------------------------------
1175 {
1180 }
1181 
1182 //------------------------------------------------------------------------------
1184 {
1189 }
1190 
1191 //------------------------------------------------------------------------------
1193 { return SetUsingThreadsFn; }
1194 
1196 { return SetNumThreadsFn; }
1197 
1200 
1201 END_SCOPE( blast )
1203 
1206 
1207 extern "C" {
1208 
1209 //------------------------------------------------------------------------------
1210 static void s_MB_IdxEndSearchIndication( Int4 last_vol_id )
1211 {
1212  return CIndexedDb::Index_Set_Instance->EndSearchIndication( last_vol_id );
1213 }
1214 
1215 //------------------------------------------------------------------------------
1216 static int s_MB_IdbCheckOid( Int4 oid, Int4 * last_vol_id )
1217 {
1218  _ASSERT( oid >= 0 );
1219  return CIndexedDb::Index_Set_Instance->CheckOid( oid, last_vol_id );
1220 }
1221 
1222 //------------------------------------------------------------------------------
1223 static unsigned long s_MB_IdbGetResults(
1224  Int4 oid_i, Int4 chunk_i,
1225  BlastInitHitList * init_hitlist )
1226 {
      // Entry point with signed BLAST-style arguments: checks them and
      // converts them to the index sequence number type.
1227  _ASSERT( oid_i >= 0 );
1228  _ASSERT( chunk_i >= 0 );
1229  _ASSERT( init_hitlist != 0 );
1230 
      // The assertions above guard these signed to TSeqNum conversions.
1231  CDbIndex::TSeqNum oid = (CDbIndex::TSeqNum)oid_i;
1232  CDbIndex::TSeqNum chunk = (CDbIndex::TSeqNum)chunk_i;
1233 
      // NOTE(review): the beginning of the call statement is not visible in
      // this listing; presumably the result of GetResults() of the shared
      // index instance is returned -- confirm against the full source.
1235  oid, chunk, init_hitlist );
1236 }
1237 
1238 } /* extern "C" */
1239 
1240 /* @} */
#define static
#define IDX_TRACE(_m)
Declarations for indexed blast databases.
void(* DbIndexSetUsingThreadsFnType)(bool multiple_threads)
Type of a callback to set the concurrency state in the index structure.
void(* DbIndexRunSearchFnType)(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Type of a callback to run the indexed seed search.
void(* DbIndexSetNumThreadsFnType)(size_t n_threads)
Type of a callback to provide the number of threads to the indexing library, when multi-threaded sear...
void(* DbIndexSetQueryInfoFnType)(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)
Type of a callback to set the query information in the index structure.
void BlastInitHitListReset(BlastInitHitList *init_hitlist)
Free the ungapped data substructures and reset initial HSP count to 0.
Definition: blast_extend.c:229
void BlastInitHitListMove(BlastInitHitList *dst, BlastInitHitList *src)
Move the contents of a BlastInitHitList structure.
Definition: blast_extend.c:248
Structures and functions prototypes used for BLAST gapped extension.
Structures and API used for saving BLAST hits.
Various auxiliary BLAST utility functions.
BlastSeqLoc * getLocs() const
Get access to the held object.
unsigned long GetWordSize() const
Get the search word size.
Definition: dbindex.hpp:540
bool CheckResults(TSeqNum subj) const
Check if any results are available for a given subject sequence.
Definition: dbindex.hpp:573
BlastInitHitList * GetResults(TSeqNum seq) const
Get the result set for a particular logical subject.
Definition: dbindex.hpp:529
Types of exception the indexing library can throw.
Definition: dbindex.hpp:409
TSeqNum StartSeq() const
Get the OID of the first sequence in the index.
Definition: dbindex.hpp:775
CConstRef< CSearchResults > Search(const BLAST_SequenceBlk *query, const BlastSeqLoc *locs, const SSearchOptions &search_options)
Search the index.
static CRef< CDbIndex > Load(const std::string &fname, bool nomap=false)
Load index.
Definition: dbindex.cpp:415
CSequenceIStream::TStreamPos TSeqNum
Type used to enumerate sequences in the index.
Definition: dbindex.hpp:484
TSeqNum StopSeq() const
Get the OID of the last sequence in the index.
Definition: dbindex.hpp:786
CFastMutex –.
Definition: ncbimtx.hpp:667
static std::string GenerateIndexVolumeName(const std::string &idxname, size_t volume)
Generate index volume file name from the index base name.
Definition: dbindex.cpp:105
Index wrapper exceptions.
Index wrapper for new style MegaBLAST indexing functionality.
Index wrapper for old style MegaBLAST indexing functionality.
This class is responsible for loading indices and doing the actual seed search.
CMemoryFile –.
Definition: ncbifile.hpp:2860
CObject –.
Definition: ncbiobj.hpp:180
CSeqDB.
Definition: seqdb.hpp:161
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
Definition: seqdb.cpp:1040
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Definition: seqdb.cpp:680
@ eNucleotide
Definition: seqdb.hpp:175
Include a standard set of the NCBI C++ Toolkit most basic headers.
const SIndexHeader ReadIndexHeader< false >(void *map)
Definition: dbindex.cpp:329
CRef< CIndexSuperHeader_Base > GetIndexSuperHeader(const std::string &fname)
Read superheader structure from the file.
Definition: dbindex.cpp:130
size_t GetIdxVolNumOIDs(const std::string &fname)
Read the index header information from the given file.
Definition: dbindex.cpp:255
static const struct name_t names[]
#define false
Definition: bool.h:36
#define SEP
Definition: attributes.c:220
std::string name
Fully qualified name of the volume.
virtual int MinIndexWordSize()=0
int ref_count
How many threads still need the result set.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const
Return results corresponding to a given subject sequence and chunk.
void TraceVolumes(void)
This is only used for debugging output.
DbIndexSetQueryInfoFnType GetDbIndexSetQueryInfoFn()
Return the appropriate callback to set query information in the index.
void ClearDbIndexCallbacks(void)
static DbIndexSetNumThreadsFnType SetNumThreadsFn
Global pointer to the appropriate callback to set the number of threads.
virtual ~CIndexedDb()
Object destructor.
static void IndexedDbSetUsingThreads(bool multiple_threads)
Set state of concurrency in the index structure.
vector< CConstRef< CDbIndex::CSearchResults > > TResultSet
Type used to represent collections of search result sets.
friend bool operator<(const SVolumeDescriptor &a, const SVolumeDescriptor &b)
Volumes are compared by their starting ordinal ids.
static void ParseDBNames(const std::string db_spec, TStrVec &db_names)
Generate a list of BLAST database names from a single string.
virtual int CheckOid(Int4 oid, Int4 *last_vol_id)
Check whether any results were reported for a given subject sequence.
friend std::ostream & operator<<(std::ostream &os, const SVolumeDescriptor &vd)
This is only used for debug tracing.
bool multiple_threads_
flag indicating that multithreading is in effect
CDbIndex::SSearchOptions sopt_
common search parameters
virtual ~CIndexedDb_New()
Object destructor.
virtual int CheckOid(Int4 oid, Int4 *last_vol_id)=0
Check whether any results were reported for a given subject sequence.
virtual void EndSearchIndication(Int4)
Not used.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Run preliminary indexed search functionality.
std::vector< std::string > TStrVec
Alias for a vector of strings.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Wrapper around PreSearch().
USING_SCOPE(ncbi::objects)
static DbIndexSetUsingThreadsFnType SetUsingThreadsFn
Global pointer to the appropriate callback to set the state of concurrency.
virtual void EndSearchIndication(Int4 last_vol_id)=0
Function used by threads to indicate that they are done with iterating over the database sequences.
void SetQueryInfo(CRef< CBlastSeqLocWrap > locs_wrap)
Set the current set of unmasked query segments.
void UpdateIndex(Int4 oid, Int4 *vol_idx)
Update the seed sets, if necessary.
virtual void EndSearchIndication(Int4 last_vol_id)
Function used by threads to indicate that they are done with iterating over the database sequences.
bool has_index
'true' if the volume is indexed.
static void TraceNames(const TStrVec &names)
This is only used for debugging output.
static void NullSetUsingThreads(bool)
No-op callback for setting concurrency state.
TResultsHolder results_holder_
reference counted seed set holders
static void NullRunSearch(BLAST_SequenceBlk *, LookupTableOptions *, BlastInitialWordOptions *)
No-op callback to run indexed search.
CIndexedDb_Old(const string &indexname)
Object constructor.
vector< string > index_names_
List of index volume names.
static unsigned long s_MB_IdbGetResults(Int4 oid_i, Int4 chunk_i, BlastInitHitList *init_hitlist)
Get the seed search results for a given subject id and chunk number.
TVolList volumes_
index volume descriptors
static void IndexedDbSetQueryInfo(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)
Set information about unmasked query segments.
virtual int MinIndexWordSize()
Get the minimum acceptable word size to use with indexed search.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)=0
Run preliminary indexed search functionality.
static int s_MB_IdbCheckOid(Int4 oid, Int4 *last_vol_oid)
CFastMutex mtx_
mutex used for thread sync
static DbIndexSetQueryInfoFnType SetQueryInfoFn
Global pointer to the appropriate callback to set query info, based on whether or not index search is...
static int s_GetMinimumSupportedWordSizeByIndex(const string &fname)
CIndexedDb_New(const string &indexname, bool &partial)
Object constructor.
TResultSet results_
Set of result sets, one per loaded index.
static void EnumerateDbVolumes(const TStrVec &db_names, TStrVec &db_vols)
Generate a list of leaf database volumes from a list of database names.
CConstRef< CDbIndex::CSearchResults > TVolResults
This type captures the seeds found by search of an index volume.
std::string DbIndexInit(const string &indexname, bool old_style, bool &partial, const int word_size)
TVolResults res
Seed set or null.
static void NullSetNumThreads(size_t)
No-op callback for setting the number of threads.
static void IndexedDbSetNumThreads(size_t n_threads)
Set the number of concurrent search threads in the index structure.
static void IndexedDbRunSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Run indexed search.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const
Return results corresponding to a given subject sequence and chunk.
static void s_MB_IdxEndSearchIndication(Int4 last_vol_id)
DbIndexSetUsingThreadsFnType GetDbIndexSetUsingThreadsFn()
Return the appropriate callback to set the concurrency state in the index structure.
TSeqMap seqmap_
For each element of indices_ with index i seqmap_[i] contains one plus the last oid of that database ...
BLAST_SequenceBlk * queries_
query data (from BLAST)
void SetMultipleThreads(bool multiple_threads)
Set the concurrency status.
DbIndexRunSearchFnType GetDbIndexRunSearchFn()
Return the appropriate callback to run indexed seed search.
std::string s_CheckMinWordSize(int word_size)
DbIndexSetNumThreadsFnType GetDbIndexSetNumThreadsFn()
Return the appropriate callback to set the number of threads in the index structure.
static DbIndexRunSearchFnType RunSearchFn
Global pointer to the appropriate callback to run indexed search, based on whether or not index searc...
std::vector< SVolResults > TResultsHolder
List of reference counted result holders.
std::vector< SVolumeDescriptor > TVolList
List of leaf index volumes.
void SetNumThreads(size_t n_threads)
Set the number of threads used for concurrent search.
TSeqMap::size_type LocateIndex(CDbIndex::TSeqNum oid) const
Find an index corresponding to the given subject id.
CRef< CBlastSeqLocWrap > locs_wrap_
Current set of unmasked query locations.
vector< CDbIndex::TSeqNum > TSeqMap
Type used to map loaded indices to subject ids.
TVolList::const_iterator FindVolume(SIZE_TYPE oid) const
Find a volume containing the given subject ordinal id.
CRef< CDbIndex > index_
Currently loaded index.
virtual int CheckOid(Int4 oid, Int4 *)
Check whether any results were reported for a given subject sequence.
static CRef< CIndexedDb > Index_Set_Instance
Shared representation of currently loaded index volumes.
void SetUpDbIndexCallbacks(void)
SIZE_TYPE start_oid
OId of the first sequence of the volume.
static void NullSetQueryInfo(LookupTableWrap *, CRef< CBlastSeqLocWrap >)
No-op callback for setting query info.
void AddIndexInfo(const std::string &vol_name, bool &idx_not_resolved)
virtual int MinIndexWordSize()
size_t n_threads_
number of search threads running
SIZE_TYPE GetNextUnusedOID(void) const
Auxiliary function that returns the oid value that is one more than the largest oid used so far.
void PreSearch(BLAST_SequenceBlk *queries, BlastSeqLoc *locs, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Invoke the seed search procedure on each of the loaded indices.
SIZE_TYPE n_oids
Number of sequences in the volume.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const =0
Return results corresponding to a given subject sequence and chunk.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
void * GetPtr(void) const
Get pointer to beginning of data.
Definition: ncbifile.hpp:4281
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
Definition: ncbiobj.hpp:846
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
int i
Declarations for functions that extract hits from indexed blast databases (specialized for megablast)
#define LAST_VOL_IDX_NULL
#define LAST_VOL_IDX_INIT
@ eHasResults
@ eNotIndexed
@ eNoResults
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
unsigned int a
Definition: ncbi_localip.c:102
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Multi-threading – classes, functions, and features.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
bool SeqDB_CompareVolume(const string &volpath1, const string &volpath2)
Compares two volume file names and determine the volume order.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
#define assert(x)
Definition: srv_diag.hpp:58
Structure to hold a sequence.
Definition: blast_def.h:242
Structure to hold all initial HSPs for a given subject sequence.
Definition: blast_extend.h:158
Options needed for initial word finding and processing.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Simple record type used to specify index search parameters.
Definition: dbindex.hpp:639
unsigned long two_hits
Window for two-hit method (see megablast docs).
Definition: dbindex.hpp:641
unsigned long word_size
Target seed length.
Definition: dbindex.hpp:640
Reference count for the volume results.
Information about one leaf index volume.
Options needed to construct a lookup table Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
void * end_search_indication
function used to report that a thread is done iterating over the database in preliminary search
Definition: lookup_wrap.h:57
void * check_index_oid
function used to check if seeds for a given oid are present
Definition: lookup_wrap.h:55
void * read_indexed_db
function used to retrieve hits from an indexed database
Definition: lookup_wrap.h:53
Structure into which an index header is loaded.
Definition: dbindex.hpp:290
unsigned long hkey_width_
Size in bp of the Nmer used as a hash key.
Definition: dbindex.hpp:293
unsigned long stride_
Stride used to index database locations.
Definition: dbindex.hpp:294
#define _ASSERT
else result
Definition: token2.c:20
Modified on Wed Apr 24 14:12:45 2024 by modify_doxy.py rev. 669887