NCBI C++ ToolKit
lookup_wrap.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: lookup_wrap.c 79232 2017-08-23 19:12:17Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file lookup_wrap.c
31  * Wrapper for different flavors of lookup tables allowing a uniform interface in the code.
32  * The wrapper (LookupTableWrap) contains an unsigned byte specifying the type of lookup
33  * table as well as a void pointer pointing to the actual lookup table. Examples of different
34  * types of lookup tables are those for protein queries, the "standard" nucleotide one, the
35  * megablast lookup table, etc.
36  */
37 
46 
48  const LookupTableOptions* lookup_options,
49  const QuerySetUpOptions* query_options,
50  BlastSeqLoc* lookup_segments, BlastScoreBlk* sbp,
51  LookupTableWrap** lookup_wrap_ptr, const BlastRPSInfo *rps_info,
52  Blast_Message* *error_msg,
53  BlastSeqSrc* seqsrc)
54 {
55  return LookupTableWrapInit_MT(query, lookup_options, query_options,
56  lookup_segments, sbp, lookup_wrap_ptr,
57  rps_info, error_msg, seqsrc, 1);
58 }
59 
60 
62  const LookupTableOptions* lookup_options,
63  const QuerySetUpOptions* query_options,
64  BlastSeqLoc* lookup_segments, BlastScoreBlk* sbp,
65  LookupTableWrap** lookup_wrap_ptr, const BlastRPSInfo *rps_info,
66  Blast_Message* *error_msg,
67  BlastSeqSrc* seqsrc,
68  Uint4 num_threads)
69 {
70  Int2 status = 0;
71  LookupTableWrap* lookup_wrap;
72  EBoneType bone_type;
73 
74  if (error_msg)
75  *error_msg = NULL;
76 
77  /* Construct the lookup table. */
78  *lookup_wrap_ptr = lookup_wrap =
79  (LookupTableWrap*) calloc(1, sizeof(LookupTableWrap));
80  lookup_wrap->lut_type = lookup_options->lut_type;
81 
82  switch ( lookup_options->lut_type ) {
83  case eAaLookupTable:
84  {
85  Int4** matrix = NULL;
86  Boolean has_pssm = FALSE;
87  if (sbp->psi_matrix && sbp->psi_matrix->pssm) {
88  matrix = sbp->psi_matrix->pssm->data;
89  has_pssm = TRUE;
90  } else {
91  matrix = sbp->matrix->data;
92  }
93  BlastAaLookupTableNew(lookup_options, (BlastAaLookupTable* *)
94  &lookup_wrap->lut);
95  ((BlastAaLookupTable*)lookup_wrap->lut)->use_pssm = has_pssm;
96  BlastAaLookupIndexQuery( (BlastAaLookupTable*) lookup_wrap->lut, matrix,
97  query, lookup_segments, 0);
98  /* if query length less than 64k, we can save cache by using small bone */
99  bone_type = ( query->length >= INT2_MAX*2) ? eBackbone: eSmallbone;
100  BlastAaLookupFinalize((BlastAaLookupTable*) lookup_wrap->lut, bone_type);
101  }
102  break;
103 
105  BlastCompressedAaLookupTableNew(query, lookup_segments,
106  (BlastCompressedAaLookupTable* *) &(lookup_wrap->lut),
107  lookup_options, sbp);
108  break;
109 
111  /* for indexed megablast, lookup table data is initialized
112  in the API layer, not here */
113  lookup_wrap->lut = NULL;
114  break;
115 
116  case eMixedMBLookupTable:
117  case eSmallNaLookupTable:
118  case eNaLookupTable:
119  case eMBLookupTable:
120  case eNaHashLookupTable:
121  {
122  Int4 lut_width;
123  Int4 max_q_off;
124  Int4 num_table_entries;
125 
126  num_table_entries = EstimateNumTableEntries(lookup_segments,
127  &max_q_off);
128  lookup_wrap->lut_type = BlastChooseNaLookupTable(
129  lookup_options, num_table_entries,
130  max_q_off, &lut_width);
131 
132  if (lookup_wrap->lut_type == eMBLookupTable) {
133  BlastMBLookupTableNew(query, lookup_segments,
134  (BlastMBLookupTable* *) &(lookup_wrap->lut),
135  lookup_options, query_options,
136  num_table_entries, lut_width,
137  seqsrc);
138  }
139  else if (lookup_wrap->lut_type == eSmallNaLookupTable) {
140  status = BlastSmallNaLookupTableNew(query, lookup_segments,
141  (BlastSmallNaLookupTable* *) &(lookup_wrap->lut),
142  lookup_options, query_options, lut_width);
143  if (status != 0) {
144  lookup_wrap->lut_type = eNaLookupTable;
145  status = BlastNaLookupTableNew(query, lookup_segments,
146  (BlastNaLookupTable* *) &(lookup_wrap->lut),
147  lookup_options, query_options, lut_width);
148  }
149  }
150  else if (lookup_wrap->lut_type == eNaHashLookupTable) {
151  status = BlastNaHashLookupTableNew(query, lookup_segments,
152  (BlastNaHashLookupTable**) &(lookup_wrap->lut),
153  lookup_options, query_options, seqsrc,
154  num_threads);
155 
156  }
157  else {
158  BlastNaLookupTableNew(query, lookup_segments,
159  (BlastNaLookupTable* *) &(lookup_wrap->lut),
160  lookup_options, query_options, lut_width);
161  }
162  }
163  ASSERT( lookup_wrap->lut_type != eMixedMBLookupTable );
164  break;
165 
166 
168  {
169  const Boolean kIsDna =
170  (lookup_options->lut_type == ePhiNaLookupTable);
171  status = SPHIPatternSearchBlkNew(lookup_options->phi_pattern, kIsDna, sbp,
172  (SPHIPatternSearchBlk* *) &(lookup_wrap->lut),
173  error_msg);
174  break;
175  }
176 
177  case eRPSLookupTable:
178  {
180  Int4 alphabet_size;
182  (&lookup_wrap->lut));
183 
184  /* if the alphabet size from the RPS database is too
185  small, mask all unsupported query letters */
186  lookup = (BlastRPSLookupTable*)(lookup_wrap->lut);
187  alphabet_size = lookup->alphabet_size;
188  if (alphabet_size < BLASTAA_SIZE)
189  Blast_MaskUnsupportedAA(query, alphabet_size);
190  break;
191  }
192  } /* end switch */
193 
194  return status;
195 }
196 
198 {
199  if (!lookup)
200  return NULL;
201 
202  switch(lookup->lut_type) {
203  case eMBLookupTable:
204  lookup->lut = (void*)
206  break;
207 
209  case eMixedMBLookupTable:
210  lookup->lut = NULL;
211  break;
212 
213  case ePhiLookupTable:
214  case ePhiNaLookupTable:
215  lookup->lut = (void*)
217  break;
218 
219  case eRPSLookupTable:
220  lookup->lut = (void*)
222  break;
223 
224  case eSmallNaLookupTable:
225  lookup->lut = (void*)
227  break;
228 
229  case eNaLookupTable:
230  lookup->lut = (void*)
232  break;
233 
234  case eNaHashLookupTable:
235  lookup->lut = (void*)
237  break;
238 
239  case eAaLookupTable:
240  lookup->lut = (void*)
242  break;
243 
245  lookup->lut = (void*)
248  break;
249  }
250 
251  sfree(lookup);
252  return NULL;
253 }
254 
256 {
257  Int4 offset_array_size;
258 
259  switch (lookup->lut_type) {
260  case eMBLookupTable:
261  offset_array_size = OFFSET_ARRAY_SIZE +
262  ((BlastMBLookupTable*)lookup->lut)->longest_chain;
263  break;
264  case eAaLookupTable:
265  offset_array_size = OFFSET_ARRAY_SIZE +
266  ((BlastAaLookupTable*)lookup->lut)->longest_chain;
267  break;
269  offset_array_size = OFFSET_ARRAY_SIZE +
270  ((BlastCompressedAaLookupTable*)lookup->lut)->longest_chain;
271  break;
272  case eSmallNaLookupTable:
273  offset_array_size = OFFSET_ARRAY_SIZE +
274  ((BlastSmallNaLookupTable*)lookup->lut)->longest_chain;
275  break;
276  case eNaLookupTable:
277  offset_array_size = OFFSET_ARRAY_SIZE +
278  ((BlastNaLookupTable*)lookup->lut)->longest_chain;
279  break;
280  case eNaHashLookupTable:
281  offset_array_size = OFFSET_ARRAY_SIZE +
282  ((BlastNaHashLookupTable*)lookup->lut)->longest_chain;
283  break;
284  default:
285  offset_array_size = OFFSET_ARRAY_SIZE;
286  break;
287  }
288  return offset_array_size;
289 }
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
Routines for creating protein BLAST lookup tables.
Int2 RPSLookupTableNew(const BlastRPSInfo *rps_info, BlastRPSLookupTable **lut)
Create a new RPS blast lookup table.
EBoneType
types of cells
@ eBackbone
@ eSmallbone
Int4 BlastCompressedAaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastCompressedAaLookupTable **lut, const LookupTableOptions *opt, BlastScoreBlk *sbp)
Create a new compressed protein lookup table.
BlastCompressedAaLookupTable * BlastCompressedAaLookupTableDestruct(BlastCompressedAaLookupTable *lookup)
Free the compressed lookup table.
BlastAaLookupTable * BlastAaLookupTableDestruct(BlastAaLookupTable *lookup)
Free the lookup table.
BlastRPSLookupTable * RPSLookupTableDestruct(BlastRPSLookupTable *lookup)
Free the lookup table.
void BlastAaLookupIndexQuery(BlastAaLookupTable *lookup, Int4 **matrix, BLAST_SequenceBlk *query, BlastSeqLoc *unmasked_regions, Int4 query_bias)
Index a protein query.
Int4 BlastAaLookupFinalize(BlastAaLookupTable *lookup, EBoneType bone_type)
Pack the data structures comprising a protein lookup table into their final form.
Int4 BlastAaLookupTableNew(const LookupTableOptions *opt, BlastAaLookupTable **lut)
Create a new protein lookup table.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BLAST filtering functions.
void Blast_MaskUnsupportedAA(BLAST_SequenceBlk *seq, Uint1 min_invalid)
Mask protein letters that are currently unsupported.
Routines for creating nucleotide BLAST lookup tables.
BlastSmallNaLookupTable * BlastSmallNaLookupTableDestruct(BlastSmallNaLookupTable *lookup)
Free a small nucleotide lookup table.
Int4 BlastNaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, Int4 lut_width)
Create a new nucleotide lookup table.
ELookupTableType BlastChooseNaLookupTable(const LookupTableOptions *lookup_options, Int4 approx_table_entries, Int4 query_length, Int4 *lut_width)
choose the type of nucleotide lookup table to be used for a blast search
BlastMBLookupTable * BlastMBLookupTableDestruct(BlastMBLookupTable *mb_lt)
Deallocate memory used by the Mega BLAST lookup table.
BlastNaHashLookupTable * BlastNaHashLookupTableDestruct(BlastNaHashLookupTable *lookup)
Free a nucleotide lookup table.
Int2 BlastMBLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *location, BlastMBLookupTable **mb_lt_ptr, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, Int4 approx_table_entries, Int4 lut_width, BlastSeqSrc *seqsrc)
Create the lookup table for Mega BLAST.
Int4 BlastNaHashLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaHashLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, BlastSeqSrc *seqsrc, Uint4 num_threads)
Int4 BlastSmallNaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastSmallNaLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, Int4 lut_width)
Create a new small nucleotide lookup table.
BlastNaLookupTable * BlastNaLookupTableDestruct(BlastNaLookupTable *lookup)
Free a nucleotide lookup table.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eMixedMBLookupTable
use when some volumes are searched with index and some are not
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eIndexedMBLookupTable
use database index as a lookup structure
@ ePhiNaLookupTable
nucleotide lookup table for phi-blast
@ eAaLookupTable
standard protein (blastp) lookup table
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
@ ePhiLookupTable
protein lookup table specialized for phi-blast
@ eRPSLookupTable
RPS lookup table (rpsblast and rpstblastn)
@ eNaHashLookupTable
used for 16-base words
RPS BLAST structure definitions.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
#define NULL
Definition: ncbistd.hpp:225
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
Utility functions for lookup table generation.
Int4 EstimateNumTableEntries(BlastSeqLoc *location, Int4 *max_off)
Given a list of query locations, estimate the number of words that would need to be added to a lookup...
Definition: lookup_util.c:190
Int2 LookupTableWrapInit_MT(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc, Uint4 num_threads)
Create the lookup table for all query words (possibly multithreaded, depends on implementation).
Definition: lookup_wrap.c:61
Int4 GetOffsetArraySize(LookupTableWrap *lookup)
Determine the size of the offsets arrays to be filled by the ScanSubject function.
Definition: lookup_wrap.c:255
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Definition: lookup_wrap.c:197
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
Wrapper for all lookup tables used in BLAST.
#define OFFSET_ARRAY_SIZE
Default size of offset arrays filled in a single ScanSubject call.
Definition: lookup_wrap.h:119
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define INT2_MAX
largest number represented by signed (two byte) short
Definition: ncbi_std.h:156
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Pseudo lookup table structure and database scanning functions used in PHI-BLAST.
SPHIPatternSearchBlk * SPHIPatternSearchBlkFree(SPHIPatternSearchBlk *pattern_blk)
Deallocate memory for the PHI BLAST lookup table.
Definition: phi_lookup.c:690
Int2 SPHIPatternSearchBlkNew(char *pattern, Boolean is_dna, BlastScoreBlk *sbp, SPHIPatternSearchBlk **pattern_blk, Blast_Message **error_msg)
Initialize the pattern items structure, serving as a "pseudo" lookup table in a PHI BLAST search.
Definition: phi_lookup.c:388
Structure to hold a sequence.
Definition: blast_def.h:242
The basic lookup table structure for blastp searches.
The lookup table structure for protein searches using a compressed alphabet.
The lookup table structure used for Mega BLAST.
The basic lookup table structure for blastn searches.
The RPS engine uses this structure to access all of the RPS blast related data (assumed to be collect...
Definition: blast_rps.h:120
The basic lookup table structure for RPS blast searches.
Structure used for scoring calculations.
Definition: blast_stat.h:177
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Definition: blast_stat.h:186
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Lookup table structure for blastn searches with small queries.
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
Options needed to construct a lookup table. Also needed: query sequence and query length.
char * phi_pattern
PHI-BLAST pattern.
ELookupTableType lut_type
What kind of lookup table to construct?
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
void * lut
Pointer to the actual lookup table structure.
Definition: lookup_wrap.h:52
ELookupTableType lut_type
What kind of lookup table is it?
Definition: lookup_wrap.h:51
Options required for setting up the query sequence.
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
SBlastScoreMatrix * pssm
position-specific score matrix
Definition: blast_stat.h:150
static string query
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
voidp calloc(uInt items, uInt size)
Modified on Thu Feb 22 17:08:36 2024 by modify_doxy.py rev. 669887