NCBI C++ ToolKit
blast_query_info.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_query_info.c 75049 2016-10-17 19:00:07Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_query_info.c
31  * Functions to manipulate the BlastQueryInfo structure
32  */
33 
34 
38 
39 Int4
41 {
42  if (program == eBlastTypePsiTblastn || Blast_QueryIsProtein(program)) {
43  return context;
44  } else if (Blast_QueryIsTranslated(program)) {
45  return context / NUM_FRAMES;
46  } else {
47  return context / NUM_STRANDS;
48  }
49 }
50 
51 Int4
53 {
54  int context = BSearchContextInfo(query_offset, query_info);
55 
56  return Blast_GetQueryIndexFromContext(context, program);
57 }
58 
60 {
61  const unsigned int kNumContexts = BLAST_GetNumberOfContexts(program);
62  BlastQueryInfo* retval = NULL;
63 
64  if (num_queries <= 0) {
65  return retval;
66  }
67  ASSERT(kNumContexts != 0);
68 
69  retval = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
70  if ( !retval ) {
71  return BlastQueryInfoFree(retval);
72  }
73 
74  retval->num_queries = num_queries;
75 
76  retval->first_context = 0;
77  retval->last_context = retval->num_queries * kNumContexts - 1;
78 
79  retval->contexts = (BlastContextInfo*) calloc(retval->last_context + 1,
80  sizeof(BlastContextInfo));
81 
82  if ( !retval->contexts ) {
83  return BlastQueryInfoFree(retval);
84  } else {
85  int i;
86  for (i = 0; i < retval->last_context + 1; i++) {
87  retval->contexts[i].query_index =
89  ASSERT(retval->contexts[i].query_index != -1);
90 
91  retval->contexts[i].frame = BLAST_ContextToFrame(program, i);
92  ASSERT(retval->contexts[i].frame != INT1_MAX);
93 
94  retval->contexts[i].is_valid = TRUE;
95 
96  if (Blast_ProgramIsMapping(program)) {
98  }
99  }
100  }
101 
102  return retval;
103 }
104 
106 {
107  if (query_info) {
108  sfree(query_info->contexts);
109  query_info->pattern_info =
110  SPHIQueryInfoFree(query_info->pattern_info);
111  sfree(query_info);
112  }
113  return NULL;
114 }
115 
117 {
118  BlastQueryInfo* retval = BlastMemDup(query_info, sizeof(BlastQueryInfo));
119  Int4 num_contexts = query_info->last_context + 1;
120 
121  retval->contexts =
122  BlastMemDup(query_info->contexts, num_contexts * sizeof(BlastContextInfo));
123 
124  if (query_info->pattern_info) {
125  retval->pattern_info =
126  SPHIQueryInfoCopy(query_info->pattern_info);
127  }
128 
129  return retval;
130 }
131 
132 /** Calculates length of the DNA query from the BlastQueryInfo structure that
133  * contains context information for translated frames for a set of queries.
134  * @param query_info Query information containing data for all contexts [in]
135  * @param query_index Which query to find DNA length for?
136  * @return DNA length of the query, calculated as sum of 3 protein frame lengths,
137  * plus 2, because 2 last nucleotide residues do not have a
138  * corresponding codon.
139  */
140 static Int4
141 s_GetTranslatedQueryDNALength(const BlastQueryInfo* query_info, Int4 query_index)
142 {
143  Int4 start_context = NUM_FRAMES*query_index;
144  Int4 dna_length = 2;
145  Int4 index;
146 
147  /* Make sure that query index is within appropriate range, and that this is
148  really a translated search */
149  ASSERT(query_index < query_info->num_queries);
150  ASSERT(start_context < query_info->last_context);
151 
152  /* If only reverse strand is searched, then forward strand contexts don't
153  have lengths information */
154  if (query_info->contexts[start_context].query_length == 0)
155  start_context += 3;
156 
157  for (index = start_context; index < start_context + 3; ++index)
158  dna_length += query_info->contexts[index].query_length;
159 
160  return dna_length;
161 }
162 
164  EBlastProgramType program,
165  Int4 query_index)
166 {
167  const Uint4 kNumContexts = BLAST_GetNumberOfContexts(program);
168  ASSERT(query_index < qinfo->num_queries);
169 
170  if (Blast_QueryIsTranslated(program)) {
171  return s_GetTranslatedQueryDNALength(qinfo, query_index);
172  } else if (program == eBlastTypeBlastn || program == eBlastTypeMapping) {
173  Int4 retval = qinfo->contexts[query_index*kNumContexts].query_length;
174  if (retval <= 0) {
175  retval = qinfo->contexts[query_index*kNumContexts+1].query_length;
176  }
177  return retval;
178  } else {
179  return qinfo->contexts[query_index*kNumContexts].query_length;
180  }
181 }
182 
/* FIXME: should the EBlastProgramType be added as a member of the
 * BlastQueryInfo structure? Without it, there are many operations that
 * can't be done, so it doesn't make sense to keep them separate... */
186 Int8
188  EBlastProgramType program,
189  Int4 query_index)
190 {
191  Int8 retval = 0;
192  Int4 i = 0;
193  const Int4 kNumContexts = (Int4)BLAST_GetNumberOfContexts(program);
194  ASSERT(query_index < qinfo->num_queries);
195 
196  for (i = query_index*kNumContexts; i < (query_index+1)*kNumContexts; i++) {
197  if ( (retval = qinfo->contexts[i].eff_searchsp) != 0) {
198  break;
199  }
200  }
201  return retval;
202 }
203 
204 void
206  EBlastProgramType program,
207  Int4 query_index,
208  Int8 eff_searchsp)
209 {
210  Int4 i = 0;
211  const Int4 kNumContexts = (Int4)BLAST_GetNumberOfContexts(program);
212  ASSERT(query_index < qinfo->num_queries);
213 
214  for (i = query_index*kNumContexts; i < (query_index+1)*kNumContexts; i++) {
215  qinfo->contexts[i].eff_searchsp = eff_searchsp;
216  }
217 }
218 
220 {
221  Int4 m=0, b=0, e=0, size=0;
222 
223  size = A->last_context+1;
224 
225  if (A->min_length > 0 && A->max_length > 0 && A->first_context == 0) {
226  b = MIN(n / (A->max_length + 1), size - 1);
227  e = MIN(n / (A->min_length + 1) + 1, size);
228  ASSERT(e <= size);
229  }
230  else {
231  b = 0;
232  e = size;
233  }
234 
235  while (b < e - 1) {
236  m = (b + e) / 2;
237  if (A->contexts[m].query_offset > n)
238  e = m;
239  else
240  b = m;
241  }
242  return b;
243 }
244 
245 Uint4
247 {
248  BlastContextInfo * cinfo = & qinfo->contexts[qinfo->last_context];
249  return cinfo->query_offset + cinfo->query_length + (cinfo->query_length ? 2 : 1);
250 }
251 
252 Int4 *
254 {
255  /* The Many Values of 'Length'
256  *
257  * 1. info->last_context: the index of the last query offset.
258  *
259  * 2. count: the number of query offsets.
260  *
261  * 3. count + 1: the size of the output array (has an 'extra'
262  * member so as to communicate the last sequence length).
263  *
264  * 4. sz: the size of the context object array
265  */
266 
267  Uint4 count = (info->last_context + 1);
268  Uint4 sz = sizeof(Int4) * (count+1);
269  Uint4 frame = 0;
270  Int4 * result = 0;
271 
272  ASSERT(info);
273  ASSERT(info->contexts);
274 
275  result = malloc(sz);
276  memset(result, 0, sz);
277 
278  for(frame = 0; frame < count; frame++) {
279  result[frame] = info->contexts[frame].query_offset;
280  }
281 
282  /* One more entry, provides length info for last element. */
283 
284  result[count] = info->contexts[count-1].query_offset;
285 
286  if (info->contexts[count-1].query_length) {
287  result[count] += info->contexts[count-1].query_length + 1;
288  }
289 
290  return result;
291 }
292 
293 void
295  Int4 * new_offsets,
297 {
298  Uint4 count = (info->last_context + 1);
299  Uint4 i = 0;
300 
301  ASSERT(info);
302  ASSERT(new_offsets);
303 
304  if (! info->contexts) {
305  info->contexts = calloc(count, sizeof(BlastContextInfo));
306  }
307 
308  for(i = 0; i < count; i++) {
309  Int4 distance = 0;
310 
311  info->contexts[i].query_offset = new_offsets[i];
312 
313  distance = new_offsets[i+1] - new_offsets[i];
314  info->contexts[i].query_length = distance ? distance-1 : 0;
315 
316  /* Set the frame and query index */
317 
318  info->contexts[i].frame =
320 
321  info->contexts[i].query_index =
323  }
324 }
325 
326 Int2
328  BLAST_SequenceBlk** one_query_ptr,
329  const BlastQueryInfo* query_info,
330  BLAST_SequenceBlk* query, Int4 query_index)
331 {
332  Int4 num_frames;
333  Int4 index;
334  Int4 first_context;
335  Int4 query_offset;
336  BlastQueryInfo* one_query_info = NULL;
337  BLAST_SequenceBlk* one_query = NULL;
338 
339  if (!one_query_info_ptr || !one_query_ptr || !query_info || !query ||
340  query_index >= query_info->num_queries)
341  return -1;
342 
343  num_frames = (query_info->last_context / query_info->num_queries) + 1;
344  first_context = query_index*num_frames;
345  query_offset = query_info->contexts[first_context].query_offset;
346 
347  one_query_info = *one_query_info_ptr;
348  /* If this hasn't been already done, allocate new query information
349  structure. */
350  if (!one_query_info) {
351  one_query_info = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
352  *one_query_info_ptr = one_query_info;
353  one_query_info->contexts = (BlastContextInfo*) calloc(num_frames, sizeof(BlastContextInfo));
354  }
355  one_query = *one_query_ptr;
356  /* If this hasn't been already done, allocate new sequence block. */
357  if (!one_query) {
358  one_query = (BLAST_SequenceBlk*) calloc(1, sizeof(BLAST_SequenceBlk));
359  *one_query_ptr = one_query;
360  }
361  if (!one_query_info || !one_query)
362  return -1;
363 
364  one_query_info->num_queries = 1;
365  one_query_info->last_context = num_frames - 1;
366 
367  memcpy(one_query_info->contexts,
368  &query_info->contexts[first_context],
369  num_frames * sizeof(BlastContextInfo));
370 
371  /* Make context offsets relative to this query. */
372  for (index = 0; index < num_frames; ++index) {
373  one_query_info->contexts[index].query_offset -= query_offset;
374  }
375 
376  /* Fill the sequence block information for this one query. */
377  memset(one_query, 0, sizeof(BLAST_SequenceBlk));
378  one_query->sequence = &query->sequence[query_offset];
379  one_query->length =
380  one_query_info->contexts[num_frames-1].query_offset +
381  one_query_info->contexts[num_frames-1].query_length;
382  one_query->sequence_allocated = FALSE;
383  one_query->oid = query_index;
384 
385  return 0;
386 }
387 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
Definition: blast_def.h:93
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Definition: blast_def.h:88
Boolean Blast_ProgramIsMapping(EBlastProgramType p)
Definition: blast_program.c:76
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Definition: blast_program.c:60
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypePsiTblastn
Definition: blast_program.h:83
@ eBlastTypeMapping
Definition: blast_program.h:88
BlastQueryInfo * BlastQueryInfoDup(const BlastQueryInfo *query_info)
Duplicates the query information structure.
Int4 Blast_GetQueryIndexFromQueryOffset(Int4 query_offset, EBlastProgramType program, const BlastQueryInfo *query_info)
Return the query index (zero based), given the query offset in the initial HSP as the program.
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
Int4 Blast_GetQueryIndexFromContext(Int4 context, EBlastProgramType program)
Given a context from BLAST engine core, return the query index.
static Int4 s_GetTranslatedQueryDNALength(const BlastQueryInfo *query_info, Int4 query_index)
Calculates length of the DNA query from the BlastQueryInfo structure that contains context informatio...
Int2 Blast_GetOneQueryStructs(BlastQueryInfo **one_query_info_ptr, BLAST_SequenceBlk **one_query_ptr, const BlastQueryInfo *query_info, BLAST_SequenceBlk *query, Int4 query_index)
Create auxiliary query structures with all data corresponding to a single query sequence within a con...
Int8 BlastQueryInfoGetEffSearchSpace(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Retrieve a query sequence's search space.
Int4 BSearchContextInfo(Int4 n, const BlastQueryInfo *A)
Search BlastContextInfo structures for the specified offset.
Int4 * ContextOffsetsToOffsetArray(const BlastQueryInfo *info)
Copy the context query offsets to an allocated array of Int4.
void OffsetArrayToContextOffsets(BlastQueryInfo *info, Int4 *new_offsets, EBlastProgramType prog)
Copy the context query offsets from an array of Int4, allocating the context array if needed.
void BlastQueryInfoSetEffSearchSpace(BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index, Int8 eff_searchsp)
Set a query sequence's search space.
Uint4 QueryInfo_GetSeqBufLen(const BlastQueryInfo *qinfo)
Get the number of bytes required for the concatenated sequence buffer, given a query info structure.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Definitions and functions associated with the BlastQueryInfo structure.
@ eNoSegments
Sequence is not part of a pair.
Various auxiliary BLAST utility functions.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Definition: blast_util.c:839
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
Definition: blast_util.c:1373
#define NULL
Definition: ncbistd.hpp:225
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
int i
yy_size_t n
static MDB_envinfo info
Definition: mdb_load.c:37
static char * prog
Definition: mdb_load.c:33
const struct ncbi::grid::netcache::search::fields::SIZE size
#define INT1_MAX
largest number represented by a signed char (one byte)
Definition: ncbi_std.h:166
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Definition: ncbi_std.c:35
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Functions for finding pattern matches in sequence (PHI-BLAST).
SPHIQueryInfo * SPHIQueryInfoFree(SPHIQueryInfo *pat_info)
Frees the pattern information structure.
Definition: pattern.c:496
SPHIQueryInfo * SPHIQueryInfoCopy(const SPHIQueryInfo *pat_info)
Copies the SPHIQueryInfo structure.
Definition: pattern.c:507
#define A
#define count
Structure to hold a sequence.
Definition: blast_def.h:242
Int4 oid
The ordinal id of the current sequence.
Definition: blast_def.h:250
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
Definition: blast_def.h:251
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
The context related information.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 segment_flags
Flags describing segments for paired reads.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int8 eff_searchsp
Effective search space for this context.
Int4 query_index
Index of query (same for all frames)
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int4 last_context
Index of the last element of the context array.
static string query
else result
Definition: token2.c:20
static CS_CONTEXT * context
Definition: will_convert.c:21
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Fri Sep 20 14:57:26 2024 by modify_doxy.py rev. 669887