NCBI C++ ToolKit
blast_rps.h
Go to the documentation of this file.

Go to the SVN repository for this file.

/* $Id: blast_rps.h 55666 2012-09-10 13:46:47Z fongah2 $
 * ===========================================================================
 *
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Jason Papadopoulos
 *
 */

/** @file blast_rps.h
 * RPS BLAST structure definitions.
 */

#ifndef ALGO_BLAST_CORE__BLAST_RPS__H
#define ALGO_BLAST_CORE__BLAST_RPS__H

#include <algo/blast/core/ncbi_std.h>

#ifdef __cplusplus
extern "C" {
#endif

#define RPS_MAGIC_NUM 0x1e16       /**< RPS blast version number */
#define RPS_MAGIC_NUM_28 0x1e17    /**< Version number for 28-letter alphabet */
#define NUM_EXPANSION_WORDS 3      /**< Intentionally unused words in .loo file */

47 /** header of RPS blast '.loo' file */
48 
49 typedef struct BlastRPSLookupFileHeader {
50  Int4 magic_number; /**< value should be RPS_MAGIC_NUM */
51  Int4 num_lookup_tables; /**< hardwired to 1 at present */
52  Int4 num_hits; /**< number of hits in the lookup table */
53  Int4 num_filled_backbone_cells; /**< backbone cells that contain hits */
54  Int4 overflow_hits; /**< number of hits in overflow array */
55  Int4 unused[NUM_EXPANSION_WORDS];/**< empty space in the on-disk format */
56  Int4 start_of_backbone; /**< byte offset of start of backbone */
57  Int4 end_of_overflow; /**< byte offset to end of overflow array */
59 
60 /** header of RPS blast '.rps' file */
61 
62 typedef struct BlastRPSProfileHeader {
63  Int4 magic_number; /**< value should be RPS_MAGIC_NUM */
64  Int4 num_profiles; /**< number of PSSMs in the file */
65  Int4 start_offsets[1]; /**< start of an Int4 array that gives the starting
66  byte offset of each RPS DB sequence. There
67  are num_profiles+1 entries in the list, and
68  the last entry effectively contains the length
69  of all protein sequences combined. Note that
70  the length of each sequence includes one byte
71  at the end for an end-of-sequence sentinel */
72 
73  /* After the list of sequence start offsets comes the list
74  of PSSM rows. There is one row for each letter in the RPS
75  sequence database, and each row has BLASTAA_SIZE entries.
76  Because there is a sentinel byte at the end of each sequence,
77  there is also a PSSM row for each sentinel byte */
78 
80 
81 /** header for RPS blast frequency ratios ('.freq') file */
82 
83 #define FREQ_RATIO_SCALE 1000000000
84 
85 typedef struct BlastRPSFreqRatiosHeader {
87  Int4 num_profiles; /**< number of PSSMs in the file */
88  Int4 start_offsets[1]; /**< start of an Int4 array that gives the starting
89  byte offset of each RPS DB sequence. There
90  are num_profiles+1 entries in the list, and
91  the last entry effectively contains the length
92  of all protein sequences combined. Note that
93  the length of each sequence includes one byte
94  at the end for an end-of-sequence sentinel */
95 
97 
98 /** information derived from RPS blast '.aux' file */
99 
100 typedef struct BlastRPSAuxInfo {
101  char* orig_score_matrix; /**< score matrix used to derive PSSMs */
102  Int4 gap_open_penalty; /**< gap open penalty used in deriving PSSMs */
103  Int4 gap_extend_penalty; /**< gap extend penalty used in deriving PSSMs */
104  double ungapped_k; /**< ungapped Karlin value for orig_score_matrix
105  (not used) */
106  double ungapped_h; /**< ungapped Karlin value for orig_score_matrix
107  (not used) */
108  Int4 max_db_seq_length; /**< maximum DB sequence length (not used) */
109  Int4 db_length; /**< RPS DB search space (not used) */
110  double scale_factor; /**< the PSSMs are scaled by this amount, and so
111  all scores and all cutoff values must be
112  similarly scaled during the search */
113  double *karlin_k; /**< one Karlin value for each DB sequence */
115 
116 /** The RPS engine uses this structure to access all of the
117  * RPS blast related data (assumed to be collected in an
118  * implementation-specific manner).
119  */
120 typedef struct BlastRPSInfo {
121  BlastRPSLookupFileHeader *lookup_header; /**< for '.loo' file */
122  BlastRPSProfileHeader *profile_header; /**< for '.rps' file */
123  BlastRPSAuxInfo aux_info; /**< for '.aux' file */
124 
125  BlastRPSProfileHeader *freq_header; /**< for '.wcounts' file */
126  BlastRPSProfileHeader *obsr_header; /**< for '.obsr' file */
127  BlastRPSFreqRatiosHeader *freq_ratios_header; /**< for '.freq' file */
129 
130 #ifdef __cplusplus
131 }
132 #endif
133 #endif /* !ALGO_BLAST_CORE__BLAST_RPS__H */
#define NUM_EXPANSION_WORDS
Intentionally unused words in .loo file.
Definition: blast_rps.h:45
struct BlastRPSLookupFileHeader BlastRPSLookupFileHeader
header of RPS blast '.loo' file
struct BlastRPSFreqRatiosHeader BlastRPSFreqRatiosHeader
struct BlastRPSInfo BlastRPSInfo
The RPS engine uses this structure to access all of the RPS blast related data (assumed to be collected in an implementation-specific manner).
struct BlastRPSAuxInfo BlastRPSAuxInfo
information derived from RPS blast '.aux' file
struct BlastRPSProfileHeader BlastRPSProfileHeader
header of RPS blast '.rps' file
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
information derived from RPS blast '.aux' file
Definition: blast_rps.h:100
double ungapped_k
ungapped Karlin value for orig_score_matrix (not used)
Definition: blast_rps.h:104
Int4 gap_extend_penalty
gap extend penalty used in deriving PSSMs
Definition: blast_rps.h:103
double * karlin_k
one Karlin value for each DB sequence
Definition: blast_rps.h:113
Int4 max_db_seq_length
maximum DB sequence length (not used)
Definition: blast_rps.h:108
double ungapped_h
ungapped Karlin value for orig_score_matrix (not used)
Definition: blast_rps.h:106
double scale_factor
the PSSMs are scaled by this amount, and so all scores and all cutoff values must be similarly scaled during the search
Definition: blast_rps.h:110
Int4 gap_open_penalty
gap open penalty used in deriving PSSMs
Definition: blast_rps.h:102
char * orig_score_matrix
score matrix used to derive PSSMs
Definition: blast_rps.h:101
Int4 db_length
RPS DB search space (not used)
Definition: blast_rps.h:109
Int4 num_profiles
number of PSSMs in the file
Definition: blast_rps.h:87
Int4 start_offsets[1]
start of an Int4 array that gives the starting byte offset of each RPS DB sequence.
Definition: blast_rps.h:88
The RPS engine uses this structure to access all of the RPS blast related data (assumed to be collected in an implementation-specific manner).
Definition: blast_rps.h:120
BlastRPSProfileHeader * profile_header
for '.rps' file
Definition: blast_rps.h:122
BlastRPSProfileHeader * obsr_header
for '.obsr' file
Definition: blast_rps.h:126
BlastRPSAuxInfo aux_info
for '.aux' file
Definition: blast_rps.h:123
BlastRPSFreqRatiosHeader * freq_ratios_header
for '.freq' file
Definition: blast_rps.h:127
BlastRPSLookupFileHeader * lookup_header
for '.loo' file
Definition: blast_rps.h:121
BlastRPSProfileHeader * freq_header
for '.wcounts' file
Definition: blast_rps.h:125
header of RPS blast '.loo' file
Definition: blast_rps.h:49
Int4 unused[3]
empty space in the on-disk format
Definition: blast_rps.h:55
Int4 num_hits
number of hits in the lookup table
Definition: blast_rps.h:52
Int4 magic_number
value should be RPS_MAGIC_NUM
Definition: blast_rps.h:50
Int4 num_lookup_tables
hardwired to 1 at present
Definition: blast_rps.h:51
Int4 start_of_backbone
byte offset of start of backbone
Definition: blast_rps.h:56
Int4 num_filled_backbone_cells
backbone cells that contain hits
Definition: blast_rps.h:53
Int4 overflow_hits
number of hits in overflow array
Definition: blast_rps.h:54
Int4 end_of_overflow
byte offset to end of overflow array
Definition: blast_rps.h:57
header of RPS blast '.rps' file
Definition: blast_rps.h:62
Int4 magic_number
value should be RPS_MAGIC_NUM
Definition: blast_rps.h:63
Int4 num_profiles
number of PSSMs in the file
Definition: blast_rps.h:64
Int4 start_offsets[1]
start of an Int4 array that gives the starting byte offset of each RPS DB sequence.
Definition: blast_rps.h:65
Modified on Tue Apr 30 06:41:43 2024 by modify_doxy.py rev. 669887