NCBI C++ ToolKit
greedy_align.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: greedy_align.h 57477 2013-03-13 14:36:38Z maning $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file greedy_align.h
31  * Prototypes and structures for greedy gapped alignment
32  */
33 
34 #ifndef ALGO_BLAST_CORE__GREEDY_ALIGN__H
35 #define ALGO_BLAST_CORE__GREEDY_ALIGN__H
36 
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 /** sequence_length / (this number) is a measure of how hard the
45  alignment code will work to find the optimal alignment; in fact
46  this gives a worst case bound on the number of loop iterations */
47 #define GREEDY_MAX_COST_FRACTION 2
48 
49 /** The largest distance to be examined for an optimal alignment */
50 #define GREEDY_MAX_COST 1000
51 
52 /* ----- pool allocator ----- */
53 
54 /** Bookkeeping structure for greedy alignment. When aligning
55  two sequences, the members of this structure store the
56  largest offset into the second sequence that leads to a
57  high-scoring alignment for a given start point */
58 typedef struct SGreedyOffset {
59  Int4 insert_off; /**< Best offset for a path ending in an insertion */
60  Int4 match_off; /**< Best offset for a path ending in a match */
61  Int4 delete_off; /**< Best offset for a path ending in a deletion */
63 
64 /** Space structure for greedy alignment algorithm */
65 typedef struct SMBSpace {
66  SGreedyOffset* space_array; /**< array of bookkeeping structures */
67  Int4 space_allocated; /**< number of structures allocated */
68  Int4 space_used; /**< number of structures actually in use */
69  struct SMBSpace *next; /**< pointer to next structure in list */
71 
72 /** Allocate a space structure for greedy alignment
73  * At least num_space_arrays will be allocated, possibly more if the
74  * number is low.
75  *
76  * @param num_space_arrays number of array elements to allocated [in]
77  * @return Pointer to allocated structure, or NULL upon failure
78  */
80 SMBSpace* MBSpaceNew(int num_space_arrays);
81 
82 /** Free the space structure
83  @param sp Linked list of structures to free
84 */
86 void MBSpaceFree(SMBSpace* sp);
87 
88 /** All auxiliary memory needed for the greedy extension algorithm. */
89 typedef struct SGreedyAlignMem {
90  Int4 max_dist; /**< max distance to search */
91  Int4 xdrop; /**< Xdrop value */
92  Int4** last_seq2_off; /**< 2-D array of distances */
93  Int4* max_score; /**< array of maximum scores */
94  SGreedyOffset** last_seq2_off_affine; /**< Like last_seq2_off but for
95  affine searches */
96  Int4* diag_bounds; /**< bounds on ranges of diagonals */
97  SMBSpace* space; /**< local memory pool for
98  SGreedyOffset structs */
100 
101 /** Structure for locating high-scoring seeds for greedy alignment */
102 typedef struct SGreedySeed {
103  Int4 start_q; /**< query offset of start of run of matches */
104  Int4 start_s; /**< subject offset of start of run of matches */
105  Int4 match_length; /**< length of run of matches */
107 
108 /** Perform the greedy extension algorithm with non-affine gap penalties.
109  * @param seq1 First sequence (always uncompressed) [in]
110  * @param len1 Maximal extension length in first sequence [in]
111  * @param seq2 Second sequence (may be compressed) [in]
112  * @param len2 Maximal extension length in second sequence [in]
113  * @param reverse Is extension performed in backwards direction? [in]
114  * @param xdrop_threshold X-dropoff value to use in extension [in]
115  * @param match_cost Match score to use in extension [in]
116  * @param mismatch_cost Mismatch score to use in extension [in]
117  * @param seq1_align_len Length of extension on sequence 1 [out]
118  * @param seq2_align_len Length of extension on sequence 2 [out]
119  * @param aux_data Structure containing all preallocated memory [in]
120  * @param edit_block Edit script structure for saving traceback.
121  * Traceback is not saved if NULL is passed. [in] [out]
122  * @param rem Offset within a byte of the compressed second sequence.
123  * Set to 4 if sequence is uncompressed. [in]
124  * @param fence_hit True is returned here if overrun is detected. [in]
125  * @param seed Structure to remember longest run of exact matches [out]
126  * @return The minimum distance between the two sequences, i.e.
127  * the number of mismatches plus gaps in the resulting alignment
128  */
130 Int4
131 BLAST_GreedyAlign (const Uint1* seq1, Int4 len1,
132  const Uint1* seq2, Int4 len2,
133  Boolean reverse, Int4 xdrop_threshold,
134  Int4 match_cost, Int4 mismatch_cost,
135  Int4* seq1_align_len, Int4* seq2_align_len,
136  SGreedyAlignMem* aux_data,
137  GapPrelimEditBlock *edit_block, Uint1 rem,
138  Boolean * fence_hit, SGreedySeed *seed);
139 
140 /** Perform the greedy extension algorithm with affine gap penalties.
141  * @param seq1 First sequence (always uncompressed) [in]
142  * @param len1 Maximal extension length in first sequence [in]
143  * @param seq2 Second sequence (may be compressed) [in]
144  * @param len2 Maximal extension length in second sequence [in]
145  * @param reverse Is extension performed in backwards direction? [in]
146  * @param xdrop_threshold X-dropoff value to use in extension [in]
147  * @param match_cost Match score to use in extension [in]
148  * @param mismatch_cost Mismatch score to use in extension [in]
149  * @param in_gap_open Gap opening penalty [in]
150  * @param in_gap_extend Gap extension penalty [in]
151  * @param seq1_align_len Length of extension on sequence 1 [out]
152  * @param seq2_align_len Length of extension on sequence 2 [out]
153  * @param aux_data Structure containing all preallocated memory [in]
154  * @param edit_block Edit script structure for saving traceback.
155  * Traceback is not saved if NULL is passed. [in] [out]
156  * @param rem Offset within a byte of the compressed second sequence.
157  * Set to 4 if sequence is uncompressed. [in]
158  * @param fence_hit True is returned here if overrun is detected. [in]
159  * @param seed Structure to remember longest run of exact matches [out]
160  * @return The score of the alignment
161  */
163 Int4
164 BLAST_AffineGreedyAlign (const Uint1* seq1, Int4 len1,
165  const Uint1* seq2, Int4 len2,
166  Boolean reverse, Int4 xdrop_threshold,
167  Int4 match_cost, Int4 mismatch_cost,
168  Int4 in_gap_open, Int4 in_gap_extend,
169  Int4* seq1_align_len, Int4* seq2_align_len,
170  SGreedyAlignMem* aux_data,
171  GapPrelimEditBlock *edit_block, Uint1 rem,
172  Boolean * fence_hit, SGreedySeed *seed);
173 
174 #ifdef __cplusplus
175 }
176 #endif
177 #endif /* !ALGO_BLAST_CORE__GREEDY_ALIGN__H */
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Definitions of structures used for saving traceback information.
struct SGreedySeed SGreedySeed
Structure for locating high-scoring seeds for greedy alignment.
struct SMBSpace SMBSpace
Space structure for greedy alignment algorithm.
struct SGreedyOffset SGreedyOffset
Bookkeeping structure for greedy alignment.
void MBSpaceFree(SMBSpace *sp)
Free the space structure.
Definition: greedy_align.c:80
Int4 BLAST_AffineGreedyAlign(const Uint1 *seq1, Int4 len1, const Uint1 *seq2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_cost, Int4 mismatch_cost, Int4 in_gap_open, Int4 in_gap_extend, Int4 *seq1_align_len, Int4 *seq2_align_len, SGreedyAlignMem *aux_data, GapPrelimEditBlock *edit_block, Uint1 rem, Boolean *fence_hit, SGreedySeed *seed)
Perform the greedy extension algorithm with affine gap penalties.
Definition: greedy_align.c:755
Int4 BLAST_GreedyAlign(const Uint1 *seq1, Int4 len1, const Uint1 *seq2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_cost, Int4 mismatch_cost, Int4 *seq1_align_len, Int4 *seq2_align_len, SGreedyAlignMem *aux_data, GapPrelimEditBlock *edit_block, Uint1 rem, Boolean *fence_hit, SGreedySeed *seed)
Perform the greedy extension algorithm with non-affine gap penalties.
Definition: greedy_align.c:380
SMBSpace * MBSpaceNew(int num_space_arrays)
Allocate a space structure for greedy alignment At least num_space_arrays will be allocated,...
Definition: greedy_align.c:43
struct SGreedyAlignMem SGreedyAlignMem
All auxiliary memory needed for the greedy extension algorithm.
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:94
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Definition: gapinfo.h:73
All auxiliary memory needed for the greedy extension algorithm.
Definition: greedy_align.h:89
SGreedyOffset ** last_seq2_off_affine
Like last_seq2_off but for affine searches.
Definition: greedy_align.h:94
Int4 xdrop
Xdrop value.
Definition: greedy_align.h:91
Int4 * max_score
array of maximum scores
Definition: greedy_align.h:93
SMBSpace * space
local memory pool for SGreedyOffset structs
Definition: greedy_align.h:97
Int4 max_dist
max distance to search
Definition: greedy_align.h:90
Int4 * diag_bounds
bounds on ranges of diagonals
Definition: greedy_align.h:96
Int4 ** last_seq2_off
2-D array of distances
Definition: greedy_align.h:92
Bookkeeping structure for greedy alignment.
Definition: greedy_align.h:58
Int4 delete_off
Best offset for a path ending in a deletion.
Definition: greedy_align.h:61
Int4 insert_off
Best offset for a path ending in an insertion.
Definition: greedy_align.h:59
Int4 match_off
Best offset for a path ending in a match.
Definition: greedy_align.h:60
Structure for locating high-scoring seeds for greedy alignment.
Definition: greedy_align.h:102
Int4 start_q
query offset of start of run of matches
Definition: greedy_align.h:103
Int4 match_length
length of run of matches
Definition: greedy_align.h:105
Int4 start_s
subject offset of start of run of matches
Definition: greedy_align.h:104
Space structure for greedy alignment algorithm.
Definition: greedy_align.h:65
struct SMBSpace * next
pointer to next structure in list
Definition: greedy_align.h:69
Int4 space_allocated
number of structures allocated
Definition: greedy_align.h:67
Int4 space_used
number of structures actually in use
Definition: greedy_align.h:68
SGreedyOffset * space_array
array of bookkeeping structures
Definition: greedy_align.h:66
static int seed
Definition: test_table.cpp:132
Modified on Fri Sep 20 14:57:31 2024 by modify_doxy.py rev. 669887