NCBI C++ ToolKit
traceback.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_COBALT___TRACEBACK__HPP
2 #define ALGO_COBALT___TRACEBACK__HPP
3 
4 /* $Id: traceback.hpp 33815 2007-05-04 17:18:18Z kazimird $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================*/
28 
29 /*****************************************************************************
30 
31 File name: traceback.hpp
32 
33 Author: Jason Papadopoulos
34 
35 Contents: Interface for CEditScript class
36 
37 ******************************************************************************/
38 
39 /// @file traceback.hpp
40 /// Interface for CEditScript class
41 
46 
47 #include <algo/cobalt/base.hpp>
48 #include <algo/cobalt/seq.hpp>
49 
51 BEGIN_SCOPE(cobalt)
52 
53 /// Interface for the traceback from blast hits
55 {
56 public:
57  /// Generate empty traceback
58  ///
60 
61  /// Generate traceback from a blast alignment
62  /// @param blast_tback Edit script for the alignment,
63  /// computed by the blast engine [in]
64  ///
65  CEditScript(GapEditScript *blast_tback);
66 
67  /// Generate traceback from a Dense_seg
68  /// @param denseg Dense_seg representing a single pairwise alignment [in]
69  ///
70  CEditScript(const objects::CDense_seg& denseg);
71 
72  /// Generate traceback from a Dense_diag
73  /// @param dendiag Dense_diag representing a single
74  /// ungapped pairwise alignment [in]
75  ///
76  CEditScript(const objects::CDense_diag& dendiag);
77 
78  /// Destructor
79  ///
81 
82  /// Test whether edit script is empty
83  /// @return true if there are no edit operations in the script
84  ///
85  bool Empty() { return m_Script.empty(); }
86 
87  /// Reverse an edit script; insertions become deletions
88  /// and vice versa
89  ///
91  {
92  for (size_t i = 0; i < m_Script.size(); i++) {
93  if (m_Script[i].op_type == eGapAlignIns)
94  m_Script[i].op_type = eGapAlignDel;
95  else if (m_Script[i].op_type == eGapAlignDel)
96  m_Script[i].op_type = eGapAlignIns;
97  }
98  }
99 
100  /// Return an edit script corresponding to a subset of
101  /// the complete traceback available
102  /// @param tback_range The portion of the traceback desired.
103  /// The starting and ending offsets in this
104  /// range will also be included in the traceback
105  /// structure returned [in]
106  /// @return The subset of the traceback
107  ///
108  CEditScript MakeEditScript(TRange tback_range);
109 
110  /// Convert a CNWAligner edit script to a CEditScript
111  /// @param tback The edit script generated by CNWAligner [in]
112  /// @param tback_range The portion of the traceback desired.
113  /// The starting and ending offsets in this
114  /// range will also be included in the traceback
115  /// structure returned [in]
116  /// @return The subset of the traceback
117  ///
118  static CEditScript MakeEditScript(const CNWAligner::TTranscript& tback,
119  TRange tback_range);
120 
121  /// Given a subject offset, find the corresponding query offset
122  /// @param start_offsets The sequence offsets corresponding to the
123  /// start of the edit script [in]
124  /// @param new_offsets The offsets into the unaligned sequences
125  /// where the specified subject offset occurs [out]
126  /// @param seq2_target The subject offset to find [in]
127  /// @param new_tback The offset of the traceback operation
128  /// where seq2_target was found [out]
129  /// @param go_past_seq1_gap If seq2_target aligns with a gap in seq1,
130  /// include the gap if true [in]
131  ///
132  void FindOffsetFromSeq2(TOffsetPair start_offsets,
133  TOffsetPair& new_offsets,
134  TOffset seq2_target, TOffset& new_tback,
135  bool go_past_seq1_gap);
136 
137  /// Given a query offset, find the corresponding subject offset
138  /// @param start_offsets The sequence offsets corresponding to the
139  /// start of the edit script [in]
140  /// @param new_offsets The offsets into the unaligned sequences
141  /// where the specified query offset occurs [out]
142  /// @param seq1_target The query offset to find [in]
143  /// @param new_tback The offset of the traceback operation
144  /// where seq1_target was found [out]
145  /// @param go_past_seq2_gap If seq1_target aligns with a gap in seq2,
146  /// include the gap if true [in]
147  ///
148  void FindOffsetFromSeq1(TOffsetPair start_offsets,
149  TOffsetPair& new_offsets,
150  TOffset seq1_target, TOffset& new_tback,
151  bool go_past_seq2_gap);
152 
153  /// Compute the score associated with (a portion of) an alignment
154  /// Assumes that seq1 is a sequence and that seq2 is a PSSM
155  /// @param tback_range The starting and ending traceback operation
156  /// of the sub-alignment to score [in]
157  /// @param start_offsets The sequence offsets of the beginning of the
158  /// region described by the CEditScript [in]
159  /// @param seq1 The complete first sequence [in]
160  /// @param seq2_pssm PSSM representing the second sequence [in]
161  /// @param gap_open Penalty for opening a gap [in]
162  /// @param gap_extend Penalty for extending a gap [in]
163  /// @return The score of the (sub-)alignment given by tback_range
164  ///
165  int GetScore(TRange tback_range, TOffsetPair start_offsets,
166  CSequence& seq1, int **seq2_pssm,
167  int gap_open, int gap_extend);
168 
169  /// Compile a list of regions in the current edit script that
170  /// contain substitutions
171  /// @param start_offsets The sequence offsets corresponding to the
172  /// start of the range described by the CEditScript [in]
173  /// @return List of regions within the CEditScript that contain
174  /// substitutions
175  ///
176  vector<TOffsetPair> ListMatchRegions(TOffsetPair start_offsets);
177 
178  /// Validate that the alignment described by the CEditScript
179  /// has the same size for each sequence as the input ranges
180  /// @param seq1_range Start/stop offsets of the first sequence [in]
181  /// @param seq2_range Start/stop offsets of the second sequence [in]
182  ///
183  void VerifyScript(TRange seq1_range, TRange seq2_range);
184 
185 private:
186 
187  /// Runlength-encoded representation of a traceback
188  /// operation. Note that we follow the blast convention,
189  /// where a deletion is a gap in the first sequence and
190  /// an insertion is a gap in the second sequence
191  ///
192  struct STracebackOp {
193  EGapAlignOpType op_type; ///< type of operation
194  int num_ops; ///< number of such operations
195 
196  /// Create a new operation
197  /// @param op Type of operation [in]
198  /// @param num Number of operations [in]
199  ///
201  : op_type(op), num_ops(num) {}
202  };
203 
204  /// Edit script type
205  typedef vector<STracebackOp> TScriptOps;
206 
207  /// The list of edit operations in the current edit script
209 
210  /// Add a new edit operation to the current list
211  /// @param op_type Type of new operation [in]
212  /// @param num_ops The number of such operations [in]
213  ///
214  void AddOps(EGapAlignOpType op_type, int num_ops);
215 };
216 
217 
218 END_SCOPE(cobalt)
220 
221 #endif // ALGO_COBALT___TRACEBACK__HPP
User-defined methods of the data storage class.
Definitions used by all COBALT aligner components.
int TOffset
Basic data type for offsets into a sequence.
Definition: base.hpp:49
pair< TOffset, TOffset > TOffsetPair
Basic type specifying a range on a sequence.
Definition: base.hpp:52
Interface for the traceback from blast hits.
Definition: traceback.hpp:55
CEditScript(const objects::CDense_diag &dendiag)
Generate traceback from a Dense_diag.
bool Empty()
Test whether edit script is empty.
Definition: traceback.hpp:85
vector< STracebackOp > TScriptOps
Edit script type.
Definition: traceback.hpp:205
TScriptOps m_Script
The list of edit operations in the current edit script.
Definition: traceback.hpp:208
CEditScript()
Generate empty traceback.
Definition: traceback.hpp:59
~CEditScript()
Destructor.
Definition: traceback.hpp:80
void ReverseEditScript()
Reverse an edit script; insertions become deletions and vice versa.
Definition: traceback.hpp:90
CEditScript(const objects::CDense_seg &denseg)
Generate traceback from a Dense_seg.
Class for representing protein sequences.
Definition: seq.hpp:54
Definitions of structures used for saving traceback information.
EGapAlignOpType
Operation types within the edit script.
Definition: gapinfo.h:44
@ eGapAlignIns
Insertion: a gap in subject.
Definition: gapinfo.h:51
@ eGapAlignDel
Deletion: a gap in query.
Definition: gapinfo.h:45
vector< ETranscriptSymbol > TTranscript
Definition: nw_aligner.hpp:199
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_COBALT_EXPORT
Definition: ncbi_export.h:977
int i
Interface for CSequence class.
Runlength-encoded representation of a traceback operation.
Definition: traceback.hpp:192
STracebackOp(EGapAlignOpType op, int num)
Create a new operation.
Definition: traceback.hpp:200
int num_ops
number of such operations
Definition: traceback.hpp:194
EGapAlignOpType op_type
type of operation
Definition: traceback.hpp:193
Edit script: linked list of correspondences between two sequences.
Definition: gapinfo.h:57
Modified on Tue May 21 10:59:10 2024 by modify_doxy.py rev. 669887