NCBI C++ ToolKit
nw_formatter.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_ALIGN__NW_FORMAT__HPP
2 #define ALGO_ALIGN__NW_FORMAT__HPP
3 
4 /* $Id: nw_formatter.hpp 100425 2023-07-31 13:44:51Z mozese2 $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Yuri Kapustin, Boris Kiryutin
30 *
31 * File Description:
32 * Library's formatting functionality.
33 */
34 
35 /** @addtogroup AlgoAlignFormat
36  *
37  * @{
38  */
39 
40 #include <corelib/ncbistd.hpp>
41 #include <corelib/ncbiobj.hpp>
45 
46 #include <deque>
47 
49 
50 
52  class CSeq_align;
54 
55 
57 {
58 public:
59 
60  CNWFormatter(const CNWAligner& aligner);
61 
62  // supported text formats
69  eFormatExonTable, // spliced alignments
70  eFormatExonTableEx //
71  };
72 
73  // seq-align format flags
75  eSAFF_None = 0,
76  eSAFF_DynProgScore = 1,
77  eSAFF_Identity = 2,
78  eSAFF_TrimEndGaps = 4
79  };
80 
81  // setters
82 
83  void SetSeqIds(CConstRef<objects::CSeq_id> id1,
85 
86  // formatters
87 
88  void AsText(string* output, ETextFormatType type,
89  size_t line_width = 100) const;
90 
91  // formatters in case the sequences were taken from a Seq_loc
92  // default arguments are for the whole sequence alignment
93 
94  CRef<objects::CSeq_align> AsSeqAlign (
95  TSeqPos query_start = 0, objects::ENa_strand query_strand = objects::eNa_strand_plus,
96  TSeqPos subj_start = 0, objects::ENa_strand subj_strand = objects::eNa_strand_plus,
97  int SAFF_flags = eSAFF_None) const;
98 
99  CRef<objects::CDense_seg> AsDenseSeg (
100  TSeqPos query_start = 0, objects::ENa_strand query_strand = objects::eNa_strand_plus,
101  TSeqPos subj_start = 0, objects::ENa_strand subj_strand = objects::eNa_strand_plus,
102  int SAFF_flags = eSAFF_None) const;
103 
104  // SSegment is a structural unit of a spliced alignment. It represents
105  // either an exon or an unaligned segment.
107 
108  public:
109 
110  bool m_exon; // true == exon; false == unaligned
111  double m_idty; // ranges from 0.0 to 1.0
112  size_t m_len; // length of the alignment, not of an interval
113  size_t m_box [4]; // query([0],[1]) and subj([2],[3]) coordinates
114  string m_annot; // text description like AG<exon>GT
115  string m_details; // transcript for exons, '-' for gaps
116 
117  float m_score; // dynprog score (normalized)
118 
119 
120  //old style:
121 
122  void ImproveFromLeft(const char* seq1, const char* seq2,
124  void ImproveFromRight(const char* seq1, const char* seq2,
126 
127  //trimming, new style:
128  void ImproveFromLeft1(const char* seq1, const char* seq2,
130  void ImproveFromRight1(const char* seq1, const char* seq2,
132 
133  size_t GapLength(); //count total gap length
134  bool IsLowComplexityExon(const char *rna_seq);
135 
136  //check if 100% extension is possible, returns the length of possible extension
137  int CanExtendRight(const vector<char>& mrna, const vector<char>& genomic) const;
138  int CanExtendLeft(const vector<char>& mrna, const vector<char>& genomic) const;
139 
140  //do extend, 100% identity in extension is implied
141  void ExtendRight(const vector<char>& mrna, const vector<char>& genomic, Int8 ext_len, const CNWAligner* aligner);
142  void ExtendLeft(const vector<char>& mrna, const vector<char>& genomic, Int8 ext_len, const CNWAligner* aligner);
143 
144  void Update(const CNWAligner* aligner); // recompute members
145  const char* GetDonor(void) const; // raw pointers to parts of annot
146  const char* GetAcceptor(void) const; // or zero if less than 2 chars
147  void SetToGap();//set segment to a gap
148 
149  static bool s_IsConsensusSplice(const char* donor, const char* acceptor,
150  bool semi_as_cons = false);
151 
152  // NetCache-related serialization
153  typedef vector<char> TNetCacheBuffer;
154  void ToBuffer (TNetCacheBuffer* buf) const;
155  void FromBuffer (const TNetCacheBuffer& buf);
156  };
157 
158  // partition a spliced alignment into SSegment's
159  void MakeSegments(vector<SSegment>* psegments) const;
160  void MakeSegments(deque<SSegment>* psegments) const;//should be deprecated
161 
162 private:
163 
166 
167  size_t x_ApplyTranscript(vector<char>* seq1_transformed,
168  vector<char>* seq2_transformed) const;
169 };
170 
171 
173 
174 /* @} */
175 
176 #endif /* ALGO_ALIGN__NW_FORMAT__HPP */
CObject –.
Definition: ncbiobj.hpp:180
Include a standard set of the NCBI C++ Toolkit most basic headers.
static SQLCHAR output[256]
Definition: print.c:5
const CNWAligner * m_aligner
CConstRef< objects::CSeq_id > m_Seq2Id
vector< char > TNetCacheBuffer
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XALGOALIGN_EXPORT
Definition: ncbi_export.h:985
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
char * buf
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
string GetAcceptor(const objects::CSpliced_exon &exon)
string GetDonor(const objects::CSpliced_exon &exon)
Definition: type.c:6
Modified on Sun Jul 21 04:13:28 2024 by modify_doxy.py rev. 669887