NCBI C++ ToolKit
splign_exon_trim.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_ALIGN_SPLIGN_EXON_TRIM_HPP
2 #define ALGO_ALIGN_SPLIGN_EXON_TRIM_HPP
3 
4 /* $Id: splign_exon_trim.hpp 100425 2023-07-31 13:44:51Z mozese2 $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Boris Kiyutin
30 *
31 * File Description: exon trimming (alignment post processing step)
32 *
33 * ===========================================================================
34 */
35 
36 
40 
42 
43 
44 //trimming of spliced alignments (exon trimming, an alignment post-processing step)
45 class CSplignTrim {
46 public:
47 
48  typedef CNWFormatter::SSegment TSeg; //AKA CSplign::TSegment
49  typedef vector<TSeg> TSegs; //AKA CSplign::TSegments
50 
    //stores the arguments in the corresponding members; the constructor body does no other work
    //seq/seqlen: the genomic sequence and its length (see m_seq, m_seqlen)
51  CSplignTrim(const char *seq, int seqlen, CConstRef<CSplicedAligner> aligner, double max_part_exon_drop) : m_seq(seq), m_seqlen(seqlen), m_aligner(aligner), m_MaxPartExonIdentDrop(max_part_exon_drop)
52  {
53  }
54 
55  //check if the exon segments[p] abuts another exon in genomic coordinates
    //note: segments is taken by value, so each call copies the whole vector
56  static bool HasAbuttingExonOnRight(TSegs segments, TSeqPos p);
57  //same check on the left side (segments is likewise taken by value)
58  static bool HasAbuttingExonOnLeft(TSegs segments, TSeqPos p);
59 
60  //legacy check
61  //if the exon is too short, throws it away and returns true
62  //otherwise returns false
63  bool ThrowAwayShortExon(TSeg& s);
64 
65  bool ThrowAway20_28_90(TSeg& s);
66 
67  //cut len bases from the left
68  //len is the length on the alignment to cut
69  void CutFromLeft(size_t len, TSeg& s);
70 
71  void CutToMatchLeft(TSeg& s);
72  void Cut50FromLeft(TSeg& s);
73  void ImproveFromLeft(TSeg& s);
74 
75  //cut len bases from the right
76  //len is the length on the alignment to cut
77  void CutFromRight(size_t len, TSeg& s);
78 
79  void CutToMatchRight(TSeg& s);
80  void Cut50FromRight(TSeg& s);
81  void ImproveFromRight(TSeg& s);
82 
83  // aka stitch holes
84  //joins exons segments[p1] and segments[p2] into a single exon
85  //everything in between becomes a regular gap in query adjacent to a regular gap in subject
86  void JoinExons(TSegs& segments, TSeqPos p1, TSeqPos p2);
87 
88  //trims exons around internal alignment gaps to complete codons
89  //if CDS can be retrieved from bioseq
90  void TrimHolesToCodons(TSegs& segments, objects::CBioseq_Handle& mrna_bio_handle, bool mrna_strand, TSeqPos mrna_len);
91 
92  // updates m_annot for a segment based on SSegment::m_box and CSplignTrim::m_seq
93  void UpdateAnnot(TSeg& s);
94 
95  // assumes s.exon, s.m_box, and s.m_details are correct
96  // updates the rest of the segment fields, including m_annot
97  void Update(TSeg& s);
98 
99  void AdjustGaps(TSegs& segments);
100 
101 private:
102  const char *m_seq;//genomic sequence
103  const int m_seqlen;
    //NOTE(review): the constructor initializes m_aligner, but its declaration
    //(listing line 104) is not visible in this view - confirm against the original header
105  const double m_MaxPartExonIdentDrop;
106 };
107 
109 
110 #endif
void ImproveFromLeft(TSeg &s)
const double m_MaxPartExonIdentDrop
void Cut50FromLeft(TSeg &s)
void CutToMatchRight(TSeg &s)
void UpdateAnnot(TSeg &s)
CSplignTrim(const char *seq, int seqlen, CConstRef< CSplicedAligner > aligner, double max_part_exon_drop)
vector< TSeg > TSegs
static bool HasAbuttingExonOnLeft(TSegs segments, TSeqPos p)
void Cut50FromRight(TSeg &s)
void TrimHolesToCodons(TSegs &segments, objects::CBioseq_Handle &mrna_bio_handle, bool mrna_strand, TSeqPos mrna_len)
void ImproveFromRight(TSeg &s)
CConstRef< CSplicedAligner > m_aligner
bool ThrowAway20_28_90(TSeg &s)
void JoinExons(TSegs &segments, TSeqPos p1, TSeqPos p2)
void AdjustGaps(TSegs &segments)
static bool HasAbuttingExonOnRight(TSegs segments, TSeqPos p)
bool ThrowAwayShortExon(TSeg &s)
void CutFromLeft(size_t len, TSeg &s)
void CutToMatchLeft(TSeg &s)
void Update(TSeg &s)
const char * m_seq
const int m_seqlen
void CutFromRight(size_t len, TSeg &s)
CNWFormatter::SSegment TSeg
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
int len
Modified on Fri Sep 20 14:57:52 2024 by modify_doxy.py rev. 669887