NCBI C++ ToolKit
alntext.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_ALNTEXT__HPP
2 #define OBJTOOLS_ALNTEXT__HPP
3 
4 /* $Id: alntext.hpp 100758 2023-09-07 19:05:43Z mozese2 $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Eyal Mozes
30 *
31 * File Description:
32 * Text representation of protein alignment
33 * refactored from algo/prosplign
34 *
35 */
36 
37 #include <corelib/ncbistd.hpp>
38 #include <corelib/ncbiargs.hpp>
39 #include <corelib/ncbiobj.hpp>
42 
43 #include <list>
44 
46 
48  class CScope;
49  class CSeqVector_CI;
50  class CTrans_table;
52 
53 /// Text representation of ProSplign alignment
54 // dna : GATGAAACAGCACTAGTGACAGGTAAA----GATCTAAATATCGTTGA<skip>GGAAGACATCCATTGGCAATGGCAATGGCAT
55 // translation: D E T A L V T G K S K Y h hh I H
56 // match : | | + | | | | | | + ++ + | XXXXXbad partXXXXX
57 // protein : D E Q S F --- T G K E Y S K Y y.....intron.....yy L H D T S T E G
58 //
59 // Note: the "<skip>", "intron", and "bad part" labels above are for illustration only; they do not appear in the actual values.
61 public:
62  static const char GAP_CHAR; // used in dna and protein text
63  static const char SPACE_CHAR; // translation and protein
64  static const char INTRON_CHAR; // protein
65  static const char INTRON_OR_GAP[];
66 
67  // used in match text
68  static const char BAD_PIECE_CHAR;
69  static const char MISMATCH_CHAR;
70  static const char BAD_OR_MISMATCH[];
71  static const char MATCH_CHAR;
72  static const char POSIT_CHAR;
73 
74  CProteinAlignText(objects::CScope& scope, const objects::CSeq_align& seqalign, const string& matrix_name = "BLOSUM62");
76 
77  const string& GetDNA() const { return m_dna; } // read-only reference to m_dna (presumably the "dna" row of the text alignment)
78  const string& GetTranslation() const { return m_translation; } // read-only reference to m_translation (presumably the "translation" row)
79  const string& GetMatch() const { return m_match; } // read-only reference to m_match (presumably the "match" row)
80  const string& GetProtein() const { return m_protein; } // read-only reference to m_protein (presumably the "protein" row)
81 
82  static CRef<objects::CSeq_loc> GetGenomicBounds(objects::CScope& scope,
83  const objects::CSeq_align& seqalign);
84 
85  static char TranslateTriplet(const objects::CTrans_table& table,
86  const string& triplet);
87 
88 private:
89  string m_dna;
90  string m_translation;
91  string m_match;
92  string m_protein;
93 
94  const objects::CTrans_table* m_trans_table;
96 
97  void AddDNAText(objects::CSeqVector_CI& genomic_ci, int& nuc_prev, int len);
98  void TranslateDNA(int phase, size_t len, bool is_insertion);
99  void AddProtText(objects::CSeqVector_CI& protein_ci, int& prot_prev, int len);
100  void MatchText(size_t len, bool is_match=false);
101  char MatchChar(size_t i);
102  void AddHoleText(bool prev_3_prime_splice, bool cur_5_prime_splice,
103  objects::CSeqVector_CI& genomic_ci, objects::CSeqVector_CI& protein_ci,
104  int& nuc_prev, int& prot_prev,
105  int nuc_cur_start, int prot_cur_start);
106  void AddSpliceText(objects::CSeqVector_CI& genomic_ci, int& nuc_prev, char match);
107 };
108 
110 
111 
112 #endif
const char BAD_OR_MISMATCH[]
Definition: Info.cpp:65
const char INTRON_OR_GAP[]
Definition: Info.cpp:60
Text representation of ProSplign alignment.
Definition: alntext.hpp:60
SNCBIFullScoreMatrix m_matrix
Definition: alntext.hpp:95
const string & GetDNA() const
Definition: alntext.hpp:77
static const char MATCH_CHAR
Definition: alntext.hpp:71
static const char INTRON_CHAR
Definition: alntext.hpp:64
static const char SPACE_CHAR
Definition: alntext.hpp:63
const objects::CTrans_table * m_trans_table
Definition: alntext.hpp:94
static const char MISMATCH_CHAR
Definition: alntext.hpp:69
const string & GetMatch() const
Definition: alntext.hpp:79
string m_protein
Definition: alntext.hpp:92
const string & GetProtein() const
Definition: alntext.hpp:80
static const char BAD_PIECE_CHAR
Definition: alntext.hpp:68
static const char POSIT_CHAR
Definition: alntext.hpp:72
string m_translation
Definition: alntext.hpp:90
const string & GetTranslation() const
Definition: alntext.hpp:78
static const char GAP_CHAR
Definition: alntext.hpp:62
CScope –.
Definition: scope.hpp:92
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XALNMGR_EXPORT
Definition: ncbi_export.h:1065
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
int len
Defines command line argument related classes.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
Modified on Fri Sep 20 14:58:14 2024 by modify_doxy.py rev. 669887