NCBI C++ ToolKit
annot.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef __GNOMON__ANNOT__HPP
2 #define __GNOMON__ANNOT__HPP
3 
4 /* $Id: annot.hpp 67192 2015-04-27 14:27:13Z souvorov $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Vyacheslav Chetvernin
30  *
31  * File Description:
32  *
33  * Builds annotation models out of chained alignments:
34  * selects good chains as alternatively spliced genes,
35  * selects good chains inside other chains' introns,
36  * other chains filtered to leave one chain per placement,
37  * gnomon is run to improve chains and predict models in regions w/o chains
38  *
39  */
40 
42 #include <algo/gnomon/gnomon.hpp>
43 #include <algo/gnomon/chainer.hpp>
44 
46 
48 
49 class CArgDescriptions;
50 class CArgs;
51 
52 BEGIN_SCOPE(gnomon)
53 
54 
56 public:
57  CGeneSelector();
58 
59  /// Filters genes, excluding genes flagged to be skipped.
60  ///
61  /// Results are pushed to the back of the specified output container.
62  ///
63  /// @param chains Input list of models, which are not modified. NOTE(review): taken by non-const reference, and the warning below says gene IDs are renumbered - confirm against the implementation.
64  /// @param bad_aligns Output list of skipped models.
65  /// @param dest Output list of models which were not skipped.
66  /// @note This overload is declared void: nothing is returned, the models which were not skipped are delivered through @p dest.
67  /// @warning The models' gene IDs are renumbered.
68  ///
69  /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns)
70  void FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns,
71  TGeneModelList& dest);
72 
73  /// Filters genes, excluding genes flagged to be skipped.
74  ///
75  /// Results are returned in a new list.
76  ///
77  /// @param chains Input list of models, which are not modified. NOTE(review): taken by non-const reference, and the warning below says gene IDs are renumbered - confirm against the implementation.
78  /// @param bad_aligns Output list of skipped models.
79  /// @return List of models which were not skipped.
80  /// @warning The models' gene IDs are renumbered.
81  ///
82  /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns)
83  TGeneModelList FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns);
84 
85 private:
86 };
87 
89 public:
92 
93  bool GnomonNeeded() const { return do_gnomon; } ///< Returns the current value of the do_gnomon member.
94 
95  void Predict(TGeneModelList& models, TGeneModelList& bad_aligns);
96  void Predict(TGeneModelList& models, TGeneModelList& bad_aligns, TSignedSeqPos left, TSignedSeqPos right);
97 
98 public:
99  int mincontig;
101 
102 private:
103  void RemoveShortHolesAndRescore(TGeneModelList chains); // NOTE(review): chains is passed by value, so the callee receives its own copy of the list; confirm this is intended rather than a missing '&'.
104  void Predict(TSignedSeqPos llimit, TSignedSeqPos rlimit, TGeneModelList::const_iterator il, TGeneModelList::const_iterator ir,
105  TGeneModelList& models,
106  bool leftmostwall, bool rightmostwall, bool leftmostanchor, bool rightmostanchor,
107  TGeneModelList& bad_aligns);
108 
109  double TryWithoutObviouslyBadAlignments(TGeneModelList& aligns, TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
110  bool leftwall, bool rightwall, bool leftanchor, bool rightanchor,
111  TSignedSeqPos left, TSignedSeqPos right,
112  TSignedSeqRange& tested_range);
113  double TryToEliminateOneAlignment(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
114  bool leftwall, bool rightwall, bool leftanchor, bool rightanchor);
115  double TryToEliminateAlignmentsFromTail(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
116  bool leftwall, bool rightwall, bool leftanchor, bool rightanchor);
117  double ExtendJustThisChain(CGeneModel& chain, TSignedSeqPos left, TSignedSeqPos right);
118 
119  bool do_gnomon;
120  int window;
121  int margin;
122  bool wall;
123  double mpp;
124  double nonconsensp;
125 
127 };
128 
131 
132  virtual void transform_model(CGeneModel& a);
133 private:
134  const CResidueVec& seq;
135 };
136 
138 public:
139  static bool CanBeConnectedIntoOne(const CGeneModel& a, const CGeneModel& b);
140  static size_t CountCommonSplices(const CGeneModel& a, const CGeneModel& b);
141  static bool AreSimilar(const CGeneModel& a, const CGeneModel& b, int tolerance);
142  static bool BadOverlapTest(const CGeneModel& a, const CGeneModel& b);
143  static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel& algn, bool check_in_holes = true);
144  static bool HaveCommonExonOrIntron(const CGeneModel& a, const CGeneModel& b);
145 };
146 
148 public:
149  static void SetupArgDescriptions(CArgDescriptions* arg_desc);
150  static void ReadArgs(CGnomonAnnotator* annot, const CArgs& args);
151 };
152 
153 END_SCOPE(gnomon)
155 
156 #endif // __GNOMON__ANNOT__HPP
157 
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
bool GnomonNeeded() const
Definition: annot.hpp:93
double nonconsensp
Definition: annot.hpp:124
static bool HaveCommonExonOrIntron(const CGeneModel &a, const CGeneModel &b)
static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel &algn, bool check_in_holes=true)
static size_t CountCommonSplices(const CGeneModel &a, const CGeneModel &b)
static bool AreSimilar(const CGeneModel &a, const CGeneModel &b, int tolerance)
static bool CanBeConnectedIntoOne(const CGeneModel &a, const CGeneModel &b)
static bool BadOverlapTest(const CGeneModel &a, const CGeneModel &b)
vector< TResidue > CResidueVec
list< CGeneModel > TGeneModelList
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XALGOGNOMON_EXPORT
Definition: ncbi_export.h:1001
unsigned int a
Definition: ncbi_localip.c:102
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
virtual void transform_model(CGeneModel &a)
Definition: annot.cpp:799
const CResidueVec & seq
Definition: annot.hpp:134
RemoveTrailingNs(const CResidueVec &seq)
Definition: annot.cpp:794
Modified on Sat Mar 02 10:56:44 2024 by modify_doxy.py rev. 669887