NCBI C++ ToolKit
advanced_cleanup.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef GPIPE_ALIGN_PROC__ADVANCED_CLEANUP__HPP
2 #define GPIPE_ALIGN_PROC__ADVANCED_CLEANUP__HPP
3 
4 /* $Id: advanced_cleanup.hpp 92210 2020-12-22 18:51:04Z grichenk $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Eyal Mozes
30  *
31  * File Description:
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
37 #include <objmgr/scope.hpp>
38 
41 
46 
48 
49 
50 ///////////////////////////////////////////////////////////////////////
51 
53 {
54 public:
55  static void SetupArgDescriptions(CArgDescriptions &arg_desc);
56 
57  enum EQueryType {
61  eProtein
62  };
63 
67  eDirBoth
68  };
69 
72  typedef pair<TCoord,TCoord> TCoordRange;
73 
75  objects::CSeq_id_Handle query;
77  objects::CSeq_id_Handle subject;
81 
82  static unsigned s_MaxRnaIntronSize;
83  static unsigned s_MinRnaTotalCoverage;
84 
85  bool HasRnaCharacteristics();
86  };
87 
89 
91 
92  typedef vector< pair<TSeqPos, CRef< objects::CSeq_align > > > TAlignsByPos;
93 
94 
97  void Reset();
98 
99  void SetParams(const CArgs& args);
100  void SetScope(const CRef<objects::CScope> &scope);
101 
102  void SetHardMaskRanges(objects::CSeq_id_Handle idh, const CSplign::TSeqRangeColl& mask_ranges) { // Inline pass-through: hands idh and mask_ranges, unchanged, to the m_Splign member.
103  m_Splign.SetHardMaskRanges(idh, mask_ranges); // No state is touched here other than through m_Splign; NOTE(review): presumably resolves to CSplign::SetHardMaskRanges - confirm against the m_Splign declaration.
104  }
105 
106  /// Clean up a list of alignments, returning the cleaned alignments in cleaned_aligns
107  /// @param one_pair - If true, all alignments are guaranteed to have the
108  /// same query and subject
109  void Cleanup(const objects::CSeq_align_set::Tdata& input_aligns,
110  objects::CSeq_align_set::Tdata& cleaned_aligns,
111  EQueryType query_type = eInfer,
112  bool with_best_placement = true,
113  bool one_pair = false,
114  ESplignDirRun splign_direction = eDirBoth);
115 
116  void Cleanup(const TAlignsBySubject& query_aligns,
117  objects::CSeq_align_set::Tdata& cleaned_aligns,
118  EQueryType query_type = eInfer,
119  bool with_best_placement = true,
120  ESplignDirRun splign_direction = eDirBoth);
121 
122  void DivideByQuerySubjectPairs(const objects::CSeq_align_set::Tdata& input_aligns,
123  TAlignsBySeqIds &aligns_by_pair);
124 
125  /// Divide list of RNA alignments into Splign compartments
126  /// @param one_pair - If true, all alignments are guaranteed to have the
127  /// same query and subject
128  void GetSplignCompartments(const objects::CSeq_align_set::Tdata& input_aligns,
129  list<CSplignCompartment> &compartments,
130  bool one_pair = false);
131 
132  /// Divide list of genomic alignments into compartments
133  /// @param one_pair - If true, all alignments are guaranteed to have the
134  /// same query and subject
135  void GetGenomicCompartments(const objects::CSeq_align_set::Tdata& input_aligns,
136  list< CRef<objects::CSeq_align_set> > &compartments,
137  bool one_pair = false);
138 
139  void GetProsplignCompartments(const objects::CSeq_align_set::Tdata& input_aligns,
140  prosplign::TCompartments &compartments,
141  bool one_pair = false,
142  TAlignsByPos *aligns_by_pos = NULL);
143 
146  ESplignDirRun dir);
147 
148  void CleanupGenomicCompartment(const objects::CSeq_align_set::Tdata& compart,
149  objects::CSeq_align_set::Tdata& cleaned_aligns,
150  bool add_scores = true);
151 
152  void BestPlacement(objects::CSeq_align_set::Tdata &aligns);
153 
154 private:
156 
158 
159  double m_Penalty;
160  double m_MinIdty;
164  bool m_NoXF;
168 
172  unique_ptr<prosplign::CCompartOptions> m_CompartOptions;
177 
179  {
180  public:
181  CSplignAlignmentHit(const objects::CSeq_align &align);
182 
184 
186 
187  private:
189  };
190 
191  bool x_CleanupProsplignCompartment(const objects::CSeq_annot &compartment,
192  const TAlignsByPos &aligns_by_pos,
193  objects::CSeq_align_set::Tdata& cleaned_aligns,
194  TSeqRange &genomic_range);
195 
196  void x_CleanupProsplignAsGenomic(const TAlignsByPos &aligns_by_pos,
197  const TSeqRange &genomic_range,
198  objects::CSeq_align_set::Tdata& cleaned_aligns);
199 
201  const list<CSplignCompartment> &splign_compartments,
202  list< CRef<objects::CSeq_align_set> > &genomic_compartments);
203 
204  void x_AddStandardAlignmentScores(objects::CSeq_align& align);
205 };
206 
207 
208 
209 ///////////////////////////////////////////////////////////////////////
210 
211 
213 
214 #endif // GPIPE_ALIGN_PROC__ADVANCED_CLEANUP__HPP
215 
CRef< objects::CSeq_align > GetAlign() const
CSplignAlignmentHit(const objects::CSeq_align &align)
void x_CleanupProsplignAsGenomic(const TAlignsByPos &aligns_by_pos, const TSeqRange &genomic_range, objects::CSeq_align_set::Tdata &cleaned_aligns)
void SetParams(const CArgs &args)
TAccessor::TCoord TCoord
unique_ptr< prosplign::CCompartOptions > m_CompartOptions
CRef< objects::CScope > m_Scope
map< objects::CSeq_id_Handle, CSeq_align_set::Tdata > TAlignsBySubject
CRef< objects::CSeq_align > RunSplignOnCompartment(const CSplignCompartment &compart, ESplignDirRun dir)
void x_SplignCompartmentsToGenomicFormat(const list< CSplignCompartment > &splign_compartments, list< CRef< objects::CSeq_align_set > > &genomic_compartments)
void GetGenomicCompartments(const objects::CSeq_align_set::Tdata &input_aligns, list< CRef< objects::CSeq_align_set > > &compartments, bool one_pair=false)
Divide list of genomic alignments into compartments.
static void SetupArgDescriptions(CArgDescriptions &arg_desc)
map< objects::CSeq_id_Handle, vector< pair< TSeqPos, TSeqPos > > > m_seq_gaps
void Cleanup(const TAlignsBySubject &query_aligns, objects::CSeq_align_set::Tdata &cleaned_aligns, EQueryType query_type=eInfer, bool with_best_placement=true, ESplignDirRun splign_direction=eDirBoth)
void SetScope(const CRef< objects::CScope > &scope)
vector< pair< TSeqPos, CRef< objects::CSeq_align > > > TAlignsByPos
void GetProsplignCompartments(const objects::CSeq_align_set::Tdata &input_aligns, prosplign::TCompartments &compartments, bool one_pair=false, TAlignsByPos *aligns_by_pos=NULL)
void CleanupGenomicCompartment(const objects::CSeq_align_set::Tdata &compart, objects::CSeq_align_set::Tdata &cleaned_aligns, bool add_scores=true)
map< objects::CSeq_id_Handle, TAlignsBySubject > TAlignsBySeqIds
void GetSplignCompartments(const objects::CSeq_align_set::Tdata &input_aligns, list< CSplignCompartment > &compartments, bool one_pair=false)
Divide list of RNA alignments into Splign compartments.
void Cleanup(const objects::CSeq_align_set::Tdata &input_aligns, objects::CSeq_align_set::Tdata &cleaned_aligns, EQueryType query_type=eInfer, bool with_best_placement=true, bool one_pair=false, ESplignDirRun splign_direction=eDirBoth)
Clean up a list of alignments, returning the cleaned alignments in cleaned_aligns.
CCompartmentAccessor< CSplign::THit > TAccessor
bool x_CleanupProsplignCompartment(const objects::CSeq_annot &compartment, const TAlignsByPos &aligns_by_pos, objects::CSeq_align_set::Tdata &cleaned_aligns, TSeqRange &genomic_range)
CRef< CProSplignScoring > m_ProSplignScoring
void x_AddStandardAlignmentScores(objects::CSeq_align &align)
void SetHardMaskRanges(objects::CSeq_id_Handle idh, const CSplign::TSeqRangeColl &mask_ranges)
void DivideByQuerySubjectPairs(const objects::CSeq_align_set::Tdata &input_aligns, TAlignsBySeqIds &aligns_by_pair)
CRef< CProSplign > m_ProSplign
CRef< CProSplignOutputOptions > m_ProSplignOutputOptions
void BestPlacement(objects::CSeq_align_set::Tdata &aligns)
pair< TCoord, TCoord > TCoordRange
CArgDescriptions – container for the descriptions of a program's command-line arguments.
Definition: ncbiargs.hpp:541
CArgs – the parsed command-line arguments.
Definition: ncbiargs.hpp:379
TCompartmentFinder::TCoord TCoord
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
Definition: splign.hpp:74
void SetHardMaskRanges(objects::CSeq_id_Handle idh, const TSeqRangeColl &mask_ranges)
Definition: splign.hpp:233
vector< THitRef > THitRefs
Definition: splign.hpp:295
Definition: map.hpp:338
list< CRef< CSeq_annot > > TCompartments
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
The Object manager core.
Modified on Mon May 20 05:03:51 2024 by modify_doxy.py rev. 669887