NCBI C++ ToolKit
unordered_spliter.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UNORDERED_SPLITTER__HPP
2 #define UNORDERED_SPLITTER__HPP
3 
4 /* $Id: unordered_spliter.hpp 57897 2013-04-23 14:16:04Z boukn $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Nathan Bouk
30  *
31  * File Description: A tool for aligning sequences that contain unordered pieces
32  * (a.k.a. phase 1 clones). It splits a sequence into parts,
33  * puts each part into its own CBioseq in the given
34  * Scope, and outputs the new Seq-ids. Other code can then
35  * treat the parts as sequences in their own right and align them.
36  * This tool can then remap objects that use those local IDs
37  * back to their original Seq-id.
38  */
39 
40 #include <corelib/ncbistd.hpp>
41 #include <corelib/ncbiobj.hpp>
43 
47 #include <objmgr/scope.hpp>
51 
54 
55 
57 
// Forward declarations of types named by the declarations that follow.
// NOTE(review): these types are referred to as objects::... further down, so
// the enclosing namespace-opening line appears to be missing from this
// listing — confirm against the real header.
59  class CScope;
60  class CSeq_align;
61  class CSeq_align_set;
62  class CSeq_id;
63  class CDense_seg;
64  class CSeq_interval;
65  class CBioseq_Handle;
67 
68 
69 
71 {
72 public:
73 
// Constructor: initializes m_Scope from the address of the caller-supplied
// Scope; the body itself does nothing else.
// NOTE(review): not marked explicit, so a CScope converts implicitly to a
// CUnorderedSplitter — confirm whether that is intended.
74  CUnorderedSplitter(objects::CScope& Scope) : m_Scope(&Scope) { ; }
75 
// List of CRef-held Seq-ids; used as the output-parameter type below.
76  typedef list<CRef<objects::CSeq_id> > TSeqIdList;
// Split by Seq-id. Id is read-only; SplitIds is a non-const reference that
// receives the result.
// NOTE(review): per the file description this presumably splits the sequence
// named by Id into parts and reports the new part ids in SplitIds; whether
// SplitIds is appended to or overwritten is not visible here — confirm.
77  void SplitId(const objects::CSeq_id& Id, TSeqIdList& SplitIds);
// Split by Seq-loc. Same parameter roles as SplitId, with a location instead
// of an id.
// NOTE(review): presumably restricts the split to the region covered by Loc —
// confirm against the implementation.
78  void SplitLoc(const objects::CSeq_loc& Loc, TSeqIdList& SplitIds);
79 
// List of CRef-held Seq-aligns.
80  typedef list<CRef<objects::CSeq_align> > TSeqAlignList;
// Reads SourceAligns (const) and writes its result to MergedAligns.
// NOTE(review): per the file description this is presumably where alignments
// made on the split-part ids are remapped to the original Seq-id and merged —
// confirm against the implementation.
81  void CombineAlignments(const TSeqAlignList& SourceAligns, TSeqAlignList& MergedAligns);
82 
// Output-parameter accessor: the result is delivered through SplitIdList.
// NOTE(review): presumably reports the ids of all parts created so far —
// confirm against the implementation.
83  void GetSplitIdList(TSeqIdList& SplitIdList);
84 
85 protected:
86 
87 
88 private:
89 
91 
94 
// List of CRef-held Seq-intervals.
// NOTE(review): the map typedefs that use this type are not visible in this
// listing — see the real header.
95  typedef list< CRef<objects::CSeq_interval> > TIntervalList;
98 
// Private splitting helper. Takes the id, a Bioseq handle (by value), the
// output id list, and an optional range that defaults to a
// default-constructed TSeqRange.
// NOTE(review): presumably handles sequences whose content is a delta-ext,
// with the default LimitRange meaning "no limit" — confirm.
99  void x_SplitDeltaExt(const objects::CSeq_id& Id,
100  objects::CBioseq_Handle OrigHandle,
101  TSeqIdList& SplitIds,
102  TSeqRange LimitRange = TSeqRange() );
103 
// Private splitting helper. Takes the id, a Bioseq handle (by value), the
// output id list, and an optional range that defaults to a
// default-constructed TSeqRange.
// NOTE(review): presumably handles sequences stored as raw seq-data, with the
// default LimitRange meaning "no limit" — confirm.
104  void x_SplitSeqData(const objects::CSeq_id& Id,
105  objects::CBioseq_Handle OrigHandle,
106  TSeqIdList& SplitIds,
107  TSeqRange LimitRange = TSeqRange() );
108 
110  x_FixAlignment(const objects::CSeq_align& SourceAlignment);
111 
112  static bool s_SortByQueryStart(const CRef<objects::CSeq_align>& A,
// Operates on AlignSet through a non-const reference.
// NOTE(review): presumably sorts the list in place, likely with
// s_SortByQueryStart as the comparator — confirm.
114  void x_SortAlignSet(TSeqAlignList& AlignSet);
115 
// Operates on Alignments through a non-const reference.
// NOTE(review): presumably removes overlap between the alignments in the
// list, pair by pair — confirm against the implementation.
116  void x_MakeAlignmentsUnique(TSeqAlignList& Alignments);
117  void x_MakeAlignmentPairUnique(CRef<objects::CSeq_align> First,
// DomSeg is read-only; NonSeg is taken by non-const reference; Row selects a
// dense-seg row.
// NOTE(review): presumably trims NonSeg where it overlaps the dominant segment
// DomSeg on the given row — confirm.
119  void x_TrimRows(const objects::CDense_seg& DomSeg, objects::CDense_seg& NonSeg, int Row);
// Predicate on a read-only dense-seg.
// NOTE(review): presumably true when every segment is a gap — confirm.
120  bool x_IsAllGap(const objects::CDense_seg& Denseg);
121 
// Operates on Alignments through a non-const reference.
// NOTE(review): presumably drops alignments that lie far from the others; the
// distance criterion is not visible in this header — confirm.
122  void x_StripDistantAlignments(TSeqAlignList& Alignments);
123 
124 };
125 
126 
127 
128 // For sequences that need to be split up, like Phase 1 clones.
130 {
131 public:
133 
// Supplies a sequence masker as a raw pointer.
// NOTE(review): ownership of SeqMasker and whether null is accepted are not
// visible in this header — confirm.
135  void SetSeqMasker(CSeqMasker* SeqMasker);
136 
// Returns true when the id list held by m_SeqIdListSet is empty.
// NOTE(review): not declared const, presumably because SetIdList() is a
// non-const accessor — confirm.
137  bool Empty() { // either nothing was added, or everything that was added was pure gap
138  return m_SeqIdListSet.SetIdList().empty();
139  }
140 
142  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts);
144  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts,
145  const CAlignResultsSet& Alignments, int Threshold);
147  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts);
148 
149 protected:
// CRef-held Seq-ids kept by this set.
// NOTE(review): presumably the ids as originally added, before splitting —
// confirm.
150  list<CRef<objects::CSeq_id> > m_OrigSeqIdList;
153 };
154 
155 
156 // For sequence locations that need to be split up, like Phase 1 clones.
158 {
159 public:
161 
// Supplies a sequence masker as a raw pointer.
// NOTE(review): ownership of SeqMasker and whether null is accepted are not
// visible in this header — confirm.
163  void SetSeqMasker(CSeqMasker* SeqMasker);
164 
// Returns true when the id list held by m_SeqIdListSet is empty.
// NOTE(review): not declared const, presumably because SetIdList() is a
// non-const accessor — confirm.
165  bool Empty() { // either nothing was added, or everything that was added was pure gap
166  return m_SeqIdListSet.SetIdList().empty();
167  }
168 
170  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts);
172  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts,
173  const CAlignResultsSet& Alignments, int Threshold);
175  objects::CScope& Scope, const blast::CBlastOptionsHandle& BlastOpts);
176 
177 protected:
// CRef-held Seq-locs kept by this set.
// NOTE(review): presumably the locations as originally added, before
// splitting — confirm.
178  list<CRef<objects::CSeq_loc> > m_OrigSeqLocList;
181 };
182 
183 
184 
186 {
187 public:
188 
190  : m_Splitter(Splitter) { ; }
191 
// Returns the fixed identifier string "split_seq_aligner" for this aligner.
192  string GetName() const { return "split_seq_aligner"; }
193 
// Takes the scope, the query and subject sequence sets (raw pointers), and the
// results accumulated so far; returns a TAlignResultsRef.
// NOTE(review): presumably uses m_Splitter to combine the split-part
// alignments found in AccumResults — confirm against the implementation.
194  TAlignResultsRef GenerateAlignments(objects::CScope& Scope,
195  ISequenceSet* Querys,
196  ISequenceSet* Subjects,
197  TAlignResultsRef AccumResults);
198 
199 private:
201 
202 };
203 
204 
205 
207 
208 #endif
static CRef< CScope > m_Scope
CBioseq_Handle –.
CScope –.
Definition: scope.hpp:92
list< CRef< objects::CSeq_id > > & SetIdList()
Main interface to window based masker functionality.
Definition: seq_masker.hpp:53
CSplitSeqAlignMerger(CUnorderedSplitter *Splitter)
CUnorderedSplitter * m_Splitter
TAlignResultsRef GenerateAlignments(objects::CScope &Scope, ISequenceSet *Querys, ISequenceSet *Subjects, TAlignResultsRef AccumResults)
void SetSeqMasker(CSeqMasker *SeqMasker)
CSplitSeqIdListSet(CUnorderedSplitter *Splitter)
CUnorderedSplitter * m_Splitter
CRef< blast::CLocalDbAdapter > CreateLocalDbAdapter(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts)
CRef< blast::IQueryFactory > CreateQueryFactory(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts, const CAlignResultsSet &Alignments, int Threshold)
CSeqIdListSet m_SeqIdListSet
void AddSeqId(CRef< objects::CSeq_id > Id)
list< CRef< objects::CSeq_id > > m_OrigSeqIdList
CRef< blast::IQueryFactory > CreateQueryFactory(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts)
CUnorderedSplitter * m_Splitter
CRef< blast::IQueryFactory > CreateQueryFactory(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts)
void AddSeqLoc(CRef< objects::CSeq_loc > Loc)
CSplitSeqLocListSet(CUnorderedSplitter *Splitter)
CRef< blast::CLocalDbAdapter > CreateLocalDbAdapter(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts)
list< CRef< objects::CSeq_loc > > m_OrigSeqLocList
CSeqIdListSet m_SeqIdListSet
CRef< blast::IQueryFactory > CreateQueryFactory(objects::CScope &Scope, const blast::CBlastOptionsHandle &BlastOpts, const CAlignResultsSet &Alignments, int Threshold)
void SetSeqMasker(CSeqMasker *SeqMasker)
list< CRef< objects::CSeq_interval > > TIntervalList
CUnorderedSplitter(objects::CScope &Scope)
CRef< objects::CScope > m_Scope
TSplitIntervalsMap m_PartsMap
map< string, CRef< objects::CSeq_interval > > TSplitIntervalsMap
list< CRef< objects::CSeq_align > > TSeqAlignList
list< CRef< objects::CSeq_id > > TSeqIdList
map< string, TIntervalList > TSplitIntMap
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
#define A
#define Loc
#define B
Modified on Fri Sep 20 14:57:49 2024 by modify_doxy.py rev. 669887