NCBI C++ ToolKit
merge_tree_core.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef MERGE_TREE_CORE__HPP
2 #define MERGE_TREE_CORE__HPP
3 
4 /* $Id: merge_tree_core.hpp 95353 2021-11-08 20:20:45Z mozese2 $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Nathan Bouk
30 *
31 * File Description:
32 * Alignment merge by way of depth-first tree search
33 *
34 * ===========================================================================
35 */
36 
37 #include <corelib/ncbistr.hpp>
38 #include <corelib/ncbidiag.hpp>
39 #include <corelib/ncbistd.hpp>
40 #include <corelib/ncbiobj.hpp>
41 #include <corelib/ncbi_system.hpp>
42 
43 #include <serial/serialbase.hpp>
44 #include <serial/serial.hpp>
45 
49 
50 
51 
54 #include <objmgr/scope.hpp>
55 #include <objmgr/seq_vector.hpp>
56 #include <util/range.hpp>
57 
58 
61 
62 //
63 // Uncomment this to enable verbose debug output
64 //
65 //#define MERGE_TREE_VERBOSE_DEBUG 1
66 
67 
69 
71 class CSeq_id;
72 class CSeq_align;
74 
75 
76 
77 
78 
79 class CMergeNode : public CObject
80 {
81 public:
82  CMergeNode(int NId) { Id = NId; SelfScore = ChainScore = numeric_limits<ssize_t>::min(); QI = SI = -1; }
83  CMergeNode(CEquivRange Value, int NId) : Equiv(Value) { Id = NId; SelfScore = ChainScore = numeric_limits<ssize_t>::min(); QI= SI= -1; }
84 
86  int Id;
87 
90 
94 
95  int QI, SI;
96 };
97 bool operator<(const CMergeNode& A, const CMergeNode& B);
98 
100 typedef vector<TMergeNode> TMergeNodeVec;
101 
102 bool operator<(const TMergeNode& A, const TMergeNode& B);
103 
104 class CTreeAlignMerger;
105 
106 // Only Valid for Split Equivs
107 // 100 equivs is instant
108 // 1000 takes a second
109 // 10000 takes a 30 minutes
110 // 100000 probably takes eternity
112 {
113 public:
115  {
116  m_NodeIdCounter = 0;
118  m_NodeIdCounter++;
119  }
120  ~CMergeTree();
121 
123 
124  const static unsigned int kFrameBufSize = 20000;
125  const static unsigned int kMaxChildFrames = 2;
126 
127  struct SFindBeforesIterFrame;
129 
132  bool Returned;
133 
135  vector<TFrameRef> ChildFrames;
136 
138  {
139  ChildFrames.reserve(kMaxChildFrames);
140  }
141  };
142 
143  typedef deque<SFindBeforesIterFrame> TFrameBuffer;
144 
145  struct SScoring {
146  int Match;
147  int MisMatch;
148  int GapOpen;
150  SScoring() : Match(3), MisMatch(-1), GapOpen(-1), GapExtend(-1) { ; }
151  SScoring(int M, int X, int O, int E) : Match(M), MisMatch(X), GapOpen(O), GapExtend(E) { ; }
152  };
153 
154  void SetScoring(SScoring Scoring) { m_Scoring = Scoring; }
155 
156  void AddEquiv( CEquivRange NewEquiv ) ;
157  void AddEquivs(const TEquivList& Equivs);
158 
159  void Search(TEquivList& Path, bool Once=false);
160 
161  int Score(const TEquivList& Path) const;
162 
163  bool IsEmpty() const;
164  size_t Size() const;
165  size_t Links() const;
166 
167  void Print(CNcbiOstream& Out);
168 
169 
170  // mod from algo/blast/core/blast_def.h ,
171  // but didnt want to require the blast include
172  typedef bool (*TInterruptFnPtr) (void* callback_data);
173  TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr Callback, void* CallbackData)
174  { m_Callback = Callback; m_CallbackData = CallbackData; return m_Callback; }
175  bool GetInterrupted() { return m_Interrupted; }
176 private:
177 
180 
181  void x_AddChild(TMergeNode Parent, TMergeNode Child);
182  void x_RemoveChild(TMergeNode Parent, TMergeNode Child);
183  void x_AddParent(TMergeNode Child, TMergeNode Parent);
184  void x_RemoveParent(TMergeNode Child, TMergeNode Parent);
185 
186  void x_LinkNodes(TMergeNode Parent, TMergeNode Child);
187  void x_UnLinkNodes(TMergeNode Parent, TMergeNode Child);
188 
189 
191 
193 
197  unsigned int m_InterruptCounter;
199  const static unsigned int INTERRUPT_CHECK_RATE = 100;
200 
202 
206 
207  void x_FindLeafs(TMergeNode Curr, set<TMergeNode>& Leafs, TBitVec& Explored);
208 
209  // Trickles down
210  bool x_FindBefores(TMergeNode New, TMergeNode Curr,
211  set<TMergeNode>& Befores, TBitVec& Explored, TBitVec& Inserted, int& Depth);
212  bool x_FindAfters(TMergeNode New, TMergeNode Curr,
213  set<TMergeNode>& Afters, TBitVec& Explored, TBitVec& Inserted);
214  // Trickles up, start from leafs
216  set<TMergeNode>& Befores, TBitVec& Explored, TBitVec& Inserted, int& Depth);
217  bool x_FindBefores_Up_Iter(TMergeNode New, TMergeNode StartCurr,
218  set<TMergeNode>& Befores, TBitVec& Explored, TBitVec& Inserted, int& Depth);
219  bool x_FindAfters_Up(TMergeNode New, TMergeNode Curr,
220  set<TMergeNode>& Afters, TBitVec& Explored, TBitVec& Inserted);
221 
222 
223 
224  // returns best answer starting point
226  TBitVec& Explored,
227  TMergeNode& UnBest);
229  TBitVec& Explored,
230  TMergeNode& UnBest);
231 
232  void x_EvalGap(const CEquivRange& Early, const CEquivRange& Late,
233  ssize_t& GapOpen, ssize_t& GapExtend) const;
234 
235 
236  bool x_IsEventualChildOf(TMergeNode Parent, TMergeNode ToFind, TBitVec& Explored);
237 
238  void x_Print(CNcbiOstream& Out, TMergeNode Node, int Level, int& Count, TBitVec& Explored);
239 
240  void x_Dot(CNcbiOstream& Out, TMergeNode Node);
241  void x_Dot_Nodes(CNcbiOstream& Out, TMergeNode Node, TBitVec& Explored);
242  void x_Dot_Edges(CNcbiOstream& Out, TMergeNode Node, TBitVec& Explored);
243 
244  size_t x_CountChildNodes(TMergeNode Node, TBitVec& Explored) const;
245  size_t x_CountChildLinks(TMergeNode Node, TBitVec& Explored) const;
246 };
247 
248 
250 
251 
252 #endif // end MERGE_TREE_CORE__HPP
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
CMergeNode(int NId)
CEquivRange Equiv
ssize_t ChainScore
set< CRef< CMergeNode > > Children
CMergeNode(CEquivRange Value, int NId)
CRef< CMergeNode > BestChild
set< CRef< CMergeNode > > Parents
ssize_t SelfScore
TInterruptFnPtr m_Callback
CMergeTree(CTreeAlignMerger &align_merger)
void AddEquiv(CEquivRange NewEquiv)
int Score(const TEquivList &Path) const
CTreeAlignMerger & m_AlignMerger
void SetScoring(SScoring Scoring)
deque< SFindBeforesIterFrame > TFrameBuffer
bool GetInterrupted()
map< int, TMergeNode > TNodeCache
void x_RemoveParent(TMergeNode Child, TMergeNode Parent)
void x_Dot_Edges(CNcbiOstream &Out, TMergeNode Node, TBitVec &Explored)
bool x_FindBefores_Up_Iter(TMergeNode New, TMergeNode StartCurr, set< TMergeNode > &Befores, TBitVec &Explored, TBitVec &Inserted, int &Depth)
TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr Callback, void *CallbackData)
void x_Dot(CNcbiOstream &Out, TMergeNode Node)
void x_LinkNodes(TMergeNode Parent, TMergeNode Child)
set< TMergeNode > m_Leaves
void Search(TEquivList &Path, bool Once=false)
void Print(CNcbiOstream &Out)
bitvec< unsigned int > TBitVec
void x_Dot_Nodes(CNcbiOstream &Out, TMergeNode Node, TBitVec &Explored)
void * m_CallbackData
static const unsigned int kFrameBufSize
bool x_FindBefores(TMergeNode New, TMergeNode Curr, set< TMergeNode > &Befores, TBitVec &Explored, TBitVec &Inserted, int &Depth)
void x_UnLinkNodes(TMergeNode Parent, TMergeNode Child)
SFindBeforesIterFrame * TFrameRef
size_t Links() const
void x_AddChild(TMergeNode Parent, TMergeNode Child)
void x_AddParent(TMergeNode Child, TMergeNode Parent)
bool x_FindBefores_Up_Recur(TMergeNode New, TMergeNode Curr, set< TMergeNode > &Befores, TBitVec &Explored, TBitVec &Inserted, int &Depth)
SScoring m_Scoring
void x_Print(CNcbiOstream &Out, TMergeNode Node, int Level, int &Count, TBitVec &Explored)
TNodeCache m_NodeCache
size_t Size() const
void AddEquivs(const TEquivList &Equivs)
void x_FindLeafs(TMergeNode Curr, set< TMergeNode > &Leafs, TBitVec &Explored)
TMergeNode x_GetNode(CEquivRange Equiv)
TMergeNode x_Search_Iter(TMergeNode StartNode, TBitVec &Explored, TMergeNode &UnBest)
bool x_FindAfters(TMergeNode New, TMergeNode Curr, set< TMergeNode > &Afters, TBitVec &Explored, TBitVec &Inserted)
size_t x_CountChildLinks(TMergeNode Node, TBitVec &Explored) const
bool x_FindAfters_Up(TMergeNode New, TMergeNode Curr, set< TMergeNode > &Afters, TBitVec &Explored, TBitVec &Inserted)
size_t x_CountChildNodes(TMergeNode Node, TBitVec &Explored) const
void x_RemoveChild(TMergeNode Parent, TMergeNode Child)
void x_CheckInterruptCallback()
TMergeNode m_Root
bool x_IsEventualChildOf(TMergeNode Parent, TMergeNode ToFind, TBitVec &Explored)
bool(* TInterruptFnPtr)(void *callback_data)
bool IsEmpty() const
unsigned int m_InterruptCounter
TMergeNode x_Search_Recur(TMergeNode CurrNode, TBitVec &Explored, TMergeNode &UnBest)
static const unsigned int INTERRUPT_CHECK_RATE
void x_EvalGap(const CEquivRange &Early, const CEquivRange &Late, ssize_t &GapOpen, ssize_t &GapExtend) const
static const unsigned int kMaxChildFrames
CObject –.
Definition: ncbiobj.hpp:180
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
#define A(i)
Definition: ecp_curves.c:948
vector< CEquivRange > TEquivList
Definition: equiv_range.hpp:48
string Path(const string &dir, const string &file)
Definition: fileutil.cpp:243
#define NULL
Definition: ncbistd.hpp:225
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
vector< TMergeNode > TMergeNodeVec
CRef< CMergeNode > TMergeNode
bool operator<(const CMergeNode &A, const CMergeNode &B)
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
Definition: document.h:2107
int ssize_t
Definition: ncbiconf_msvc.h:92
Defines NCBI C++ diagnostic APIs, classes, and macros.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
const double E
T min(T x_, T y_)
The Object manager core.
void Out(T t, int w, CNcbiOstream &to=cout)
Definition: parse.cpp:467
SScoring(int M, int X, int O, int E)
Modified on Thu Feb 29 12:18:32 2024 by modify_doxy.py rev. 669887