NCBI C++ ToolKit
seqdbfilter.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBFILTER_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBFILTER_HPP
3 
4 /* $Id: seqdbfilter.hpp 97226 2022-06-28 12:33:29Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file seqdbfilter.hpp
34 /// Implementation for some assorted ID list filtering code.
35 ///
36 /// Defines classes:
37 /// CSeqDB_AliasMask
38 /// CSeqDB_FilterTree
39 ///
40 /// Implemented for: UNIX, MS-Windows
41 
43 
45 
46 /// Import definitions from the ncbi::objects namespace.
48 
49 
50 /// Describes one filter: a mask type plus the file path, OID range, or MEMBIT value it applies.
51 class CSeqDB_AliasMask : public CObject {
52 public:
53  /// Various types of masking.
54  enum EMaskType {
55  eGiList, ///< GI list.
56  eTiList, ///< TI list.
57  eSiList, ///< SI list.
58  eOidList, ///< OID list.
59  eOidRange, ///< OID Range [start, end).
60  eMemBit, ///< MEMBIT filter.
61  eTaxIdList ///< Taxonomy Id List.
62  };
63 
64  /// Constructor for file-based filtering.
65  /// @param mask_type Type of file-based filtering to apply.
66  /// @param fn Name of file containing included IDs info.
67  CSeqDB_AliasMask(EMaskType mask_type, const CSeqDB_Path & fn, int OidMaskType = 0)
68  : m_MaskType(mask_type),
69  m_Path (fn),
70  m_Begin (0),
71  m_End (0),
72  m_MemBit (0),
73  m_OidMaskType (OidMaskType)
74  {
75  if(OidMaskType && m_MaskType == eOidList) {
76  m_OidMaskType = OidMaskType;
77  }
78  }
79 
80  /// Constructor for OID range filter.
81  /// @param begin First included OID.
82  /// @param end OID after last included OID.
83  CSeqDB_AliasMask(int begin, int end)
85  m_Begin (begin),
86  m_End (end),
87  m_MemBit (0),
88  m_OidMaskType (0)
89  {
90  }
91 
92  /// Constructor for MEMBIT filter.
93  /// @param mem_bit Membit to filter on.
94  CSeqDB_AliasMask(int mem_bit)
96  m_Begin (0),
97  m_End (0),
98  m_MemBit (mem_bit),
99  m_OidMaskType(0)
100  {
101  }
102 
103  /// Build string describing the filtering action.
104  /// @return string describing the filtering action.
105  string ToString() const
106  {
107  const char *p = 0;
108  bool r = false;
109 
110  switch(m_MaskType) {
111  case eGiList: p="eGiList"; break;
112  case eTiList: p="eTiList"; break;
113  case eSiList: p="eSiList"; break;
114  case eOidList: p="eOidList"; break;
115  case eOidRange: p="eOidRange";
116  r = true;
117  break;
118  case eMemBit: p="eMemBit"; break;
119  case eTaxIdList: p="eTaxIdList"; break;
120  }
121 
122  string oss;
123  oss = oss + "{ " + p + ", ";
124 
125  if (r) {
126  oss = oss + NStr::IntToString(m_Begin) + ":" + NStr::IntToString(m_End);
127  } else {
128  oss = oss + m_Path.GetPathS();
129  }
130  oss += " }";
131  return oss;
132  }
133 
134  /// Get type of filtering applied.
135  /// @return type of filtering applied.
137  {
138  return m_MaskType;
139  }
140 
141  /// Get path of file-based filter.
142  /// @return path of file-based filter.
144  {
145  return m_Path;
146  }
147 
148  /// Get first included OID.
149  /// @return First included OID.
150  int GetBegin() const
151  {
152  return m_Begin;
153  }
154 
155  /// Get OID after last included OID.
156  /// @return OID after last included OID.
157  int GetEnd() const
158  {
159  return m_End;
160  }
161 
162  /// Get Membit
163  /// @return MemBit
164  int GetMemBit() const
165  {
166  return m_MemBit;
167  }
168 
169  int GetOidMaskType() const
170  {
171  return m_OidMaskType;
172  }
173 
174  void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
175  {
176  ddc.SetFrame("CSeqDB_AliasMask");
178  ddc.Log("m_MaskType", m_MaskType);
179  ddc.Log("m_Path", m_Path.GetPathS());
180  ddc.Log("m_Begin", m_Begin);
181  ddc.Log("m_End", m_End);
182  ddc.Log("m_MemBit", m_MemBit);
183  }
184 
185 private:
186  /// Type of filtering to apply.
188 
189  /// Path of file describing included IDs.
191 
192  /// First included OID.
193  int m_Begin;
194 
195  /// OID after last included OID.
196  int m_End;
197 
198  /// Membit to filter
199  int m_MemBit;
200 
201  /// Oid Mask Type
203 };
204 
205 
206 /// Tree of nodes describing filtering of database sequences.
207 class CSeqDB_FilterTree : public CObject {
208 public:
209  /// Type used to store lists of filters found here.
210  typedef vector< CRef<CSeqDB_AliasMask> > TFilters;
211 
212  /// Construct.
214  {
215  }
216 
217  /// Set the node name.
218  /// @param name Name of alias node generating this filter node.
219  void SetName(string name)
220  {
221  m_Name = name;
222  }
223 
224  /// Add filters to this node.
225  /// @param filters Filters to add here.
226  void AddFilters(const TFilters & filters)
227  {
228  m_Filters.insert(m_Filters.end(), filters.begin(), filters.end());
229  }
230 
231  /// Get filters from this node.
232  /// @return Filters attached here.
233  const TFilters & GetFilters() const
234  {
235  return m_Filters;
236  }
237 
238  /// Add a child node to this node.
239  /// @param node Child node to add here.
241  {
242  m_SubNodes.push_back(node);
243  }
244 
245  /// Add several child nodes to this node.
246  /// @param nodes Child nodes to add here.
247  void AddNodes(const vector< CRef<CSeqDB_FilterTree> > & nodes)
248  {
249  m_SubNodes.insert(m_SubNodes.end(), nodes.begin(), nodes.end());
250  }
251 
252  /// Get child nodes attached to this node.
253  /// @return Child nodes attached here.
254  const vector< CRef<CSeqDB_FilterTree> > & GetNodes() const
255  {
256  return m_SubNodes;
257  }
258 
259  /// Attach a volume to this node.
260  /// @param vol Path to new volume.
261  void AddVolume(const CSeqDB_BasePath & vol)
262  {
263  m_Volumes.push_back(vol);
264  }
265 
266  /// Attach several volumes to this node.
267  /// @param vols Paths to new volumes.
268  void AddVolumes(const vector<CSeqDB_BasePath> & vols)
269  {
270  m_Volumes.insert(m_Volumes.end(), vols.begin(), vols.end());
271  }
272 
273  /// Get volumes attached to this node.
274  /// @return Paths to attached volumes.
275  const vector<CSeqDB_BasePath> & GetVolumes() const
276  {
277  return m_Volumes;
278  }
279 
280  /// Print a formatted description of this tree.
281  ///
282  /// This is very useful for maintainability, e.g. debugging and
283  /// for analysis of system behavior. It prints an indented tree
284  /// of filter tree nodes with volumes and mask information.
285  void Print() const
286  {
287  int indent = 0;
288  x_Print(indent);
289  }
290 
291  /// Specialize this tree for the indicated volume.
292  ///
293  /// This method returns a copy of this filter tree, specialized on
294  /// the specified volume. Filter Tree specialization removes all
295  /// volumes except the one matching the provided name, and cleans
296  /// up any unnecessary or ineffective elements. This tree is not
297  /// changed in place, but the new tree will share any subelements
298  /// of this tree that did not need to change.
299  ///
300  /// Because the OID list is constructed recursively from this tree
301  /// structure, inefficiencies or redundancies here can result in
302  /// unnecessary and possibly very expensive extra work. Thus, the
303  /// goal here is to produce the simplest tree that can correctly
304  /// represent all filtering for the given volume.
305  ///
306  /// @param volname The name of the volume to specialize on.
307  /// @return A specialized and simplified tree.
308  CRef<CSeqDB_FilterTree> Specialize(string volname) const;
309 
310  /// Check whether this tree represents any volume filtering.
311  /// @return True iff any volumes included here are filtered.
312  bool HasFilter() const;
313 
314 private:
315  /// "Pretty-print" this tree in symbolic form.
316  /// @param indent The amount of spaces to indent each line.
317  void x_Print(int indent) const
318  {
319  string tab1(indent, ' ');
320  string tab2(indent+4, ' ');
321 
322  cout << tab1 << "Node(" << m_Name << ")\n";
323  cout << tab1 << "{\n";
324  ITERATE(TFilters, iter, m_Filters) {
325  cout << tab2 << "Filter -> " << (**iter).ToString() << "\n";
326  }
327 
328  if (m_Filters.size() && m_Volumes.size())
329  cout << "\n";
330 
331  ITERATE(vector<CSeqDB_BasePath>, vol_iter, m_Volumes) {
332  cout << tab2 << "Volume: " << vol_iter->GetBasePathS() << "\n";
333  }
334 
335  if ((m_Filters.size() || m_Volumes.size()) && m_SubNodes.size())
336  cout << "\n";
337 
338  bool first = true;
339 
340  ITERATE(vector< CRef<CSeqDB_FilterTree> >, iter, m_SubNodes) {
341  if (first) {
342  first = false;
343  } else {
344  cout << "\n";
345  }
346  (**iter).x_Print(indent + 4);
347  }
348  cout << tab1 << "}\n";
349  }
350 
351  /// Prevent copy-construction of this object.
353 
354  /// Prevent assignment of this class.
356 
357  /// The node name.
358  string m_Name;
359 
360  /// List of sequence inclusion filters.
362 
363  /// Other nodes included by this node.
364  vector< CRef<CSeqDB_FilterTree> > m_SubNodes;
365 
366  /// Database volumes attached at this level.
367  vector<CSeqDB_BasePath> m_Volumes;
368 };
369 
370 
372 
373 #endif // OBJTOOLS_READERS_SEQDB__SEQDBFILTER_HPP
374 
375 
void SetFrame(const string &frame)
Definition: ddumpable.cpp:137
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
Definition: ddumpable.cpp:151
CObject –.
Definition: ncbiobj.hpp:180
Something else yet again etc.
Definition: seqdbfilter.hpp:51
int m_End
OID after last included OID.
int GetEnd() const
Get OID after last included OID.
CSeqDB_Path m_Path
Path of file describing included IDs.
CSeqDB_AliasMask(EMaskType mask_type, const CSeqDB_Path &fn, int OidMaskType=0)
Constructor for file-based filtering.
Definition: seqdbfilter.hpp:67
int m_Begin
First included OID.
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
CSeqDB_AliasMask(int mem_bit)
Constructor for MEMBIT filter.
Definition: seqdbfilter.hpp:94
EMaskType
Various types of masking.
Definition: seqdbfilter.hpp:54
@ eGiList
GI list.
Definition: seqdbfilter.hpp:55
@ eSiList
SI list.
Definition: seqdbfilter.hpp:57
@ eTaxIdList
Taxonomy Id List.
Definition: seqdbfilter.hpp:61
@ eOidRange
OID Range [start, end).
Definition: seqdbfilter.hpp:59
@ eTiList
TI list.
Definition: seqdbfilter.hpp:56
@ eMemBit
MEMBIT filter.
Definition: seqdbfilter.hpp:60
@ eOidList
OID list.
Definition: seqdbfilter.hpp:58
string ToString() const
Build string describing the filtering action.
EMaskType m_MaskType
Type of filtering to apply.
int GetBegin() const
Get first included OID.
EMaskType GetType() const
Get type of filtering applied.
CSeqDB_Path GetPath() const
Get path of file-based filter.
int m_MemBit
Membit to filter.
CSeqDB_AliasMask(int begin, int end)
Constructor for OID range filter.
Definition: seqdbfilter.hpp:83
int m_OidMaskType
Oid Mask Type.
int GetOidMaskType() const
int GetMemBit() const
Get Membit.
CSeqDB_BasePath.
Tree of nodes describing filtering of database sequences.
void AddVolume(const CSeqDB_BasePath &vol)
Attach a volume to this node.
CSeqDB_FilterTree()
Construct.
TFilters m_Filters
List of sequence inclusion filters.
void AddNodes(const vector< CRef< CSeqDB_FilterTree > > &nodes)
Add several child nodes to this node.
void x_Print(int indent) const
"Pretty-print" this tree in symbolic form.
bool HasFilter() const
Check whether this tree represents any volume filtering.
Definition: seqdbfilter.cpp:81
void SetName(string name)
Set the node name.
string m_Name
The node name.
const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const
Get child nodes attached to this node.
vector< CRef< CSeqDB_AliasMask > > TFilters
Type used to store lists of filters found here.
void Print() const
Print a formatted description of this tree.
void AddFilters(const TFilters &filters)
Add filters to this node.
vector< CSeqDB_BasePath > m_Volumes
Database volumes attached at this level.
vector< CRef< CSeqDB_FilterTree > > m_SubNodes
Other nodes included by this node.
CSeqDB_FilterTree(CSeqDB_FilterTree &other)
Prevent copy-construction of this object.
CSeqDB_FilterTree & operator=(CSeqDB_FilterTree &other)
Prevent assignment of this class.
const TFilters & GetFilters() const
Get filters from this node.
void AddVolumes(const vector< CSeqDB_BasePath > &vols)
Attach several volumes to this node.
const vector< CSeqDB_BasePath > & GetVolumes() const
Get volumes attached to this node.
CRef< CSeqDB_FilterTree > Specialize(string volname) const
Specialize this tree for the indicated volume.
Definition: seqdbfilter.cpp:38
void AddNode(CRef< CSeqDB_FilterTree > node)
Add a child node to this node.
CSeqDB_Path.
const string & GetPathS() const
Get the path as a string.
static unsigned char depth[2 *(256+1+29)+1]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: ncbiobj.cpp:988
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
USING_SCOPE(objects)
Import definitions from the ncbi::objects namespace.
Defines database volume access classes.
string indent(" ")
Modified on Fri Sep 20 14:57:45 2024 by modify_doxy.py rev. 669887