NCBI C++ ToolKit
seqdbvolset.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBVOLSET_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBVOLSET_HPP
3 
4 /* $Id: seqdbvolset.hpp 78368 2017-06-13 18:26:55Z rackerst $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file seqdbvolset.hpp
34 /// Manages a set of database volumes.
35 ///
36 /// Defines classes:
37 /// CSeqDBVolSet
38 /// CVolEntry
39 ///
40 /// Implemented for: UNIX, MS-Windows
41 
43 #include "seqdbfilter.hpp"
45 
47 
48 /// Import definitions from the ncbi::objects namespace.
50 
51 /// CSeqDBVolEntry
52 ///
53 /// This class controls access to the CSeqDBVol class. It contains
54 /// data that is not relevant to the internal operation of a volume,
55 /// but is associated with that volume for operations over the volume
56 /// set as a whole, such as the starting OID of the volume and masking
57 /// information (GI and OID lists).
58 
60 public:
61  /// Constructor
62  ///
63  /// This creates an object containing the specified volume object
64  /// pointer. Although this object owns the pointer, it uses a
65  /// vector, so it does not keep an auto pointer or CRef<>.
66  /// Instead, the destructor of the CSeqDBVolSet class deletes the
67  /// volumes by calling Free() in a destructor. Using indirect
68  /// pointers (CRef<> for example) would require slightly more
69  /// cycles in several performance critical paths.
70  ///
71  /// @param new_vol
72  /// A pointer to a volume.
74  : m_Vol (new_vol),
75  m_OIDStart (0),
76  m_OIDEnd (0)
77  {
78  }
79 
80  /// Free the volume object
81  ///
82  /// The associated volume object is deleted.
83  void Free()
84  {
85  if (m_Vol) {
86  delete m_Vol;
87  m_Vol = 0;
88  }
89  }
90 
91  /// Set the OID range
92  ///
93  /// The volume is queried for the number of OIDs it contains, and
94  /// the starting and ending OIDs are set.
95  ///
96  /// @param start The first OID in the range.
97  void SetStartAndEnd(int start)
98  {
99  m_OIDStart = start;
100  m_OIDEnd = start + m_Vol->GetNumOIDs();
101  }
102 
103  /// Get the starting OID in this volume's range.
104  ///
105  /// This returns the first OID in this volume's OID range.
106  ///
107  /// @return The starting OID of the range
108  int OIDStart() const
109  {
110  return m_OIDStart;
111  }
112 
113  /// Get the ending OID in this volume's range.
114  ///
115  /// This returns the first OID past the end of this volume's OID
116  /// range.
117  ///
118  /// @return
119  /// The ending OID of the range
120  int OIDEnd() const
121  {
122  return m_OIDEnd;
123  }
124 
125  /// Get a pointer to the underlying volume object.
127  {
128  return m_Vol;
129  }
130 
131  /// Get a const pointer to the underlying volume object.
132  const CSeqDBVol * Vol() const
133  {
134  return m_Vol;
135  }
136 
137 private:
138  /// The underlying volume object
140 
141  /// The start of the OID range.
143 
144  /// The end of the OID range.
145  int m_OIDEnd;
146 };
147 
148 
149 /// CSeqDBVolSet
150 ///
151 /// This class stores a set of CSeqDBVol objects and defines an
152 /// interface to control usage of them. Several methods are provided
153 /// to create the set of volumes, or to get the required volumes by
154 /// different criteria. Also, certain methods perform operations over
155 /// the set of volumes. The CSeqDBVolEntry class, defined internally
156 /// to this one, provides some of this abstraction.
158 public:
159  /// Standard Constructor
160  ///
161  /// An object of this class will be constructed after the alias
162  /// files have been read, and the volume names will come from that
163  /// processing step. All of the specified volumes will be opened
164  /// and the metadata will be verified during construction.
165  ///
166  /// @param atlas
167  /// The memory management object to use.
168  /// @param vol_names
169  /// The names of the volumes this object will manage.
170  /// @param prot_nucl
171  /// Whether these are protein or nucleotide sequences.
172  /// @param user_list
173  /// If specified, will be used to include deflines by GI or TI.
174  /// @param neg_list
175  /// If specified, will be used to exclude deflines by GI or TI.
176  CSeqDBVolSet(CSeqDBAtlas & atlas,
177  const vector<string> & vol_names,
178  char prot_nucl,
179  CSeqDBGiList * user_list,
180  CSeqDBNegativeList * neg_list);
181 
182  /// Default Constructor
183  ///
184  /// An empty volume set will be created; this is in support of the
185  /// CSeqDBExpert class's default constructor.
186  CSeqDBVolSet();
187 
188  /// Destructor
189  ///
190  /// The destructor will release all resources still held, but some
191  /// of the resources will probably already be cleaned up via a
192  /// call to the UnLease method.
193  ~CSeqDBVolSet();
194 
195  /// Find a volume by OID.
196  ///
197  /// Many of the CSeqDB methods identify which sequence to use by
198  /// OID. That OID applies to all sequences in all volumes of the
199  /// opened database(s). This method is used to find the volume
200  /// (if any) that contains this OID, and to return both a pointer
201  /// to that volume and the OID within that volume that corresponds
202  /// to the global input OID.
203  ///
204  /// @param oid
205  /// The global OID to search for.
206  /// @param vol_oid
207  /// The returned OID within the relevant volume.
208  /// @return
209  /// A pointer to the volume containing the oid, or NULL.
210  CSeqDBVol * FindVol(int oid, int & vol_oid) const
211  {
212  // The 'const' usage here should be cleaned up, i.e. const
213  // should be removed from most of SeqDB's methods. Since the
214  // atlas often remaps the actual file data due to seemingly
215  // read-only user requests, there are very few parts of this
216  // code that can really be considered const. "Conceptual"
217  // const is not worth the trouble, particularly for internal
218  // methods.
219 
220  // A good technique would be to remove all or nearly all of
221  // the 'mutable' keywords, then remove the word 'const' from
222  // almost everything the compiler complains about.
223 
224  int vol_idx(0);
225  return const_cast<CSeqDBVol*>(FindVol(oid, vol_oid, vol_idx));
226  }
227 
228  /// Find a volume by OID.
229  ///
230  /// Many of the CSeqDB methods identify which sequence to use by
231  /// OID. That OID applies to all sequences in all volumes of the
232  /// opened database(s). This method is used to find the volume
233  /// (if any) that contains this OID, and to return a pointer to
234  /// that volume, the OID within that volume that corresponds to
235  /// the global input OID, and the volume index.
236  ///
237  /// @param oid
238  /// The global OID to search for.
239  /// @param vol_oid
240  /// The returned OID within the relevant volume.
241  /// @param vol_idx
242  /// The returned index of the relevant volume.
243  /// @return
244  /// A pointer to the volume containing the oid, or NULL.
245  const CSeqDBVol * FindVol(int oid, int & vol_oid, int & vol_idx) const
246  {
247  int rec_indx = m_RecentVol;
248 
249  if (rec_indx < (int) m_VolList.size()) {
250  const CSeqDBVolEntry & rvol = m_VolList[rec_indx];
251 
252  if ((rvol.OIDStart() <= oid) &&
253  (rvol.OIDEnd() > oid)) {
254 
255  vol_oid = oid - rvol.OIDStart();
256  vol_idx = rec_indx;
257 
258  return rvol.Vol();
259  }
260  }
261 
262  for(int index = 0; index < (int) m_VolList.size(); index++) {
263  if ((m_VolList[index].OIDStart() <= oid) &&
264  (m_VolList[index].OIDEnd() > oid)) {
265 
266  m_RecentVol = index;
267 
268  vol_oid = oid - m_VolList[index].OIDStart();
269  vol_idx = index;
270 
271  return m_VolList[index].Vol();
272  }
273  }
274 
275  return NULL;
276  }
277 
278  /// Find a volume by OID.
279  ///
280  /// Many of the CSeqDB methods identify which sequence to use by
281  /// OID. That OID applies to all sequences in all volumes of the
282  /// opened database(s). This method is used to find the volume
283  /// (if any) that contains this OID, and to return both a pointer
284  /// to that volume and the OID within that volume that corresponds
285  /// to the global input OID.
286  ///
287  /// @param oid
288  /// The global OID to search for.
289  /// @param vol_oid
290  /// The returned OID within the relevant volume.
291  /// @return
292  /// A pointer to the volume containing the oid, or NULL.
293  CSeqDBVol * FindVol(int oid, int & vol_oid)
294  {
295  int rec_indx = m_RecentVol;
296 
297  if (rec_indx < (int) m_VolList.size()) {
298  CSeqDBVolEntry & rvol = m_VolList[rec_indx];
299 
300  if ((rvol.OIDStart() <= oid) &&
301  (rvol.OIDEnd() > oid)) {
302 
303  vol_oid = oid - rvol.OIDStart();
304 
305  return rvol.Vol();
306  }
307  }
308 
309  for(int index = 0; index < (int) m_VolList.size(); index++) {
310  if ((m_VolList[index].OIDStart() <= oid) &&
311  (m_VolList[index].OIDEnd() > oid)) {
312 
313  m_RecentVol = index;
314 
315  vol_oid = oid - m_VolList[index].OIDStart();
316 
317  return m_VolList[index].Vol();
318  }
319  }
320 
321  return 0;
322  }
323 
324  /// Find a volume by index.
325  ///
326  /// This method returns a volume by index, so that 0 is the first
327  /// volume, and N-1 is the last volume of a set of N.
328  ///
329  /// @param i
330  /// The index of the volume to return.
331  /// @return
332  /// A pointer to the indicated volume, or NULL.
333  const CSeqDBVol * GetVol(int i) const
334  {
335  if (m_VolList.empty()) {
336  return 0;
337  }
338 
339  if (i >= (int) m_VolList.size()) {
340  return 0;
341  }
342 
343  m_RecentVol = i;
344 
345  return m_VolList[i].Vol();
346  }
347 
348  /// Find a volume by index.
349  ///
350  /// This method returns a volume by index, so that 0 is the first
351  /// volume, and N-1 is the last volume of a set of N.
352  ///
353  /// @param i
354  /// The index of the volume to return.
355  /// @return
356  /// A pointer to the indicated volume, or NULL.
358  {
359  if (m_VolList.empty()) {
360  return 0;
361  }
362 
363  if (i >= (int) m_VolList.size()) {
364  return 0;
365  }
366 
367  m_RecentVol = i;
368 
369  return m_VolList[i].Vol();
370  }
371 
372  /// Find a volume entry by index.
373  ///
374  /// This method returns a CSeqDBVolEntry by index, so that 0 is
375  /// the first volume, and N-1 is the last volume of a set of N.
376  ///
377  /// @param i
378  /// The index of the volume entry to return.
379  /// @return
380  /// A pointer to the indicated volume entry, or NULL.
381  const CSeqDBVolEntry * GetVolEntry(int i) const
382  {
383  if (m_VolList.empty()) {
384  return 0;
385  }
386 
387  if (i >= (int) m_VolList.size()) {
388  return 0;
389  }
390 
391  m_RecentVol = i;
392 
393  return & m_VolList[i];
394  }
395 
396  /// Find a volume by name.
397  ///
398  /// Each volume has a name, which is the name of the component
399  /// files (.pin, .psq, etc), without the file extension. This
400  /// method returns a const pointer to the volume matching the
401  /// specified name.
402  ///
403  /// @param volname
404  /// The name of the volume to search for.
405  /// @return
406  /// A pointer to the volume matching the specified name, or NULL.
407  const CSeqDBVol * GetVol(const string & volname) const
408  {
409  if (const CSeqDBVolEntry * v = x_FindVolName(volname)) {
410  return v->Vol();
411  }
412  return 0;
413  }
414 
415  /// Find a volume by name (non-const version).
416  ///
417  /// Each volume has a name, which is the name of the component
418  /// files (.pin, .psq, etc), without the file extension. This
419  /// method returns a non-const pointer to the volume matching the
420  /// specified name.
421  ///
422  /// @param volname
423  /// The name of the volume to search for.
424  /// @return
425  /// A pointer to the volume matching the specified name, or NULL.
426  CSeqDBVol * GetVol(const string & volname)
427  {
428  if (CSeqDBVolEntry * v = x_FindVolName(volname)) {
429  return v->Vol();
430  }
431  return 0;
432  }
433 
434  /// Get the number of volumes
435  ///
436  /// This returns the number of volumes available from this set.
437  /// It would be needed, for example, in order to iterate over all
438  /// volumes with the GetVol(int) method.
439  /// @return
440  /// The number of volumes available from this set.
441  int GetNumVols() const
442  {
443  return (int)m_VolList.size();
444  }
445 
446  /// Get the size of the OID range.
447  ///
448  /// This method returns the total size of the combined (global)
449  /// OID range of this database.
450  ///
451  /// @return
452  /// The number of OIDs.
453  int GetNumOIDs() const
454  {
455  return x_GetNumOIDs();
456  }
457 
458  /// Return storage held by the volumes
459  ///
460  /// This method returns any storage held by CSeqDBMemLease objects
461  /// which are part of this set of volumes. The memory leases will
462  /// be reacquired by the volumes if the data is requested again.
463  void UnLease()
464  {
465  for(int index = 0; index < (int) m_VolList.size(); index++) {
466  m_VolList[index].Vol()->UnLease();
467  }
468  }
469 
470  /// Get the first OID in a volume.
471  ///
472  /// Each volume is considered to span a range of OIDs. This
473  /// method returns the first OID in the OID range of the indicated
474  /// volume. The returned OID may not be included (ie. it may be
475  /// turned off via a filtering mechanism).
476  ///
477  /// @param i
478  /// The index of the volume.
479  int GetVolOIDStart(int i) const
480  {
481  if (m_VolList.empty()) {
482  return 0;
483  }
484 
485  if (i >= (int) m_VolList.size()) {
486  return 0;
487  }
488 
489  m_RecentVol = i;
490 
491  return m_VolList[i].OIDStart();
492  }
493 
494  /// Find total volume length for all volumes
495  ///
496  /// Each volume in the set has an internally stored length, which
497  /// indicates the length (in nucleotides/residues/bases) of all of
498  /// the sequences in the volume. This returns the total of these
499  /// lengths.
500  ///
501  /// @return
502  /// The sum of the lengths of all volumes.
504  {
505  Uint8 vol_total = 0;
506 
507  for(int index = 0; index < (int) m_VolList.size(); index++) {
508  vol_total += m_VolList[index].Vol()->GetVolumeLength();
509  }
510 
511  return vol_total;
512  }
513 
514  int GetMaxLength() const
515  {
516  int max_len = 0;
517 
518  for(int index = 0; index < (int) m_VolList.size(); index++) {
519  max_len = max( max_len, m_VolList[index].Vol()->GetMaxLength());
520  }
521 
522  return max_len;
523  }
524 
525  int GetMinLength() const
526  {
527  int min_len = INT4_MAX;
528 
529  for(int index = 0; index < (int) m_VolList.size(); index++) {
530  min_len = min( min_len, m_VolList[index].Vol()->GetMinLength());
531  }
532 
533  return min_len;
534  }
535 
536  /// Optimize the GI list configuration.
537  ///
538  /// This tells the volumes to examine and optimize their GI list
539  /// configuration. It should not be called until all GI lists
540  /// have been added to the volumes (by alias file processing).
542  {
543  for(int i = 0; i< (int) m_VolList.size(); i++) {
544  m_VolList[i].Vol()->OptimizeGiLists();
545  }
546  }
547 
548 private:
549  /// Private constructor to prevent copy operation.
551 
552  /// Private operator to prevent assignment.
554 
555  /// Get the size of the entire OID range.
556  int x_GetNumOIDs() const
557  {
558  if (m_VolList.empty())
559  return 0;
560 
561  return m_VolList.back().OIDEnd();
562  }
563 
564  /// Add a volume
565  ///
566  /// This method adds a volume to the set.
567  ///
568  /// @param atlas
569  /// The memory management layer object.
570  /// @param nm
571  /// The name of the volume.
572  /// @param pn
573  /// The sequence type.
574  /// @param user_list
575  /// If specified, will be used to include deflines by ID.
576  /// @param neg_list
577  /// If specified, will be used to exclude deflines by ID.
578  /// @param locked
579  /// The lock holder object for this thread.
580  void x_AddVolume(CSeqDBAtlas & atlas,
581  const string & nm,
582  char pn,
583  CSeqDBGiList * user_list,
584  CSeqDBNegativeList * neg_list,
585  CSeqDBLockHold & locked);
586 
587  /// Find a volume by name
588  ///
589  /// This returns the CSeqDBVolEntry object for the volume matching
590  /// the specified name.
591  ///
592  /// @param volname
593  /// The name of the volume.
594  /// @return
595  /// A const pointer to the CSeqDBVolEntry object, or NULL.
596  const CSeqDBVolEntry * x_FindVolName(const string & volname) const
597  {
598  for(int i = 0; i< (int) m_VolList.size(); i++) {
599  if (volname == m_VolList[i].Vol()->GetVolName()) {
600  return & m_VolList[i];
601  }
602  }
603 
604  return 0;
605  }
606 
607  /// Find a volume by name
608  ///
609  /// This returns the CSeqDBVolEntry object for the volume matching
610  /// the specified name (non const version).
611  ///
612  /// @param volname
613  /// The name of the volume.
614  /// @return
615  /// A non-const pointer to the CSeqDBVolEntry object, or NULL.
616  CSeqDBVolEntry * x_FindVolName(const string & volname)
617  {
618  for(int i = 0; i < (int) m_VolList.size(); i++) {
619  if (volname == m_VolList[i].Vol()->GetVolName()) {
620  return & m_VolList[i];
621  }
622  }
623 
624  return 0;
625  }
626 
627  /// The actual set of volumes.
628  vector<CSeqDBVolEntry> m_VolList;
629 
630  /// The index of the most recently used volume
631  ///
632  /// This variable is mutable and volatile, but is not protected by
633  /// locking. Instead, the following precautions are always taken.
634  ///
635  /// 1. First, the value is copied into a local variable.
636  /// 2. Secondly, the range is always checked.
637  /// 3. It is always treated as a hint; there is always fallback
638  /// code to search for the correct volume.
639  mutable volatile int m_RecentVol;
640 };
641 
643 
644 #endif // OBJTOOLS_READERS_SEQDB__SEQDBVOLSET_HPP
645 
646 
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:298
CSeqDBGiList.
CSeqDBLockHold.
Definition: seqdbatlas.hpp:167
CSeqDBNegativeList.
CSeqDBVolEntry.
Definition: seqdbvolset.hpp:59
const CSeqDBVol * Vol() const
Get a const pointer to the underlying volume object.
int OIDStart() const
Get the starting OID in this volume's range.
int m_OIDEnd
The end of the OID range.
int OIDEnd() const
Get the ending OID in this volume's range.
void Free()
Free the volume object.
Definition: seqdbvolset.hpp:83
CSeqDBVol * m_Vol
The underlying volume object.
CSeqDBVol * Vol()
Get a pointer to the underlying volume object.
int m_OIDStart
The start of the OID range.
void SetStartAndEnd(int start)
Set the OID range.
Definition: seqdbvolset.hpp:97
CSeqDBVolEntry(CSeqDBVol *new_vol)
Constructor.
Definition: seqdbvolset.hpp:73
CSeqDBVolSet.
int GetMinLength() const
void OptimizeGiLists()
Optimize the GI list configuration.
CSeqDBVolSet()
Default Constructor.
Definition: seqdbvolset.cpp:89
void UnLease()
Return storage held by the volumes.
const CSeqDBVol * GetVol(const string &volname) const
Find a volume by name.
~CSeqDBVolSet()
Destructor.
Definition: seqdbvolset.cpp:94
const CSeqDBVolEntry * GetVolEntry(int i) const
Find a volume entry by index.
void x_AddVolume(CSeqDBAtlas &atlas, const string &nm, char pn, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, CSeqDBLockHold &locked)
Add a volume.
const CSeqDBVolEntry * x_FindVolName(const string &volname) const
Find a volume by name.
const CSeqDBVol * GetVol(int i) const
Find a volume by index.
int x_GetNumOIDs() const
Get the size of the entire OID range.
CSeqDBVol * FindVol(int oid, int &vol_oid)
Find a volume by OID.
Uint8 GetVolumeSetLength() const
Find total volume length for all volumes.
CSeqDBVolSet & operator=(const CSeqDBVolSet &)
Private operator to prevent assignment.
CSeqDBVol * GetVolNonConst(int i)
Find a volume by index.
int GetNumVols() const
Get the number of volumes.
CSeqDBVolSet(const CSeqDBVolSet &)
Private constructor to prevent copy operation.
int GetMaxLength() const
CSeqDBVolEntry * x_FindVolName(const string &volname)
Find a volume by name.
CSeqDBVol * FindVol(int oid, int &vol_oid) const
Find a volume by OID.
int GetNumOIDs() const
Get the size of the OID range.
vector< CSeqDBVolEntry > m_VolList
The actual set of volumes.
volatile int m_RecentVol
The index of the most recently used volume.
const CSeqDBVol * FindVol(int oid, int &vol_oid, int &vol_idx) const
Find a volume by OID.
int GetVolOIDStart(int i) const
Get the first OID in a volume.
CSeqDBVol * GetVol(const string &volname)
Find a volume by name (non-const version).
CSeqDBVol class.
Definition: seqdbvol.hpp:169
int GetNumOIDs() const
Get the number of OIDs for this volume.
Definition: seqdbvol.cpp:2370
#define NULL
Definition: ncbistd.hpp:225
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
#define INT4_MAX
largest number represented by signed int
Definition: ncbi_std.h:141
T max(T x_, T y_)
T min(T x_, T y_)
Implementation for some assorted ID list filtering code.
Defines database volume access classes.
USING_SCOPE(objects)
Import definitions from the ncbi::objects namespace.
Modified on Thu Apr 11 15:11:11 2024 by modify_doxy.py rev. 669887