NCBI C++ ToolKit
seqdbvol.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBVOL_HPP
2 #define OBJTOOLS_READERS_SEQDB__SEQDBVOL_HPP
3 
4 /* $Id: seqdbvol.hpp 101863 2024-02-26 14:52:30Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file seqdbvol.hpp
34 /// Defines database volume access classes.
35 ///
36 /// Defines classes:
37 /// CSeqDBVol
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
44 #include "seqdbcol.hpp"
45 #include <objects/seq/seq__.hpp>
46 
48 
49 /// Import definitions from the objects namespace.
51 
52 /// CSeqDBGiIndex
53 ///
54 /// This class maintains the OID->GI translation
55 class CSeqDBGiIndex : public CObject {
56 public:
    /// Integer type used for the ordinal ID (OID) argument of GetSeqGI().
58  typedef int TOid;
59 // typedef int TGi;
60 
    /// Constructor (tail of the declaration).
    ///
    /// NOTE(review): the line that opens this declaration is not present in
    /// this listing. The initializer list below uses `atlas`, so presumably
    /// the first parameter is the atlas object -- confirm against the
    /// original header.
    ///
    /// Initializes m_Lease from `atlas`, sets m_NumOIDs to zero, and calls
    /// m_Lease.Init() with the file name dbname + '.' + prot_nucl + "og".
    ///
    /// @param dbname    Base name used to build the index file name. [in]
    /// @param prot_nucl Character placed between '.' and "og" in the file name. [in]
62  const string & dbname,
63  char prot_nucl)
64  : m_Lease (atlas),
65  //m_Fname (dbname + '.' + prot_nucl + "og"),
66  m_NumOIDs (0) {
67  m_Lease.Init(dbname + '.' + prot_nucl + "og");
68  }
69 
    /// NOTE(review): presumably the destructor body; its declaration line is
    /// not present in this listing. The body calls m_Lease.Clear().
71  {
72  m_Lease.Clear();
73  }
74 
    /// Check whether the index file for a volume exists.
    ///
    /// Builds the file name name + '.' + prot_nucl + "og" and returns the
    /// result of CFile(...).Exists() for it.
    ///
    /// @param name      Base name used to build the index file name. [in]
    /// @param prot_nucl Character placed between '.' and "og" in the file name. [in]
    /// @return The value returned by CFile::Exists() for the built file name.
75  static bool IndexExists(const string & name,
76  const char prot_nucl)
77  {
78  string fn(name + '.' + prot_nucl + "og");
79  return CFile(fn).Exists();
80  }
81 
    /// Get the GI for the given OID (declaration only; defined elsewhere).
    ///
    /// @param oid    The OID to translate. [in]
    /// @param locked Lock holder object; how it is used is not visible here.
    /// @return The GI for this OID.
82  TGi GetSeqGI(TOid oid, CSeqDBLockHold & locked);
83 
84 private:
    // NOTE(review): the data members used above (m_Lease, m_NumOIDs) are not
    // declared in this listing; only the commented-out m_Fname remains.
86  //string m_Fname;
89 };
90 
91 
92 /// CSeqDBRangeList
93 ///
94 /// This class maintains a list of ranges of sequence offsets that are
95 /// desired for performance optimization. For large sequences that
96 /// need to be unpacked, this class describes the subsets of those
97 /// sequences that will actually be used. Each instance of this class
98 /// corresponds to sequence data for one OID.
99 
100 class CSeqDBRangeList : public CObject {
101 public:
102  /// Constructor. Initializes m_CacheData to false.
103  /// @param atlas The SeqDB memory management layer. [in]
     /// NOTE(review): the declaration line is not present in this listing, so
     /// the `atlas` parameter documented above cannot be confirmed from here.
105  : m_CacheData (false)
106  {
107  // Sequence caching is not implemented yet. It would increase
108  // performance further, but requires some consideration of the
109  // design with respect to locking and correctness.
110  }
111 
112  /// Destructor. Calls FlushSequence().
     /// NOTE(review): the declaration line is not present in this listing.
114  {
115  FlushSequence();
116  }
117 
118  /// Returns true if the sequence data is cached.
     /// This implementation always returns false.
119  bool IsCached()
120  {
121  return false;
122  }
123 
124  /// List of sequence offset ranges.
     /// NOTE(review): the typedef this comment describes (presumably
     /// TRangeList, which is used below) is not present in this listing.
126 
127  /// Set ranges of the sequence that will be used.
128  /// @param ranges Offset ranges of the sequence that are needed. [in]
129  /// @param append_ranges If true, combine new ranges with old. [in]
130  /// @param cache_data If true, SeqDB is allowed to cache data. [in]
131  void SetRanges(const TRangeList & ranges,
132  bool append_ranges,
133  bool cache_data);
134 
135  /// Get ranges of sequence offsets that will be used.
     /// Returns the m_Ranges member.
     /// NOTE(review): the declaration line (name and return type) is not
     /// present in this listing.
137  {
138  return m_Ranges;
139  }
140 
141  /// Flush cached sequence data (if any).
     /// The body is empty, so this is currently a no-op.
     /// NOTE(review): the declaration line is not present in this listing;
     /// presumably this is FlushSequence(), which the destructor calls.
143  {
144  }
145 
146  /// Sequences shorter than this will not use ranges in any case.
     /// @return The fixed threshold value, 10240.
147  static int ImmediateLength()
148  {
149  return 10240;
150  }
151 
152 private:
153  /// Range of offsets needed for this sequence.
     /// NOTE(review): the member declaration (presumably m_Ranges) is not
     /// present in this listing.
155 
156  /// True if caching of sequence data is required for this sequence.
     /// NOTE(review): the member declaration (presumably m_CacheData) is not
     /// present in this listing.
158 };
159 
160 /// CSeqDBVol class.
161 ///
162 /// This object defines access to one database volume. It aggregates
163 /// file objects associated with the sequence and header data, and
164 /// ISAM objects used for translation of GIs and PIGs for data in this
165 /// volume. The extensions managed here include those with file
166 /// extensions (pin, phr, psq, nin, nhr, and nsq), plus the optional
167 /// ISAM objects via the CSeqDBIsam class.
168 
169 class CSeqDBVol {
170 public:
171  /// Import TIndx definition from the CSeqDBAtlas class.
173 
174  /// Constructor.
175  ///
176  /// All files connected with the database volume will be opened,
177  /// metadata about the volume will be read from the index file,
178  /// and identifier translation indices will be opened. The name
179  /// of these files is the specified name of the volume plus an
180  /// extension.
181  ///
182  /// @param atlas
183  /// The memory management layer object. [in]
184  /// @param name
185  /// The base name of the volume's files. [in]
186  /// @param prot_nucl
187  /// The sequence type, kSeqTypeProt, or kSeqTypeNucl. [in]
188  /// @param user_list
189  /// Specifies GIs or TIs of sequences to include. [in]
190  /// @param neg_list
191  /// Specifies GIs or TIs of sequences to exclude. [in]
192  /// @param vol_start
193  /// The volume's starting OID. [in]
194  /// @param locked
195  /// The lock holder object for this thread. [in]
196  CSeqDBVol(CSeqDBAtlas & atlas,
197  const string & name,
198  char prot_nucl,
199  CSeqDBGiList * user_list,
200  CSeqDBNegativeList * neg_list,
201  int vol_start,
202  CSeqDBLockHold & locked);
203 
204  /// Open sequence file
205  ///
206  /// By default, sequence file is opened on a "lazy" schedule.
207  /// This method will force the sequence file to be opened.
208  ///
209  /// @param locked
210  /// The lock holder object for this thread. [in]
211  void OpenSeqFile(CSeqDBLockHold &locked) const;
212 
213  /// Sequence length for protein databases.
214  ///
215  /// This method returns the length of the sequence in bases, and
216  /// should only be called for protein sequences. It does not
217  /// require synchronization via the atlas object's lock.
218  ///
219  /// @param oid
220  /// The OID of the sequence. [in]
221  /// @param locked
222  /// The lock holder object for this thread. [in]
223  /// @return
224  /// The length in bases of the sequence.
225  int GetSeqLengthProt(int oid) const;
226 
227  /// Approximate sequence length for nucleotide databases.
228  ///
229  /// This method returns the length of the sequence using a fast
230  /// method that may be off by as much as 4 bases. The method is
231  /// designed to be unbiased, meaning that the total length of
232  /// large numbers of sequences will approximate what the exact
233  /// length would be. The approximate lengths will change if the
234  /// database is regenerated. It does not require synchronization.
235  ///
236  /// @param oid
237  /// The OID of the sequence. [in]
238  /// @param locked
239  /// The lock holder object for this thread. [in]
240  /// @return
241  /// The approximate length in bases of the sequence.
242  int GetSeqLengthApprox(int oid) const;
243 
244  /// Exact sequence length for nucleotide databases.
245  ///
246  /// This method returns the length of the sequence in bases, and
247  /// should only be called for nucleotide sequences. It requires
248  /// synchronization via the atlas object's lock, which must be
249  /// done in the calling code.
250  ///
251  /// @param oid
252  /// The OID of the sequence. [in]
253  /// @param locked
254  /// The lock holder object for this thread. [in]
255  /// @return
256  /// The length in bases of the sequence.
257  int GetSeqLengthExact(int oid) const;
258 
259  /// Get filtered sequence header information.
260  ///
261  /// This method returns the set of Blast-def-line objects stored
262  /// for each sequence. These contain descriptive information
263  /// related to the sequence. If OID filtering is enabled and a
264  /// membership bit is used, only deflines with that membership bit
265  /// set will be returned. The OID list existence and membership
266  /// bit are contained in filt_info. This field may be NULL, in
267  /// which case OID list bit filtering is not done (in this case
268  /// the deflines are not cached).
269  ///
270  /// @param oid
271  /// The OID of the sequence. [in]
272  /// @param locked
273  /// The lock holder object for this thread. [in]
274  /// @return
275  /// The set of blast-def-lines describing this sequence.
277  GetFilteredHeader(int oid,
278  CSeqDBLockHold & locked) const;
279 
280  /// Get the sequence type stored in this database.
281  ///
282  /// This method returns the type of sequences stored in this
283  /// database, either kSeqTypeProt for protein, or kSeqTypeNucl for
284  /// nucleotide.
285  ///
286  /// @return
287  /// Either kSeqTypeProt for protein, or kSeqTypeNucl for nucleotide.
288  char GetSeqType() const;
289 
290  /// Get a CBioseq object for this sequence.
291  ///
292  /// This method builds and returns a Bioseq for this sequence.
293  /// The taxonomy information is cached in this volume, so it
294  /// should not be modified directly, or other Bioseqs from this
295  /// SeqDB object may be affected. If the CBioseq has an OID list,
296  /// and it uses a membership bit, the deflines included in the
297  /// CBioseq will be filtered based on the membership bit. Zero
298  /// for the membership bit means no filtering. Filtering can also
299  /// be done by a GI, in which case, only the defline matching that
300  /// GI will be returned. The seqdata parameter can be specified
301  /// as false to indicate that sequence data should not be included
302  /// in this object; in this case the CSeq_inst object attached to
303  /// the bioseq will be configured to a "not set" state. This is
304  /// used to allow Bioseq summary data to be provided without the
305  /// performance penalty of loading (possibly very large) sequence
306  /// data from disk.
307  ///
308  /// @param oid
309  /// The OID of the sequence. [in]
310  /// @param pref_gi
311  /// If specified, only return deflines containing this GI. [in]
312  /// @param pref_seq_id
313  /// If specified, only return deflines containing this Seq_id. [in]
314  /// @param tax_info
315  /// The taxonomy database object. [in]
316  /// @param seqdata
317  /// Include sequence data in the returned Bioseq. [in]
318  /// @param locked
319  /// The lock holder object for this thread. [in]
320  /// @return
321  /// A CBioseq describing this sequence.
323  GetBioseq(int oid,
324  TGi pref_gi,
325  const CSeq_id * pref_seq_id,
326  bool seqdata,
327  CSeqDBLockHold & locked);
328 
329  /// Get the sequence data.
330  ///
331  /// This method gets the sequence data, returning a pointer and
332  /// the length of the sequence. The atlas will be locked, but the
333  /// lock may also be returned during this method. The computation
334  /// of the length of a nucleotide sequence involves a one byte
335  /// read that is likely to cause a page fault. Releasing the
336  /// atlas lock before this (potential) page fault can help the
337  /// average performance in the multithreaded case. It is safe to
338  /// release the lock because the sequence data is pinned down by
339  /// the reference count we have acquired to return to the user.
340  /// The returned sequence data is intended for blast searches, and
341  /// will contain random values in any ambiguous regions.
342  ///
343  /// @param oid
344  /// The OID of the sequence. [in]
345  /// @param buffer
346  /// The returned sequence data. [out]
347  /// @param locked
348  /// The lock holder object for this thread. [in]
349  /// @param in_lease
350  /// Only perform sequence retrieval if the requested oid is
351  /// within the previous lease [in]
352  /// @return
353  /// The length of this sequence in bases.
354  int GetSequence(int oid, const char ** buffer) const
355  {
     // Thin inline wrapper: forwards both arguments unchanged to
     // x_GetSequence() and returns its result.
356  return x_GetSequence(oid, buffer);
357  }
358 
359  /// Get a sequence with ambiguous regions.
360  ///
361  /// This method gets the sequence data, returning a pointer and
362  /// the length of the sequence. For nucleotide sequences, the
363  /// data can be returned in one of two encodings. Specify either
364  /// (kSeqDBNuclNcbiNA8) for NCBI/NA8, or (kSeqDBNuclBlastNA8) for
365  /// Blast/NA8. The data can also be allocated in one of three
366  /// ways, enumerated in ESeqDBAllocType. Specify eAtlas to use
367  /// the Atlas code, eMalloc to use the malloc() function, or eNew
368  /// to use the new operator.
369  ///
370  /// @param oid
371  /// The OID of the sequence. [in]
372  /// @param buffer
373  /// The returned sequence data. [out]
374  /// @param nucl_code
375  /// The encoding of the returned sequence data. [in]
376  /// @param alloc_type
377  /// The allocation routine used. [in]
378  /// @param region
379  /// If non-null, the offset range to get. [in]
380  /// @param locked
381  /// The lock holder object for this thread. [in]
382  /// @return
383  /// The length of this sequence in bases.
384  int GetAmbigSeq(int oid,
385  char ** buffer,
386  int nucl_code,
387  ESeqDBAllocType alloc_type,
388  SSeqDBSlice * region,
389  CSeqDB::TSequenceRanges * masks) const;
390 
391  int GetAmbigPartialSeq(int oid,
392  char ** buffer,
393  int nucl_code,
394  ESeqDBAllocType alloc_type,
395  CSeqDB::TSequenceRanges * partial_ranges,
396  CSeqDB::TSequenceRanges * masks) const;
397 
398  /// Get the Seq-ids associated with a sequence.
399  ///
400  /// This method returns a list containing all the CSeq_id objects
401  /// associated with a sequence.
402  ///
403  /// @param oid
404  /// The OID of the sequence. [in]
405  /// @param locked
406  /// The lock holder object for this thread. [in]
407  /// @return
408  /// The list of Seq-id objects for this sequence.
409  list< CRef<CSeq_id> > GetSeqIDs(int oid) const;
410  // same as above version with cached CObjectIStreamAsnBinary
411  list< CRef<CSeq_id> > GetSeqIDs(int oid, CObjectIStreamAsnBinary *inpstr) const;
412 
413  /// Get the GI of a sequence
414  /// This method returns the gi of the sequence
415  ///
416  /// @param oid
417  /// The OID of the sequence. [in]
418  /// @return
419  /// The GI of the sequence.
420  TGi GetSeqGI(int oid, CSeqDBLockHold & locked) const;
421 
422  /// Get the volume title.
423  /// @return The volume's title.
424  string GetTitle() const;
425 
426  /// Get the LMDB file name associated with this volume.
427  /// Empty string if version 4
428  string GetLMDBFileName() const;
429 
430  /// Get the formatting date of the volume.
431  /// @return The create-date of the volume.
432  string GetDate() const;
433 
434  /// Get the number of OIDs for this volume.
435  /// @return The number of OIDs.
436  int GetNumOIDs() const;
437 
438  /// Get the total length of this volume (in bases).
439  /// @return The total volume length.
440  Uint8 GetVolumeLength() const;
441 
442  /// Get the length of the largest sequence in this volume.
443  /// @return The largest sequence's length.
444  int GetMaxLength() const;
445 
446  /// Get the length of the smallest sequence in this volume.
447  /// @return The smallest sequence's length.
448  int GetMinLength() const;
449 
450  /// Get the volume name.
451  /// @return The volume name.
452  const string & GetVolName() const
453  {
     // Returns the m_VolName member by const reference (no copy is made here).
454  return m_VolName;
455  }
456 
457  /// Return expendable resources held by this volume.
458  ///
459  /// This volume holds resources acquired via the atlas. This
460  /// method returns all such resources which can be automatically
461  /// reacquired (but not, for example, the index file data).
462  void UnLease();
463 
464 
465  /// Find the OID given a PIG.
466  ///
467  /// A lookup is done for the PIG, and if found, the corresponding
468  /// OID is returned.
469  ///
470  /// @param pig
471  /// The pig to look up. [in]
472  /// @param oid
473  /// The returned ordinal ID. [out]
474  /// @param locked
475  /// The lock holder object for this thread. [in]
476  /// @return
477  /// True if the PIG was found.
478  bool PigToOid(int pig, int & oid) const;
479 
480  /// Find the PIG given an OID.
481  ///
482  /// If this OID is associated with a PIG, the PIG is returned.
483  ///
484  /// @param oid
485  /// The oid of the sequence. [in]
486  /// @param pig
487  /// The returned PIG. [out]
488  /// @param locked
489  /// The lock holder object for this thread. [in]
490  /// @return
491  /// True if a PIG was returned.
492  bool GetPig(int oid, int & pig, CSeqDBLockHold & locked) const;
493 
494  /// Find the OID given a TI.
495  ///
496  /// A lookup is done for the TI, and if found, the corresponding
497  /// OID is returned.
498  ///
499  /// @param ti
500  /// The ti to look up. [in]
501  /// @param oid
502  /// The returned ordinal ID. [out]
503  /// @param locked
504  /// The lock holder object for this thread. [in]
505  /// @return
506  /// True if the TI was found.
507  bool TiToOid(Int8 ti,
508  int & oid,
509  CSeqDBLockHold & locked) const;
510 
511  /// Find the OID given a GI.
512  ///
513  /// A lookup is done for the GI, and if found, the corresponding
514  /// OID is returned.
515  ///
516  /// @param gi
517  /// The gi to look up. [in]
518  /// @param oid
519  /// The returned ordinal ID. [out]
520  /// @param locked
521  /// The lock holder object for this thread. [in]
522  /// @return
523  /// True if an OID was returned.
524  bool GiToOid(TGi gi, int & oid, CSeqDBLockHold & locked) const;
525 
526  /// Find the GI given an OID.
527  ///
528  /// If this OID is associated with a GI, the GI is returned.
529  ///
530  /// @param oid
531  /// The oid of the sequence. [in]
532  /// @param gi
533  /// The returned GI. [out]
534  /// @param locked
535  /// The lock holder object for this thread. [in]
536  /// @return
537  /// True if a GI was returned.
538  bool GetGi(int oid,
539  TGi & gi,
540  CSeqDBLockHold & locked) const;
541 
542  /// Find OIDs for the specified accession or formatted Seq-id.
543  ///
544  /// An attempt will be made to simplify the string by parsing it
545  /// into a list of Seq-ids. If this works, the best Seq-id (for
546  /// lookup purposes) will be formatted and the resulting string
547  /// will be looked up in the string ISAM file. The resulting set
548  /// of OIDs will be returned. If the string is not found, the
549  /// array will be left empty. Most matches only produce one OID.
550  ///
551  /// @param acc
552  /// An accession or formatted Seq-id for which to search. [in]
553  /// @param oids
554  /// A set of OIDs found for this sequence. [out]
555  /// @param locked
556  /// The lock holder object for this thread. [in]
557  void AccessionToOids(const string & acc,
558  vector<int> & oids,
559  CSeqDBLockHold & locked) const;
560 
561  /// Find OIDs for the specified Seq-id.
562  ///
563  /// The Seq-id will be formatted and the resulting string will be
564  /// looked up in the string ISAM file. The resulting set of OIDs
565  /// will be returned. If the string is not found, the array will
566  /// be left empty. Most matches only produce one OID.
567  ///
568  /// @param seqid
569  /// A Seq-id for which to search. [in]
570  /// @param oids
571  /// A set of OIDs found for this sequence. [out]
572  /// @param locked
573  /// The lock holder object for this thread. [in]
574  void SeqidToOids(CSeq_id & seqid,
575  vector<int> & oids,
576  CSeqDBLockHold & locked) const;
577 
578  /// Find the OID at a given index into the database.
579  ///
580  /// This method considers the database as one long array of bases,
581  /// and finds the base at an offset into that array. The sequence
582  /// nearest that base is determined, and the sequence's OID is
583  /// returned. The OIDs are assigned to volumes in a different
584  /// order than with the readdb library, which can be an issue when
585  /// splitting the database for load balancing purposes. When
586  /// computing the OID range, be sure to use GetNumOIDs(), not
587  /// GetNumSeqs().
588  ///
589  /// @param first_seq
590  /// This OID or later is always returned. [in]
591  /// @param residue
592  /// The position to find relative to the total length. [in]
593  /// @param locked
594  /// The lock holder object for this thread. [in]
595  /// @return
596  /// The OID of the sequence nearest the specified residue.
597  int GetOidAtOffset(int first_seq,
598  Uint8 residue,
599  CSeqDBLockHold & locked) const;
600 
601  /// Translate Gis to Oids for the given vector of Gi/Oid pairs.
602  ///
603  /// This method iterates over a vector of Gi/Oid pairs. For each
604  /// pair where OID is -1, the GI will be looked up in the ISAM
605  /// file, and (if found) the correct OID will be stored (otherwise
606  /// the -1 will remain). This method will normally be called once
607  /// for each volume.
608  ///
609  /// @param gis
610  /// The set of GI/OID, TI/OID, and Seq-id/OID pairs. [in|out]
611  /// @param locked
612  /// The lock holder object for this thread. [in]
613  void IdsToOids(CSeqDBGiList & gis,
614  CSeqDBLockHold & locked) const;
615 
616  /// Add OIDs for this volume, filtered by negative ID lists.
617  ///
618  /// This method iterates over a vector of Gis or Tis. For each
619  /// GI+OID or TI+OID line in the ISAM file, the OID's bit will be
620  /// enabled in the ID list, if the GI or TI is not found in the
621  /// negated GI or TI lists. This method will normally be called
622  /// once for each volume.
623  ///
624  /// @param gis
625  /// The set of GIs, TIs, and the OID bitmap. [in|out]
626  /// @param locked
627  /// The lock holder object for this thread. [in]
628  void IdsToOids(CSeqDBNegativeList & gis,
629  CSeqDBLockHold & locked) const;
630 
631  /// Filter this volume using the specified GI list.
632  ///
633  /// A volume can be filtered by a GI list. This method attaches a
634  /// GI list to the volume, in addition to any GI lists that are
635  /// already attached.
636  ///
637  /// @param gilist
638  /// A list of GIs to use as a filter. [in]
640  {
641  m_VolumeGiLists.push_back(gilist);
642  }
643 
644  /// Simplify the GI list configuration.
645  ///
646  /// When all user and volume GI lists have been attached, the user
647  /// GI list may be removed; this is only possible if neither the
648  /// user nor volume GI lists contain Seq-id data.
649  void OptimizeGiLists() const;
650 
651  /// Fetch data as a CSeq_data object.
652  ///
653  /// All or part of the sequence is fetched in a CSeq_data object.
654  /// The portion of the sequence returned is specified by begin and
655  /// end. An exception will be thrown if begin is greater than or
656  /// equal to end, or if end is greater than or equal to the length
657  /// of the sequence. Begin and end should be specified in bases;
658  /// a range like (0,1) specifies 1 base, not 2. Nucleotide data
659  /// will always be returned in ncbi4na format.
660  ///
661  /// @param oid Specifies the sequence to fetch. [in]
662  /// @param begin Specifies the start of the data to get. [in]
663  /// @param end Specifies the end of the data to get. [in]
664  /// @param locked The lock holder object for this thread. [in]
665  /// @return The sequence data as a Seq-data object.
666  CRef<CSeq_data> GetSeqData(int oid,
667  TSeqPos begin,
668  TSeqPos end,
669  CSeqDBLockHold & locked) const;
670 
671  /// Get Raw Sequence and Ambiguity Data.
672  ///
673  /// Get a pointer to the raw sequence and ambiguity data, and the
674  /// length of each. The encoding for these is not defined here
675  /// and should not be relied on to be compatible between different
676  /// database format versions. NULL can be supplied for parameters
677  /// that are not needed (except oid). RetSequence() must be
678  /// called with the pointer returned by 'buffer' if and only if
679  /// that pointer is supplied as non-null by the user. Protein
680  /// sequences will never have ambiguity data. Ambiguity data will
681  /// be packed in the returned buffer at offset *seq_length.
682  ///
683  /// @param oid Ordinal id of the sequence. [in]
684  /// @param buffer Buffer of raw data. [out]
685  /// @param seq_length Returned length of the sequence data. [out]
685  /// @param ambig_length Returned length of the ambiguity data. [out]
687  /// @param locked Lock holder object for this thread. [in]
688  void GetRawSeqAndAmbig(int oid,
689  const char ** buffer,
690  int * seq_length,
691  int * ambig_length) const;
692 
693  /// Get GI Bounds.
694  ///
695  /// Fetch the lowest, highest, and total number of GIs. If the
696  /// operation fails, zero will be returned for count.
697  ///
698  /// @param low_id Lowest GI value in database. [out]
699  /// @param high_id Highest GI value in database. [out]
700  /// @param count Number of GI values in database. [out]
701  /// @param locked Lock holder object for this thread. [in]
702  void GetGiBounds(TGi & low_id,
703  TGi & high_id,
704  int & count,
705  CSeqDBLockHold & locked) const;
706 
707  /// Get PIG Bounds.
708  ///
709  /// Fetch the lowest, highest, and total number of PIGs. If the
710  /// operation fails, zero will be returned for count.
711  ///
712  /// @param low_id Lowest PIG value in database. [out]
713  /// @param high_id Highest PIG value in database. [out]
714  /// @param count Number of PIG values in database. [out]
715  /// @param locked Lock holder object for this thread. [in]
716  void GetPigBounds(int & low_id,
717  int & high_id,
718  int & count,
719  CSeqDBLockHold & locked) const;
720 
721  /// Get String Bounds.
722  ///
723  /// Fetch the lowest, highest, and total number of string keys in
724  /// the database index. If the operation fails, zero will be
725  /// returned for count.
726  ///
727  /// @param low_id Lowest string value in database. [out]
728  /// @param high_id Highest string value in database. [out]
729  /// @param count Number of string values in database. [out]
730  /// @param locked Lock holder object for this thread. [in]
731  void GetStringBounds(string & low_id,
732  string & high_id,
733  int & count) const;
734 
735  /// List of sequence offset ranges.
737 
738  /// Apply a range of offsets to a database sequence.
739  ///
740  /// The GetAmbigSeq() method requires an amount of work (and I/O)
741  /// which is proportional to the size of the sequence data (more
742  /// if ambiguities are present). In some cases, only certain
743  /// subranges of this data will be utilized. This method allows
744  /// the user to specify which parts of a sequence are actually
745  /// needed by the user. (Care should be taken if one SeqDB object
746  /// is shared by several program components.) (Note that offsets
747  /// above the length of the sequence will not generate an error,
748  /// and are replaced by the sequence length.)
749  ///
750  /// If ranges are specified for a sequence, data areas in
751  /// specified sequences will be accurate, but data outside the
752  /// specified ranges should not be accessed, and no guarantees are
753  /// made about what data they will contain. If the append_ranges
754  /// flag is true, the range will be added to existing ranges. If
755  /// false, existing ranges will be flushed and replaced by new
756  /// ranges. To remove ranges, call this method with an empty list
757  /// of ranges; future calls will return the complete sequence.
758  ///
759  /// If the cache_data flag is provided, data for this sequence
760  /// will be kept for the duration of SeqDB's lifetime. To disable
761  /// caching (and flush cached data) for this sequence, call the
762  /// method again, but specify cache_data to be false.
763  ///
764  /// @param oid OID of the sequence. [in]
765  /// @param offset_ranges Ranges of sequence data to return. [in]
766  /// @param append_ranges Append new ranges to existing list. [in]
767  /// @param cache_data Keep sequence data for future callers. [in]
768  /// @param locked Lock holder object for this thread. [in]
769  void SetOffsetRanges(int oid,
770  const TRangeList & offset_ranges,
771  bool append_ranges,
772  bool cache_data) const;
773 
774  /// Flush all offset ranges cached
775  /// @param locked Lock holder object for this thread. [in]
776  void FlushOffsetRangeCache();
777 
778  /// Get the sequence hash for a given OID.
779  ///
780  /// The sequence data is fetched and the sequence hash is
781  /// computed and returned.
782  ///
783  /// @param oid The sequence to compute the hash of. [in]
784  /// @return The sequence hash.
785  unsigned GetSequenceHash(int oid);
786 
787  /// Get the OIDs for a given sequence hash.
788  ///
789  /// The OIDs corresponding to a hash value (if any) are found and
790  /// returned. If none are found, the vector will be empty. If
791  /// the index does not exist for this volume, an exception will be
792  /// thrown. Some false positives may be returned due to hash
793  /// value collisions.
794  ///
795  /// @param hash The sequence hash to look up. [in]
796  /// @param oids OIDs of sequences with this hash. [out]
797  /// @param locked Lock holder object for this thread. [in|out]
798  void HashToOids(unsigned hash,
799  vector<int> & oids,
800  CSeqDBLockHold & locked) const;
801 
802  /// List the titles of all columns for this volume.
803  void ListColumns(set<string> & titles,
804  CSeqDBLockHold & locked);
805 
806  /// Get an ID number for a given column title.
807  ///
808  /// For a given column title, this returns an ID that can be used
809  /// to access that column in the future. The returned ID number
810  /// is specific to this instance of SeqDB. If the database does
811  /// not have a column with this name, -1 will be returned.
812  ///
813  /// @param title Column title to search for. [in]
814  /// @param locked The lock holder object for this thread. [in]
815  /// @return Column ID number for this column, or -1.
816  int GetColumnId(const string & title,
817  CSeqDBLockHold & locked);
818 
819  /// Get all metadata for the specified column.
820  ///
821  /// Columns may contain user-defined metadata as a list of
822  /// key-value pairs. For the specified column, this returns that
823  /// column's metadata in the provided map. If multiple volumes
824  /// are present, and they define contradictory meta data (this is
825  /// more common when multiple databases are opened at once), this
826  /// method returns the first value it finds for each metadata key.
827  /// If this is unsatisfactory, the two-argument version of this
828  /// method may be used to get more precise values for specific
829  /// volumes.
830  ///
831  /// @param col_id The column id from GetColumnId. [in]
832  /// @param locked The lock holder object for this thread. [in]
833  /// @return The map of metadata for this column. [out]
834  const map<string,string> &
835  GetColumnMetaData(int col_id,
836  CSeqDBLockHold & locked);
837 
838  /// Fetch the data blob for the given column and oid.
839  ///
840  /// This method finds the blob data for this OID and column, and
841  /// stores a reference to it in the provided blob. If `keep' is
842  /// true, a `lifetime' object is attached to the blob to ensure
843  /// the memory is not unmapped when the atlas lock is released.
844  ///
845  /// It is important to specify `keep' correctly to avoid memory
846  /// faults and/or deadlocks. If `keep' is false, the blob must
847  /// not be returned to the user or accessed after the atlas lock
848  /// is released, since the memory it references may no longer be
849  /// mmapped. On the other hand, if `keep' is true, the blob may
850  /// be safely returned to the user, but must not be reassigned or
851  /// destructed until the atlas lock is released (or a deadlock
852  /// will occur). This includes destruction due to `stack unwinding'.
853  ///
854  /// For similar reasons, the blob should be empty on input.
855  ///
856  /// @param col_id The column to fetch data from. [in]
857  /// @param oid The OID of the blob. [in]
858  /// @param blob The data will be returned here. [out]
859  /// @param keep If true, increment the memory region. [in]
860  /// @param locked The lock holder object for this thread. [in]
861  void GetColumnBlob(int col_id,
862  int oid,
863  CBlastDbBlob & blob,
864  bool keep,
865  CSeqDBLockHold & locked);
866 
867  /// Set the MEMB_BIT filtering for this volume.
868  ///
869  /// This method sets the MEMB_BIT for the volume. If the
870  /// MEMB_BIT has already been set, and the new bit is different,
871  /// an exception will be thrown. This prevents conflicting MEMB_BIT
872  /// settings within an alias tree; nevertheless, it also prevents
873  /// aggregating the same volume with different MEMB_BIT settings,
874  /// such as "DBLIST swissprot pdb". The latter case is probably
875  /// not desired. Support for this "paralogous" case will probably
876  /// come later.
877  ///
878  /// @param mbit The bit to set [in]
879  void SetMemBit(int mbit) const {
880  if (m_MemBit && mbit != m_MemBit) {
881  NCBI_THROW(CSeqDBException, eFileErr,
882  "MEMB_BIT error: conflicting bit found.");
883  }
884  m_MemBit = mbit;
885  }
886 
887  void SetOidMaskType(int oid_masks) const {
888  m_OidMaskType = oid_masks;
889  }
890 
891  int GetOidMaskType() const {
892  return m_OidMaskType;
893  }
894 
895 private:
896  void x_StringToOids(const string & acc,
897  ESeqDBIdType id_type,
898  Int8 ident,
899  const string & str_id,
900  bool simplified,
901  vector<int> & oids) const;
902 
903  /// A set of GI lists.
904  typedef vector< CRef<CSeqDBGiList> > TGiLists;
905 
906  /// Returns true if this volume has a positive ID list.
907  bool x_HaveGiList(void) const
908  {
909  return ! (m_UserGiList.Empty() && m_VolumeGiLists.empty());
910  }
911 
912  /// Returns true if this volume has a negative ID list.
913  bool x_HaveNegativeList(void) const
914  {
915  return m_NegativeList.NotEmpty();
916  }
917 
918  /// Returns true if this volume has an ID list.
919  bool x_HaveIdFilter(void) const
920  {
921  return x_HaveGiList() || x_HaveNegativeList();
922  }
923 
924  /// Determine if a user ID list affects this ID, and how.
925  ///
926  /// This is used to accumulate information about a Seq-id in two
927  /// boolean variables. In order for a Seq-id to be considered
928  /// `included', it must pass filtering by both the user ID list
929  /// (if one was specified) and at least one of the set of ID lists
930  /// attached to the volume (if any exist). This function will be
931  /// called repeatedly for each ID in a defline to determine if the
932  /// defline as a whole passes the filtering tests. Once either
933  /// boolean is true, this code never sets it back to false, and
934  /// skips the associated test. This is because a defline
935  /// is included even if one of its Seq-ids matches the volume ID list
936  /// and a different one matches the user ID list. For negative ID
937  /// lists this returns true if the type of ID matches the kind
938  /// used by the negative list, but the ID is not found therein.
939  ///
940  /// @param id Sequence id to check for. [in]
941  /// @param have_user Will be set if the user list has id. [in|out]
942  /// @param have_vol Will be set if the volume list has id. [in|out]
943  void x_FilterHasId(const CSeq_id & id,
944  bool & have_user,
945  bool & have_vol) const
946  {
947  if (! have_user) {
949  have_user |= x_ListIncludesId(*m_UserGiList, id);
950  } else if (m_NegativeList.NotEmpty() && m_NegativeList->HasIdFilters() ) {
951  have_user |= x_ListIncludesId(*m_NegativeList, id);
952  } else {
953  have_user = true;
954  }
955  }
956 
957  if (! have_vol) {
958  if (m_VolumeGiLists.empty()) {
959  have_vol = true;
960  } else {
962  if (x_ListIncludesId(**gilist, id)) {
963  have_vol = true;
964  break;
965  }
966  }
967  }
968  }
969  }
970 
971  /// Returns true if this volume's ID list has this Seq-id.
972  /// @param L A GI list to test against. [in]
973  /// @param id A Seq-id to test against L. [in]
974  /// @return True if the list contains the specified Seq-id.
975  bool x_ListIncludesId(CSeqDBGiList & L, const CSeq_id & id) const
976  {
977  return L.FindId(id);
978  }
979 
980  /// Returns true if this ID is not found in the negative ID list.
981  ///
982  /// This checks whether an ID is found in the negative ID list,
983  /// and whether the ID is the right type (so that it might
984  /// possibly be found). If the ID is the right type, and is not
985  /// found, this method returns true. In other cases it returns
986  /// false. This technique could be described as treating the
987  /// negative GI list as the list of all GIs not mentioned in the
988  /// vector stored in the list, and similarly for the TIs. This
989  /// means that every TI and GI in the ASN.1 for this defline must
990  /// be mentioned in the negative ID list in order to exclude the
991  /// defline. In normal practice, only one GI or TI ever exists
992  /// for a defline.
993  ///
994  /// @param L A GI list to test against. [in]
995  /// @param id A Seq-id to test against L. [in]
996  /// @return True if the ID's type is handled by the list and the ID is not found in it.
997  bool x_ListIncludesId(CSeqDBNegativeList & L, const CSeq_id & id) const
998  {
999  // A defline is included IFF either a GI or TI is found, and
1000  // that ID is not on the list.
1001 
1002  // I use the terms 'included' and 'mentioned' to describe the
1003  // negative list processing as follows: "A negative list
1004  // INCLUDES a TI or GI if that ID is not MENTIONED in the
1005  // negative list."
1006 
1007  bool match_type = false;
1008  bool found = L.FindId(id, match_type);
1009 
1010  return (! found) && match_type;
1011  }
1012 
1013  /// Get sequence header object.
1014  ///
1015  /// This method returns the sequence header information as an
1016  /// ASN.1 object. Seq-ids of type "gnl|BL_ORD_ID|#" are stored as
1017  /// values relative to this volume. If they will be returned to
1018  /// the user in any way, specify true for adjust_oids to adjust
1019  /// them to the global OID range.
1020  ///
1021  /// @param oid
1022  /// The OID of the sequence. [in]
1023  /// @param adjust_oids
1024  /// If true, BL_ORD_ID ids will be adjusted to this volume. [in]
1025  /// @param changed
1026  /// Indicates whether ASN.1 data needed changes (optional). [out]
1027  /// @param inpstr
1028  /// ASN.1 binary input stream (second overload only). [in]
1029  /// @return
1030  /// The Blast-def-line-set describing this sequence.
1032  x_GetHdrAsn1(int oid,
1033  bool adjust_oids,
1034  bool * changed) const;
1035 
1037  x_GetHdrAsn1(int oid,
1038  bool adjust_oids,
1039  bool * changed,
1040  CObjectIStreamAsnBinary *inpstr) const;
1041  /// Get sequence header binary data.
1042  ///
1043  /// This method returns the sequence header information as a
1044  /// reference to raw ASN.1 binary data. This reference can be
1045  /// used until the next access to the Atlas layer or the header
1046  /// data memory lease.
1047  ///
1048  /// @param oid
1049  /// The OID of the sequence. [in]
1052  /// @return
1053  /// The Blast-def-line-set describing this sequence.
1054  CTempString x_GetHdrAsn1Binary(int oid) const;
1055 
1056  /// Get binary sequence header information.
1057  ///
1058  /// This method reads the sequence header information (as binary
1059  /// encoded ASN.1) into a supplied char vector.
1060  ///
1061  /// @param oid
1062  /// The OID of the sequence. [in]
1063  /// @param hdr_data
1064  /// The returned binary ASN.1 of the Blast-def-line-set. [out]
1067  void
1068  x_GetFilteredBinaryHeader(int oid,
1069  vector<char> & hdr_data) const;
1070 
1071 
1072  /// Get sequence header information.
1073  ///
1074  /// This method returns the set of Blast-def-line objects stored
1075  /// for each sequence. These contain descriptive information
1076  /// related to the sequence. If OID filtering is enabled and a
1077  /// membership bit is used, only deflines with that membership bit
1078  /// set will be returned.
1079  ///
1080  /// @param oid
1081  /// The OID of the sequence. [in]
1082  /// @param changed
1083  /// Indicates whether ASN.1 data needed changes (optional). [out]
1084  /// @param inpstr
1085  /// ASN.1 binary input stream (second overload only). [in]
1086  /// @return
1087  /// The set of blast-def-lines describing this sequence.
1089  x_GetFilteredHeader(int oid,
1090  bool * changed) const;
1091 
1093  x_GetFilteredHeader(int oid,
1094  bool * changed,
1095  CObjectIStreamAsnBinary *inpstr ) const;
1096 
1097  /// Get sequence header information structures.
1098  ///
1099  /// This method reads the sequence header information and returns
1100  /// a Seqdesc suitable for inclusion in a CBioseq. This object
1101  /// will contain an opaque type, storing the sequence headers as
1102  /// binary ASN.1, wrapped in a C++ ASN.1 structure (CSeqdesc).
1103  ///
1104  /// @param oid
1105  /// The OID of the sequence. [in]
1108  /// @return
1109  /// The CSeqdesc to include in the CBioseq.
1110  CRef<CSeqdesc> x_GetAsnDefline(int oid) const;
1111 
1112  /// Returns 'p' for protein databases, or 'n' for nucleotide.
1113  char x_GetSeqType() const;
1114 
1115  /// Get ambiguity information.
1116  ///
1117  /// This method is used to fetch the ambiguity data for sequences
1118  /// in a nucleotide database. The ambiguity data describes
1119  /// sections of the nucleotide sequence for which more than one of
1120  /// 'A', 'C', 'G', or 'T' are possible. The integers returned by
1121  /// this function contain a packed description of the ranges of
1122  /// the sequence which have such data. This method only returns
1123  /// the array of integers, and does not interpret them, except for
1124  /// byte swapping.
1125  ///
1126  /// @param oid
1127  /// The OID of the sequence. [in]
1128  /// @param ambchars
1129  /// The returned array of ambiguity descriptors. [out]
1132  void x_GetAmbChar(int oid,
1133  vector<Int4> & ambchars) const;
1134 
1135  /// Get a sequence with ambiguous regions.
1136  ///
1137  /// This method gets the sequence data, returning a pointer and
1138  /// the length of the sequence. For nucleotide sequences, the
1139  /// data can be returned in one of two encodings. Specify either
1140  /// (kSeqDBNuclNcbiNA8) for NCBI/NA8, or (kSeqDBNuclBlastNA8) for
1141  /// Blast/NA8. The data can also be allocated in one of three
1142  /// ways, enumerated in ESeqDBAllocType. Specify eAtlas to use
1143  /// the Atlas code, eMalloc to use the malloc() function, or eNew
1144  /// to use the new operator.
1145  ///
1146  /// @param oid
1147  /// The OID of the sequence. [in]
1148  /// @param buffer
1149  /// The returned sequence data. [out]
1150  /// @param nucl_code
1151  /// The encoding of the returned sequence data. [in]
1152  /// @param alloc_type
1153  /// The allocation routine used. [in]
1154  /// @param region
1155  /// If non-null, the offset range to get. [in]
1156  /// @param masks
1157  /// If non-null, a set of sequence ranges (presumably masking ranges for the returned data; confirm against the implementation). [in]
1158  /// @return
1159  /// The length of this sequence in bases.
1160  int x_GetAmbigSeq(int oid,
1161  char ** buffer,
1162  int nucl_code,
1163  ESeqDBAllocType alloc_type,
1164  SSeqDBSlice * region,
1165  CSeqDB::TSequenceRanges *masks) const;
1166 
1167  /// Allocate memory in one of several ways.
1168  ///
1169  /// This method provides functionality to allocate memory with the
1170  /// atlas layer, using malloc, or using the new [] operator. The
1171  /// user is expected to return the data using the corresponding
1172  /// deallocation technique.
1173  ///
1174  /// @param length
1175  /// The number of bytes to get. [in]
1176  /// @param alloc_type
1177  /// The type of allocation routine to use. [in]
1180  /// @return
1181  /// A pointer to the allocated memory.
1182  char * x_AllocType(size_t length,
1183  ESeqDBAllocType alloc_type) const;
1184 
1185  /// Get sequence data.
1186  ///
1187  /// The sequence data is found and returned for the specified
1188  /// sequence. The caller owns the data and a hold on the
1189  /// underlying memory region. There is a memory access in this
1190  /// code that tends to trigger a soft (and possibly hard) page
1191  /// fault in the nucleotide case. The keep, locked and can_release
1192  /// options are only offered by the six-argument overload of
1193  /// x_GetSequence().
1195  ///
1196  /// @param oid
1197  /// The ordinal ID of the sequence to get. [in]
1198  /// @param buffer
1199  /// The returned sequence data buffer. [out]
1208  /// @return
1209  /// The length of the sequence in bases.
1210  int x_GetSequence(int oid,
1211  const char ** buffer) const;
1212 
1213  /// Get partial sequence data.
1214  ///
1215  /// The sequence data is found and returned for the specified oid
1216  /// and offset range. If the region argument is non-null, the
1217  /// region endpoints are verified against the sequence endpoints.
1218  /// Otherwise, this method is the same as x_GetSequence(). Note
1219  /// that the code returns the length of the region in bases, but
1220  /// buffer is set to a pointer to the beginning of the sequence,
1221  /// not the beginning of the region.
1222  ///
1223  /// @param oid
1224  /// The ordinal ID of the sequence to get. [in]
1225  /// @param buffer
1226  /// The returned sequence data buffer. [out]
1227  /// @param keep
1228  /// Specify true if the caller wants a hold on the sequence. [in]
1229  /// @param locked
1230  /// The lock holder object for this thread. [in]
1231  /// @param can_release
1232  /// Specify true if the atlas lock can be released. [in]
1233  /// @param region
1234  /// If non-null, the offset range to get. [in]
1235  /// @return
1236  /// The length of the returned portion in bases.
1237  int x_GetSequence(int oid,
1238  const char ** buffer,
1239  bool keep,
1240  CSeqDBLockHold & locked,
1241  bool can_release,
1242  SSeqDBSlice * region) const;
1243 
1244  /// Get defline filtered by several criteria.
1245  ///
1246  /// This method returns the set of deflines for a sequence. If
1247  /// there is an OID list and membership bit, these will be
1248  /// filtered by membership bit. If a preferred GI is
1249  /// specified, the defline matching that GI (if found) will be
1250  /// moved to the front of the set.
1251  ///
1252  /// @param oid
1253  /// The ordinal ID of the sequence to get. [in]
1254  /// @param preferred_gi
1255  /// This GI's defline (if non-zero and found) will be put at the front of the list. [in]
1256  /// @param preferred_seq_id
1257  /// This SeqID's defline (if non-NULL and found) will be put at the front of the list. [in]
1260  /// @return
1261  /// The defline set for the specified oid.
1263  x_GetTaxDefline(int oid,
1264  TGi preferred_gi,
1265  const CSeq_id * preferred_seq_id);
1266 
1267 
1268  /// Get taxonomic descriptions of a sequence.
1269  ///
1270  /// This method builds a set of CSeqdesc objects from taxonomic
1271  /// information and blast deflines. If there is an OID list and
1272  /// membership bit, the deflines will be filtered by membership
1273  /// bit. If a preferred GI is specified, the defline
1274  /// matching that GI (if found) will be moved to the front of the
1275  /// set. This method is called as part of the processing for
1276  /// building a CBioseq object.
1277  ///
1278  /// @param oid
1279  /// The ordinal ID of the sequence to get. [in]
1280  /// @param preferred_gi
1281  /// This GI's defline (if non-zero and found) will be put at the front of the list. [in]
1282  /// @param preferred_seq_id
1283  /// This SeqID's defline (if non-NULL and found) will be put at the front of the list. [in]
1288  /// @return
1289  /// A list of CSeqdesc objects for the specified oid.
1290  list< CRef<CSeqdesc> >
1291  x_GetTaxonomy(int oid,
1292  TGi preferred_gi,
1293  const CSeq_id * preferred_seq_id);
1294 
1295 
1296  /// Returns the base-offset of the specified oid.
1297  ///
1298  /// This method finds the starting offset of the OID relative to
1299  /// the start of the volume, and returns that distance as a number
1300  /// of bytes. The range of the return value should be from zero
1301  /// to the size of the sequence file in bytes. Note that the
1302  /// total volume length in bytes can be found by submitting the
1303  /// OID count as the input oid, because the index file contains
1304  /// one more array element than there are sequences.
1305  ///
1306  /// @param oid
1307  /// The sequence of which to get the starting offset. [in]
1310  /// @return
1311  /// The offset in the volume of that sequence in bytes.
1312  Uint8 x_GetSeqResidueOffset(int oid) const;
1313 
1314  /// Find all columns for this volume.
1315  ///
1316  /// This method looks for and opens any columns that might be
1317  /// associated with this database volume.
1318  ///
1319  /// @param locked
1320  /// The lock holder object for this thread. [in]
1321  void x_OpenAllColumns(CSeqDBLockHold & locked);
1322 
1323  /// Check Seq-id versions for special sparse-id support case.
1324  ///
1325  /// The BlastDB `sparse indexing' feature omits versions when
1326  /// emitting (string) ISAM indices. If a search for a Seq-id with
1327  /// a version fails, SeqDB strips the version and tries the search
1328  /// again. However, for non-sparse databases, this second search
1329  /// has the harmful side effect that it can find IDs with the same
1330  /// accession but an incorrect version. This method scans the OID
1331  /// list and removes the OIDs with incorrect versions. It should
1332  /// only be called in cases when the version removal needed to be
1333  /// done to get results.
1334  ///
1335  /// @param acc
1336  /// An accession or formatted Seq-id for which to search. [in]
1337  /// @param oids
1338  /// A set of OIDs found for this sequence. [in|out]
1341  void x_CheckVersions(const string & acc,
1342  vector<int> & oids) const;
1343 
1344  void x_OpenSeqFile(void) const;
1345  void x_OpenHdrFile(void) const;
1346  void x_OpenPigFile(void) const;
1347  void x_UnleasePigFile(void) const;
1348  void x_OpenGiFile(void) const;
1349  void x_UnleaseGiFile(void) const;
1350  void x_OpenStrFile(void) const;
1351  void x_UnleaseStrFile(void) const;
1352  void x_OpenTiFile(void) const;
1353  void x_UnleaseTiFile(void) const;
1354  void x_OpenHashFile(void) const;
1355  void x_OpenOidFile(void) const;
1356 
1357  /// The memory management layer.
1359 
1360  /// True if the volume is protein, false for nucleotide.
1361  bool m_IsAA;
1362 
1363  /// The name of this volume.
1364  string m_VolName;
1365 
1366  /// Metadata plus offsets into the sequence, header, and ambiguity data.
1368 
1369  /// Contains sequence data for this volume.
1371 
1372  /// Contains header (defline) information for this volume.
1374 
1375  // These are mutable because they defer initialization.
1376 
1377  /// Handles translation of GIs to OIDs.
1379 
1380  /// Handles translation of GIs to OIDs.
1382 
1383  /// Handles translation of strings (accessions) to OIDs.
1385 
1386  /// Handles translation of TI (trace ids) to OIDs.
1388 
1389  /// Handles translation of sequence hash value to OIDs.
1391 
1392  /// The GI index file (for fast oid->gi conversion)
1394 
1395  /// This cache allows CBioseqs to share taxonomic objects.
1397 
1398  /// The user ID list, if one exists.
1400 
1401  /// The negative ID list, if one exists.
1403 
1404  /// The volume GI lists, if any exist.
1406 
1407  /// The filtering MEMB_BIT
1408  mutable int m_MemBit;
1409 
1410  mutable int m_OidMaskType;
1411 
1412  /// Cached/ranged sequence info type.
1414 
1415  /// Cached/ranged sequence info.
1417 
1418  /// Starting OID of this volume.
1420 
1421  /// First OID past end of this volume.
1423 
1424  /// Filtered defline plus whether binary data needed changes.
1425  typedef pair<CRef<CBlast_def_line_set>, bool> TDeflineCacheItem;
1426 
1427  /// Cache of filtered deflines.
1429 
1430  /// True if we have opened the columns for this volume.
1432 
1433  /// True if the volume file has been (at least tried to) opened
1434  mutable bool m_SeqFileOpened;
1435  mutable bool m_HdrFileOpened;
1436  mutable bool m_HashFileOpened;
1437  mutable bool m_OidFileOpened;
1438 
1446 
1447 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1448  (!defined(NCBI_COMPILER_MIPSPRO)) )
1449  /// Set of columns defined for this volume.
1450  vector< CRef<CSeqDBColumn> > m_Columns;
1451 #endif
1452 };
1453 
1455 
1456 #endif // OBJTOOLS_READERS_SEQDB__SEQDBVOL_HPP
1457 
1458 
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
CFastMutex –.
Definition: ncbimtx.hpp:667
CFile –.
Definition: ncbifile.hpp:1604
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CObject –.
Definition: ncbiobj.hpp:180
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:297
CNcbiStreamoff TIndx
The type used for file offsets.
Definition: seqdbatlas.hpp:301
CSeqDBException.
Definition: seqdbcommon.hpp:73
void Clear()
Clears the memory map object.
Definition: seqdbatlas.hpp:737
void Init(const string filename)
Initializes a memory map object.
Definition: seqdbatlas.hpp:702
CSeqDBGiIndex.
Definition: seqdbvol.hpp:55
CSeqDBFileMemMap m_Lease
Definition: seqdbvol.hpp:85
CSeqDBGiIndex(CSeqDBAtlas &atlas, const string &dbname, char prot_nucl)
Definition: seqdbvol.hpp:61
CSeqDBAtlas::TIndx TIndx
Definition: seqdbvol.hpp:57
static bool IndexExists(const string &name, const char prot_nucl)
Definition: seqdbvol.hpp:75
Int4 m_NumOIDs
Definition: seqdbvol.hpp:88
TGi GetSeqGI(TOid oid, CSeqDBLockHold &locked)
Definition: seqdbvol.cpp:51
CSeqDBGiList.
bool HasIdFilters() const
bool FindId(const CSeq_id &id)
Test for existence of a Seq-id by type.
Simple int-keyed cache.
CSeqDBLockHold.
Definition: seqdbatlas.hpp:166
CSeqDBNegativeList.
bool FindId(const CSeq_id &id, bool &match_type)
Test for existence of a TI or GI here and report whether the ID was one of those types.
bool HasIdFilters() const
CSeqDBRangeList.
Definition: seqdbvol.hpp:100
~CSeqDBRangeList()
Destructor.
Definition: seqdbvol.hpp:113
CSeqDBRangeList()
Constructor.
Definition: seqdbvol.hpp:104
TRangeList m_Ranges
Range of offsets needed for this sequence.
Definition: seqdbvol.hpp:154
void SetRanges(const TRangeList &ranges, bool append_ranges, bool cache_data)
Set ranges of the sequence that will be used.
Definition: seqdbvol.cpp:3189
const TRangeList & GetRanges()
Get ranges of sequence offsets that will be used.
Definition: seqdbvol.hpp:136
static int ImmediateLength()
Sequences shorter than this will not use ranges in any case.
Definition: seqdbvol.hpp:147
bool IsCached()
Returns true if the sequence data is cached.
Definition: seqdbvol.hpp:119
bool m_CacheData
True if caching of sequence data is required for this sequence.
Definition: seqdbvol.hpp:157
set< pair< int, int > > TRangeList
List of sequence offset ranges.
Definition: seqdbvol.hpp:125
void FlushSequence()
Flush cached sequence data (if any).
Definition: seqdbvol.hpp:142
CSeqDBVol class.
Definition: seqdbvol.hpp:169
void OptimizeGiLists() const
Simplify the GI list configuration.
Definition: seqdbvol.cpp:3203
bool m_HaveColumns
True if we have opened the columns for this volume.
Definition: seqdbvol.hpp:1431
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Get the Seq-ids associated with a sequence.
Definition: seqdbvol.cpp:1826
int x_GetSequence(int oid, const char **buffer, bool keep, CSeqDBLockHold &locked, bool can_release, SSeqDBSlice *region) const
Get partial sequence data.
CFastMutex m_MtxCachedRange
Definition: seqdbvol.hpp:1445
CRef< CSeqDBIsam > m_IsamGi
Handles translation of GIs to OIDs.
Definition: seqdbvol.hpp:1381
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
Definition: seqdbvol.cpp:2787
Uint8 x_GetSeqResidueOffset(int oid) const
Returns the base-offset of the specified oid.
Definition: seqdbvol.cpp:2897
void x_OpenHashFile(void) const
Definition: seqdbvol.cpp:234
void x_UnleasePigFile(void) const
Definition: seqdbvol.cpp:146
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
Definition: seqdbvol.cpp:1522
void x_UnleaseTiFile(void) const
Definition: seqdbvol.cpp:221
CSeqDBAtlas & m_Atlas
The memory management layer.
Definition: seqdbvol.hpp:1358
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
Definition: seqdbvol.cpp:2773
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
Definition: seqdbvol.cpp:3258
CRef< CSeqDBHdrFile > m_Hdr
Contains header (defline) information for this volume.
Definition: seqdbvol.hpp:1373
CFastMutex m_MtxPig
Definition: seqdbvol.hpp:1440
void x_OpenSeqFile(void) const
Definition: seqdbvol.cpp:116
CSeqDBIntCache< CRef< CSeqdesc > > m_TaxCache
This cache allows CBioseqs to share taxonomic objects.
Definition: seqdbvol.hpp:1396
const string & GetVolName() const
Get the volume name.
Definition: seqdbvol.hpp:452
void x_OpenTiFile(void) const
Definition: seqdbvol.cpp:209
int GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
Definition: seqdbvol.cpp:1484
vector< CRef< CSeqDBColumn > > m_Columns
Set of columns defined for this volume.
Definition: seqdbvol.hpp:1450
CRef< CSeqDBIdxFile > m_Idx
Metadata plus offsets into the sequence, header, and ambiguity data.
Definition: seqdbvol.hpp:1367
void x_OpenStrFile(void) const
Definition: seqdbvol.cpp:184
CFastMutex m_MtxTi
Definition: seqdbvol.hpp:1442
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
Definition: seqdbvol.cpp:296
void x_StringToOids(const string &acc, ESeqDBIdType id_type, Int8 ident, const string &str_id, bool simplified, vector< int > &oids) const
Definition: seqdbvol.cpp:2620
void OpenSeqFile(CSeqDBLockHold &locked) const
Open sequence file.
Definition: seqdbvol.cpp:111
bool m_HashFileOpened
Definition: seqdbvol.hpp:1436
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
Definition: seqdbvol.cpp:3376
CRef< CSeqDBIsam > m_IsamStr
Handles translation of strings (accessions) to OIDs.
Definition: seqdbvol.hpp:1384
vector< CRef< CSeqDBGiList > > TGiLists
A set of GI lists.
Definition: seqdbvol.hpp:904
CSeqDBIntCache< TDeflineCacheItem > m_DeflineCache
Cache of filtered deflines.
Definition: seqdbvol.hpp:1428
int x_GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
Definition: seqdbvol.cpp:1589
int m_VolStart
Starting OID of this volume.
Definition: seqdbvol.hpp:1419
int GetNumOIDs() const
Get the number of OIDs for this volume.
Definition: seqdbvol.cpp:2370
map< int, CRef< CSeqDBRangeList > > TRangeCache
Cached/ranged sequence info type.
Definition: seqdbvol.hpp:1413
bool GetGi(int oid, TGi &gi, CSeqDBLockHold &locked) const
Find the GI given an OID.
Definition: seqdbvol.cpp:2577
CRef< CBlast_def_line_set > x_GetHdrAsn1(int oid, bool adjust_oids, bool *changed) const
Get sequence header object.
Definition: seqdbvol.cpp:2184
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
Definition: seqdbvol.cpp:3093
void x_FilterHasId(const CSeq_id &id, bool &have_user, bool &have_vol) const
Determine if a user ID list affects this ID, and how.
Definition: seqdbvol.hpp:943
string m_VolName
The name of this volume.
Definition: seqdbvol.hpp:1364
CTempString x_GetHdrAsn1Binary(int oid) const
Get sequence header binary data.
Definition: seqdbvol.cpp:2285
CFastMutex m_MtxHdr
Definition: seqdbvol.hpp:1444
void FlushOffsetRangeCache()
Flush all offset ranges cached.
Definition: seqdbvol.cpp:3183
CSeqDBVol(CSeqDBAtlas &atlas, const string &name, char prot_nucl, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, int vol_start, CSeqDBLockHold &locked)
Constructor.
Definition: seqdbvol.cpp:72
void IdsToOids(CSeqDBGiList &gis, CSeqDBLockHold &locked) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
Definition: seqdbvol.cpp:2483
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Get Raw Sequence and Ambiguity Data.
Definition: seqdbvol.cpp:2979
void x_OpenHdrFile(void) const
Definition: seqdbvol.cpp:125
bool x_HaveNegativeList(void) const
Returns true if this volume has a negative ID list.
Definition: seqdbvol.hpp:913
string GetLMDBFileName() const
Get sqlite file name associated with this volume. Empty string if version 4.
Definition: seqdbvol.cpp:275
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
Definition: seqdbvol.cpp:3293
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
Definition: seqdbvol.cpp:319
CFastMutex m_MtxStr
Definition: seqdbvol.hpp:1441
TRangeCache m_RangeCache
Cached/ranged sequence info.
Definition: seqdbvol.hpp:1416
int m_VolEnd
First OID past end of this volume.
Definition: seqdbvol.hpp:1422
int GetOidMaskType() const
Definition: seqdbvol.hpp:891
bool x_ListIncludesId(CSeqDBNegativeList &L, const CSeq_id &id) const
Returns true if this ID is not found in the negative ID list.
Definition: seqdbvol.hpp:997
bool m_SeqFileOpened
True if the volume file has been (at least tried to) opened.
Definition: seqdbvol.hpp:1434
bool GetPig(int oid, int &pig, CSeqDBLockHold &locked) const
Find the PIG given an OID.
Definition: seqdbvol.cpp:2406
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
Definition: seqdbvol.cpp:280
CFastMutex m_MtxSeq
Definition: seqdbvol.hpp:1443
CRef< CSeqDBSeqFile > m_Seq
Contains sequence data for this volume.
Definition: seqdbvol.hpp:1370
CRef< CSeqdesc > x_GetAsnDefline(int oid) const
Get sequence header information structures.
Definition: seqdbvol.cpp:1251
string GetTitle() const
Get the volume title.
Definition: seqdbvol.cpp:2375
CRef< CSeqDBGiIndex > m_GiIndex
The GI index file (for fast oid->gi conversion)
Definition: seqdbvol.hpp:1393
CRef< CSeqDBNegativeList > m_NegativeList
The negative ID list, if one exists.
Definition: seqdbvol.hpp:1402
TGiLists m_VolumeGiLists
The volume GI lists, if any exist.
Definition: seqdbvol.hpp:1405
CRef< CSeqDBIsam > m_IsamTi
Handles translation of TI (trace ids) to OIDs.
Definition: seqdbvol.hpp:1387
int x_GetSequence(int oid, const char **buffer) const
Get sequence data.
Definition: seqdbvol.cpp:1756
CRef< CSeqDBIsam > m_IsamHash
Handles translation of sequence hash value to OIDs.
Definition: seqdbvol.hpp:1390
void UnLease()
Return expendable resources held by this volume.
Definition: seqdbvol.cpp:2801
set< pair< int, int > > TRangeList
List of sequence offset ranges.
Definition: seqdbvol.hpp:736
list< CRef< CSeqdesc > > x_GetTaxonomy(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get taxonomic descriptions of a sequence.
Definition: seqdbvol.cpp:1081
int m_OidMaskType
Definition: seqdbvol.hpp:1410
int GetSequence(int oid, const char **buffer) const
Get the sequence data.
Definition: seqdbvol.hpp:354
CRef< CBlast_def_line_set > x_GetFilteredHeader(int oid, bool *changed) const
Get sequence header information.
Definition: seqdbvol.cpp:1953
char * x_AllocType(size_t length, ESeqDBAllocType alloc_type) const
Allocate memory in one of several ways.
Definition: seqdbvol.cpp:1456
void SetOidMaskType(int oid_masks) const
Definition: seqdbvol.hpp:887
void x_CheckVersions(const string &acc, vector< int > &oids) const
Check Seq-id versions for special sparse-id support case.
Definition: seqdbvol.cpp:2717
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
Definition: seqdbvol.hpp:639
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
Definition: seqdbvol.cpp:2471
bool m_HdrFileOpened
Definition: seqdbvol.hpp:1435
bool x_HaveGiList(void) const
Returns true if this volume has a positive ID list.
Definition: seqdbvol.hpp:907
TGi GetSeqGI(int oid, CSeqDBLockHold &locked) const
Get the GI of a sequence. This method returns the GI of the sequence.
Definition: seqdbvol.cpp:1870
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end, CSeqDBLockHold &locked) const
Fetch data as a CSeq_data object.
Definition: seqdbvol.cpp:2907
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
Definition: seqdbvol.cpp:3068
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
Definition: seqdbvol.cpp:2822
char GetSeqType() const
Get the sequence type stored in this database.
Definition: seqdbvol.cpp:265
bool x_HaveIdFilter(void) const
Returns true if this volume has an ID list.
Definition: seqdbvol.hpp:919
void x_OpenAllColumns(CSeqDBLockHold &locked)
Find all columns for this volume.
Definition: seqdbvol.cpp:3307
CRef< CBioseq > GetBioseq(int oid, TGi pref_gi, const CSeq_id *pref_seq_id, bool seqdata, CSeqDBLockHold &locked)
Get a CBioseq object for this sequence.
Definition: seqdbvol.cpp:1287
int GetMinLength() const
Get the length of the smallest sequence in this volume.
Definition: seqdbvol.cpp:2390
void x_UnleaseStrFile(void) const
Definition: seqdbvol.cpp:196
CRef< CSeqDBGiList > m_UserGiList
The user ID list, if one exists.
Definition: seqdbvol.hpp:1399
CSeqDBAtlas::TIndx TIndx
Import TIndx definition from the CSeqDBAtlas class.
Definition: seqdbvol.hpp:172
void x_OpenGiFile(void) const
Definition: seqdbvol.cpp:159
pair< CRef< CBlast_def_line_set >, bool > TDeflineCacheItem
Filtered defline plus whether binary data needed changes.
Definition: seqdbvol.hpp:1425
CRef< CSeqDBIsam > m_IsamPig
Handles translation of PIGs to OIDs.
Definition: seqdbvol.hpp:1378
bool m_IsAA
True if the volume is protein, false for nucleotide.
Definition: seqdbvol.hpp:1361
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
Definition: seqdbvol.cpp:3277
CRef< CBlast_def_line_set > x_GetTaxDefline(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get defline filtered by several criteria.
Definition: seqdbvol.cpp:1036
void x_OpenOidFile(void) const
Definition: seqdbvol.cpp:251
int GetMaxLength() const
Get the length of the largest sequence in this volume.
Definition: seqdbvol.cpp:2385
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
Definition: seqdbvol.cpp:2395
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
Definition: seqdbvol.cpp:2438
void x_OpenPigFile(void) const
Definition: seqdbvol.cpp:134
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
Definition: seqdbvol.hpp:879
void x_GetFilteredBinaryHeader(int oid, vector< char > &hdr_data) const
Get binary sequence header information.
Definition: seqdbvol.cpp:2301
bool x_ListIncludesId(CSeqDBGiList &L, const CSeq_id &id) const
Returns true if this volume's ID list has this Seq-id.
Definition: seqdbvol.hpp:975
CFastMutex m_MtxGi
Definition: seqdbvol.hpp:1439
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
Definition: seqdbvol.cpp:1880
CRef< CBlast_def_line_set > GetFilteredHeader(int oid, CSeqDBLockHold &locked) const
Get filtered sequence header information.
Definition: seqdbvol.cpp:1886
bool m_OidFileOpened
Definition: seqdbvol.hpp:1437
string GetDate() const
Get the formatting date of the volume.
Definition: seqdbvol.cpp:2380
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
int m_MemBit
The filtering MEMB_BIT.
Definition: seqdbvol.hpp:1408
void x_UnleaseGiFile(void) const
Definition: seqdbvol.cpp:171
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data) const
Apply a range of offsets to a database sequence.
Definition: seqdbvol.cpp:3131
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
Definition: seqdbvol.cpp:3116
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
Definition: seqdbvol.cpp:3233
void x_GetAmbChar(int oid, vector< Int4 > &ambchars) const
Get ambiguity information.
Definition: seqdbvol.cpp:2332
char x_GetSeqType() const
Returns 'p' for protein databases, or 'n' for nucleotide.
Definition: seqdbvol.cpp:270
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Definition: set.hpp:45
#define false
Definition: bool.h:36
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual bool Exists(void) const
Check existence of file.
Definition: ncbifile.hpp:4038
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – i.e., it points to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
static pcre_uint8 * buffer
Definition: pcretest.c:1051
The SeqDB memory management layer.
Defines database column access classes.
ESeqDBAllocType
Certain methods have an "Alloc" version.
Int4 TOid
Ordinal ID in BLAST databases.
Definition: seqdbcommon.hpp:58
ESeqDBIdType
Various identifier formats used in Id lookup.
This file defines several SeqDB utility functions related to byte order and file system portability.
Defines database volume access classes.
USING_SCOPE(objects)
Import definitions from the objects namespace.
List of sequence offset ranges.
Definition: seqdb.hpp:236
OID-Range type to simplify interfaces.
Definition: _hash_fun.h:40
Modified on Sat Jun 15 11:51:58 2024 by modify_doxy.py rev. 669887