NCBI C++ ToolKit
asn_cache.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ___ASN_CACHE__HPP
2 #define ___ASN_CACHE__HPP
3 
4 /* $Id: asn_cache.hpp 80932 2018-01-23 19:55:42Z kotliaro $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Mike DiCuccio Cheinan Marks Eyal Mozes
30  *
31  * 2018-01-18: Adding support for hierarchical caches.
32  *
33  */
34 
35 /** @file asn_cache.hpp
36  * Contains the class definition for CAsnCache, the main
37  * client class for accessing the ASN cache data.
38  *
39  */
40 
41 #include <corelib/ncbistd.hpp>
42 
44 
46 
47 
49 class CSubCacheCreate;
50 class CChunkFile;
51 class CSeqIdChunkFile;
52 class CBitVectorWrapper;
53 
54 /// CAsnCache is used by clients to access the ASN cache data. The ASN
55 /// cache is a cache of the ID database that is designed for fast access
56 /// and retrieval of CSeq_entry blobs.
57 /// @note Data in the ASN cache can also be accessed via the object manager
58 /// and the ASN cache data loader, CAsnCache_DataLoader.
59 class CAsnCache : public CObject,
60  public IAsnCacheStore
61 {
62 public:
63  /// Type used to hold raw (unformatted) blob data.
64  using TBuffer = vector<unsigned char>;
65 
66  CAsnCache(const CAsnCache&) = delete;
67  CAsnCache& operator=(const CAsnCache&) = delete;
68 
69  /// Pass in the path to the ASN cache to construct an object.
70  explicit CAsnCache(const string& db_path);
71 
72  /// Return the raw blob in an unformatted buffer.
73  bool GetRaw(const objects::CSeq_id_Handle& id, TBuffer& buffer);
74  bool GetMultipleRaw(const objects::CSeq_id_Handle& id, vector<TBuffer>& buffer);
75 
76  /// Return the cache blob, packed and uninterpreted
77  bool GetBlob(const objects::CSeq_id_Handle& id, objects::CCache_blob& blob);
78  bool GetMultipleBlobs(const objects::CSeq_id_Handle& id,
79  vector< CRef<objects::CCache_blob> >& blob);
80 
81  ///
82  /// Return the set of seq-ids associated with a given ID. By default, if
83  /// the SeqId index is not available, and the SeqIds can't be retrieved
84  /// cheaply, does nothing and return false. If cheap_only is set to false,
85  /// will always retrieve the SeqIds, by retrieving the full blob if that is
86  /// the only available way.
87  ///
88  bool GetSeqIds(const objects::CSeq_id_Handle& id,
89  vector<objects::CSeq_id_Handle>& all_ids,
90  bool cheap_only = true);
91 #if 0 // Is not being used anywhere
92 
93  ///
94  /// Check if the SeqId cache, for efficient retrieval of SeqIds, is
95  /// available
96  ///
97 
98  bool EfficientlyGetSeqIds() const { return m_SeqIdIndex.get(); }
99 #endif
100  /// Return a blob as a CSeq_entry object.
101  CRef<objects::CSeq_entry> GetEntry(const objects::CSeq_id_Handle& id);
102  vector< CRef<objects::CSeq_entry> > GetMultipleEntries(const objects::CSeq_id_Handle& id);
103 
104  /// Return the GI and timestamp for a given seq_id. This can be a very
105  /// fast way to look up the GI for an accession.version because only the
106  /// index is queried -- the blob is not retrieved.
107  bool GetIdInfo(const objects::CSeq_id_Handle& id,
108  CAsnIndex::TGi& gi,
109  time_t& timestamp);
110 
111  /// Return the GI and timestamp for a given seq_id. This can be a very
112  /// fast way to look up the GI for an accession.version because only the
113  /// index is queried -- the blob is not retrieved.
114  bool GetIdInfo(const objects::CSeq_id_Handle& id,
115  objects::CSeq_id_Handle& accession,
116  CAsnIndex::TGi& gi,
117  time_t& timestamp,
118  Uint4& sequence_length,
119  Uint4& tax_id);
120  /// Get the full ASN cache index entry. This does not retrieve the full
121  /// blob and is very fast.
122  bool GetIndexEntry(const objects::CSeq_id_Handle & id,
124  bool GetMultipleIndexEntries(const objects::CSeq_id_Handle & id,
125  vector<CAsnIndex::SIndexInfo> &info);
126 
127 
128  // AsnCacheStats
129  size_t GetGiCount() const;
132 
133 private:
134  string m_DbPath;
135  std::unique_ptr<IAsnCacheStore> m_Store;
136 };
137 
139 
140 
141 #endif // ___ASN_CACHE__HPP
CAsnCache is used by clients to access the ASN cache data.
Definition: asn_cache.hpp:61
void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const
Definition: asn_cache.cpp:189
string m_DbPath
Definition: asn_cache.hpp:134
bool GetBlob(const objects::CSeq_id_Handle &id, objects::CCache_blob &blob)
Return the cache blob, packed and uninterpreted.
Definition: asn_cache.cpp:131
CAsnCache & operator=(const CAsnCache &)=delete
bool GetIdInfo(const objects::CSeq_id_Handle &id, objects::CSeq_id_Handle &accession, CAsnIndex::TGi &gi, time_t &timestamp, Uint4 &sequence_length, Uint4 &tax_id)
Return the GI and timestamp for a given seq_id.
size_t GetGiCount() const
Definition: asn_cache.cpp:179
void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const
Definition: asn_cache.cpp:184
vector< unsigned char > TBuffer
Type used to hold raw (unformatted) blob data.
Definition: asn_cache.hpp:64
vector< CRef< objects::CSeq_entry > > GetMultipleEntries(const objects::CSeq_id_Handle &id)
Definition: asn_cache.cpp:159
bool GetIdInfo(const objects::CSeq_id_Handle &id, CAsnIndex::TGi &gi, time_t &timestamp)
Return the GI and timestamp for a given seq_id.
bool GetMultipleRaw(const objects::CSeq_id_Handle &id, vector< TBuffer > &buffer)
Definition: asn_cache.cpp:149
bool GetRaw(const objects::CSeq_id_Handle &id, TBuffer &buffer)
Return the raw blob in an unformatted buffer.
Definition: asn_cache.cpp:143
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
Definition: asn_cache.cpp:164
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Return a blob as a CSeq_entry object.
Definition: asn_cache.cpp:154
bool GetMultipleIndexEntries(const objects::CSeq_id_Handle &id, vector< CAsnIndex::SIndexInfo > &info)
Definition: asn_cache.cpp:170
bool GetMultipleBlobs(const objects::CSeq_id_Handle &id, vector< CRef< objects::CCache_blob > > &blob)
Definition: asn_cache.cpp:137
CAsnCache(const CAsnCache &)=delete
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only=true)
Return the set of seq-ids associated with a given ID.
Definition: asn_cache.cpp:124
std::unique_ptr< IAsnCacheStore > m_Store
Definition: asn_cache.hpp:135
Uint8 TGi
Definition: asn_index.hpp:57
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
std::function< void(string, uint32_t, uint64_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)> TEnumIndexCallback
std::function< void(string, uint32_t, uint64_t, uint32_t)> TEnumSeqidCallback
Include a standard set of the NCBI C++ Toolkit most basic headers.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static MDB_envinfo info
Definition: mdb_load.c:37
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Modified on Tue Feb 27 05:51:31 2024 by modify_doxy.py rev. 669887