NCBI C++ ToolKit
asn_cache.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: asn_cache.cpp 80932 2018-01-23 19:55:42Z kotliaro $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  * 2018-01-18: Adding support for hierarchical caches.
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbifile.hpp>
35 #include <db/bdb/bdb_cursor.hpp>
36 
37 #include <serial/serial.hpp>
38 #include <serial/objistrasnb.hpp>
39 
40 #include <util/compress/zlib.hpp>
41 #include <util/compress/stream.hpp>
42 
45 
53 
55 
56 
59 
/// Construct a cache client rooted at db_path.
///
/// Collects candidate database directories into db_paths: db_path itself
/// when its main BDB index file exists, plus every directory entry from
/// `items` whose main BDB index file exists (stored as an absolute path).
/// One hit is served by CAsnCacheStore, several by CAsnCacheStoreMany.
///
/// NOTE(review): this listing omits some original lines (63-64, 78, 83, 93).
/// `items` is declared on one of them - presumably the result of a
/// CDir::GetEntries() call using kSubcacheMask - and the empty-db_paths
/// branch presumably raises an exception carrying the message shown below.
/// Confirm against the repository copy before relying on this description.
///
/// @param db_path  Top-level cache directory; also stored in m_DbPath.
60 CAsnCache::CAsnCache(const string& db_path)
61  : m_DbPath(db_path)
62 {
65 
66  vector<string> db_paths;
67 
68  // Add top-level directory to the collection of database paths.
69  if ( CFile(NASNCacheFileName::GetBDBIndex(db_path, CAsnIndex::e_main)).Exists() ) {
70  db_paths.push_back(db_path);
71  }
72 
73  // Add sub-caches - sub-cache name starts with "subcache" prefix - to the collection
74  // of database paths.
75 
76  string const kSubcacheMask = "subcache*";
77 
 // Only directories that contain a main index file are accepted as sub-caches.
79  for ( auto const& item: items ) {
80  if ( item->IsDir() ) {
81  string path = item->GetPath();
82  path = CDirEntry::CreateAbsolutePath(path);
84 
85  string main_fname = NASNCacheFileName::GetBDBIndex(path, CAsnIndex::e_main);
86  if ( CFile(main_fname).Exists() ) {
87  db_paths.push_back(path);
88  }
89  }
90  }
91 
 // Nothing usable was found under db_path: report it to the caller.
92  if ( db_paths.empty() ) {
94  "No ASN.1 Cache database is found in " + db_path);
95  }
96 
 // Pick the store implementation by the number of databases discovered.
97  if ( 1 == db_paths.size() ) {
98  m_Store.reset(new CAsnCacheStore(db_paths.at(0)));
99  }
100  else {
101  m_Store.reset(new CAsnCacheStoreMany(db_paths));
102  }
103 }
104 
/// Return the GI and timestamp for a given seq-id.
///
/// Thin forwarder: the lookup is performed by the store object held in
/// m_Store and its boolean result is returned unchanged.
///
/// @param idh             Seq-id handle to look up.
/// @param this_gi         Passed by reference to the store; receives the GI.
/// @param this_timestamp  Passed by reference to the store; receives the
///                        timestamp.
/// @return The store's result (presumably true when the id was found -
///         confirm against the store implementation).
105 bool CAsnCache::GetIdInfo(const CSeq_id_Handle& idh,
106  CAsnIndex::TGi& this_gi,
107  time_t& this_timestamp)
108 {
109  return m_Store->GetIdInfo(idh, this_gi, this_timestamp);
110 }
111 
112 /// Get a partial index entry, returning only the externally useful
113 /// metadata. This is a very fast call.
///
/// Thin forwarder: all six arguments are handed to the store object held in
/// m_Store and its boolean result is returned unchanged.
///
/// @param id               Seq-id handle to look up.
/// @param accession        Output reference filled by the store.
/// @param gi               Output reference filled by the store.
/// @param timestamp        Output reference filled by the store.
/// @param sequence_length  Output reference filled by the store.
/// @param tax_id           Output reference filled by the store.
/// @return The store's result (presumably true when the id was found -
///         confirm against the store implementation).
114 bool CAsnCache::GetIdInfo(const CSeq_id_Handle & id,
115  CSeq_id_Handle& accession,
116  CAsnIndex::TGi& gi,
117  time_t& timestamp,
118  Uint4& sequence_length,
119  Uint4& tax_id)
120 {
121  return m_Store->GetIdInfo(id, accession, gi, timestamp, sequence_length, tax_id);
122 }
123 
125  vector<CSeq_id_Handle>& all_ids,
126  bool cheap_only)
127 {
128  return m_Store->GetSeqIds(id, all_ids, cheap_only);
129 }
130 
132  CCache_blob& blob)
133 {
134  return m_Store->GetBlob(idh, blob);
135 }
136 
138  vector< CRef<CCache_blob> >& blobs)
139 {
140  return m_Store->GetMultipleBlobs(id, blobs);
141 }
142 
144  TBuffer& buffer)
145 {
146  return m_Store->GetRaw(idh, buffer);
147 }
148 
/// Multi-result raw lookup for a seq-id.
///
/// Thin forwarder to the store object held in m_Store; the store fills
/// `buffer`, a vector of TBuffer (the type used to hold raw, unformatted
/// blob data). Presumably the several-blobs counterpart of GetRaw() -
/// confirm against the store implementation.
///
/// @param id      Seq-id handle to look up.
/// @param buffer  Output vector handed to the store by reference.
/// @return The store's boolean result, unchanged.
149 bool CAsnCache::GetMultipleRaw(const CSeq_id_Handle& id, vector<TBuffer>& buffer)
150 {
151  return m_Store->GetMultipleRaw(id, buffer);
152 }
153 
155 {
156  return m_Store->GetEntry(idh);
157 }
158 
/// Return the CSeq_entry objects the store yields for a seq-id.
///
/// Thin forwarder to the store object held in m_Store; the returned vector
/// is passed back to the caller unchanged. Presumably the several-entries
/// counterpart of GetEntry() - confirm against the store implementation.
///
/// @param id  Seq-id handle to look up.
/// @return Vector of CRef<CSeq_entry> produced by the store.
159 vector< CRef<CSeq_entry> > CAsnCache::GetMultipleEntries(const CSeq_id_Handle& id)
160 {
161  return m_Store->GetMultipleEntries(id);
162 }
163 
164 bool CAsnCache::GetIndexEntry(const objects::CSeq_id_Handle & id,
166 {
167  return m_Store->GetIndexEntry(id, info);
168 }
169 
/// Multi-result index lookup for a seq-id.
///
/// Thin forwarder to the store object held in m_Store; the store fills
/// `info` with CAsnIndex::SIndexInfo records. Presumably the several-entries
/// counterpart of GetIndexEntry() - confirm against the store implementation.
///
/// @param id    Seq-id handle to look up.
/// @param info  Output vector handed to the store by reference.
/// @return The store's boolean result, unchanged.
170 bool CAsnCache::GetMultipleIndexEntries(const objects::CSeq_id_Handle & id,
171  vector<CAsnIndex::SIndexInfo> &info)
172 {
173  return m_Store->GetMultipleIndexEntries(id, info);
174 }
175 
176 
177 // AsnCacheStats implementation
178 
/// Return the GI count reported by the underlying store.
///
/// Const forwarder to the store object held in m_Store; the value is
/// returned unchanged.
179 size_t CAsnCache::GetGiCount() const
180 {
181  return m_Store->GetGiCount();
182 }
183 
185 {
186  m_Store->EnumSeqIds(cb);
187 }
188 
190 {
191  m_Store->EnumIndex(cb);
192 }
193 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
USING_SCOPE(objects)
Contains the class definition for CAsnCache, the main client class for accessing the ASN cache data.
Berkeley BDB file cursor.
void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const
Definition: asn_cache.cpp:189
string m_DbPath
Definition: asn_cache.hpp:134
bool GetBlob(const objects::CSeq_id_Handle &id, objects::CCache_blob &blob)
Return the cache blob, packed and uninterpreted.
Definition: asn_cache.cpp:131
size_t GetGiCount() const
Definition: asn_cache.cpp:179
void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const
Definition: asn_cache.cpp:184
vector< unsigned char > TBuffer
Type used to hold raw (unformatted) blob data.
Definition: asn_cache.hpp:64
vector< CRef< objects::CSeq_entry > > GetMultipleEntries(const objects::CSeq_id_Handle &id)
Definition: asn_cache.cpp:159
bool GetIdInfo(const objects::CSeq_id_Handle &id, CAsnIndex::TGi &gi, time_t &timestamp)
Return the GI and timestamp for a given seq_id.
bool GetMultipleRaw(const objects::CSeq_id_Handle &id, vector< TBuffer > &buffer)
Definition: asn_cache.cpp:149
bool GetRaw(const objects::CSeq_id_Handle &id, TBuffer &buffer)
Return the raw blob in an unformatted buffer.
Definition: asn_cache.cpp:143
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
Definition: asn_cache.cpp:164
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Return a blob as a CSeq_entry object.
Definition: asn_cache.cpp:154
bool GetMultipleIndexEntries(const objects::CSeq_id_Handle &id, vector< CAsnIndex::SIndexInfo > &info)
Definition: asn_cache.cpp:170
bool GetMultipleBlobs(const objects::CSeq_id_Handle &id, vector< CRef< objects::CCache_blob > > &blob)
Definition: asn_cache.cpp:137
CAsnCache(const CAsnCache &)=delete
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only=true)
Return the set of seq-ids associated with a given ID.
Definition: asn_cache.cpp:124
std::unique_ptr< IAsnCacheStore > m_Store
Definition: asn_cache.hpp:135
Uint8 TGi
Definition: asn_index.hpp:57
CDir –.
Definition: ncbifile.hpp:1696
CFile –.
Definition: ncbifile.hpp:1605
CRef –.
Definition: ncbiobj.hpp:618
std::function< void(string, uint32_t, uint64_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)> TEnumIndexCallback
std::function< void(string, uint32_t, uint64_t, uint32_t)> TEnumSeqidCallback
@ eFollowLinks
Follow symbolic links.
Definition: ncbimisc.hpp:145
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
Definition: ncbifile.cpp:820
TEntries GetEntries(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
Definition: ncbifile.cpp:3846
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
list< TEntry > TEntries
Definition: ncbifile.hpp:1751
@ fIgnoreRecursive
Suppress "self recursive" elements (the directories "." and "..").
Definition: ncbifile.hpp:1756
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static MDB_envinfo info
Definition: mdb_load.c:37
string GetBDBIndex()
Definition: file_names.hpp:44
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static uint8_t * buffer
Definition: pcre2test.c:1016
ZLib Compression API.
Modified on Fri Sep 20 14:57:13 2024 by modify_doxy.py rev. 669887