65 using TBuffer = std::vector<unsigned char>;
76 m_Index->SetCacheSize(128 * 1024 * 1024);
80 if ( !
CFile(main_fname).Exists() ) {
82 "cannot open ASN cache: failed to find file: " + main_fname);
88 if (
CFile(fname).Exists()) {
97 ERR_POST(
Error <<
"error opening seq-id cache: disabling: " << e);
106 vector<CAsnIndex::SIndexInfo>&
info,
109 bool was_id_found =
false;
124 cursor.From << seq_id <<
version;
127 while (cursor.Fetch() ==
eBDB_Ok) {
130 if (current_info.seq_id != seq_id) {
135 bool should_report = (!
version ||
version == current_info.version) &&
141 (
info[0].version < current_info.version ||
142 (
info[0].version == current_info.version &&
info[0].timestamp < current_info.timestamp))) ||
144 (
version &&
info[0].timestamp < current_info.timestamp))
151 info.push_back(current_info);
162 vector<CAsnIndex::SIndexInfo> info_vector;
166 info = info_vector[0];
172 time_t& this_timestamp)
186 this_timestamp =
info.timestamp;
199 Uint4& sequence_length,
208 timestamp =
info.timestamp;
210 sequence_length =
info.sequence_length;
211 tax_id =
info.taxonomy_id;
216 vector<CSeq_id_Handle>& all_ids,
219 bool was_seqid_blob_found =
false;
227 <<
" timestamp=" <<
info.timestamp
228 <<
" offs=" <<
info.offs
229 <<
" size=" <<
info.size);
231 if ( was_seqid_blob_found ){
236 ERR_POST(
"Unable to read or unpack a SeqIds chunk."
237 <<
" offset = " <<
info.offs <<
" size = " <<
info.size );
238 ERR_POST(
"SeqId = " <<
id.AsString() <<
" gi = " <<
info.gi
239 <<
" timestamp = " <<
info.timestamp );
241 was_seqid_blob_found =
false;
244 else if(!cheap_only) {
253 was_seqid_blob_found =
true;
257 return was_seqid_blob_found;
263 bool was_blob_found =
false;
269 if (! was_blob_found ) {
280 <<
" timestamp=" <<
info.timestamp
281 <<
" chunk=" <<
info.chunk
282 <<
" offs=" <<
info.offs
283 <<
" size=" <<
info.size);
300 ERR_POST(
"Unable to read or unpack a raw chunk. ChunkId = " <<
info.chunk
301 <<
" offset = " <<
info.offs <<
" size = " <<
info.size );
303 <<
" timestamp = " <<
info.timestamp );
313 vector<CAsnIndex::SIndexInfo>
info;
317 if (! was_blob_found ) {
321 ITERATE (vector<CAsnIndex::SIndexInfo>, blob_it,
info) {
324 blobs.push_back(blob);
327 return !blobs.empty();
344 vector< CRef<CCache_blob> > blobs;
348 buffer.resize(blobs.size());
350 (*blob_it)->UnPack(
buffer[blob_it - blobs.begin()]);
356 bool CAsnCacheStore::EfficientlyGetSeqIds()
const
375 vector< CRef<CSeq_entry> >
entries;
376 vector< CRef<CCache_blob> > blobs;
380 (*blob_it)->UnPack(*entry);
396 vector<CAsnIndex::SIndexInfo> &
info)
405 std::set<CAsnIndex::TGi> gi_set;
411 auto gi = index_ref.GetGi();
415 return gi_set.size();
425 cb(index_ref.GetSeqId(),
426 index_ref.GetVersion(),
428 index_ref.GetTimestamp());
439 cb(index_ref.GetSeqId(),
440 index_ref.GetVersion(),
442 index_ref.GetTimestamp(),
443 index_ref.GetChunkId(),
444 index_ref.GetOffset(),
446 index_ref.GetSeqLength(),
447 index_ref.GetTaxId());
455 : m_Index(db_paths.
size())
459 for (
auto const& db_path: db_paths ) {
460 std::unique_ptr<IAsnCacheStore> store(
new CAsnCacheStore(db_path) );
461 m_Stores.push_back(std::move(store));
468 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
481 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
494 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
507 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
525 vector<objects::CSeq_id_Handle>& all_ids,
528 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
543 bool CAsnCacheStoreMany::EfficientlyGetSeqIds()
const
551 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
565 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
574 return vector<CRef<CSeq_entry> >();
584 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
598 objects::CSeq_id_Handle& accession,
601 Uint4& sequence_length,
604 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
607 if (
m_Stores[
i]->
GetIdInfo(
id, accession, gi, timestamp, sequence_length, tax_id) ) {
617 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
628 vector<CAsnIndex::SIndexInfo> &
info)
630 std::shuffle(
m_Index.begin(),
m_Index.end(), default_random_engine());
646 for (
auto const& store:
m_Stores ) {
647 count += store->GetGiCount();
654 for (
auto const& store:
m_Stores ) {
655 store->EnumSeqIds(cb);
661 for (
auto const& store:
m_Stores ) {
662 store->EnumIndex(cb);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Contains the class definition for CAsnCache, the main client class for accessing the ASN cache data.
CConstRef< objects::CBioseq > ExtractBioseq(CConstRef< objects::CSeq_entry > entry, const objects::CSeq_id_Handle &id)
void GetNormalizedSeqId(const objects::CSeq_id_Handle &id, string &id_str, Uint4 &version)
Berkeley BDB file cursor.
bool GetMultipleIndexEntries(const objects::CSeq_id_Handle &id, vector< CAsnIndex::SIndexInfo > &info)
void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Check if the SeqId cache, for efficient retrieval of SeqIds, is available.
size_t GetGiCount() const
bool GetBlob(const objects::CSeq_id_Handle &id, objects::CCache_blob &blob)
Return the cache blob, packed and uninterpreted.
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only)
Return the set of seq-ids associated with a given ID.
CAsnCacheStoreMany()=delete
std::vector< std::unique_ptr< IAsnCacheStore > > m_Stores
bool GetMultipleRaw(const objects::CSeq_id_Handle &id, vector< vector< unsigned char >> &buffer)
bool GetRaw(const objects::CSeq_id_Handle &id, vector< unsigned char > &buffer)
Return the raw blob in an unformatted buffer.
bool GetMultipleBlobs(const objects::CSeq_id_Handle &id, vector< CRef< objects::CCache_blob > > &blob)
bool GetIdInfo(const objects::CSeq_id_Handle &id, CAsnIndex::TGi &gi, time_t &timestamp)
Return the GI and timestamp for a given seq_id.
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
std::vector< int > m_Index
void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const
vector< CRef< objects::CSeq_entry > > GetMultipleEntries(const objects::CSeq_id_Handle &id)
std::unique_ptr< CAsnIndex > m_Index
static bool s_GetChunkAndOffset(const objects::CSeq_id_Handle &idh, CAsnIndex &index, vector< CAsnIndex::SIndexInfo > &info, bool multiple)
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only)
Return the set of seq-ids associated with a given ID.
bool GetBlob(const objects::CSeq_id_Handle &id, objects::CCache_blob &blob)
Return the cache blob, packed and uninterpreted.
bool GetMultipleIndexEntries(const objects::CSeq_id_Handle &id, vector< CAsnIndex::SIndexInfo > &info)
std::unique_ptr< CChunkFile > m_CurrChunk
vector< CRef< objects::CSeq_entry > > GetMultipleEntries(const objects::CSeq_id_Handle &id)
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Check if the SeqId cache, for efficient retrieval of SeqIds, is available.
bool GetMultipleBlobs(const objects::CSeq_id_Handle &id, vector< CRef< objects::CCache_blob > > &blob)
size_t GetGiCount() const
std::unique_ptr< CSeqIdChunkFile > m_SeqIdChunk
CAsnIndex & x_GetIndexRef() const
bool GetMultipleRaw(const objects::CSeq_id_Handle &id, vector< vector< unsigned char >> &buffer)
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
bool x_GetBlob(const CAsnIndex::SIndexInfo &info, objects::CCache_blob &blob)
std::unique_ptr< CAsnIndex > m_SeqIdIndex
void EnumIndex(IAsnCacheStore::TEnumIndexCallback cb) const
bool GetRaw(const objects::CSeq_id_Handle &id, vector< unsigned char > &buffer)
Return the raw blob in an unformatted buffer.
bool GetIdInfo(const objects::CSeq_id_Handle &id, CAsnIndex::TGi &gi, time_t &timestamp)
Return the GI and timestamp for a given seq_id.
CAsnIndex::TChunkId m_CurrChunkId
void EnumSeqIds(IAsnCacheStore::TEnumSeqidCallback cb) const
This is a simple BDB structure holding information about a given accession and its indexed location.
Berkeley DB file cursor class.
void UnPack(CSeq_entry &entry) const
std::function< void(string, uint32_t, uint64_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t)> TEnumIndexCallback
std::function< void(string, uint32_t, uint64_t, uint32_t)> TEnumSeqidCallback
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eFollowLinks
Follow symbolic links.
void SetCondition(ECondition cond_from, ECondition cond_to=eNotSet)
Set search condition(type of interval)
EBDB_ErrCode Fetch(EFetchDirection fdir=eDefault)
Fetch record.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
const TId & GetId(void) const
Get the Id member data.
list< CRef< CSeq_id > > TId
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static pcre_uint8 * buffer
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
static wxAcceleratorEntry entries[3]