NCBI C++ ToolKit
asn_cache_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: asn_cache_loader.cpp 90547 2020-06-26 12:58:17Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Mike DiCuccio Cheinan Marks
27  *
28  * File Description: AsnCache dataloader. Implementations.
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 
36 #include <objmgr/impl/tse_info.hpp>
41 
45 
48 
49 
50 #define NCBI_USE_ERRCODE_X Objtools_AsnCache_Loader
51 
53 
55 
57  : requests(0)
58  , found(0)
59 {
60 }
61 
62 
64 {
65 }
66 
67 
70  const string& db_path,
71  CObjectManager::EIsDefault is_default,
73 {
74  TDbMaker maker(db_path);
75  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
76  return maker.GetRegisterInfo();
77 }
78 
79 
81 {
82  return "AsnCache_dataloader";
83 }
84 
85 
86 string CAsnCache_DataLoader::GetLoaderNameFromArgs(const string& db_path)
87 { // Loader name = fixed "AsnCache_dataloader:" prefix followed by db_path.
88  return "AsnCache_dataloader:" + db_path;
89 }
90 
91 
94 {
95  m_IndexMap.resize(15);
96 }
97 
98 
100  : CDataLoader(dl_name)
101 {
102  m_IndexMap.resize(15);
103 }
104 
106  const string& db_path)
107  : CDataLoader(dl_name),
108  m_DbPath(db_path)
109 {
110  m_IndexMap.resize(15);
111 }
112 
113 
115 {
116  /**
117  size_t total_requests = 0;
118  size_t total_found = 0;
119  ITERATE (TIndexMap, it, m_IndexMap) {
120  LOG_POST(Error << "thread=" << it->first
121  << " requests=" << it->second.requests
122  << " found=" << it->second.found);
123  total_requests += it->second.requests;
124  total_found += it->second.found;
125  }
126  LOG_POST(Error << "total requests: " << total_requests);
127  LOG_POST(Error << "total found: " << total_found);
128  **/
129 }
130 
131 
134 {
135  SCacheInfo& index = x_GetIndex();
136  CFastMutexGuard LOCK(index.cache_mtx);
137 
139  TBlobId blob_id;
140  if (index.cache->GetIndexEntry(idh, info)) {
141  blob_id = new CBlobIdSeq_id(idh);
142  }
143  //LOG_POST(Error << "CAsnCache_DataLoader::GetBlobId(): " << idh);
144  return blob_id;
145 }
146 
147 
149 {
150  return true;
151 }
152 
153 
155 {
156  SCacheInfo& index = x_GetIndex();
157  CFastMutexGuard LOCK(index.cache_mtx);
158 
159  CAsnIndex::TGi gi = 0;
160  time_t timestamp = 0;
161  if (index.cache->GetIdInfo(idh, gi, timestamp)) {
162  //LOG_POST(Error << "CAsnCache_DataLoader::GetGi(): " << idh << " -> " << gi);
163  return GI_FROM(CAsnIndex::TGi, gi);
164  }
165  return ZERO_GI;
166 }
167 
168 
170 {
171  SCacheInfo& index = x_GetIndex();
172  CFastMutexGuard LOCK(index.cache_mtx);
173 
174  CAsnIndex::TGi gi = 0;
175  time_t timestamp = 0;
176  Uint4 sequence_length = 0;
177  Uint4 tax_id = 0;
178  CSeq_id_Handle acc;
179  if (index.cache->GetIdInfo(idh, acc, gi,
180  timestamp, sequence_length, tax_id)) {
181  return sequence_length;
182  }
183  return kInvalidSeqPos;
184 }
185 
186 
187 
188 
190  TIds& ids)
191 {
192  ///
193  /// okay, the contract is that we must return something if we know the
194  /// sequence. thus, if the sequence exists in the cache, we must return
195  /// something. If the SeqId index is available, the cache will use it to
196  /// get the ids quickly; otherwise it will use the expensive way, retrieving
197  /// the entire sequence.
198  ///
199  SCacheInfo& index = x_GetIndex();
200  CFastMutexGuard LOCK(index.cache_mtx);
201 
202  vector<CSeq_id_Handle> bioseq_ids;
203  bool res = index.cache->GetSeqIds(idh, bioseq_ids, false);
204  ++index.requests;
205  if (res) {
206  ids.swap(bioseq_ids);
207  }
208 }
209 
210 
212 {
213  SCacheInfo& index = x_GetIndex();
214  CFastMutexGuard LOCK(index.cache_mtx);
215 
216  CAsnIndex::TGi gi = 0;
217  time_t timestamp = 0;
218  Uint4 sequence_length = 0;
219  Uint4 tax_id = 0;
220  CSeq_id_Handle acc;
221  if (index.cache->GetIdInfo(idh, acc, gi,
222  timestamp, sequence_length, tax_id)) {
223  return TAX_ID_FROM(Uint4, tax_id);
224  }
225  return INVALID_TAX_ID;
226 }
227 
228 
229 #if NCBI_PRODUCTION_VER > 20110000
230 /// not yet in SC-6.0...
231 void CAsnCache_DataLoader::GetGis(const TIds& ids, TLoaded& loaded, TIds& ret)
232 {
233  SCacheInfo& index = x_GetIndex();
234  CFastMutexGuard LOCK(index.cache_mtx);
235 
236  ret.clear();
237  ret.resize(ids.size());
238 
239  loaded.clear();
240  loaded.resize(ids.size());
241  for (size_t i = 0; i < ids.size(); ++i) {
242  CAsnIndex::TGi gi = 0;
243  time_t timestamp = 0;
244  if (index.cache->GetIdInfo(ids[i], gi, timestamp)) {
246  loaded[i] = true;
247  }
248  }
249 }
250 #endif
251 
254 {
255  CSeq_id_Handle idh =
256  dynamic_cast<const CBlobIdSeq_id&>(*blob_id).GetValue();
257 
258  CTSE_LoadLock lock = GetDataSource()->GetTSE_LoadLock(blob_id);
259  if ( !lock.IsLoaded() ) {
260  SCacheInfo& index = x_GetIndex();
261  CFastMutexGuard LOCK(index.cache_mtx);
262 
263  CRef<CSeq_entry> entry = index.cache->GetEntry(idh);
264  ++index.requests;
265 
266  if (entry) {
267  ++index.found;
268  lock->SetSeq_entry(*entry);
269  lock.SetLoaded();
270  } else {
272  "CAsnCache_DataLoader::GetBlobById(): blob for " +
273  idh.AsString() + " not found");
274  }
275  }
276  return lock;
277 }
278 
279 
282  EChoice choice)
283 {
284  TTSE_LockSet locks;
285 
286  switch ( choice ) {
287  case eBlob:
288  case eBioseq:
289  case eCore:
290  case eBioseqCore:
291  case eSequence:
292  case eAll:
293  {{
294  TBlobId blob_id = GetBlobId(idh);
295  if (blob_id) {
296  locks.insert(GetBlobById(blob_id));
297  }
298  }}
299  break;
300 
301  default:
302  break;
303  }
304 
305  return locks;
306 }
307 
309 {
310  if (m_IndexMap.empty()) {
312  "setup failure: no cache objects available");
313  }
314 
315  CFastMutexGuard LOCK(m_Mutex);
316 
317  // hash to a pool of cache objects based on thread ID
318  int id = CThread::GetSelf();
319  id %= m_IndexMap.size();
320 
321  TIndexMap::iterator iter = m_IndexMap.begin() + id;
322  if ( !iter->get() ) {
323  iter->reset(new SCacheInfo);
324  (*iter)->cache.Reset(new CAsnCache(m_DbPath));
325  }
326  return **iter;
327 }
328 
329 
331 
332 // ===========================================================================
333 
335 
337 {
338  RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_AsnCache);
339 }
340 
341 
342 const string kDataLoader_AsnCache_DriverName = "asncache"; // driver name string for the AsnCache data loader
343 
345 {
346 public:
349  virtual ~CAsnCache_DataLoaderCF(void) {}
350 
351 protected:
354  const TPluginManagerParamTree* params) const;
355 };
356 
357 
360  const TPluginManagerParamTree* params) const
361 {
362  string db_path =
363  GetParam(GetDriverName(), params,
364  "DbPath", false);
365 
366  // IsDefault and Priority arguments may be specified
368 }
369 
370 
374 {
376 }
377 
378 
382 {
383  NCBI_EntryPoint_DataLoader_AsnCache(info_list, method);
384 }
385 
386 
Contains the class definition for CAsnCache, the main client class for accessing the ASN cache data.
USING_SCOPE(objects)
void NCBI_EntryPoint_DataLoader_AsnCache(CPluginManager< CDataLoader >::TDriverInfoList &info_list, CPluginManager< CDataLoader >::EEntryPointRequest method)
void NCBI_EntryPoint_xloader_asncache(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
void DataLoaders_Register_AsnCache(void)
const string kDataLoader_AsnCache_DriverName("asncache")
virtual ~CAsnCache_DataLoaderCF(void)
virtual CDataLoader * CreateAndRegister(CObjectManager &om, const TPluginManagerParamTree *params) const
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
static string GetLoaderNameFromArgs(void)
virtual TTaxId GetTaxId(const CSeq_id_Handle &idh)
Request for a taxonomy id of a sequence.
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &db_path, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
virtual TGi GetGi(const CSeq_id_Handle &idh)
Request for a gi of a sequence.
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Request for a length of a sequence.
virtual ~CAsnCache_DataLoader(void)
virtual bool CanGetBlobById() const
CAsnCache is used by clients to access the ASN cache data.
Definition: asn_cache.hpp:61
bool GetIdInfo(const objects::CSeq_id_Handle &id, CAsnIndex::TGi &gi, time_t &timestamp)
Return the GI and timestamp for a given seq_id.
bool GetIndexEntry(const objects::CSeq_id_Handle &id, CAsnIndex::SIndexInfo &info)
Get the full ASN cache index entry.
Definition: asn_cache.cpp:164
CRef< objects::CSeq_entry > GetEntry(const objects::CSeq_id_Handle &id)
Return a blob as a CSeq_entry object.
Definition: asn_cache.cpp:154
bool GetSeqIds(const objects::CSeq_id_Handle &id, vector< objects::CSeq_id_Handle > &all_ids, bool cheap_only=true)
Return the set of seq-ids associated with a given ID.
Definition: asn_cache.cpp:124
Uint8 TGi
Definition: asn_index.hpp:57
const string & GetDriverName(void) const
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
CObjectManager –.
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
Definition: tse_info.cpp:351
bool IsLoaded(void) const
void SetLoaded(void)
definition of a Culling tree
Definition: ncbi_tree.hpp:100
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:690
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
const value_type & GetValue(void) const
Definition: blob_id.hpp:122
CBlobIdFor< CSeq_id_Handle > CBlobIdSeq_id
Definition: blob_id.hpp:154
TLoader * GetLoader(void) const
Get pointer to the loader.
vector< CSeq_id_Handle > TIds
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
The main blob is the blob with the sequence; all other blobs are external and contain external annotations.
EIsDefault
Flag defining if the data loader is included in the "default" group.
TRegisterInfo GetRegisterInfo(void)
virtual void GetGis(const TIds &ids, TLoaded &loaded, TGis &ret)
Bulk request for gis of a set of sequences.
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
@ eAll
all blobs (main and external)
@ eSequence
seq data
@ eBlob
whole main
@ eCore
?only seq-entry core?
@ eBioseq
main blob with complete bioseq
@ eBioseqCore
main blob with bioseq core (no seqdata and annots)
static void NCBI_EntryPointImpl(TDriverInfoList &info_list, EEntryPointRequest method)
Entry point implementation.
string GetParam(const string &driver_name, const TPluginManagerParamTree *params, const string &param_name, bool mandatory, const string &default_value) const
Utility function to get an element of parameter tree Throws an exception when mandatory parameter is ...
list< SDriverInfo > TDriverInfoList
List of driver information.
EEntryPointRequest
Actions performed by the entry point.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static TID GetSelf(void)
Definition: ncbithr.cpp:515
int i
static MDB_envinfo info
Definition: mdb_load.c:37
Plugin manager (using class factory paradigm).
Helper classes and templates to implement plugins.
CRef< objects::CObjectManager > om
structure for common cache reader&writer implementation
SRegisterLoaderInfo –.
Modified on Fri Sep 20 14:57:45 2024 by modify_doxy.py rev. 669887