NCBI C++ ToolKit
reader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef READER__HPP_INCLUDED
2 #define READER__HPP_INCLUDED
3 /* $Id: reader.hpp 102228 2024-04-09 17:35:06Z vasilche $
4 * ===========================================================================
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description: Base data reader interface
29 *
30 */
31 
32 #include <corelib/ncbiobj.hpp>
33 #include <corelib/ncbimtx.hpp>
34 #include <corelib/ncbitime.hpp>
35 #include <util/cache/icache.hpp>
38 #include <list>
39 
42 
44 class CReadDispatcher;
45 class CBlob_id;
46 class CSeq_id_Handle;
47 class CLoadLockBlob;
50 struct SAnnotSelector;
51 
52 
54 {
55 public:
56  bool IsSetEnableSNP(void) const { return !m_EnableSNP.IsNull(); }
57  bool GetEnableSNP(void) const { return m_EnableSNP.GetValue(); }
58  void SetEnableSNP(CNullable<bool> enable) { m_EnableSNP = enable; }
59  void SetEnableSNP(bool enable) { m_EnableSNP = enable; }
60 
61  bool IsSetEnableWGS(void) const { return !m_EnableWGS.IsNull(); }
62  bool GetEnableWGS(void) const { return m_EnableWGS.GetValue(); }
63  void SetEnableWGS(CNullable<bool> enable) { m_EnableWGS = enable; }
64  void SetEnableWGS(bool enable) { m_EnableWGS = enable; }
65 
66  bool IsSetEnableCDD(void) const { return !m_EnableCDD.IsNull(); }
67  bool GetEnableCDD(void) const { return m_EnableCDD.GetValue(); }
68  void SetEnableCDD(CNullable<bool> enable) { m_EnableCDD = enable; }
69  void SetEnableCDD(bool enable) { m_EnableCDD = enable; }
70 
71 private:
75 };
76 
77 
79 {
80 public:
81  CReader(void);
82  virtual ~CReader(void);
83 
84  void InitParams(CConfig& conf, const string& driver_name,
85  int default_max_conn);
86 
87  typedef unsigned TConn;
88  typedef CBlob_id TBlobId;
89  typedef int TState;
90  typedef int TBlobState;
91  typedef int TBlobVersion;
92  typedef int TBlobSplitVersion;
93  typedef int TChunkId;
94  typedef int TContentsMask;
95  typedef vector<TChunkId> TChunkIds;
96  typedef vector<CSeq_id_Handle> TSeqIds;
97  typedef vector<CBlob_Info> TBlobIds;
98 
99  /// All LoadXxx() methods should return false if
100  /// there is no requested data in the reader.
101  /// This notifies the dispatcher that there is no point in retrying.
102  virtual bool LoadSeq_idBlob_ids(CReaderRequestResult& result,
103  const CSeq_id_Handle& seq_id,
104  const SAnnotSelector* sel);
106  const CSeq_id_Handle& seq_id) = 0;
107  virtual bool LoadSeq_idGi(CReaderRequestResult& result,
108  const CSeq_id_Handle& seq_id);
109  virtual bool LoadSeq_idAccVer(CReaderRequestResult& result,
110  const CSeq_id_Handle& seq_id);
111  virtual bool LoadSeq_idLabel(CReaderRequestResult& result,
112  const CSeq_id_Handle& seq_id);
113  virtual bool LoadSeq_idTaxId(CReaderRequestResult& result,
114  const CSeq_id_Handle& seq_id);
115  virtual bool LoadSequenceHash(CReaderRequestResult& result,
116  const CSeq_id_Handle& seq_id);
117  virtual bool LoadSequenceLength(CReaderRequestResult& result,
118  const CSeq_id_Handle& seq_id);
119  virtual bool LoadSequenceType(CReaderRequestResult& result,
120  const CSeq_id_Handle& seq_id);
121 
122  // bulk requests
123  typedef vector<CSeq_id_Handle> TIds;
124  typedef vector<bool> TLoaded;
125  typedef vector<TIds> TBulkIds;
126  typedef vector<TGi> TGis;
127  typedef vector<string> TLabels;
128  typedef vector<TTaxId> TTaxIds;
129  typedef vector<int> TStates;
130  typedef vector<int> THashes;
131  typedef vector<bool> TKnown;
132  typedef vector<TSeqPos> TLengths;
133  typedef vector<CSeq_inst::EMol> TTypes;
134  typedef vector<CBlob_id> TPlainBlobIds;
135  typedef vector<pair<TBlobId, TChunkIds>> TBlobChunkIds;
136  virtual bool LoadBulkIds(CReaderRequestResult& result,
137  const TIds& ids, TLoaded& loaded, TBulkIds& ret);
138  virtual bool LoadAccVers(CReaderRequestResult& result,
139  const TIds& ids, TLoaded& loaded, TIds& ret);
140  virtual bool LoadGis(CReaderRequestResult& result,
141  const TIds& ids, TLoaded& loaded, TGis& ret);
142  virtual bool LoadLabels(CReaderRequestResult& result,
143  const TIds& ids, TLoaded& loaded, TLabels& ret);
144  virtual bool LoadTaxIds(CReaderRequestResult& result,
145  const TIds& ids, TLoaded& loaded, TTaxIds& ret);
146  virtual bool LoadHashes(CReaderRequestResult& result,
147  const TIds& ids, TLoaded& loaded,
148  THashes& ret, TKnown& known);
149  virtual bool LoadLengths(CReaderRequestResult& result,
150  const TIds& ids, TLoaded& loaded, TLengths& ret);
151  virtual bool LoadTypes(CReaderRequestResult& result,
152  const TIds& ids, TLoaded& loaded, TTypes& ret);
153  virtual bool LoadStates(CReaderRequestResult& result,
154  const TIds& ids, TLoaded& loaded, TStates& ret);
155 
157  const TBlobId& blob_id) = 0;
159  const TBlobId& blob_id) = 0;
160 
161  virtual bool LoadBlobs(CReaderRequestResult& result,
162  const CSeq_id_Handle& seq_id,
164  const SAnnotSelector* sel);
165  virtual bool LoadBlobs(CReaderRequestResult& result,
166  const CLoadLockBlobIds& lock,
168  const SAnnotSelector* sel);
170  const CBlob_id& blob_id) = 0;
171  virtual bool LoadBlob(CReaderRequestResult& result,
172  const CBlob_Info& blob_info);
173  virtual bool LoadChunk(CReaderRequestResult& result,
174  const TBlobId& blob_id, TChunkId chunk_id);
175  virtual bool LoadChunks(CReaderRequestResult& result,
176  const TBlobId& blob_id,
177  const TChunkIds& chunk_ids);
178  virtual bool LoadChunks(CReaderRequestResult& result,
179  const TBlobChunkIds& chunk_ids);
180  virtual bool LoadBlobSet(CReaderRequestResult& result,
181  const TSeqIds& seq_ids);
182  virtual bool LoadBlobs(CReaderRequestResult& result,
183  const TBlobIds& blob_infos);
184 
185  void SetAndSaveSeq_idSeq_ids(CReaderRequestResult& result,
186  const CSeq_id_Handle& seq_id,
187  const CFixedSeq_ids& seq_ids) const;
188  void SetAndSaveNoSeq_idSeq_ids(CReaderRequestResult& result,
189  const CSeq_id_Handle& seq_id,
190  TState state) const;
191 
196 
197  void SetAndSaveSeq_idAccVer(CReaderRequestResult& result,
198  const CSeq_id_Handle& seq_id,
199  const TSequenceAcc& acc_id) const;
200 
201  void SetAndSaveSeq_idGi(CReaderRequestResult& result,
202  const CSeq_id_Handle& seq_id,
203  const TSequenceGi& gi) const;
204 
205  // copy info
206  void SetAndSaveSeq_idAccFromSeqIds(CReaderRequestResult& result,
207  const CSeq_id_Handle& seq_id,
208  const CLoadLockSeqIds& seq_ids) const;
209  void SetAndSaveSeq_idGiFromSeqIds(CReaderRequestResult& result,
210  const CSeq_id_Handle& seq_id,
211  const CLoadLockSeqIds& seq_ids) const;
212  void SetAndSaveSeq_idSeq_ids(CReaderRequestResult& result,
213  const CSeq_id_Handle& seq_id,
214  const CLoadLockSeqIds& seq_ids) const;
215  void SetAndSaveNoSeq_idSeq_ids(CReaderRequestResult& result,
216  const CSeq_id_Handle& seq_id,
217  const CLoadLockGi& gi_lock) const;
218  void SetAndSaveSeq_idBlob_ids(CReaderRequestResult& result,
219  const CSeq_id_Handle& seq_id,
220  const SAnnotSelector* sel,
221  CLoadLockBlobIds& lock,
222  const CLoadLockBlobIds& blob_ids) const;
223  void SetAndSaveNoSeq_idBlob_ids(CReaderRequestResult& result,
224  const CSeq_id_Handle& seq_id,
225  const SAnnotSelector* sel,
226  const CLoadLockGi& gi_lock) const;
227 
228  void SetAndSaveSeq_idTaxId(CReaderRequestResult& result,
229  const CSeq_id_Handle& seq_id,
230  TTaxId taxid) const;
231  void SetAndSaveSequenceHash(CReaderRequestResult& result,
232  const CSeq_id_Handle& seq_id,
233  const TSequenceHash& hash) const;
234  void SetAndSaveSequenceLength(CReaderRequestResult& result,
235  const CSeq_id_Handle& seq_id,
236  TSeqPos length) const;
237  void SetAndSaveSequenceType(CReaderRequestResult& result,
238  const CSeq_id_Handle& seq_id,
239  const TSequenceType& type) const;
240  void SetAndSaveSeq_idBlob_ids(CReaderRequestResult& result,
241  const CSeq_id_Handle& seq_id,
242  const SAnnotSelector* sel,
243  const CFixedBlob_ids& blob_ids) const;
244  void SetAndSaveNoSeq_idBlob_ids(CReaderRequestResult& result,
245  const CSeq_id_Handle& seq_id,
246  const SAnnotSelector* sel,
247  TBlobState state) const;
248  void SetAndSaveBlobState(CReaderRequestResult& result,
249  const TBlobId& blob_id,
250  TBlobState blob_state) const;
251  void SetAndSaveBlobVersion(CReaderRequestResult& result,
252  const TBlobId& blob_id,
253  TBlobVersion version) const;
254  void SetAndSaveNoBlob(CReaderRequestResult& result,
255  const TBlobId& blob_id,
256  TChunkId chunk_id,
257  TBlobState blob_state);
258 
259  void SetAndSaveSeq_idLabelFromSeqIds(CReaderRequestResult& result,
260  const CSeq_id_Handle& seq_id,
261  const CLoadLockSeqIds& seq_ids) const;
262  void SetAndSaveSeq_idLabel(CReaderRequestResult& result,
263  const CSeq_id_Handle& seq_id,
264  const string& label) const;
265 
266  void SetAndSaveSeq_idBlob_ids(CReaderRequestResult& result,
267  const CSeq_id_Handle& seq_id,
268  const SAnnotSelector* sel,
269  CLoadLockBlobIds& lock,
270  const CFixedBlob_ids& blob_ids) const;
271  void SetAndSaveNoSeq_idBlob_ids(CReaderRequestResult& result,
272  const CSeq_id_Handle& seq_id,
273  const SAnnotSelector* sel,
274  CLoadLockBlobIds& lock,
275  TBlobState state) const;
276 
277  int SetMaximumConnections(int max);
278  void SetMaximumConnections(int max, int default_max);
279  int GetMaximumConnections(void) const;
280  virtual int GetMaximumConnectionsLimit(void) const;
281 
282  void SetPreopenConnection(bool preopen = true);
283  bool GetPreopenConnection(void) const;
284  void OpenInitialConnection(bool force);
285 
286  // returns the time in seconds when already retrieved data
287  // could become obsolete by fresher version
288  // -1 - never
289  virtual int GetConst(const string& const_name) const;
290 
291  void SetMaximumRetryCount(int retry_count);
292  virtual int GetRetryCount(void) const;
293  virtual bool MayBeSkippedOnErrors(void) const;
294 
295  // CReadDispatcher can set m_Dispatcher
296  friend class CReadDispatcher;
297 
298  static int ReadInt(CNcbiIstream& stream);
299 
300  virtual void InitializeCache(CReaderCacheManager& cache_manager,
301  const TPluginManagerParamTree* params);
302  virtual void ResetCache(void);
303 
304  virtual void OpenConnection(TConn conn);
305  virtual void WaitBeforeNewConnection(TConn conn);
306  virtual void ConnectSucceeds(TConn conn);
307  virtual void ConnectFailed(TConn conn);
308  virtual void SetNewConnectionDelayMicroSec(unsigned long micro_sec);
309 
310  virtual void SetIncludeHUP(bool include_hup = true,
311  const string& web_cookie = NcbiEmptyString);
312  bool HasHUPIncluded() const
313  {
314  return m_IncludeHUP;
315  }
316 
317  virtual void SetParams(const CReaderParams& params);
318 
319 #if 0
320 /*
321 * On Windows this works with ostrstream,
322  but does not work with ostringstream
323 */
325  {
326  public:
327  CDebugPrinter(TConn conn, const char* name);
328  CDebugPrinter(const char* name);
329  ~CDebugPrinter();
330  };
331 #else
333  {
334  public:
335  CDebugPrinter(TConn conn, const char* name);
336  CDebugPrinter(const char* name);
337  ~CDebugPrinter();
338 
339  operator CNcbiOstrstream&(void) {
340  return m_os;
341  }
342  template<typename T>
343  friend CDebugPrinter& operator<<( CDebugPrinter& pr, const T& obj) {
344  pr.m_os << obj;
345  return pr;
346  }
347  private:
349  };
350 #endif
351 
352 protected:
354 
356 
357  // allocate connection slot with key 'conn'
358  virtual void x_AddConnectionSlot(TConn conn) = 0;
359  // disconnect and remove connection slot with key 'conn'
360  virtual void x_RemoveConnectionSlot(TConn conn) = 0;
361  // disconnect at connection slot with key 'conn'
362  virtual void x_DisconnectAtSlot(TConn conn, bool failed);
363  // force connection at connection slot with key 'conn'
364  virtual void x_ConnectAtSlot(TConn conn) = 0;
365  // report failed or stale connection
366  void x_ReportDisconnect(const char* reader, const char* server,
367  TConn conn, bool failed) const;
368  void x_SetIncludeHUP(bool include_hup)
369  {
370  m_IncludeHUP = include_hup;
371  }
372 
373 private:
375 
376  TConn x_AllocConnection(bool oldest = false);
377  void x_ReleaseConnection(TConn conn, double retry_delay = 0);
378  void x_ReleaseClosedConnection(TConn conn);
379  void x_AbortConnection(TConn conn, bool failed);
380 
381  void x_AddConnection(void);
382  void x_RemoveConnection(void);
383 
384  // parameters
388 
389  // current state
391  struct SConnSlot {
394  double m_RetryDelay;
395  };
396  typedef list<SConnSlot> TFreeConnections;
401  atomic<int> m_ConnectFailCount;
406 
407 private:
408  // to prevent copying
409  CReader(const CReader&);
410  void operator=(const CReader&);
411 };
412 
413 
414 ////////////////////////////////////////////////////////////////////////////
415 // CConn
416 ////////////////////////////////////////////////////////////////////////////
417 
418 
420 {
421 public:
423 
426 
427  void Release(void);
428  void Restart(void);
429 
430  bool IsAllocated(void) const {
431  return m_Result != 0;
432  }
433 
434  operator TConn(void) const
435  {
436  _ASSERT(IsAllocated());
437  return m_Conn;
438  }
439 
440 private:
444  bool m_Restart;
445 
446 private:
449 };
450 
451 
454 
455 #endif // READER__HPP_INCLUDED
ncbi::TMaskedQueryRegions mask
CMutex –.
Definition: ncbimtx.hpp:749
CObject –.
Definition: ncbiobj.hpp:180
void operator=(CReaderAllocatedConnection &)
CReaderAllocatedConnection(const CReaderAllocatedConnection &)
CReader::TConn TConn
Definition: reader.hpp:422
CReaderRequestResult * m_Result
Definition: reader.hpp:441
bool IsAllocated(void) const
Definition: reader.hpp:430
bool GetEnableSNP(void) const
Definition: reader.hpp:57
void SetEnableCDD(CNullable< bool > enable)
Definition: reader.hpp:68
bool GetEnableCDD(void) const
Definition: reader.hpp:67
CNullable< bool > m_EnableWGS
Definition: reader.hpp:73
CNullable< bool > m_EnableCDD
Definition: reader.hpp:74
bool GetEnableWGS(void) const
Definition: reader.hpp:62
bool IsSetEnableCDD(void) const
Definition: reader.hpp:66
void SetEnableWGS(CNullable< bool > enable)
Definition: reader.hpp:63
CNullable< bool > m_EnableSNP
Definition: reader.hpp:72
void SetEnableSNP(CNullable< bool > enable)
Definition: reader.hpp:58
bool IsSetEnableSNP(void) const
Definition: reader.hpp:56
void SetEnableWGS(bool enable)
Definition: reader.hpp:64
void SetEnableSNP(bool enable)
Definition: reader.hpp:59
bool IsSetEnableWGS(void) const
Definition: reader.hpp:61
void SetEnableCDD(bool enable)
Definition: reader.hpp:69
CNcbiOstrstream m_os
Definition: reader.hpp:348
friend CDebugPrinter & operator<<(CDebugPrinter &pr, const T &obj)
Definition: reader.hpp:343
virtual bool LoadSeq_idSeq_ids(CReaderRequestResult &result, const CSeq_id_Handle &seq_id)=0
CDataLoader::STypeFound TSequenceType
Definition: reader.hpp:194
CSemaphore m_NumFreeConnections
Definition: reader.hpp:399
vector< CSeq_id_Handle > TSeqIds
Definition: reader.hpp:96
CReadDispatcher * m_Dispatcher
Definition: reader.hpp:353
vector< TIds > TBulkIds
Definition: reader.hpp:125
CReaderAllocatedConnection CConn
Definition: reader.hpp:355
virtual void x_ConnectAtSlot(TConn conn)=0
int m_WaitTimeErrors
Definition: reader.hpp:404
int TBlobSplitVersion
Definition: reader.hpp:92
int TBlobVersion
Definition: reader.hpp:91
vector< CBlob_Info > TBlobIds
Definition: reader.hpp:97
TFreeConnections m_FreeConnections
Definition: reader.hpp:397
vector< int > TStates
Definition: reader.hpp:129
CBlob_id TBlobId
Definition: reader.hpp:88
void operator=(const CReader &)
CIncreasingTime m_WaitTime
Definition: reader.hpp:405
void x_SetIncludeHUP(bool include_hup)
Definition: reader.hpp:368
vector< string > TLabels
Definition: reader.hpp:127
vector< TGi > TGis
Definition: reader.hpp:126
virtual bool LoadBlob(CReaderRequestResult &result, const CBlob_id &blob_id)=0
unsigned TConn
Definition: reader.hpp:87
atomic< int > m_ConnectFailCount
Definition: reader.hpp:401
TConn m_NextNewConnection
Definition: reader.hpp:390
vector< TSeqPos > TLengths
Definition: reader.hpp:132
vector< pair< TBlobId, TChunkIds > > TBlobChunkIds
Definition: reader.hpp:135
bool m_IncludeHUP
Definition: reader.hpp:387
TConn m_MaxConnections
Definition: reader.hpp:385
vector< CSeq_inst::EMol > TTypes
Definition: reader.hpp:133
CTime m_LastTimeFailed
Definition: reader.hpp:402
vector< int > THashes
Definition: reader.hpp:130
vector< CSeq_id_Handle > TIds
Definition: reader.hpp:123
CDataLoader::SAccVerFound TSequenceAcc
Definition: reader.hpp:192
CReader(const CReader &)
CDataLoader::SHashFound TSequenceHash
Definition: reader.hpp:195
int TChunkId
Definition: reader.hpp:93
virtual bool LoadBlobState(CReaderRequestResult &result, const TBlobId &blob_id)=0
virtual void x_AddConnectionSlot(TConn conn)=0
int m_MaximumRetryCount
Definition: reader.hpp:400
bool m_PreopenConnection
Definition: reader.hpp:386
list< SConnSlot > TFreeConnections
Definition: reader.hpp:396
CTime m_NextConnectTime
Definition: reader.hpp:403
bool HasHUPIncluded() const
Definition: reader.hpp:312
vector< TChunkId > TChunkIds
Definition: reader.hpp:95
vector< bool > TLoaded
Definition: reader.hpp:124
virtual bool LoadBlobVersion(CReaderRequestResult &result, const TBlobId &blob_id)=0
CMutex m_ConnectionsMutex
Definition: reader.hpp:398
int TContentsMask
Definition: reader.hpp:94
vector< CBlob_id > TPlainBlobIds
Definition: reader.hpp:134
int TState
Definition: reader.hpp:89
int TBlobState
Definition: reader.hpp:90
vector< TTaxId > TTaxIds
Definition: reader.hpp:128
virtual void x_RemoveConnectionSlot(TConn conn)=0
CDataLoader::SGiFound TSequenceGi
Definition: reader.hpp:193
vector< bool > TKnown
Definition: reader.hpp:131
CSemaphore –.
Definition: ncbimtx.hpp:1375
CTime –.
Definition: ncbitime.hpp:296
definition of a Culling tree
Definition: ncbi_tree.hpp:100
#define T(s)
Definition: common.h:230
static CS_CONNECTION * conn
Definition: ct_dynamic.c:25
int failed
Definition: dbmorecmds.c:10
bool IsNull(void) const
Check if the object is unassigned.
Definition: ncbimisc.hpp:686
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TValue & GetValue(void) const
Get a const reference to the current value.
Definition: ncbimisc.hpp:703
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NcbiEmptyString
Definition: ncbistr.hpp:122
virtual void SetParams()
Called at the beginning of Run, before creating thread pool.
#define NCBI_XREADER_EXPORT
Definition: ncbi_export.h:1371
static const char label[]
Interfaces for a local cache of versioned binary large objects (BLOBS).
static int version
Definition: mdb_load.c:29
Multi-threading – mutexes; rw-locks; semaphore.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Defines: CTimeFormat - storage class for time format.
T max(T x_, T y_)
CReader::CDebugPrinter CDebugPrinter
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
Better replacement of GetSequenceHash(), this method should be defined in data loaders,...
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
double m_RetryDelay
Definition: reader.hpp:394
SAnnotSelector –.
Definition: _hash_fun.h:40
Definition: type.c:6
#define _ASSERT
else result
Definition: token2.c:20
Modified on Fri Apr 12 17:18:21 2024 by modify_doxy.py rev. 669887