NCBI C++ ToolKit
snp_client.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SNP_CLIENT__HPP
2 #define SNP_CLIENT__HPP
3 
4 /* $Id: snp_client.hpp 100840 2023-09-18 17:18:09Z vasilche $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Aleksey Grichenko, Eugene Vasilchenko
30  *
31  * File Description: client for reading SNP data
32  *
33  */
34 
35 #include "psgs_request.hpp"
36 #include "timing.hpp"
41 
42 
44 
46 class CID2_Blob_Id;
48 class CID2S_Split_Info;
49 class CID2S_Chunk;
50 class CSeq_id_Handle;
51 class CSeq_entry;
53 
56 
57 
58 class CSNPFileInfo;
59 class CSNPClient;
60 
61 
63 {  // NOTE(review): the line declaring this type (listing line 62) is absent from this listing; the cross-reference index refers to it as SSNPProcessor_Config - confirm against the real header.
64  size_t m_GCSize = 10;  // NOTE(review): meaning not visible in this header; name suggests a cache size limit - TODO confirm
65  size_t m_MissingGCSize = 10000;  // NOTE(review): meaning not visible in this header - TODO confirm
66  unsigned m_FileReopenTime = 3600;  // presumably seconds (3600 s = 1 h) - TODO confirm units
67  unsigned m_FileRecheckTime = 600;  // presumably seconds (600 s = 10 min) - TODO confirm units
68  unsigned m_FileOpenRetry = 3;  // presumably seeds CSNPFileInfo::m_RemainingOpenRetries - verify in snp_client.cpp
69  bool m_Split = true;  // presumably the split_enabled flag passed to CSNPSeqInfo::LoadBlob - TODO confirm
70  string m_AnnotName;  // no in-class initializer: default-constructed unless the caller sets it
71  bool m_AddPTIS = true;  // NOTE(review): probably related to CSNPClient::m_PTISClient listed in the index - TODO confirm
72  bool m_AllowNonRefSeq = false;
73  objects::CSeq_id::ESNPScaleLimit m_SNPScaleLimit = objects::CSeq_id::eSNPScaleLimit_Default;  // same enum type as the scale_limit argument of CSNPClient::GetAnnotInfo
74 };
75 
76 
77 struct SSNPData  // Data carrier returned by CSNPClient::GetAnnotInfo/GetBlobByBlobId/GetChunk and passed by non-const reference to CSNPSeqInfo::LoadBlob/LoadChunk.
78 {
79  typedef vector<CRef<objects::CID2S_Seq_annot_Info>> TAnnotInfo;  // container type of the m_AnnotInfo member (see note on m_Name)
80 
81  string m_BlobId;
82  string m_Name;  // NOTE(review): listing line 83 is absent here; the cross-reference index lists "TAnnotInfo m_AnnotInfo" at that line
84  string m_Error;  // presumably empty when no error occurred - TODO confirm. NOTE(review): listing lines 86-88 are absent; the index lists m_TSE, m_SplitInfo and m_Chunk (all CRef<>) there
85 
89  int m_SplitVersion = 0;  // defaults to 0
90  psg_time_point_t m_Start = psg_clock_t::now();  // initialized from the clock when the object is constructed
91 };
92 
93 
94 class CSNPBlobId  // SNP blob identifier: built from a string or from file/sequence/filter coordinates, and convertible back to a string with ToString().
95 {
96 public:
97  explicit CSNPBlobId(const CTempString& str);  // construct from a string; explicit, so no implicit string -> blob-id conversion
98  CSNPBlobId(const CSNPFileInfo& file, const objects::CSeq_id_Handle& seq_id, size_t filter_index);  // construct from file + Seq-id + filter index
99  CSNPBlobId(const objects::CSNPDbSeqIterator& seq, size_t filter_index);  // construct from a sequence iterator + filter index
100  CSNPBlobId(const CSNPFileInfo& file, size_t seq_index, size_t filter_index);  // construct from file + sequence index + filter index
101  ~CSNPBlobId(void);
102 
103  string ToString(void) const;  // string representation of the blob id. NOTE(review): listing lines 104-105 are absent; the index lists FromString(CTempString) for this class
106 
107  bool IsSatId(void) const;
108 
109  Int4 GetSat(void) const;
110  Int4 GetSubSat(void) const;
111  Int4 GetSatKey(void) const;
112 
113  bool IsValidSat(void) const;
114  bool IsValidSubSat(void) const;
115  bool IsValidSatKey(void) const;
116 
117  static bool IsValidNAIndex(size_t index);
118  static bool IsValidNAVersion(size_t version);
119  static bool IsValidNA(pair<size_t, size_t> na)  // na = (index, version); valid only when both parts pass their own check
120  {
121  return IsValidNAIndex(na.first) && IsValidNAVersion(na.second);
122  }
123  static bool IsValidSeqIndex(size_t seq_index);
124  static bool IsValidFilterIndex(size_t filter_index);
125 
126  static pair<size_t, size_t> ParseNA(CTempString acc);  // result is consumed as (index, version) by IsValidNA(pair) above
127  static bool IsValidNA(CTempString acc)  // string overload: accepts acc when ParseNA yields a non-zero first element
128  {
129  return ParseNA(acc).first != 0;  // presumably ParseNA returns first == 0 on parse failure - verify in snp_client.cpp
130  }
131 
132  string GetSatNA(void) const;
133  int GetSatBase(void) const;
134  int GetSubSatBase(void) const;
135 
136  size_t GetNAIndex(void) const  // plain accessor for m_NAIndex
137  {
138  return m_NAIndex;
139  }
140  size_t GetNAVersion(void) const  // plain accessor for m_NAVersion
141  {
142  return m_NAVersion;
143  }
144  size_t GetSeqIndex(void) const  // plain accessor for m_SeqIndex
145  {
146  return m_SeqIndex;
147  }
148  size_t GetFilterIndex(void) const  // plain accessor for m_FilterIndex
149  {
150  return m_FilterIndex;
151  }
152 
154  void SetNAIndex(size_t na_index);  // NOTE(review): listing line 153 is absent; the index lists SetSatNA(CTempString) for this class
155  void SetNAVersion(size_t na_version);
156  void SetSeqAndFilterIndex(size_t seq_index,
157  size_t filter_index);
158 
159  objects::CSeq_id_Handle GetSeqId(void) const;
160  string GetAccession(void) const;
161 
162  bool IsPrimaryTrack() const  // plain accessor for m_IsPrimaryTrack
163  {
164  return m_IsPrimaryTrack;
165  }
166  bool IsPrimaryTrackGraph() const  // plain accessor for m_IsPrimaryTrackGraph
167  {
168  return m_IsPrimaryTrackGraph;
169  }
170  bool IsPrimaryTrackFeat() const  // true for a primary track that is not the graph variant
171  {
172  return IsPrimaryTrack() && !IsPrimaryTrackGraph();
173  }
174 
177  // NOTE(review): listing lines 175-176 are absent; the index lists SetPrimaryTrackFeat() and SetPrimaryTrackGraph() for this class
178 protected:
179  // ID2 blob id
182  bool m_IsPrimaryTrack;  // NOTE(review): m_NAIndex, m_NAVersion, m_SeqIndex, m_FilterIndex and m_IsPrimaryTrackGraph are read by the inline getters above, but their declarations (listing lines 180-181, 183-185) are absent from this listing
186 
187  // SNP file name or VDB accession
188  string m_Accession;
189  // Ref Seq-id for annot blobs
190  objects::CSeq_id_Handle m_SeqId;
191 };
192 
193 
197 };
198 
199 
200 class CSNPSeqInfo : public CObject  // Per-sequence SNP state; handed out as CRef<CSNPSeqInfo> by CSNPFileInfo::GetSeqInfo and CSNPClient::GetSeqInfo.
201 {
202 public:
203  CSNPSeqInfo(CSNPFileInfo* file, const objects::CSNPDbSeqIterator& it);  // takes the owning file as a raw pointer plus a sequence iterator
204 
205  objects::CSNPDbSeqIterator GetSeqIterator(void) const;  // returns an iterator by value
206 
208  int GetAnnotChunkId(TSeqPos ref_pos) const;  // NOTE(review): listing line 207 is absent from this listing
209 
210  string GetAnnotName(void) const;
211 
212  void LoadRanges(void);
213 
214  CSNPBlobId GetBlobId(void) const;
215  void SetFilterIndex(size_t filter_index);
216  void SetFromBlobId(const CSNPBlobId& blob_id);
217 
218  bool IncludeFeat(void) const
219  {  // NOTE(review): the return statement (listing line 220) is absent from this listing - consult the real header
221  }
222  bool IncludeGraph(void) const
223  {  // NOTE(review): the return statement (listing line 224) is absent from this listing - consult the real header
225  }
226 
227  void LoadBlob(SSNPData& data, bool split_enabled);  // data is a non-const reference, i.e. an in/out argument
228  void LoadChunk(SSNPData& data, int chunk_id);  // data is a non-const reference, i.e. an in/out argument
229 
230 protected:
231  friend class CSNPClient;  // grants CSNPClient access to the protected members below
232 
234  size_t m_SeqIndex;  // NOTE(review): listing line 233 is absent; the index lists "CSNPFileInfo * m_File" among this file's members
235  size_t m_FilterIndex;
236  objects::CSeq_id_Handle m_SeqId;
237  bool m_IsPrimaryTrack;  // NOTE(review): listing line 238 is absent; the index lists a "bool m_IsPrimaryTrackGraph" member
239 };
240 
241 
242 class CSNPFileInfo : public CObject  // Per-file SNP state wrapping an objects::CSNPDb; handed out as CRef<CSNPFileInfo> by CSNPClient::GetFileInfo.
243 {
244 public:
245  CSNPFileInfo(CSNPClient& client, const string& file_name);
246 
247  bool IsValidNA(void) const  // plain accessor for m_IsValidNA
248  {
249  return m_IsValidNA;
250  }
251 
252  const string& GetFileName(void) const  // returns a reference to the member; valid only while this object lives
253  {
254  return m_FileName;
255  }
256  const string& GetAccession(void) const  // returns a reference to the member; valid only while this object lives
257  {
258  return m_Accession;
259  }
260  const string& GetBaseAnnotName(void) const  // "base" name = m_AnnotName, which carries no filter index
261  {
262  return m_AnnotName;
263  }
264  string GetSNPAnnotName(size_t filter_index) const;  // presumably the base annot name combined with filter_index - verify in snp_client.cpp
265 
266  CSNPBlobId GetAnnotBlobId(const objects::CSeq_id_Handle& id) const;
267 
268  CRef<CSNPSeqInfo> GetSeqInfo(const objects::CSeq_id_Handle& seq_id);  // lookup by Seq-id handle
269  CRef<CSNPSeqInfo> GetSeqInfo(size_t seq_index);  // lookup by sequence index
271 
272  objects::CSNPDb& GetDb(void)  // exposes the wrapped database object by non-const reference
273  {
274  return m_SNPDb;
275  }
276  operator objects::CSNPDb& (void)  // implicit conversion to the wrapped database; same result as GetDb()
277  {
278  return GetDb();
279  }
280 
281  void AddSeq(const objects::CSeq_id_Handle& id);
282 
283 protected:
284  friend class CSNPClient;  // grants CSNPClient access to the protected members below
285 
286  typedef map<objects::CSeq_id_Handle, CRef<CSNPSeqInfo> > TSeqById;  // Seq-id handle -> sequence info
287  typedef map<size_t, CRef<CSNPSeqInfo> > TSeqByIdx;  // sequence index -> sequence info
288 
289  void x_Initialize(CSNPClient& client, const string& file_name);
290 
291  bool m_IsValidNA;
292  string m_FileName; // external VDB file access string
293  string m_Accession; // OM named annot accession (without filter index)
294  string m_AnnotName; // OM annot name (without filter index)
295  unsigned m_RemainingOpenRetries; // number of tries to open a VDB file
296  objects::CSNPDb m_SNPDb;  // NOTE(review): listing lines 297-298 are absent; the index lists "TSeqById m_SeqById" and "TSeqByIdx m_SeqByIdx"
299 };
300 
301 
303 {  // NOTE(review): the class declaration line (listing line 302) is absent from this listing; the destructor and friend declarations identify the class as CSNPClient.
304 public:
306  ~CSNPClient(void);  // NOTE(review): listing line 305 is absent; the index lists the constructor CSNPClient(const SSNPProcessor_Config &config)
307 
308  vector<string> WhatNACanProcess(SPSGS_AnnotRequest& annot_request,  // presumably returns the annotation names from annot_request this client can serve - TODO confirm
309  TProcessorPriority priority = 0) const;  // priority defaults to 0
310  bool CanProcessRequest(CPSGS_Request& request, TProcessorPriority priority) const;
311 
312  vector<SSNPData> GetAnnotInfo(const objects::CSeq_id_Handle& id,
313  const string& name, objects::CSeq_id::ESNPScaleLimit scale_limit);
314  SSNPData GetBlobByBlobId(const string& blob_id);  // blob_id is presumably the string form accepted by CSNPBlobId's string constructor - TODO confirm
315  SSNPData GetChunk(const string& id2info, int chunk_id);
316 
317  CRef<CSNPFileInfo> GetFileInfo(const string& acc);
318  CRef<CSNPSeqInfo> GetSeqInfo(const CSNPBlobId& blob_id);
319 
320  bool HaveValidSeq_id(const SPSGS_AnnotRequest& request) const;
321 
322  bool IsValidSeqId(const objects::CSeq_id_Handle& idh) const;  // overload taking a Seq-id handle
323  bool IsValidSeqId(const string& id, int id_type, int version = 0) const;  // overload taking id text + type; version defaults to 0
324 
325 private:
326  friend class CSNPFileInfo;  // grants CSNPFileInfo access to the private members below
327 
329  // NOTE(review): listing line 328 is absent; the index lists "CVDBCacheWithExpiration TSNPDbCache" at that line
330  CRef<objects::CID2S_Seq_annot_Info> x_GetFeatInfo(const string& name, const objects::CSeq_id_Handle& id);
331  CRef<objects::CID2S_Seq_annot_Info> x_GetGraphInfo(const string& name, const objects::CSeq_id_Handle& id);
332 
335  EPSGOperationStatus status);  // NOTE(review): tail of a declaration whose first lines (listing 333-334) are absent; the index lists x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
336 
338  shared_ptr<objects::CVDBMgr> m_Mgr;  // NOTE(review): listing lines 337, 339 and 340 are absent; the index lists m_Config, m_PTISClient and m_SNPDbCache at those lines
341 };
342 
343 
347 
348 #endif // SNP_CLIENT__HPP
CID2S_Chunk –.
Definition: ID2S_Chunk.hpp:66
CID2S_Seq_annot_Info –.
CID2S_Split_Info –.
CID2_Blob_Id –.
Definition: ID2_Blob_Id.hpp:66
Int4 GetSubSat(void) const
void SetPrimaryTrackFeat()
bool IsValidSatKey(void) const
static bool IsValidSeqIndex(size_t seq_index)
bool m_IsPrimaryTrackGraph
int GetSubSatBase(void) const
static bool IsValidFilterIndex(size_t filter_index)
CSNPBlobId(const CSNPFileInfo &file, size_t seq_index, size_t filter_index)
bool IsValidSat(void) const
static pair< size_t, size_t > ParseNA(CTempString acc)
size_t GetFilterIndex(void) const
Definition: snp_client.hpp:148
string ToString(void) const
Get string representation of blob id.
bool IsPrimaryTrackFeat() const
Definition: snp_client.hpp:170
CSNPBlobId(const CTempString &str)
static bool IsValidNAIndex(size_t index)
Definition: snp_client.cpp:221
static bool IsValidNAVersion(size_t version)
Definition: snp_client.cpp:227
objects::CSeq_id_Handle GetSeqId(void) const
void FromString(CTempString str)
Int4 GetSat(void) const
void SetSatNA(CTempString acc)
void SetSeqAndFilterIndex(size_t seq_index, size_t filter_index)
size_t GetNAVersion(void) const
Definition: snp_client.hpp:140
static pair< size_t, size_t > ParseNA(CTempString acc)
Definition: snp_client.cpp:311
void SetNAIndex(size_t na_index)
static bool IsValidNA(pair< size_t, size_t > na)
Definition: snp_client.hpp:119
string m_Accession
Int4 GetSatKey(void) const
void SetNAVersion(size_t na_version)
CSNPBlobId(const CSNPFileInfo &file, const objects::CSeq_id_Handle &seq_id, size_t filter_index)
size_t GetNAIndex(void) const
Definition: snp_client.hpp:136
bool FromSatString(CTempString str)
bool IsPrimaryTrackGraph() const
Definition: snp_client.hpp:166
objects::CSeq_id_Handle m_SeqId
Definition: snp_client.hpp:190
int GetSatBase(void) const
bool IsValidSubSat(void) const
static bool IsValidNA(CTempString acc)
Definition: snp_client.hpp:127
~CSNPBlobId(void)
CSNPBlobId(const objects::CSNPDbSeqIterator &seq, size_t filter_index)
bool IsPrimaryTrack() const
Definition: snp_client.hpp:162
static bool IsValidNAIndex(size_t index)
static bool IsValidNAVersion(size_t version)
bool IsSatId(void) const
string GetAccession(void) const
string GetSatNA(void) const
size_t GetSeqIndex(void) const
Definition: snp_client.hpp:144
void SetPrimaryTrackGraph()
SSNPProcessor_Config m_Config
Definition: snp_client.hpp:337
bool CanProcessRequest(CPSGS_Request &request, TProcessorPriority priority) const
Definition: snp_client.cpp:875
SSNPData GetChunk(const string &id2info, int chunk_id)
void x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
~CSNPClient(void)
Definition: snp_client.cpp:763
bool IsValidSeqId(const objects::CSeq_id_Handle &idh) const
vector< SSNPData > GetAnnotInfo(const objects::CSeq_id_Handle &id, const string &name, objects::CSeq_id::ESNPScaleLimit scale_limit)
Definition: snp_client.cpp:905
CRef< objects::CSnpPtisClient > m_PTISClient
Definition: snp_client.hpp:339
CRef< objects::CID2S_Seq_annot_Info > x_GetGraphInfo(const string &name, const objects::CSeq_id_Handle &id)
CSNPClient(const SSNPProcessor_Config &config)
Definition: snp_client.cpp:752
CRef< CSNPSeqInfo > GetSeqInfo(const CSNPBlobId &blob_id)
Definition: snp_client.cpp:842
CRef< CSNPFileInfo > GetFileInfo(const string &acc)
Definition: snp_client.cpp:768
vector< string > WhatNACanProcess(SPSGS_AnnotRequest &annot_request, TProcessorPriority priority=0) const
Definition: snp_client.cpp:848
TSNPDbCache m_SNPDbCache
Definition: snp_client.hpp:340
shared_ptr< objects::CVDBMgr > m_Mgr
Definition: snp_client.hpp:338
bool HaveValidSeq_id(const SPSGS_AnnotRequest &request) const
Definition: snp_client.cpp:683
CVDBCacheWithExpiration TSNPDbCache
Definition: snp_client.hpp:328
SSNPData GetBlobByBlobId(const string &blob_id)
CRef< objects::CID2S_Seq_annot_Info > x_GetFeatInfo(const string &name, const objects::CSeq_id_Handle &id)
CRef< CSNPSeqInfo > GetSeqInfo(size_t seq_index)
CSNPDb & GetDb(void)
unsigned m_RemainingOpenRetries
const string & GetAccession(void) const
Definition: snp_client.hpp:256
CRef< CSNPSeqInfo > GetSeqInfo(const CSNPBlobId &blob_id)
map< objects::CSeq_id_Handle, CRef< CSNPSeqInfo > > TSeqById
Definition: snp_client.hpp:286
objects::CSNPDb & GetDb(void)
Definition: snp_client.hpp:272
map< size_t, CRef< CSNPSeqInfo > > TSeqByIdx
Definition: snp_client.hpp:287
const string & GetFileName(void) const
Definition: snp_client.hpp:252
TSeqByIdx m_SeqByIdx
const string & GetBaseAnnotName(void) const
Definition: snp_client.hpp:260
CSNPBlobId GetAnnotBlobId(const objects::CSeq_id_Handle &id) const
void x_Initialize(CSNPDataLoader_Impl &impl, const string &file_name)
CRef< CSNPSeqInfo > GetSeqInfo(const objects::CSeq_id_Handle &seq_id)
bool IsValidNA(void) const
Definition: snp_client.hpp:247
CSNPFileInfo(CSNPDataLoader_Impl &impl, const string &file_name)
string GetSNPAnnotName(size_t filter_index) const
TSeqById m_SeqById
objects::CSNPDb m_SNPDb
Definition: snp_client.hpp:296
void AddSeq(const objects::CSeq_id_Handle &id)
CSNPBlobId GetAnnotBlobId(void) const
bool IncludeFeat(void) const
Definition: snp_client.hpp:218
bool m_IsPrimaryTrackGraph
size_t m_FilterIndex
void SetFromBlobId(const CSNPBlobId &blob_id)
CSNPBlobId GetBlobId(void) const
CSNPFileInfo * m_File
bool IncludeGraph(void) const
Definition: snp_client.hpp:222
objects::CSeq_id_Handle m_SeqId
Definition: snp_client.hpp:236
void SetFilterIndex(size_t filter_index)
int GetAnnotChunkId(TSeqPos ref_pos) const
void LoadBlob(SSNPData &data, bool split_enabled)
Definition: snp_client.cpp:568
string GetAnnotName(void) const
CSNPSeqInfo(CSNPFileInfo *file, const objects::CSNPDbSeqIterator &it)
void LoadRanges(void)
void LoadChunk(SSNPData &data, int chunk_id)
Definition: snp_client.cpp:595
objects::CSNPDbSeqIterator GetSeqIterator(void) const
Definition: Seq_entry.hpp:56
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Definition: map.hpp:338
const char * file_name[]
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
operation
Bit operations.
Definition: bmconst.h:191
FILE * file
const string version
version string
Definition: variables.hpp:66
int TProcessorPriority
psg_clock_t::time_point psg_time_point_t
static CNamedPipeClient * client
BEGIN_NAMESPACE(objects)
@ eSNPAnnotChunk_graph
Definition: snp_client.hpp:195
@ eSNPAnnotChunk_snp
Definition: snp_client.hpp:196
END_NCBI_NAMESPACE
Definition: snp_client.hpp:346
END_NAMESPACE(objects)
BEGIN_NCBI_NAMESPACE
Definition: snp_client.hpp:43
ESNPAnnotChunkIdType
psg_time_point_t m_Start
Definition: snp_client.hpp:90
string m_Error
Definition: snp_client.hpp:84
TAnnotInfo m_AnnotInfo
Definition: snp_client.hpp:83
int m_SplitVersion
Definition: snp_client.hpp:89
CRef< objects::CID2S_Split_Info > m_SplitInfo
Definition: snp_client.hpp:87
vector< CRef< objects::CID2S_Seq_annot_Info > > TAnnotInfo
Definition: snp_client.hpp:79
string m_BlobId
Definition: snp_client.hpp:81
CRef< objects::CID2S_Chunk > m_Chunk
Definition: snp_client.hpp:88
CRef< objects::CSeq_entry > m_TSE
Definition: snp_client.hpp:86
string m_Name
Definition: snp_client.hpp:82
unsigned m_FileOpenRetry
Definition: snp_client.hpp:68
unsigned m_FileRecheckTime
Definition: snp_client.hpp:67
unsigned m_FileReopenTime
Definition: snp_client.hpp:66
objects::CSeq_id::ESNPScaleLimit m_SNPScaleLimit
Definition: snp_client.hpp:73
EPSGOperationStatus
Definition: timing.hpp:60
EPSGOperation
Definition: timing.hpp:65
Modified on Fri Sep 20 14:58:11 2024 by modify_doxy.py rev. 669887