NCBI C++ ToolKit
snploader_impl.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_DATA_LOADERS_SNP___SNPLOADER_IMPL__HPP
2 #define OBJTOOLS_DATA_LOADERS_SNP___SNPLOADER_IMPL__HPP
3 
4 /* $Id: snploader_impl.hpp 100839 2023-09-18 17:16:25Z vasilche $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Eugene Vasilchenko
30  *
31  * File Description: SNP file data loader
32  *
33  * ===========================================================================
34  */
35 
36 
37 #include <corelib/ncbistd.hpp>
38 #include <corelib/ncbimtx.hpp>
43 
46 
47 class CDataLoader;
49 class CSNPSeqChunkInfo;
50 class CSNPSeqInfo;
51 class CSNPFileInfo;
52 class CID2SNPContext;
53 
54 class CSNPBlobId : public CBlobId
55 {
56 public:
57  explicit
58  CSNPBlobId(const CTempString& str);
60  const CSeq_id_Handle& seq_id,
61  size_t filter_index);
62  CSNPBlobId(const CSNPDbSeqIterator& seq,
63  size_t filter_index);
65  size_t seq_index,
66  size_t filter_index);
67  ~CSNPBlobId(void);
68 
69  // string blob id representation:
70  // eBlobType_annot_plain_id
71  string ToString(void) const;
74 
75  bool operator<(const CBlobId& id) const;
76  bool operator==(const CBlobId& id) const;
77 
78  bool IsSatId(void) const;
79 
80  Int4 GetSat(void) const;
81  Int4 GetSubSat(void) const;
82  Int4 GetSatKey(void) const;
83 
84  bool IsValidSat(void) const;
85  bool IsValidSubSat(void) const;
86  bool IsValidSatKey(void) const;
87 
88  static bool IsValidNAIndex(size_t index);
89  static bool IsValidNAVersion(size_t version);
// Pair overload: valid only when na.first passes IsValidNAIndex() and
// na.second passes IsValidNAVersion().
90  static bool IsValidNA(pair<size_t, size_t> na) {
91  return IsValidNAIndex(na.first) && IsValidNAVersion(na.second);
92  }
93  static bool IsValidSeqIndex(size_t seq_index);
94  static bool IsValidFilterIndex(size_t filter_index);
95 
96  static pair<size_t, size_t> ParseNA(CTempString acc);
// String overload: runs ParseNA() on acc and reports success when the first
// element of the returned pair is non-zero.
// NOTE(review): the first element is presumably the NA index (the pair overload
// feeds .first to IsValidNAIndex) -- confirm against ParseNA's definition.
97  static bool IsValidNA(CTempString acc) {
98  return ParseNA(acc).first != 0;
99  }
100 
101  string GetSatNA(void) const;
102  int GetSatBase(void) const;
103  int GetSubSatBase(void) const;
104 
// Accessor: returns the current value of m_NAIndex.
105  size_t GetNAIndex(void) const
106  {
107  return m_NAIndex;
108  }
// Accessor: returns the current value of m_NAVersion.
109  size_t GetNAVersion(void) const
110  {
111  return m_NAVersion;
112  }
// Accessor: returns the current value of m_SeqIndex.
113  size_t GetSeqIndex(void) const
114  {
115  return m_SeqIndex;
116  }
// Accessor: returns the current value of m_FilterIndex.
117  size_t GetFilterIndex(void) const
118  {
119  return m_FilterIndex;
120  }
121 
122  void SetSatNA(CTempString acc);
123  void SetNAIndex(size_t na_index);
124  void SetNAVersion(size_t na_version);
125  void SetSeqAndFilterIndex(size_t seq_index,
126  size_t filter_index);
127 
128  CSeq_id_Handle GetSeqId(void) const;
129  string GetAccession(void) const;
130 
// Returns the m_IsPrimaryTrack flag.
131  bool IsPrimaryTrack() const
132  {
133  return m_IsPrimaryTrack;
134  }
// Returns the m_IsPrimaryTrackGraph flag.
135  bool IsPrimaryTrackGraph() const
136  {
137  return m_IsPrimaryTrackGraph;
138  }
// True when IsPrimaryTrack() holds and IsPrimaryTrackGraph() does not,
// i.e. a primary track that is not flagged as a graph track.
139  bool IsPrimaryTrackFeat() const
140  {
141  return IsPrimaryTrack() && !IsPrimaryTrackGraph();
142  }
143 
144  void SetPrimaryTrackFeat();
145  void SetPrimaryTrackGraph();
146 
147 protected:
148  // ID2 blob id
155 
156  // SNP file name or VDB accession
157  string m_Accession;
158  // Ref Seq-id for annot blobs
160 };
161 
162 
166 };
167 
168 
169 class CSNPSeqInfo : public CObject
170 {
171 public:
173  const CSNPDbSeqIterator& it);
174 
175  CSNPDbSeqIterator GetSeqIterator(void) const;
176 
178  int GetAnnotChunkId(TSeqPos ref_pos) const;
179 
180  string GetAnnotName(void) const;
181 
182  void LoadRanges(void);
183 
184  void LoadAnnotBlob(CTSE_LoadLock& load_lock);
185  void LoadAnnotChunk(CTSE_Chunk_Info& chunk_info);
186 
191 
192  void LoadSeqBlob(CTSE_LoadLock& load_lock);
193  void LoadSeqChunk(CTSE_Chunk_Info& chunk_info);
194 
195  void LoadSeqMainEntry(CTSE_LoadLock& load_lock);
196 
197  CRef<CSNPBlobId> GetBlobId(void) const;
198  void SetFilterIndex(size_t filter_index);
199  void SetFromBlobId(const CSNPBlobId& blob_id);
200 
201  bool IncludeFeat(void) const
202  {
204  }
205  bool IncludeGraph(void) const
206  {
208  }
209 
210 protected:
211  friend class CSNPDataLoader_Impl;
212 
214  size_t m_SeqIndex;
219 };
220 
221 
222 class CSNPFileInfo : public CObject
223 {
224 public:
226  const string& file_name);
227 
// Returns the m_IsValidNA flag.
// NOTE(review): where the flag is set is not visible in this listing.
228  bool IsValidNA(void) const {
229  return m_IsValidNA;
230  }
231 
// Returns a const reference to m_FileName, which its declaration describes as
// the external VDB file access string.
232  const string& GetFileName(void) const
233  {
234  return m_FileName;
235  }
// Returns a const reference to m_Accession, which its declaration describes as
// the OM named annot accession (without filter index).
236  const string& GetAccession(void) const
237  {
238  return m_Accession;
239  }
// Returns a const reference to m_AnnotName, which its declaration describes as
// the OM annot name (without filter index).
240  const string& GetBaseAnnotName(void) const
241  {
242  return m_AnnotName;
243  }
244  string GetSNPAnnotName(size_t filter_index) const;
245 
247 
249  CRef<CSNPSeqInfo> GetSeqInfo(size_t seq_index);
250  CRef<CSNPSeqInfo> GetSeqInfo(const CSNPBlobId& blob_id);
251 
// Returns a non-const reference to m_SNPMutex from a const method.
// NOTE(review): this requires m_SNPMutex to be declared mutable; its
// declaration is not visible in this listing -- confirm.
252  CMutex& GetMutex(void) const
253  {
254  return m_SNPMutex;
255  }
256 
258 
// Returns a mutable reference to the m_SNPDb member.
259  CSNPDb& GetDb(void)
260  {
261  return m_SNPDb;
262  }
// Non-explicit conversion operator: lets this object be used where a CSNPDb&
// is expected; forwards to GetDb().
263  operator CSNPDb&(void)
264  {
265  return GetDb();
266  }
267 
268  void AddSeq(const CSeq_id_Handle& id);
269 
272 
273 protected:
274  friend class CSNPDataLoader_Impl;
275 
278 
280  const string& file_name);
281 
284  string m_FileName; // external VDB file access string
285  string m_Accession; // OM named annot accession (without filter index)
286  string m_AnnotName; // OM annot name (without filter index)
291 };
292 
293 
295 {
296 public:
297  explicit CSNPDataLoader_Impl(const CSNPDataLoader::SLoaderParams& params);
298  ~CSNPDataLoader_Impl(void);
299 
301  void AddFixedFile(const string& file_name);
302  void AddFixedFileOnce(const string& file_name);
303 
304  template<class Call>
306  CallWithRetry(Call&& call,
307  const char* name,
308  unsigned retry_count = 0);
309 
310  CRef<CSNPFileInfo> GetFixedFile(const string& acc);
311  CRef<CSNPFileInfo> FindFile(const string& acc);
312  CRef<CSNPFileInfo> GetFileInfo(const string& acc);
313  CRef<CSNPFileInfo> GetFileInfo(const CSNPBlobId& blob_id);
314  CRef<CSNPSeqInfo> GetSeqInfo(const CSNPBlobId& blob_id);
315 
317  const CSeq_id_Handle& idh,
318  CDataLoader::EChoice choice);
320  const CSeq_id_Handle& idh,
321  const SAnnotSelector* sel,
322  CDataLoader::TProcessedNAs* processed_nas);
324  const CSeq_id_Handle& id,
325  const SAnnotSelector* sel,
326  CDataLoader::TProcessedNAs* processed_nas);
327 
329  const CSNPBlobId& blob_id);
331  const CSNPBlobId& blob_id);
332  void LoadBlob(const CSNPBlobId& blob_id,
333  CTSE_LoadLock& load_lock);
334  void GetChunk(const CSNPBlobId& blob_id,
335  CTSE_Chunk_Info& chunk);
336  void GetChunkOnce(const CSNPBlobId& blob_id,
337  CTSE_Chunk_Info& chunk_info);
338 
340 
343 
344 protected:
345  friend class CSNPFileInfo;
346  struct SDirSeqInfo {
350  string m_Label;
351  };
352 
353 private:
354  CRef<CSNPFileInfo> x_GetFileInfo(const string& file);
355 
356  typedef map<string, string> TFixedFiles; // SNP NA accession -> acc_or_path
358 
359  // mutex guarding input into the map
360  mutable CMutex m_Mutex;
364  unsigned m_RetryCount;
365  string m_DirPath;
366  string m_AnnotName;
369  bool m_AddPTIS;
371 };
372 
375 
376 #endif // OBJTOOLS_DATA_LOADERS_SNP___SNPLOADER_IMPL__HPP
CMutex –.
Definition: ncbimtx.hpp:749
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
Int4 GetSubSat(void) const
Definition: snp_client.cpp:290
void SetPrimaryTrackFeat()
Definition: snp_client.cpp:387
bool IsValidSatKey(void) const
Definition: snp_client.cpp:362
static bool IsValidSeqIndex(size_t seq_index)
Definition: snp_client.cpp:233
bool m_IsPrimaryTrackGraph
int GetSubSatBase(void) const
Definition: snp_client.cpp:264
bool IsValidSat(void) const
Definition: snp_client.cpp:305
size_t GetFilterIndex(void) const
string ToString(void) const
Get string representation of blob id.
Definition: snp_client.cpp:403
bool IsPrimaryTrackFeat() const
CSNPBlobId(const CTempString &str)
Definition: snp_client.cpp:170
static bool IsValidNAIndex(size_t index)
Definition: snp_client.cpp:221
static bool IsValidNAVersion(size_t version)
Definition: snp_client.cpp:227
void FromString(CTempString str)
Definition: snp_client.cpp:469
Int4 GetSat(void) const
Definition: snp_client.cpp:283
void SetSatNA(CTempString acc)
Definition: snp_client.cpp:344
void SetSeqAndFilterIndex(size_t seq_index, size_t filter_index)
Definition: snp_client.cpp:352
size_t GetNAVersion(void) const
static pair< size_t, size_t > ParseNA(CTempString acc)
Definition: snp_client.cpp:311
void SetNAIndex(size_t na_index)
Definition: snp_client.cpp:245
static bool IsValidNA(pair< size_t, size_t > na)
string m_Accession
bool operator<(const CBlobId &id) const
Int4 GetSatKey(void) const
Definition: snp_client.cpp:297
void SetNAVersion(size_t na_version)
Definition: snp_client.cpp:270
size_t GetNAIndex(void) const
bool FromSatString(CTempString str)
Definition: snp_client.cpp:418
bool IsPrimaryTrackGraph() const
int GetSatBase(void) const
Definition: snp_client.cpp:258
bool IsValidSubSat(void) const
Definition: snp_client.cpp:252
static bool IsValidNA(CTempString acc)
~CSNPBlobId(void)
Definition: snp_client.cpp:216
bool IsPrimaryTrack() const
CSeq_id_Handle GetSeqId(void) const
Definition: snp_client.cpp:369
bool operator==(const CBlobId &id) const
static bool IsValidFilterIndex(size_t filter_index)
Definition: snp_client.cpp:239
bool IsSatId(void) const
Definition: snp_client.cpp:277
string GetAccession(void) const
Definition: snp_client.cpp:376
string GetSatNA(void) const
Definition: snp_client.cpp:335
size_t GetSeqIndex(void) const
void SetPrimaryTrackGraph()
Definition: snp_client.cpp:395
CSeq_id_Handle m_SeqId
CRef< CSNPFileInfo > FindFile(const string &acc)
CRef< CSNPFileInfo > GetFixedFile(const string &acc)
std::invoke_result< Call >::type CallWithRetry(Call &&call, const char *name, unsigned retry_count=0)
CRef< CSNPFileInfo > x_GetFileInfo(const string &file)
CRef< CSNPSeqInfo > GetSeqInfo(const CSNPBlobId &blob_id)
void GetChunkOnce(const CSNPBlobId &blob_id, CTSE_Chunk_Info &chunk_info)
CDataLoader::TTSE_LockSet GetRecords(CDataSource *data_source, const CSeq_id_Handle &idh, CDataLoader::EChoice choice)
void LoadBlob(const CSNPBlobId &blob_id, CTSE_LoadLock &load_lock)
CObjectManager::TPriority GetDefaultPriority(void) const
CVDBCacheWithExpiration::CSlot SSNPFileInfoSlot
CSNPDataLoader_Impl(const CSNPDataLoader::SLoaderParams &params)
CTSE_LoadLock GetBlobById(CDataSource *data_source, const CSNPBlobId &blob_id)
CRef< CSnpPtisClient > m_PTISClient
CVDBCacheWithExpiration TFoundFiles
map< string, string > TFixedFiles
CTSE_LoadLock GetBlobByIdOnce(CDataSource *data_source, const CSNPBlobId &blob_id)
CRef< CSNPFileInfo > GetFileInfo(const string &acc)
CDataLoader::TTSE_LockSet GetOrphanAnnotRecords(CDataSource *ds, const CSeq_id_Handle &idh, const SAnnotSelector *sel, CDataLoader::TProcessedNAs *processed_nas)
CSNPDataLoader::TAnnotNames TAnnotNames
TAnnotNames GetPossibleAnnotNames(void) const
void GetChunk(const CSNPBlobId &blob_id, CTSE_Chunk_Info &chunk)
CDataLoader::TTSE_LockSet GetOrphanAnnotRecordsOnce(CDataSource *ds, const CSeq_id_Handle &id, const SAnnotSelector *sel, CDataLoader::TProcessedNAs *processed_nas)
void AddFixedFileOnce(const string &file_name)
void AddFixedFile(const string &file_name)
vector< CAnnotName > TAnnotNames
Definition: snploader.hpp:110
CMutex & GetMutex(void) const
CSNPDb & GetDb(void)
unsigned m_RemainingOpenRetries
const string & GetAccession(void) const
CSNPDataLoader::TAnnotNames TAnnotNames
map< CSeq_id_Handle, CRef< CSNPSeqInfo > > TSeqById
map< size_t, CRef< CSNPSeqInfo > > TSeqByIdx
void AddSeq(const CSeq_id_Handle &id)
const string & GetFileName(void) const
TSeqByIdx m_SeqByIdx
CRef< CSNPSeqInfo > GetSeqInfo(const CSeq_id_Handle &seq_id)
Definition: snp_client.cpp:642
const string & GetBaseAnnotName(void) const
void InitializeDb(CSNPDataLoader_Impl &impl)
void x_Initialize(CSNPDataLoader_Impl &impl, const string &file_name)
bool IsValidNA(void) const
CSNPFileInfo(CSNPDataLoader_Impl &impl, const string &file_name)
void GetPossibleAnnotNames(TAnnotNames &names) const
string GetSNPAnnotName(size_t filter_index) const
Definition: snp_client.cpp:636
TSeqById m_SeqById
CRef< CSNPBlobId > GetAnnotBlobId(const CSeq_id_Handle &id) const
void LoadAnnotChunk(CTSE_Chunk_Info &chunk_info)
CSNPSeqInfo(CSNPFileInfo *file, const CSNPDbSeqIterator &it)
Definition: snp_client.cpp:500
bool IncludeFeat(void) const
bool m_IsPrimaryTrackGraph
void LoadAnnotAlignChunk(CTSE_Chunk_Info &chunk_info)
CSNPDbSeqIterator GetSeqIterator(void) const
Definition: snp_client.cpp:540
size_t m_FilterIndex
void LoadAnnotBlob(CTSE_LoadLock &load_lock)
void SetFromBlobId(const CSNPBlobId &blob_id)
Definition: snp_client.cpp:532
CSNPFileInfo * m_File
CSeq_id_Handle m_SeqId
bool IncludeGraph(void) const
CRef< CSNPBlobId > GetAnnotBlobId(void) const
void SetFilterIndex(size_t filter_index)
Definition: snp_client.cpp:523
int GetAnnotChunkId(TSeqPos ref_pos) const
void LoadAnnotMainChunk(CTSE_Chunk_Info &chunk_info)
string GetAnnotName(void) const
Definition: snp_client.cpp:556
CRef< CSNPBlobId > GetBlobId(void) const
Definition: snp_client.cpp:513
void LoadSeqChunk(CTSE_Chunk_Info &chunk_info)
void LoadAnnotMainSplit(CTSE_LoadLock &load_lock)
void LoadSeqMainEntry(CTSE_LoadLock &load_lock)
void LoadRanges(void)
void LoadSeqBlob(CTSE_LoadLock &load_lock)
void LoadAnnotGraphChunk(CTSE_Chunk_Info &chunk_info)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
const char * file_name[]
static const struct name_t names[]
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
EChoice
main blob is the blob with the sequence; all other blobs are external and contain external annotations
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
FILE * file
static int version
Definition: mdb_load.c:29
Multi-threading – mutexes; rw-locks; semaphore.
ESNPAnnotChunkIdType
@ eSNPAnnotChunk_graph
@ eSNPAnnotChunk_snp
SAnnotSelector –.
Modified on Thu Jun 13 17:28:49 2024 by modify_doxy.py rev. 669887