NCBI C++ ToolKit
sraloader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sraloader.cpp 87399 2019-08-26 18:56:36Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko
27  *
28  * File Description: SRA file data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objects/seq/seq__.hpp>
40 
46 
50 
52 
53 class CObject;
54 
// Declares and defines the boolean configuration parameter with section
// SRA_LOADER and name TRIM; its default value is false and it is created with
// eParam_NoThread. GetTrimParam() reads it through
// NCBI_PARAM_TYPE(SRA_LOADER, TRIM).
// NOTE(review): the trailing SRA_LOADER_TRIM argument is presumably the
// environment variable name that overrides the value - confirm against the
// NCBI_PARAM_DEF_EX documentation.
55 NCBI_PARAM_DECL(bool, SRA_LOADER, TRIM);
56 NCBI_PARAM_DEF_EX(bool, SRA_LOADER, TRIM, false,
57  eParam_NoThread, SRA_LOADER_TRIM);
58 
60 
61 class CDataLoader;
62 
64 
66 public:
67  bool IsDataLoaderMatches(CDataLoader& loader) const {
68  return dynamic_cast<CSRADataLoader*>(&loader) != 0;
69  }
70 };
71 
72 
73 class CRevoker {
74 public:
76  CLoaderFilter filter;
78  }
79 };
83 
85 
86 
87 /////////////////////////////////////////////////////////////////////////////
88 // CSRABlobId
89 /////////////////////////////////////////////////////////////////////////////
90 
91 class CSRABlobId : public CBlobId
92 {
93 public:
94  CSRABlobId(const string& acc, unsigned spot_id);
95  ~CSRABlobId(void);
96 
97  string m_Accession;
98  unsigned m_SpotId;
99 
100  string ToString(void) const;
101  bool operator<(const CBlobId& id) const;
102  bool operator==(const CBlobId& id) const;
103 };
104 
105 
106 CSRABlobId::CSRABlobId(const string& acc, unsigned spot_id)
107  : m_Accession(acc), m_SpotId(spot_id)
108 {
109 }
110 
111 
113 {
114 }
115 
116 
117 string CSRABlobId::ToString(void) const
118 {
120  out << m_Accession << '.' << m_SpotId;
122 }
123 
124 
125 bool CSRABlobId::operator<(const CBlobId& id) const
126 {
127  const CSRABlobId& sra2 = dynamic_cast<const CSRABlobId&>(id);
128  return m_Accession < sra2.m_Accession ||
129  (m_Accession == sra2.m_Accession && m_SpotId < sra2.m_SpotId);
130 }
131 
132 
133 bool CSRABlobId::operator==(const CBlobId& id) const
134 {
135  const CSRABlobId& sra2 = dynamic_cast<const CSRABlobId&>(id);
136  return m_Accession == sra2.m_Accession && m_SpotId == sra2.m_SpotId;
137 }
138 
139 
140 /////////////////////////////////////////////////////////////////////////////
141 // CSRADataLoader_Impl
142 /////////////////////////////////////////////////////////////////////////////
143 
144 
145 static bool GetTrimParam(void)
146 {
147  static NCBI_PARAM_TYPE(SRA_LOADER, TRIM) s_Value;
148  return s_Value.Get();
149 }
150 
151 
153  : m_Mgr(trim)
154 {
155 }
156 
157 
159 {
160 }
161 
162 
164  unsigned spot_id)
165 {
166  CMutexGuard LOCK(m_Mutex);
167  if ( m_Run.GetAccession() != accession ) {
168  m_Run.Init(m_Mgr, accession);
169  }
170  return m_Run.GetSpotEntry(spot_id);
171 }
172 
173 
175  unsigned spot_id,
176  unsigned read_id)
177 {
178  CMutexGuard LOCK(m_Mutex);
179  if ( m_Run.GetAccession() != accession ) {
180  m_Run.Init(m_Mgr, accession);
181  }
182  return m_Run.GetSequenceType(spot_id, read_id);
183 }
184 
185 
187  unsigned spot_id,
188  unsigned read_id)
189 {
190  CMutexGuard LOCK(m_Mutex);
191  if ( m_Run.GetAccession() != accession ) {
192  m_Run.Init(m_Mgr, accession);
193  }
194  return m_Run.GetSequenceLength(spot_id, read_id);
195 }
196 
197 
198 /////////////////////////////////////////////////////////////////////////////
199 // CSRADataLoader
200 /////////////////////////////////////////////////////////////////////////////
201 
203  : m_Trim(GetTrimParam())
204 {
205 }
206 
207 
209  : m_Trim(trim)
210 {
211 }
212 
213 
215 {
216 }
217 
218 
221  CObjectManager::EIsDefault is_default,
222  CObjectManager::TPriority priority)
223 {
224  SLoaderParams params;
225  TMaker maker(params);
226  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
227  return maker.GetRegisterInfo();
228 }
229 
230 
233  const string& rep_path,
234  const string& vol_path,
235  CObjectManager::EIsDefault is_default,
236  CObjectManager::TPriority priority)
237 {
238  SLoaderParams params;
239  params.m_RepPath = rep_path;
240  params.m_VolPath = vol_path;
241  TMaker maker(params);
242  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
243  return maker.GetRegisterInfo();
244 }
245 
246 
249  ETrim trim,
250  CObjectManager::EIsDefault is_default,
251  CObjectManager::TPriority priority)
252 {
253  SLoaderParams params(trim == eTrim);
254  TMaker maker(params);
255  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
256  return maker.GetRegisterInfo();
257 }
258 
259 
262  const string& rep_path,
263  const string& vol_path,
264  ETrim trim,
265  CObjectManager::EIsDefault is_default,
266  CObjectManager::TPriority priority)
267 {
268  SLoaderParams params(trim == eTrim);
269  params.m_RepPath = rep_path;
270  params.m_VolPath = vol_path;
271  TMaker maker(params);
272  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
273  return maker.GetRegisterInfo();
274 }
275 
276 
278 {
279  string ret = "SRADataLoader";
280  if ( params.m_Trim ) {
281  ret += "Trim";
282  }
283  if ( !params.m_RepPath.empty() || !params.m_VolPath.empty() ) {
284  ret += ":";
285  ret += params.m_RepPath;
286  ret += ":";
287  ret += params.m_VolPath;
288  }
289  return ret;
290 }
291 
292 
294 {
295  SLoaderParams params;
296  return GetLoaderNameFromArgs(params);
297 }
298 
299 
300 string CSRADataLoader::GetLoaderNameFromArgs(const string& rep_path,
301  const string& vol_path)
302 {
303  SLoaderParams params;
304  params.m_RepPath = rep_path;
305  params.m_VolPath = vol_path;
306  return GetLoaderNameFromArgs(params);
307 }
308 
309 
311 {
312  SLoaderParams params(trim == eTrim);
313  return GetLoaderNameFromArgs(params);
314 }
315 
316 
317 string CSRADataLoader::GetLoaderNameFromArgs(const string& rep_path,
318  const string& vol_path,
319  ETrim trim)
320 {
321  SLoaderParams params(trim == eTrim);
322  params.m_RepPath = rep_path;
323  params.m_VolPath = vol_path;
324  return GetLoaderNameFromArgs(params);
325 }
326 
327 
328 CSRADataLoader::CSRADataLoader(const string& loader_name,
329  const SLoaderParams& params)
330  : CDataLoader(loader_name)
331 {
333  m_Impl = new CSRADataLoader_Impl(trim);
334 }
335 
336 
338 {
339 }
340 
341 
342 typedef pair<CRef<CSRABlobId>, unsigned> TReadId;
343 
344 static TReadId sx_GetReadId(const string& sra, bool with_chunk)
345 {
346  SIZE_TYPE dot1 = sra.find('.');
347  if ( dot1 == NPOS ) {
348  return TReadId();
349  }
350  SIZE_TYPE dot2 = with_chunk? sra.find('.', dot1+1): sra.size();
351  if ( dot2 == NPOS || dot1+1 >= dot2 || sra[dot1+1] == '0' ||
352  (with_chunk && (dot2+2 != sra.size() ||
353  (sra[dot2+1] != '2' && sra[dot2+1] != '4') )) ) {
354  return TReadId();
355  }
356  unsigned spot_id =
357  NStr::StringToUInt(CTempString(sra.data()+dot1+1, dot2-dot1-1));
358  TReadId ret;
359  ret.first = new CSRABlobId(sra.substr(0, dot1), spot_id);
360  ret.second = sra[dot2+1] - '0';
361  return ret;
362 }
363 
364 
366 {
367  if ( idh.Which() != CSeq_id::e_General ) {
368  return TReadId();
369  }
370  CConstRef<CSeq_id> id = idh.GetSeqId();
371  const CDbtag& general = id->GetGeneral();
372  if ( general.GetDb() != "SRA") {
373  return TReadId();
374  }
375  return sx_GetReadId(general.GetTag().GetStr(), true);
376 }
377 
378 
380 {
381  return TBlobId(sx_GetReadId(idh).first);
382 }
383 
384 
387 {
388  return TBlobId(sx_GetReadId(str, false).first);
389 }
390 
391 
393 {
394  return true;
395 }
396 
397 
400  EChoice /* choice */)
401 {
402  TTSE_LockSet locks;
403  TBlobId blob_id = GetBlobId(idh);
404  if ( blob_id ) {
405  locks.insert(GetBlobById(blob_id));
406  }
407  return locks;
408 }
409 
410 
413 {
414  CTSE_LoadLock load_lock = GetDataSource()->GetTSE_LoadLock(blob_id);
415  if ( !load_lock.IsLoaded() ) {
416  const CSRABlobId& sra_id = dynamic_cast<const CSRABlobId&>(*blob_id);
417  CRef<CSeq_entry> entry =
418  m_Impl->LoadSRAEntry(sra_id.m_Accession, sra_id.m_SpotId);
419  if ( entry ) {
420  load_lock->SetSeq_entry(*entry);
421  }
422  load_lock.SetLoaded();
423  }
424  return load_lock;
425 }
426 
427 
429 {
430  TReadId read_id = sx_GetReadId(idh);
431  if ( read_id.first ) {
432  const CSRABlobId& sra_id = *read_id.first;
433  return m_Impl->GetSequenceLength(sra_id.m_Accession,
434  sra_id.m_SpotId,
435  read_id.second);
436  }
437  return kInvalidSeqPos;
438 }
439 
440 
443 {
444  STypeFound ret;
445  TReadId read_id = sx_GetReadId(idh);
446  if ( read_id.first ) {
447  const CSRABlobId& sra_id = *read_id.first;
448  ret.sequence_found = true;
449  ret.type = m_Impl->GetSequenceType(sra_id.m_Accession,
450  sra_id.m_SpotId,
451  read_id.second);
452  }
453  return ret;
454 }
455 
456 
458 {
460 }
461 
462 
464 
465 // ===========================================================================
466 
468 
470 {
471  RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_Sra);
472 }
473 
474 
475 const string kDataLoader_Sra_DriverName("sra");
476 
478 {
479 public:
482  virtual ~CSRA_DataLoaderCF(void) {}
483 
484 protected:
487  const TPluginManagerParamTree* params) const;
488 };
489 
490 
493  const TPluginManagerParamTree* params) const
494 {
495  if ( !ValidParams(params) ) {
496  // Use constructor without arguments
498  }
499  // IsDefault and Priority arguments may be specified
501  om,
502  GetIsDefault(params),
503  GetPriority(params)).GetLoader();
504 }
505 
506 
510 {
512 }
513 
514 
518 {
519  NCBI_EntryPoint_DataLoader_Sra(info_list, method);
520 }
521 
522 
CObjectManager::TPriority GetPriority(const TPluginManagerParamTree *params) const
CObjectManager::EIsDefault GetIsDefault(const TPluginManagerParamTree *params) const
bool ValidParams(const TPluginManagerParamTree *params) const
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
Definition: Dbtag.hpp:53
bool IsDataLoaderMatches(CDataLoader &loader) const
Definition: sraloader.cpp:67
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectManager –.
CObject –.
Definition: ncbiobj.hpp:180
bool operator==(const CBlobId &id) const
Definition: sraloader.cpp:133
CSRABlobId(const string &acc, unsigned spot_id)
Definition: sraloader.cpp:106
string ToString(void) const
Get string representation of blob id.
Definition: sraloader.cpp:117
string m_Accession
Definition: sraloader.cpp:97
bool operator<(const CBlobId &id) const
Definition: sraloader.cpp:125
unsigned m_SpotId
Definition: sraloader.cpp:98
~CSRABlobId(void)
Definition: sraloader.cpp:112
CRef< CSeq_entry > LoadSRAEntry(const string &accession, unsigned spot_id)
Definition: sraloader.cpp:163
TSeqPos GetSequenceLength(const string &accession, unsigned spot_id, unsigned read_id)
Definition: sraloader.cpp:186
CSRADataLoader_Impl(CSraMgr::ETrim trim)
Definition: sraloader.cpp:152
CSeq_inst::TMol GetSequenceType(const string &accession, unsigned spot_id, unsigned read_id)
Definition: sraloader.cpp:174
CSRADataLoader(void)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: sraloader.cpp:219
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Request for a length of a sequence.
Definition: sraloader.cpp:428
virtual STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh)
Definition: sraloader.cpp:442
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
Definition: sraloader.cpp:379
~CSRADataLoader(void)
Definition: sraloader.cpp:337
virtual bool CanGetBlobById(void) const
Definition: sraloader.cpp:392
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
Definition: sraloader.cpp:412
static string GetLoaderNameFromArgs(void)
Definition: sraloader.cpp:293
virtual TBlobId GetBlobIdFromString(const string &str) const
Definition: sraloader.cpp:386
CRef< CSRADataLoader_Impl > m_Impl
Definition: sraloader.hpp:129
virtual CObjectManager::TPriority GetDefaultPriority(void) const
Definition: sraloader.cpp:457
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
Definition: sraloader.cpp:399
virtual ~CSRA_DataLoaderCF(void)
Definition: sraloader.cpp:482
virtual CDataLoader * CreateAndRegister(CObjectManager &om, const TPluginManagerParamTree *params) const
Definition: sraloader.cpp:491
CSafeStaticLifeSpan::
@ eLifeLevel_AppMain
Destroyed in CNcbiApplication::AppMain, if possible.
CSafeStatic<>::
@ eNoTrim
Definition: sraread.hpp:104
void Init(CSraMgr &mgr, const string &acc)
Definition: sraread.cpp:452
const string & GetAccession(void) const
Definition: sraread.hpp:169
TSeqPos GetSequenceLength(spotid_t spot_id, uint8_t read_id) const
Definition: sraread.cpp:613
CSeq_inst::TMol GetSequenceType(spotid_t spot_id, uint8_t read_id) const
Definition: sraread.cpp:602
CRef< CSeq_entry > GetSpotEntry(spotid_t spot_id) const
Definition: sraread.cpp:497
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
Definition: tse_info.cpp:351
bool IsLoaded(void) const
void SetLoaded(void)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
definition of a Culling tree
Definition: ncbi_tree.hpp:100
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static const char * str(char *buf, int n)
Definition: stats.c:84
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
CConstRef< CSeq_id > GetSeqId(void) const
CSeq_id::E_Choice Which(void) const
TLoader * GetLoader(void) const
Get pointer to the loader.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void RevokeDataLoaders(IDataLoaderFilter &filter)
Revoke data loaders by filter, even if they were still used.
CSeq_inst::TMol type
CBlobIdKey TBlobId
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
main blob is blob with sequence all other blobs are external and contain external annotations
EIsDefault
Flag defining if the data loader is included in the "default" group.
TRegisterInfo GetRegisterInfo(void)
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
@ kPriority_Replace
Default priority for replacement loaders.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
static void NCBI_EntryPointImpl(TDriverInfoList &info_list, EEntryPointRequest method)
Entry point implementation.
list< SDriverInfo > TDriverInfoList
List of driver information.
EEntryPointRequest
Actions performed by the entry point.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NPOS
Definition: ncbistr.hpp:133
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
@ e_General
for other databases
Definition: Seq_id_.hpp:105
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
string s_Value(TValue value)
Helper classes and templates to implement plugins.
USING_SCOPE(objects)
void NCBI_EntryPoint_DataLoader_Sra(CPluginManager< CDataLoader >::TDriverInfoList &info_list, CPluginManager< CDataLoader >::EEntryPointRequest method)
Definition: sraloader.cpp:507
const string kDataLoader_Sra_DriverName("sra")
NCBI_PARAM_DEF_EX(bool, SRA_LOADER, TRIM, false, eParam_NoThread, SRA_LOADER_TRIM)
BEGIN_LOCAL_NAMESPACE
Definition: sraloader.cpp:61
void NCBI_EntryPoint_xloader_sra(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
Definition: sraloader.cpp:515
pair< CRef< CSRABlobId >, unsigned > TReadId
Definition: sraloader.cpp:342
static bool GetTrimParam(void)
Definition: sraloader.cpp:145
END_LOCAL_NAMESPACE
Definition: sraloader.cpp:84
void DataLoaders_Register_SRA(void)
Definition: sraloader.cpp:469
static TReadId sx_GetReadId(const string &sra, bool with_chunk)
Definition: sraloader.cpp:344
NCBI_PARAM_DECL(bool, SRA_LOADER, TRIM)
static CSafeStatic< CRevoker > s_Revoker(CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeLevel_AppMain, CSafeStaticLifeSpan::eLifeSpan_Long))
CRef< objects::CObjectManager > om
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SRegisterLoaderInfo –.
Modified on Fri Sep 20 14:58:10 2024 by modify_doxy.py rev. 669887