NCBI C++ ToolKit
csraloader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: csraloader.cpp 100508 2023-08-08 16:06:08Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko
27  *
28  * File Description: CSRA file data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objects/seq/seq__.hpp>
40 
47 
50 
53 
54 class CDataLoader;
55 
57 
59 public:
60  bool IsDataLoaderMatches(CDataLoader& loader) const {
61  return dynamic_cast<CCSRADataLoader*>(&loader) != 0;
62  }
63 };
64 
65 
66 class CRevoker {
67 public:
69  CLoaderFilter filter;
71  }
72 };
76 
78 
79 
80 /////////////////////////////////////////////////////////////////////////////
81 // CCSRADataLoader params
82 /////////////////////////////////////////////////////////////////////////////
83 
84 
85 NCBI_PARAM_DECL(string, CSRA, ACCESSIONS);
86 NCBI_PARAM_DEF(string, CSRA, ACCESSIONS, "");
87 
88 
89 NCBI_PARAM_DECL(bool, CSRA_LOADER, PILEUP_GRAPHS);
90 NCBI_PARAM_DEF_EX(bool, CSRA_LOADER, PILEUP_GRAPHS, true,
91  eParam_NoThread, CSRA_LOADER_PILEUP_GRAPHS);
92 
94 {
95  return NCBI_PARAM_TYPE(CSRA_LOADER, PILEUP_GRAPHS)::GetDefault();
96 }
97 
98 
100 {
101  NCBI_PARAM_TYPE(CSRA_LOADER, PILEUP_GRAPHS)::SetDefault(param);
102 }
103 
104 
105 NCBI_PARAM_DECL(bool, CSRA_LOADER, QUALITY_GRAPHS);
106 NCBI_PARAM_DEF_EX(bool, CSRA_LOADER, QUALITY_GRAPHS, false,
107  eParam_NoThread, CSRA_LOADER_QUALITY_GRAPHS);
108 
110 {
111  return NCBI_PARAM_TYPE(CSRA_LOADER, QUALITY_GRAPHS)::GetDefault();
112 }
113 
114 
116 {
117  return NCBI_PARAM_TYPE(CSRA_LOADER, QUALITY_GRAPHS)::SetDefault(param);
118 }
119 
120 
121 NCBI_PARAM_DECL(int, CSRA_LOADER, MIN_MAP_QUALITY);
122 NCBI_PARAM_DEF_EX(int, CSRA_LOADER, MIN_MAP_QUALITY, 0,
123  eParam_NoThread, CSRA_LOADER_MIN_MAP_QUALITY);
124 
126 {
127  return NCBI_PARAM_TYPE(CSRA_LOADER, MIN_MAP_QUALITY)::GetDefault();
128 }
129 
130 
132 {
133  return NCBI_PARAM_TYPE(CSRA_LOADER, MIN_MAP_QUALITY)::SetDefault(param);
134 }
135 
136 
137 NCBI_PARAM_DECL(int, CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS);
138 NCBI_PARAM_DEF_EX(int, CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS, 0,
139  eParam_NoThread, CSRA_LOADER_MAX_SEPARATE_SPOT_GROUPS);
140 
142 {
143  return NCBI_PARAM_TYPE(CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS)::GetDefault();
144 }
145 
146 
148 {
149  return NCBI_PARAM_TYPE(CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS)::SetDefault(param);
150 }
151 
152 
153 NCBI_PARAM_DECL(bool, CSRA_LOADER, SPOT_READ_ALIGN);
154 NCBI_PARAM_DEF(bool, CSRA_LOADER, SPOT_READ_ALIGN, false);
155 
157 {
158  return NCBI_PARAM_TYPE(CSRA_LOADER, SPOT_READ_ALIGN)::GetDefault();
159 }
160 
161 
163 {
164  return NCBI_PARAM_TYPE(CSRA_LOADER, SPOT_READ_ALIGN)::SetDefault(param);
165 }
166 
167 
169 {
170  return m_Impl->GetSpotReadAlign();
171 }
172 
173 
175 {
177 }
178 
179 
181 {
184 }
185 
186 
188 {
189  return m_PileupGraphs != kPileupGraphs_config?
190  m_PileupGraphs != 0: CCSRADataLoader::GetPileupGraphsParamDefault();
191 }
192 
193 
195 {
196  return m_QualityGraphs != kQualityGraphs_config?
197  m_QualityGraphs != 0: CCSRADataLoader::GetQualityGraphsParamDefault();
198 }
199 
200 
202 {
203  return m_SpotReadAlign != kSpotReadAlign_config?
204  m_SpotReadAlign != 0: CCSRADataLoader::GetSpotReadAlignParamDefault();
205 }
206 
207 
209 {
210  return m_SpotGroups != kSpotGroups_config?
211  m_SpotGroups != 0: CCSRADataLoader::GetSpotGroupsParamDefault();
212 }
213 
214 
216 {
218  str << "CCSRADataLoader:" << m_DirPath;
219  if ( !m_CSRAFiles.empty() ) {
220  str << "/files=";
221  ITERATE ( vector<string>, it, m_CSRAFiles ) {
222  str << "+" << *it;
223  }
224  }
225  if ( m_IdMapper ) {
226  str << "/mapper=" << m_IdMapper.get();
227  }
228  if ( !m_AnnotName.empty() ) {
229  str << "/name=" << m_AnnotName;
230  }
231  if ( m_MinMapQuality != kMinMapQuality_config ) {
232  str << "/q=" << m_MinMapQuality;
233  }
234  if ( m_PileupGraphs != kPileupGraphs_config ) {
235  str << "/pileup_graphs=" << m_PileupGraphs;
236  }
237  if ( m_QualityGraphs != kQualityGraphs_config ) {
238  str << "/quality_graphs=" << m_QualityGraphs;
239  }
240  if ( m_SpotGroups != kSpotGroups_config ) {
241  str << "/spot_groups=" << m_SpotGroups;
242  }
243  if ( m_PathInId != kPathInId_config ) {
244  str << "/path_in_id=" << m_PathInId;
245  }
247 }
248 
249 
250 /////////////////////////////////////////////////////////////////////////////
251 // CCSRADataLoader
252 /////////////////////////////////////////////////////////////////////////////
253 
256  const SLoaderParams& params,
257  CObjectManager::EIsDefault is_default,
258  CObjectManager::TPriority priority)
259 {
260  TMaker maker(params);
261  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
262  return maker.GetRegisterInfo();
263 }
264 
265 
268  CObjectManager::EIsDefault is_default,
269  CObjectManager::TPriority priority)
270 {
271  SLoaderParams params;
272  NStr::Split(NCBI_PARAM_TYPE(CSRA, ACCESSIONS)::GetDefault(), ",",
273  params.m_CSRAFiles);
274  TMaker maker(params);
275  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
276  return maker.GetRegisterInfo();
277 }
278 
279 
282  const string& srz_acc,
283  CObjectManager::EIsDefault is_default,
284  CObjectManager::TPriority priority)
285 {
286  SLoaderParams params;
287  params.m_DirPath = srz_acc;
288  TMaker maker(params);
289  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
290  return maker.GetRegisterInfo();
291 }
292 
293 
296  const string& dir_path,
297  const string& csra_name,
298  CObjectManager::EIsDefault is_default,
299  CObjectManager::TPriority priority)
300 {
301  SLoaderParams params;
302  params.m_DirPath = dir_path;
303  params.m_CSRAFiles.push_back(csra_name);
304  TMaker maker(params);
305  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
306  return maker.GetRegisterInfo();
307 }
308 
309 
312  const string& dir_path,
313  const vector<string>& csra_files,
314  CObjectManager::EIsDefault is_default,
315  CObjectManager::TPriority priority)
316 {
317  SLoaderParams params;
318  params.m_DirPath = dir_path;
319  params.m_CSRAFiles = csra_files;
320  TMaker maker(params);
321  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
322  return maker.GetRegisterInfo();
323 }
324 
325 
327 {
328  return "CCSRADataLoader";
329 }
330 
331 
333 {
334  return params.GetLoaderName();
335 }
336 
337 
338 string CCSRADataLoader::GetLoaderNameFromArgs(const string& srz_acc)
339 {
340  SLoaderParams params;
341  params.m_DirPath = srz_acc;
342  return GetLoaderNameFromArgs(params);
343 }
344 
345 
346 string CCSRADataLoader::GetLoaderNameFromArgs(const string& dir_path,
347  const string& csra_name)
348 {
349  SLoaderParams params;
350  params.m_DirPath = dir_path;
351  params.m_CSRAFiles.push_back(csra_name);
352  return GetLoaderNameFromArgs(params);
353 }
354 
355 
357  const string& dir_path,
358  const vector<string>& csra_files)
359 {
360  SLoaderParams params;
361  params.m_DirPath = dir_path;
362  params.m_CSRAFiles = csra_files;
363  return GetLoaderNameFromArgs(params);
364 }
365 
366 
367 CCSRADataLoader::CCSRADataLoader(const string& loader_name,
368  const SLoaderParams& params)
369  : CDataLoader(loader_name)
370 {
371  string dir_path = params.m_DirPath;
372 /*
373  if ( dir_path.empty() ) {
374  dir_path = NCBI_PARAM_TYPE(CSRA, DIR_PATH)::GetDefault();
375  }
376 */
377  m_Impl.Reset(new CCSRADataLoader_Impl(params));
378 }
379 
380 
382 {
383 }
384 
385 
387 {
388  return TBlobId(m_Impl->GetBlobId(idh).GetPointerOrNull());
389 }
390 
391 
394 {
395  return TBlobId(new CCSRABlobId(str));
396 }
397 
398 
400 {
401  return true;
402 }
403 
404 
407  EChoice choice)
408 {
409  return m_Impl->GetRecords(GetDataSource(), idh, choice);
410 }
411 
412 
414 {
415  TBlobId blob_id = chunk->GetBlobId();
416  const CCSRABlobId& csra_id = dynamic_cast<const CCSRABlobId&>(*blob_id);
417  m_Impl->GetChunk(csra_id, *chunk);
418 }
419 
420 
422 {
423  ITERATE ( TChunkSet, it, chunks ) {
424  GetChunk(*it);
425  }
426 }
427 
428 
431 {
432  return m_Impl->GetBlobById(GetDataSource(),
433  dynamic_cast<const CCSRABlobId&>(*blob_id));
434 }
435 
436 
438 {
439  return m_Impl->GetPossibleAnnotNames();
440 }
441 
442 
444 {
445  m_Impl->GetIds(idh, ids);
446 }
447 
448 
451 {
452  return m_Impl->GetAccVer(idh);
453 }
454 
455 
458 {
459  return m_Impl->GetGi(idh);
460 }
461 
462 
464 {
465  return m_Impl->GetLabel(idh);
466 }
467 
468 
470 {
471  return m_Impl->GetTaxId(idh);
472 }
473 
474 
476 {
477  return m_Impl->GetSequenceLength(idh);
478 }
479 
480 
483 {
484  return m_Impl->GetSequenceType(idh);
485 }
486 
487 
489 {
491 }
492 
493 
495 {
496  // do not cache released BAM TSEs
497  return 0;
498 }
499 
500 
502 
503 // ===========================================================================
504 
506 
508 {
509  RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_CSRA);
510 }
511 
512 
513 const char kDataLoader_CSRA_DriverName[] = "csra";
514 
516 {
517 public:
520  virtual ~CCSRA_DataLoaderCF(void) {}
521 
522 protected:
525  const TPluginManagerParamTree* params) const;
526 };
527 
528 
531  const TPluginManagerParamTree* params) const
532 {
533  if ( !ValidParams(params) ) {
534  // Use constructor without arguments
536  }
537  // IsDefault and Priority arguments may be specified
539  om,
540  GetIsDefault(params),
541  GetPriority(params)).GetLoader();
542 }
543 
544 
548 {
550 }
551 
552 
556 {
557  NCBI_EntryPoint_DataLoader_CSRA(info_list, method);
558 }
559 
560 
CTSE_LoadLock GetBlobById(CDataSource *data_source, const CCSRABlobId &blob_id)
CDataSource::SGiFound GetGi(const CSeq_id_Handle &idh)
void SetSpotReadAlign(bool value)
CRef< CCSRABlobId > GetBlobId(const CSeq_id_Handle &idh)
CDataLoader::TTSE_LockSet GetRecords(CDataSource *data_source, const CSeq_id_Handle &idh, CDataLoader::EChoice choice)
void GetIds(const CSeq_id_Handle &idh, TIds &ids)
string GetLabel(const CSeq_id_Handle &idh)
CDataSource::STypeFound GetSequenceType(const CSeq_id_Handle &idh)
TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
CDataSource::SAccVerFound GetAccVer(const CSeq_id_Handle &idh)
bool GetSpotReadAlign(void) const
TAnnotNames GetPossibleAnnotNames(void) const
TTaxId GetTaxId(const CSeq_id_Handle &idh)
void GetChunk(const CCSRABlobId &blob_id, CTSE_Chunk_Info &chunk)
static bool GetQualityGraphsParamDefault(void)
Definition: csraloader.cpp:109
virtual bool CanGetBlobById(void) const
Definition: csraloader.cpp:399
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
Definition: csraloader.cpp:430
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
Definition: csraloader.cpp:406
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Definition: csraloader.cpp:443
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Request for a length of a sequence.
Definition: csraloader.cpp:475
CRef< CCSRADataLoader_Impl > m_Impl
Definition: csraloader.hpp:181
virtual void GetChunk(TChunk chunk)
Definition: csraloader.cpp:413
static void SetQualityGraphsParamDefault(bool param)
Definition: csraloader.cpp:115
vector< CAnnotName > TAnnotNames
Definition: csraloader.hpp:141
virtual TTaxId GetTaxId(const CSeq_id_Handle &idh)
Request for a taxonomy id of a sequence.
Definition: csraloader.cpp:469
virtual SAccVerFound GetAccVerFound(const CSeq_id_Handle &idh)
Definition: csraloader.cpp:450
void SetSpotReadAlign(bool value)
Definition: csraloader.cpp:174
static bool GetSpotReadAlignParamDefault(void)
Definition: csraloader.cpp:156
virtual STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh)
Definition: csraloader.cpp:482
virtual void GetChunks(const TChunkSet &chunks)
Definition: csraloader.cpp:421
static void SetSpotReadAlignParamDefault(bool param)
Definition: csraloader.cpp:162
TAnnotNames GetPossibleAnnotNames(void) const
Definition: csraloader.cpp:437
static bool GetPileupGraphsParamDefault(void)
Definition: csraloader.cpp:93
virtual CObjectManager::TPriority GetDefaultPriority(void) const
Definition: csraloader.cpp:488
virtual TBlobId GetBlobIdFromString(const string &str) const
Definition: csraloader.cpp:393
virtual string GetLabel(const CSeq_id_Handle &idh)
Request for a label string of a sequence.
Definition: csraloader.cpp:463
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: csraloader.cpp:254
static void SetPileupGraphsParamDefault(bool param)
Definition: csraloader.cpp:99
static string GetLoaderNameFromArgs(void)
Definition: csraloader.cpp:326
virtual unsigned GetDefaultBlobCacheSizeLimit() const
Definition: csraloader.cpp:494
virtual SGiFound GetGiFound(const CSeq_id_Handle &idh)
Definition: csraloader.cpp:457
static int GetMinMapQualityParamDefault(void)
Definition: csraloader.cpp:125
static void SetMinMapQualityParamDefault(int param)
Definition: csraloader.cpp:131
static void SetSpotGroupsParamDefault(int param)
Definition: csraloader.cpp:147
~CCSRADataLoader(void)
Definition: csraloader.cpp:381
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
Definition: csraloader.cpp:386
static int GetSpotGroupsParamDefault(void)
Definition: csraloader.cpp:141
bool GetSpotReadAlign() const
Definition: csraloader.cpp:168
virtual CDataLoader * CreateAndRegister(CObjectManager &om, const TPluginManagerParamTree *params) const
Definition: csraloader.cpp:529
virtual ~CCSRA_DataLoaderCF(void)
Definition: csraloader.cpp:520
CObjectManager::TPriority GetPriority(const TPluginManagerParamTree *params) const
CObjectManager::EIsDefault GetIsDefault(const TPluginManagerParamTree *params) const
bool ValidParams(const TPluginManagerParamTree *params) const
bool IsDataLoaderMatches(CDataLoader &loader) const
Definition: csraloader.cpp:60
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectManager –.
CSafeStaticLifeSpan::
@ eLifeLevel_AppMain
Destroyed in CNcbiApplication::AppMain, if possible.
CSafeStatic<>::
TBlobId GetBlobId(void) const
definition of a Culling tree
Definition: ncbi_tree.hpp:100
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
USING_SCOPE(objects)
NCBI_PARAM_DEF_EX(bool, CSRA_LOADER, PILEUP_GRAPHS, true, eParam_NoThread, CSRA_LOADER_PILEUP_GRAPHS)
BEGIN_LOCAL_NAMESPACE
Definition: csraloader.cpp:54
END_LOCAL_NAMESPACE
Definition: csraloader.cpp:77
void NCBI_EntryPoint_DataLoader_CSRA(CPluginManager< CDataLoader >::TDriverInfoList &info_list, CPluginManager< CDataLoader >::EEntryPointRequest method)
Definition: csraloader.cpp:545
NCBI_PARAM_DECL(string, CSRA, ACCESSIONS)
void DataLoaders_Register_CSRA(void)
Definition: csraloader.cpp:507
NCBI_PARAM_DEF(string, CSRA, ACCESSIONS, "")
const char kDataLoader_CSRA_DriverName[]
Definition: csraloader.cpp:513
void NCBI_EntryPoint_xloader_csra(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
Definition: csraloader.cpp:553
static CSafeStatic< CRevoker > s_Revoker(CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeLevel_AppMain, CSafeStaticLifeSpan::eLifeSpan_Long))
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
TLoader * GetLoader(void) const
Get pointer to the loader.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< CSeq_id_Handle > TIds
void RevokeDataLoaders(IDataLoaderFilter &filter)
Revoke data loaders by filter, even if they were still used.
CBlobIdKey TBlobId
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
main blob is blob with sequence all other blobs are external and contain external annotations
EIsDefault
Flag defining if the data loader is included in the "default" group.
TRegisterInfo GetRegisterInfo(void)
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
vector< TChunk > TChunkSet
@ kPriority_Replace
Default priority for replacement loaders.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
static void NCBI_EntryPointImpl(TDriverInfoList &info_list, EEntryPointRequest method)
Entry point implementation.
list< SDriverInfo > TDriverInfoList
List of driver information.
EEntryPointRequest
Actions performed by the entry point.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
Helper classes and templates to implement plugins.
CRef< objects::CObjectManager > om
static const char * str(char *buf, int n)
Definition: stats.c:84
string GetLoaderName(void) const
Definition: csraloader.cpp:215
bool GetEffectiveQualityGraphs(void) const
Definition: csraloader.cpp:194
int GetEffectiveMinMapQuality(void) const
Definition: csraloader.cpp:180
int GetEffectiveSpotGroups(void) const
Definition: csraloader.cpp:208
bool GetEffectiveSpotReadAlign(void) const
Definition: csraloader.cpp:201
bool GetEffectivePileupGraphs(void) const
Definition: csraloader.cpp:187
vector< string > m_CSRAFiles
Definition: csraloader.hpp:77
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SRegisterLoaderInfo –.
Modified on Thu Feb 29 12:16:45 2024 by modify_doxy.py rev. 669887