NCBI C++ ToolKit
bamloader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bamloader.cpp 90546 2020-06-26 12:57:19Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko
27  *
28  * File Description: BAM file data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objects/seq/seq__.hpp>
40 
47 
50 
53 
54 class CDataLoader;
55 
56 
58 
60 public:
61  bool IsDataLoaderMatches(CDataLoader& loader) const {
62  return dynamic_cast<CBAMDataLoader*>(&loader) != 0;
63  }
64 };
65 
66 
// Helper type holding a loader filter. The symbol index of this listing shows
// a static CSafeStatic<CRevoker> instance (s_Revoker) created with an
// AppMain-level life span.
// NOTE(review): listing lines 69 and 71 are missing from this extract, so the
// member function that owns the lone closing brace below is not visible. It
// is presumably a destructor that passes `filter` to
// CObjectManager::RevokeDataLoaders() -- confirm against the original file.
67 class CRevoker {
68 public:
// Filter object; presumably the filter class defined just above, whose
// IsDataLoaderMatches() accepts loaders that dynamic_cast to CBAMDataLoader.
70  CLoaderFilter filter;
72  }
73 };
77 
79 
80 
// Configuration parameter, section BAM / name DIR_PATH: a string whose
// default is empty and which does not use per-thread values (eParam_NoThread).
// The CBAMDataLoader constructor reads its default when the supplied
// SLoaderParams::m_DirPath is empty.
// NOTE(review): the last macro argument (BAM_DIR_PATH) is presumably the
// environment variable name -- confirm against the NCBI_PARAM_DEF_EX docs.
81 NCBI_PARAM_DECL(string, BAM, DIR_PATH);
82 NCBI_PARAM_DEF_EX(string, BAM, DIR_PATH, "",
83  eParam_NoThread, BAM_DIR_PATH);
84 
// Configuration parameter, section BAM / name BAM_NAME: a string whose
// default is empty and which does not use per-thread values (eParam_NoThread).
// The CBAMDataLoader constructor reads its default as the BAM file name when
// the supplied SLoaderParams::m_BamFiles is empty.
// NOTE(review): the last macro argument (BAM_BAM_NAME) is presumably the
// environment variable name -- confirm against the NCBI_PARAM_DEF_EX docs.
85 NCBI_PARAM_DECL(string, BAM, BAM_NAME);
86 NCBI_PARAM_DEF_EX(string, BAM, BAM_NAME, "",
87  eParam_NoThread, BAM_BAM_NAME);
88 
// Configuration parameter, section BAM / name INDEX_NAME: a string whose
// default is empty and which does not use per-thread values (eParam_NoThread).
// The CBAMDataLoader constructor reads its default as the index file name
// when the supplied SLoaderParams::m_BamFiles is empty.
// NOTE(review): the last macro argument (BAM_INDEX_NAME) is presumably the
// environment variable name -- confirm against the NCBI_PARAM_DEF_EX docs.
89 NCBI_PARAM_DECL(string, BAM, INDEX_NAME);
90 NCBI_PARAM_DEF_EX(string, BAM, INDEX_NAME, "",
91  eParam_NoThread, BAM_INDEX_NAME);
92 
93 
94 /////////////////////////////////////////////////////////////////////////////
95 // CBAMDataLoader
96 /////////////////////////////////////////////////////////////////////////////
97 
100  const SLoaderParams& params,
101  CObjectManager::EIsDefault is_default,
102  CObjectManager::TPriority priority)
103 {
104  TMaker maker(params);
105  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
106  return maker.GetRegisterInfo();
107 }
108 
109 
112  CObjectManager::EIsDefault is_default,
113  CObjectManager::TPriority priority)
114 {
115  SLoaderParams params;
116  TMaker maker(params);
117  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
118  return maker.GetRegisterInfo();
119 }
120 
121 
124  const string& srz_acc,
125  CObjectManager::EIsDefault is_default,
126  CObjectManager::TPriority priority)
127 {
128  SLoaderParams params;
129  params.m_DirPath = srz_acc;
130  TMaker maker(params);
131  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
132  return maker.GetRegisterInfo();
133 }
134 
135 
138  const string& dir_path,
139  const string& bam_name,
140  const string& index_name,
141  CObjectManager::EIsDefault is_default,
142  CObjectManager::TPriority priority)
143 {
144  SLoaderParams params;
145  params.m_DirPath = dir_path;
146  params.m_BamFiles.push_back(SBamFileName(bam_name, index_name));
147  TMaker maker(params);
148  CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
149  return maker.GetRegisterInfo();
150 }
151 
152 
154 {
155  return "BAMDataLoader";
156 }
157 
158 
160 {
162  str << "BAMDataLoader:" << params.m_DirPath;
163  if ( !params.m_BamFiles.empty() ) {
164  str << "/files=";
165  ITERATE ( vector<SBamFileName>, it, params.m_BamFiles ) {
166  str << "+" << it->m_BamName;
167  }
168  }
169  if ( params.m_IdMapper ) {
170  str << "/mapper=" << params.m_IdMapper.get();
171  }
173 }
174 
175 
176 string CBAMDataLoader::GetLoaderNameFromArgs(const string& srz_acc)
177 {
178  SLoaderParams params;
179  params.m_DirPath = srz_acc;
180  return GetLoaderNameFromArgs(params);
181 }
182 
183 
184 string CBAMDataLoader::GetLoaderNameFromArgs(const string& dir_path,
185  const string& bam_name,
186  const string& index_name)
187 {
188  SLoaderParams params;
189  params.m_DirPath = dir_path;
190  params.m_BamFiles.push_back(SBamFileName(bam_name, index_name));
191  return GetLoaderNameFromArgs(params);
192 }
193 
194 
196  const string& dir_path,
197  const vector<SBamFileName>& bam_files)
198 {
199  SLoaderParams params;
200  params.m_DirPath = dir_path;
201  params.m_BamFiles = bam_files;
202  return GetLoaderNameFromArgs(params);
203 }
204 
205 
// Construct a BAM data loader.
//
// loader_name is forwarded to the CDataLoader base class. params0 is copied,
// any settings left empty by the caller are filled in from the BAM/*
// configuration parameter defaults, and the implementation object is created
// from the completed copy.
206 CBAMDataLoader::CBAMDataLoader(const string& loader_name,
207  const SLoaderParams& params0)
208  : CDataLoader(loader_name)
209 {
// Work on a copy so the caller's parameters are left untouched.
210  SLoaderParams params = params0;
// No directory given: fall back to the BAM/DIR_PATH parameter default.
211  if ( params.m_DirPath.empty() ) {
212  params.m_DirPath = NCBI_PARAM_TYPE(BAM, DIR_PATH)::GetDefault();
213  }
// No files given: try the BAM/BAM_NAME and BAM/INDEX_NAME parameter defaults.
214  if ( params.m_BamFiles.empty() ) {
// NOTE(review): listing line 215 is missing from this extract; it presumably
// declares `file` as an SBamFileName (the element type of m_BamFiles) --
// confirm against the original file.
216  file.m_BamName = NCBI_PARAM_TYPE(BAM, BAM_NAME)::GetDefault();
217  file.m_IndexName = NCBI_PARAM_TYPE(BAM, INDEX_NAME)::GetDefault();
// The entry is added only when a BAM name is configured; an index name alone
// does not add a file.
218  if ( !file.m_BamName.empty() ) {
219  params.m_BamFiles.push_back(file);
220  }
221  }
// Create the implementation object from the completed parameters.
222  m_Impl.Reset(new CBAMDataLoader_Impl(params));
223 }
224 
225 
227 {
228 }
229 
230 
232 {
233  return TBlobId(m_Impl->GetShortSeqBlobId(idh).GetPointerOrNull());
234 }
235 
236 
239 {
240  return TBlobId(new CBAMBlobId(str));
241 }
242 
243 
245 {
246  return true;
247 }
248 
249 
252  EChoice choice)
253 {
254  TTSE_LockSet locks;
255  if ( choice == eOrphanAnnot ) {
256  // alignment by refseqid
257  TBlobId blob_id(m_Impl->GetRefSeqBlobId(idh).GetPointerOrNull());
258  if ( blob_id ) {
259  locks.insert(GetBlobById(blob_id));
260  }
261  }
262  else {
263  // shortseqid || alignment by shortseqid
264  // look in the already loaded TSEs
265  TBlobId blob_id = GetBlobId(idh);
266  if ( blob_id ) {
267  locks.insert(GetBlobById(blob_id));
268  }
269  }
270  return locks;
271 }
272 
273 
275 {
276  TBlobId blob_id = chunk.GetBlobId();
277  const CBAMBlobId& bam_id = dynamic_cast<const CBAMBlobId&>(*blob_id);
278  return m_Impl.GetNCObject().EstimateLoadSeconds(bam_id, chunk, bytes);
279 }
280 
281 
283 {
284  TBlobId blob_id = chunk->GetBlobId();
285  const CBAMBlobId& bam_id = dynamic_cast<const CBAMBlobId&>(*blob_id);
286  m_Impl->LoadChunk(bam_id, *chunk);
287 }
288 
289 
291 {
292  ITERATE ( TChunkSet, it, chunks ) {
293  GetChunk(*it);
294  }
295 }
296 
297 
300 {
301  CTSE_LoadLock load_lock = GetDataSource()->GetTSE_LoadLock(blob_id);
302  if ( !load_lock.IsLoaded() ) {
303  const CBAMBlobId& bam_id = dynamic_cast<const CBAMBlobId&>(*blob_id);
304  m_Impl->LoadBAMEntry(bam_id, load_lock);
305  load_lock.SetLoaded();
306  }
307  return load_lock;
308 }
309 
310 
312 {
313  return m_Impl->GetPossibleAnnotNames();
314 }
315 
316 
318 {
319  m_Impl->GetIds(idh, ids);
320 }
321 
322 
325 {
326  return m_Impl->GetAccVer(idh);
327 }
328 
329 
332 {
333  return m_Impl->GetGi(idh);
334 }
335 
336 
338 {
339  return m_Impl->GetLabel(idh);
340 }
341 
342 
344 {
345  return m_Impl->GetTaxId(idh);
346 }
347 
348 
350 {
351  // do not cache released BAM TSEs
352  return 0;
353 }
354 
355 
357 
358 // ===========================================================================
359 
361 
363 {
364  RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_Bam);
365 }
366 
367 
// Driver name string, "bam".
// NOTE(review): presumably the name under which the BAM loader class factory
// is registered with the plugin manager; the lines that use it are missing
// from this extract -- confirm against the original file.
368 const string kDataLoader_Bam_DriverName("bam");
369 
371 {
372 public:
375  virtual ~CBAM_DataLoaderCF(void) {}
376 
377 protected:
380  const TPluginManagerParamTree* params) const;
381 };
382 
383 
386  const TPluginManagerParamTree* params) const
387 {
388  if ( !ValidParams(params) ) {
389  // Use constructor without arguments
391  }
392  // IsDefault and Priority arguments may be specified
394  om,
395  GetIsDefault(params),
396  GetPriority(params)).GetLoader();
397 }
398 
399 
403 {
405 }
406 
407 
411 {
412  NCBI_EntryPoint_DataLoader_Bam(info_list, method);
413 }
414 
415 
NCBI_PARAM_DECL(string, BAM, DIR_PATH)
USING_SCOPE(objects)
const string kDataLoader_Bam_DriverName("bam")
BEGIN_LOCAL_NAMESPACE
Definition: bamloader.cpp:54
END_LOCAL_NAMESPACE
Definition: bamloader.cpp:78
void NCBI_EntryPoint_xloader_bam(CPluginManager< objects::CDataLoader >::TDriverInfoList &info_list, CPluginManager< objects::CDataLoader >::EEntryPointRequest method)
Definition: bamloader.cpp:408
void NCBI_EntryPoint_DataLoader_Bam(CPluginManager< CDataLoader >::TDriverInfoList &info_list, CPluginManager< CDataLoader >::EEntryPointRequest method)
Definition: bamloader.cpp:400
void DataLoaders_Register_BAM(void)
Definition: bamloader.cpp:362
NCBI_PARAM_DEF_EX(string, BAM, DIR_PATH, "", eParam_NoThread, BAM_DIR_PATH)
static CSafeStatic< CRevoker > s_Revoker(CSafeStaticLifeSpan(CSafeStaticLifeSpan::eLifeLevel_AppMain, CSafeStaticLifeSpan::eLifeSpan_Long))
CDataSource::SGiFound GetGi(const CSeq_id_Handle &idh)
CRef< CBAMBlobId > GetRefSeqBlobId(const CSeq_id_Handle &idh)
CBAMDataLoader::TAnnotNames GetPossibleAnnotNames(void) const
void LoadChunk(const CBAMBlobId &blob_id, CTSE_Chunk_Info &chunk)
double EstimateLoadSeconds(const CBAMBlobId &blob_id, const CTSE_Chunk_Info &chunk, Uint4 bytes)
CDataSource::SAccVerFound GetAccVer(const CSeq_id_Handle &idh)
string GetLabel(const CSeq_id_Handle &idh)
void GetIds(const CSeq_id_Handle &idh, TIds &ids)
TTaxId GetTaxId(const CSeq_id_Handle &idh)
void LoadBAMEntry(const CBAMBlobId &blob_id, CTSE_LoadLock &load_lock)
CRef< CBAMBlobId > GetShortSeqBlobId(const CSeq_id_Handle &idh)
static string GetLoaderNameFromArgs(void)
Definition: bamloader.cpp:153
~CBAMDataLoader(void)
Definition: bamloader.cpp:226
virtual void GetChunks(const TChunkSet &chunks)
Definition: bamloader.cpp:290
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bamloader.cpp:98
virtual TBlobId GetBlobIdFromString(const string &str) const
Definition: bamloader.cpp:238
CRef< CBAMDataLoader_Impl > m_Impl
Definition: bamloader.hpp:165
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
Definition: bamloader.cpp:299
virtual double EstimateLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
Definition: bamloader.cpp:274
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
Definition: bamloader.cpp:231
virtual bool CanGetBlobById(void) const
Definition: bamloader.cpp:244
virtual string GetLabel(const CSeq_id_Handle &idh)
Request for a label string of a sequence.
Definition: bamloader.cpp:337
TAnnotNames GetPossibleAnnotNames(void) const
Definition: bamloader.cpp:311
virtual TTaxId GetTaxId(const CSeq_id_Handle &idh)
Request for a taxonomy id of a sequence.
Definition: bamloader.cpp:343
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Definition: bamloader.cpp:317
CBAMDataLoader(void)
virtual unsigned GetDefaultBlobCacheSizeLimit() const
Definition: bamloader.cpp:349
virtual void GetChunk(TChunk chunk)
Definition: bamloader.cpp:282
virtual SAccVerFound GetAccVerFound(const CSeq_id_Handle &idh)
Definition: bamloader.cpp:324
vector< CAnnotName > TAnnotNames
Definition: bamloader.hpp:122
virtual SGiFound GetGiFound(const CSeq_id_Handle &idh)
Definition: bamloader.cpp:331
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
Definition: bamloader.cpp:251
virtual CDataLoader * CreateAndRegister(CObjectManager &om, const TPluginManagerParamTree *params) const
Definition: bamloader.cpp:384
virtual ~CBAM_DataLoaderCF(void)
Definition: bamloader.cpp:375
CObjectManager::TPriority GetPriority(const TPluginManagerParamTree *params) const
CObjectManager::EIsDefault GetIsDefault(const TPluginManagerParamTree *params) const
bool ValidParams(const TPluginManagerParamTree *params) const
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
bool IsDataLoaderMatches(CDataLoader &loader) const
Definition: bamloader.cpp:61
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectManager –.
CSafeStaticLifeSpan::
@ eLifeLevel_AppMain
Destroyed in CNcbiApplication::AppMain, if possible.
CSafeStatic<>::
TBlobId GetBlobId(void) const
bool IsLoaded(void) const
void SetLoaded(void)
definition of a Culling tree
Definition: ncbi_tree.hpp:100
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
TLoader * GetLoader(void) const
Get pointer to the loader.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< CSeq_id_Handle > TIds
void RevokeDataLoaders(IDataLoaderFilter &filter)
Revoke data loaders by filter, even if they were still used.
CBlobIdKey TBlobId
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
EChoice
The main blob is the blob with the sequence; all other blobs are external and contain external annotations.
EIsDefault
Flag defining if the data loader is included in the "default" group.
TRegisterInfo GetRegisterInfo(void)
static void RegisterInObjectManager(CObjectManager &om, CLoaderMaker_Base &loader_maker, CObjectManager::EIsDefault is_default, CObjectManager::TPriority priority)
Register the loader only if the name is not yet registered in the object manager.
Definition: data_loader.cpp:53
vector< TChunk > TChunkSet
@ eOrphanAnnot
all external annotations if no Bioseq exists
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
static void NCBI_EntryPointImpl(TDriverInfoList &info_list, EEntryPointRequest method)
Entry point implementation.
list< SDriverInfo > TDriverInfoList
List of driver information.
EEntryPointRequest
Actions performed by the entry point.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
FILE * file
Helper classes and templates to implement plugins.
CRef< objects::CObjectManager > om
AutoPtr< IIdMapper > m_IdMapper
Definition: bamloader.hpp:72
vector< SBamFileName > m_BamFiles
Definition: bamloader.hpp:71
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
SRegisterLoaderInfo –.
Modified on Wed Apr 17 13:09:28 2024 by modify_doxy.py rev. 669887