NCBI C++ ToolKit
vcf_histogram.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Vladislav Evgeniev
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
35 #include <util/bitset/bmserial.h>
37 #include <util/checksum.hpp>
38 #include <corelib/rwstream.hpp>
40 #include <corelib/ncbiexec.hpp>
41 
44 
45 ///////////////////////////////////////////////////////////////////////////////
46 /// CVcfHistogram
47 
50 
51 string CVcfHistogram::GetCacheKey(const string& annot_name, const size_t bin_width)
52 {
53  CCompoundIDPool id_pool;
54  auto id = id_pool.NewID(eCIC_GenericID);
55 
56  id.AppendString(annot_name);
57  id.AppendInteger(bin_width);
58 
59  return id.ToString();
60 
61 }
62 
/// Constructs a histogram object from a cache key.
///
/// The key is unpacked with CCompoundIDPool::FromString(); the first string
/// field becomes m_AnnotName and its next neighbor field's integer becomes
/// m_BinWidth. m_DataKey is the hex checksum of the annotation name with the
/// suffix "_vcf_histogram". Finally the cache key is appended to the static
/// m_KeysMap entry for this annotation, under m_KeysMapMutex.
///
/// @param cache_key  key to unpack; must not be empty
/// @throws CException (eUnknown) if the key is empty, the annotation name is
///         empty, or the bin width is zero
63 CVcfHistogram::CVcfHistogram(const string& cache_key)
64 {
// Debug builds assert; the explicit check below also throws on an empty key.
65  _ASSERT(!cache_key.empty());
66  if (cache_key.empty())
67  NCBI_THROW(CException, eUnknown, "Empty cache key");
68 
69  CCompoundIDPool id_pool;
70  auto id = id_pool.FromString(cache_key);
71 
// NOTE(review): GetFirst() is documented as returning NULL when no field of the
// requested type exists; the result is used without a check here - confirm.
72  auto field_annot = id.GetFirst(eCIT_String);
73  m_AnnotName = field_annot.GetString();
74  if (m_AnnotName.empty()) NCBI_THROW(CException, eUnknown, "Empty annotation");
75 
// The bin width is read from the field that follows the annotation field.
76  auto field_bin_width = field_annot.GetNextNeighbor();
77  m_BinWidth = field_bin_width.GetInteger();
78  if (m_BinWidth == 0) NCBI_THROW(CException, eUnknown, "Empty bin width");
79 
// NOTE(review): the declaration of `cs` is not present in this listing;
// presumably a CChecksum - confirm against the real source.
81  cs.AddLine(m_AnnotName);
83  m_DataKey = cs.GetHexSum();
84  m_DataKey += "_vcf_histogram";
85 
// Record this cache key under its annotation name; the lock is scoped to the update.
86  {
87  lock_guard<mutex> guard(m_KeysMapMutex);
88  m_KeysMap[m_AnnotName].push_back(cache_key);
89  }
90 }
91 
92 
// Member-wise copy of the annotation name, bin width, data key and histogram
// data from `data`. m_DataMutex is not in the initializer list, and the body
// is empty, so nothing is added to m_KeysMap here.
// NOTE(review): the signature line is not present in this listing; presumably
// this is the copy constructor taking `data` - confirm.
94  : m_AnnotName(data.m_AnnotName)
95  , m_BinWidth(data.m_BinWidth)
96  , m_DataKey(data.m_DataKey)
97  , m_Data(data.m_Data)
98 {
99 }
100 
101 
// Loads the histogram data from the cache into m_Data if m_Data is still empty.
// Holds m_DataMutex for the whole call. If no blob is stored under m_DataKey,
// or reading/deserializing fails, m_Data is not populated by this function;
// failures of the read path are reported with ERR_POST and not rethrown.
// NOTE(review): the signature line is not present in this listing; the body
// uses a parameter named `icache_pool`.
103 {
104  lock_guard<mutex> guard(m_DataMutex);
// Nothing to do when data is already present.
105  if (!m_Data.empty())
106  return;
107 
108  CObjPoolGuard<TGraphCachePool> icache(icache_pool);
109 
110  // is there any data stored?
111  size_t blob_size = 0;
112  try {
113  blob_size = icache->GetSize(m_DataKey, 0, NcbiEmptyString);
114  } catch(const exception&) {
// Deliberately ignored: a failed size query leaves blob_size at 0,
// which is then treated the same as "no blob stored".
115  }
116  if (blob_size > 0) {
117  try {
118  CRStream is(icache->GetReadStream(m_DataKey, 0, NcbiEmptyString), 0, 0, CRWStreambuf::fOwnReader);
// The buffer is constructed empty (val == 0) and then resized to the blob size.
119  int val = 0;
120  vector<unsigned char> buffer(val, 0);
121  buffer.resize(blob_size);
122  is.read((char*)&buffer[0], buffer.size());
123  if (!is) NCBI_THROW(CException, eUnknown, "Failed to read VCF histogram from cache: " + m_DataKey);
// NOTE(review): the declaration of `deserializer` is not present in this listing.
125  deserializer.deserialize(m_Data, &buffer[0]);
126  return;
127  } catch (exception& e) {
// Read or deserialization errors are logged only.
128  ERR_POST(Error << e.what());
129  }
130  }
131 }
132 
// Optimizes m_Data, serializes it with XOR compression enabled and writes the
// resulting buffer to the cache under m_DataKey.
// Throws CException (eUnknown) if the stream is in a failed state after write().
// No lock on m_DataMutex is taken in the lines visible here.
// NOTE(review): the signature line and the declarations of `sv_ser` and
// `sv_lay` are not present in this listing; the body uses a parameter named
// `icache_pool`.
134 {
135  CObjPoolGuard<TGraphCachePool> icache(icache_pool);
136  m_Data.optimize();
139  sv_ser.enable_xor_compression();
// Debug-only check that XOR compression is actually enabled.
140  _ASSERT(sv_ser.is_xor_ref());
141  sv_ser.serialize(m_Data, sv_lay);
142  CWStream w(icache->GetWriteStream(m_DataKey, 0, NcbiEmptyString), 0, 0, CRWStreambuf::fOwnWriter);
143  w.write((const char*)sv_lay.buf(), sv_lay.size());
// NOTE(review): the stream state is checked before flush(); a failure that
// only surfaces during flush() is not detected here - confirm this is intended.
144  if (!w) NCBI_THROW(CException, eUnknown, "Failed to write VCF histogram to cache: " + m_DataKey);
145  w.flush();
146 }
147 
// Runs its body while holding m_DataMutex.
// NOTE(review): the signature line and the single statement inside the lock
// are not present in this listing, so what this method does cannot be
// determined from here - check the real source.
149 {
150  lock_guard<mutex> guard(m_DataMutex);
152 }
153 
154 void CVcfHistogram::EraseCachePerAnnotation(const string& annot_name)
155 {
156  lock_guard<mutex> guard(m_KeysMapMutex);
157  auto it = m_KeysMap.find(annot_name);
158  if (it == m_KeysMap.end())
159  return;
160  auto &histogram_cache = CGraphCache<CVcfHistogram>::GetInstance();
161  for(const auto& key : it->second) {
162  histogram_cache.RemoveData(key);
163  }
164  m_KeysMap.erase(it);
165 }
166 
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Serialization for sparse_vector<>
Checksum and hash calculation classes.
CChecksum – Checksum calculator.
Definition: checksum.hpp:302
Pool of recycled CCompoundID objects.
CCompoundID NewID(ECompoundIDClass new_id_class)
Create and return a new CCompoundID object.
CCompoundID FromString(const string &cid)
Unpack the base64-encoded ID and return a CCompoundID object for field extraction.
void AppendString(const string &string_value)
Append an eCIT_String field at the end of this compound ID.
CCompoundIDField GetFirst(ECompoundIDFieldType field_type)
Return the first field of the specified type or NULL if this compound ID contains no fields of such type.
static CGraphCache & GetInstance()
Definition: graph_cache.hpp:97
Guard that can be used to automatically return object to the pool after leaving some scope.
Definition: obj_pool.hpp:198
Note about the "buf_size" parameter for streams in this API.
Definition: rwstream.hpp:122
@ fOwnReader
Own the underlying reader.
Definition: rwstreambuf.hpp:66
@ fOwnWriter
Own the underlying writer.
Definition: rwstreambuf.hpp:67
CVcfHistogram.
static mutex m_KeysMapMutex
Mutex for the cache keys.
CVcfHistogram(const string &cache_key)
The copy constructor is invoked by CGraphCache, which clones the data for an asynchronous save operation.
static map< string, vector< string > > m_KeysMap
Map of cache keys per annotation.
static void EraseCachePerAnnotation(const string &annot_name)
static string GetCacheKey(const string &annot_name, const size_t bin_width)
static void InitHistogramGlyph(size_t bin_width, const TSeqRange &range, const THistogramVector &histogram, CHistogramGlyph::TMap &dmap)
mutex m_DataMutex
data access mutex
THistogramVector m_Data
void Init(TGraphCachePool &icache_pool)
Save is invoked by CGraphCache when cached data needs to be permanently saved into NetCache.
void Save(TGraphCachePool &icache_pool)
Initializes the histogram map for a given range.
void GetHistogram(const TSeqRange &range, CHistogramGlyph::TMap &dmap) const
Sets the histogram data.
Writer-based output stream.
Definition: rwstream.hpp:171
sparse vector de-serializer
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
Serialize sparse vector into a memory buffer(s) structure.
bool is_xor_ref() const noexcept
Returns the XOR reference compression status (enabled/disabled)
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
void enable_xor_compression() noexcept
Enable XOR compression on vector serialization.
bool empty() const noexcept
return true if vector is empty
Definition: bmsparsevec.h:734
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
Definition: bmsparsevec.h:2148
void erase(iterator pos)
Definition: map.hpp:167
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
@ eCIT_String
Definition: compound_id.hpp:74
@ eCIC_GenericID
Definition: compound_id.hpp:51
char data[12]
Definition: iconv.c:80
string GetHexSum(void) const
Return string with checksum in hexadecimal form.
Definition: checksum.hpp:353
void AddLine(const char *line, size_t len)
Definition: checksum.hpp:609
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::KEY key
Defines a portable execute class.
static uint8_t * buffer
Definition: pcre2test.c:1016
Reader-writer based streams.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
#define _ASSERT
USING_SCOPE(objects)
Modified on Fri Sep 20 14:58:28 2024 by modify_doxy.py rev. 669887