NCBI C++ ToolKit
wig_graph.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrei Shkeda
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
36 #include <gui/utils/url_utils.hpp>
37 
38 #include <objmgr/scope.hpp>
40 
41 #include <util/bitset/bmserial.h>
43 #include <util/bitset/bm.h>
45 
46 #include <util/bitset/bmdbg.h>
47 
48 #include <util/checksum.hpp>
49 #include <corelib/rwstream.hpp>
51 #include <corelib/ncbiexec.hpp>
52 
53 #include <cmath>
54 
57 
58 static const int kDeltaFrameSize = 65535;
59 
61 {
62  return (pos / kDeltaFrameSize) * kDeltaFrameSize;
63 }
64 
65 
66 ///////////////////////////////////////////////////////////////////////////////
67 /// CWigGraph
68 
69 string CWigGraph::GetCacheKey(CScope& scope, const CSeq_id& seq_id, const string& wig_id, const string& remote_path, const string& last_modified, EDataType data_type)
70 {
71  auto id = CSparseGraph::CreateCompoundID(scope, seq_id, remote_path);
72  CCompoundIDPool id_pool;
73  auto nested_id = id_pool.NewID(eCIC_GenericID);
74  nested_id.AppendSeqID(wig_id);
75  nested_id.AppendInteger(data_type);
76  string lmd = last_modified;
77  if (lmd.empty())
78  CUrlUtils::GetLastModified(remote_path, lmd);
79  nested_id.AppendString(lmd);
80  id.AppendNestedCID(nested_id);
81  return id.ToString();
82 
83 }
84 
85 CWigGraph::CWigGraph(const string& cache_key)
86 {
87 
88  _ASSERT(!cache_key.empty());
89  if (cache_key.empty())
90  NCBI_THROW(CException, eUnknown, "Empty cache key");
91 
92  CCompoundIDPool id_pool;
93  auto cid = id_pool.FromString(cache_key);
94 
95  InitFromCompoundID(cid);
96  auto nested_cid = cid.GetFirst(eCIT_NestedCID).GetNestedCID();
97 
98  m_WigId = nested_cid.GetFirst(eCIT_SeqID).GetSeqID();
99  if (m_WigId.empty()) NCBI_THROW(CException, eUnknown, "Empty wig_id");
100  auto data_type = nested_cid.GetFirst(eCIT_Integer).GetInteger();
101 
102  m_LastModified = nested_cid.GetFirst(eCIT_String).GetString();
103 
104  m_Data.emplace("0", unique_ptr<TSparseVector>(new TSparseVector));
105  switch (data_type) {
106  case eSummary:
108  m_SummaryBinCount = static_cast<int>(m_DataSize/kMinBinSize);
109  break;
110  case eData:
111  m_DataType = eData;
112  m_Data["0"]->resize(static_cast<TSparseVector::size_type>(m_DataSize));
113  break;
114  default:
115  NCBI_THROW(CException, eUnknown, "undefined data type");
116  }
118 }
119 
121 {
123  cs.AddLine(m_SeqId);
125  cs.AddLine(m_WigId);
126  cs.AddLine(m_RemotePath);
128  switch (m_DataType) {
129  case eSummary:
130  m_NetCacheKey = "WS_";
131  break;
132  case eData:
133  m_NetCacheKey = "WG_";
134  break;
135  }
136  m_NetCacheKey += cs.GetHexSum();
137 }
138 
140  : CSparseGraph(data)
141  , m_WigId(data.m_WigId)
142  , m_DataType(data.m_DataType)
143  , m_SummaryBinCount(data.m_SummaryBinCount)
144 {
145 }
146 
147 
149 {
150  // BM_DECLARE_TEMP_BLOCK(tb);
152  if (m_DataType != eSummary)
153  NCBI_THROW(CException, eUnknown, "Invalid read request");
154 
155  size_t size = 0;
156  try {
158  } catch (exception& e) {
159  ERR_POST(Error << e.what());
160  return false;
161  }
162 
164 
165 // if (m_DataType == eSummary) {
166  is.read((char*)&m_SummaryBinCount, sizeof(int));
167  if (!is) NCBI_THROW(CException, eUnknown, "Failed to read bin count: " + m_NetCacheKey);
168  size -= sizeof(int);
169 
170  int summary_size = 0;
171  is.read((char*)&summary_size, sizeof(int));
172  if (!is) NCBI_THROW(CException, eUnknown, "Failed to read summary size: " + m_NetCacheKey);
173  size -= sizeof(int);
174  // }
175  vector<char> buffer(size, 0);
176  is.read(&buffer[0], buffer.size());
177  if (!is) NCBI_THROW(CException, eUnknown, "Failed to read vector: " + m_NetCacheKey);
178 
179  auto res = sparse_vector_deserialize(summary/**m_Data["0"]*/, (const unsigned char*)&buffer[0]);
180  if (res != 0) NCBI_THROW(CException, eUnknown, "Cache deserialization failed");
181 
182  //size -= summary_size;
183  return true;
184 }
185 
186 
188 {
189  if (m_DataType == eSummary) {
190  lock_guard<mutex> guard(m_DataMutex);
191  if (m_Data["0"]->empty() && m_PrefetchRequested == false) {
192  m_PrefetchRequested = true;
193  m_PendingSummary = async(std::launch::async, [&]{
194  CObjPoolGuard<TGraphCachePool> icache(icache_pool);
196  // are there any data stored?
197  bool has_blob = false;
198  try {
199  has_blob = icache->GetSize(m_NetCacheKey, 0, NcbiEmptyString) >0;
200  } catch(exception& e) {
201  }
202  if (has_blob) {
203  try {
204  x_ReadSummary(*icache, *s);
205  return s;
206  } catch (exception& e) {
207  ERR_POST(Error << e.what());
208  }
209  }
210  x_GetBigWigSummary(*s);
211  x_SaveSummary(*icache, *s);
212  return s;
213  });
214  }
215 /*
216  CObjPoolGuard<TGraphCachePool> icache(icache_pool);
217 
218  // are there any data stored?
219  bool has_blob = false;
220  try {
221  has_blob = icache->GetSize(m_NetCacheKey, 0, NcbiEmptyString) >0;
222  } catch(exception& e) {
223  }
224  if (has_blob) {
225  try {
226  x_ReadData(*icache);
227  return;
228  } catch (exception& e) {
229  ERR_POST(Error << e.what());
230  }
231  }
232  x_GetBigWigSummary();
233  x_SaveData(*icache);
234 */
235  } else {
236  // since last_modified is a part of wig graph key
237  // we can skip remote data check by updating m_LastChecked
239  CSparseGraph::Init(icache_pool);
240  }
241 }
242 
244 {
245  if (m_DataType == eSummary) {
246  x_FetchSummary();
247  CObjPoolGuard<TGraphCachePool> icache(icache_pool);
248  x_SaveSummary(*icache, *m_Data["0"]);
249  } else {
250  // since last_modified is a part of wig graph key
251  // we can skip remote data check by updating m_LastChecked
253  CSparseGraph::Save(icache_pool);
254  }
255 }
256 
257 int CWigGraph::x_RunBigWigScript(const TSeqRange& range, const string tmp_file)
258 {
259  auto from = NStr::NumericToString(range.GetFrom());
260  auto to = NStr::NumericToString(range.GetToOpen());
261  string request_type;
262  string bin_count;
263 
264  if (m_DataType == eSummary) {
265  request_type = "summary";
267  } else if (m_DataType == eData) {
268  request_type = "data";
269  bin_count = "0";
270  }
271 
272  CExec::CResult ExecResult =
274  "./rmt_bigwig_summary.sh",
275  m_RemotePath.c_str(),
276  m_WigId.c_str(),
277  from.c_str(),
278  to.c_str(),
279  bin_count.c_str(),
280  tmp_file.c_str(),
281  request_type.c_str(),
282  NULL);
283 
284  //!! parameterize wait time later; for now, set it to 60 seconds
285  CProcess::CExitInfo ExitInfo;
286  CProcess(ExecResult.GetProcessHandle(), CProcess::eHandle).Wait(CRmtScriptTimeout::Get() * 1000, &ExitInfo);
287  return ExitInfo.IsSignaled() ? ExitInfo.GetSignal() : ExitInfo.GetExitCode();
288 }
289 
291 {
292 
293  // Occcasionally bigWigSummary fails and it depends on number of data points (m_SummaryBinCount)
294  // the code does three attempts to run each decreasing the number of points by 10%
295  //_ASSERT(!m_Data.empty());
296  float bin_count_pct = 0.9f;
297  TSeqRange seq_range(0, static_cast<TSeqPos>(m_DataSize - 1));
299  for (size_t count = 0; count < 4; ++count) {
301  AutoPtr<fstream> temp_fs(CDirEntry::CreateTmpFile(sTmpSummary));
302 
303  int exit_code = x_RunBigWigScript(seq_range, sTmpSummary);
304  if (exit_code != 0) {
305  if (count == 3) {
307  string err_msg = (exit_code == -1) ?
308  "bigwig statistics timed out" : "bigwig statistics failed (exit code: " + NStr::NumericToString(exit_code);
309  NCBI_THROW(CException, eUnknown, err_msg);
310  }
311  ERR_POST(Error << "bin count:" << (bin_count_pct * m_SummaryBinCount) << ", pct:" << bin_count_pct);
312  m_SummaryBinCount = bin_count_pct * m_SummaryBinCount;
313  bin_count_pct -= 0.1;
314  continue;
315  }
316 
317  CNcbiIfstream summary_is(sTmpSummary.c_str());
318  string val_max, val_min;
319  int start = 0;
320  vector<TValType> data_points;
321  constexpr int buf_len = 64000;
322  data_points.resize(buf_len, 0);
323  summary_sv.resize(m_SummaryBinCount);
324  //m_Data["0"]->resize(m_SummaryBinCount);
325  int k = 0;
327  string summary;
328  while (NcbiGetline(summary_is, summary, "\n")) {
329  if (!NStr::SplitInTwo(summary, "\t", val_min, val_max))
330  continue;
331  auto v_max = NStr::StringToNumeric<float>(val_max, convFlags);
332  auto v_min = NStr::StringToNumeric<float>(val_min, convFlags);
333  float v = (abs(v_min) > abs(v_max)) ? v_min : v_max;
334  x_PackFloat(v, data_points[k]);
335  ++k;
336  if (k == buf_len) {
337  int batch_size = min<int>(buf_len, m_SummaryBinCount - start);
338  summary_sv.import(&data_points[0], batch_size, start);
339  //m_Data["0"]->import(&data_points[0], batch_size, start);
340  fill_n(data_points.begin(), buf_len, 0);
341  start += batch_size;
342  k = 0;
343  }
344  }
345  if (k > 0) {
346  int batch_size = min<int>(k, m_SummaryBinCount - start);
347  summary_sv.import(&data_points[0], batch_size, start);
348  //m_Data["0"]->import(&data_points[0], batch_size, start);
349  }
350  summary_sv.optimize();
351  //m_Data["0"]->optimize();
352  break;
353  }
355  } else {
356  NCBI_THROW(CException, eUnknown, "bigwig statistics not performed because the file is blacklisted");
357  }
358 }
359 
360 // the data set is split into frams of kDeltaFrameSize
361 // and each block is compressed using delta coding
362 void CWigGraph::x_EncodeDelta(const TSeqRange& range, const vector<float>& v_in, vector<TValType>& v_out)
363 {
364  auto len = v_in.size();
365  _ASSERT(len > 0);
366  v_out.resize(len, 0);
367  //vector<TValType> v_out(len, 0);
368 
369  TValType last_val = 0;
370  for (size_t pos = 0; pos < len; ++pos) {
371  TValType curr_val = round(v_in[pos] * kFract);
372  if (pos % kDeltaFrameSize == 0)
373  last_val = 0;
374  v_out[pos] = x_EncodeSign(curr_val - last_val);
375  last_val = curr_val;
376  }
377 }
378 
379 void CWigGraph::x_DecodeDelta(const TSeqRange& range, vector<float>& v_out)
380 {
381 
382  _ASSERT(range.GetLength() > 0);
383  auto from = s_AlignToFrame(range.GetFrom());
384  _ASSERT(int(range.GetTo() - from) > 0);
385  size_t len = (range.GetTo() - from) + 1;
386  vector<TValType> v_in(len, 0);
387  m_Data["0"]->extract((TValType*)&v_in[0], static_cast<TSparseVector::size_type>(len), from);
388  v_out.resize(range.GetLength(), 0);
389 
390  int64_t last = 0;
391  size_t index = 0;
392  for (size_t i = 0; i < len; ++i) {
393  if (i % kDeltaFrameSize == 0)
394  last = 0;
395  auto val = x_DecodeSign(v_in[i]);
396  val += last;
397  last = val;
398  if (from + i < range.GetFrom())
399  continue;
400  float f = val;
401  f /= kFract;
402  v_out[index++] = f;
403  }
404 }
405 
406 
407 void CWigGraph::x_GetBigWigData(const TSeqRange& range, vector<TValType>& data)
408 {
409  _ASSERT(range.GetFrom() % kDeltaFrameSize == 0);
410  if (range.GetFrom() % kDeltaFrameSize != 0)
411  NCBI_THROW(CException, eUnknown, "Internal error: range is not aligned");
412  _ASSERT(range.GetLength() > 0);
413  vector<float> raw_data(range.GetLength(), 0);
414  {
416  AutoPtr<fstream> tmp_stream(CDirEntry::CreateTmpFile(sTmpFile));
417  auto exit_code = x_RunBigWigScript(range, sTmpFile);
418  if (exit_code != 0) {
419  string err_msg = (exit_code == -1) ?
420  "bigwig data retrieval timed out" : "bigwig data retrieval failed (exit code: " + NStr::NumericToString(exit_code);
421  NCBI_THROW(CException, eUnknown, err_msg);
422  }
423 
424  CNcbiIfstream is(sTmpFile.c_str());
425  string curr_line;
426  while (NcbiGetlineEOL(is, curr_line)) {
427  NStr::TruncateSpacesInPlace(curr_line);
428  if (curr_line.empty() || curr_line[0] == '#') // skip comments
429  continue;
430  vector<string> columns;
431  NStr::Split(curr_line, " \t", columns, NStr::fSplit_Tokenize);
432  if (columns.size() < 4)
433  continue;
434  if (columns[0] == "browser" || columns[0] == "track")
435  continue;
436  int start = max<int>(NStr::StringToNumeric<int>(columns[1]), range.GetFrom());
437  int stop = NStr::StringToNumeric<int>(columns[2]); // open pos
438  int span = stop - start;
439  if (span <= 0)
440  continue;
441  float value = NStr::StringToNumeric<float>(columns[3]);
442  TSignedSeqPos pos = start - range.GetFrom();
443  fill_n(raw_data.begin() + pos, span, value);
444  }
445  }
446  x_EncodeDelta(range, raw_data, data);
447 }
448 
449 
451 {
452  // BM_DECLARE_TEMP_BLOCK(tb);
453 // auto& data = *m_Data["0"];
455  if (m_DataType != eSummary)
456  NCBI_THROW(CException, eUnknown, "Invalid save request");
457 
458  data.optimize();
461  int len = static_cast<int>(sv_lay.size());
462 
464  //if (m_DataType == eSummary) {
465  w.write((const char*)&m_SummaryBinCount, sizeof(m_SummaryBinCount));
466  if (!w) NCBI_THROW(CException, eUnknown, "Failed to write wig summary bins: " + m_NetCacheKey);
467  w.write((const char*)&len, sizeof(len));
468  if (!w) NCBI_THROW(CException, eUnknown, "Failed to write wig cache size: " + m_NetCacheKey);
469  //}
470 
471  w.write((const char*)sv_lay.buf(), len);
472  if (!w) NCBI_THROW(CException, eUnknown, "Failed to write wig cache data: " + m_NetCacheKey);
473  w.flush();
474 }
475 
476 
477 static void s_SplitRange(const TSeqRange& range, int frames, vector<TSeqRange>& res)
478 {
479  _ASSERT(frames);
480  if (frames <= 0)
481  throw runtime_error("Invalid frames size");
482  auto start = s_AlignToFrame(range.GetFrom());
483  int chunk_size = frames * kDeltaFrameSize;
484  res.emplace_back(range.GetFrom(), min<int>(range.GetFrom() + (chunk_size - 1), range.GetTo()));
485  start += chunk_size;
486  _ASSERT(start % kDeltaFrameSize == 0);
487  auto stop = range.GetTo();
488  while (start <= stop) {
489  res.emplace_back(start, min<int>(start + (chunk_size - 1), stop));
490  start += chunk_size;
491  }
492 }
493 
494 
495 void CWigGraph::x_AddDataToMap(const vector<float>& data, const TSeqPos from, CHistogramGlyph::TMap& the_map)
496 {
497  if (data.empty())
498  return;
499  auto it = data.begin();
500  auto last = *it;
501 
502  int start = from;
503  int to = from + static_cast<int>(data.size() - 1);
504  int stop = start;
505  while (++it != data.end()) {
506  auto val = *it;
507  if (val != last) {
508  auto tr = TSeqRange(start, stop);
509  the_map.AddRange(tr, last);
510  last = val;
511  start = stop + 1;
512  }
513  stop +=1;
514  }
515  if (start < to) {
516  auto tr = TSeqRange(start, to);
517  the_map.AddRange(tr, last);
518  }
519 }
520 
522 {
523  const auto& data_r = data.GetRange();
524  _ASSERT(data_r.GetLength() > 0);
525  if (data_r.GetLength() == 0)
526  return;
527  if (m_DataType == eSummary) {
528  x_FetchSummary();
529  _ASSERT(!m_Data.empty());
530  int bin_size = max<int>(1, static_cast<int>(m_DataSize/m_SummaryBinCount));
531  auto start = data_r.GetFrom()/bin_size;
532  auto stop = data_r.GetTo()/bin_size;
533  int len = (stop - start) + 1;
534  auto from = data_r.GetFrom();
535  {
536  vector<TSparseVector::value_type> data_points;
537  constexpr int buf_len = 64000;
538  data_points.resize(buf_len, 0);
539  while (len > 0) {
540  int batch_size = min<int>(buf_len, (stop + 1) - start);
541  m_Data["0"]->decode(&data_points[0], start, batch_size);
542  for (auto i = 0; i < batch_size; ++i) {
543  auto f = from + i * bin_size;
544  TSeqRange r(f, f + bin_size);
545  float v = 0;
546  x_UnPackFloat(data_points[i], v);
547  data.AddRange(r, v);
548  }
549  start += batch_size;
550  from += (batch_size * bin_size);
551  len -= batch_size;
552  }
553  }
554  } else {
555  if (m_PrefetchRequested == false)
556  Prefetch(data_r);
557  x_FetchData();
558  /*
559  bool update_data = false;
560  for (auto& r : m_PendingData) {
561  TPrefetchData d = r.second.get();
562  TUpdateMap update = {{ "0", &d->data }};
563  Update(d->range, update);
564  update_data = true;
565  }
566  m_PrefetchRequested = false;
567  m_PendingData.clear();
568  */
569  vector<TSeqRange> chunks;
570  // split range into chunks of size 10 * kDeltaFrameSize (655353)
571  // each chunk except the first one is aligned to frame start
572  // first chunk start is not aligned
573  s_SplitRange(data_r, 10, chunks);
574  for (const auto& chunk : chunks) {
575  vector<float> data_points;
576  x_DecodeDelta(chunk, data_points);
577  _ASSERT(chunk.GetLength() == data_points.size());
578  x_AddDataToMap(data_points, chunk.GetFrom(), data);
579  }
580 
581  //if (update_data) {
582  // CGraphCache<CWigGraph>::GetInstance().SaveData(Ref(this));
583  //}
584  }
585 }
586 
588 {
589  if (range.GetLength() == 0)
590  return 0.;
591  vector<TSeqRange> missing_ranges;
592  GetMissingRegions(range, missing_ranges);
593  int len = range.GetLength();
594  for (const auto& r : missing_ranges) {
595  len -= r.GetLength();
596  }
597  float coverage = len;
598  coverage /=range.GetLength();
599  return coverage;
600 }
601 
603 {
605  if (m_DataType == eSummary)
606  return;
607  //if (m_PrefetchRequested || !m_PendingData.empty())
608  //NCBI_THROW(CException, eUnknown, "Prefetch conflict: prefetch requested multiple times");
609 
610  vector<TSeqRange> missing_ranges;
611  GetMissingRegions(range, missing_ranges);
612  bool update_data = !missing_ranges.empty();
613  if (update_data == false)
614  return;
615  vector<TSeqRange> normalized_ranges;
616  TSeqPos from = s_AlignToFrame (missing_ranges.front().GetFrom());
617  TSeqPos to = s_AlignToFrame(missing_ranges.front().GetTo()) + (kDeltaFrameSize - 1);
618  normalized_ranges.emplace_back(from, to);
619  for (size_t i = 1; i < missing_ranges.size(); ++i) {
620  const auto& r = missing_ranges[i];
621  if (r.GetFrom() > normalized_ranges.back().GetTo()) {
622  from = s_AlignToFrame(r.GetFrom());
623  to = s_AlignToFrame(r.GetTo()) + (kDeltaFrameSize - 1);
624  normalized_ranges.emplace_back(from, to);
625  } else if (r.GetTo() > normalized_ranges.back().GetTo()) {
626  to = s_AlignToFrame(r.GetTo()) + (kDeltaFrameSize - 1);
627  normalized_ranges.back().SetTo(to);
628  }
629  }
630  lock_guard<mutex> guard(m_DataMutex);
631  for (auto& range : normalized_ranges) {
632  auto it = m_PendingData.find(range);
633  if (it == m_PendingData.end()) {
634  m_PrefetchRequested = true;
635  m_PendingData.emplace(range, async(std::launch::async, [range, this]{
637  d->range = range;
638  x_GetBigWigData(range, d->data);
639  return d;
640  }));
641  }
642  }
643 }
644 
646 {
647  lock_guard<mutex> guard(m_DataMutex);
648  if (m_PrefetchRequested) {
649  auto s = m_PendingSummary.get();
650  m_Data["0"].swap(s);
651  m_PrefetchRequested = false;
652  }
653 }
654 
656 {
658  {
659  lock_guard<mutex> guard(m_DataMutex);
660  if (m_PrefetchRequested) {
661  pending_data = std::move(m_PendingData);
662  m_PendingData.clear();
663  m_PrefetchRequested = false;
664  }
665  }
666  if (!pending_data.empty()) {
667  for (auto& r : pending_data) {
668  TPrefetchData d = r.second.get();
669  TUpdateMap update = {{ "0", &d->data }};
670  Update(d->range, update);
671  }
673  }
674 }
675 
676 
T round(const T &v)
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Debugging functions (internal). Poorly documented, not well written.
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Serialization for sparse_vector<>
Checksum and hash calculation classes.
AutoPtr –.
Definition: ncbimisc.hpp:401
CChecksum – Checksum calculator.
Definition: checksum.hpp:302
Pool of recycled CCompoundID objects.
CCompoundID NewID(ECompoundIDClass new_id_class)
Create and return a new CCompoundID objects.
CCompoundID FromString(const string &cid)
Unpack the base64-encoded ID and return a CCompoundID object for field extraction.
void AppendSeqID(const string &seq_id)
Append an eCIT_SeqID field at the end of this compound ID.
The result type for Spawn methods.
Definition: ncbiexec.hpp:120
void SaveData(CRef< TData > data)
Clones TData, puts it into Save Queue for asynchronous storage operation.
static CGraphCache & GetInstance()
Definition: graph_cache.hpp:97
Guard that can be used to automatically return object to the pool after leaving some scope.
Definition: obj_pool.hpp:198
Extended exit information for waited process.
CProcess –.
Note about the "buf_size" parameter for streams in this API.
Definition: rwstream.hpp:122
@ fOwnReader
Own the underlying reader.
Definition: rwstreambuf.hpp:66
@ fOwnWriter
Own the underlying writer.
Definition: rwstreambuf.hpp:67
@ ESuggestedAction_Access
try to access the remote file
@ ESuggestedAction_Skip
do not access the file
static void Set(const string &sKey, ESuggestedAction KnownAccessibility)
static ESuggestedAction Check(const string &sKey)
static time_t Get()
CScope –.
Definition: scope.hpp:92
CSparseGraph.
string m_RemotePath
path to remote data or srz accession
time_t m_LastChecked
timestamp: last time when m_LastModified was checked
string m_NetCacheKey
data access key
static CCompoundID CreateCompoundID(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &remote_path)
CSparseGraph.
virtual void Init(TGraphCachePool &icache_pool)
string m_LastModified
timestamp of the remote data last modified date as reported by www server
void Update(const TSeqRange &range, const TUpdateMap &update)
virtual void Save(TGraphCachePool &icache_pool)
mutex m_DataMutex
data access mutex
map< string, unique_ptr< TSparseVector > > m_Data
map of sparse vectors, the key is used as a part of suffix of NetCache key (m_NetCacheKey + "_" + this ...
uint64_t TValType
string m_SeqId
NCBI seq_id, e.g. NC_000001.
size_t m_DataSize
size of each vector of m_Data
void InitFromCompoundID(CCompoundID id)
void GetMissingRegions(const TSeqRange &range, vector< TSeqRange > &missing_ranges)
CTime –.
Definition: ncbitime.hpp:296
Writer-based output stream.
Definition: rwstream.hpp:171
CWigGraph.
Definition: wig_graph.hpp:58
int64_t x_DecodeSign(uint64_t v)
Definition: wig_graph.hpp:197
map< TSeqRange, future< TPrefetchData > > m_PendingData
Definition: wig_graph.hpp:151
void Prefetch(const TSeqRange &range)
Definition: wig_graph.cpp:602
virtual void Save(TGraphCachePool &icache_pool) override
GetData serializes the data into CDensityMap for the range provided in @data.
Definition: wig_graph.cpp:243
void x_CreateNetCacheKey()
Definition: wig_graph.cpp:120
virtual void Init(TGraphCachePool &icache_pool) override
Save is invoked by CGraphCache when cached data needs to be permanently saved into NetCache.
Definition: wig_graph.cpp:187
void x_FetchSummary()
Definition: wig_graph.cpp:645
CWigGraph(const string &cache_key)
Copy constructor is invoked by CGraphCache which clones the data for asynchronous saving operation.
Definition: wig_graph.cpp:85
void x_GetBigWigData(const TSeqRange &range, vector< TValType > &v_out)
Definition: wig_graph.cpp:407
void x_DecodeDelta(const TSeqRange &range, vector< float > &v_out)
Definition: wig_graph.cpp:379
float GetDataCoverage(const TSeqRange &range)
Definition: wig_graph.cpp:587
bool x_ReadSummary(ICache &icache, TSparseVector &sv)
Definition: wig_graph.cpp:148
EDataType m_DataType
Definition: wig_graph.hpp:139
void GetData(CHistogramGlyph::TMap &data)
GetDataCoverage returns fraction (0.
Definition: wig_graph.cpp:521
int x_RunBigWigScript(const TSeqRange &range, const string tmp_file)
Definition: wig_graph.cpp:257
void x_UnPackFloat(TSparseVector::value_type v_in, float &v_out)
Definition: wig_graph.hpp:178
vector< TValType > data
Definition: wig_graph.hpp:148
void x_AddDataToMap(const vector< float > &data, const TSeqPos from, CHistogramGlyph::TMap &the_map)
Definition: wig_graph.cpp:495
void x_FetchData()
Definition: wig_graph.cpp:655
future< TPrefetchSummary > m_PendingSummary
Definition: wig_graph.hpp:154
unique_ptr< TSparseVector > TPrefetchSummary
Definition: wig_graph.hpp:153
void x_SaveSummary(ICache &icache, TSparseVector &sv)
Definition: wig_graph.cpp:450
int m_SummaryBinCount
Definition: wig_graph.hpp:142
string m_WigId
WigFile id, e.g. chr1.
Definition: wig_graph.hpp:138
void x_GetBigWigSummary(TSparseVector &summary_sv)
Definition: wig_graph.cpp:290
void x_EncodeDelta(const TSeqRange &range, const vector< float > &v_in, vector< TValType > &v_out)
Definition: wig_graph.cpp:362
atomic< bool > m_PrefetchRequested
Number of data points in one Summary point.
Definition: wig_graph.hpp:144
static constexpr float kFract
CWigGraph data come in two flavors Summary - low-res representation for the whole sequence length and...
Definition: wig_graph.hpp:65
unique_ptr< TPrefetchRequest > TPrefetchData
Definition: wig_graph.hpp:150
uint64_t x_EncodeSign(int64_t v)
Encode/Decode negative values to positive ones.
Definition: wig_graph.hpp:186
static const int kMinBinSize
Definition: wig_graph.hpp:141
void x_PackFloat(float v_in, TSparseVector::value_type &v_out)
Float values are packed with precision loss (3 decimal places retained) values are converted to posti...
Definition: wig_graph.hpp:171
struct { TSeqRange range TPrefetchRequest
Definition: wig_graph.hpp:147
static string GetCacheKey(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &wig_id, const string &remote_path, const string &last_modified, EDataType data_type)
CWigGraph.
Definition: wig_graph.cpp:69
BLOB cache read/write/maintenance interface.
Definition: icache.hpp:64
virtual size_t GetSize(const string &key, TBlobVersion version, const string &subkey)=0
Check if BLOB exists, return BLOB size.
virtual IWriter * GetWriteStream(const string &key, TBlobVersion version, const string &subkey, unsigned int time_to_live=0, const string &owner=kEmptyStr)=0
Return sequential stream interface to write BLOB data.
virtual IReader * GetReadStream(const string &key, TBlobVersion version, const string &subkey)=0
Return sequential stream interface to read BLOB data.
succinct sparse vector with runtime compression using bit-slicing / transposition method
Definition: bmsparsevec.h:87
bvector_type::size_type size_type
Definition: bmsparsevec.h:92
void resize(size_type sz)
resize vector
Definition: bmsparsevec.h:739
void import(const value_type *arr, size_type arr_size, size_type offset=0, bool set_not_null=true)
Import list of elements from a C-style array.
Definition: bmsparsevec.h:1172
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
Definition: bmsparsevec.h:2148
bool empty() const
Definition: map.hpp:149
Definition: map.hpp:338
static const int chunk_size
@ eCIT_SeqID
Definition: compound_id.hpp:79
@ eCIT_String
Definition: compound_id.hpp:74
@ eCIT_Integer
Definition: compound_id.hpp:64
@ eCIT_NestedCID
Definition: compound_id.hpp:81
@ eCIC_GenericID
Definition: compound_id.hpp:51
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static const column_t columns[]
Definition: utf8_2.c:22
char data[12]
Definition: iconv.c:80
Int8 int64_t
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NULL
Definition: ncbistd.hpp:225
string GetHexSum(void) const
Return string with checksum in hexadecimal form.
Definition: checksum.hpp:353
void AddLine(const char *line, size_t len)
Definition: checksum.hpp:609
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static CResult SpawnL(EMode mode, const char *cmdname, const char *argv,...)
Spawn a new process with specified command-line arguments.
Definition: ncbiexec.cpp:532
TProcessHandle GetProcessHandle(void)
Get process handle/pid.
Definition: ncbiexec.cpp:63
@ eNoWait
Continues to execute calling process concurrently with new process (asynchronous process).
Definition: ncbiexec.hpp:96
static fstream * CreateTmpFile(const string &filename=kEmptyStr, ETextBinary text_binary=eBinary, EAllowRead allow_read=eAllowRead)
Create temporary file and return pointer to corresponding stream.
Definition: ncbifile.cpp:2985
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
Definition: ncbifile.cpp:2903
@ eTmpFileCreate
Create empty file for each GetTmpName* call.
Definition: ncbifile.hpp:1415
virtual void AddRange(TSeqRange range, CntType score=1, bool expand=false)
static void GetLastModified(const string &url, string &last_modified)
HTTP: Returns header's Last-Modified in the last_modified parameter FTP: Returns MD5 of first 512 byt...
Definition: url_utils.cpp:61
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
int GetSignal(void) const
Get the signal number that has caused the process to terminate (UNIX only).
int Wait(unsigned long timeout=kInfiniteTimeoutMs, CExitInfo *info=0) const
Wait until process terminates.
bool IsSignaled(void) const
TRUE if the process terminated by a signal (UNIX only).
int GetExitCode(void) const
Get process exit code.
@ eHandle
A process handle (MS Windows).
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3545
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
@ fAllowTrailingSpaces
Ignore trailing whitespace characters.
Definition: ncbistr.hpp:297
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowLeadingSpaces
Ignore leading whitespace characters in converted string.
Definition: ncbistr.hpp:294
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2510
time_t GetTimeT(void) const
Get time in time_t format.
Definition: ncbitime.cpp:1396
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
int i
int len
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define abs(a)
Definition: ncbi_heapmgr.c:130
Defines a portable execute class.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Reader-writer based streams.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
#define _ASSERT
USING_SCOPE(objects)
static const int kDeltaFrameSize
Definition: wig_graph.cpp:58
static TSeqPos s_AlignToFrame(TSeqPos pos)
Definition: wig_graph.cpp:60
static void s_SplitRange(const TSeqRange &range, int frames, vector< TSeqRange > &res)
Definition: wig_graph.cpp:477
Modified on Tue Jul 16 13:24:34 2024 by modify_doxy.py rev. 669887