75 nested_id.AppendInteger(data_type);
76 string lmd = last_modified;
79 nested_id.AppendString(lmd);
80 id.AppendNestedCID(nested_id);
89 if (cache_key.empty())
100 auto data_type = nested_cid.GetFirst(
eCIT_Integer).GetInteger();
141 , m_WigId(
data.m_WigId)
142 , m_DataType(
data.m_DataType)
143 , m_SummaryBinCount(
data.m_SummaryBinCount)
158 }
catch (exception& e) {
170 int summary_size = 0;
171 is.read((
char*)&summary_size,
sizeof(
int));
197 bool has_blob =
false;
200 }
catch(exception& e) {
206 }
catch (exception& e) {
265 request_type =
"summary";
268 request_type =
"data";
274 "./rmt_bigwig_summary.sh",
281 request_type.c_str(),
296 float bin_count_pct = 0.9f;
299 for (
size_t count = 0; count < 4; ++count) {
304 if (exit_code != 0) {
307 string err_msg = (exit_code == -1) ?
308 "bigwig statistics timed out" :
"bigwig statistics failed (exit code: " +
NStr::NumericToString(exit_code);
313 bin_count_pct -= 0.1;
318 string val_max, val_min;
320 vector<TValType> data_points;
321 constexpr
int buf_len = 64000;
322 data_points.resize(buf_len, 0);
331 auto v_max = NStr::StringToNumeric<float>(val_max, convFlags);
332 auto v_min = NStr::StringToNumeric<float>(val_min, convFlags);
333 float v = (
abs(v_min) >
abs(v_max)) ? v_min : v_max;
338 summary_sv.
import(&data_points[0], batch_size, start);
340 fill_n(data_points.begin(), buf_len, 0);
347 summary_sv.
import(&data_points[0], batch_size, start);
364 auto len = v_in.size();
366 v_out.resize(
len, 0);
370 for (
size_t pos = 0; pos <
len; ++pos) {
385 size_t len = (
range.GetTo() - from) + 1;
386 vector<TValType> v_in(
len, 0);
388 v_out.resize(
range.GetLength(), 0);
392 for (
size_t i = 0;
i <
len; ++
i) {
398 if (from +
i <
range.GetFrom())
413 vector<float> raw_data(
range.GetLength(), 0);
418 if (exit_code != 0) {
419 string err_msg = (exit_code == -1) ?
420 "bigwig data retrieval timed out" :
"bigwig data retrieval failed (exit code: " +
NStr::NumericToString(exit_code);
428 if (curr_line.empty() || curr_line[0] ==
'#')
436 int start = max<int>(NStr::StringToNumeric<int>(
columns[1]),
range.GetFrom());
437 int stop = NStr::StringToNumeric<int>(
columns[2]);
438 int span = stop - start;
443 fill_n(raw_data.begin() + pos, span,
value);
461 int len =
static_cast<int>(sv_lay.
size());
467 w.write((
const char*)&
len,
sizeof(
len));
471 w.write((
const char*)sv_lay.
buf(),
len);
481 throw runtime_error(
"Invalid frames size");
487 auto stop =
range.GetTo();
488 while (start <= stop) {
489 res.emplace_back(start, min<int>(start + (
chunk_size - 1), stop));
499 auto it =
data.begin();
503 int to = from +
static_cast<int>(
data.size() - 1);
505 while (++it !=
data.end()) {
523 const auto& data_r =
data.GetRange();
524 _ASSERT(data_r.GetLength() > 0);
525 if (data_r.GetLength() == 0)
531 auto start = data_r.GetFrom()/bin_size;
532 auto stop = data_r.GetTo()/bin_size;
533 int len = (stop - start) + 1;
534 auto from = data_r.GetFrom();
536 vector<TSparseVector::value_type> data_points;
537 constexpr
int buf_len = 64000;
538 data_points.resize(buf_len, 0);
540 int batch_size = min<int>(buf_len, (stop + 1) - start);
541 m_Data[
"0"]->decode(&data_points[0], start, batch_size);
542 for (
auto i = 0;
i < batch_size; ++
i) {
543 auto f = from +
i * bin_size;
550 from += (batch_size * bin_size);
569 vector<TSeqRange> chunks;
574 for (
const auto& chunk : chunks) {
575 vector<float> data_points;
577 _ASSERT(chunk.GetLength() == data_points.size());
589 if (
range.GetLength() == 0)
591 vector<TSeqRange> missing_ranges;
594 for (
const auto&
r : missing_ranges) {
595 len -=
r.GetLength();
597 float coverage =
len;
598 coverage /=
range.GetLength();
610 vector<TSeqRange> missing_ranges;
612 bool update_data = !missing_ranges.empty();
613 if (update_data ==
false)
615 vector<TSeqRange> normalized_ranges;
618 normalized_ranges.emplace_back(from, to);
619 for (
size_t i = 1;
i < missing_ranges.size(); ++
i) {
620 const auto&
r = missing_ranges[
i];
621 if (
r.GetFrom() > normalized_ranges.back().GetTo()) {
624 normalized_ranges.emplace_back(from, to);
625 }
else if (
r.GetTo() > normalized_ranges.back().GetTo()) {
627 normalized_ranges.back().SetTo(to);
631 for (
auto&
range : normalized_ranges) {
666 if (!pending_data.
empty()) {
667 for (
auto&
r : pending_data) {
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Debugging functions (internal). Poorly documented, not well written.
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Serialization for sparse_vector<>
Checksum and hash calculation classes.
CChecksum – Checksum calculator.
Pool of recycled CCompoundID objects.
CCompoundID NewID(ECompoundIDClass new_id_class)
Create and return a new CCompoundID object.
CCompoundID FromString(const string &cid)
Unpack the base64-encoded ID and return a CCompoundID object for field extraction.
void AppendSeqID(const string &seq_id)
Append an eCIT_SeqID field at the end of this compound ID.
The result type for Spawn methods.
void SaveData(CRef< TData > data)
Clones TData, puts it into Save Queue for asynchronous storage operation.
static CGraphCache & GetInstance()
Guard that can be used to automatically return object to the pool after leaving some scope.
Extended exit information for waited process.
Note about the "buf_size" parameter for streams in this API.
@ fOwnReader
Own the underlying reader.
@ fOwnWriter
Own the underlying writer.
@ ESuggestedAction_Access
try to access the remote file
@ ESuggestedAction_Skip
do not access the file
static void Set(const string &sKey, ESuggestedAction KnownAccessibility)
static ESuggestedAction Check(const string &sKey)
string m_RemotePath
path to remote data or srz accession
time_t m_LastChecked
timestamp: last time when m_LastModified was checked
string m_NetCacheKey
data access key
static CCompoundID CreateCompoundID(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &remote_path)
CSparseGraph.
virtual void Init(TGraphCachePool &icache_pool)
string m_LastModified
timestamp of the remote data last modified date as reported by www server
void Update(const TSeqRange &range, const TUpdateMap &update)
virtual void Save(TGraphCachePool &icache_pool)
mutex m_DataMutex
data access mutex
map< string, unique_ptr< TSparseVector > > m_Data
map of sparse vectors, the key is used as a part of suffix of NetCache key (m_NetCacheKey + "_" + this ...
string m_SeqId
NCBI seq_id, e.g. NC_000001.
size_t m_DataSize
size of each vector of m_Data
void InitFromCompoundID(CCompoundID id)
void GetMissingRegions(const TSeqRange &range, vector< TSeqRange > &missing_ranges)
Writer-based output stream.
int64_t x_DecodeSign(uint64_t v)
map< TSeqRange, future< TPrefetchData > > m_PendingData
void Prefetch(const TSeqRange &range)
virtual void Save(TGraphCachePool &icache_pool) override
GetData serializes the data into CDensityMap for the range provided in @data.
void x_CreateNetCacheKey()
virtual void Init(TGraphCachePool &icache_pool) override
Save is invoked by CGraphCache when cached data needs to be permanently saved into NetCache.
CWigGraph(const string &cache_key)
Copy constructor is invoked by CGraphCache which clones the data for asynchronous saving operation.
void x_GetBigWigData(const TSeqRange &range, vector< TValType > &v_out)
void x_DecodeDelta(const TSeqRange &range, vector< float > &v_out)
float GetDataCoverage(const TSeqRange &range)
bool x_ReadSummary(ICache &icache, TSparseVector &sv)
void GetData(CHistogramGlyph::TMap &data)
GetDataCoverage returns fraction (0.
int x_RunBigWigScript(const TSeqRange &range, const string tmp_file)
void x_UnPackFloat(TSparseVector::value_type v_in, float &v_out)
void x_AddDataToMap(const vector< float > &data, const TSeqPos from, CHistogramGlyph::TMap &the_map)
future< TPrefetchSummary > m_PendingSummary
unique_ptr< TSparseVector > TPrefetchSummary
void x_SaveSummary(ICache &icache, TSparseVector &sv)
string m_WigId
WigFile id, e.g. chr1.
void x_GetBigWigSummary(TSparseVector &summary_sv)
void x_EncodeDelta(const TSeqRange &range, const vector< float > &v_in, vector< TValType > &v_out)
atomic< bool > m_PrefetchRequested
Number of data points in one Summary point.
static constexpr float kFract
CWigGraph data come in two flavors Summary - low-res representation for the whole sequence length and...
unique_ptr< TPrefetchRequest > TPrefetchData
uint64_t x_EncodeSign(int64_t v)
Encode/Decode negative values to positive ones.
static const int kMinBinSize
void x_PackFloat(float v_in, TSparseVector::value_type &v_out)
Float values are packed with precision loss (3 decimal places retained); values are converted to posti...
struct { TSeqRange range TPrefetchRequest
static string GetCacheKey(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &wig_id, const string &remote_path, const string &last_modified, EDataType data_type)
CWigGraph.
BLOB cache read/write/maintenance interface.
virtual size_t GetSize(const string &key, TBlobVersion version, const string &subkey)=0
Check if BLOB exists, return BLOB size.
virtual IWriter * GetWriteStream(const string &key, TBlobVersion version, const string &subkey, unsigned int time_to_live=0, const string &owner=kEmptyStr)=0
Return sequential stream interface to write BLOB data.
virtual IReader * GetReadStream(const string &key, TBlobVersion version, const string &subkey)=0
Return sequential stream interface to read BLOB data.
succinct sparse vector with runtime compression using bit-slicing / transposition method
bvector_type::size_type size_type
void resize(size_type sz)
resize vector
void import(const value_type *arr, size_type arr_size, size_type offset=0, bool set_not_null=true)
Import list of elements from a C-style array.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
static const int chunk_size
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const column_t columns[]
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
string GetHexSum(void) const
Return string with checksum in hexadecimal form.
void AddLine(const char *line, size_t len)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static CResult SpawnL(EMode mode, const char *cmdname, const char *argv,...)
Spawn a new process with specified command-line arguments.
TProcessHandle GetProcessHandle(void)
Get process handle/pid.
@ eNoWait
Continues to execute calling process concurrently with new process (asynchronous process).
static fstream * CreateTmpFile(const string &filename=kEmptyStr, ETextBinary text_binary=eBinary, EAllowRead allow_read=eAllowRead)
Create temporary file and return pointer to corresponding stream.
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
@ eTmpFileCreate
Create empty file for each GetTmpName* call.
virtual void AddRange(TSeqRange range, CntType score=1, bool expand=false)
static void GetLastModified(const string &url, string &last_modified)
HTTP: Returns header's Last-Modified in the last_modified parameter FTP: Returns MD5 of first 512 byt...
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
int GetSignal(void) const
Get the signal number that has caused the process to terminate (UNIX only).
int Wait(unsigned long timeout=kInfiniteTimeoutMs, CExitInfo *info=0) const
Wait until process terminates.
bool IsSignaled(void) const
TRUE if the process terminated by a signal (UNIX only).
int GetExitCode(void) const
Get process exit code.
@ eHandle
A process handle (MS Windows).
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fAllowTrailingSpaces
Ignore trailing whitespace characters.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fAllowLeadingSpaces
Ignore leading whitespace characters in converted string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
time_t GetTimeT(void) const
Get time in time_t format.
@ eCurrent
Use current time. See also CCurrentTime.
unsigned int
A callback function used to compare two keys in a database.
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Defines a portable execute class.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
static pcre_uint8 * buffer
Reader-writer based streams.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
static const int kDeltaFrameSize
static TSeqPos s_AlignToFrame(TSeqPos pos)
static void s_SplitRange(const TSeqRange &range, int frames, vector< TSeqRange > &res)