60 id.AppendSeqID(seq_id_str);
63 id.AppendInteger(seq_len);
64 id.AppendHost(remote_path);
95 id.AppendCurrentTime();
97 session_tag =
id.ToString();
113 throw runtime_error(
"Empty data key!");
118 for (
auto& old_data :
data.m_Data) {
119 m_Data.emplace(old_data.first, unique_ptr<TSparseVector>(
new TSparseVector(*old_data.second)));
130 missing_ranges.clear();
133 if (num_bits >=
range.GetLength()) {
142 missing_ranges.emplace_back(
range.GetFrom(),
range.GetTo());
152 if (
range.GetFrom() > 0)
153 mask.set_range(0,
range.GetFrom() - 1,
false);
158 auto stop =
range.GetToOpen();
159 vector<TSeqRange> mr;
160 while (
first < stop) {
164 if (
last == 0 ||
last >= stop - 1) {
165 mr.emplace_back(
first, stop - 1);
176 missing_ranges.push_back(mr.front());
178 for (
size_t i = 1;
i < mr.size(); ++
i) {
179 if (mr[
i].GetFrom() - missing_ranges.back().GetFrom() < 20000) {
180 missing_ranges.back().SetTo(mr[
i].GetTo());
182 missing_ranges.emplace_back(mr[
i].GetFrom(), mr[
i].GetTo());
195 unique_ptr<unsigned char> smem(new unsigned char[st.max_serialize_mem]);
199 cs.AddChars((const char*)smem.get(), slen);
210 bool has_blob =
false;
213 }
catch(
const exception& ) {
226 unsigned short attempt = 1;
227 while (attempt <= 3) {
231 string last_modified;
232 time_t last_checked = 0;
235 x_ReadMap(*icache1,
tag, remote_path, last_modified, last_checked,
md5, data_map, tb);
236 bool must_sync =
tag != m_SessionTag;
242 if (my_MD5.empty()) {
243 lock_guard<mutex> guard(m_DataMutex);
246 must_sync = (my_MD5 !=
md5);
249 if (must_sync ==
false) {
252 lock_guard<mutex> guard(m_DataMutex);
253 m_LastSynced.SetCurrent();
254 if (m_LastModified.empty()) {
255 m_LastModified = last_modified;
256 m_LastChecked =
max(last_checked, m_LastChecked);
261 bool all_is_good =
false;
262 if (m_Data.size() > 1) {
263 vector<future<bool>> res;
264 for (
auto&
data : m_Data) {
266 temp_vectors[
data.first]->resize(
data.second->size());
269 res.emplace_back(async(launch::async, [&]()->
bool {
272 string data_key = m_NetCacheKey +
"_" +
data.first;
274 return x_ReadData(*icache, data_key,
tag, *temp_vectors[
data.first]); }));
277 all_is_good = all_of(res.begin(), res.end(), [](future<bool>&
f) { return f.get(); });
279 const auto&
data = *m_Data.begin();
281 temp_vectors[
data.first]->resize(
data.second->size());
282 string data_key = m_NetCacheKey +
"_" +
data.first;
284 all_is_good = x_ReadData(*icache, data_key,
tag, *temp_vectors[
data.first]);;
287 lock_guard<mutex> guard(m_DataMutex);
288 if (!last_modified.empty()) {
289 m_LastModified = last_modified;
290 m_LastChecked =
max(last_checked, m_LastChecked);
292 m_DataMap |= data_map;
293 m_LastSynced.SetCurrent();
294 for (
auto&&
data : m_Data) {
295 data.second->join(*temp_vectors[
data.first]);
300 }
catch (exception& e) {
303 chrono::milliseconds timespan(attempt * 50 + rand() % 100);
304 this_thread::sleep_for(timespan);
310 diag.
Print(
"Sync_passed_attempts", attempt - 1);
312 diag.
Print(
"Sync_failed_attempts", attempt - 1);
317 if (!m_IsCacheGood) {
320 for (
auto&&
data : m_Data) {
321 string data_key = m_NetCacheKey +
"_" +
data.first;
331 if (diff_in_millisec > 250)
341 for (
auto&
data : update) {
364 if (elapsed > 2000) {
366 diag.
Print(
"icache_save_delay", elapsed);
373 static const int kRemoteDataCheckInterval = 600;
385 }
else if ((
t.GetTimeT() -
m_LastChecked) > kRemoteDataCheckInterval) {
390 data.second->set(0,
data.second->size());
405 unique_ptr<unsigned char> smem(new unsigned char[st.max_serialize_mem]);
407 CWStream w(icache.GetWriteStream(m_NetCacheKey, 0, NcbiEmptyString), 0, 0, CRWStreambuf::fOwnWriter);
408 int val = static_cast<int>(m_SessionTag.size());
409 w.write((const char*)&val, sizeof(int));
410 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write Session tag size: " + m_NetCacheKey);
411 w.write(m_SessionTag.c_str(), m_SessionTag.size());
412 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write session tag: " + m_NetCacheKey);
413 val = static_cast<int>(m_RemotePath.size());
414 w.write((const char*)&val, sizeof(int));
415 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write remote path length: " + m_NetCacheKey);
416 w.write((const char*)m_RemotePath.c_str(), m_RemotePath.size());
417 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write remote path length: " + m_NetCacheKey);
418 val = static_cast<int>(m_LastModified.size());
419 w.write((const char*)&val, sizeof(int));
420 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write last_modified: " + m_NetCacheKey);
421 w.write((const char*)m_LastModified.c_str(), val);
422 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write last_modified: " + m_NetCacheKey);
423 w.write((const char*)&m_LastChecked, sizeof(time_t));
424 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write last_checked: " + m_NetCacheKey);
425 w.write((const char*)smem.get(), slen);
426 if (!w)
NCBI_THROW(CException, eUnknown, "Failed to write vector: " + m_NetCacheKey);
438 w.write((
const char*)&
val,
sizeof(
int));
442 w.write((
const char*)sv_lay.
buf(), sv_lay.
size());
452 size_t vector_offset = 0;
456 is.read((
char*)&
val,
sizeof(
int));
458 vector_offset +=
sizeof(
int);
463 vector_offset +=
buffer.size();
465 is.read((
char*)&
val,
sizeof(
int));
467 vector_offset +=
sizeof(
int);
470 vector_offset +=
val;
477 is.read((
char*)&
val,
sizeof(
int));
480 vector_offset +=
sizeof(
int);
481 last_modified.clear();
483 vector_offset +=
val;
491 is.read((
char*)&last_checked,
sizeof(time_t));
493 vector_offset +=
sizeof(time_t);
513 }
catch (exception& e) {
519 int vector_offset = 0;
520 is.read((
char*)&
val,
sizeof(
int));
522 vector_offset +=
sizeof(
int);
529 vector_offset +=
buffer.size();
546 return id.ToString();
552 if (cache_key.empty())
586 if (window < 1.0) window = 1.0;
587 size_t num_pixels = (size_t)ceil(
range.GetLength() / window);
588 stats.resize(num_pixels);
590 auto start =
range.GetFrom();
591 auto stop =
range.GetToOpen();
609 double curr_pos = start;
610 while (pos < stop && stat_idx < num_pixels) {
612 size_t end_idx = (size_t)floor(curr_pos + 0.5);
616 int max_mismatch = -1;
620 while (pos < end_idx) {
634 int mismatch =
static_cast<int>(mismatches[index]);
635 int match =
static_cast<int>(matches[index]);
636 int gap =
static_cast<int>(gaps[index]);
637 int intron =
static_cast<int>(introns[index]);
638 total = max<int>(total,
match + mismatch + gap + intron);
639 max_mismatch = max<int>(max_mismatch, mismatch);
640 max_gap = max<int>(max_gap, gap);
641 max_intron = max<int>(max_intron, intron);
647 int max_error =
max(max_gap,
max(max_mismatch, max_intron));
664 if (num_bits <
range.GetLength())
668 vector<TValType> matches;
669 vector<TValType> mismatches;
670 vector<TValType> gaps;
671 vector<TValType> introns;
673 auto from =
range.GetFrom();
674 auto to =
range.GetTo();
679 matches.resize(
r.GetLength());
680 mismatches.resize(
r.GetLength());
681 gaps.resize(
r.GetLength());
682 introns.resize(
r.GetLength());
683 m_Data[
"m"]->decode(&matches[0],
r.GetFrom(),
r.GetLength());
684 m_Data[
"mm"]->decode(&mismatches[0],
r.GetFrom(),
r.GetLength());
685 m_Data[
"g"]->decode(&gaps[0],
r.GetFrom(),
r.GetLength());
686 m_Data[
"n"]->decode(&introns[0],
r.GetFrom(),
r.GetLength());
689 for (
size_t i = 0;
i <
r.GetLength(); ++
i) {
694 sub_total = max<int>(sub_total,
static_cast<int>(matches[
i] + mismatches[
i] + gaps[
i] + introns[
i]));
697 from =
r.GetToOpen();
#define BM_DECLARE_TEMP_BLOCK(x)
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Serialization for sparse_vector<>
Checksum and hash calculation classes.
@ eStat_Intron
intron (for mRNA-to-genome alignments)
@ eStat_Mismatch
mismatches (A+G+T+C - matches)
@ eStat_Total
total alignment count at this base (A+G+T+C+Gap)
vector< SStatStruct > TStatVec
CChecksum – Checksum calculator.
Pool of recycled CCompoundID objects.
CCompoundID NewID(ECompoundIDClass new_id_class)
Create and return a new CCompoundID object.
CCompoundID FromString(const string &cid)
Unpack the base64-encoded ID and return a CCompoundID object for field extraction.
Base64-encoded ID string that contains extractable typed fields.
void AppendID(Uint8 id)
Append an eCIT_ID field at the end of this compound ID.
Guard that can be used to automatically return object to the pool after leaving some scope.
void UpdateAlignStats(const TSeqRange &range, float window, CAlnStatGlyph::TStatVec &stats)
CPileUpGraph(const string &data_key)
static string CreateCacheKey(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &remote_path)
CPileUpGraph.
int GetNumberOfReads(const TSeqRange &range)
Note about the "buf_size" parameter for streams in this API.
@ fOwnReader
Own the underlying reader.
@ fOwnWriter
Own the underlying writer.
string m_RemotePath
path to remote data or srz accession
time_t m_LastChecked
timestamp: last time when m_LastModified was checked
void x_WriteMap(ICache &icache, bm::word_t *tb=0)
string m_NetCacheKey
data access key
static CCompoundID CreateCompoundID(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &remote_path)
CSparseGraph.
void x_Sync(TGraphCachePool &icache_pool)
void x_WriteData(ICache &icache, const string &data_key, TSparseVector &sv)
virtual void Init(TGraphCachePool &icache_pool)
string m_LastModified
timestamp of the remote data last modified date as reported by www server
void Update(const TSeqRange &range, const TUpdateMap &update)
virtual void Save(TGraphCachePool &icache_pool)
mutex m_DataMutex
data access mutex
map< string, unique_ptr< TSparseVector > > m_Data
map of sparse vectors; the key is used as part of the suffix of the NetCache key (m_NetCacheKey + "_" + this ...
void x_ReadMap(ICache &icache, string &tag, string &remote_path, string &last_modified, time_t &last_checked, string &md5, bm::bvector<> &bv, bm::word_t *tb=0)
string m_SeqId
NCBI seq_id, e.g. NC_000001.
size_t m_DataSize
size of each vector of m_Data
bm::bvector m_DataMap
bvector that shows the regions for which m_Data exists
void InitFromCompoundID(CCompoundID id)
bool m_IsCacheGood
false value indicates that remote data were updated and the cache was reset
string m_SessionTag
A current session tag, used to separate session netcache blobs from the blobs saved by other sessions...
bool x_ReadData(ICache &icache, const string &data_key, const string &tag, TSparseVector &sv, bm::word_t *tb=0)
void GetMissingRegions(const TSeqRange &range, vector< TSeqRange > &missing_ranges)
Writer-based output stream.
BLOB cache read/write/maintenance interface.
virtual size_t GetSize(const string &key, TBlobVersion version, const string &subkey)=0
Check if BLOB exists, return BLOB size.
virtual IWriter * GetWriteStream(const string &key, TBlobVersion version, const string &subkey, unsigned int time_to_live=0, const string &owner=kEmptyStr)=0
Return sequential stream interface to write BLOB data.
virtual IReader * GetReadStream(const string &key, TBlobVersion version, const string &subkey)=0
Return sequential stream interface to read BLOB data.
void optimize(bm::word_t *temp_block=0, optmode opt_mode=opt_compress, statistics *stat=0)
Optimize the bitvector's memory allocation.
size_type count_range(size_type left, size_type right, const rs_index_type &rs_idx) const noexcept
Returns the count of 1 bits in the given range [left..right]. Uses a rank-select index to accelerate the sea...
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right]. The interval must be inside the bvector's siz...
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const noexcept
Calculates bitvector statistics.
succinct sparse vector with runtime compression using bit-slicing / transposition method
bvector_type::size_type size_type
static const int chunk_size
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static void md5(const char *src, const char *out)
string GetHexSum(void) const
Return string with checksum in hexadecimal form.
void AddLine(const char *line, size_t len)
void GetMD5Digest(unsigned char digest[16]) const
Return calculated MD5 digest.
void AddChars(const char *str, size_t len)
Update current control sum with data provided.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static bool IfModifiedSince(const string &url, string &last_modified)
Works with the HTTP(S) protocol. Returns true if the URL was modified since the last_modified date; if true, last_mo...
static void GetLastModified(const string &url, string &last_modified)
HTTP: returns the header's Last-Modified value in the last_modified parameter. FTP: returns the MD5 of the first 512 byt...
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh, TGetFlags flags=0)
Get the accession.version Seq-id. Returns a null CSeq_id_Handle if the sequence is not found or if it doesn'...
TSeqPos GetSequenceLength(const CSeq_id &id, TGetFlags flags=0)
Get the sequence length. Returns kInvalidSeqPos if the sequence is not found.
static TPid GetPid(void)
Get process identifier (pid) for the current process.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ eNocase
Case insensitive compare.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
double DiffNanoSecond(const CTime &t) const
Difference in nanoseconds from specified time.
CTime & AddHour(int hours=1, EDaylight adl=eDaylightDefault)
Add specified hours and adjust for daylight saving time.
@ eCurrent
Use current time. See also CCurrentTime.
@ eStart
Start timer immediately after creating.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
unsigned int
A callback function used to compare two keys in a database.
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
if(yy_accept[yy_current_state])
bm::id_t bvector_size_type
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Reader-writer based streams.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static void s_GenerateSessionTag(string &session_tag)
static void s_GetMD5Tag(bm::bvector<> &bv, string &tag, bm::word_t *tb=0)
Statistical information about bitset's memory allocation details.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
string GetMD5Digest(const CChecksum &cs)