69 #define NCBI_USE_ERRCODE_X SNPLoader
123 static unsigned value =
135 static unsigned value =
199 m_IsPrimaryTrack(
false),
200 m_IsPrimaryTrackGraph(
false),
202 m_FilterIndex(
Uint4(filter_index)),
215 m_IsPrimaryTrack(
false),
216 m_IsPrimaryTrackGraph(
false),
217 m_SeqIndex(
Uint4(seq_index)),
218 m_FilterIndex(
Uint4(filter_index))
220 if (
file.IsValidNA() ) {
221 SetSatNA(
file.GetAccession());
225 m_Accession =
file.GetAccession();
227 SetSeqAndFilterIndex(seq_index, filter_index);
246 return na_index > 0 && na_index < 1000000000;
336 pair<size_t, size_t> ret(0, 0);
338 if ( acc.
size() < 13 || acc.
size() > 15 ||
339 acc[0] !=
'N' || acc[1] !=
'A' || acc[11] !=
'.' ) {
342 size_t na_index = NStr::StringToNumeric<size_t>(acc.
substr(2, 9),
347 size_t na_version = NStr::StringToNumeric<size_t>(acc.
substr(12),
352 ret.first = na_index;
353 ret.second = na_version;
369 pair<size_t, size_t> na =
ParseNA(acc);
433 size_t size = s.size();
435 while ( pos &&
isdigit(s[pos-1]) ) {
438 size_t num_len =
size - pos;
443 size_t index = NStr::StringToNumeric<size_t>(s.substr(pos));
485 size_t dot1 =
str.find(
'.');
486 if ( dot1 ==
NPOS ) {
489 size_t dot2 =
str.find(
'.', dot1+1);
490 if ( dot2 ==
NPOS ) {
493 size_t sat = NStr::StringToNumeric<size_t>(
str.substr(0, dot1),
500 size_t subsat = NStr::StringToNumeric<size_t>(
str.substr(dot1+1, dot2-dot1-1),
502 bool is_primary_track_graph = is_primary_track && subsat >=
kSNPSubSatGraph;
503 size_t na_index = subsat - (is_primary_track_graph?
kSNPSubSatGraph: 0);
508 size_t satkey = NStr::StringToNumeric<size_t>(
str.substr(dot2+1),
548 "Bad CSNPBlobId: "<<
str);
609 m_FileReopenTime, m_FileRecheckTime)
619 ERR_POST_ONCE(
"CSNPDataLoader: SNP primary track is disabled due to lack of GRPC support");
645 unsigned retry_count)
647 if ( retry_count == 0 ) {
650 for (
unsigned t = 1;
t < retry_count; ++
t ) {
659 LOG_POST(
Warning<<
"CSNPDataLoader::"<<name<<
"() try "<<
t<<
" exception: "<<exc);
661 catch ( exception& exc ) {
670 LOG_POST(
Warning<<
"CSNPDataLoader: waiting "<<wait_sec<<
"s before retry");
689 string key =
info->GetBaseAnnotName();
693 "Duplicated fixed SNP NA: "<<
698 info_slot->SetObject(
info);
699 info->InitializeDb(*
this);
714 "Reopening SNP file expired in cache: "<<
file);
716 info_slot->ResetObject();
724 info_slot->SetObject(
info);
738 info->InitializeDb(*
this);
795 data_source, cref(blob_id)),
830 if (
auto seq_id =
id.
GetSeqId() ) {
831 if (
const CTextseq_id* text_id = seq_id->GetTextseq_Id() ) {
832 if ( text_id->IsSetAccession() && !text_id->GetAccession().empty() &&
833 text_id->IsSetVersion() && text_id->GetVersion() > 0 ) {
850 ds, cref(
id), sel, processed_nas),
851 "GetOrphanAnnotRecords");
867 auto accs_size = accs.
size();
882 if (!
id.IsAllowedSNPScaleLimit(scale_limit))
continue;
886 if ( !acc_ver.empty() ) {
895 LOG_POST_X(13,
Info<<
"CSNPDataLoader:PTIS: "<<acc_ver<<
" primary SNP track is "<<na_acc);
899 ERR_POST_X(2,
"CSNPDataLoader: failed to add PTIS track for "<<acc_ver<<
": "<<exc);
901 if ( !na_acc.empty() ) {
905 seq->SetFilterIndex(filter_index);
907 auto blob_id = seq->GetBlobId();
912 auto blob_id = seq->GetBlobId();
923 string acc = it->first;
925 if ( filter_index == 0 && acc.size() == it->first.size() ) {
932 seq->SetFilterIndex(filter_index);
962 cref(blob_id), ref(chunk)),
973 "LoadChunk("<<blob_id<<
", "<<chunk_info.
GetChunkId()<<
")");
979 "LoadChunk("<<blob_id<<
", "<<chunk_info.
GetChunkId()<<
")"
1028 if ( sep !=
NPOS ) {
1119 m_SeqIndex(it.GetVDBSeqIndex()),
1121 m_IsPrimaryTrack(
false),
1122 m_IsPrimaryTrackGraph(
false)
1124 if ( !
file->IsValidNA() ) {
1142 blob_id->SetPrimaryTrackGraph();
1145 blob_id->SetPrimaryTrackFeat();
1217 <<
" SV=" <<
split.second
1242 <<
" SV=" << split_version
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Blob state exceptions, used by GenBank loader.
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Int4 GetSubSat(void) const
void SetPrimaryTrackFeat()
bool IsValidSatKey(void) const
static bool IsValidSeqIndex(size_t seq_index)
bool m_IsPrimaryTrackGraph
int GetSubSatBase(void) const
bool IsValidSat(void) const
size_t GetFilterIndex(void) const
string ToString(void) const
Get string representation of blob id.
CSNPBlobId(const CTempString &str)
static bool IsValidNAIndex(size_t index)
static bool IsValidNAVersion(size_t version)
void FromString(CTempString str)
void SetSatNA(CTempString acc)
void SetSeqAndFilterIndex(size_t seq_index, size_t filter_index)
size_t GetNAVersion(void) const
static pair< size_t, size_t > ParseNA(CTempString acc)
void SetNAIndex(size_t na_index)
static bool IsValidNA(pair< size_t, size_t > na)
bool operator<(const CBlobId &id) const
Int4 GetSatKey(void) const
void SetNAVersion(size_t na_version)
size_t GetNAIndex(void) const
bool FromSatString(CTempString str)
bool IsPrimaryTrackGraph() const
int GetSatBase(void) const
bool IsValidSubSat(void) const
bool IsPrimaryTrack() const
CSeq_id_Handle GetSeqId(void) const
bool operator==(const CBlobId &id) const
static bool IsValidFilterIndex(size_t filter_index)
string GetAccession(void) const
string GetSatNA(void) const
size_t GetSeqIndex(void) const
void SetPrimaryTrackGraph()
CRef< CSNPFileInfo > FindFile(const string &acc)
CRef< CSNPFileInfo > GetFixedFile(const string &acc)
std::invoke_result< Call >::type CallWithRetry(Call &&call, const char *name, unsigned retry_count=0)
CRef< CSNPFileInfo > x_GetFileInfo(const string &file)
CRef< CSNPSeqInfo > GetSeqInfo(const CSNPBlobId &blob_id)
void GetChunkOnce(const CSNPBlobId &blob_id, CTSE_Chunk_Info &chunk_info)
CDataLoader::TTSE_LockSet GetRecords(CDataSource *data_source, const CSeq_id_Handle &idh, CDataLoader::EChoice choice)
void LoadBlob(const CSNPBlobId &blob_id, CTSE_LoadLock &load_lock)
CObjectManager::TPriority GetDefaultPriority(void) const
CSNPDataLoader_Impl(const CSNPDataLoader::SLoaderParams &params)
CTSE_LoadLock GetBlobById(CDataSource *data_source, const CSNPBlobId &blob_id)
friend class CSNPFileInfo
CRef< CSnpPtisClient > m_PTISClient
CTSE_LoadLock GetBlobByIdOnce(CDataSource *data_source, const CSNPBlobId &blob_id)
CRef< CSNPFileInfo > GetFileInfo(const string &acc)
CDataLoader::TTSE_LockSet GetOrphanAnnotRecords(CDataSource *ds, const CSeq_id_Handle &idh, const SAnnotSelector *sel, CDataLoader::TProcessedNAs *processed_nas)
CSNPDataLoader::TAnnotNames TAnnotNames
TAnnotNames GetPossibleAnnotNames(void) const
void GetChunk(const CSNPBlobId &blob_id, CTSE_Chunk_Info &chunk)
CDataLoader::TTSE_LockSet GetOrphanAnnotRecordsOnce(CDataSource *ds, const CSeq_id_Handle &id, const SAnnotSelector *sel, CDataLoader::TProcessedNAs *processed_nas)
void AddFixedFileOnce(const string &file_name)
void AddFixedFile(const string &file_name)
~CSNPDataLoader_Impl(void)
vector< CAnnotName > TAnnotNames
static CSeq_id::ESNPScaleLimit GetSNP_Scale_Limit(void)
pair< CRef< CID2S_Split_Info >, TSplitVersion > GetSplitInfoAndVersion(const string &base_name, TFlags flags=fDefaultFlags) const
void SetTrack(const CSNPDbTrackIterator &track)
CSNPDb_Impl & GetDb(void) const
const CSeq_id_Handle & GetSeqIdHandle(void) const
CRef< CSeq_entry > GetEntry(const string &base_name, TFlags flags=fDefaultFlags) const
CRef< CID2S_Chunk > GetChunkForVersion(const string &base_name, TChunkId chunk_id, TSplitVersion split_version) const
size_t GetVDBSeqIndex(void) const
const string & GetDbPath(void) const
unsigned m_RemainingOpenRetries
CSNPDataLoader::TAnnotNames TAnnotNames
CRef< CSNPSeqInfo > GetSeqInfo(const CSeq_id_Handle &seq_id)
const string & GetBaseAnnotName(void) const
void InitializeDb(CSNPDataLoader_Impl &impl)
void x_Initialize(CSNPDataLoader_Impl &impl, const string &file_name)
CSNPFileInfo(CSNPDataLoader_Impl &impl, const string &file_name)
void GetPossibleAnnotNames(TAnnotNames &names) const
string GetSNPAnnotName(size_t filter_index) const
void LoadAnnotChunk(CTSE_Chunk_Info &chunk_info)
CSNPSeqInfo(CSNPFileInfo *file, const CSNPDbSeqIterator &it)
bool m_IsPrimaryTrackGraph
CSNPDbSeqIterator GetSeqIterator(void) const
void LoadAnnotBlob(CTSE_LoadLock &load_lock)
void SetFromBlobId(const CSNPBlobId &blob_id)
void SetFilterIndex(size_t filter_index)
string GetAnnotName(void) const
CRef< CSNPBlobId > GetBlobId(void) const
static CRef< CSnpPtisClient > CreateClient()
virtual string GetPrimarySnpTrackForAccVer(const string &acc_ver)=0
static void Attach(CTSE_Info &tse, const CID2S_Split_Info &split)
static void Load(CTSE_Chunk_Info &chunk, const CID2S_Chunk &data)
@ eProtectedDb
DB is protected.
@ eNotFoundDb
DB main file not found.
virtual TErrCode GetErrCode(void) const
void SetLoaded(CObject *obj=0)
TChunkId GetChunkId(void) const
const CTSE_Split_Info & GetSplitInfo(void) const
CTSE_Split_Info & GetSplitInfo(void)
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
bool IsLoaded(void) const
void SetSplitVersion(TSplitVersion version)
TSplitVersion GetSplitVersion(void) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
size_t get_size_limit() const
void set_size_limit(size_t limit)
CRef< CSlot > GetSlot(const string &acc_or_path)
container_type::iterator iterator
const_iterator end() const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
static const struct name_t names[]
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define LOG_POST_X(err_subcode, message)
#define ERR_POST_ONCE(message)
Error posting only once during program execution.
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define LOG_POST(message)
This macro is deprecated, and it is strongly recommended to move all projects (except tests) to macro...
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
void Info(CExceptionArgs_Base &args)
#define MSerial_AsnText
I/O stream manipulators –.
void Reset(void)
Reset the handle (remove seq-id reference)
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static void SetProcessedNA(const string &na, TProcessedNAs *processed_nas)
EChoice
The main blob is the blob with the sequence; all other blobs are external and contain external annotations.
@ kPriority_Replace
Default priority for replacement loaders.
bool IsIncludedAnyNamedAnnotAccession(void) const
check if any named annot accession is included in the search
TSNPScaleLimit GetSNPScaleLimit(void) const
const TNamedAnnotAccessions & GetNamedAnnotAccessions(void) const
void Swap(TThisType &ref)
Swaps the pointer with another reference.
TObjectType & GetObject(void)
Get object.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
int32_t Int4
4-byte (32-bit) signed integer
unsigned char Uchar
Alias for unsigned char.
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type size(void) const
Return the length of the represented array.
@ fConvErr_NoThrow
Do not throw an exception on error.
CMutexGuard TWriteLockGuard
Define Write Lock Guard.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Start(void)
Start the timer.
@ eStart
Start timer immediately after creating.
Definition of all error codes used in SRA C++ support libraries.
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
void SleepMilliSec(unsigned long ml_sec, EInterruptOnSignal onsignal=eRestartOnSignal)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
Helper classes and templates to implement plugins.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
static const char kFileEnd[]
static size_t sx_ExtractFilterIndex(string &s)
static unsigned GetRetryCountParam(void)
NCBI_PARAM_DEF(unsigned, SNP_LOADER, RETRY_COUNT, 3)
static bool IsSplitEnabled(void)
const int kFilterIndexCount
static size_t GetGCSize(void)
const int kSNPSubSatGraph
const int kFilterIndexMaxLength
static int GetDebugLevel(void)
static unsigned GetFileRecheckTimeParam(void)
static string sx_AddFilterIndex(const string &s, size_t filter_index)
NCBI_PARAM_DECL(int, SNP_LOADER, DEBUG)
NCBI_PARAM_DEF_EX(int, SNP_LOADER, DEBUG, 0, eParam_NoThread, SNP_LOADER_DEBUG)
static const char kFilterPrefixChar
static unsigned GetFileReopenTimeParam(void)
NCBI_DEFINE_ERR_SUBCODE_X(16)
static string s_GetAccVer(const CSeq_id_Handle &id)