193 if (
str.empty() ||
str[0] != c ) {
203 return c ==
'-' || (c >=
'0' && c <=
'9');
218 str =
str.substr(int_size);
286 const string& db = dbtag.
GetDb();
317 for ( num_letters = 0; num_letters <
kNumLettersV2; ++num_letters ) {
318 if ( !
isalpha(acc[num_letters]&0xff) ) {
326 for (
size_t i = num_letters;
i < prefix_len; ++
i ) {
332 switch ( acc[row_pos] ) {
356 size_t row_digits = acc.size() - row_pos;
368 for (
size_t i = row_pos;
i < acc.size(); ++
i ) {
370 if ( c < '0' || c >
'9' ) {
384 const string& acc =
id.GetAccession();
394 if (
id.IsSetName() ) {
399 if ( !
id.IsSetAccession() ) {
402 const string& acc =
id.GetAccession();
479 PSG_INFO(
"PSGS_WGS: updated WGS index");
483 PSG_ERROR(
"PSGS_WGS: Exception while updating WGS index: " << exc);
485 catch ( exception& exc ) {
486 PSG_ERROR(
"PSGS_WGS: Exception while updating WGS index: " << exc.
what());
539 int seq_id_type = -1;
542 switch ( req_type ) {
546 seq_id = resolve_request.
m_SeqId;
547 seq_id_type = resolve_request.m_SeqIdType;
553 seq_id = blob_sid_request.
m_SeqId;
554 seq_id_type = blob_sid_request.m_SeqIdType;
570 if ( !seq_id.empty() ) {
571 return CanBeWGS(seq_id_type, seq_id);
582 shared_ptr<SWGSData> ret;
587 if ( !seq )
return ret;
599 shared_ptr<SWGSData> ret;
604 if ( !seq )
return ret;
608 if ( find(excluded.begin(), excluded.end(), ret->m_BlobId) != excluded.end() ) {
618 shared_ptr<SWGSData> ret;
620 if ( !id2_blob_id )
return ret;
623 if ( !seq )
return ret;
632 shared_ptr<SWGSData> ret;
634 if ( !parsed_id2info.
tse_id )
return ret;
637 if ( !seq0 )
return ret;
641 ret = make_shared<SWGSData>();
643 ret->m_Id2BlobId.Reset(&
GetBlobId(seq0));
645 ret->m_Id2BlobState = id2_blob_state;
654 ret = make_shared<SWGSData>();
656 ret->m_Id2BlobId.Reset(&
GetBlobId(seq0));
659 ret->m_Id2BlobState = id2_blob_state;
661 if ( !ret->m_Data ) {
675 GetTiming().Register(
nullptr,
operation, status, start, 0);
688 PSG_INFO(
"PSGS_WGS: GetWGSDb: opened " << prefix <<
" has expired");
709 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " << prefix <<
": " << exc);
716 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " << prefix <<
": " << exc);
719 catch ( exception& exc ) {
721 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " << prefix <<
": " << exc.what());
725 info->m_WGSDb = wgs_db;
726 slot->SetObject(
info);
728 wgs_db =
info->m_WGSDb;
732 PSG_INFO(
"PSGS_WGS: GetWGSDb: " << prefix <<
" is replaced");
737 PSG_INFO(
"PSGS_WGS: GetWGSDb: " << prefix);
895 const auto project_state = seq.
m_WGSDb->GetProjectGBState();
896 switch (project_state) {
901 return project_state == special_state;
915 const auto project_state = seq.
m_WGSDb->GetProjectGBState();
916 switch (project_state) {
929 switch (
id.Which() ) {
973 const string& db = dbtag.
GetDb();
992 if (
isalpha(wgs_acc.back()&0xff) ) {
1003 if ( !wgs_db || wgs_db->IsTSA() != is_tsa ) {
1008 if ( object_id.
IsStr() ) {
1049 gi == wgs_db->GetMasterGi() ) {
1087 if ( !prefixes.empty() ) {
1097 if (
id.IsSetName() ) {
1103 if ( !
id.IsSetAccession() ||
1110 if ( !
id.IsSetAccession() ) {
1113 const string& acc =
id.GetAccession();
1145 const string& acc =
id.GetAccession();
1149 int ask_version =
id.IsSetVersion()?
id.GetVersion(): -1;
1162 if (
TVDBRowId row = wgs_db.GetProtAccRowId(acc, ask_version) ) {
1176 if ( !prefixes.empty() ) {
1194 for ( num_letters = 0; num_letters <
kNumLettersV2; ++num_letters ) {
1195 if ( !
isalpha(acc[num_letters]&0xff) ) {
1203 for (
size_t i = num_letters;
i < prefix_len; ++
i ) {
1209 seq.
m_WGSAcc = acc.substr(0, prefix_len);
1213 switch ( acc[row_pos] ) {
1235 size_t row_digits = acc.size() - row_pos;
1247 for (
size_t i = row_pos;
i < acc.size(); ++
i ) {
1249 if ( c < '0' || c >
'9' ) {
1266 int version =
id.IsSetVersion()?
id.GetVersion(): 1;
1271 if ( acc[num_letters+
i] !=
'0' ) {
1293 if ( seq.
m_WGSDb->GetIdRowDigits() != row_digits ) {
1327 unsigned subsat = unsigned(
id.GetSub_sat());
1331 switch ( seq_type ) {
1374 seq.
m_WGSAcc += char(
'A'+subsat%26);
1378 seq.
m_WGSAcc += char(
'0'+subsat%10);
1385 if (skip_lookup ||
GetWGSDb(seq)) {
1387 seq.
m_RowId =
id.GetSat_key();
1393 for (
size_t i = 0;
i < 6; ++
i ) {
1397 for (
size_t i = 0;
i < 2; ++
i ) {
1412 if (skip_lookup ||
GetWGSDb(seq)) {
1414 seq.
m_RowId =
id.GetSat_key();
1431 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1463 id->SetSub_sat(subsat);
1464 id->SetSat_key(
int(seq.
m_RowId));
1485 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1503 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1516 id->SetSub_sat(
int(subsat));
1517 id->SetSat_key(
int(seq.
m_RowId));
1527 switch ( gb_state ) {
1580 return GetWGSDb(seq)->GetMasterSeq_id();
1629 ids.push_back(gi_id);
1649 if (seq_id_type <= 0) {
1663 data = make_shared<SWGSData>();
1665 data->m_BioseqInfo = make_shared<CBioseqInfoRecord>();
1668 list< CRef<CSeq_id> > wgs_ids;
1672 for (
auto&
id : wgs_ids ) {
1679 else if (
auto text_id = id->GetTextseq_Id() ) {
1682 text_id->IsSetAccession() && text_id->IsSetVersion() ) {
1683 info.SetSeqIdType(id->Which());
1684 info.SetAccession(text_id->GetAccession());
1685 info.SetVersion(text_id->GetVersion());
1686 if ( text_id->IsSetName() ) {
1687 info.SetName(text_id->GetName());
1689 data->m_BioseqInfoFlags |=
1697 psg_ids.
insert(make_tuple(id->Which(), std::move(content)));
1706 info.SetAccession(content);
1709 data->m_BioseqInfoFlags |=
1715 psg_ids.
insert(make_tuple(gi_id.
Which(), std::move(content)));
1719 info.SetSeqIds(std::move(psg_ids));
1722 data->m_BioseqInfoFlags |=
1733 data->m_BioseqInfoFlags |=
1746 data->m_BioseqInfoFlags |=
1754 data->m_BioseqInfoFlags |=
1759 data->m_BioseqInfoFlags |=
1766 if ( wgs->HasCommonTaxId() ) {
1767 info.SetTaxId(wgs->GetCommonTaxId());
1773 if ( root_seq.IsContig() ) {
1780 if ( root_seq.IsProtein() ) {
1793 if (
data->m_Id2BlobId->IsSetVersion() ) {
1796 info.SetDateChanged(
data->m_Id2BlobId->GetVersion()*60000);
1801 info.SetState(
data->GetPSGBioseqState());
1809 data = make_shared<SWGSData>();
1817 if (
data->IsForbidden() )
return;
1835 if ( asn_data.first ) {
1836 data->m_SplitVersion = asn_data.second;
1838 if ( !asn_data.first ) {
1841 if ( !asn_data.first ) {
1844 data->m_Data = asn_data.first;
1868 if (
data->m_Data ) {
1882 auto id_str = blob_id.
GetId();
1889 id->SetSub_sat(subsat);
1890 id->SetSat_key(satkey);
1915 id->SetSub_sat(subsat);
1916 id->SetSat_key(satkey);
1917 id->SetVersion(tse_version);
1930 else if (
id.IsGeneral() ) {
1933 else if (
auto text_id =
id.GetTextseq_Id() ) {
1938 catch ( exception& ) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
virtual void DoJob(void)
Payload function.
CIndexUpdateThread(unsigned update_delay, CRef< CWGSResolver > resolver)
CRef< CWGSResolver > m_Resolver
EPSGS_Type GetRequestType(void) const
@ ePSGS_BlobBySatSatKeyRequest
@ ePSGS_BlobBySeqIdRequest
TRequest & GetRequest(void)
static CPubseqGatewayApp * GetInstance(void)
@ eProtectedDb
DB is protected.
@ eNotFoundDb
DB main file not found.
virtual TErrCode GetErrCode(void) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Adaptation of CThread class repeatedly running some job.
void RequestStop()
Schedule thread Stop.
CRef< CSlot > GetSlot(const string &acc_or_path)
static void SetSeqId(CSeq_id &id, int seq_id_type, const string &seq_id)
shared_ptr< SWGSData > GetChunk(const string &id2info, int64_t chunk_id)
bool GetCompress(SWGSProcessor_Config::ECompressData comp, const SWGSSeqInfo &seq, const objects::CAsnBinData &data) const
SWGSSeqInfo ResolveBlobId(const objects::CID2_Blob_Id &id, bool skip_lookup=false)
SWGSSeqInfo ResolveGi(TGi gi, bool skip_lookup=false)
static bool IsOSGBlob(const CID2_Blob_Id &blob_id)
shared_ptr< SWGSData > GetBlobByBlobId(const string &blob_id)
bool IsCorrectVersion(SWGSSeqInfo &seq, int version)
void ResetIteratorCache(SWGSSeqInfo &seq)
void GetWGSData(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq0)
static string GetPSGBlobId(const CID2_Blob_Id &blob_id)
CRef< objects::CSeq_id > GetAccVer(SWGSSeqInfo &seq)
CWGSClient(const SWGSProcessor_Config &config)
CFastMutex m_ResolverMutex
SWGSSeqInfo & GetRootSeq(SWGSSeqInfo &seq0)
static bool CanBeWGS(int seq_id_type, const string &seq_id)
objects::CWGSScaffoldIterator & GetScaffoldIterator(SWGSSeqInfo &seq)
shared_ptr< SWGSData > ResolveSeqId(const objects::CSeq_id &seq_id)
TGi GetGi(SWGSSeqInfo &seq)
shared_ptr< SWGSData > GetSeqInfoBySeqId(const objects::CSeq_id &seq_id, SWGSSeqInfo &seq, const TBlobIds &excluded)
CRef< objects::CWGSResolver > m_Resolver
bool IsValidRowId(SWGSSeqInfo &seq)
CRef< objects::CWGSResolver > GetWGSResolver(void)
SWGSSeqInfo ResolveAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
SWGSSeqInfo Resolve(const objects::CSeq_id &id, bool skip_lookup=false)
bool HasMigrated(SWGSSeqInfo &seq)
objects::CID2_Blob_Id & GetBlobId(SWGSSeqInfo &id)
static SParsedId2Info ParsePSGId2Info(const string &idsss2_info)
objects::CWGSDb GetWGSDb(const string &prefix)
CRef< objects::CSeq_id > GetGeneral(SWGSSeqInfo &seq)
bool CanProcessRequest(CPSGS_Request &request)
int GetID2BlobState(SWGSSeqInfo &seq)
objects::CWGSSeqIterator & GetContigIterator(SWGSSeqInfo &seq)
vector< string > TBlobIds
NCBI_gb_state GetGBState(SWGSSeqInfo &seq0)
void GetBioseqInfo(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq)
objects::CWGSProteinIterator & GetProteinIterator(SWGSSeqInfo &seq)
static CRef< CID2_Blob_Id > ParsePSGBlobId(const SPSGS_BlobId &blob_id)
void x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
SWGSSeqInfo ResolveWGSAcc(const string &acc, const objects::CTextseq_id &id, TAllowSeqType allow_seq_type, bool skip_lookup=false)
bool HasSpecialState(SWGSSeqInfo &seq, NCBI_gb_state special_state)
SWGSSeqInfo ResolveGeneral(const objects::CDbtag &dbtag, bool skip_lookup=false)
SWGSSeqInfo ResolveProtAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
SWGSProcessor_Config m_Config
void GetSeqIds(SWGSSeqInfo &seq, list< CRef< objects::CSeq_id > > &ids)
CRef< CThreadNonStop > m_IndexUpdateThread
TVDBRowId GetContigNameRowId(const string &name) const
TVDBRowId GetProteinNameRowId(const string &name) const
bool LoadMasterDescr(EDescrFilter filter=eDescrDefaultFilter) const
TVDBRowId GetScaffoldNameRowId(const string &name) const
TVDBRowId GetLocRowId(void) const
NCBI_WGS_seqtype GetLocSeqType(void) const
ESeqType GetSeqType(void) const
TVDBRowId GetRowId(void) const
NCBI_gb_state GetGBState(void) const
bool HasTaxId(void) const
TSeqPos GetSeqLength(void) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
int GetAccVersion(void) const
bool HasSeqHash(void) const
TTaxId GetTaxId(void) const
virtual TWGSPrefixes GetPrefixes(TGi gi)=0
virtual void SetNonWGS(TGi gi, const TWGSPrefixes &prefixes)
vector< string > TWGSPrefixes
virtual bool Update(void)
static CRef< CWGSResolver > CreateResolver(const CVDBMgr &mgr)
virtual void SetWGSPrefix(TGi gi, const TWGSPrefixes &prefixes, const string &prefix)
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
TSeqPos GetSeqLength(void) const
CRef< CAsnBinData > GetSeq_entryData(TFlags flags=fDefaultFlags) const
CRef< CAsnBinData > GetChunkDataForVersion(TChunkId chunk_id, TSplitVersion split_version) const
CRef< CID2S_Chunk > GetChunkForVersion(TChunkId chunk_id, TSplitVersion split_version) const
TTaxId GetTaxId(void) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
pair< CRef< CAsnBinData >, TSplitVersion > GetSplitInfoDataAndVersion(TFlags flags=fDefaultFlags) const
bool HasTaxId(void) const
TSeqPos GetSeqLength(EClipType clip_type=eDefaultClip) const
bool HasAccVersion(int version) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
iterator_bool insert(const value_type &val)
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
EAccessionInfo
For IdentifyAccession (below)
@ eFasta_AsTypeAndContent
@ eFastaContent
Like eFasta, but without any tag.
void Reset(void)
Reset reference object.
void Swap(TThisType &ref)
Swaps the pointer with another reference.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
NCBI_NS_STD::string::size_type SIZE_TYPE
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ToUpper(string &str)
Convert string to upper case – string& version.
@ fConvErr_NoErrMessage
Set errno, but do not set CNcbiError message on error.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ eNocase
Case insensitive compare.
bool Run(TRunMode flags=fRunDefault)
Run the thread.
CGuard< CRWLock, SSimpleWriteLock< CRWLock > > TWriteLockGuard
void Join(void **exit_data=0)
Wait for the thread termination.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
TSat_key GetSat_key(void) const
Get the Sat_key member data.
TSub_sat GetSub_sat(void) const
Get the Sub_sat member data.
TSat GetSat(void) const
Get the Sat member data.
@ eID2_Blob_State_suppressed
@ eID2_Blob_State_protected
@ eID2_Blob_State_withdrawn
E_Choice Which(void) const
Which variant is currently selected.
TGi & SetGi(void)
Select the variant.
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
@ e_Gibbmt
Geninfo backbone moltype.
@ e_Giim
Geninfo import id.
@ e_Gibbsq
Geninfo backbone seqid.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_not_set
No variant selected.
unsigned int
A callback function used to compare two keys in a database.
const string version
version string
const GenericPointer< typename T::ValueType > T2 value
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
#define PSG_ERROR(message)
#define PSG_INFO(message)
psg_clock_t::time_point psg_time_point_t
#define row(bind, expected)
TID2SplitVersion split_version
CRef< CID2_Blob_Id > tse_id
AutoPtr< SWGSSeqInfo > m_RootSeq
objects::CWGSProteinIterator m_ProteinIter
bool IsProtein(void) const
bool IsMaster(void) const
objects::CWGSScaffoldIterator m_ScaffoldIter
objects::CWGSSeqIterator m_ContigIter
objects::TVDBRowId m_RowId
CRef< objects::CID2_Blob_Id > m_BlobId
bool IsContig(void) const
bool IsScaffold(void) const
int GetPSGBioseqState() const
unsigned m_IndexUpdateDelay
ECompressData m_CompressData
static EAddMasterDescr s_AddMasterDescrLevel(void)
static bool s_AddMasterDescrScaffold()
static bool s_ParseInt(CTempString &str, Int &v)
static const size_t kNumLettersV1
static bool s_Skip(CTempString &str, char c)
static const size_t kMaxRowDigitsV2
static const int kOSG_Sat_CDD_max
static const EResolveMaster kResolveMaster
static bool s_IsValidIntChar(char c)
static int s_GBStateToID2(NCBI_gb_state gb_state)
static const size_t kMinRowDigitsV2
static bool s_MarkMasterDescr(void)
static bool IsWGSProtAccession(const CTextseq_id &id)
static bool s_IsEnabledOSGSat(CWGSClient::TEnabledFlags enabled_flags, Int4 sat)
static const size_t kNumLettersV2
NCBI_PARAM_DEF_EX(bool, WGS, FILTER_ALL, false, eParam_NoThread, WGS_FILTER_ALL)
NCBI_PARAM_DEF(bool, WGS, SPLIT_FEATURES, true)
static const int kOSG_Sat_WGS_max
static bool s_KeepReplaced(void)
static bool s_ParseOSGBlob(CTempString &s, Int4 &sat, Int4 &subsat, Int4 &satkey)
static const size_t kVersionDigits
static const int kBlobIdV2VersionContig
static const int kBlobIdV2SatMax
static const int kBlobIdV2SatMin
static const size_t kTypePrefixLen
static bool s_AddMasterDescrContig()
static bool s_SplitFeatures(void)
static const size_t kMaxRowDigitsV1
static const size_t kPrefixLenV2
static bool IsWGSGeneral(const CDbtag &dbtag)
static const int kBlobIdV2VersionScaffold
@ eAddMasterDescr_detached
NCBI_PARAM_DECL(bool, WGS, FILTER_ALL)
static EAddMasterDescr s_ProcessAddMasterDescr(void)
static bool s_IsOSGSat(Int4 sat)
static const int kBlobIdV1Sat
static bool s_KeepMigrated(void)
static const size_t kMaxProtAccLen
static const int kBlobIdV2VersionProtein
static const int kOSG_Sat_CDD_min
static const int kOSG_Sat_WGS_min
static bool s_IsOSGBlob(Int4 sat, Int4, Int4)
static const char kSubSatSeparator
static const size_t kMinRowDigitsV1
static bool s_AddMasterDescrProtein()
static const int kOSG_Sat_SNP_min
static const size_t kMinProtAccLen
@ eResolveMaster_without_gi
static const int kOSG_Sat_SNP_max
static bool IsWGSAccession(const string &acc, const CTextseq_id &id, TAllowSeqType allow_seq_type)
static void s_FormatBlobId(ostream &s, const CID2_Blob_Id &blob_id)
static const size_t kPrefixLenV1
@ NCBI_gb_state_eWGSGenBankUnverified
@ NCBI_gb_state_eWGSGenBankReplaced
@ NCBI_gb_state_eWGSGenBankSuppressed
@ NCBI_gb_state_eWGSGenBankMigrated
@ NCBI_gb_state_eWGSGenBankLive
@ NCBI_gb_state_eWGSGenBankWithdrawn
@ NCBI_WGS_seqtype_scaffold
@ NCBI_WGS_seqtype_contig