224 PSG_INFO(
"PSGS_WGS: updated WGS index");
228 PSG_ERROR(
"PSGS_WGS: Exception while updating WGS index: " << exc);
230 catch ( exception& exc ) {
231 PSG_ERROR(
"PSGS_WGS: Exception while updating WGS index: " << exc.
what());
284 int seq_id_type = -1;
287 switch ( req_type ) {
291 seq_id = resolve_request.
m_SeqId;
292 seq_id_type = resolve_request.m_SeqIdType;
298 seq_id = blob_sid_request.
m_SeqId;
299 seq_id_type = blob_sid_request.m_SeqIdType;
303 blob_id = osg::CPSGS_OSGGetBlobBase::ParsePSGBlobId(
309 blob_id = osg::CPSGS_OSGGetBlobBase::ParsePSGId2Info(chunk_request.m_Id2Info).tse_id;
316 if ( !seq_id.empty() ) {
317 return osg::CPSGS_OSGResolveBase::CanBeWGS(seq_id_type, seq_id);
328 shared_ptr<SWGSData> ret;
333 if ( !seq )
return ret;
342 shared_ptr<SWGSData> ret;
347 if ( !seq )
return ret;
351 if ( find(excluded.begin(), excluded.end(), ret->m_BlobId) != excluded.end() ) {
352 ret->m_Excluded =
true;
363 shared_ptr<SWGSData> ret;
365 if ( !id2_blob_id )
return ret;
368 if ( !seq )
return ret;
377 shared_ptr<SWGSData> ret;
378 osg::CPSGS_OSGGetBlobBase::SParsedId2Info parsed_id2info =
379 osg::CPSGS_OSGGetBlobBase::ParsePSGId2Info(id2info);
380 if ( !parsed_id2info.tse_id )
return ret;
383 if ( !seq0 )
return ret;
387 ret = make_shared<SWGSData>();
388 ret->m_Id2BlobId.Reset(&
GetBlobId(seq0));
389 ret->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*ret->m_Id2BlobId);
390 ret->m_Id2BlobState = id2_blob_state;
399 ret = make_shared<SWGSData>();
400 ret->m_Id2BlobId.Reset(&
GetBlobId(seq0));
401 ret->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*ret->m_Id2BlobId);
402 ret->m_SplitVersion = parsed_id2info.split_version;
403 ret->m_Id2BlobState = id2_blob_state;
405 if ( !ret->m_Data ) {
419 GetTiming().Register(
nullptr,
operation, status, start, 0);
432 PSG_INFO(
"PSGS_WGS: GetWGSDb: opened " <<
prefix <<
" has expired");
453 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " <<
prefix <<
": " << exc);
460 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " <<
prefix <<
": " << exc);
463 catch ( exception& exc ) {
465 PSG_ERROR(
"PSGS_WGS: Exception while opening WGS DB " <<
prefix <<
": " << exc.what());
469 info->m_WGSDb = wgs_db;
470 slot->SetObject(
info);
472 wgs_db =
info->m_WGSDb;
639 const auto project_state = seq.
m_WGSDb->GetProjectGBState();
640 switch (project_state) {
645 return project_state == special_state;
659 const auto project_state = seq.
m_WGSDb->GetProjectGBState();
660 switch (project_state) {
673 switch (
id.Which() ) {
717 const string& db = dbtag.
GetDb();
736 if (
isalpha(wgs_acc.back()&0xff) ) {
747 if ( !wgs_db || wgs_db->IsTSA() != is_tsa ) {
752 if ( object_id.
IsStr() ) {
793 gi == wgs_db->GetMasterGi() ) {
831 if ( !prefixes.empty() ) {
841 if (
id.IsSetName() ) {
847 if ( !
id.IsSetAccession() ||
854 if ( !
id.IsSetAccession() ) {
857 const string& acc =
id.GetAccession();
889 const string& acc =
id.GetAccession();
893 int ask_version =
id.IsSetVersion()?
id.GetVersion(): -1;
906 if (
TVDBRowId row = wgs_db.GetProtAccRowId(acc, ask_version) ) {
920 if ( !prefixes.empty() ) {
938 for ( num_letters = 0; num_letters <
kNumLettersV2; ++num_letters ) {
939 if ( !
isalpha(acc[num_letters]&0xff) ) {
947 for (
size_t i = num_letters;
i < prefix_len; ++
i ) {
953 seq.
m_WGSAcc = acc.substr(0, prefix_len);
957 switch ( acc[row_pos] ) {
979 size_t row_digits = acc.size() - row_pos;
991 for (
size_t i = row_pos;
i < acc.size(); ++
i ) {
993 if ( c < '0' || c >
'9' ) {
996 row = row*10+(c-
'0');
1010 int version =
id.IsSetVersion()?
id.GetVersion(): 1;
1015 if ( acc[num_letters+
i] !=
'0' ) {
1037 if ( seq.
m_WGSDb->GetIdRowDigits() != row_digits ) {
1071 unsigned subsat = unsigned(
id.GetSub_sat());
1075 switch ( seq_type ) {
1118 seq.
m_WGSAcc += char(
'A'+subsat%26);
1122 seq.
m_WGSAcc += char(
'0'+subsat%10);
1129 if (skip_lookup ||
GetWGSDb(seq)) {
1131 seq.
m_RowId =
id.GetSat_key();
1137 for (
size_t i = 0;
i < 6; ++
i ) {
1141 for (
size_t i = 0;
i < 2; ++
i ) {
1156 if (skip_lookup ||
GetWGSDb(seq)) {
1158 seq.
m_RowId =
id.GetSat_key();
1175 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1207 id->SetSub_sat(subsat);
1208 id->SetSat_key(
int(seq.
m_RowId));
1229 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1247 for (
size_t i = 0;
i < seq.
m_WGSAcc.size(); ++
i ) {
1260 id->SetSub_sat(
int(subsat));
1261 id->SetSat_key(
int(seq.
m_RowId));
1271 switch ( gb_state ) {
1324 return GetWGSDb(seq)->GetMasterSeq_id();
1373 ids.push_back(gi_id);
1395 data = make_shared<SWGSData>();
1396 data->m_BioseqInfo = make_shared<CBioseqInfoRecord>();
1399 list< CRef<CSeq_id> > wgs_ids;
1403 for (
auto&
id : wgs_ids ) {
1410 else if (
auto text_id = id->GetTextseq_Id() ) {
1413 text_id->IsSetAccession() && text_id->IsSetVersion() ) {
1414 info.SetSeqIdType(id->Which());
1415 info.SetAccession(text_id->GetAccession());
1416 info.SetVersion(text_id->GetVersion());
1417 if ( text_id->IsSetName() ) {
1418 info.SetName(text_id->GetName());
1420 data->m_BioseqInfoFlags |=
1428 psg_ids.
insert(make_tuple(id->Which(), move(content)));
1437 info.SetAccession(content);
1440 data->m_BioseqInfoFlags |=
1446 psg_ids.
insert(make_tuple(gi_id.
Which(), move(content)));
1450 info.SetSeqIds(move(psg_ids));
1453 data->m_BioseqInfoFlags |=
1464 data->m_BioseqInfoFlags |=
1477 data->m_BioseqInfoFlags |=
1485 data->m_BioseqInfoFlags |=
1490 data->m_BioseqInfoFlags |=
1497 if ( wgs->HasCommonTaxId() ) {
1498 info.SetTaxId(wgs->GetCommonTaxId());
1504 if ( root_seq.IsContig() ) {
1511 if ( root_seq.IsProtein() ) {
1521 data->m_Id2BlobId.Reset(&
GetBlobId(seq));
1522 data->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*data->m_Id2BlobId);
1524 if ( data->m_Id2BlobId->IsSetVersion() ) {
1527 info.SetDateChanged(data->m_Id2BlobId->GetVersion()*60000);
1532 info.SetState(data->GetPSGBioseqState());
1540 data = make_shared<SWGSData>();
1544 if ( !data->m_Id2BlobId ) data->m_Id2BlobId.Reset(&
GetBlobId(seq0));
1545 if ( data->m_BlobId.empty() ) data->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*data->m_Id2BlobId);
1547 if ( data->IsForbidden() )
return;
1565 if ( asn_data.first ) {
1566 data->m_SplitVersion = asn_data.second;
1568 if ( !asn_data.first ) {
1571 if ( !asn_data.first ) {
1574 data->m_Data = asn_data.first;
1598 if ( data->m_Data ) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const CSerialObject & GetMainObject(void) const
virtual void DoJob(void)
Payload function.
CIndexUpdateThread(unsigned update_delay, CRef< CWGSResolver > resolver)
CRef< CWGSResolver > m_Resolver
EPSGS_Type GetRequestType(void) const
@ ePSGS_BlobBySatSatKeyRequest
@ ePSGS_BlobBySeqIdRequest
TRequest & GetRequest(void)
static CPubseqGatewayApp * GetInstance(void)
@ eProtectedDb
DB is protected.
@ eNotFoundDb
DB main file not found.
virtual TErrCode GetErrCode(void) const
Adaptation of CThread class repeatedly running some job.
void RequestStop()
Schedule thread Stop.
CRef< CSlot > GetSlot(const string &acc_or_path)
shared_ptr< SWGSData > GetChunk(const string &id2info, int64_t chunk_id)
bool GetCompress(SWGSProcessor_Config::ECompressData comp, const SWGSSeqInfo &seq, const objects::CAsnBinData &data) const
SWGSSeqInfo ResolveBlobId(const objects::CID2_Blob_Id &id, bool skip_lookup=false)
shared_ptr< SWGSData > GetBlobBySeqId(const objects::CSeq_id &seq_id, const TBlobIds &excluded)
SWGSSeqInfo ResolveGi(TGi gi, bool skip_lookup=false)
shared_ptr< SWGSData > GetBlobByBlobId(const string &blob_id)
bool IsCorrectVersion(SWGSSeqInfo &seq, int version)
void ResetIteratorCache(SWGSSeqInfo &seq)
void GetWGSData(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq0)
CRef< objects::CSeq_id > GetAccVer(SWGSSeqInfo &seq)
CWGSClient(const SWGSProcessor_Config &config)
CFastMutex m_ResolverMutex
SWGSSeqInfo & GetRootSeq(SWGSSeqInfo &seq0)
objects::CWGSScaffoldIterator & GetScaffoldIterator(SWGSSeqInfo &seq)
shared_ptr< SWGSData > ResolveSeqId(const objects::CSeq_id &seq_id)
TGi GetGi(SWGSSeqInfo &seq)
CRef< objects::CWGSResolver > m_Resolver
bool IsValidRowId(SWGSSeqInfo &seq)
CRef< objects::CWGSResolver > GetWGSResolver(void)
SWGSSeqInfo ResolveAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
SWGSSeqInfo Resolve(const objects::CSeq_id &id, bool skip_lookup=false)
bool HasMigrated(SWGSSeqInfo &seq)
objects::CID2_Blob_Id & GetBlobId(SWGSSeqInfo &id)
objects::CWGSDb GetWGSDb(const string &prefix)
CRef< objects::CSeq_id > GetGeneral(SWGSSeqInfo &seq)
bool CanProcessRequest(CPSGS_Request &request)
int GetID2BlobState(SWGSSeqInfo &seq)
objects::CWGSSeqIterator & GetContigIterator(SWGSSeqInfo &seq)
vector< string > TBlobIds
NCBI_gb_state GetGBState(SWGSSeqInfo &seq0)
void GetBioseqInfo(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq)
objects::CWGSProteinIterator & GetProteinIterator(SWGSSeqInfo &seq)
void x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
SWGSSeqInfo ResolveWGSAcc(const string &acc, const objects::CTextseq_id &id, TAllowSeqType allow_seq_type, bool skip_lookup=false)
bool HasSpecialState(SWGSSeqInfo &seq, NCBI_gb_state special_state)
SWGSSeqInfo ResolveGeneral(const objects::CDbtag &dbtag, bool skip_lookup=false)
SWGSSeqInfo ResolveProtAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
SWGSProcessor_Config m_Config
void GetSeqIds(SWGSSeqInfo &seq, list< CRef< objects::CSeq_id > > &ids)
CRef< CThreadNonStop > m_IndexUpdateThread
TVDBRowId GetContigNameRowId(const string &name) const
TVDBRowId GetProteinNameRowId(const string &name) const
bool LoadMasterDescr(EDescrFilter filter=eDescrDefaultFilter) const
TVDBRowId GetScaffoldNameRowId(const string &name) const
TVDBRowId GetLocRowId(void) const
NCBI_WGS_seqtype GetLocSeqType(void) const
ESeqType GetSeqType(void) const
TVDBRowId GetRowId(void) const
NCBI_gb_state GetGBState(void) const
bool HasTaxId(void) const
TSeqPos GetSeqLength(void) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
int GetAccVersion(void) const
bool HasSeqHash(void) const
TTaxId GetTaxId(void) const
virtual TWGSPrefixes GetPrefixes(TGi gi)=0
virtual void SetNonWGS(TGi gi, const TWGSPrefixes &prefixes)
vector< string > TWGSPrefixes
virtual bool Update(void)
static CRef< CWGSResolver > CreateResolver(const CVDBMgr &mgr)
virtual void SetWGSPrefix(TGi gi, const TWGSPrefixes &prefixes, const string &prefix)
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
TSeqPos GetSeqLength(void) const
CRef< CAsnBinData > GetSeq_entryData(TFlags flags=fDefaultFlags) const
CRef< CAsnBinData > GetChunkDataForVersion(TChunkId chunk_id, TSplitVersion split_version) const
TTaxId GetTaxId(void) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
pair< CRef< CAsnBinData >, TSplitVersion > GetSplitInfoDataAndVersion(TFlags flags=fDefaultFlags) const
bool HasTaxId(void) const
TSeqPos GetSeqLength(EClipType clip_type=eDefaultClip) const
bool HasAccVersion(int version) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
iterator_bool insert(const value_type &val)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on properties.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
EAccessionInfo
For IdentifyAccession (below)
@ eFastaContent
Like eFasta, but without any tag.
void Reset(void)
Reset reference object.
void Swap(TThisType &ref)
Swaps the pointer with another reference.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
NCBI_NS_STD::string::size_type SIZE_TYPE
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
@ eNocase
Case insensitive compare.
bool Run(TRunMode flags=fRunDefault)
Run the thread.
CGuard< CRWLock, SSimpleWriteLock< CRWLock > > TWriteLockGuard
void Join(void **exit_data=0)
Wait for the thread termination.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
@ eID2_Blob_State_suppressed
@ eID2_Blob_State_protected
@ eID2_Blob_State_withdrawn
E_Choice Which(void) const
Which variant is currently selected.
TGi & SetGi(void)
Select the variant.
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
@ e_Gibbmt
Geninfo backbone moltype.
@ e_Giim
Geninfo import id.
@ e_Gibbsq
Geninfo backbone seqid.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_not_set
No variant selected.
unsigned int
A callback function used to compare two keys in a database.
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
static const char * prefix[]
#define PSG_ERROR(message)
#define PSG_INFO(message)
psg_clock_t::time_point psg_time_point_t
AutoPtr< SWGSSeqInfo > m_RootSeq
objects::CWGSProteinIterator m_ProteinIter
bool IsProtein(void) const
bool IsMaster(void) const
objects::CWGSScaffoldIterator m_ScaffoldIter
objects::CWGSSeqIterator m_ContigIter
objects::TVDBRowId m_RowId
CRef< objects::CID2_Blob_Id > m_BlobId
bool IsContig(void) const
bool IsScaffold(void) const
int GetPSGBioseqState() const
unsigned m_IndexUpdateDelay
ECompressData m_CompressData
static EAddMasterDescr s_AddMasterDescrLevel(void)
static bool s_AddMasterDescrScaffold()
static const size_t kNumLettersV1
static const size_t kMaxRowDigitsV2
static const EResolveMaster kResolveMaster
static int s_GBStateToID2(NCBI_gb_state gb_state)
static const size_t kMinRowDigitsV2
static bool s_MarkMasterDescr(void)
static bool s_FilterAll(void)
static const size_t kNumLettersV2
NCBI_PARAM_DEF_EX(bool, WGS, FILTER_ALL, false, eParam_NoThread, WGS_FILTER_ALL)
NCBI_PARAM_DEF(bool, WGS, SPLIT_FEATURES, true)
static bool s_KeepReplaced(void)
static const size_t kVersionDigits
static const int kBlobIdV2VersionContig
static const int kBlobIdV2SatMax
static const int kBlobIdV2SatMin
static const size_t kTypePrefixLen
static bool s_AddMasterDescrContig()
static bool s_SplitFeatures(void)
static const size_t kMaxRowDigitsV1
static const size_t kPrefixLenV2
static const int kBlobIdV2VersionScaffold
@ eAddMasterDescr_detached
NCBI_PARAM_DECL(bool, WGS, FILTER_ALL)
static EAddMasterDescr s_ProcessAddMasterDescr(void)
static const int kBlobIdV1Sat
static bool s_KeepMigrated(void)
static const size_t kMaxProtAccLen
static const int kBlobIdV2VersionProtein
static const size_t kMinRowDigitsV1
static bool s_AddMasterDescrProtein()
static const size_t kMinProtAccLen
@ eResolveMaster_without_gi
static const size_t kPrefixLenV1
@ NCBI_gb_state_eWGSGenBankUnverified
@ NCBI_gb_state_eWGSGenBankReplaced
@ NCBI_gb_state_eWGSGenBankSuppressed
@ NCBI_gb_state_eWGSGenBankMigrated
@ NCBI_gb_state_eWGSGenBankLive
@ NCBI_gb_state_eWGSGenBankWithdrawn
@ NCBI_WGS_seqtype_scaffold
@ NCBI_WGS_seqtype_contig