NCBI C++ ToolKit
wgs_client.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: wgs_client.cpp 100818 2023-09-14 17:33:33Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Aleksey Grichenko, Eugene Vasilchenko
27  *
28  * File Description: client for loading data from WGS
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "wgs_client.hpp"
35 #include "pubseq_gateway.hpp"
40 #include "osg_getblob_base.hpp"
41 #include "osg_resolve_base.hpp"
44 
45 
49 
51 
52 /////////////////////////////////////////////////////////////////////////////
53 // Processor settings
54 /////////////////////////////////////////////////////////////////////////////
55 
56 NCBI_PARAM_DECL(bool, WGS, FILTER_ALL);
57 NCBI_PARAM_DEF_EX(bool, WGS, FILTER_ALL, false, eParam_NoThread, WGS_FILTER_ALL);
58 
59 NCBI_PARAM_DECL(bool, WGS, SPLIT_FEATURES);
60 NCBI_PARAM_DEF(bool, WGS, SPLIT_FEATURES, true);
61 
62 NCBI_PARAM_DECL(bool, WGS, KEEP_REPLACED);
63 NCBI_PARAM_DEF(bool, WGS, KEEP_REPLACED, true);
64 
65 NCBI_PARAM_DECL(bool, WGS, KEEP_MIGRATED);
66 NCBI_PARAM_DEF(bool, WGS, KEEP_MIGRATED, false);
67 
68 NCBI_PARAM_DECL(bool, WGS, KEEP_EXTERNAL);
69 NCBI_PARAM_DEF(bool, WGS, KEEP_EXTERNAL, true);
70 
71 NCBI_PARAM_DECL(string, WGS, ADD_MASTER_DESCR);
72 NCBI_PARAM_DEF(string, WGS, ADD_MASTER_DESCR, "detached");
73 
74 NCBI_PARAM_DECL(bool, WGS, MARK_MASTER_DESCR);
75 NCBI_PARAM_DEF(bool, WGS, MARK_MASTER_DESCR, false);
76 
77 
78 static inline bool s_FilterAll(void)
79 {
80  static bool value = NCBI_PARAM_TYPE(WGS, FILTER_ALL)::GetDefault();
81  return value;
82 }
83 
84 static bool s_SplitFeatures(void)
85 {
86  static bool value = NCBI_PARAM_TYPE(WGS, SPLIT_FEATURES)::GetDefault();
87  return value;
88 }
89 
90 static bool s_KeepReplaced(void)
91 {
92  static bool value = NCBI_PARAM_TYPE(WGS, KEEP_REPLACED)::GetDefault();
93  return value;
94 }
95 
96 static bool s_KeepMigrated(void)
97 {
98  static bool value = NCBI_PARAM_TYPE(WGS, KEEP_MIGRATED)::GetDefault();
99  return value;
100 }
101 
103 {
107 };
108 
110 {
111  auto value = NCBI_PARAM_TYPE(WGS, ADD_MASTER_DESCR)::GetDefault();
112  return (NStr::EqualNocase(value, "detached")? eAddMasterDescr_detached:
115 }
116 
118 {
120  return value;
121 }
122 
124 {
125  // master descr on contig should be added only with "all" setting
127 }
128 
130 {
131  // master descr on scaffold should be added only with any setting except "none"
133 }
134 
136 {
137  // master descr on protein should be added only with any setting except "none"
139 }
140 
141 static bool s_MarkMasterDescr(void)
142 {
143  static bool value = NCBI_PARAM_TYPE(WGS, MARK_MASTER_DESCR)::GetDefault();
144  return value;
145 }
146 
147 
152 };
154 
155 
156 /////////////////////////////////////////////////////////////////////////////
157 // WGS seq-ids
158 /////////////////////////////////////////////////////////////////////////////
159 
160 // WGS accession parameters
// Length of the database-type tag in a general id ("WGS:" or "TSA:").
static constexpr size_t kTypePrefixLen  = 4;

// Number of leading letters in the two accession generations.
static constexpr size_t kNumLettersV1   = 4;
static constexpr size_t kNumLettersV2   = 6;

// Version digits that follow the letters; together they form the prefix.
static constexpr size_t kVersionDigits  = 2;
static constexpr size_t kPrefixLenV1    = kNumLettersV1 + kVersionDigits;
static constexpr size_t kPrefixLenV2    = kNumLettersV2 + kVersionDigits;

// Accepted count of row digits for each accession generation.
static constexpr size_t kMinRowDigitsV1 = 6;
static constexpr size_t kMaxRowDigitsV1 = 8;
static constexpr size_t kMinRowDigitsV2 = 7;
static constexpr size_t kMaxRowDigitsV2 = 9;

// Accepted length range of a protein accession.
static constexpr size_t kMinProtAccLen  = 8;  // 3+5
static constexpr size_t kMaxProtAccLen  = 10; // 3+7
174 
175 
176 /////////////////////////////////////////////////////////////////////////////
177 // WGS blob-ids
178 /////////////////////////////////////////////////////////////////////////////
179 
180 // satkey: row-id
// Sat value used for blob ids of 4-letter (V1) WGS accessions.
static constexpr int kBlobIdV1Sat             = 1000;

// Inclusive sat range used for blob ids of 6-letter (V2) WGS accessions.
static constexpr int kBlobIdV2SatMin          = 1001;
static constexpr int kBlobIdV2SatMax          = 1129;

// Value of the V2 "version" field for each sequence type; contig versions
// are counted upwards from kBlobIdV2VersionContig.
static constexpr int kBlobIdV2VersionScaffold = 0;
static constexpr int kBlobIdV2VersionProtein  = 1;
static constexpr int kBlobIdV2VersionContig   = 2;
187 enum EBlobType {
191 };
196 };
197 
198 
199 /////////////////////////////////////////////////////////////////////////////
200 // Helper classes
201 /////////////////////////////////////////////////////////////////////////////
202 
204 
206 {
207 public:
208  CIndexUpdateThread(unsigned update_delay, CRef<CWGSResolver> resolver)
209  : CThreadNonStop(update_delay),
210  m_FirstRun(true),
211  m_Resolver(resolver)
212  {
213  }
214 
215 protected:
216  virtual void DoJob(void) {
217  if ( m_FirstRun ) {
218  // CThreadNonStop runs first iteration immediately, ignore it
219  m_FirstRun = false;
220  return;
221  }
222  try {
223  if ( m_Resolver->Update() ) {
224  PSG_INFO("PSGS_WGS: updated WGS index");
225  }
226  }
227  catch ( CException& exc ) {
228  PSG_ERROR("PSGS_WGS: Exception while updating WGS index: " << exc);
229  }
230  catch ( exception& exc ) {
231  PSG_ERROR("PSGS_WGS: Exception while updating WGS index: " << exc.what());
232  }
233  }
234 
235 private:
238 };
239 
241 
242 
243 /////////////////////////////////////////////////////////////////////////////
244 // CWGSClient
245 /////////////////////////////////////////////////////////////////////////////
246 
247 
249  : m_Config(config),
250  m_WGSDbCache(config.m_CacheSize, config.m_FileReopenTime, config.m_FileRecheckTime)
251 {
252 }
253 
254 
256 {
257  if ( m_IndexUpdateThread ) {
260  }
261 }
262 
263 
265 {
266  if ( !m_Resolver ) {
268  if ( !m_Resolver ) {
270  }
271  if ( m_Resolver && !m_IndexUpdateThread ) {
274  }
275  }
276  return m_Resolver;
277 }
278 
279 
281 {
282  auto req_type = request.GetRequestType();
283  string seq_id;
284  int seq_id_type = -1;
285  CRef<CID2_Blob_Id> blob_id;
286 
287  switch ( req_type ) {
289  {
290  auto& resolve_request = request.GetRequest<SPSGS_ResolveRequest>();
291  seq_id = resolve_request.m_SeqId;
292  seq_id_type = resolve_request.m_SeqIdType;
293  break;
294  }
296  {
297  auto& blob_sid_request = request.GetRequest<SPSGS_BlobBySeqIdRequest>();
298  seq_id = blob_sid_request.m_SeqId;
299  seq_id_type = blob_sid_request.m_SeqIdType;
300  break;
301  }
303  blob_id = osg::CPSGS_OSGGetBlobBase::ParsePSGBlobId(
305  break;
307  {
308  auto& chunk_request = request.GetRequest<SPSGS_TSEChunkRequest>();
309  blob_id = osg::CPSGS_OSGGetBlobBase::ParsePSGId2Info(chunk_request.m_Id2Info).tse_id;
310  break;
311  }
312  default:
313  return false;
314  }
315 
316  if ( !seq_id.empty() ) {
317  return osg::CPSGS_OSGResolveBase::CanBeWGS(seq_id_type, seq_id);
318  }
319  if ( blob_id ) {
320  return ResolveBlobId(*blob_id, true).m_ValidWGS;
321  }
322  return false;
323 }
324 
325 
326 shared_ptr<SWGSData> CWGSClient::ResolveSeqId(const CSeq_id& seq_id)
327 {
328  shared_ptr<SWGSData> ret;
329  SWGSSeqInfo seq = Resolve(seq_id);
330  if (seq && HasMigrated(seq) && !s_KeepMigrated() ) {
331  seq = SWGSSeqInfo();
332  }
333  if ( !seq ) return ret;
334 
335  GetBioseqInfo(ret, seq);
336  return ret;
337 }
338 
339 
340 shared_ptr<SWGSData> CWGSClient::GetBlobBySeqId(const CSeq_id& seq_id, const TBlobIds& excluded)
341 {
342  shared_ptr<SWGSData> ret;
343  SWGSSeqInfo seq = Resolve(seq_id);
344  if (seq && HasMigrated(seq) && !s_KeepMigrated() ) {
345  seq = SWGSSeqInfo();
346  }
347  if ( !seq ) return ret;
348 
349  GetBioseqInfo(ret, seq);
350 
351  if ( find(excluded.begin(), excluded.end(), ret->m_BlobId) != excluded.end() ) {
352  ret->m_Excluded = true;
353  return ret;
354  }
355 
356  GetWGSData(ret, seq);
357  return ret;
358 }
359 
360 
361 shared_ptr<SWGSData> CWGSClient::GetBlobByBlobId(const string& blob_id)
362 {
363  shared_ptr<SWGSData> ret;
364  CRef<CID2_Blob_Id> id2_blob_id(osg::CPSGS_OSGGetBlobBase::ParsePSGBlobId(blob_id));
365  if ( !id2_blob_id ) return ret;
366 
367  SWGSSeqInfo seq = ResolveBlobId(*id2_blob_id);
368  if ( !seq ) return ret;
369 
370  GetWGSData(ret, seq);
371  return ret;
372 }
373 
374 
// Load one split chunk of a WGS blob.
//
// id2info is parsed with ParsePSGId2Info(); its tse_id is resolved with
// ResolveBlobId().  Returns:
//  - a null pointer when id2info has no tse_id or the blob id does not resolve;
//  - an object carrying only blob id and blob state when the state is
//    reported as forbidden by SWGSData::IsForbidden();
//  - an object with chunk data when the root sequence is a contig;
//  - a null pointer for any other root sequence type.
375 shared_ptr<SWGSData> CWGSClient::GetChunk(const string& id2info, int64_t chunk_id)
376 {
377  shared_ptr<SWGSData> ret;
378  osg::CPSGS_OSGGetBlobBase::SParsedId2Info parsed_id2info =
379  osg::CPSGS_OSGGetBlobBase::ParsePSGId2Info(id2info);
380  if ( !parsed_id2info.tse_id ) return ret;
381 
382  SWGSSeqInfo seq0 = ResolveBlobId(*parsed_id2info.tse_id);
383  if ( !seq0 ) return ret;
384 
385  auto id2_blob_state = GetID2BlobState(seq0);
386  if ( SWGSData::IsForbidden(id2_blob_state) ) {
  // forbidden blob: report id and state only, no data
387  ret = make_shared<SWGSData>();
388  ret->m_Id2BlobId.Reset(&GetBlobId(seq0));
389  ret->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*ret->m_Id2BlobId);
390  ret->m_Id2BlobState = id2_blob_state;
391  return ret;
392  }
393 
  // the blob id is still taken from seq0, the data from its root sequence
394  SWGSSeqInfo& seq = GetRootSeq(seq0);
395  if ( seq.IsContig() ) {
  // NOTE(review): the declaration of `it` is on a line that is not visible
  // in this copy of the file.
397  // master descr shouldn't be added to proteins in chunks
398  //CWGSSeqIterator::TFlags flags = it.fDefaultFlags & ~it.fMasterDescr;
399  ret = make_shared<SWGSData>();
400  ret->m_Id2BlobId.Reset(&GetBlobId(seq0));
401  ret->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*ret->m_Id2BlobId);
402  ret->m_SplitVersion = parsed_id2info.split_version;
403  ret->m_Id2BlobState = id2_blob_state;
404  ret->m_Data = it.GetChunkDataForVersion(chunk_id, parsed_id2info.split_version);
405  if ( !ret->m_Data ) {
  // NOTE(review): this fallback repeats the call that has just returned
  // null, with the same arguments, and dereferences its result.  Presumably
  // a different accessor (one returning the chunk object to be wrapped into
  // CAsnBinData) was intended -- confirm against the iterator API.
406  ret->m_Data = new CAsnBinData(*it.GetChunkDataForVersion(chunk_id, parsed_id2info.split_version));
407  }
408  ret->m_Compress = GetCompress(m_Config.m_CompressData, seq, *ret->m_Data);
409  }
410  return ret;
411 }
412 
413 
416  EPSGOperationStatus status)
417 {
419  GetTiming().Register(nullptr, operation, status, start, 0);
420 }
421 
422 
424 {
425  CWGSDb wgs_db;
426  {{
427  CRef<CWGSDbInfo> delete_info; // delete stale file info after releasing mutex
428  auto slot = m_WGSDbCache.GetSlot(prefix);
429  TWGSDbCache::CSlot::TSlotMutex::TWriteLockGuard guard(slot->GetSlotMutex());
430  CRef<CWGSDbInfo> info = slot->GetObject<CWGSDbInfo>();
431  if ( info && slot->IsExpired(m_WGSDbCache, prefix) ) {
432  PSG_INFO("PSGS_WGS: GetWGSDb: opened " << prefix << " has expired");
433  slot->ResetObject();
434  delete_info.Swap(info);
435  }
436  if ( !info ) {
437  slot->UpdateExpiration(m_WGSDbCache, prefix);
438  try {
439  psg_time_point_t start = psg_clock_t::now();
440  wgs_db = CWGSDb(m_Mgr, prefix);
442  wgs_db.LoadMasterDescr();
443  }
445  }
446  catch ( CSraException& exc ) {
447  if ( exc.GetErrCode() == exc.eNotFoundDb ||
448  exc.GetErrCode() == exc.eProtectedDb ) {
449  // no such WGS table
450  }
451  else {
452  // problem in VDB or WGS reader
453  PSG_ERROR("PSGS_WGS: Exception while opening WGS DB " << prefix << ": " << exc);
454  throw;
455  }
456  return CWGSDb();
457  }
458  catch ( CException& exc ) {
459  // problem in VDB or WGS reader
460  PSG_ERROR("PSGS_WGS: Exception while opening WGS DB " << prefix << ": " << exc);
461  throw;
462  }
463  catch ( exception& exc ) {
464  // problem in VDB or WGS reader
465  PSG_ERROR("PSGS_WGS: Exception while opening WGS DB " << prefix << ": " << exc.what());
466  throw;
467  }
468  info = new CWGSDbInfo;
469  info->m_WGSDb = wgs_db;
470  slot->SetObject(info);
471  }
472  wgs_db = info->m_WGSDb;
473  }}
474  if ( wgs_db->IsReplaced() && !s_KeepReplaced() ) {
475  // replaced
476  PSG_INFO("PSGS_WGS: GetWGSDb: " << prefix << " is replaced");
477  return CWGSDb();
478  }
479  else {
480  // found
481  PSG_INFO("PSGS_WGS: GetWGSDb: " << prefix);
482  return wgs_db;
483  }
484 }
485 
486 
488 {
489  if ( !seq.m_WGSDb ) {
490  seq.m_WGSDb = GetWGSDb(seq.m_WGSAcc);
491  if ( seq.m_WGSDb ) {
492  seq.m_IsWGS = true;
493  seq.m_RowDigits = Uint1(seq.m_WGSDb->GetIdRowDigits());
494  }
495  }
496  return seq.m_WGSDb;
497 }
498 
499 
501 {
502  seq.m_ContigIter.Reset();
503  seq.m_ScaffoldIter.Reset();
504  seq.m_ProteinIter.Reset();
505  seq.m_BlobId.Reset();
506 }
507 
508 
510 {
511  if ( !seq.m_ContigIter ) {
514  seq.m_ContigIter.SelectAccVersion(seq.m_Version);
515  }
516  return seq.m_ContigIter;
517 }
518 
519 
521 {
522  if ( !seq.m_ScaffoldIter ) {
524  }
525  return seq.m_ScaffoldIter;
526 }
527 
528 
530 {
531  if ( !seq.m_ProteinIter ) {
533  }
534  return seq.m_ProteinIter;
535 }
536 
537 
540 {
541  if ( seq0.m_RootSeq.get() ) {
542  return *seq0.m_RootSeq;
543  }
544  if ( seq0.m_NoRootSeq ) {
545  return seq0;
546  }
547  if ( !seq0.IsProtein() ) {
548  seq0.m_NoRootSeq = true;
549  return seq0;
550  }
551  // proteins can be located in nuc-prot set
552  TVDBRowId cds_row_id = GetProteinIterator(seq0).GetBestProductFeatRowId();
553  if ( !cds_row_id ) {
554  seq0.m_NoRootSeq = true;
555  return seq0;
556  }
557  CWGSFeatureIterator cds_it(GetWGSDb(seq0), cds_row_id);
558  if ( !cds_it ) {
559  seq0.m_NoRootSeq = true;
560  return seq0;
561  }
562  switch ( cds_it.GetLocSeqType() ) {
564  {
565  // switch to contig
566  seq0.m_RootSeq.reset(new SWGSSeqInfo(seq0));
567  SWGSSeqInfo& seq = *seq0.m_RootSeq;
568  seq.SetContig();
569  seq.m_RowId = cds_it.GetLocRowId();
570  ResetIteratorCache(seq);
571  return seq;
572  }
574  {
575  // switch to scaffold
576  seq0.m_RootSeq.reset(new SWGSSeqInfo(seq0));
577  SWGSSeqInfo& seq = *seq0.m_RootSeq;
578  seq.SetScaffold();
579  seq.m_RowId = cds_it.GetLocRowId();
580  ResetIteratorCache(seq);
581  return seq;
582  }
583  default:
584  seq0.m_NoRootSeq = true;
585  return seq0;
586  }
587 }
588 
589 
591 {
592  if ( seq.IsContig() ) {
593  return GetContigIterator(seq);
594  }
595  if ( seq.IsScaffold() ) {
596  return GetScaffoldIterator(seq);
597  }
598  if ( seq.IsProtein() ) {
599  return GetProteinIterator(seq);
600  }
601  // master
602  return true;
603 }
604 
605 
607 {
608  if ( !seq ) {
609  return false;
610  }
611  if ( seq.IsContig() ) {
613  return it && it.HasAccVersion(version);
614  }
615  else if ( seq.IsProtein() ) {
617  return it && it.GetAccVersion() == version;
618  }
619  else if ( seq.IsMaster() ) {
620  // master version is already checked
621  return true;
622  }
623  else {
624  // scaffolds can have only version 1
625  return version == 1;
626  }
627 }
628 
629 
631 {
632  if ( !seq.IsProtein() ) {
633  return false;
634  }
636  if ( !it.HasGi() ) {
637  return false;
638  }
639  const auto project_state = seq.m_WGSDb->GetProjectGBState();
640  switch (project_state) {
643  return it.GetGBState() == special_state;
644  default:
645  return project_state == special_state;
646  }
647 }
648 
649 
651 {
652  if ( !seq.IsProtein() ) {
653  return false;
654  }
656  if ( !it.HasGi() ) {
657  return false;
658  }
659  const auto project_state = seq.m_WGSDb->GetProjectGBState();
660  switch (project_state) {
664  default:
665  return false;
666  }
667 }
668 
669 
// Resolve an arbitrary Seq-id to WGS sequence info.
//
// GI and general ids are forwarded to ResolveGi() / ResolveGeneral().
// The id types listed explicitly below are never WGS and yield an empty
// SWGSSeqInfo.  Every other id is handled through its text seq-id by
// ResolveAcc(); when the id carries a version, the version is verified with
// IsCorrectVersion() and, for contigs, selected on the contig iterator.
// When skip_lookup is set it is passed through to the per-type resolvers.
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
671 CWGSClient::Resolve(const CSeq_id& id, bool skip_lookup)
672 {
673  switch ( id.Which() ) {
674  case CSeq_id::e_Gi:
675  return ResolveGi(id.GetGi(), skip_lookup);
676  case CSeq_id::e_General:
677  return ResolveGeneral(id.GetGeneral(), skip_lookup);
678  case CSeq_id::e_not_set:
679  case CSeq_id::e_Local:
680  case CSeq_id::e_Gibbsq:
681  case CSeq_id::e_Gibbmt:
682  case CSeq_id::e_Giim:
683  case CSeq_id::e_Patent:
684  case CSeq_id::e_Pdb:
685  return SWGSSeqInfo();
686  default:
  // remaining types are tried as text seq-ids below
687  break;
688  }
689  const CTextseq_id* text_id = id.GetTextseq_Id();
690  if ( !text_id ) {
691  return SWGSSeqInfo();
692  }
693  SWGSSeqInfo seq = ResolveAcc(*text_id, skip_lookup);
694  if ( !seq ) {
695  return seq;
696  }
697  if ( text_id->IsSetVersion() ) {
698  int version = text_id->GetVersion();
699  if ( !IsCorrectVersion(seq, version) ) {
  // accession was recognized but the requested version does not match
700  seq.m_ValidWGS = false;
701  return seq;
702  }
703  if ( seq.IsContig() ) {
  // remember the explicitly requested contig version
704  GetContigIterator(seq).SelectAccVersion(version);
705  seq.m_Version = version;
706  }
707  }
708  seq.m_ValidWGS = true;
709  return seq;
710 }
711 
712 
// Resolve a general id whose db has the form "WGS:<prefix>" or
// "TSA:<prefix>" (prefix given with or without the two version digits).
//
// Returns an empty SWGSSeqInfo when db has an unexpected length or tag.
// With skip_lookup the result is marked valid without opening the WGS DB.
// Otherwise the DB is opened, its TSA/WGS kind must agree with the tag, and
// the id's tag is looked up as a contig, scaffold and protein name.
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
714 CWGSClient::ResolveGeneral(const CDbtag& dbtag, bool skip_lookup)
715 {
716  const CObject_id& object_id = dbtag.GetTag();
717  const string& db = dbtag.GetDb();
718  if ( db.size() != kTypePrefixLen+kNumLettersV1 /* WGS:AAAA */ &&
719  db.size() != kTypePrefixLen+kPrefixLenV1 /* WGS:AAAA01 */ &&
720  db.size() != kTypePrefixLen+kNumLettersV2 /* WGS:AAAAAA */ &&
721  db.size() != kTypePrefixLen+kPrefixLenV2 /* WGS:AAAAAA01 */ ) {
722  return SWGSSeqInfo();
723  }
724  bool is_tsa = false;
725  if ( NStr::StartsWith(db, "WGS:", NStr::eNocase) ) {
726  }
727  else if ( NStr::StartsWith(db, "TSA:", NStr::eNocase) ) {
728  is_tsa = true;
729  }
730  else {
731  return SWGSSeqInfo();
732  }
733  string wgs_acc = db.substr(kTypePrefixLen); // remove "WGS:" or "TSA:"
734 
735  NStr::ToUpper(wgs_acc);
  // a trailing letter means the version digits were omitted
736  if ( isalpha(wgs_acc.back()&0xff) ) {
737  wgs_acc += "01"; // add default version digits
738  }
739  SWGSSeqInfo seq;
740  seq.m_WGSAcc = wgs_acc;
741  seq.m_IsWGS = true;
742  if (skip_lookup) {
743  seq.m_ValidWGS = true;
744  return seq;
745  }
746  CWGSDb wgs_db = GetWGSDb(seq);
747  if ( !wgs_db || wgs_db->IsTSA() != is_tsa ) {
748  // TSA or WGS type must match
749  return seq;
750  }
751  string tag;
752  if ( object_id.IsStr() ) {
753  tag = object_id.GetStr();
  // NOTE(review): one line of this branch is not visible in this copy.
755  }
756  else {
757  tag = NStr::NumericToString(object_id.GetId());
758  }
  // The three lookups below are independent `if`s, so when the tag matches
  // more than one table the last matching one determines type and row.
759  if ( TVDBRowId row = wgs_db.GetContigNameRowId(tag) ) {
760  seq.m_ValidWGS = true;
761  seq.SetContig();
762  seq.m_RowId = row;
763  }
764  if ( TVDBRowId row = wgs_db.GetScaffoldNameRowId(tag) ) {
765  seq.m_ValidWGS = true;
766  seq.SetScaffold();
767  seq.m_RowId = row;
768  }
769  if ( TVDBRowId row = wgs_db.GetProteinNameRowId(tag) ) {
770  seq.m_ValidWGS = true;
771  seq.SetProtein();
772  seq.m_RowId = row;
773  }
774  return seq;
775 }
776 
777 
// Resolve a GI to WGS sequence info.
//
// Candidate WGS prefixes are obtained from the resolver.  With skip_lookup
// the first candidate is enough to report a (type-less) valid WGS result.
// Otherwise each candidate DB is opened and checked, first as a master
// sequence GI, then through CWGSGiIterator as a protein or contig GI.
// When no candidate matches, the resolver is told via SetNonWGS() and an
// empty SWGSSeqInfo is returned.
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
779 CWGSClient::ResolveGi(TGi gi, bool skip_lookup)
780 {
781  CRef<CWGSResolver> wgs_resolver = GetWGSResolver();
782  psg_time_point_t start = psg_clock_t::now();
783  CWGSResolver::TWGSPrefixes prefixes = wgs_resolver->GetPrefixes(gi);
  // NOTE(review): the start of the statement continued on the next line is
  // not visible in this copy; presumably it records timing using `start`.
785  prefixes.empty()? eOpStatusNotFound: eOpStatusFound);
786  ITERATE ( CWGSResolver::TWGSPrefixes, it, prefixes ) {
787  if (skip_lookup) {
  // no DB access requested: any candidate prefix counts as WGS
788  SWGSSeqInfo fake_info;
789  fake_info.m_IsWGS = fake_info.m_ValidWGS = true;
790  return fake_info;
791  } else if ( CWGSDb wgs_db = GetWGSDb(*it) ) {
  // NOTE(review): the first line of the condition that ends on the next
  // line is not visible in this copy.
793  gi == wgs_db->GetMasterGi() ) {
794  // resolve master sequence with GI from VDB
795  wgs_resolver->SetWGSPrefix(gi, prefixes, *it);
796  SWGSSeqInfo seq;
797  seq.m_WGSAcc = *it;
798  seq.m_IsWGS = true;
799  seq.m_ValidWGS = true;
800  seq.m_WGSDb = wgs_db;
801  seq.m_RowDigits = Uint1(wgs_db->GetIdRowDigits());
802  seq.SetMaster();
803  return seq;
804  }
805  CWGSGiIterator gi_it(wgs_db, gi);
806  if ( gi_it ) {
807  wgs_resolver->SetWGSPrefix(gi, prefixes, *it);
808  SWGSSeqInfo seq;
809  seq.m_WGSAcc = *it;
810  seq.m_IsWGS = true;
811  seq.m_ValidWGS = true;
812  seq.m_WGSDb = wgs_db;
813  seq.m_RowDigits = Uint1(wgs_db->GetIdRowDigits());
814  seq.m_RowId = gi_it.GetRowId();
  // the GI index row is accepted only if the sequence iterator is valid
815  if ( gi_it.GetSeqType() == gi_it.eProt ) {
816  seq.SetProtein();
817  if ( !GetProteinIterator(seq) ) {
818  return SWGSSeqInfo();
819  }
820  }
821  else {
822  seq.SetContig();
823  if ( !GetContigIterator(seq) ) {
824  return SWGSSeqInfo();
825  }
826  }
827  return seq;
828  }
829  }
830  }
831  if ( !prefixes.empty() ) {
  // none of the candidate prefixes actually contains this GI
832  wgs_resolver->SetNonWGS(gi, prefixes);
833  }
834  return SWGSSeqInfo();
835 }
836 
837 
// Resolve a text seq-id (accession and/or name) to WGS sequence info.
//
// The name field is tried first as a WGS protein accession.  Otherwise the
// accession is classified and dispatched to ResolveProtAcc() or
// ResolveWGSAcc(); accessions of other divisions yield an empty SWGSSeqInfo.
// NOTE(review): the return-type line and several interior lines of this
// definition are not visible in this copy of the file.
839 CWGSClient::ResolveAcc(const CTextseq_id& id, bool skip_lookup)
840 {
841  if ( id.IsSetName() ) {
842  // first try name reference if it has WGS format like AAAA01P000001
843  // as it directly contains WGS accession
844  if ( SWGSSeqInfo seq = ResolveWGSAcc(id.GetName(), id, fAllow_aa,
845  skip_lookup) ) {
846  _ASSERT(seq.IsProtein());
847  if ( !id.IsSetAccession() ||
  // NOTE(review): the middle line of this condition is not visible in this
  // copy; it takes id.GetAccession() as its last argument.
849  id.GetAccession()) ) {
850  return seq;
851  }
852  }
853  }
854  if ( !id.IsSetAccession() ) {
855  return SWGSSeqInfo();
856  }
857  const string& acc = id.GetAccession();
  // NOTE(review): the line that defines `type` is not visible in this copy.
859  switch ( type & CSeq_id::eAcc_division_mask ) {
860  // accepted accession types
861  case CSeq_id::eAcc_wgs:
863  case CSeq_id::eAcc_tsa:
865  if ( type & CSeq_id::fAcc_prot ) {
866  return ResolveProtAcc(id, skip_lookup);
867  }
868  else {
869  return ResolveWGSAcc(acc, id, fAllow_master|fAllow_na,
870  skip_lookup);
871  }
872  case CSeq_id::eAcc_other:
873  // Some EMBL WGS accession aren't identified as WGS, so we'll try lookup anyway
874  if ( type == CSeq_id::eAcc_embl_prot ||
875  (type == CSeq_id::eAcc_gb_prot && acc.size() == 10) ) { // TODO: remove
876  return ResolveProtAcc(id, skip_lookup);
877  }
878  return SWGSSeqInfo();
879  default:
880  // non-WGS accessions
881  return SWGSSeqInfo();
882  }
883 }
884 
885 
// Resolve a protein accession to WGS sequence info.
//
// Accessions outside the [kMinProtAccLen, kMaxProtAccLen] length range are
// rejected.  Candidate WGS prefixes come from the resolver; with
// skip_lookup the first candidate is enough to report a (type-less) valid
// WGS result, otherwise each candidate DB is searched for the accession,
// passing -1 as the version when the id has none.  When no candidate
// matches, the resolver is told via SetNonWGS() and an empty SWGSSeqInfo
// is returned.
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
887 CWGSClient::ResolveProtAcc(const CTextseq_id& id, bool skip_lookup)
888 {
889  const string& acc = id.GetAccession();
890  if ( acc.size() < kMinProtAccLen || acc.size() > kMaxProtAccLen ) {
891  return SWGSSeqInfo();
892  }
893  int ask_version = id.IsSetVersion()? id.GetVersion(): -1;
894 
895  CRef<CWGSResolver> wgs_resolver = GetWGSResolver();
896  psg_time_point_t start = psg_clock_t::now();
897  CWGSResolver::TWGSPrefixes prefixes = wgs_resolver->GetPrefixes(acc);
  // NOTE(review): the start of the statement continued on the next line is
  // not visible in this copy; presumably it records timing using `start`.
899  prefixes.empty()? eOpStatusNotFound: eOpStatusFound);
900  ITERATE ( CWGSResolver::TWGSPrefixes, it, prefixes ) {
901  if (skip_lookup) {
  // no DB access requested: any candidate prefix counts as WGS
902  SWGSSeqInfo fake_info;
903  fake_info.m_IsWGS = fake_info.m_ValidWGS = true;
904  return fake_info;
905  } else if ( CWGSDb wgs_db = GetWGSDb(*it) ) {
906  if ( TVDBRowId row = wgs_db.GetProtAccRowId(acc, ask_version) ) {
907  wgs_resolver->SetWGSPrefix(acc, prefixes, *it);
908  SWGSSeqInfo seq;
909  seq.m_WGSAcc = *it;
910  seq.m_IsWGS = true;
911  seq.m_ValidWGS = true;
912  seq.m_WGSDb = wgs_db;
913  seq.SetProtein();
914  seq.m_RowDigits = Uint1(wgs_db->GetIdRowDigits());
915  seq.m_RowId = row;
916  return seq;
917  }
918  }
919  }
920  if ( !prefixes.empty() ) {
  // none of the candidate prefixes actually contains this accession
921  wgs_resolver->SetNonWGS(acc, prefixes);
922  }
923  return SWGSSeqInfo();
924 }
925 
926 
// Parse a WGS-format accession and resolve it to sequence info.
//
// Expected layout: 4 or 6 letters, 2 version digits, an optional type
// letter ('S' scaffold, 'P' protein), then the row digits.
//
//  acc             accession text to parse
//  id              text seq-id; only its version is used, for master sequences
//  allow_seq_type  mask of fAllow_* flags naming the acceptable sequence types
//  skip_lookup     when true, accept a well-formed accession without
//                  opening the WGS DB
//
// Returns an empty SWGSSeqInfo for malformed input.  A well-formed accession
// that is of a disallowed type, or that fails a DB check, is returned
// without m_ValidWGS being set here.
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
928 CWGSClient::ResolveWGSAcc(const string& acc,
929  const CTextseq_id& id,
930  TAllowSeqType allow_seq_type,
931  bool skip_lookup)
932 {
933  if ( acc.size() < kPrefixLenV1 + kMinRowDigitsV1 ||
934  acc.size() > kPrefixLenV2 + kMaxRowDigitsV2 + 1 ) { // one for type letter
935  return SWGSSeqInfo();
936  }
  // count leading letters, looking at most at kNumLettersV2 characters
937  size_t num_letters;
938  for ( num_letters = 0; num_letters < kNumLettersV2; ++num_letters ) {
939  if ( !isalpha(acc[num_letters]&0xff) ) {
940  break;
941  }
942  }
943  if ( num_letters != kNumLettersV1 && num_letters != kNumLettersV2 ) {
944  return SWGSSeqInfo();
945  }
  // the letters must be followed by the version digits
946  size_t prefix_len = num_letters + kVersionDigits;
947  for ( size_t i = num_letters; i < prefix_len; ++i ) {
948  if ( !isdigit(acc[i]&0xff) ) {
949  return SWGSSeqInfo();
950  }
951  }
952  SWGSSeqInfo seq;
953  seq.m_WGSAcc = acc.substr(0, prefix_len);
954  NStr::ToUpper(seq.m_WGSAcc);
955  seq.m_IsWGS = true;
956  SIZE_TYPE row_pos = prefix_len;
957  switch ( acc[row_pos] ) { // optional type letter
958  case 'S':
959  seq.SetScaffold();
960  if ( !(allow_seq_type & fAllow_scaffold) ) {
961  return seq;
962  }
963  ++row_pos;
964  break;
965  case 'P':
966  seq.SetProtein();
967  if ( !(allow_seq_type & fAllow_protein) ) {
968  return seq;
969  }
970  ++row_pos;
971  break;
972  default:
973  // it can be either contig or master sequence
974  if ( !(allow_seq_type & (fAllow_master|fAllow_contig)) ) {
975  return seq;
976  }
977  break;
978  }
  // the allowed number of row digits depends on the accession generation
979  size_t row_digits = acc.size() - row_pos;
980  if ( num_letters == kNumLettersV1 ) {
981  if ( row_digits < kMinRowDigitsV1 || row_digits > kMaxRowDigitsV1 ) {
982  return SWGSSeqInfo();
983  }
984  }
985  else {
986  if ( row_digits < kMinRowDigitsV2 || row_digits > kMaxRowDigitsV2 ) {
987  return SWGSSeqInfo();
988  }
989  }
  // parse the row number; any non-digit makes the accession malformed
990  Uint8 row = 0;
991  for ( size_t i = row_pos; i < acc.size(); ++i ) {
992  char c = acc[i];
993  if ( c < '0' || c > '9' ) {
994  return SWGSSeqInfo();
995  }
996  row = row*10+(c-'0');
997  }
998  seq.m_RowId = row;
999  if ( !row ) {
1000  // zero row might be master WGS sequence
1001  // it mustn't have type letter, version digits and row must be zero
1002  // version must be positive
1003  if ( !seq.IsMaster() ) {
1004  return SWGSSeqInfo();
1005  }
1006  if ( !(allow_seq_type & fAllow_master) ) {
1007  return seq;
1008  }
1009  // now, move version into version digits of the accession
1010  int version = id.IsSetVersion()? id.GetVersion(): 1;
1011  if ( version <= 0 ) {
1012  return SWGSSeqInfo();
1013  }
  // fill the version digits from the least significant one; the digits
  // present in the incoming accession must all be '0'
1014  for ( size_t i = kVersionDigits; i--; version /= 10) {
1015  if ( acc[num_letters+i] != '0' ) {
1016  return SWGSSeqInfo();
1017  }
1018  seq.m_WGSAcc[num_letters+i] = char('0'+version%10);
1019  }
1020  if ( version ) {
1021  // doesn't fit
1022  return SWGSSeqInfo();
1023  }
1024  }
1025  else if ( seq.IsContig() ) {
1026  if ( !(allow_seq_type & fAllow_contig) ) {
1027  return seq;
1028  }
1029  }
1030  if (skip_lookup) {
  // caller only needs to know that the accession is well-formed
1031  seq.m_ValidWGS = true;
1032  return seq;
1033  }
1034  if ( !GetWGSDb(seq) ) {
1035  return seq;
1036  }
  // the accession must use exactly as many row digits as the DB reports
1037  if ( seq.m_WGSDb->GetIdRowDigits() != row_digits ) {
1038  return seq;
1039  }
1040  if ( !row ) {
  // NOTE(review): the condition line that opens the branch below is not
  // visible in this copy; it presumably tests kResolveMaster.
1042  // no master resolution
1043  seq.m_IsWGS = false;
1044  return seq;
1045  }
1046  else if ( kResolveMaster == eResolveMaster_without_gi ) {
1047  // only master sequences w/o GI are resolved
1048  if ( GetWGSDb(seq)->GetMasterGi() != ZERO_GI ) {
1049  // Let master sequences with GI to be processed by ID
1050  seq.m_IsWGS = false;
1051  return seq;
1052  }
1053  }
1054  }
1055  else if ( !IsValidRowId(seq) ) {
1056  return seq;
1057  }
1058  seq.m_ValidWGS = true;
1059  return seq;
1060 }
1061 
1062 
// Decode an ID2 blob id into WGS sequence info.
//
// Two sat encodings are recognized:
//  - sat == kBlobIdV1Sat: 4-letter accessions; sub-sat holds either the old
//    bit-packed form (non-zero type bits) or a mixed-radix form that also
//    carries the contig version;
//  - kBlobIdV2SatMin <= sat <= kBlobIdV2SatMax: 6-letter accessions, with
//    sat and sub-sat combined into one 64-bit mixed-radix value.
// Any other sat yields a default-constructed SWGSSeqInfo.
// m_ValidWGS and m_RowId (taken from sat-key) are set only when skip_lookup
// is true or the decoded WGS accession can be opened with GetWGSDb().
// NOTE(review): the return-type line of this definition is not visible in
// this copy of the file.
1064 CWGSClient::ResolveBlobId(const CID2_Blob_Id& id, bool skip_lookup)
1065 {
1066  SWGSSeqInfo seq;
1067  CID2_Blob_Id::TSat sat = id.GetSat();
1068  if ( sat == kBlobIdV1Sat ) {
1069  // old 4-letter WGS accession format
1070  seq.m_IsWGS = true;
1071  unsigned subsat = unsigned(id.GetSub_sat());
  // the low eBlobIdBits_type bits hold the sequence type; zero there
  // selects the mixed-radix form handled in the else branch
1072  if ( unsigned seq_type = (subsat & ((1 << eBlobIdBits_type)-1)) ) {
1073  bool bad = false;
1074  // old blob-id subsat format
1075  switch ( seq_type ) {
1076  case eBlobType_contig: seq.SetContig(); break;
1077  case eBlobType_scaffold: seq.SetScaffold(); break;
1078  case eBlobType_protein: seq.SetProtein(); break;
1079  }
  // unpack letters, then digits, each from its own bit field
1080  int bit = eBlobIdBits_type;
1081  for ( size_t i = 0; i < kPrefixLenV1; ++i ) {
1082  if ( i < kNumLettersV1 ) {
1083  int v = (subsat >> bit)&((1 << eBlobIdBits_letter)-1);
1084  if ( v < 26 ) {
1085  seq.m_WGSAcc += char('A'+v);
1086  }
1087  else {
1088  bad = true;
1089  break;
1090  }
1091  bit += eBlobIdBits_letter;
1092  }
1093  else {
1094  int v = (subsat >> bit)&((1 << eBlobIdBits_digit)-1);
1095  if ( v < 10 ) {
1096  seq.m_WGSAcc += char('0'+v);
1097  }
1098  else {
1099  bad = true;
1100  break;
1101  }
1102  bit += eBlobIdBits_digit;
1103  }
1104  }
1105  if ( seq.IsContig() ) {
1106  // old format means version is 1
1107  seq.m_Version = 1;
1108  }
1109  if ( bad ) { // bad format - illegal letters or digits
1110  return seq;
1111  }
1112  }
1113  else {
  // mixed-radix form: drop the factor of 4, then read base-26 letters and
  // base-10 digits, least significant first
1114  seq.SetContig();
1115  subsat /= 4;
1116  for ( size_t i = 0; i < kPrefixLenV1; ++i ) {
1117  if ( i < kNumLettersV1 ) {
1118  seq.m_WGSAcc += char('A'+subsat%26);
1119  subsat /= 26;
1120  }
1121  else {
1122  seq.m_WGSAcc += char('0'+subsat%10);
1123  subsat /= 10;
1124  }
1125  }
1126  seq.m_Version = subsat + 2; // remaining value is version
1127  }
1128  // verify if the WGS accession actually exists in VDB
1129  if (skip_lookup || GetWGSDb(seq)) {
1130  seq.m_ValidWGS = true;
1131  seq.m_RowId = id.GetSat_key();
1132  }
1133  }
1134  else if ( sat >= kBlobIdV2SatMin && sat <= kBlobIdV2SatMax ) {
1135  seq.m_IsWGS = true;
  // sat offset supplies the high 32 bits, sub-sat the low 32 bits
1136  Uint8 v = (Uint8(sat-kBlobIdV2SatMin) << 32)|Uint4(id.GetSub_sat());
1137  for ( size_t i = 0; i < 6; ++i ) {
1138  seq.m_WGSAcc += char('A'+v%26);
1139  v /= 26;
1140  }
1141  for ( size_t i = 0; i < 2; ++i ) {
1142  seq.m_WGSAcc += char('0'+v%10);
1143  v /= 10;
1144  }
  // what remains selects the sequence type or the contig version
1145  if ( v == kBlobIdV2VersionScaffold ) {
1146  seq.SetScaffold();
1147  }
1148  else if ( v == kBlobIdV2VersionProtein ) {
1149  seq.SetProtein();
1150  }
1151  else {
1152  seq.SetContig();
1153  seq.m_Version = int(v - kBlobIdV2VersionContig + 1);
1154  }
1155  // verify if the WGS accession actually exists in VDB
1156  if (skip_lookup || GetWGSDb(seq)) {
1157  seq.m_ValidWGS = true;
1158  seq.m_RowId = id.GetSat_key();
1159  }
1160  }
1161  return seq;
1162 }
1163 
1164 
1166 {
1167  if ( seq0.m_BlobId ) {
1168  return *seq0.m_BlobId;
1169  }
1171  SWGSSeqInfo& seq = GetRootSeq(seq0);
1172  if ( seq.m_WGSAcc.size() == kPrefixLenV2 ) {
1173  Uint8 mul = 1;
1174  Uint8 value = 0;
1175  for ( size_t i = 0; i < seq.m_WGSAcc.size(); ++i ) {
1176  if ( i < kNumLettersV2 ) {
1177  value += (seq.m_WGSAcc[i]-'A')*mul;
1178  mul *= 26;
1179  }
1180  else {
1181  value += (seq.m_WGSAcc[i]-'0')*mul;
1182  mul *= 10;
1183  }
1184  }
1185  unsigned version;
1186  if ( seq.IsScaffold() ) {
1188  }
1189  else if ( seq.IsProtein() ) {
1191  }
1192  else {
1193  _ASSERT(seq.IsContig());
1194  if ( seq.m_Version == -1 ) {
1195  // need contig version to choose appropriate blob-id format
1196  seq.m_Version = GetContigIterator(seq).GetLatestAccVersion();
1197  }
1198  _ASSERT(seq.m_Version >= 1);
1199  _ASSERT(seq.m_Version <= 16);
1201  }
1202  value += mul*version;
1203  CID2_Blob_Id::TSat sat = kBlobIdV2SatMin + int(value >> 32); // high 32 bits
1204  CID2_Blob_Id::TSub_sat subsat = int(value & 0xFFFFFFFF); // low 32 bits
1205  _ASSERT(sat >= kBlobIdV2SatMin && sat <= kBlobIdV2SatMax);
1206  id->SetSat(sat);
1207  id->SetSub_sat(subsat);
1208  id->SetSat_key(int(seq.m_RowId));
1209  }
1210  else {
1211  _ASSERT(seq.m_WGSAcc.size() == kPrefixLenV1);
1212  unsigned subsat;
1213  if ( seq.IsContig() && seq.m_Version == -1 ) {
1214  // need contig version to choose appropriate blob-id format
1215  seq.m_Version = GetContigIterator(seq).GetLatestAccVersion();
1216  }
1217  if ( !seq.IsContig() || seq.m_Version <= 1 ) {
1218  // old blob-id subsat format, version is 1
1219  if ( seq.IsScaffold() ) {
1220  subsat = eBlobType_scaffold;
1221  }
1222  else if ( seq.IsProtein() ) {
1223  subsat = eBlobType_protein;
1224  }
1225  else { // contig or master
1226  subsat = eBlobType_contig;
1227  }
1228  int bit = eBlobIdBits_type;
1229  for ( size_t i = 0; i < seq.m_WGSAcc.size(); ++i ) {
1230  if ( i < kNumLettersV1 ) {
1231  subsat |= (seq.m_WGSAcc[i]-'A') << bit;
1232  bit += eBlobIdBits_letter;
1233  }
1234  else {
1235  subsat |= (seq.m_WGSAcc[i]-'0') << bit;
1236  bit += eBlobIdBits_digit;
1237  }
1238  }
1239  }
1240  else {
1241  // new blob-id subsat format that includes contig version > 1
1242  _ASSERT(seq.IsContig());
1243  _ASSERT(seq.m_Version >= 2);
1244  _ASSERT(seq.m_Version <= 24);
1245  subsat = 0;
1246  unsigned mul = 4;
1247  for ( size_t i = 0; i < seq.m_WGSAcc.size(); ++i ) {
1248  if ( i < kNumLettersV1 ) {
1249  subsat += (seq.m_WGSAcc[i]-'A')*mul;
1250  mul *= 26;
1251  }
1252  else {
1253  subsat += (seq.m_WGSAcc[i]-'0')*mul;
1254  mul *= 10;
1255  }
1256  }
1257  subsat += (seq.m_Version - 2)*mul;
1258  }
1259  id->SetSat(kBlobIdV1Sat);
1260  id->SetSub_sat(int(subsat));
1261  id->SetSat_key(int(seq.m_RowId));
1262  }
1263  seq0.m_BlobId = id;
1264  return *id;
1265 }
1266 
1267 
// Converts a WGS GenBank state code (NCBI_gb_state) into an ID2 blob-state
// bit mask. The mask starts at 0; the one assignment visible here ORs in
// (1 << eID2_Blob_State_dead). Values handled by `default` leave the mask 0.
// NOTE(review): the `case` labels and part of the arm bodies (listing lines
// 1272-1273, 1275-1276, 1279-1280) are absent from this listing, so which
// gb_state values map to which bits cannot be read from here -- confirm
// against the original file.
1268 static int s_GBStateToID2(NCBI_gb_state gb_state)
1269 {
1270  int state = 0;
1271  switch ( gb_state ) {
1274  break;
1277  state |= 1 << eID2_Blob_State_dead;
1278  break;
1281  break;
1282  default:
1283  break;
1284  }
1285  return state;
1286 }
1287 
1288 
1290 {
      // NOTE(review): the signature line (listing line 1289) is absent here;
      // presumably this is CWGSClient::GetID2BlobState(SWGSSeqInfo& seq).
      // The result is the bitwise OR of two ID2 state masks: one derived from
      // the sequence's own GB state and one derived from the project-level GB
      // state of its WGS database, so a bit set by either source is reported.
1291  return s_GBStateToID2(GetGBState(seq)) | s_GBStateToID2(seq.m_WGSDb->GetProjectGBState());
1292 }
1293 
1294 
1296 {
      // NOTE(review): the signature line (listing line 1295) is absent here;
      // presumably this is CWGSClient::GetGBState(SWGSSeqInfo& seq0).
      // The GB state is read from the root sequence returned by GetRootSeq(),
      // not from seq0 directly. Contigs and proteins report the state stored
      // in their iterator; scaffolds and the master sequence always report 0.
1297  SWGSSeqInfo& seq = GetRootSeq(seq0);
1298  if ( seq.IsContig() ) {
1299  return GetContigIterator(seq).GetGBState();
1300  }
1301  if ( seq.IsScaffold() ) {
1302  return 0;
1303  }
1304  if ( seq.IsProtein() ) {
1305  return GetProteinIterator(seq).GetGBState();
1306  }
1307  // master
1308  return 0;
1309 }
1310 
1311 
1313 {
      // NOTE(review): the signature line (listing line 1312) is absent here;
      // presumably this is CWGSClient::GetAccVer(SWGSSeqInfo& seq).
      // Returns the accession Seq-id of the sequence: taken from the matching
      // contig/scaffold/protein iterator, or from the WGS database's master
      // Seq-id when the sequence is none of those three kinds.
1314  if ( seq.IsContig() ) {
1315  return GetContigIterator(seq).GetAccSeq_id();
1316  }
1317  if ( seq.IsScaffold() ) {
1318  return GetScaffoldIterator(seq).GetAccSeq_id();
1319  }
1320  if ( seq.IsProtein() ) {
1321  return GetProteinIterator(seq).GetAccSeq_id();
1322  }
1323  // master
1324  return GetWGSDb(seq)->GetMasterSeq_id();
1325 }
1326 
1327 
1329 {
      // NOTE(review): the signature line (listing line 1328) is absent here;
      // presumably this is CWGSClient::GetGeneral(SWGSSeqInfo& seq).
      // Returns the general-or-patent Seq-id supplied by the matching
      // contig/scaffold/protein iterator. The master sequence has no such id,
      // so the fall-through case returns null.
1330  if ( seq.IsContig() ) {
1331  return GetContigIterator(seq).GetGeneralOrPatentSeq_id();
1332  }
1333  if ( seq.IsScaffold() ) {
1334  return GetScaffoldIterator(seq).GetGeneralOrPatentSeq_id();
1335  }
1336  if ( seq.IsProtein() ) {
1337  return GetProteinIterator(seq).GetGeneralOrPatentSeq_id();
1338  }
1339  // master
1340  return null;
1341 }
1342 
1343 
1345 {
      // NOTE(review): the signature line (listing line 1344) and the two
      // declarations of `it` (listing lines 1347 and 1355) are absent here;
      // presumably this is CWGSClient::GetGi(SWGSSeqInfo& seq) and `it` is the
      // contig / protein iterator of `seq` -- confirm against the original.
      // Returns the GI of a contig or protein when its iterator reports one;
      // every other case (no GI, scaffold, master) returns ZERO_GI.
1346  if ( seq.IsContig() ) {
1348  return it.HasGi()? it.GetGi(): ZERO_GI;
1349  }
1350  if ( seq.IsScaffold() ) {
1351  // scaffolds have no GIs
1352  return ZERO_GI;
1353  }
1354  if ( seq.IsProtein() ) {
1356  return it.HasGi()? it.GetGi(): ZERO_GI;
1357  }
1358  // master
1359  return ZERO_GI;
1360 }
1361 
1362 
1364 {
      // NOTE(review): the signature line (listing line 1363) is absent here;
      // presumably this is
      // CWGSClient::GetSeqIds(SWGSSeqInfo& seq, list< CRef<CSeq_id> >& ids).
      // Appends the ids of `seq` to `ids` in a fixed order: the accession id
      // (always), then the general/patent id if one exists, then a GI id if
      // the GI is non-zero. Existing contents of `ids` are kept.
1365  ids.push_back(GetAccVer(seq));
1366  if ( CRef<CSeq_id> id = GetGeneral(seq) ) {
1367  ids.push_back(id);
1368  }
1369  TGi gi = GetGi(seq);
1370  if ( gi != ZERO_GI ) {
      // the GI is not stored as a Seq-id, so build one for it
1371  CRef<CSeq_id> gi_id(new CSeq_id);
1372  gi_id->SetGi(gi);
1373  ids.push_back(gi_id);
1374  }
1375 }
1376 
1377 
1380  const SWGSSeqInfo& seq,
1381  const CAsnBinData& data) const
1382 {
      // NOTE(review): the first signature lines (listing lines 1378-1379) and
      // one `case` arm (listing line 1384) are absent here; presumably this is
      // CWGSClient::GetCompress(SWGSProcessor_Config::ECompressData comp, ...).
      // Decides whether the blob data should be compressed.
      // eCompressData_never disables compression; for values reaching
      // `default`, compression is chosen only when the main object of `data`
      // is a CSeq_entry and `seq` is the master sequence.
1383  switch ( comp ) {
1385  case SWGSProcessor_Config::eCompressData_never: return false;
1386  default: return dynamic_cast<const CSeq_entry*>(&data.GetMainObject()) && seq.IsMaster();
1387  }
1388 }
1389 
1390 
// Builds a new SWGSData with a CBioseqInfoRecord describing `seq` and stores
// it in `data`. When `seq` evaluates to false the function returns at once
// and `data` is left as the caller passed it.
// data->m_BioseqInfoFlags collects SPSGS_ResolveRequest::fPSGS_* bits as the
// corresponding record fields are filled in.
// NOTE(review): several lines are absent from this listing -- the declaration
// of `psg_ids` (1401), the declarations of the iterators named `it` (1460,
// 1474, 1482) and the right-hand sides of several
// `data->m_BioseqInfoFlags |=` statements -- confirm against the original.
1391 void CWGSClient::GetBioseqInfo(shared_ptr<SWGSData>& data, SWGSSeqInfo& seq)
1392 {
1393  if ( !seq ) return;
1394 
1395  data = make_shared<SWGSData>();
1396  data->m_BioseqInfo = make_shared<CBioseqInfoRecord>();
1397  CBioseqInfoRecord& info = *data->m_BioseqInfo;
1398 
1399  list< CRef<CSeq_id> > wgs_ids;
1400  GetSeqIds(seq, wgs_ids);
1402  TGi gi = ZERO_GI;
      // Classify every id: a GI is remembered and handled after the loop, the
      // first versioned text accession becomes the canonical id, and anything
      // else is collected into psg_ids as (type, FASTA content) pairs.
1403  for ( auto& id : wgs_ids ) {
1404  if ( id->IsGi() ) {
1405  gi = id->GetGi();
1406  info.SetGI(GI_TO(CBioseqInfoRecord::TGI, gi));
1407  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_Gi;
1408  continue;
1409  }
1410  else if ( auto text_id = id->GetTextseq_Id() ) {
1411  // only versioned accession goes to canonical id
1412  if ( !(data->m_BioseqInfoFlags & SPSGS_ResolveRequest::fPSGS_CanonicalId) &&
1413  text_id->IsSetAccession() && text_id->IsSetVersion() ) {
1414  info.SetSeqIdType(id->Which());
1415  info.SetAccession(text_id->GetAccession());
1416  info.SetVersion(text_id->GetVersion());
1417  if ( text_id->IsSetName() ) {
1418  info.SetName(text_id->GetName());
1419  }
1420  data->m_BioseqInfoFlags |=
1423  continue;
1424  }
1425  }
1426  string content;
1427  id->GetLabel(&content, CSeq_id::eFastaContent);
1428  psg_ids.insert(make_tuple(id->Which(), move(content)));
1429  }
1430  if ( gi != ZERO_GI ) {
1431  // gi goes either to canonical id or to other ids
1432  CSeq_id gi_id(CSeq_id::e_Gi, gi);
1433  string content;
1434  gi_id.GetLabel(&content, CSeq_id::eFastaContent);
1435  if ( !(data->m_BioseqInfoFlags & SPSGS_ResolveRequest::fPSGS_CanonicalId) ) {
1436  // set canonical id from gi
1437  info.SetAccession(content);
1438  info.SetVersion(0);
1439  info.SetSeqIdType(gi_id.Which());
1440  data->m_BioseqInfoFlags |=
1443  }
1444  else {
1445  // to other ids
1446  psg_ids.insert(make_tuple(gi_id.Which(), move(content)));
1447  }
1448  }
1449  if ( (data->m_BioseqInfoFlags & SPSGS_ResolveRequest::fPSGS_CanonicalId) || !psg_ids.empty() ) {
1450  info.SetSeqIds(move(psg_ids));
1451  // all ids are requested, so we should get GI and acc.ver too if they exist
1452  info.SetGI(GI_TO(CBioseqInfoRecord::TGI, gi)); // even if it's zero
1453  data->m_BioseqInfoFlags |=
1457  }
1458 
      // Per-type properties: hash, length, molecule type and tax id are read
      // from the iterator / database matching the sequence kind.
1459  if ( seq.IsContig() ) {
1461  info.SetHash(it.GetSeqHash());
1462  info.SetLength(it.GetSeqLength());
1463  info.SetMol(GetWGSDb(seq)->GetContigMolType());
1464  data->m_BioseqInfoFlags |=
1468  if ( it.HasTaxId() ) {
1469  info.SetTaxId(it.GetTaxId());
1470  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_TaxId;
1471  }
1472  }
1473  if ( seq.IsScaffold() ) {
1475  info.SetLength(it.GetSeqLength());
1476  info.SetMol(GetWGSDb(seq)->GetScaffoldMolType());
1477  data->m_BioseqInfoFlags |=
1480  }
1481  if ( seq.IsProtein() ) {
1483  info.SetLength(it.GetSeqLength());
1484  info.SetMol(GetWGSDb(seq)->GetProteinMolType());
1485  data->m_BioseqInfoFlags |=
1488  if ( it.HasSeqHash() ) {
1489  info.SetHash(it.GetSeqHash());
1490  data->m_BioseqInfoFlags |=
1492  }
1493  {{
1494  // set taxid
1495  auto wgs = GetWGSDb(seq);
1496  // faster common taxid retrieval if possible
1497  if ( wgs->HasCommonTaxId() ) {
1498  info.SetTaxId(wgs->GetCommonTaxId());
1499  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_TaxId;
1500  }
1501  else {
1502  // otherwise get taxid from root sequence (contig or protein itself)
1503  auto& root_seq = GetRootSeq(seq);
1504  if ( root_seq.IsContig() ) {
1505  CWGSSeqIterator root_it = GetContigIterator(root_seq);
1506  if ( root_it.HasTaxId() ) {
1507  info.SetTaxId(root_it.GetTaxId());
1508  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_TaxId;
1509  }
1510  }
1511  if ( root_seq.IsProtein() ) {
1512  if ( it.HasTaxId() ) {
1513  info.SetTaxId(it.GetTaxId());
1514  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_TaxId;
1515  }
1516  }
1517  }
1518  }}
1519  }
1520 
      // Blob identity: keep the ID2 blob id and its PSG string form.
1521  data->m_Id2BlobId.Reset(&GetBlobId(seq));
1522  data->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*data->m_Id2BlobId);
1523  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_BlobId;
1524  if ( data->m_Id2BlobId->IsSetVersion() ) {
1525  // ID2 version is minutes since UNIX epoch
1526  // PSG date_changed is ms since UNIX epoch
1527  info.SetDateChanged(data->m_Id2BlobId->GetVersion()*60000);
1528  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_DateChanged;
1529  }
1530 
      // The PSG state is derived from the ID2 state, so the latter is set first.
1531  data->m_Id2BlobState = GetID2BlobState(seq);
1532  info.SetState(data->GetPSGBioseqState());
1533  data->m_BioseqInfoFlags |= SPSGS_ResolveRequest::fPSGS_State;
1534 }
1535 
1536 
// Loads the blob data for a sequence into `data`.
// A new SWGSData is allocated when `data` is empty; fields already present
// (m_Id2BlobId, m_BlobId) are kept, while m_Id2BlobState is always refreshed.
// Blob id and state are computed from `seq0`, whereas the data itself is
// loaded from the root sequence returned by GetRootSeq(seq0).
// If the blob state is forbidden the function returns before loading any
// data. If no data object could be produced, `data` is reset to empty.
// NOTE(review): the declarations of the iterators named `it` (listing lines
// 1553, 1577, 1588) are absent from this listing -- confirm against the
// original file.
1537 void CWGSClient::GetWGSData(shared_ptr<SWGSData>& data, SWGSSeqInfo& seq0)
1538 {
1539  if (!data) {
1540  data = make_shared<SWGSData>();
1541  }
1542  SWGSSeqInfo& seq = GetRootSeq(seq0);
1543 
1544  if ( !data->m_Id2BlobId ) data->m_Id2BlobId.Reset(&GetBlobId(seq0));
1545  if ( data->m_BlobId.empty() ) data->m_BlobId = osg::CPSGS_OSGGetBlobBase::GetPSGBlobId(*data->m_Id2BlobId);
1546  data->m_Id2BlobState = GetID2BlobState(seq0);
1547  if ( data->IsForbidden() ) return;
1548 
1549  if ( seq.IsMaster() ) {
1550  data->m_Data = new CAsnBinData(*GetWGSDb(seq)->GetMasterSeq_entry());
1551  }
1552  else if ( seq.IsContig() ) {
1554  CWGSSeqIterator::TFlags flags = it.fDefaultFlags ;
      // Master descriptors are either stripped, or kept and optionally marked.
1555  if ( !s_AddMasterDescrContig() ) {
1556  flags &= ~it.fMasterDescr;
1557  }
1558  else if ( s_MarkMasterDescr() ) {
1559  flags |= it.fMasterDescrMark;
1560  }
1561  if ( !s_SplitFeatures() ) {
1562  flags &= ~it.fSplitFeatures;
1563  }
      // Try the data sources in order: split info (which also yields the
      // split version), then ready-made Seq-entry data, and finally a
      // Seq-entry object wrapped into a new CAsnBinData.
1564  auto asn_data = it.GetSplitInfoDataAndVersion(flags);
1565  if ( asn_data.first ) {
1566  data->m_SplitVersion = asn_data.second;
1567  }
1568  if ( !asn_data.first ) {
1569  asn_data.first = it.GetSeq_entryData(flags);
1570  }
1571  if ( !asn_data.first ) {
1572  asn_data.first = new CAsnBinData(*it.GetSeq_entry(flags));
1573  }
1574  data->m_Data = asn_data.first;
1575  }
1576  else if ( seq.IsScaffold() ) {
1578  CWGSScaffoldIterator::TFlags flags = it.fDefaultFlags;
1579  if ( !s_AddMasterDescrScaffold() ) {
1580  flags &= ~it.fMasterDescr;
1581  }
1582  else if ( s_MarkMasterDescr() ) {
1583  flags |= it.fMasterDescrMark;
1584  }
1585  data->m_Data = new CAsnBinData(*it.GetSeq_entry(flags));
1586  }
1587  else if ( seq.IsProtein() ) {
1589  CWGSProteinIterator::TFlags flags = it.fDefaultFlags;
1590  if ( !s_AddMasterDescrProtein() ) {
1591  flags &= ~it.fMasterDescr;
1592  }
1593  else if ( s_MarkMasterDescr() ) {
1594  flags |= it.fMasterDescrMark;
1595  }
1596  data->m_Data = new CAsnBinData(*it.GetSeq_entry(flags));
1597  }
1598  if ( data->m_Data ) {
1599  data->m_Compress = GetCompress(m_Config.m_CompressData, seq, *data->m_Data);
1600  }
1601  else {
      // nothing was loaded for this sequence kind: report "no data" to the caller
1602  data.reset();
1603  }
1604 }
1605 
1606 
1608 {
      // NOTE(review): the signature line (listing line 1607) and the second
      // half of the first condition (listing line 1610) are absent here;
      // presumably this is SWGSData::GetPSGBioseqState() const.
      // Maps the ID2 blob-state bit mask in m_Id2BlobState to a single PSG
      // bioseq state. The tests run in order, so the first matching bit wins:
      // an empty mask is live, `suppressed` maps to eReserved, and every
      // remaining case (dead, withdrawn, protected, anything else) is eDead.
1609  if ( m_Id2BlobState == 0 ||
1611  return eLive;
1612  }
1613  else if ( m_Id2BlobState & (1<<eID2_Blob_State_suppressed) ) {
1614  return eReserved;
1615  }
1616  else if ( m_Id2BlobState & (1<<eID2_Blob_State_dead) ) {
1617  return eDead;
1618  }
1619  else if ( m_Id2BlobState & (1<<eID2_Blob_State_withdrawn) ) {
1620  return eDead; // assume withdrawn as dead ???
1621  }
1622  else if ( m_Id2BlobState & (1<<eID2_Blob_State_protected) ) {
1623  return eDead; // assume protected (unauthorized) as dead ???
1624  }
1625  else {
1626  return eDead;
1627  }
1628 }
1629 
1630 
// Tells whether an ID2 blob-state bit mask denotes a forbidden blob.
// Returns true when the `withdrawn` or the `protected` bit is set in
// `id2_blob_state`, and false for any other mask (including 0).
1631 bool SWGSData::IsForbidden(int id2_blob_state)
1632 {
1633  if ( id2_blob_state & (1<<eID2_Blob_State_withdrawn) ) {
1634  return true;
1635  }
1636  else if ( id2_blob_state & (1<<eID2_Blob_State_protected) ) {
1637  return true;
1638  }
1639  return false;
1640 }
1641 
1642 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Definition: Dbtag.hpp:53
CID2_Blob_Id –.
Definition: ID2_Blob_Id.hpp:66
virtual void DoJob(void)
Payload function.
Definition: wgs_client.cpp:216
CIndexUpdateThread(unsigned update_delay, CRef< CWGSResolver > resolver)
Definition: wgs_client.cpp:208
CRef< CWGSResolver > m_Resolver
Definition: wgs_client.cpp:237
EPSGS_Type GetRequestType(void) const
@ ePSGS_BlobBySatSatKeyRequest
TRequest & GetRequest(void)
static CPubseqGatewayApp * GetInstance(void)
Definition: Seq_entry.hpp:56
@ eProtectedDb
DB is protected.
Definition: exception.hpp:98
@ eNotFoundDb
DB main file not found.
Definition: exception.hpp:92
virtual TErrCode GetErrCode(void) const
Definition: sraread.cpp:163
Adaptation of CThread class repeatedly running some job.
void RequestStop()
Schedule thread Stop.
CRef< CSlot > GetSlot(const string &acc_or_path)
Definition: vdbcache.cpp:187
shared_ptr< SWGSData > GetChunk(const string &id2info, int64_t chunk_id)
Definition: wgs_client.cpp:375
bool GetCompress(SWGSProcessor_Config::ECompressData comp, const SWGSSeqInfo &seq, const objects::CAsnBinData &data) const
objects::CVDBMgr m_Mgr
Definition: wgs_client.hpp:228
SWGSSeqInfo ResolveBlobId(const objects::CID2_Blob_Id &id, bool skip_lookup=false)
shared_ptr< SWGSData > GetBlobBySeqId(const objects::CSeq_id &seq_id, const TBlobIds &excluded)
Definition: wgs_client.cpp:340
SWGSSeqInfo ResolveGi(TGi gi, bool skip_lookup=false)
Definition: wgs_client.cpp:779
shared_ptr< SWGSData > GetBlobByBlobId(const string &blob_id)
Definition: wgs_client.cpp:361
bool IsCorrectVersion(SWGSSeqInfo &seq, int version)
Definition: wgs_client.cpp:606
void ResetIteratorCache(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:500
void GetWGSData(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq0)
CRef< objects::CSeq_id > GetAccVer(SWGSSeqInfo &seq)
CWGSClient(const SWGSProcessor_Config &config)
Definition: wgs_client.cpp:248
CFastMutex m_ResolverMutex
Definition: wgs_client.hpp:229
int TAllowSeqType
Definition: wgs_client.hpp:177
SWGSSeqInfo & GetRootSeq(SWGSSeqInfo &seq0)
Definition: wgs_client.cpp:539
objects::CWGSScaffoldIterator & GetScaffoldIterator(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:520
shared_ptr< SWGSData > ResolveSeqId(const objects::CSeq_id &seq_id)
Definition: wgs_client.cpp:326
TGi GetGi(SWGSSeqInfo &seq)
CRef< objects::CWGSResolver > m_Resolver
Definition: wgs_client.hpp:230
~CWGSClient(void)
Definition: wgs_client.cpp:255
bool IsValidRowId(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:590
CRef< objects::CWGSResolver > GetWGSResolver(void)
Definition: wgs_client.cpp:264
SWGSSeqInfo ResolveAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
Definition: wgs_client.cpp:839
SWGSSeqInfo Resolve(const objects::CSeq_id &id, bool skip_lookup=false)
Definition: wgs_client.cpp:671
bool HasMigrated(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:650
objects::CID2_Blob_Id & GetBlobId(SWGSSeqInfo &id)
objects::CWGSDb GetWGSDb(const string &prefix)
Definition: wgs_client.cpp:423
CRef< objects::CSeq_id > GetGeneral(SWGSSeqInfo &seq)
bool CanProcessRequest(CPSGS_Request &request)
Definition: wgs_client.cpp:280
int GetID2BlobState(SWGSSeqInfo &seq)
objects::CWGSSeqIterator & GetContigIterator(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:509
vector< string > TBlobIds
Definition: wgs_client.hpp:113
NCBI_gb_state GetGBState(SWGSSeqInfo &seq0)
TWGSDbCache m_WGSDbCache
Definition: wgs_client.hpp:231
void GetBioseqInfo(shared_ptr< SWGSData > &data, SWGSSeqInfo &seq)
objects::CWGSProteinIterator & GetProteinIterator(SWGSSeqInfo &seq)
Definition: wgs_client.cpp:529
void x_RegisterTiming(psg_time_point_t start, EPSGOperation operation, EPSGOperationStatus status)
Definition: wgs_client.cpp:414
SWGSSeqInfo ResolveWGSAcc(const string &acc, const objects::CTextseq_id &id, TAllowSeqType allow_seq_type, bool skip_lookup=false)
Definition: wgs_client.cpp:928
bool HasSpecialState(SWGSSeqInfo &seq, NCBI_gb_state special_state)
Definition: wgs_client.cpp:630
SWGSSeqInfo ResolveGeneral(const objects::CDbtag &dbtag, bool skip_lookup=false)
Definition: wgs_client.cpp:714
SWGSSeqInfo ResolveProtAcc(const objects::CTextseq_id &id, bool skip_lookup=false)
Definition: wgs_client.cpp:887
SWGSProcessor_Config m_Config
Definition: wgs_client.hpp:227
void GetSeqIds(SWGSSeqInfo &seq, list< CRef< objects::CSeq_id > > &ids)
CRef< CThreadNonStop > m_IndexUpdateThread
Definition: wgs_client.hpp:232
TVDBRowId GetContigNameRowId(const string &name) const
Definition: wgsread.hpp:737
TVDBRowId GetProteinNameRowId(const string &name) const
Definition: wgsread.hpp:749
bool LoadMasterDescr(EDescrFilter filter=eDescrDefaultFilter) const
Definition: wgsread.hpp:772
TVDBRowId GetScaffoldNameRowId(const string &name) const
Definition: wgsread.hpp:743
TVDBRowId GetLocRowId(void) const
Definition: wgsread.cpp:7785
NCBI_WGS_seqtype GetLocSeqType(void) const
Definition: wgsread.cpp:7771
ESeqType GetSeqType(void) const
Definition: wgsread.hpp:1367
TVDBRowId GetRowId(void) const
Definition: wgsread.hpp:1372
bool HasGi(void) const
Definition: wgsread.cpp:7099
NCBI_gb_state GetGBState(void) const
Definition: wgsread.cpp:7311
bool HasTaxId(void) const
Definition: wgsread.cpp:7252
TSeqPos GetSeqLength(void) const
Definition: wgsread.cpp:7287
CSeq_id::TGi GetGi(void) const
Definition: wgsread.cpp:7105
THash GetSeqHash(void) const
Definition: wgsread.cpp:7281
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
Definition: wgsread.cpp:7628
int GetAccVersion(void) const
Definition: wgsread.cpp:7131
bool HasSeqHash(void) const
Definition: wgsread.cpp:7273
TTaxId GetTaxId(void) const
Definition: wgsread.cpp:7262
virtual TWGSPrefixes GetPrefixes(TGi gi)=0
virtual void SetNonWGS(TGi gi, const TWGSPrefixes &prefixes)
vector< string > TWGSPrefixes
Definition: wgsresolver.hpp:51
virtual bool Update(void)
static CRef< CWGSResolver > CreateResolver(const CVDBMgr &mgr)
Definition: wgsresolver.cpp:75
virtual void SetWGSPrefix(TGi gi, const TWGSPrefixes &prefixes, const string &prefix)
Definition: wgsresolver.cpp:96
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
Definition: wgsread.cpp:6813
TSeqPos GetSeqLength(void) const
Definition: wgsread.cpp:6624
CRef< CAsnBinData > GetSeq_entryData(TFlags flags=fDefaultFlags) const
Definition: wgsread.cpp:6250
CRef< CAsnBinData > GetChunkDataForVersion(TChunkId chunk_id, TSplitVersion split_version) const
Definition: wgsread.cpp:6337
TTaxId GetTaxId(void) const
Definition: wgsread.cpp:4405
bool HasGi(void) const
Definition: wgsread.cpp:4256
CSeq_id::TGi GetGi(void) const
Definition: wgsread.cpp:4262
THash GetSeqHash(void) const
Definition: wgsread.cpp:4422
pair< CRef< CAsnBinData >, TSplitVersion > GetSplitInfoDataAndVersion(TFlags flags=fDefaultFlags) const
Definition: wgsread.cpp:6289
bool HasTaxId(void) const
Definition: wgsread.cpp:4399
TSeqPos GetSeqLength(EClipType clip_type=eDefaultClip) const
Definition: wgsread.cpp:4478
bool HasAccVersion(int version) const
Definition: wgsread.cpp:4301
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
Definition: wgsread.cpp:6239
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
static uch flags
#define true
Definition: bool.h:35
char data[12]
Definition: iconv.c:80
Int8 int64_t
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
EBlobType
Definition: types.hpp:78
@ eBlobType_contig
Definition: wgs_client.cpp:188
@ eBlobType_scaffold
Definition: wgs_client.cpp:189
@ eBlobType_protein
Definition: wgs_client.cpp:190
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ fAcc_prot
Definition: Seq_id.hpp:252
@ eAcc_targeted
Definition: Seq_id.hpp:298
@ eAcc_embl_prot
Definition: Seq_id.hpp:383
@ eAcc_wgs_intermed
Definition: Seq_id.hpp:294
@ eAcc_gb_prot
Definition: Seq_id.hpp:345
@ eAcc_tsa
Definition: Seq_id.hpp:273
@ eAcc_other
Definition: Seq_id.hpp:264
@ eAcc_division_mask
Definition: Seq_id.hpp:299
@ eFastaContent
Like eFasta, but without any tag.
Definition: Seq_id.hpp:608
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
void Swap(TThisType &ref)
Swaps the pointer with another reference.
Definition: ncbiobj.hpp:754
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
bool Run(TRunMode flags=fRunDefault)
Run the thread.
Definition: ncbithr.cpp:724
CGuard< CRWLock, SSimpleWriteLock< CRWLock > > TWriteLockGuard
Definition: ncbimtx.hpp:934
void Join(void **exit_data=0)
Wait for the thread termination.
Definition: ncbithr.cpp:863
operation
Bit operations.
Definition: bmconst.h:191
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
@ eID2_Blob_State_dead
@ eID2_Blob_State_suppressed
@ eID2_Blob_State_protected
@ eID2_Blob_State_live
@ eID2_Blob_State_withdrawn
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
TGi & SetGi(void)
Select the variant.
Definition: Seq_id_.hpp:896
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
static MDB_envinfo info
Definition: mdb_load.c:37
static int version
Definition: mdb_load.c:29
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const char * tag
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
static const char * prefix[]
Definition: pcregrep.c:405
#define PSG_ERROR(message)
#define PSG_INFO(message)
psg_clock_t::time_point psg_time_point_t
#define row(bind, expected)
Definition: string_bind.c:73
AutoPtr< SWGSSeqInfo > m_RootSeq
Definition: wgs_client.hpp:154
objects::CWGSProteinIterator m_ProteinIter
Definition: wgs_client.hpp:152
bool IsProtein(void) const
Definition: wgs_client.hpp:132
objects::CWGSDb m_WGSDb
Definition: wgs_client.hpp:149
bool IsMaster(void) const
Definition: wgs_client.hpp:130
objects::CWGSScaffoldIterator m_ScaffoldIter
Definition: wgs_client.hpp:151
objects::CWGSSeqIterator m_ContigIter
Definition: wgs_client.hpp:150
objects::TVDBRowId m_RowId
Definition: wgs_client.hpp:146
CRef< objects::CID2_Blob_Id > m_BlobId
Definition: wgs_client.hpp:153
bool IsContig(void) const
Definition: wgs_client.hpp:129
bool IsScaffold(void) const
Definition: wgs_client.hpp:131
int GetPSGBioseqState() const
int m_Id2BlobState
Definition: wgs_client.hpp:94
bool IsForbidden() const
Definition: wgs_client.hpp:90
@ fMasterDescrMark
Definition: wgsread.hpp:157
@ fSplitFeatures
Definition: wgsread.hpp:170
unsigned m_IndexUpdateDelay
Definition: wgs_client.hpp:68
ECompressData m_CompressData
Definition: wgs_client.hpp:71
Definition: type.c:6
#define _ASSERT
EPSGOperationStatus
Definition: timing.hpp:60
@ eOpStatusFound
Definition: timing.hpp:61
@ eOpStatusNotFound
Definition: timing.hpp:62
EPSGOperation
Definition: timing.hpp:65
@ eWGS_VDBLookup
Definition: timing.hpp:90
@ eVDBOpen
Definition: timing.hpp:87
int64_t TVDBRowId
Definition: vdbread.hpp:79
static EAddMasterDescr s_AddMasterDescrLevel(void)
Definition: wgs_client.cpp:117
static bool s_AddMasterDescrScaffold()
Definition: wgs_client.cpp:129
static const size_t kNumLettersV1
Definition: wgs_client.cpp:162
USING_SCOPE(objects)
static const size_t kMaxRowDigitsV2
Definition: wgs_client.cpp:170
static const EResolveMaster kResolveMaster
Definition: wgs_client.cpp:153
END_NAMESPACE(wgs)
static int s_GBStateToID2(NCBI_gb_state gb_state)
static const size_t kMinRowDigitsV2
Definition: wgs_client.cpp:169
EBlobIdBits
Definition: wgs_client.cpp:192
@ eBlobIdBits_type
Definition: wgs_client.cpp:193
@ eBlobIdBits_letter
Definition: wgs_client.cpp:194
@ eBlobIdBits_digit
Definition: wgs_client.cpp:195
static bool s_MarkMasterDescr(void)
Definition: wgs_client.cpp:141
static bool s_FilterAll(void)
Definition: wgs_client.cpp:78
BEGIN_LOCAL_NAMESPACE
Definition: wgs_client.cpp:203
static const size_t kNumLettersV2
Definition: wgs_client.cpp:163
NCBI_PARAM_DEF_EX(bool, WGS, FILTER_ALL, false, eParam_NoThread, WGS_FILTER_ALL)
NCBI_PARAM_DEF(bool, WGS, SPLIT_FEATURES, true)
static bool s_KeepReplaced(void)
Definition: wgs_client.cpp:90
static const size_t kVersionDigits
Definition: wgs_client.cpp:164
static const int kBlobIdV2VersionContig
Definition: wgs_client.cpp:186
static const int kBlobIdV2SatMax
Definition: wgs_client.cpp:183
static const int kBlobIdV2SatMin
Definition: wgs_client.cpp:182
static const size_t kTypePrefixLen
Definition: wgs_client.cpp:161
END_LOCAL_NAMESPACE
Definition: wgs_client.cpp:240
static bool s_AddMasterDescrContig()
Definition: wgs_client.cpp:123
static bool s_SplitFeatures(void)
Definition: wgs_client.cpp:84
static const size_t kMaxRowDigitsV1
Definition: wgs_client.cpp:168
static const size_t kPrefixLenV2
Definition: wgs_client.cpp:166
static const int kBlobIdV2VersionScaffold
Definition: wgs_client.cpp:184
EAddMasterDescr
Definition: wgs_client.cpp:103
@ eAddMasterDescr_none
Definition: wgs_client.cpp:104
@ eAddMasterDescr_all
Definition: wgs_client.cpp:106
@ eAddMasterDescr_detached
Definition: wgs_client.cpp:105
NCBI_PARAM_DECL(bool, WGS, FILTER_ALL)
static EAddMasterDescr s_ProcessAddMasterDescr(void)
Definition: wgs_client.cpp:109
static const int kBlobIdV1Sat
Definition: wgs_client.cpp:181
END_NCBI_NAMESPACE
static bool s_KeepMigrated(void)
Definition: wgs_client.cpp:96
static const size_t kMaxProtAccLen
Definition: wgs_client.cpp:173
static const int kBlobIdV2VersionProtein
Definition: wgs_client.cpp:185
static const size_t kMinRowDigitsV1
Definition: wgs_client.cpp:167
BEGIN_NCBI_NAMESPACE
Definition: wgs_client.cpp:46
static bool s_AddMasterDescrProtein()
Definition: wgs_client.cpp:135
static const size_t kMinProtAccLen
Definition: wgs_client.cpp:172
BEGIN_NAMESPACE(psg)
EResolveMaster
Definition: wgs_client.cpp:148
@ eResolveMaster_never
Definition: wgs_client.cpp:149
@ eResolveMaster_always
Definition: wgs_client.cpp:151
@ eResolveMaster_without_gi
Definition: wgs_client.cpp:150
static const size_t kPrefixLenV1
Definition: wgs_client.cpp:165
@ NCBI_gb_state_eWGSGenBankUnverified
Definition: wgsread.hpp:90
@ NCBI_gb_state_eWGSGenBankReplaced
Definition: wgsread.hpp:88
@ NCBI_gb_state_eWGSGenBankSuppressed
Definition: wgsread.hpp:87
@ NCBI_gb_state_eWGSGenBankMigrated
Definition: wgsread.hpp:92
@ NCBI_gb_state_eWGSGenBankLive
Definition: wgsread.hpp:86
@ NCBI_gb_state_eWGSGenBankWithdrawn
Definition: wgsread.hpp:89
@ NCBI_WGS_seqtype_scaffold
Definition: wgsread.hpp:68
@ NCBI_WGS_seqtype_contig
Definition: wgsread.hpp:67
uint32_t NCBI_gb_state
Definition: wgsread.hpp:53
Modified on Sat Apr 13 11:47:55 2024 by modify_doxy.py rev. 669887