NCBI C++ ToolKit
wgsloader_impl.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: wgsloader_impl.cpp 100814 2023-09-14 17:26:43Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko
27  *
28  * File Description: WGS file data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
38 #include <objects/seq/seq__.hpp>
42 
43 //#define USE_ID2_CLIENT
44 #ifdef USE_ID2_CLIENT
45 # include <objects/id2/id2__.hpp>
47 #endif
48 
57 #include <serial/objistr.hpp>
58 #include <serial/serial.hpp>
59 #ifdef NCBI_THREADS
60 # include <util/thread_nonstop.hpp>
61 #endif
62 
63 #include <sra/error_codes.hpp>
69 
71 
72 #include <algorithm>
73 #include <cmath>
74 #include <sra/error_codes.hpp>
75 
77 
78 #define NCBI_USE_ERRCODE_X WGSLoader
80 
82 
83 class CDataLoader;
84 
// Limits describing the textual shape of WGS accessions.
//
// A nucleotide WGS accession starts with a prefix made of letters followed
// by version digits (e.g. AAAA01 or AAAAAA01), then the row number.

// Longest WGS protein accession; accessions longer than this are not
// looked up as protein accessions.
// NOTE(review): 3+7 presumably means 3 letters + 7 digits - confirm.
static const size_t kMaxWGSProteinAccLen = 3+7;
// Smallest and largest number of leading letters in a WGS prefix.
static const size_t kMinWGSPrefixLetters = 4;
static const size_t kMaxWGSPrefixLetters = 6;
// Number of version digits that follow the prefix letters.
static const size_t kWGSPrefixDigits = 2;
89 
90 NCBI_PARAM_DECL(int, WGS_LOADER, DEBUG);
91 NCBI_PARAM_DEF_EX(int, WGS_LOADER, DEBUG, 0,
92  eParam_NoThread, WGS_LOADER_DEBUG);
93 
94 static int GetDebugLevel(void)
95 {
96  static NCBI_PARAM_TYPE(WGS_LOADER, DEBUG) s_Value;
97  return s_Value.Get();
98 }
99 
100 
101 NCBI_PARAM_DECL(bool, WGS_LOADER, MASTER_DESCR);
102 NCBI_PARAM_DEF(bool, WGS_LOADER, MASTER_DESCR, true);
103 
104 static bool GetMasterDescrParam(void)
105 {
106  return NCBI_PARAM_TYPE(WGS_LOADER, MASTER_DESCR)::GetDefault();
107 }
108 
109 
110 NCBI_PARAM_DECL(size_t, WGS_LOADER, GC_SIZE);
111 NCBI_PARAM_DEF(size_t, WGS_LOADER, GC_SIZE, 100);
112 
113 static size_t GetGCSizeParam(void)
114 {
115  return NCBI_PARAM_TYPE(WGS_LOADER, GC_SIZE)::GetDefault();
116 }
117 
118 
119 NCBI_PARAM_DECL(string, WGS_LOADER, VOL_PATH);
120 NCBI_PARAM_DEF(string, WGS_LOADER, VOL_PATH, "");
121 
122 static string GetWGSVolPath(void)
123 {
124  return NCBI_PARAM_TYPE(WGS_LOADER, VOL_PATH)::GetDefault();
125 }
126 
127 
128 NCBI_PARAM_DECL(bool, WGS_LOADER, RESOLVE_GIS);
129 NCBI_PARAM_DEF(bool, WGS_LOADER, RESOLVE_GIS, true);
130 
131 
132 static bool GetResolveGIsParam(void)
133 {
134  return NCBI_PARAM_TYPE(WGS_LOADER, RESOLVE_GIS)::GetDefault();
135 }
136 
137 
138 NCBI_PARAM_DECL(bool, WGS_LOADER, RESOLVE_PROT_ACCS);
139 NCBI_PARAM_DEF(bool, WGS_LOADER, RESOLVE_PROT_ACCS, true);
140 
141 
142 static bool GetResolveProtAccsParam(void)
143 {
144  static bool value =
145  NCBI_PARAM_TYPE(WGS_LOADER, RESOLVE_PROT_ACCS)::GetDefault();
146  return value;
147 }
148 
149 
150 NCBI_PARAM_DECL(bool, WGS_LOADER, SPLIT_QUALITY_GRAPH);
151 NCBI_PARAM_DEF(bool, WGS_LOADER, SPLIT_QUALITY_GRAPH, true);
152 
153 
154 static bool GetSplitQualityGraphParam(void)
155 {
156  static bool value =
157  NCBI_PARAM_TYPE(WGS_LOADER, SPLIT_QUALITY_GRAPH)::GetDefault();
158  return value;
159 }
160 
161 
162 NCBI_PARAM_DECL(bool, WGS_LOADER, SPLIT_SEQUENCE);
163 NCBI_PARAM_DEF(bool, WGS_LOADER, SPLIT_SEQUENCE, true);
164 
165 
166 static bool GetSplitSequenceParam(void)
167 {
168  static bool value =
169  NCBI_PARAM_TYPE(WGS_LOADER, SPLIT_SEQUENCE)::GetDefault();
170  return value;
171 }
172 
173 
174 NCBI_PARAM_DECL(bool, WGS_LOADER, SPLIT_FEATURES);
175 NCBI_PARAM_DEF(bool, WGS_LOADER, SPLIT_FEATURES, true);
176 
177 
178 static bool GetSplitFeaturesParam(void)
179 {
180  static bool value =
181  NCBI_PARAM_TYPE(WGS_LOADER, SPLIT_FEATURES)::GetDefault();
182  return value;
183 }
184 
185 
186 NCBI_PARAM_DECL(bool, WGS_LOADER, KEEP_REPLACED);
187 NCBI_PARAM_DEF(bool, WGS_LOADER, KEEP_REPLACED, true);
188 NCBI_PARAM_DECL(bool, WGS_LOADER, KEEP_MIGRATED);
189 NCBI_PARAM_DEF(bool, WGS_LOADER, KEEP_MIGRATED, false);
190 
191 
192 static bool GetKeepReplacedParam(void)
193 {
194  static bool value =
195  NCBI_PARAM_TYPE(WGS_LOADER, KEEP_REPLACED)::GetDefault();
196  return value;
197 }
198 
199 
200 static bool GetKeepMigratedParam(void)
201 {
202  static bool value =
203  NCBI_PARAM_TYPE(WGS_LOADER, KEEP_MIGRATED)::GetDefault();
204  return value;
205 }
206 
207 
208 NCBI_PARAM_DECL(unsigned, WGS_LOADER, INDEX_UPDATE_TIME);
209 NCBI_PARAM_DEF(unsigned, WGS_LOADER, INDEX_UPDATE_TIME, 600);
210 
211 
212 static unsigned GetIndexUpdateTimeParam(void)
213 {
214  static unsigned value =
215  NCBI_PARAM_TYPE(WGS_LOADER, INDEX_UPDATE_TIME)::GetDefault();
216  return value;
217 }
218 
219 
220 NCBI_PARAM_DECL(unsigned, WGS_LOADER, RETRY_COUNT);
221 NCBI_PARAM_DEF(unsigned, WGS_LOADER, RETRY_COUNT, 3);
222 
223 
224 static unsigned GetRetryCountParam(void)
225 {
226  static unsigned value =
227  NCBI_PARAM_TYPE(WGS_LOADER, RETRY_COUNT)::GetDefault();
228  return value;
229 }
230 
231 
232 NCBI_PARAM_DECL(unsigned, WGS_LOADER, FILE_REOPEN_TIME);
233 NCBI_PARAM_DEF(unsigned, WGS_LOADER, FILE_REOPEN_TIME, 60*60); // 1 hour
234 
235 
236 static unsigned GetFileReopenTimeParam(void)
237 {
238  static unsigned value =
239  NCBI_PARAM_TYPE(WGS_LOADER, FILE_REOPEN_TIME)::GetDefault();
240  return value;
241 }
242 
243 
244 NCBI_PARAM_DECL(unsigned, WGS_LOADER, FILE_RECHECK_TIME);
245 NCBI_PARAM_DEF(unsigned, WGS_LOADER, FILE_RECHECK_TIME, 5*60); // 5 minutes
246 
247 
248 static unsigned GetFileRecheckTimeParam(void)
249 {
250  static unsigned value =
251  NCBI_PARAM_TYPE(WGS_LOADER, FILE_RECHECK_TIME)::GetDefault();
252  return value;
253 }
254 
255 
256 /////////////////////////////////////////////////////////////////////////////
257 // CWGSBlobId
258 /////////////////////////////////////////////////////////////////////////////
259 
261 {
262  FromString(str);
263 }
264 
265 
267  : m_WGSPrefix(info.file->GetWGSPrefix()),
268  m_SeqType(info.seq_type),
269  m_RowId(info.row_id),
270  m_Version(info.version)
271 {
272 }
273 
274 
276 {
277 }
278 
279 
280 string CWGSBlobId::ToString(void) const
281 {
283  out << m_WGSPrefix << '/';
284  if ( m_SeqType ) {
285  out << m_SeqType;
286  }
287  out << m_RowId;
288  if ( m_Version != -1 ) {
289  out << '.' << m_Version;
290  }
292 }
293 
294 
296 {
297  SIZE_TYPE slash = str.rfind('/');
298  if ( slash == NPOS ) {
299  NCBI_THROW_FMT(CSraException, eOtherError,
300  "Bad CWGSBlobId: "<<str);
301  }
302  m_WGSPrefix = str.substr(0, slash);
303  str = str.substr(slash+1);
304  SIZE_TYPE pos = 0;
305  if ( str[pos] == 'S' || str[pos] == 'P' ) {
306  m_SeqType = str[pos++];
307  }
308  else {
309  m_SeqType = '\0';
310  }
311  SIZE_TYPE dot = str.rfind('.');
312  if ( dot == NPOS ) {
313  m_Version = -1;
314  }
315  else {
316  m_Version = NStr::StringToNumeric<int>(str.substr(dot+1));
317  str = str.substr(0, dot);
318  }
319  m_RowId = NStr::StringToNumeric<Uint8>(str);
320 }
321 
322 
323 bool CWGSBlobId::operator<(const CBlobId& id) const
324 {
325  const CWGSBlobId& wgs2 = dynamic_cast<const CWGSBlobId&>(id);
326  if ( int diff = NStr::CompareNocase(m_WGSPrefix, wgs2.m_WGSPrefix) ) {
327  return diff < 0;
328  }
329  if ( m_SeqType != wgs2.m_SeqType ) {
330  return m_SeqType < wgs2.m_SeqType;
331  }
332  if ( m_Version != wgs2.m_Version ) {
333  return m_Version < wgs2.m_Version;
334  }
335  return m_RowId < wgs2.m_RowId;
336 }
337 
338 
339 bool CWGSBlobId::operator==(const CBlobId& id) const
340 {
341  const CWGSBlobId& wgs2 = dynamic_cast<const CWGSBlobId&>(id);
342  return m_RowId == wgs2.m_RowId &&
343  m_Version == wgs2.m_Version &&
344  m_SeqType == wgs2.m_SeqType &&
345  m_WGSPrefix == wgs2.m_WGSPrefix;
346 }
347 
348 
349 /////////////////////////////////////////////////////////////////////////////
350 // resolver update thread
351 
353 
354 
355 void sx_Update(CWGSResolver& resolver)
356 {
357  try {
358  if ( resolver.Update() ) {
359  if ( GetDebugLevel() >= 1 ) {
360  LOG_POST_X(18, Info<<"CWGSDataLoader: updated WGS index");
361  }
362  }
363  }
364  catch ( CException& exc ) {
365  if ( GetDebugLevel() >= 1 ) {
366  ERR_POST_X(20, "ID2WGS: "
367  "Exception while updating WGS index: "<<exc);
368  }
369  }
370  catch ( exception& exc ) {
371  if ( GetDebugLevel() >= 1 ) {
372  ERR_POST_X(20, "ID2WGS: "
373  "Exception while updating WGS index: "<<exc.what());
374  }
375  }
376 }
377 
378 
379 #ifdef NCBI_THREADS
380 class CIndexUpdateThread : public CThreadNonStop
381 {
382 public:
383  CIndexUpdateThread(unsigned update_delay,
384  CRef<CWGSResolver> resolver)
385  : CThreadNonStop(update_delay),
386  m_FirstRun(true),
387  m_Resolver(resolver)
388  {
389  }
390 
391 protected:
392  virtual void DoJob(void) {
393  if ( m_FirstRun ) {
394  // CThreadNonStop runs first iteration immediately, ignore it
395  m_FirstRun = false;
396  return;
397  }
399  }
400 
401 private:
402  bool m_FirstRun;
404 };
405 #endif
406 
408 
409 
410 /////////////////////////////////////////////////////////////////////////////
411 // CWGSDataLoader_Impl
412 /////////////////////////////////////////////////////////////////////////////
413 
414 
416  const CWGSDataLoader::SLoaderParams& params)
417  : m_WGSVolPath(params.m_WGSVolPath),
418  m_RetryCount(GetRetryCountParam()),
419  m_IndexUpdateDelay(GetIndexUpdateTimeParam()),
420  m_FileReopenTime(GetFileReopenTimeParam()),
421  m_FileRecheckTime(GetFileRecheckTimeParam()),
422  m_FoundFiles(max(params.m_WGSFiles.size(), GetGCSizeParam()),
423  m_FileReopenTime, m_FileRecheckTime),
424  m_AddWGSMasterDescr(GetMasterDescrParam()),
425  m_ResolveGIs(GetResolveGIsParam()),
426  m_ResolveProtAccs(GetResolveProtAccsParam()),
427  m_ResolverCreated(false)
428 {
429  if ( m_WGSVolPath.empty() && params.m_WGSFiles.empty() ) {
431  }
432  ITERATE (vector<string>, it, params.m_WGSFiles) {
434  CRef<CWGSFileInfo> info = OpenWGSFile(*info_slot, *it);
435  if ( !m_FixedFiles.insert(TFixedFiles::value_type(info->GetWGSPrefix(),
436  info_slot)).second ) {
437  NCBI_THROW_FMT(CSraException, eOtherError,
438  "Duplicated fixed WGS prefix: "<<
439  info->GetWGSPrefix());
440  }
441  }
442 }
443 
444 
446 {
447 #ifdef NCBI_THREADS
448  if ( m_IndexUpdateThread ) {
451  }
452 #endif
453 }
454 
455 
457 
458 NCBI_PARAM_DECL(bool, WGS, RESOLVER_GENBANK);
459 NCBI_PARAM_DEF(bool, WGS, RESOLVER_GENBANK, true);
460 
462 {
463 public:
464  CWGSResolver_DL(void) // find GenBank loader
465  : m_Loader(CObjectManager::GetInstance()->FindDataLoader("GBLOADER"))
466  {
467  }
468  explicit
470  : m_Loader(loader)
471  {
472  }
473 
474  static CRef<CWGSResolver> CreateResolver(void) // find GenBank loader
475  {
476  if ( !NCBI_PARAM_TYPE(WGS, RESOLVER_GENBANK)::GetDefault() ) {
477  return null;
478  }
479  CRef<CWGSResolver_DL> resolver(new CWGSResolver_DL());
480  if ( !resolver->IsValid() ) {
481  return null;
482  }
483  return CRef<CWGSResolver>(resolver);
484  }
486  {
487  if ( !loader ) {
488  return null;
489  }
490  return CRef<CWGSResolver>(new CWGSResolver_DL(loader));
491  }
492 
493  bool IsValid(void) const {
494  return m_Loader;
495  }
496 
497 protected:
498  virtual TWGSPrefixes GetPrefixes(const CSeq_id& id)
499  {
500  TWGSPrefixes prefixes;
502  LOG_POST_X(21, "CWGSResolver_DL: "
503  "Asking DataLoader for ids of "<<id.AsFastaString());
504  }
505  CDataLoader::TIds ids;
507  ITERATE ( CDataLoader::TIds, rit, ids ) {
509  LOG_POST_X(22, "CWGSResolver_DL: Parsing Seq-id "<<*rit);
510  }
511  string prefix = ParseWGSPrefix(*rit->GetSeqId());
512  if ( !prefix.empty() ) {
514  LOG_POST_X(23, "CWGSResolver_DL: WGS prefix: "<<prefix);
515  }
516  prefixes.push_back(prefix);
517  break;
518  }
519  }
520  return prefixes;
521  }
522 
524 };
525 
527 
528 
529 template<class Call>
532  const char* name,
533  unsigned retry_count)
534 {
535  if ( retry_count == 0 ) {
536  retry_count = m_RetryCount;
537  }
538  for ( unsigned t = 1; t < retry_count; ++ t ) {
539  try {
540  return call();
541  }
542  catch ( CBlobStateException& ) {
543  // no retry
544  throw;
545  }
546  catch ( CException& exc ) {
547  LOG_POST(Warning<<"CWGSDataLoader::"<<name<<"() try "<<t<<" exception: "<<exc);
548  }
549  catch ( exception& exc ) {
550  LOG_POST(Warning<<"CWGSDataLoader::"<<name<<"() try "<<t<<" exception: "<<exc.what());
551  }
552  catch ( ... ) {
553  LOG_POST(Warning<<"CWGSDataLoader::"<<name<<"() try "<<t<<" exception");
554  }
555  if ( t >= 2 ) {
556  //double wait_sec = m_WaitTime.GetTime(t-2);
557  double wait_sec = 1;
558  LOG_POST(Warning<<"CWGSDataLoader: waiting "<<wait_sec<<"s before retry");
559  SleepMilliSec(Uint4(wait_sec*1000));
560  }
561  }
562  return call();
563 }
564 
565 
567  const string& prefix)
568 {
570  ref(info_slot), cref(prefix)),
571  "OpenWGSFile");
572 }
573 
574 
576  const string& prefix)
577 {
579  info_slot.UpdateExpiration(m_FoundFiles, path);
580  CRef<CWGSFileInfo> info(new CWGSFileInfo(*this, prefix));
581  info_slot.SetObject(info);
582  return info;
583 }
584 
585 
587 {
588  if ( !m_ResolverCreated.load(memory_order_acquire) ) {
590  if ( !m_ResolverCreated.load(memory_order_acquire) ) {
591  if ( !m_Resolver ) {
593  }
594  if ( !m_Resolver ) {
596  }
597  if ( m_Resolver ) {
598 #ifdef NCBI_THREADS
599  if ( !m_IndexUpdateThread ) {
602  }
603 #else
604  m_IndexUpdateDeadline = make_unique<CDeadline>(m_IndexUpdateDelay);
605 #endif
606  }
607  m_ResolverCreated.store(true, memory_order_release);
608  }
609  }
610 #ifndef NCBI_THREADS
611  if ( m_Resolver && m_IndexUpdateDeadline->IsExpired() ) {
613  *m_IndexUpdateDeadline = CDeadline(m_IndexUpdateDelay);
614  }
615 #endif
616  return *m_Resolver;
617 }
618 
619 
621 {
622  CRef<SWGSFileInfoSlot> info_slot;
623  if ( !m_FixedFiles.empty() ) {
624  // no dynamic WGS accessions
626  if ( it == m_FixedFiles.end() ) {
627  return null;
628  }
629  info_slot = it->second;
630  }
631  else {
632  // lookup in dynamic cache
633  info_slot = m_FoundFiles.GetSlot(prefix);
634  }
635  CRef<CWGSFileInfo> info = x_GetFileInfo(*info_slot, prefix);
636  if ( !info ) {
637  return null;
638  }
639  if ( info->GetDb()->IsReplaced() && !GetKeepReplacedParam() ) {
640  // replaced
641  if ( GetDebugLevel() >= 2 ) {
642  ERR_POST_X(11, "CWGSDataLoader: WGS Project "<<prefix<<" is replaced with GenBank entry");
643  }
644  return null;
645  }
646  else {
647  // found
648  return info;
649  }
650 }
651 
652 
654  const string& prefix)
655 {
656  CRef<CWGSFileInfo> info; // return info
657  CRef<CWGSFileInfo> delete_info; // delete stale file info after releasing mutex
658  // now open or reopen the WGS file under individual guard
660  info = info_slot.GetObject<CWGSFileInfo>();
661  if ( info && info_slot.IsExpired(m_FoundFiles, prefix) ) {
662  if ( GetDebugLevel() >= 1 ) {
663  LOG_POST_X(4, Info<<"CWGSDataLoader: "
664  "Reopening WGS project expired in cache: "<<prefix);
665  }
666  info_slot.ResetObject();
667  delete_info.Swap(info);
668  }
669  if ( !info ) {
670  // make sure the file is opened
671  try {
672  info = OpenWGSFileOnce(info_slot, prefix);
673  }
674  catch ( CSraException& exc ) {
675  if ( exc.GetErrCode() == exc.eNotFoundDb ||
676  exc.GetErrCode() == exc.eProtectedDb ) {
677  // no such WGS table
678  return null;
679  }
680  else {
681  // problem in VDB or WGS reader
682  throw;
683  }
684  }
685  }
686  return info;
687 }
688 
689 
692 {
694  if ( !m_FixedFiles.empty() ) {
695  for ( auto& slot : m_FixedFiles ) {
696  if ( x_GetFileInfo(*slot.second, slot.first)->FindGi(ret, gi) ) {
697  if ( GetDebugLevel() >= 2 ) {
698  LOG_POST_X(3, Info<<"CWGSDataLoader: "
699  "Resolved gi "<<gi<<
700  " -> "<<ret.file->GetWGSPrefix());
701  }
702  return ret;
703  }
704  }
705  if ( GetDebugLevel() >= 3 ) {
706  ERR_POST_X(4, "CWGSDataLoader: "
707  "Failed to resolve gi "<<gi);
708  }
709  return ret;
710  }
711  CWGSResolver& resolver = GetResolver();
712  CWGSResolver::TWGSPrefixes prefixes = resolver.GetPrefixes(gi);
713  ITERATE ( CWGSResolver::TWGSPrefixes, it, prefixes ) {
714  if ( CRef<CWGSFileInfo> file = GetWGSFile(*it) ) {
715  if ( GetDebugLevel() >= 2 ) {
716  LOG_POST_X(6, Info<<"CWGSDataLoader: "
717  "Resolved gi "<<gi<<
718  " -> "<<file->GetWGSPrefix());
719  }
720  if ( file->FindGi(ret, gi) ) {
721  resolver.SetWGSPrefix(gi, prefixes, *it);
722  return ret;
723  }
724  }
725  }
726  if ( !prefixes.empty() ) {
727  resolver.SetNonWGS(gi, prefixes);
728  }
729  return ret;
730 }
731 
732 
735 {
736  const string& acc = text_id.GetAccession();
738  if ( !m_FixedFiles.empty() ) {
739  for ( auto& slot : m_FixedFiles ) {
740  if ( x_GetFileInfo(*slot.second, slot.first)->FindProtAcc(ret, text_id) ) {
741  if ( GetDebugLevel() >= 2 ) {
742  LOG_POST_X(7, Info<<"CWGSDataLoader: "
743  "Resolved prot acc "<<acc<<
744  " -> "<<ret.file->GetWGSPrefix());
745  }
746  return ret;
747  }
748  }
749  if ( GetDebugLevel() >= 3 ) {
750  ERR_POST_X(8, "CWGSDataLoader: "
751  "Failed to resolve prot acc "<<acc);
752  }
753  return ret;
754  }
755  CWGSResolver& resolver = GetResolver();
756  CWGSResolver::TWGSPrefixes prefixes = resolver.GetPrefixes(acc);
757  ITERATE ( CWGSResolver::TWGSPrefixes, it, prefixes ) {
758  if ( CRef<CWGSFileInfo> file = GetWGSFile(*it) ) {
759  if ( GetDebugLevel() >= 2 ) {
760  LOG_POST_X(10, Info<<"CWGSDataLoader: "
761  "Resolved prot acc "<<acc<<
762  " -> "<<file->GetWGSPrefix());
763  }
764  if ( file->FindProtAcc(ret, text_id) ) {
765  resolver.SetWGSPrefix(acc, prefixes, *it);
766  return ret;
767  }
768  }
769  }
770  if ( !prefixes.empty() ) {
771  resolver.SetNonWGS(acc, prefixes);
772  }
773  return ret;
774 }
775 
776 
779 {
780  const string& acc = text_id.GetAccession();
783  switch ( type & CSeq_id::eAcc_division_mask ) {
784  // accepted accession types
785  case CSeq_id::eAcc_wgs:
787  case CSeq_id::eAcc_tsa:
789  break;
790  case CSeq_id::eAcc_other:
791  if ( type == CSeq_id::eAcc_embl_prot ||
792  (type == CSeq_id::eAcc_gb_prot && acc.size() == 10) ) { // TODO: remove
793  // Some EMBL WGS accession aren't identified as WGS, so we'll try lookup anyway
794  break;
795  }
796  return ret;
797  default:
798  return ret;
799  }
800 
801  if ( (type & CSeq_id::fAcc_prot) && acc.size() <= kMaxWGSProteinAccLen ) {
802  if ( m_ResolveProtAccs ) {
803  ret = GetFileInfoByProtAcc(text_id);
804  }
805  return ret;
806  }
807 
808  // WGS accession
809  // AAAA010000001 - prefix AAAA01 or AAAAAA01
810  // optional 'S' or 'P' symbol after prefix for scaffolds and proteins
811 
812  // first find number of letters in prefix
813  if ( acc.size() <= kMinWGSPrefixLetters+kWGSPrefixDigits ) {
814  return ret;
815  }
816  // determine actual number of letters
817  SIZE_TYPE prefix_letters = 0;
818  for ( ; prefix_letters < kMaxWGSPrefixLetters; ++prefix_letters ) {
819  if ( !isalpha(Uchar(acc[prefix_letters])) ) {
820  if ( prefix_letters < kMinWGSPrefixLetters ) {
821  return ret;
822  }
823  else {
824  break;
825  }
826  }
827  }
828  // check prefix digits
829  for ( SIZE_TYPE i = 0; i < kWGSPrefixDigits; ++i ) {
830  if ( !isdigit(Uchar(acc[prefix_letters+i])) ) {
831  return ret;
832  }
833  }
834  SIZE_TYPE prefix_len = prefix_letters+kWGSPrefixDigits;
835  // prefix is valid
836 
837  string prefix = acc.substr(0, prefix_len);
838  SIZE_TYPE row_pos = prefix_len;
839  if ( acc[row_pos] == 'S' || acc[row_pos] == 'P' ) {
840  ret.seq_type = acc[row_pos++];
841  }
842  if ( acc.size() <= row_pos ) {
843  return ret;
844  }
845  ret.row_id = NStr::StringToNumeric<Uint8>(acc.substr(row_pos),
847  if ( !ret.row_id ) {
848  return ret;
849  }
850  NStr::ToUpper(prefix);
851  if ( CRef<CWGSFileInfo> info = GetWGSFile(prefix) ) {
852  SIZE_TYPE row_digits = acc.size() - row_pos;
853  if ( info->m_WGSDb->GetIdRowDigits() == row_digits ) {
854  ret.file = info;
855  if ( !ret.ValidateAcc(text_id) ) {
856  ret.file = 0;
857  return ret;
858  }
859  }
860  }
861  return ret;
862 }
863 
864 
867 {
868  SAccFileInfo ret;
869  const CObject_id& object_id = dbtag.GetTag();
870  const string& db = dbtag.GetDb();
871  if ( db.size() != 8 /* WGS:AAAA */ &&
872  db.size() != 10 /* WGS:AAAA01 or WGS:AAAAAA */ &&
873  db.size() != 12 /* WGS:AAAAAA01 */ ) {
874  return SAccFileInfo();
875  }
876  bool is_tsa = false;
877  if ( NStr::StartsWith(db, "WGS:", NStr::eNocase) ) {
878  }
879  else if ( NStr::StartsWith(db, "TSA:", NStr::eNocase) ) {
880  is_tsa = true;
881  }
882  else {
883  return ret;
884  }
885  string wgs_acc = db.substr(4); // remove "WGS:" or "TSA:"
886 
887  NStr::ToUpper(wgs_acc);
888  if ( isalpha(wgs_acc.back()&0xff) ) {
889  wgs_acc += "01"; // add default version digits
890  }
892  if ( !info ) {
893  return ret;
894  }
895  const CWGSDb& wgs_db = info->GetDb();
896  if ( wgs_db->IsTSA() != is_tsa ) {
897  // TSA or WGS type must match
898  return ret;
899  }
900  string tag;
901  if ( object_id.IsStr() ) {
902  tag = object_id.GetStr();
904  }
905  else {
906  tag = NStr::NumericToString(object_id.GetId());
907  }
908  if ( TVDBRowId row = wgs_db.GetContigNameRowId(tag) ) {
909  ret.row_id = row;
910  }
911  else if ( TVDBRowId row = wgs_db.GetScaffoldNameRowId(tag) ) {
912  ret.seq_type = 'S';
913  ret.row_id = row;
914  }
915  else if ( TVDBRowId row = wgs_db.GetProteinNameRowId(tag) ) {
916  ret.seq_type = 'P';
917  ret.row_id = row;
918  }
919  if ( ret.row_id ) {
920  ret.file = info;
921  }
922  return ret;
923 }
924 
925 
928 {
929  if ( m_ResolveGIs && idh.IsGi() ) {
930  return GetFileInfoByGi(idh.GetGi());
931  }
932  switch ( idh.Which() ) { // shortcut
933  case CSeq_id::e_not_set:
934  case CSeq_id::e_Local:
935  case CSeq_id::e_Gi:
936  case CSeq_id::e_Gibbsq:
937  case CSeq_id::e_Gibbmt:
938  case CSeq_id::e_Giim:
939  case CSeq_id::e_Patent:
940  case CSeq_id::e_Pdb:
942  case CSeq_id::e_General:
943  return GetFileInfoByGeneral(idh.GetSeqId()->GetGeneral());
944  default:
945  break;
946  }
947  CConstRef<CSeq_id> id = idh.GetSeqId();
948  const CTextseq_id* text_id = id->GetTextseq_Id();
949  if ( !text_id ) {
951  }
953  if ( text_id->IsSetAccession() ) {
954  ret = GetFileInfoByAcc(*text_id);
955  }
956  if ( !ret ) {
957  return ret;
958  }
959  return ret;
960 }
961 
962 
964 {
965  return GetWGSFile(blob_id.m_WGSPrefix);
966 }
967 
968 
971 {
972  SAccFileInfo root_info;
973  if ( seq_type != 'P' ) {
974  return root_info;
975  }
976 
977  // may belong to a contig prot-set
978  // proteins can be located in nuc-prot set
980  if ( !cds_row_id ) {
981  return root_info;
982  }
983 
984  CWGSFeatureIterator cds_it(file->GetDb(), cds_row_id);
985  if ( !cds_it ) {
986  return root_info;
987  }
988 
989  switch ( cds_it.GetLocSeqType() ) {
991  {
992  // switch to contig
993  root_info.file = file;
994  root_info.row_id = cds_it.GetLocRowId();
995  root_info.seq_type = '\0';
996  break;
997  }
999  {
1000  // switch to scaffold
1001  root_info.file = file;
1002  root_info.row_id = cds_it.GetLocRowId();
1003  root_info.seq_type = 'S';
1004  break;
1005  }
1006  default:
1007  break;
1008  }
1009  return root_info;
1010 }
1011 
1012 
1014 {
1016  cref(idh)),
1017  "GetBlobId");
1018 }
1019 
1020 
1022 {
1023  // return blob-id of blob with sequence
1024  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1025  if ( CWGSFileInfo::SAccFileInfo root_info = info.GetRootFileInfo() ) {
1026  info = root_info;
1027  }
1028  return Ref(new CWGSBlobId(info));
1029  }
1030  return null;
1031 }
1032 
1033 
1036 {
1037  _ASSERT(IsContig() && row_id != 0);
1038  CWGSSeqIterator iter(file->GetDb(), row_id,
1040  iter.SelectAccVersion(version);
1041  return iter;
1042 }
1043 
1044 
1047 {
1048  _ASSERT(IsScaffold() && row_id != 0);
1049  return CWGSScaffoldIterator(file->GetDb(), row_id);
1050 }
1051 
1052 
1055 {
1056  _ASSERT(IsProtein() && row_id != 0);
1057  return CWGSProteinIterator(file->GetDb(), row_id);
1058 }
1059 
1060 
1063 {
1064  _ASSERT(blob_id.m_SeqType == '\0');
1065  _ASSERT(blob_id.m_RowId);
1066  CWGSSeqIterator iter(GetDb(), blob_id.m_RowId,
1068  iter.SelectAccVersion(blob_id.m_Version);
1069  return iter;
1070 }
1071 
1072 
1075 {
1076  _ASSERT(blob_id.m_SeqType == 'S');
1077  _ASSERT(blob_id.m_RowId);
1078  return CWGSScaffoldIterator(GetDb(), blob_id.m_RowId);
1079 }
1080 
1081 
1084 {
1085  _ASSERT(blob_id.m_SeqType == 'P');
1086  _ASSERT(blob_id.m_RowId);
1087  return CWGSProteinIterator(GetDb(), blob_id.m_RowId);
1088 }
1089 
1090 
1092  const CWGSBlobId& blob_id)
1093 {
1095  data_source, cref(blob_id)),
1096  "GetBlobById");
1097 }
1098 
1099 
1101  const CWGSBlobId& blob_id)
1102 {
1103  CDataLoader::TBlobId loader_blob_id(&blob_id);
1104  CTSE_LoadLock load_lock = data_source->GetTSE_LoadLock(loader_blob_id);
1105  if ( !load_lock.IsLoaded() ) {
1106  LoadBlob(blob_id, load_lock);
1107  load_lock.SetLoaded();
1108  }
1109  return load_lock;
1110 }
1111 
1112 
1115  const CSeq_id_Handle& idh,
1116  CDataLoader::EChoice choice)
1117 {
1119  data_source, cref(idh), choice),
1120  "GetRecords");
1121 }
1122 
1123 
1126  const CSeq_id_Handle& idh,
1127  CDataLoader::EChoice choice)
1128 {
1130  if ( choice == CDataLoader::eExtAnnot ||
1131  choice == CDataLoader::eExtFeatures ||
1132  choice == CDataLoader::eExtAlign ||
1133  choice == CDataLoader::eExtGraph ||
1134  choice == CDataLoader::eOrphanAnnot ) {
1135  // WGS loader doesn't provide external annotations
1136  return locks;
1137  }
1138  // return blob-id of blob with annotations and possibly with sequence
1139 
1140  if ( CRef<CWGSBlobId> blob_id = GetBlobId(idh) ) {
1141  CDataLoader::TTSE_Lock lock = GetBlobById(data_source, *blob_id);
1142  if ( (lock->GetBlobState() & CBioseq_Handle::fState_no_data) &&
1144  NCBI_THROW2(CBlobStateException, eBlobStateError,
1145  "blob state error for "+idh.AsString(),
1146  lock->GetBlobState());
1147  }
1148  locks.insert(lock);
1149  }
1150 
1151  return locks;
1152 }
1153 
1154 
1156  CTSE_LoadLock& load_lock)
1157 {
1158  GetFileInfo(blob_id)->LoadBlob(blob_id, load_lock);
1159 }
1160 
1161 
1163  CTSE_Chunk_Info& chunk_info)
1164 {
1166  cref(blob_id), ref(chunk_info)),
1167  "GetChunk");
1168 }
1169 
1170 
1172  CTSE_Chunk_Info& chunk_info)
1173 {
1174  GetFileInfo(blob_id)->LoadChunk(blob_id, chunk_info);
1175 }
1176 
1177 
1179 {
1181  cref(idh), ref(ids)),
1182  "GetIds");
1183 }
1184 
1185 
1187 {
1188  CBioseq::TId ids2;
1189  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1190  switch ( info.seq_type ) {
1191  case 'S':
1192  info.GetScaffoldIterator().GetIds(ids2);
1193  break;
1194  case 'P':
1195  info.GetProteinIterator().GetIds(ids2);
1196  break;
1197  default:
1198  info.GetContigIterator().GetIds(ids2);
1199  break;
1200  }
1201  }
1202  ITERATE ( CBioseq::TId, it2, ids2 ) {
1203  ids.push_back(CSeq_id_Handle::GetHandle(**it2));
1204  }
1205 }
1206 
1207 
1210 {
1212  cref(idh)),
1213  "GetAccVer");
1214 }
1215 
1216 
1219 {
1221  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1222  ret.sequence_found = true;
1223  CRef<CSeq_id> acc_id;
1224  switch ( info.seq_type ) {
1225  case 'S':
1226  if ( CWGSScaffoldIterator it = info.GetScaffoldIterator() ) {
1227  acc_id = it.GetAccSeq_id();
1228  }
1229  break;
1230  case 'P':
1231  if ( CWGSProteinIterator it = info.GetProteinIterator() ) {
1232  acc_id = it.GetAccSeq_id();
1233  }
1234  break;
1235  default:
1236  if ( CWGSSeqIterator it = info.GetContigIterator() ) {
1237  acc_id = it.GetAccSeq_id();
1238  }
1239  break;
1240  }
1241  if ( acc_id ) {
1242  ret.acc_ver = CSeq_id_Handle::GetHandle(*acc_id);
1243  }
1244  }
1245  return ret;
1246 }
1247 
1248 
1251 {
1252  return CallWithRetry(bind(&CWGSDataLoader_Impl::GetGiOnce, this,
1253  cref(idh)),
1254  "GetGi");
1255 }
1256 
1257 
1260 {
1262  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1263  ret.sequence_found = true;
1264  if ( info.IsContig() ) {
1265  if ( CWGSSeqIterator it = info.GetContigIterator() ) {
1266  if ( it.HasGi() ) {
1267  ret.gi = it.GetGi();
1268  }
1269  }
1270  }
1271  else if ( info.IsProtein() ) {
1272  if ( CWGSProteinIterator it = info.GetProteinIterator() ) {
1273  if ( it.HasGi() ) {
1274  ret.gi = it.GetGi();
1275  }
1276  }
1277  }
1278  }
1279  return ret;
1280 }
1281 
1282 
1283 TTaxId
1285 {
1287  cref(idh)),
1288  "GetTaxId");
1289 }
1290 
1291 
1293 {
1294  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1295  auto& vdb = info.file->GetDb();
1296  if ( vdb->HasCommonTaxId() ) {
1297  return vdb->GetCommonTaxId();
1298  }
1299  if ( info.IsContig() ) {
1300  return info.GetContigIterator().GetTaxId();
1301  }
1302  if ( info.IsProtein() ) {
1303  if ( auto root = info.GetRootFileInfo() ) {
1304  if ( root.IsContig() ) {
1305  return root.GetContigIterator().GetTaxId();
1306  }
1307  if ( root.IsProtein() ) {
1308  return root.GetProteinIterator().GetTaxId();
1309  }
1310  }
1311  return info.GetProteinIterator().GetTaxId();
1312  }
1313  return ZERO_TAX_ID; // taxid is not defined
1314  }
1315  return INVALID_TAX_ID; // sequence is unknown
1316 }
1317 
1318 
1319 TSeqPos
1321 {
1323  cref(idh)),
1324  "GetSequenceLength");
1325 }
1326 
1327 
1329 {
1330  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1331  switch ( info.seq_type ) {
1332  case 'S':
1333  if ( CWGSScaffoldIterator it = info.GetScaffoldIterator() ) {
1334  return it.GetSeqLength();
1335  }
1336  break;
1337  case 'P':
1338  if ( CWGSProteinIterator it = info.GetProteinIterator() ) {
1339  return it.GetSeqLength();
1340  }
1341  break;
1342  default:
1343  if ( CWGSSeqIterator it = info.GetContigIterator() ) {
1344  return it.GetSeqLength();
1345  }
1346  break;
1347  }
1348  }
1349  return kInvalidSeqPos;
1350 }
1351 
1352 
1355 {
1357  cref(idh)),
1358  "GetSequenceHash");
1359 }
1360 
1361 
1364 {
1366  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1367  ret.sequence_found = true;
1368  switch ( info.seq_type ) {
1369  case 'S': // scaffold
1370  /*
1371  if ( CWGSScaffoldIterator it = info.GetScaffoldIterator() ) {
1372  return it.GetSeqHash();
1373  }
1374  */
1375  break;
1376  case 'P': // protein
1377  if ( CWGSProteinIterator it = info.GetProteinIterator() ) {
1378  if ( it.HasSeqHash() ) {
1379  ret.hash = it.GetSeqHash();
1380  ret.hash_known = true;
1381  }
1382  }
1383  break;
1384  default:
1385  if ( CWGSSeqIterator it = info.GetContigIterator() ) {
1386  if ( it.HasSeqHash() ) {
1387  ret.hash = it.GetSeqHash();
1388  ret.hash_known = true;
1389  }
1390  }
1391  break;
1392  }
1393  }
1394  return ret;
1395 }
1396 
1397 
1400 {
1402  cref(idh)),
1403  "GetSequenceType");
1404 }
1405 
1406 
1409 {
      // Fills and returns `ret` with the molecule type for idh.
      // NOTE(review): the signature and the declaration of `ret` are absent
      // from this listing - confirm against the original file.
1411  if ( CWGSFileInfo::SAccFileInfo info = GetFileInfo(idh) ) {
1412  ret.sequence_found = true;
      // The type is read from the database object per sequence kind; no row
      // iterator is opened here.
1413  switch ( info.seq_type ) {
1414  case 'S':
1415  ret.type = info.file->GetDb()->GetScaffoldMolType();
1416  break;
1417  case 'P':
1418  ret.type = info.file->GetDb()->GetProteinMolType();
1419  break;
1420  default:
      // any other seq_type is treated as a contig
1421  ret.type = info.file->GetDb()->GetContigMolType();
1422  break;
1423  }
1424  }
1425  return ret;
1426 }
1427 
1428 
1429 /////////////////////////////////////////////////////////////////////////////
1430 // CWGSFileInfo
1431 /////////////////////////////////////////////////////////////////////////////
1432 
1433 
1435  const string& prefix)
1436 {
      // Construction does nothing except call Open() with the same arguments.
      // NOTE(review): the first line of the signature is absent from this
      // listing.
1437  Open(impl, prefix);
1438 }
1439 
1440 
1442  const string& prefix)
1443 {
      // Initializes this object for `prefix` via x_Initialize() unless m_WGSDb
      // is already set, reporting failures as CSraException.
      // NOTE(review): the first line of the signature is absent from this
      // listing.
1444  if ( m_WGSDb ) {
      // already opened - nothing to do
1445  return;
1446  }
1447  try {
1448  x_Initialize(impl, prefix);
1449  }
1450  catch ( CSraException& exc ) {
1451  if ( GetDebugLevel() >= 1 ) {
1452  ERR_POST_X(1, "CWGSDataLoader: "
1453  "Exception while opening WGS DB "<<prefix<<": "<<exc);
1454  }
      // Append " acc=<prefix>" to the exception parameter unless the
      // parameter already contains the prefix.
1455  if ( exc.GetParam().find(prefix) == NPOS ) {
1456  exc.SetParam(exc.GetParam()+" acc="+string(prefix));
1457  }
      // NOTE(review): `throw exc;` throws a copy whose type is the static
      // type CSraException. A bare `throw;` would rethrow the same object
      // (already updated through the reference) and keep any derived
      // type - consider switching.
1458  throw exc;
1459  }
1460  catch ( CException& exc ) {
      // Any other toolkit exception is logged and rethrown as a
      // CSraException with code eOtherError.
1461  if ( GetDebugLevel() >= 1 ) {
1462  ERR_POST_X(1, "CWGSDataLoader: "
1463  "Exception while opening WGS DB "<<prefix<<": "<<exc);
1464  }
1465  NCBI_RETHROW_FMT(exc, CSraException, eOtherError,
1466  "CWGSDataLoader: exception while opening WGS DB "<<prefix);
1467  }
1468 }
1469 
1470 
1472  const string& prefix)
1473 {
      // Creates the CWGSDb for `prefix` from the loader's manager and volume
      // path, and caches its id prefix (with version) in m_WGSPrefix.
      // NOTE(review): the first line of the signature is absent from this
      // listing.
1474  auto mgr = impl.m_Mgr;
1475  m_WGSDb = CWGSDb(mgr, prefix, impl.m_WGSVolPath);
1476  m_WGSPrefix = m_WGSDb->GetIdPrefixWithVersion();
1477  if ( GetDebugLevel() >= 1 ) {
1478  LOG_POST_X(2, Info<<"CWGSDataLoader: "
1479  "Opened WGS DB "<<prefix<<" -> "<<
1480  GetWGSPrefix()<<" "<<m_WGSDb.GetWGSPath());
1481  }
1482  if ( impl.GetAddWGSMasterDescr() ) {
      // NOTE(review): the statement inside this `if` is absent from this
      // listing; presumably it sets up the master descriptors - confirm.
1484  }
1485 }
1486 
1487 
1489 {
      // Supplies master descriptors to m_WGSDb: first from the database's own
      // metadata and, failing that, from a Bioseq found for the master Seq-id.
      // NOTE(review): the signature and several statements (the declarations of
      // `idh` and `locks`, and the expression that initializes `gb_loader`) are
      // absent from this listing - confirm against the original file.
1490  if ( m_WGSDb.LoadMasterDescr() ) {
1491  // loaded descriptors from metadata
1492  return;
1493  }
1494  CRef<CSeq_id> id = m_WGSDb->GetMasterSeq_id();
1495  if ( !id ) {
1496  // no master sequence id
1497  return;
1498  }
1500  CDataLoader* gb_loader =
1502  if ( !gb_loader ) {
1503  // no GenBank loader found -> no way to load master record
1504  return;
1505  }
      // Only the first TSE that contains a matching Bioseq is considered:
      // its descriptors are copied if set, then the loop stops either way.
1508  ITERATE ( CDataLoader::TTSE_LockSet, it, locks ) {
1509  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
1510  if ( !bs_info ) {
1511  continue;
1512  }
1513  if ( bs_info->IsSetDescr() ) {
1514  m_WGSDb.SetMasterDescr(bs_info->GetDescr().Get());
1515  }
1516  break;
1517  }
1518 }
1519 
1520 
1521 static bool s_ValidateAcc(const CConstRef<CSeq_id>& real_seq_id, const CTextseq_id& asked_text_id)
1522 {
1523  if ( !real_seq_id ) {
1524  return false;
1525  }
1526  if ( auto real_text_id = real_seq_id->GetTextseq_Id() ) {
1527  if ( !NStr::EqualNocase(real_text_id->GetAccession(), asked_text_id.GetAccession()) ||
1528  !real_text_id->IsSetVersion() ) {
1529  return false;
1530  }
1531 
1532  if ( asked_text_id.IsSetVersion() ) {
1533  return real_text_id->GetVersion() == asked_text_id.GetVersion();
1534  }
1535  else {
1536  return true;
1537  }
1538  }
1539  return false;
1540 }
1541 
1542 
1544 {
      // Reports whether the protein row behind `iter` is treated as migrated.
      // Always false when the keep-migrated parameter is on or the row has no GI.
      // NOTE(review): the signature and the `case` labels of the switch below
      // are absent from this listing, so the project states that return true
      // are not visible here - confirm against the original file.
1545  if ( GetKeepMigratedParam() ) {
1546  return false;
1547  }
1548  if ( !iter.HasGi() ) {
1549  return false;
1550  }
1551  auto project_state = file->GetDb()->GetProjectGBState();
1552  switch (project_state) {
1556  default:
1557  return false;
1558  }
1559 }
1560 
1561 
1563 {
      // Checks that the row this object points at really carries the
      // accession/version in text_id. For contigs it also selects the accession
      // version and stores it in `version`.
      // Returns false when row_id is 0, the row iterator is not valid, the
      // protein is migrated, the requested contig version does not exist, or
      // the ids do not match.
      // NOTE(review): the signature line is absent from this listing.
1564  _ASSERT(version == -1);
1565  if ( row_id == 0 ) {
1566  return false;
1567  }
1568  if ( IsScaffold() ) {
1569  if ( auto iter = GetScaffoldIterator() ) {
1570  return s_ValidateAcc(iter.GetAccSeq_id(), text_id);
1571  }
1572  }
1573  else if ( IsProtein() ) {
1574  if ( auto iter = GetProteinIterator() ) {
1575  if ( IsMigrated(iter) ) {
1576  // individual protein migrated to GenBank
1577  if ( GetDebugLevel() >= 2 ) {
1578  ERR_POST_X(11, "CWGSDataLoader: WGS protein "<<text_id.GetAccession()<<" migrated to GenBank");
1579  }
1580  return false;
1581  }
1582  return s_ValidateAcc(iter.GetAccSeq_id(), text_id);
1583  }
1584  }
1585  else {
      // neither scaffold nor protein: handled as a contig
1586  if ( auto iter = GetContigIterator() ) {
1587  if ( text_id.IsSetVersion() ) {
1588  // select requested version
1589  version = text_id.GetVersion();
1590  if ( !iter.HasAccVersion(version) ) {
      // note: `version` keeps the requested value on this failure path
1591  return false;
1592  }
1593  iter.SelectAccVersion(version);
1594  }
1595  else {
1596  // select latest version
1597  version = iter.GetLatestAccVersion();
1598  }
1599  return s_ValidateAcc(iter.GetAccSeq_id(), text_id);
1600  }
1601  }
      // reached when the row iterator for the sequence kind was not valid
1602  return false;
1603 }
1604 
1605 
1607 {
      // Checks that the row this object points at really has the given GI.
      // For a matching contig the latest accession version is stored in
      // `version`.
      // Returns false when row_id is 0, the row is a scaffold, the protein is
      // migrated, the row iterator is not valid, or the GI differs.
      // NOTE(review): the signature line is absent from this listing.
1608  _ASSERT(version == -1);
1609  if ( row_id == 0 ) {
1610  return false;
1611  }
1612  if ( IsScaffold() ) {
1613  // scaffolds cannot have GI
1614  return false;
1615  }
1616  else if ( IsProtein() ) {
1617  if ( auto iter = GetProteinIterator() ) {
1618  if ( IsMigrated(iter) ) {
1619  // individual protein migrated to GenBank
1620  if ( GetDebugLevel() >= 2 ) {
1621  ERR_POST_X(11, "CWGSDataLoader: WGS protein "<<gi<<" migrated to GenBank");
1622  }
1623  return false;
1624  }
1625  return iter.GetGi() == gi;
1626  }
1627  }
1628  else {
      // neither scaffold nor protein: handled as a contig
1629  if ( auto iter = GetContigIterator() ) {
1630  if ( iter.GetGi() == gi ) {
1631  // select latest version
1632  version = iter.GetLatestAccVersion();
1633  return true;
1634  }
1635  }
1636  }
1637  return false;
1638 }
1639 
1640 
1642 {
      // Looks `gi` up in this file's GI index and, when an index entry exists,
      // fills `info` with the file, row id and sequence kind.
      // NOTE(review): the signature line is absent from this listing.
1643  CWGSGiIterator it(m_WGSDb, gi);
1644  if ( it ) {
1645  info.file = this;
1646  info.row_id = it.GetRowId();
      // 'P' marks a protein row; '\0' is used for everything else
1647  info.seq_type = it.GetSeqType() == it.eProt? 'P': '\0';
1648  info.version = -1;
      // The index hit is confirmed against the row itself; on mismatch
      // info.file is cleared while the other fields keep their values.
1649  if ( !info.ValidateGi(gi) ) {
1650  info.file = 0;
1651  }
      // The result is `info` converted to the return type; presumably that
      // is false once info.file has been cleared - confirm.
1652  return info;
1653  }
1654  return false;
1655 }
1656 
1657 
1659 {
      // Looks up a protein accession (with optional version) in this file and,
      // when a row id is returned, fills `info` with the file, row id and the
      // protein kind 'P'.
      // NOTE(review): the signature line is absent from this listing.
      // -1 is passed as the version when text_id has none set
1660  int ask_version = text_id.IsSetVersion()? text_id.GetVersion(): -1;
1661  if ( TVDBRowId row_id = m_WGSDb.GetProtAccRowId(text_id.GetAccession(), ask_version) ) {
1662  info.file = this;
1663  info.row_id = row_id;
1664  info.seq_type = 'P';
1665  info.version = -1;
      // The lookup hit is confirmed against the row itself; on mismatch
      // info.file is cleared while the other fields keep their values.
1666  if ( !info.ValidateAcc(text_id) ) {
1667  info.file = 0;
1668  }
      // The result is `info` converted to the return type; presumably that
      // is false once info.file has been cleared - confirm.
1669  return info;
1670  }
1671  return false;
1672 }
1673 
1674 
1676 {
      // Computes a value `state` by switching on gb_state and returns it.
      // NOTE(review): the signature, the declaration of `state`, every `case`
      // label and every assignment inside the switch are absent from this
      // listing, so the actual mapping is not visible here - consult the
      // original file before relying on or editing this function.
1678  switch ( gb_state ) {
1681  break;
1685  break;
1688  break;
1689  default:
1690  break;
1691  }
1692  return state;
1693 }
1694 
1695 
1697  CTSE_LoadLock& load_lock) const
1698 {
      // Fills load_lock with the data for blob_id unless it is already loaded:
      // either a complete Seq-entry or, for contigs, split info plus its split
      // version. A non-zero state is recorded as the blob state.
      // NOTE(review): the first signature line and several statements are
      // absent from this listing (one in each of the protein and contig
      // branches, and one after the "not loaded" message); the brace nesting
      // below is therefore incomplete - confirm against the original file.
1699  if ( !load_lock.IsLoaded() ) {
      // Start from the project-level state; a valid row iterator replaces
      // it with the row's own state below.
1700  CBioseq_Handle::TBioseqStateFlags project_state = s_GBStateToOM(GetDb()->GetProjectGBState());
1701  CBioseq_Handle::TBioseqStateFlags state = project_state;
1702  CRef<CSeq_entry> entry;
1703  pair<CRef<CID2S_Split_Info>, CWGSSeqIterator::TSplitVersion> split;
1704  if ( blob_id.m_SeqType == 'S' ) {
      // scaffold: always loaded as a whole entry
1705  if ( CWGSScaffoldIterator it = GetScaffoldIterator(blob_id) ) {
1706  state = s_GBStateToOM(it.GetGBState());
1707  entry = it.GetSeq_entry();
1708  }
1709  }
1710  else if ( blob_id.m_SeqType == 'P' ) {
1711  if ( CWGSProteinIterator it = GetProteinIterator(blob_id) ) {
1712  state = s_GBStateToOM(it.GetGBState());
      // NOTE(review): a line that opens a nested block is absent here,
      // so the condition guarding the entry load is not visible.
1714  entry = it.GetSeq_entry();
1715  }
1716  }
1717  }
1718  else {
1719  if ( CWGSSeqIterator it = GetContigIterator(blob_id) ) {
1720  state = s_GBStateToOM(it.GetGBState());
      // NOTE(review): a line that opens a nested block is absent here.
      // Each split feature is switched off when its parameter is false.
1722  CWGSSeqIterator::TFlags flags = it.fDefaultFlags;
1723  if ( !GetSplitQualityGraphParam() ) {
1724  flags &= ~it.fSplitQualityGraph;
1725  }
1726  if ( !GetSplitSequenceParam() ) {
1727  flags &= ~it.fSplitSeqData;
1728  }
1729  if ( !GetSplitFeaturesParam() ) {
1730  flags &= ~it.fSplitFeatures;
1731  }
      // Ask for split info first; fall back to a whole entry when none
      // is returned.
1732  split = it.GetSplitInfoAndVersion(flags);
1733  if ( !split.first ) {
1734  entry = it.GetSeq_entry(flags);
1735  }
1736  }
1737  }
1738  }
1739  if ( !entry && !split.first ) {
1740  if ( GetDebugLevel() >= 2 ) {
1741  ERR_POST_X(12, "CWGSDataLoader: blob "<<blob_id.ToString()<<
1742  " not loaded");
1743  }
      // NOTE(review): the statement that follows the message is absent
      // from this listing, so how this case ends is not visible here.
1745  }
      // Debug output: level 8 dumps the full ASN.1 text, level 7 only the
      // blob id.
1746  if ( entry ) {
1747  if ( GetDebugLevel() >=8 ) {
1748  LOG_POST_X(13, Info<<"CWGSDataLoader: blob "<<blob_id.ToString()<<
1749  " "<<MSerial_AsnText<<*entry);
1750  }
1751  else if ( GetDebugLevel() >= 7 ) {
1752  LOG_POST_X(13, Info<<"CWGSDataLoader: blob "<<blob_id.ToString());
1753  }
1754  }
1755  if ( split.first ) {
1756  if ( GetDebugLevel() >=8 ) {
1757  LOG_POST_X(14, Info<<"CWGSDataLoader: split blob "<<blob_id.ToString()<<
1758  " split-version="<<split.second<<
1759  " "<<MSerial_AsnText<<*split.first);
1760  }
1761  else if ( GetDebugLevel() >= 7 ) {
1762  LOG_POST_X(14, Info<<"CWGSDataLoader: split blob "<<blob_id.ToString());
1763  }
1764  }
      // a zero state is not written to the blob
1765  if ( state ) {
1766  load_lock->SetBlobState(state);
1767  }
      // Split info and a whole entry are mutually exclusive.
1768  if ( split.first ) {
1769  _ASSERT(!entry);
1770  load_lock->GetSplitInfo().SetSplitVersion(split.second);
1771  CSplitParser::Attach(*load_lock, *split.first);
1772  }
1773  else if ( entry ) {
1774  load_lock->SetSeq_entry(*entry);
1775  }
1776  }
1777 }
1778 
1779 
1781  CTSE_Chunk_Info& chunk_info) const
1782 {
      // Loads one split chunk of a blob into chunk_info and marks it loaded.
      // Only blobs whose m_SeqType is '\0' are handled; for any other type
      // the function does nothing.
      // NOTE(review): the first line of the signature is absent from this
      // listing.
1783  if ( blob_id.m_SeqType == '\0' ) {
1784  CWGSSeqIterator it = GetContigIterator(blob_id);
      // The chunk is requested for the split version recorded in the TSE's
      // split info.
      // NOTE(review): neither `it` nor `chunk` is checked before use and
      // `*chunk` is dereferenced below - confirm GetChunkForVersion() cannot
      // return an empty reference here.
1785  CRef<CID2S_Chunk> chunk = it.GetChunkForVersion(chunk_info.GetChunkId(), chunk_info.GetSplitInfo().GetSplitVersion());
      // Debug output: level 8 dumps the chunk as ASN.1 text, level 7 only
      // its identifiers.
1786  if ( GetDebugLevel() >=8 ) {
1787  LOG_POST_X(15, Info<<"CWGSDataLoader: chunk "<<blob_id.ToString()<<
1788  "."<<chunk_info.GetChunkId()<<
1789  " "<<MSerial_AsnText<<*chunk);
1790  }
1791  else if ( GetDebugLevel() >= 7 ) {
1792  LOG_POST_X(15, Info<<"CWGSDataLoader: chunk "<<blob_id.ToString()<<
1793  "."<<chunk_info.GetChunkId());
1794  }
1795  CSplitParser::Load(chunk_info, *chunk);
1796  chunk_info.SetLoaded();
1797  }
1798 }
1799 
1800 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
std::invoke_result< Call >::type CallWithRetry(Call &&call, const char *name, int retry_count=0)
bool IsSetDescr(void) const
const TDescr & GetDescr(void) const
Blob state exceptions, used by GenBank loader.
CTSE_LoadLock GetTSE_LoadLock(const TBlobId &blob_id)
Definition: Dbtag.hpp:53
CDeadline.
Definition: ncbitime.hpp:1830
virtual void DoJob(void)
Payload function.
CIndexUpdateThread(unsigned update_delay, CRef< CWGSResolver > resolver)
CRef< CWGSResolver > m_Resolver
Definition: wgs_client.cpp:492
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectManager –.
static void Attach(CTSE_Info &tse, const CID2S_Split_Info &split)
static void Load(CTSE_Chunk_Info &chunk, const CID2S_Chunk &data)
@ eProtectedDb
DB is protected.
Definition: exception.hpp:98
@ eNotFoundDb
DB main file not found.
Definition: exception.hpp:92
void SetParam(const string &param)
Definition: exception.hpp:153
const string & GetParam(void) const
Definition: exception.hpp:149
virtual TErrCode GetErrCode(void) const
Definition: sraread.cpp:164
void SetLoaded(CObject *obj=0)
TChunkId GetChunkId(void) const
const CTSE_Split_Info & GetSplitInfo(void) const
TBlobState GetBlobState(void) const
Definition: tse_info.hpp:834
void SetBlobState(TBlobState state)
Definition: tse_info.hpp:848
CTSE_Split_Info & GetSplitInfo(void)
Definition: tse_info.cpp:1395
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
Definition: tse_info.cpp:351
bool IsLoaded(void) const
void SetLoaded(void)
void SetSplitVersion(TSplitVersion version)
TSplitVersion GetSplitVersion(void) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Adaptation of CThread class repeatedly running some job.
void RequestStop()
Schedule thread Stop.
void UpdateExpiration(const CVDBCacheWithExpiration &cache, const string &acc_or_path)
Definition: vdbcache.cpp:92
void SetObject(CObject *object)
Definition: vdbcache.hpp:109
bool IsExpired(const CVDBCacheWithExpiration &cache, const string &acc_or_path) const
Definition: vdbcache.cpp:85
CRef< Object > GetObject() const
Definition: vdbcache.hpp:105
CRef< CSlot > GetSlot(const string &acc_or_path)
Definition: vdbcache.cpp:187
CWGSBlobId(const string &str)
string ToString(void) const
Get string representation of blob id.
bool operator<(const CBlobId &id) const
void FromString(CTempString str)
string m_WGSPrefix
TVDBRowId m_RowId
bool operator==(const CBlobId &id) const
CWGSResolver & GetResolver(void)
CWGSDataLoader_Impl(const CWGSDataLoader::SLoaderParams &params)
CDataLoader::TTSE_LockSet GetRecordsOnce(CDataSource *data_source, const CSeq_id_Handle &idh, CDataLoader::EChoice choice)
CDataLoader::SHashFound GetSequenceHash(const CSeq_id_Handle &idh)
CDataLoader::TTSE_LockSet GetRecords(CDataSource *data_source, const CSeq_id_Handle &idh, CDataLoader::EChoice choice)
TTaxId GetTaxIdOnce(const CSeq_id_Handle &idh)
vector< CSeq_id_Handle > TIds
TTaxId GetTaxId(const CSeq_id_Handle &idh)
CDataLoader::STypeFound GetSequenceTypeOnce(const CSeq_id_Handle &idh)
CRef< CThreadNonStop > m_IndexUpdateThread
CRef< CWGSFileInfo > OpenWGSFile(SWGSFileInfoSlot &slot, const string &prefix)
CRef< CWGSFileInfo > GetWGSFile(const string &acc)
CDataLoader::SAccVerFound GetAccVerOnce(const CSeq_id_Handle &idh)
CRef< CWGSBlobId > GetBlobIdOnce(const CSeq_id_Handle &idh)
SAccFileInfo GetFileInfoByProtAcc(const CTextseq_id &text_id)
friend class CWGSFileInfo
std::invoke_result< Call >::type CallWithRetry(Call &&call, const char *name, unsigned retry_count=0)
SAccFileInfo GetFileInfoByAcc(const CTextseq_id &text_id)
CDataLoader::STypeFound GetSequenceType(const CSeq_id_Handle &idh)
CDataLoader::SGiFound GetGi(const CSeq_id_Handle &idh)
void GetChunkOnce(const CWGSBlobId &blob_id, CTSE_Chunk_Info &chunk)
CDataLoader::SHashFound GetSequenceHashOnce(const CSeq_id_Handle &idh)
CDataLoader::SGiFound GetGiOnce(const CSeq_id_Handle &idh)
CRef< CWGSFileInfo > x_GetFileInfo(SWGSFileInfoSlot &info_slot, const string &prefix)
CRef< CWGSFileInfo > OpenWGSFileOnce(SWGSFileInfoSlot &slot, const string &prefix)
SAccFileInfo GetFileInfoByGi(TGi gi)
CTSE_LoadLock GetBlobById(CDataSource *data_source, const CWGSBlobId &blob_id)
CRef< CWGSBlobId > GetBlobId(const CSeq_id_Handle &idh)
void GetChunk(const CWGSBlobId &blob_id, CTSE_Chunk_Info &chunk)
atomic< bool > m_ResolverCreated
CDataLoader::SAccVerFound GetAccVer(const CSeq_id_Handle &idh)
TSeqPos GetSequenceLengthOnce(const CSeq_id_Handle &idh)
CTSE_LoadLock GetBlobByIdOnce(CDataSource *data_source, const CWGSBlobId &blob_id)
void GetIds(const CSeq_id_Handle &idh, TIds &ids)
CWGSFileInfo::SAccFileInfo SAccFileInfo
CRef< CWGSResolver > m_Resolver
SAccFileInfo GetFileInfoByGeneral(const CDbtag &dbtag)
TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
CRef< CWGSFileInfo > GetFileInfo(const CWGSBlobId &blob_id)
void LoadBlob(const CWGSBlobId &blob_id, CTSE_LoadLock &load_lock)
void GetIdsOnce(const CSeq_id_Handle &idh, TIds &ids)
static string NormalizePathOrAccession(CTempString path_or_acc, CTempString vol_path=CTempString())
Definition: wgsread.cpp:2322
const string & GetWGSPath(void) const
Definition: wgsread.hpp:669
TVDBRowId GetContigNameRowId(const string &name) const
Definition: wgsread.hpp:744
void SetMasterDescr(const TMasterDescr &descr, EDescrFilter filter=eDescrDefaultFilter) const
Definition: wgsread.hpp:784
TVDBRowId GetProteinNameRowId(const string &name) const
Definition: wgsread.hpp:756
TVDBRowId GetProtAccRowId(const string &acc, int version=-1) const
Definition: wgsread.hpp:768
bool LoadMasterDescr(EDescrFilter filter=eDescrDefaultFilter) const
Definition: wgsread.hpp:779
TVDBRowId GetScaffoldNameRowId(const string &name) const
Definition: wgsread.hpp:750
TVDBRowId GetLocRowId(void) const
Definition: wgsread.cpp:7860
NCBI_WGS_seqtype GetLocSeqType(void) const
Definition: wgsread.cpp:7846
bool FindProtAcc(SAccFileInfo &info, const CTextseq_id &text_id)
void x_Initialize(const CWGSDataLoader_Impl &impl, const string &prefix)
void Open(const CWGSDataLoader_Impl &impl, const string &prefix)
CWGSFileInfo(const CWGSDataLoader_Impl &impl, const string &prefix)
CWGSProteinIterator GetProteinIterator(const CWGSBlobId &blob_id) const
CWGSSeqIterator GetContigIterator(const CWGSBlobId &blob_id) const
bool FindGi(SAccFileInfo &info, TGi gi)
void LoadChunk(const CWGSBlobId &blob_id, CTSE_Chunk_Info &chunk) const
void LoadBlob(const CWGSBlobId &blob_id, CTSE_LoadLock &load_lock) const
CWGSScaffoldIterator GetScaffoldIterator(const CWGSBlobId &blob_id) const
const string & GetWGSPrefix(void) const
const CWGSDb & GetDb(void) const
void x_InitMasterDescr(void)
ESeqType GetSeqType(void) const
Definition: wgsread.hpp:1379
TVDBRowId GetRowId(void) const
Definition: wgsread.hpp:1384
bool HasGi(void) const
Definition: wgsread.cpp:7149
NCBI_gb_state GetGBState(void) const
Definition: wgsread.cpp:7361
TVDBRowId GetBestProductFeatRowId(void) const
Definition: wgsread.cpp:7480
CWGSResolver_DL(CDataLoader *loader)
bool IsValid(void) const
CRef< CDataLoader > m_Loader
static CRef< CWGSResolver > CreateResolver(void)
static CRef< CWGSResolver > CreateResolver(CDataLoader *loader)
virtual TWGSPrefixes GetPrefixes(const CSeq_id &id)
string ParseWGSPrefix(const CDbtag &dbtag) const
virtual TWGSPrefixes GetPrefixes(TGi gi)=0
virtual void SetNonWGS(TGi gi, const TWGSPrefixes &prefixes)
static bool s_DebugEnabled(EDebugLevel level)
Definition: wgsresolver.cpp:58
vector< string > TWGSPrefixes
Definition: wgsresolver.hpp:51
virtual bool Update(void)
static CRef< CWGSResolver > CreateResolver(const CVDBMgr &mgr)
Definition: wgsresolver.cpp:75
virtual void SetWGSPrefix(TGi gi, const TWGSPrefixes &prefixes, const string &prefix)
Definition: wgsresolver.cpp:96
CRef< CID2S_Chunk > GetChunkForVersion(TChunkId chunk_id, TSplitVersion split_version) const
Definition: wgsread.cpp:6339
void SelectAccVersion(int version)
Definition: wgsread.cpp:4335
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define LOG_POST_X(err_subcode, message)
Definition: ncbidiag.hpp:553
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_RETHROW_FMT(prev_exception, exception_class, err_code, message)
The same as NCBI_RETHROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:745
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
bool IsGi(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
CSeq_id::E_Choice Which(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
TGi GetGi(void) const
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ fAcc_prot
Definition: Seq_id.hpp:252
@ eAcc_targeted
Definition: Seq_id.hpp:298
@ eAcc_embl_prot
Definition: Seq_id.hpp:383
@ eAcc_wgs_intermed
Definition: Seq_id.hpp:294
@ eAcc_gb_prot
Definition: Seq_id.hpp:345
@ eAcc_tsa
Definition: Seq_id.hpp:273
@ eAcc_other
Definition: Seq_id.hpp:264
@ eAcc_division_mask
Definition: Seq_id.hpp:299
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< CSeq_id_Handle > TIds
CSeq_inst::TMol type
CDataLoader * FindDataLoader(const string &loader_name) const
Try to find a registered data loader by name.
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Request for a list of all Seq-ids of a sequence.
EChoice
main blob is blob with sequence all other blobs are external and contain external annotations
TTSE_LockSet GetRecordsNoBlobState(const CSeq_id_Handle &idh, EChoice choice)
The same as GetRecords() but always returns empty TSE lock set instead of throwing CBlobStateExceptio...
@ eExtFeatures
external features
@ eExtAnnot
all external annotations
@ eExtAlign
external aligns
@ eOrphanAnnot
all external annotations if no Bioseq exists
@ eBioseqCore
main blob with bioseq core (no seqdata and annots)
@ eExtGraph
external graph annotations
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Swap(TThisType &ref)
Swaps the pointer with another reference.
Definition: ncbiobj.hpp:754
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
bool Run(TRunMode flags=fRunDefault)
Run the thread.
Definition: ncbithr.cpp:724
CMutexGuard TWriteLockGuard
Define Write Lock Guard.
Definition: ncbimtx.hpp:763
void Join(void **exit_data=0)
Wait for the thread termination.
Definition: ncbithr.cpp:863
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
TVersion GetVersion(void) const
Get the Version member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
User-defined methods of the data storage class.
#define DEBUG
Definition: config.h:32
Definition of all error codes used in SRA C++ support libraries.
FILE * file
int i
static MDB_envinfo info
Definition: mdb_load.c:37
const string version
version string
Definition: variables.hpp:66
const struct ncbi::grid::netcache::search::fields::SIZE size
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
EIPRangeType t
Definition: ncbi_localip.c:101
const char * tag
void SleepMilliSec(unsigned long ml_sec, EInterruptOnSignal onsignal=eRestartOnSignal)
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
Helper classes and templates to implement plugins.
#define row(bind, expected)
Definition: string_bind.c:73
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
Better replacement of GetSequenceHash(), this method should be defined in data loaders,...
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
vector< string > m_WGSFiles
Definition: wgsloader.hpp:59
CWGSProteinIterator GetProteinIterator(void) const
CWGSSeqIterator GetContigIterator(void) const
bool ValidateAcc(const CTextseq_id &text_id)
bool IsMigrated(const CWGSProteinIterator &iter) const
SAccFileInfo GetRootFileInfo(void) const
CRef< CWGSFileInfo > file
CWGSScaffoldIterator GetScaffoldIterator(void) const
int TSplitVersion
Definition: wgsread.hpp:193
Definition: type.c:6
#define _ASSERT
int64_t TVDBRowId
Definition: vdbread.hpp:80
void sx_Update(CWGSResolver &resolver)
static bool s_ValidateAcc(const CConstRef< CSeq_id > &real_seq_id, const CTextseq_id &asked_text_id)
static bool GetSplitFeaturesParam(void)
static bool GetSplitQualityGraphParam(void)
static string GetWGSVolPath(void)
static unsigned GetRetryCountParam(void)
BEGIN_LOCAL_NAMESPACE
static bool GetResolveGIsParam(void)
static bool GetKeepMigratedParam(void)
static CBioseq_Handle::TBioseqStateFlags s_GBStateToOM(NCBI_gb_state gb_state)
END_LOCAL_NAMESPACE
static bool GetKeepReplacedParam(void)
static int GetDebugLevel(void)
static unsigned GetFileRecheckTimeParam(void)
static const size_t kMaxWGSPrefixLetters
static bool GetResolveProtAccsParam(void)
static bool GetMasterDescrParam(void)
static unsigned GetIndexUpdateTimeParam(void)
NCBI_PARAM_DEF_EX(int, WGS_LOADER, DEBUG, 0, eParam_NoThread, WGS_LOADER_DEBUG)
static const size_t kMinWGSPrefixLetters
static unsigned GetFileReopenTimeParam(void)
static const size_t kWGSPrefixDigits
static size_t GetGCSizeParam(void)
NCBI_DEFINE_ERR_SUBCODE_X(23)
NCBI_PARAM_DECL(int, WGS_LOADER, DEBUG)
NCBI_PARAM_DEF(bool, WGS_LOADER, MASTER_DESCR, true)
static bool GetSplitSequenceParam(void)
static const size_t kMaxWGSProteinAccLen
@ NCBI_gb_state_eWGSGenBankReplaced
Definition: wgsread.hpp:88
@ NCBI_gb_state_eWGSGenBankSuppressed
Definition: wgsread.hpp:87
@ NCBI_gb_state_eWGSGenBankMigrated
Definition: wgsread.hpp:92
@ NCBI_gb_state_eWGSGenBankWithdrawn
Definition: wgsread.hpp:89
@ NCBI_WGS_seqtype_scaffold
Definition: wgsread.hpp:68
@ NCBI_WGS_seqtype_contig
Definition: wgsread.hpp:67
uint32_t NCBI_gb_state
Definition: wgsread.hpp:53
Modified on Fri Sep 20 14:57:58 2024 by modify_doxy.py rev. 669887