NCBI C++ ToolKit
seqdbimpl.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqdbimpl.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /// @file seqdbimpl.cpp
31 /// Implementation for the CSeqDBImpl class, the top implementation
32 /// layer for SeqDB.
33 #include <ncbi_pch.hpp>
34 #include "seqdbimpl.hpp"
35 #include <iostream>
36 #include <sstream>
37 #include <algorithm>
38 #include <serial/enumvalues.hpp>
39 #include <serial/objistr.hpp>
40 #include <serial/objistrasnb.hpp>
42 
44 
45 CSeqDBImpl::CSeqDBImpl(const string & db_name_list,
46  char prot_nucl,
47  int oid_begin,
48  int oid_end,
49  CSeqDBGiList * gi_list,
50  CSeqDBNegativeList * neg_list,
51  CSeqDBIdSet idset,
52  bool use_atlas_lock)
53  : m_AtlasHolder (NULL, use_atlas_lock),
54  m_Atlas (m_AtlasHolder.Get()),
55  m_DBNames (db_name_list),
56  m_Aliases (m_Atlas, db_name_list, prot_nucl),
57  m_VolSet (m_Atlas,
58  m_Aliases.GetVolumeNames(),
59  prot_nucl,
60  gi_list,
61  neg_list),
62  m_LMDBSet (m_VolSet),
63  m_RestrictBegin (oid_begin),
64  m_RestrictEnd (oid_end),
65  m_NextChunkOID (0),
66  m_NumSeqs (0),
67  m_NumSeqsStats (0),
68  m_NumOIDs (0),
69  m_TotalLength (0),
70  m_ExactTotalLength(0),
71  m_TotalLengthStats(0),
72  m_VolumeLength (0),
73  m_MaxLength (0),
74  m_MinLength (0),
75  m_SeqType (prot_nucl),
76  m_OidListSetup (false),
77  m_UserGiList (gi_list),
78  m_NegativeList (neg_list),
79  m_IdSet (idset),
80  m_NeedTotalsScan (false),
81  m_UseGiMask (m_Aliases.HasGiMask()),
82  m_MaskDataColumn (kUnknownTitle),
83  m_NumThreads (0)
84 {
86 
87  if (m_UseGiMask) {
88  vector <string> mask_list;
89  m_Aliases.GetMaskList(mask_list);
90  m_GiMask.Reset(new CSeqDBGiMask(m_Atlas, mask_list));
91  }
92 
93 
95 
96  m_OidListSetup = ! (m_Aliases.HasFilters() || gi_list || neg_list);
97 
100 
102 
103 
104  // If the alias files seem to provide correct data for the totals,
105  // use it; otherwise scan the OID list and use approximate lengths
106  // to compute the totals. Presence of a user GI list implies that
107  // the alias file cannot have correct values.
108 
109  try {
110  if (gi_list || neg_list || m_Aliases.NeedTotalsScan(m_VolSet)) {
111  m_NeedTotalsScan = true;
112  x_InitIdSet();
113  }
114 
115  if ((! m_OidListSetup) && (oid_begin || oid_end)) {
116  m_NeedTotalsScan = true;
117  }
118 
119  if (m_NeedTotalsScan) {
120  CSeqDBLockHold locked(m_Atlas);
121 
122  // This is a whole-database scan; it's always done in
123  // approximate length mode.
125  & m_MaxLength, & m_MinLength, locked);
126 
127  } else {
132 
133  // Do not bother scanning the db... it would be slow
134  // FIXME: future implementation should probably have the
135  // shortest length encoded in index file...
137  }
140 
141  LOG_POST(Info << "Num of Seqs: " << m_NumSeqs);
142  LOG_POST(Info << "Total Length: " << m_TotalLength);
143  }
144  catch(CSeqDBException & e) {
147  m_VolSet.UnLease();
148  throw e;
149  }
150 
151  SetIterationRange(oid_begin, oid_end);
152 
154 
155  CHECK_MARKER();
156 }
157 
158 CSeqDBImpl::CSeqDBImpl(bool use_atlas_lock)
159  : m_AtlasHolder (NULL, use_atlas_lock),
160  m_Atlas (m_AtlasHolder.Get()),
161  m_Aliases (m_Atlas, "", '-'),
162  m_RestrictBegin (0),
163  m_RestrictEnd (0),
164  m_NextChunkOID (0),
165  m_NumSeqs (0),
166  m_NumOIDs (0),
167  m_TotalLength (0),
168  m_ExactTotalLength(0),
169  m_VolumeLength (0),
170  m_SeqType ('-'),
171  m_OidListSetup (true),
172  m_NeedTotalsScan (false),
173  m_UseGiMask (false),
174  m_MaskDataColumn (kUnknownTitle),
175  m_NumThreads (0)
176 {
177  INIT_CLASS_MARK();
178 
180  CHECK_MARKER();
181 }
182 
183 void CSeqDBImpl::SetIterationRange(int oid_begin, int oid_end)
184 {
185  CHECK_MARKER();
186  CSeqDBLockHold locked(m_Atlas);
187  m_Atlas.Lock(locked);
188 
189  m_RestrictBegin = (oid_begin < 0) ? 0 : oid_begin;
190  m_RestrictEnd = (oid_end < 0) ? 0 : oid_end;
191 
192  if ((oid_begin == 0) && (oid_end == 0)) {
194  } else {
195  if ((oid_end == 0) || (m_RestrictEnd > m_VolSet.GetNumOIDs())) {
197  }
200  }
201  }
202 }
203 
205 {
206  CHECK_MARKER();
207  if( reusable_inpstr ) {
208  delete reusable_inpstr;
210  }
211  else {
212  cerr << "\n(=)\n";
213  }
215 
216  CSeqDBLockHold locked(m_Atlas);
217  m_Atlas.Lock(locked);
218 
219 
220 
221  m_VolSet.UnLease();
222 
223  if (m_OIDList.NotEmpty()) {
224  m_OIDList->UnLease();
225  }
226  BREAK_MARKER();
227 }
228 
230 {
231  CHECK_MARKER();
232  if (! m_OidListSetup) {
233  m_Atlas.Lock(locked);
234 
236  if (m_OIDList.Empty()) {
238  m_VolSet,
239  *ft,
240  m_UserGiList,
242  locked,
243  m_LMDBSet) );
244  }
245 
246  m_OidListSetup = true;
247  // Handle the case where FIRST_OID and LAST_OID is set on a top level
248  // alias file and that's the only alias file present
249  if (ft->HasFilter()) {
250  const vector< CRef<CSeqDB_FilterTree> >& nodes = ft->GetNodes();
251  if (nodes.size() == 1) {
252  const CSeqDB_FilterTree::TFilters& filters = nodes.front()->GetFilters();
253  if (filters.size() == 1 && filters.front()->GetType() == CSeqDB_AliasMask::eOidRange) {
254  const CSeqDB_AliasMask& alias_mask = *filters.front();
255  SetIterationRange(alias_mask.GetBegin(), alias_mask.GetEnd());
256  }
257  }
258  }
259  //DebugDumpText(cerr, "CSeqDBImpl after m_OIDList initialization", 10);
260  //ft->Print();
261  m_Atlas.Unlock(locked);
262  }
263 }
264 
265 bool CSeqDBImpl::CheckOrFindOID(int & next_oid)
266 {
267  CHECK_MARKER();
268  CSeqDBLockHold locked(m_Atlas);
269  return x_CheckOrFindOID(next_oid, locked);
270 }
271 
272 bool CSeqDBImpl::x_CheckOrFindOID(int & next_oid, CSeqDBLockHold & locked)
273 {
274  CHECK_MARKER();
275  bool success = true;
276 
277  if (next_oid < m_RestrictBegin) {
278  next_oid = m_RestrictBegin;
279  }
280 
281  if (next_oid >= m_RestrictEnd) {
282  success = false;
283  }
284 
285  if (! m_OidListSetup) {
286  x_GetOidList(locked);
287  }
288 
289  if (success && m_OIDList.NotEmpty()) {
290  success = m_OIDList->CheckOrFindOID(next_oid);
291 
292  if (next_oid > m_RestrictEnd) {
293  success = false;
294  }
295  }
296 
297  return success;
298 }
299 
301 CSeqDBImpl::GetNextOIDChunk(int & begin_chunk, // out
302  int & end_chunk, // out
303  int oid_size, // in
304  vector<int> & oid_list, // out
305  int * state_obj) // in+out
306 {
307  CHECK_MARKER();
308  CSeqDBLockHold locked(m_Atlas);
309 
310  int cacheID = (m_NumThreads) ? x_GetCacheID(locked) : 0;
311 
312  m_Atlas.Lock(locked);
313 
314  if (! m_OidListSetup) {
315  x_GetOidList(locked);
316  }
317 
318  if (! state_obj) {
319  state_obj = & m_NextChunkOID;
320  }
321 
322  // This has to be done before ">=end" check, to insure correctness
323  // in empty-range cases.
324 
325  if (*state_obj < m_RestrictBegin) {
326  *state_obj = m_RestrictBegin;
327  }
328 
329  // Case 1: Iteration's End.
330 
331  if (*state_obj >= m_RestrictEnd) {
332  begin_chunk = 0;
333  end_chunk = 0;
334  return CSeqDB::eOidRange;
335  }
336 
337  begin_chunk = * state_obj;
338 
339  // fill the cache for all sequence in mmaped slice
340  if (m_NumThreads) {
341  SSeqResBuffer * buffer = m_CachedSeqs[cacheID];
342  x_FillSeqBuffer(buffer, begin_chunk);
343  end_chunk = begin_chunk + static_cast<int>(buffer->results.size());
344  } else {
345  end_chunk = begin_chunk + oid_size;
346  }
347 
348  if (end_chunk > m_RestrictEnd) {
349  end_chunk = m_RestrictEnd;
350  }
351  *state_obj = end_chunk;
352 
353  // Case 2: Return a range
354 
355  if (m_OIDList.Empty()) {
356  return CSeqDB::eOidRange;
357  }
358 
359 
360  // Case 3: Ones and Zeros - The bitmap provides OIDs.
361 
362  int next_oid = begin_chunk;
363  if (m_NumThreads) {
364  oid_list.clear();
365  while(next_oid < end_chunk) {
366  // Find next ordinal id, and save it if it falls within iteration range.
367  if (m_OIDList->CheckOrFindOID(next_oid) &&
368  next_oid < end_chunk) {
369  oid_list.push_back(next_oid++);
370  } else {
371  next_oid = end_chunk;
372  break;
373  }
374  }
375  } else {
376  int iter = 0;
377  oid_list.resize(oid_size);
378  while (iter < oid_size) {
379  if (next_oid >= m_RestrictEnd) break;
380  // Find next ordinal id, and save it if it falls within iteration range.
381  if (m_OIDList->CheckOrFindOID(next_oid) &&
382  next_oid < m_RestrictEnd) {
383  oid_list[iter++] = next_oid++;
384  } else {
385  next_oid = m_RestrictEnd;
386  break;
387  }
388  }
389  if (iter < oid_size) {
390  oid_list.resize(iter);
391  }
392  *state_obj = next_oid;
393  }
394 
395  return CSeqDB::eOidList;
396 }
397 
399 {
400  CHECK_MARKER();
401  CFastMutexGuard guard(m_OIDLock);
402  m_NextChunkOID = 0;
403 }
404 
405 int CSeqDBImpl::GetSeqLength(int oid) const
406 {
407  CHECK_MARKER();
408 
409  return x_GetSeqLength(oid);
410 }
411 
412 int CSeqDBImpl::x_GetSeqLength(int oid) const
413 {
414  int vol_oid = 0;
415 
416  if ('p' == m_SeqType) {
417  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
418  return vol->GetSeqLengthProt(vol_oid);
419  }
420  } else {
421  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
422  return vol->GetSeqLengthExact(vol_oid);
423  }
424  }
425 
427 }
428 
430 {
431  CHECK_MARKER();
432 
433  int vol_oid = 0;
434 
435  if ('p' == m_SeqType) {
436  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
437  return vol->GetSeqLengthProt(vol_oid);
438  }
439  } else {
440  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
441  return vol->GetSeqLengthApprox(vol_oid);
442  }
443  }
444 
446 }
447 
449  map<TGi, TTaxId> & gi_to_taxid,
450  bool persist)
451 {
452  CSeqDBLockHold locked(m_Atlas);
453  m_Atlas.Lock(locked);
454  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
455 
456  if (! persist) {
457  gi_to_taxid.clear();
458  }
459 
460  CRef<CBlast_def_line_set> defline_set =
461  x_GetHdr(oid, locked);
462 
463  if ((! defline_set.Empty()) && defline_set->CanGet()) {
464  ITERATE(list< CRef<CBlast_def_line> >, defline, defline_set->Get()) {
465  if (! (*defline)->CanGetSeqid()) {
466  continue;
467  }
468 
469  if (! (*defline)->IsSetTaxid()) {
470  continue;
471  }
472 
473  ITERATE(list< CRef<CSeq_id> >, seqid, (*defline)->GetSeqid()) {
474  if (! (**seqid).IsGi()) {
475  continue;
476  }
477 
478  gi_to_taxid[(**seqid).GetGi()] = (*defline)->GetTaxid();
479  }
480  }
481  }
482 }
483 
485  vector<TTaxId> & taxids,
486  bool persist)
487 {
488  CSeqDBLockHold locked(m_Atlas);
489  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
490 
491  if (! persist) {
492  taxids.clear();
493  }
494 
495  CRef<CBlast_def_line_set> defline_set =
496  x_GetHdr(oid, locked);
497 
498  if ((! defline_set.Empty()) && defline_set->CanGet()) {
499  ITERATE(list< CRef<CBlast_def_line> >, defline, defline_set->Get()) {
500  if ((*defline)->IsSetTaxid()) {
501  taxids.push_back((*defline)->GetTaxid());
502  }
503 // CBlast_def_line::TTaxIds taxid_set = (*defline)->GetTaxIds();
504 // taxids.insert(taxids.end(), taxid_set.begin(), taxid_set.end());
505  }
506  }
507 }
508 
510  set<TTaxId> & taxids)
511 {
512  CSeqDBLockHold locked(m_Atlas);
513 
514  CRef<CBlast_def_line_set> defline_set =
515  x_GetHdr(oid, locked);
516 
517  if ((! defline_set.Empty()) && defline_set->CanGet()) {
518  ITERATE(list< CRef<CBlast_def_line> >, defline, defline_set->Get()) {
519  CBlast_def_line::TTaxIds taxid_set = (*defline)->GetTaxIds();
520  taxids.insert(taxid_set.begin(), taxid_set.end());
521  }
522  }
523 }
524 
526  int oid,
527  map<TGi, set<TTaxId> >& gi_to_taxid_set,
528  bool persist
529 )
530 {
531  CSeqDBLockHold locked(m_Atlas);
532  m_Atlas.Lock(locked);
533  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
534 
535  if (! persist) {
536  gi_to_taxid_set.clear();
537  }
538 
539  CRef<CBlast_def_line_set> defline_set =
540  x_GetHdr(oid, locked);
541 
542  if ((! defline_set.Empty()) && defline_set->CanGet()) {
543  ITERATE(list< CRef<CBlast_def_line> >, defline, defline_set->Get()) {
544  if (! (*defline)->CanGetSeqid()) {
545  continue;
546  }
547 
548  ITERATE(list< CRef<CSeq_id> >, seqid, (*defline)->GetSeqid()) {
549  if (! (**seqid).IsGi()) {
550  continue;
551  }
552 
553  CBlast_def_line::TTaxIds taxids = (*defline)->GetLeafTaxIds();
554  gi_to_taxid_set[(**seqid).GetGi()].insert(
555  taxids.begin(), taxids.end()
556  );
557  }
558  }
559  }
560 }
561 
563  int oid,
564  vector<TTaxId>& taxids,
565  bool persist
566 )
567 {
568  CSeqDBLockHold locked(m_Atlas);
569  m_Atlas.Lock(locked);
570  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
571 
572  if (! persist) {
573  taxids.clear();
574  }
575 
576  CRef<CBlast_def_line_set> defline_set = x_GetHdr(oid, locked);
577 
578  if ((! defline_set.Empty()) && defline_set->CanGet()) {
579  ITERATE(
580  list<CRef<CBlast_def_line> >,
581  defline,
582  defline_set->Get()
583  ) {
584  if ((*defline)->CanGetSeqid()) {
585  ITERATE(
586  list<CRef<CSeq_id> >,
587  seqid,
588  (*defline)->GetSeqid()
589  ) {
590  if ((**seqid).IsGi()) {
591  CBlast_def_line::TTaxIds leafTaxids =
592  (*defline)->GetLeafTaxIds();
593  taxids.insert(
594  taxids.end(),
595  leafTaxids.begin(),
596  leafTaxids.end()
597  );
598  }
599  }
600  }
601  }
602  }
603 }
604 
606 CSeqDBImpl::GetBioseq(int oid, TGi target_gi, const CSeq_id * target_seq_id, bool seqdata)
607 {
608  CHECK_MARKER();
609 
610  CSeqDBLockHold locked(m_Atlas);
611  m_Atlas.Lock(locked);
612  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
613 
614  int vol_oid = 0;
615 
616  if (! m_OidListSetup) {
617  x_GetOidList(locked);
618  }
619  m_Atlas.Unlock(locked);
620 
621  if (CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
622  return vol->GetBioseq(vol_oid,
623  target_gi,
624  target_seq_id,
625  seqdata,
626  locked);
627  }
628 
630 }
631 
632 void CSeqDBImpl::RetSequence(const char ** buffer) const
633 {
634  CHECK_MARKER();
635 
636  CSeqDBLockHold locked(m_Atlas);
637 
638  if (m_NumThreads) {
639  int cacheID = x_GetCacheID(locked);
640  (m_CachedSeqs[cacheID]->checked_out)--;
641  *buffer = 0;
642  return;
643  }
644 
645  // This returns a reference to part of a memory mapped region.
646 
647  //m_Atlas.Lock(locked);
648 
649  //m_Atlas.RetRegion(*buffer);
650  *buffer = 0;
651 }
652 
653 void CSeqDBImpl::RetAmbigSeq(const char ** buffer) const
654 {
655  CSeqDBAtlas::RetRegion(*buffer);//Keep this
656  *buffer = 0;
657 }
658 
660 {
661  // client must return sequence before getting a new one
662  if (buffer->checked_out > 0) {
663  NCBI_THROW(CSeqDBException, eArgErr, "Sequence not returned.");
664  }
665 
666  buffer->checked_out = 0;
667  buffer->results.clear();
668 }
669 
671  const char ** seq) const
672 {
673  // Search local cache for oid
674  Uint4 index = oid - buffer->oid_start;
675  if (index < buffer->results.size()) {
676  (buffer->checked_out)++;
677  *seq = buffer->results[index].address;
678  return buffer->results[index].length;
679  }
680 
681  x_FillSeqBuffer(buffer, oid);
682  (buffer->checked_out)++;
683  *seq = buffer->results[0].address;
684  return buffer->results[0].length;
685 }
686 
688  int oid) const
689 {
690  // clear the buffer first
692 
693  buffer->oid_start = oid;
694  Int4 vol_oid = 0;
695 
696  // Get all sequences within the lease
697  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
698  SSeqRes res;
699  const char * seq;
700  Int8 tot_length = m_Atlas.GetSliceSize() / (4*m_NumThreads) + 1;
701 
702  res.length = vol->GetSequence(vol_oid++, &seq);
703  if (res.length < 0) return;
704  // must return at least one sequence
705  do {
706  tot_length -= res.length;
707  res.address = seq;
708  buffer->results.push_back(res);
709  res.length = vol->GetSequence(vol_oid++, &seq);
710  } while (res.length >= 0 && tot_length >= res.length && vol_oid < m_RestrictEnd);
711 
712  return;
713  }
714 
716 }
717 
718 int CSeqDBImpl::GetSequence(int oid, const char ** buffer) const
719 {
720  CHECK_MARKER();
721  CSeqDBLockHold locked(m_Atlas);
722  if (m_NumThreads) {
723  int cacheID = x_GetCacheID(locked);
724  return x_GetSeqBuffer(m_CachedSeqs[cacheID], oid, buffer);
725  }
726 
727  int vol_oid = 0;
728 
729  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
730  return vol->GetSequence(vol_oid, buffer);
731  }
732 
734 }
735 
737  TSeqPos begin,
738  TSeqPos end) const
739 {
740  CHECK_MARKER();
741  CSeqDBLockHold locked(m_Atlas);
742  int vol_oid = 0;
743 
744  m_Atlas.Lock(locked);
745  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
746 
747  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
748  return vol->GetSeqData(vol_oid, begin, end, locked);
749  }
750 
752 }
753 
755  char ** buffer,
756  int nucl_code,
757  SSeqDBSlice * region,
758  ESeqDBAllocType alloc_type,
759  CSeqDB::TSequenceRanges * masks) const
760 {
761  CHECK_MARKER();
762 
763  int vol_oid = 0;
764  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
765  return vol->GetAmbigSeq(vol_oid,
766  buffer,
767  nucl_code,
768  alloc_type,
769  region,
770  masks);
771  }
772 
774 }
775 
777  char ** buffer,
778  int nucl_code,
779  ESeqDBAllocType alloc_type,
780  CSeqDB::TSequenceRanges * partial_ranges,
781  CSeqDB::TSequenceRanges * masks) const
782 {
783  CHECK_MARKER();
784  int vol_oid = 0;
785  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
786  return vol->GetAmbigPartialSeq(vol_oid,
787  buffer,
788  nucl_code,
789  alloc_type,
790  partial_ranges,
791  masks);
792  }
793 
795 }
796 
797 list< CRef<CSeq_id> > CSeqDBImpl::GetSeqIDs(int oid)
798 {
799  CHECK_MARKER();
800  int vol_oid = 0;
801 
802  CSeqDBLockHold locked(m_Atlas);
803  m_Atlas.Lock(locked);
804  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
805 
806  if (! m_OidListSetup) {
807  x_GetOidList(locked);
808  }
809 
810  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
811  if( ! reusable_inpstr ) {
813  }
814  return vol->GetSeqIDs(vol_oid, reusable_inpstr);
815  }
816 
818 }
819 
821 {
822  CHECK_MARKER();
823  CSeqDBLockHold locked(m_Atlas);
824  return x_GetSeqGI(oid, locked);
825 }
826 
828 {
829  CHECK_MARKER();
830  return m_NumSeqs;
831 }
832 
834 {
835  CHECK_MARKER();
836  return m_NumSeqsStats;
837 }
838 
840 {
841  CHECK_MARKER();
842  return m_NumOIDs;
843 }
844 
846 {
847  CHECK_MARKER();
848  return m_TotalLength;
849 }
850 
852 {
853  CHECK_MARKER();
855  return m_ExactTotalLength;
856 
857  if(m_NeedTotalsScan) {
858  CSeqDBLockHold locked(m_Atlas);
860  &m_MaxLength, &m_MinLength, locked);
861 
862  }
863  else {
865  }
866  return m_ExactTotalLength;
867 }
868 
869 
871 {
872  CHECK_MARKER();
873  return m_TotalLengthStats;
874 }
875 
877 {
878  CHECK_MARKER();
879  return m_VolumeLength;
880 }
881 
883 {
884  CHECK_MARKER();
885 
886  // GetNumSeqs should not overflow, even for alias files.
887 
889  _ASSERT((rv & 0x7FFFFFFF) == rv);
890 
891  return (int) rv;
892 }
893 
895 {
896  CHECK_MARKER();
897 
898  m_Atlas.Lock(locked);
899 
900  if (! m_OidListSetup) {
901  x_GetOidList(locked);
902  }
903  m_Atlas.Unlock(locked);
904 
905  int vol_oid = 0;
906  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
907  // Try lookup *.nxg first
908  TGi gi = vol->GetSeqGI(vol_oid, locked);
909  if (gi>=ZERO_GI) return gi;
910  // Fall back to parsing deflines
911  list< CRef<CSeq_id> > ids =
912  vol->GetSeqIDs(vol_oid);
913  ITERATE(list< CRef<CSeq_id> >, id, ids) {
914  if ((**id).IsGi()) {
915  return (**id).GetGi();
916  }
917  }
918  // No GI found
919  return INVALID_GI;
920  }
921 
923 }
924 
926 {
927  CHECK_MARKER();
928 
929  // GetNumSeqs should not overflow, even for alias files.
930 
932  _ASSERT((rv & 0x7FFFFFFF) == rv);
933 
934  return (int) rv;
935 }
936 
938 {
939  CHECK_MARKER();
940  Int8 num_oids = m_VolSet.GetNumOIDs();
941 
942  // The aliases file may have more of these, because walking the
943  // alias tree will count volumes each time they appear in the
944  // volume set. The volset number is the "good" one, because it
945  // corresponds to the range of OIDs we accept in methods like
946  // "GetSequence()". If you give SeqDB an OID, the volset number
947  // is the range for that oid.
948 
949  // However, at this layer, we need to use Int8, because the alias
950  // number can overestimate so much that it wraps a signed int.
951 
952  _ASSERT(num_oids <= m_Aliases.GetNumOIDs(m_VolSet));
953  _ASSERT((num_oids & 0x7FFFFFFF) == num_oids);
954 
955  return (int) num_oids;
956 }
957 
959 {
960  CHECK_MARKER();
962 }
963 
965 {
966  CHECK_MARKER();
968 }
969 
971 {
972  CHECK_MARKER();
973  return m_VolSet.GetVolumeSetLength();
974 }
975 
977 {
978  CHECK_MARKER();
979  return m_VolSet.GetMaxLength();
980 }
981 
983 {
984  CHECK_MARKER();
986 }
987 
988 string CSeqDBImpl::GetTitle() const
989 {
990  CHECK_MARKER();
992 }
993 
995 {
996  CHECK_MARKER();
997  if (const CSeqDBVol * vol = m_VolSet.GetVol(0)) {
998  return vol->GetSeqType();
999  }
1000  return '-';
1001 }
1002 
1003 string CSeqDBImpl::GetDate() const
1004 {
1005  CHECK_MARKER();
1006 
1007  CSeqDBLockHold locked(m_Atlas);
1008  m_Atlas.Lock(locked);
1009 
1010  if (! m_Date.empty()) {
1011  return m_Date;
1012  }
1013 
1014  // This is close enough to allow parsing but does not precisely
1015  // describe the format normally used for generated dates.
1016 
1017  string fmt = "b d, Y H:m P";
1018  string date;
1019 
1020  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1021  string d = x_FixString( m_VolSet.GetVol(i)->GetDate() );
1022 
1023  if (date.empty()) {
1024  date = d;
1025  } else if (d != date) {
1026  try {
1027  CTime t1(date, fmt);
1028  CTime t2(d, fmt);
1029 
1030  if (t2 > t1) {
1031  date.swap(d);
1032  }
1033  }
1034  catch(CStringException &) {
1035  // Here I think it is better to pick any valid date
1036  // than to propagate a string exception.
1037  }
1038  }
1039  }
1040 
1041  m_Date = date;
1042 
1043  return date;
1044 }
1045 
1047 {
1048  CHECK_MARKER();
1049  CSeqDBLockHold locked(m_Atlas);
1050 
1051  return x_GetHdr(oid, locked);
1052 }
1053 
1056 {
1057  CHECK_MARKER();
1058  m_Atlas.Lock(locked);
1059  //m_Atlas.MentionOid(oid, m_NumOIDs, locked);
1060 
1061  if (! m_OidListSetup) {
1062  x_GetOidList(locked);
1063  }
1064  m_Atlas.Unlock(locked);
1065 
1066  int vol_oid = 0;
1067 
1068  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
1069  return vol->GetFilteredHeader(vol_oid, locked);
1070  }
1071 
1073 }
1074 
1076 {
1077  CHECK_MARKER();
1078  return m_MaxLength;
1079 }
1080 
1082 {
1083  CHECK_MARKER();
1084  return m_MinLength;
1085 }
1086 
1087 const string & CSeqDBImpl::GetDBNameList() const
1088 {
1089  CHECK_MARKER();
1090  return m_DBNames;
1091 }
1092 
1093 // This is a work-around for bad data in the database; probably the
1094 // fault of formatdb. The problem is that the database date field has
1095 // an incorrect length - possibly a general problem with string
1096 // handling in formatdb? In any case, this method trims a string to
1097 // the minimum of its length and the position of the first NULL. This
1098 // technique will not work if the date field is not null terminated,
1099 // but apparently it usually or always is, in spite of the length bug.
1100 
1101 string CSeqDBImpl::x_FixString(const string & s) const
1102 {
1103  CHECK_MARKER();
1104  for(int i = 0; i < (int) s.size(); i++) {
1105  if (s[i] == char(0)) {
1106  return string(s,0,i);
1107  }
1108  }
1109  return s;
1110 }
1111 
1112 // Assumes atlas is locked
1113 
1115 {
1116  m_VolSet.UnLease();
1117 }
1118 
1119 bool CSeqDBImpl::PigToOid(int pig, int & oid) const
1120 {
1121  CHECK_MARKER();
1122 
1123  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1124  if (m_VolSet.GetVol(i)->PigToOid(pig, oid)) {
1125  oid += m_VolSet.GetVolOIDStart(i);
1126  return true;
1127  }
1128  }
1129 
1130  return false;
1131 }
1132 
1133 bool CSeqDBImpl::OidToPig(int oid, int & pig) const
1134 {
1135  CHECK_MARKER();
1136  CSeqDBLockHold locked(m_Atlas);
1137  int vol_oid(0);
1138 
1139  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
1140  return vol->GetPig(vol_oid, pig, locked);
1141  }
1142 
1144 }
1145 
1146 bool CSeqDBImpl::TiToOid(Int8 ti, int & oid)
1147 {
1148  CHECK_MARKER();
1149  CSeqDBLockHold locked(m_Atlas);
1150 
1151  if (! m_OidListSetup) {
1152  x_GetOidList(locked);
1153  }
1154 
1155  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1156  if (m_VolSet.GetVol(i)->TiToOid(ti,
1157  oid,
1158  locked)) {
1159  oid += m_VolSet.GetVolOIDStart(i);
1160  return true;
1161  }
1162  }
1163 
1164  return false;
1165 }
1166 
1167 bool CSeqDBImpl::GiToOid(TGi gi, int & oid) const
1168 {
1169  CHECK_MARKER();
1170  CSeqDBLockHold locked(m_Atlas);
1171 
1172  // This could be accellerated (a little) if a GI list is used.
1173  // However, this should be done (if at all) at the volume layer,
1174  // not in *Impl. This volume may mask a particular GI that the
1175  // user gi list DOES NOT mask. Or, the user GI list may assign
1176  // this GI an OID that belongs to a different volume, for example,
1177  // if the same GI appears in more than one volume. In such cases,
1178  // the volume GI list (if one exists) is probably a better filter,
1179  // because it represents the restrictions of both the volume GI
1180  // list and the User GI list. It's also smaller, and therefore
1181  // should be easier to binary search.
1182 
1183  // (The hypothetical optimization described above changes if there
1184  // are Seq-ids in the user provided list, because you can no
1185  // longer assume that the GI list is all-inclusive -- you would
1186  // also need to fall back on regular lookups.)
1187 
1188  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1189  if (m_VolSet.GetVol(i)->GiToOid(gi, oid, locked)) {
1190  oid += m_VolSet.GetVolOIDStart(i);
1191  return true;
1192  }
1193  }
1194 
1195  return false;
1196 }
1197 
1199 {
1200  CHECK_MARKER();
1201  CSeqDBLockHold locked(m_Atlas);
1202 
1203  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1204  oid =-1;
1205  if (m_VolSet.GetVol(i)->GiToOid(gi, oid, locked)) {
1206  oid += m_VolSet.GetVolOIDStart(i);
1207  int oid0 = oid;
1208  if (CheckOrFindOID(oid) && (oid==oid0)) {
1209  return true;
1210  }
1211  }
1212  }
1213  return false;
1214 }
1215 
1216 bool CSeqDBImpl::OidToGi(int oid, TGi & gi)
1217 {
1218  CHECK_MARKER();
1219  CSeqDBLockHold locked(m_Atlas);
1220 
1221  if (! m_OidListSetup) {
1222  x_GetOidList(locked);
1223  }
1224 
1225  int vol_oid(0);
1226 
1227  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
1228  return vol->GetGi(vol_oid, gi, locked);
1229  }
1230 
1232 }
1233 
1234 void CSeqDBImpl::AccessionToOids(const string & acc,
1235  vector<int> & oids)
1236 {
1237  CHECK_MARKER();
1238  CSeqDBLockHold locked(m_Atlas);
1239 
1240  if (! m_OidListSetup) {
1241  x_GetOidList(locked);
1242  }
1243 
1244  oids.clear();
1245  if (m_LMDBSet.IsBlastDBVersion5()) {
1246  vector<int> tmp;
1248  for(unsigned int i=0; i < tmp.size(); i++) {
1249  int oid2 = tmp[i];
1250  if (x_CheckOrFindOID(oid2, locked) && (tmp[i] == oid2)) {
1251  oids.push_back(tmp[i]);
1252  }
1253  }
1254  }
1255  else {
1256  vector<int> vol_oids;
1257 
1258  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1259  // Append any additional OIDs from this volume's indices.
1260  m_VolSet.GetVol(vol_idx)->AccessionToOids(acc,
1261  vol_oids,
1262  locked);
1263 
1264  if (vol_oids.empty()) {
1265  continue;
1266  }
1267 
1268  int vol_start = m_VolSet.GetVolOIDStart(vol_idx);
1269 
1270  ITERATE(vector<int>, iter, vol_oids) {
1271  int oid1 = ((*iter) + vol_start);
1272  int oid2 = oid1;
1273 
1274  // Remove OIDs already found in OIDs.
1275 
1276  if (find(oids.begin(), oids.end(), oid1) != oids.end()) {
1277  continue;
1278  }
1279 
1280  // Filter out any oids not in the virtual oid bitmaps.
1281 
1282  if (x_CheckOrFindOID(oid2, locked) && (oid1 == oid2)) {
1283  oids.push_back(oid1);
1284  }
1285  }
1286 
1287  vol_oids.clear();
1288  }
1289  }
1290 }
1291 
1292 
1293 void CSeqDBImpl::TaxIdsToOids(set<TTaxId>& tax_ids, vector<blastdb::TOid>& rv)
1294 {
1295  CHECK_MARKER();
1296  rv.clear();
1297  vector<blastdb::TOid> oids;
1298  if (m_LMDBSet.IsBlastDBVersion5()) {
1299  m_LMDBSet.TaxIdsToOids(tax_ids,oids);
1300  CSeqDBLockHold locked(m_Atlas);
1301  for(unsigned int i=0; i < oids.size(); i++) {
1302  blastdb::TOid oid2 = oids[i];
1303  if (x_CheckOrFindOID(oid2, locked) && (oids[i] == oid2)) {
1304  rv.push_back(oids[i]);
1305  }
1306  }
1307  }
1308  else {
1309  NCBI_THROW(CSeqDBException, eArgErr,
1310  "Taxonomy list is not supported in v4 BLAST db");
1311  }
1312  return;
1313 }
1314 
1316 {
1317  CHECK_MARKER();
1318  CSeqDBLockHold locked(m_Atlas);
1319 
1320  if (! m_OidListSetup) {
1321  x_GetOidList(locked);
1322  }
1323  tax_ids.clear();
1324  if (m_LMDBSet.IsBlastDBVersion5()) {
1325  if(m_OIDList.NotEmpty()){
1326  vector<blastdb::TOid> oids;
1327  for(int oid = 0; CheckOrFindOID(oid); oid++) {
1328  oids.push_back(oid);
1329  }
1330  m_LMDBSet.GetTaxIdsForOids(oids, tax_ids);
1331  }
1332  else {
1333  m_LMDBSet.GetDBTaxIds(tax_ids);
1334  }
1335  }
1336  else {
1337  NCBI_THROW(CSeqDBException, eArgErr,
1338  "Taxonomy list is not supported in v4 BLAST db");
1339  }
1340  return;
1341 }
1342 
1343 void CSeqDBImpl::GetTaxIdsForOids(const vector<blastdb::TOid> & oids, set<TTaxId> & tax_ids)
1344 {
1345  if (m_LMDBSet.IsBlastDBVersion5()) {
1346  m_LMDBSet.GetTaxIdsForOids(oids, tax_ids);
1347  }
1348  else {
1349  NCBI_THROW(CSeqDBException, eArgErr,
1350  "Taxonomy list is not supported in v4 BLAST db");
1351  }
1352 
1353 }
1354 
1355 void CSeqDBImpl::AccessionsToOids(const vector<string>& accs, vector<blastdb::TOid>& oids)
1356 {
1357  CHECK_MARKER();
1358  oids.clear();
1359  oids.resize(accs.size());
1360  if (m_LMDBSet.IsBlastDBVersion5()) {
1361  m_LMDBSet.AccessionsToOids(accs,oids);
1362  CSeqDBLockHold locked(m_Atlas);
1363  for(unsigned int i=0; i < oids.size(); i++) {
1364  if(oids[i] == kSeqDBEntryNotFound) {
1365  continue;
1366  }
1367  blastdb::TOid oid2 = oids[i];
1368  if (!(x_CheckOrFindOID(oid2, locked) && (oids[i] == oid2))) {
1369  oids[i] = kSeqDBEntryNotFound;
1370  }
1371  }
1372  }
1373  else {
1374  for(unsigned int i=0; i < accs.size(); i++) {
1375  vector<blastdb::TOid> tmp;
1376  AccessionToOids(accs[i], tmp);
1377  if(tmp.empty()) {
1378  oids[i] = kSeqDBEntryNotFound;
1379  }
1380  else {
1381  oids[i] = tmp[0];
1382  }
1383  }
1384  }
1385  return;
1386 }
1387 
1388 
/// Translate a Seq-id into the OIDs that carry it.
///
/// Only OIDs that x_CheckOrFindOID() confirms unchanged are reported.
///
/// @param seqid_in  Seq-id to look up. [in]
/// @param oids      Cleared, then filled with the accepted OIDs. [out]
/// @param multi     If false, the per-volume search stops after the
///                  first accepted OID. The LMDB branch does not
///                  consult this flag. [in]
1389 void CSeqDBImpl::SeqidToOids(const CSeq_id & seqid_in,
1390  vector<int> & oids,
1391  bool multi)
1392 {
1393  CHECK_MARKER();
1394  CSeqDBLockHold locked(m_Atlas);
1395 
1396  if (! m_OidListSetup) {
1397  x_GetOidList(locked);
1398  }
1399 
1400  oids.clear();
1401 
 // General ids whose db tag is "BL_ORD_ID" never take the LMDB branch.
1402  bool is_BL_ORD_ID = false;
1403  if(seqid_in.Which() == CSeq_id::e_General)
1404  {
1405  const CDbtag & dbt = seqid_in.GetGeneral();
1406  if (dbt.CanGetDb()) {
1407  if (dbt.GetDb() == "BL_ORD_ID") {
1408  is_BL_ORD_ID = true;
1409  }
1410  }
1411  }
1412 
 // Version 5 databases resolve string ids through m_LMDBSet.
1413  if (m_LMDBSet.IsBlastDBVersion5() && (!is_BL_ORD_ID)) {
1414  if(IsStringId(seqid_in)) {
1415  vector<int> tmp;
1416  if(seqid_in.IsPir() || seqid_in.IsPrf()) {
 // NOTE(review): the statement for the PIR/PRF case (listing line
 // 1417) is missing from this extract.
1418  }
1419  else {
1420  m_LMDBSet.AccessionToOids(seqid_in.GetSeqIdString(true), tmp);
1421  }
 // x_CheckOrFindOID() may change its argument, so probe with a copy.
1422  for(unsigned int i=0; i < tmp.size(); i++) {
1423  int oid2 = tmp[i];
1424  if (x_CheckOrFindOID(oid2, locked) && (tmp[i] == oid2)) {
1425  oids.push_back(tmp[i]);
1426  }
1427  }
1428  return;
1429  }
1430  }
1431 
1432  vector<int> vol_oids;
1433 
1434  // The lower level functions modify the seqid - namely, changing
1435  // or clearing certain fields before printing it to a string.
1436  // Further analysis of data and exception flow might reveal that
1437  // the Seq_id will always be returned to the original state by
1438  // this operation... At the moment, safest route is to clone it.
1439  CSeq_id seqid;
1440  seqid.Assign(seqid_in);
1441 
1442  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1443  // Append any additional OIDs from this volume's indices.
1444  m_VolSet.GetVol(vol_idx)->SeqidToOids(seqid, vol_oids, locked);
1445 
1446  if (vol_oids.empty()) {
1447  continue;
1448  }
1449 
 // Volume-local OIDs are shifted by the volume's starting OID.
1450  int vol_start = m_VolSet.GetVolOIDStart(vol_idx);
1451 
1452  ITERATE(vector<int>, iter, vol_oids) {
1453  int oid1 = ((*iter) + vol_start);
1454  int oid2 = oid1;
1455 
1456  // Filter out any oids not in the virtual oid bitmaps.
1457 
1458  if (x_CheckOrFindOID(oid2, locked) && (oid1 == oid2)) {
1459  oids.push_back(oid1);
1460 
1461  if (! multi) {
1462  return;
1463  }
1464  }
1465  }
1466 
1467  vol_oids.clear();
1468  }
1469 }
1470 
/// Map a (starting OID, residue offset) pair to an OID.
///
/// Walks the volumes in order. The first volume that contains both
/// remaining limits gets the request; its answer is returned shifted
/// by the OID count of the volumes already passed.
///
/// @param first_seq  Starting OID; must be below m_NumOIDs. [in]
/// @param residue    Residue offset; must be below m_VolumeLength. [in]
/// @return The OID computed by the selected volume, plus its offset.
/// @throws An eArgErr exception if a limit is out of range or no
///         volume qualifies.
1471 int CSeqDBImpl::GetOidAtOffset(int first_seq, Uint8 residue) const
1472 {
1473  CSeqDBLockHold locked(m_Atlas);
1474  m_Atlas.Lock(locked);
1475 
1476  CHECK_MARKER();
1477  if (first_seq >= m_NumOIDs) {
 // NOTE(review): the opening of this throw statement (listing line
 // 1478) is missing from this extract.
1479  eArgErr,
1480  "OID not in valid range.");
1481  }
1482 
1483  if (residue >= m_VolumeLength) {
 // NOTE(review): listing line 1484 (start of the throw) is missing.
1485  eArgErr,
1486  "Residue offset not in valid range.");
1487  }
1488 
 // Number of OIDs in the volumes already passed.
1489  int vol_start(0);
1490 
1491  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1492  const CSeqDBVol * volp = m_VolSet.GetVol(vol_idx);
1493 
1494  int vol_cnt = volp->GetNumOIDs();
1495  Uint8 vol_len = volp->GetVolumeLength();
1496 
1497  // Both limits fit this volume, delegate to volume code.
1498 
1499  if ((first_seq < vol_cnt) && (residue < vol_len)) {
1500  return vol_start + volp->GetOidAtOffset(first_seq, residue, locked);
1501  }
1502 
1503  // Adjust each limit.
1504 
1505  vol_start += vol_cnt;
1506 
 // Each limit is reduced by what this volume covers, clamped at zero.
1507  if (first_seq > vol_cnt) {
1508  first_seq -= vol_cnt;
1509  } else {
1510  first_seq = 0;
1511  }
1512 
1513  if (residue > vol_len) {
1514  residue -= vol_len;
1515  } else {
1516  residue = 0;
1517  }
1518  }
1519 
 // NOTE(review): listing line 1520 (start of the final throw) is missing.
1521  eArgErr,
1522  "Could not find valid split point oid.");
1523 }
1524 
// Resolve a database name to volume paths without a CSeqDBImpl object:
// builds a temporary atlas and alias file and asks the alias file.
// NOTE(review): listing line 1526 (the function name and first parameter,
// presumably the `dbname` used below) is missing from this extract.
1525 void
1527  char prot_nucl,
1528  vector<string> & paths,
1529  vector<string> * alias_paths,
1530  bool recursive,
1531  bool expand_links)
1532 {
1533  bool use_atlas_lock = true;
1534  CSeqDBAtlasHolder AH(NULL, use_atlas_lock);
1535  CSeqDBAtlas & atlas(AH.Get());
1536 
1537  // This constructor handles its own locking.
1538  CSeqDBAliasFile aliases(atlas, dbname, prot_nucl, expand_links);
 // paths / alias_paths are filled by the alias file object.
1539  aliases.FindVolumePaths(paths, alias_paths, recursive);
1540 }
1541 
1542 void
1543 CSeqDBImpl::FindVolumePaths(vector<string> & paths, bool recursive) const
1544 {
1545  CHECK_MARKER();
1546  m_Aliases.FindVolumePaths(paths, NULL, recursive);
1547 }
1548 
1549 void
1550 CSeqDBImpl::FindVolumePaths(vector<string> & paths, vector<string> & alias, bool recursive) const
1551 {
1552  CHECK_MARKER();
1553  m_Aliases.FindVolumePaths(paths, &alias, recursive);
1554 }
1555 
// NOTE(review): listing lines 1556 (the function header) and 1562 (the
// statement executed while the lock is held) are missing from this
// extract. Only the marker check and the atlas locking are visible.
1557 {
1558  CHECK_MARKER();
1559  CSeqDBLockHold locked(m_Atlas);
1560  m_Atlas.Lock(locked);
1561 
1563 }
1564 
// Scan every OID accepted by x_CheckOrFindOID() and accumulate totals.
// Each output pointer may be NULL; a NULL output is simply not written.
//   numseq - receives the number of OIDs visited
//   totlen - receives the sum of the sequence lengths
//   maxlen - receives the largest length seen (0 if nothing was visited)
//   minlen - receives the smallest length seen (INT4_MAX if nothing
//            was visited)
// NOTE(review): listing line 1565 (function name and first parameter,
// presumably the `approx` flag used below) is missing from this extract.
1566  int * numseq,
1567  Uint8 * totlen,
1568  int * maxlen,
1569  int * minlen,
1570  CSeqDBLockHold & locked)
1571 {
1572  int oid_count(0);
1573  Uint8 base_count(0);
1574  int max_count(0);
1575  int min_count(INT4_MAX);
1576 
1577  const CSeqDBVol * volp = 0;
1578 
 // x_CheckOrFindOID() receives `oid` by name and may change it, so the
 // loop can skip over OIDs.
1579  for(int oid = 0; x_CheckOrFindOID(oid, locked); oid++) {
1580  ++ oid_count;
1581 
1582  int vol_oid = 0;
1583 
1584  volp = m_VolSet.FindVol(oid, vol_oid);
1585 
1586  _ASSERT(volp);
1587 
 // Lengths are only fetched when some length output was requested.
1588  if (totlen || maxlen || minlen) {
1589  int len;
1590  if ('p' == m_SeqType) {
1591  len = volp->GetSeqLengthProt(vol_oid);
1592  } else {
1593  if (approx) {
1594  len = volp->GetSeqLengthApprox(vol_oid);
1595  } else {
1596  len = volp->GetSeqLengthExact(vol_oid);
1597  }
1598  }
1599  max_count = max(len, max_count);
1600  min_count = min(len, min_count);
1601  base_count += len;
1602  }
1603  }
1604 
1605  if (numseq) {
1606  *numseq = oid_count;
1607  }
1608 
1609  if (totlen) {
1610  *totlen = base_count;
1611  }
1612 
1613  if (maxlen) {
1614  *maxlen = max_count;
1615  }
1616 
1617  if (minlen) {
1618  *minlen = min_count;
1619  }
1620 }
1621 
// Look up taxonomy names through CSeqDBTaxInfo::GetTaxNames(); if that
// call returns false, throw eArgErr with the message "Taxid <id> not found".
// NOTE(review): listing line 1622 (the function header declaring `taxid`
// and `info`) is missing from this extract.
1623 {
1624  if (! CSeqDBTaxInfo::GetTaxNames(taxid, info)) {
1625  CNcbiOstrstream oss;
1626  oss << "Taxid " << taxid << " not found";
1627  string msg = CNcbiOstrstreamToString(oss);
1628  NCBI_THROW(CSeqDBException, eArgErr, msg);
1629  }
1630 }
1631 
// Report an OID count and a total length for the summary type selected
// by `sumtype`. Both outputs are optional (NULL is allowed) and are
// zeroed before the switch.
// NOTE(review): listing line 1632 (function name and the `sumtype`
// parameter) is missing from this extract, as are two `case` labels
// (listing lines 1648 and 1666).
1633  int * oid_count,
1634  Uint8 * total_length,
1635  bool use_approx)
1636 {
1637  CSeqDBLockHold locked(m_Atlas);
1638 
1639  if (oid_count) {
1640  *oid_count = 0;
1641  }
1642 
1643  if (total_length) {
1644  *total_length = 0;
1645  }
1646 
1647  switch(sumtype) {
 // (missing case label) whole-database figures: GetNumOIDs() and
 // GetVolumeLength().
1649  if (oid_count) {
1650  *oid_count = GetNumOIDs();
1651  }
1652  if (total_length) {
1653  *total_length = GetVolumeLength();
1654  }
1655  break;
1656 
 // Figures taken from GetNumSeqs() and GetTotalLength().
1657  case CSeqDB::eFilteredAll:
1658  if (oid_count) {
1659  *oid_count = GetNumSeqs();
1660  }
1661  if (total_length) {
1662  *total_length = GetTotalLength();
1663  }
1664  break;
1665 
 // (missing case label) figures obtained by an explicit scan; the
 // max/min length outputs of the scan are not requested.
1667  x_ScanTotals(use_approx, oid_count, total_length, NULL, NULL, locked);
1668  break;
1669  }
1670 }
1671 
// Fetch raw sequence data and lengths for one OID by delegating to the
// volume that holds it. `buffer`, `seq_length` and `ambig_length` are
// passed straight through to the volume.
// NOTE(review): listing line 1672 (function name and the `oid`
// parameter) and listing line 1690 (the statement reached when no
// volume holds the OID) are missing from this extract.
1673  const char ** buffer,
1674  int * seq_length,
1675  int * ambig_length) const
1676 {
1677  CHECK_MARKER();
1678 
1679  int vol_oid = 0;
1680 
 // FindVol() also produces the volume-local OID in vol_oid.
1681  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
1682  vol->GetRawSeqAndAmbig(vol_oid,
1683  buffer,
1684  seq_length,
1685  ambig_length);
1686 
1687  return;
1688  }
1689 
1691 }
1692 
/// Fold one volume's low / high / count triple into running totals.
///
/// Works for any ordered type.  Every output pointer is optional: a
/// NULL output is left alone.  When set_all is true the inputs are
/// copied to the outputs without looking at the previous values (used
/// for the first volume that contributes data).  Otherwise the low
/// output is lowered if low_in is smaller, the high output is raised
/// if high_in is larger, and count_in is added to the count output.
///
/// @param low_in    Low value for one volume. [in]
/// @param high_in   High value for one volume. [in]
/// @param count_in  ID count for one volume. [in]
/// @param low_out   If non-null, running low value. [in|out]
/// @param high_out  If non-null, running high value. [in|out]
/// @param count_out If non-null, running ID count. [in|out]
/// @param set_all   If true, adopt the inputs without testing. [in]
template<class TId>
inline void s_AccumulateMinMaxCount(TId   low_in,
                                    TId   high_in,
                                    int   count_in,
                                    TId * low_out,
                                    TId * high_out,
                                    int * count_out,
                                    bool  set_all)
{
    // set_all is tested first so that the previous output values are
    // never read when they are about to be overwritten anyway.
    if (low_out) {
        if (set_all || (*low_out > low_in)) {
            *low_out = low_in;
        }
    }

    if (high_out) {
        if (set_all || (*high_out < high_in)) {
            *high_out = high_in;
        }
    }

    if (count_out) {
        if (set_all) {
            *count_out = count_in;
        } else {
            *count_out += count_in;
        }
    }
}
1742 
// Combine the per-volume GI bounds into overall low / high / count values.
// Volumes reporting a zero count are ignored; if every volume is ignored
// an eArgErr exception ("No GIs found.") is thrown.
// NOTE(review): listing line 1743 (function name and first parameter,
// presumably the `low_id` used below) and listing line 1758 (the start
// of the accumulate call) are missing from this extract.
1744  TGi * high_id,
1745  int * count)
1746 {
1747  CSeqDBLockHold locked(m_Atlas);
1748 
1749  bool found = false;
1750 
1751  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1752  TGi vlow(ZERO_GI), vhigh(ZERO_GI);
1753  int vcount(0);
1754 
1755  m_VolSet.GetVol(i)->GetGiBounds(vlow, vhigh, vcount, locked);
1756 
1757  if (vcount) {
 // `! found` is true only for the first contributing volume.
1759  vhigh,
1760  vcount,
1761  low_id,
1762  high_id,
1763  count,
1764  ! found);
1765 
1766  found = true;
1767  }
1768  }
1769 
1770  if (! found) {
1771  NCBI_THROW(CSeqDBException, eArgErr, "No GIs found.");
1772  }
1773 }
1774 
/// Combine the per-volume PIG bounds into overall values.
///
/// Volumes reporting a zero count are ignored.
///
/// @param low_id   Passed on as the low output. [out]
/// @param high_id  Passed on as the high output. [out]
/// @param count    Passed on as the count output. [out]
/// @throws CSeqDBException (eArgErr) if no volume reports any PIGs.
1775 void CSeqDBImpl::GetPigBounds(int * low_id,
1776  int * high_id,
1777  int * count)
1778 {
1779  CSeqDBLockHold locked(m_Atlas);
1780 
1781  bool found = false;
1782 
1783  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1784  int vlow(0), vhigh(0), vcount(0);
1785 
1786  m_VolSet.GetVol(i)->GetPigBounds(vlow, vhigh, vcount, locked);
1787 
1788  if (vcount) {
 // NOTE(review): listing line 1789 (the start of the accumulate call)
 // is missing from this extract. `! found` is true only for the first
 // contributing volume.
1790  vhigh,
1791  vcount,
1792  low_id,
1793  high_id,
1794  count,
1795  ! found);
1796 
1797  found = true;
1798  }
1799  }
1800 
1801  if (! found) {
1802  NCBI_THROW(CSeqDBException, eArgErr, "No PIGs found.");
1803  }
1804 }
1805 
/// Combine the per-volume string-id bounds into overall values.
///
/// Volumes reporting a zero count are ignored.  Unlike the PIG variant
/// in this file, no lock holder is created here.
///
/// @param low_id   Passed on as the low output. [out]
/// @param high_id  Passed on as the high output. [out]
/// @param count    Passed on as the count output. [out]
/// @throws CSeqDBException (eArgErr) if no volume reports any strings.
1806 void CSeqDBImpl::GetStringBounds(string * low_id,
1807  string * high_id,
1808  int * count)
1809 {
1810  bool found = false;
1811 
1812  for(int i = 0; i < m_VolSet.GetNumVols(); i++) {
1813  string vlow, vhigh;
1814  int vcount(0);
1815 
1816  m_VolSet.GetVol(i)->GetStringBounds(vlow, vhigh, vcount);
1817 
1818  if (vcount) {
 // NOTE(review): listing line 1819 (the start of the accumulate call)
 // is missing from this extract. `! found` is true only for the first
 // contributing volume.
1820  vhigh,
1821  vcount,
1822  low_id,
1823  high_id,
1824  count,
1825  ! found);
1826 
1827  found = true;
1828  }
1829  }
1830 
1831  if (! found) {
1832  NCBI_THROW(CSeqDBException, eArgErr, "No strings found.");
1833  }
1834 }
1835 
// Forward a set of offset ranges for one OID to the volume that holds
// it; all three visible parameters are passed through unchanged.
// NOTE(review): listing line 1836 (function name and the `oid`
// parameter) and listing line 1850 (the statement executed when no
// volume holds the OID) are missing from this extract.
1837  const TRangeList & offset_ranges,
1838  bool append_ranges,
1839  bool cache_data)
1840 {
1841  CHECK_MARKER();
1842  int vol_oid = 0;
1843 
 // FindVol() also produces the volume-local OID in vol_oid.
1844  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid)) {
1845  vol->SetOffsetRanges(vol_oid,
1846  offset_ranges,
1847  append_ranges,
1848  cache_data);
1849  } else {
1851  }
1852 }
1853 
// Call FlushOffsetRangeCache() on every volume of the volume set.
// NOTE(review): listing line 1854 (the function header) is missing from
// this extract.
1855 {
1856  CHECK_MARKER();
1857 
1858  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1859  CSeqDBVol* volp = m_VolSet.GetVolNonConst(vol_idx);
1860  volp->FlushOffsetRangeCache();
1861  }
1862 }
1863 
1864 
// Compute the SeqDB_SequenceHash() of one sequence: fetch it with
// GetAmbigSeq(), hash the returned bytes, release the buffer with
// RetAmbigSeq(), and return the hash.
// NOTE(review): listing line 1865 (the function header declaring `oid`)
// and listing line 1870 (one argument of the GetAmbigSeq() call) are
// missing from this extract.
1866 {
1867  char * datap(0);
1868  int base_len = GetAmbigSeq(oid,
1869  & datap,
1871  0,
1872  (ESeqDBAllocType) 0);
1873 
1874  unsigned h = SeqDB_SequenceHash(datap, base_len);
1875 
 // The buffer is handed back before returning.
1876  RetAmbigSeq(const_cast<const char**>(& datap));
1877 
1878  return h;
1879 }
1880 
1881 void CSeqDBImpl::HashToOids(unsigned hash, vector<int> & oids)
1882 {
1883  // Find all OIDs in all volumes that match this hash.
1884 
1885  CHECK_MARKER();
1886  CSeqDBLockHold locked(m_Atlas);
1887 
1888  oids.clear();
1889 
1890  vector<int> vol_oids;
1891 
1892  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1893  // Append any additional OIDs from this volume's indices.
1894  m_VolSet.GetVol(vol_idx)->HashToOids(hash, vol_oids, locked);
1895 
1896  if (vol_oids.empty()) {
1897  continue;
1898  }
1899 
1900  int vol_start = m_VolSet.GetVolOIDStart(vol_idx);
1901 
1902  ITERATE(vector<int>, iter, vol_oids) {
1903  int oid1 = (*iter) + vol_start;
1904  int oid2 = oid1;
1905 
1906  // Filter out any oids not in the virtual oid bitmaps.
1907 
1908  if (x_CheckOrFindOID(oid2, locked) && (oid1 == oid2)) {
1909  oids.push_back(oid1);
1910  }
1911  }
1912 
1913  vol_oids.clear();
1914  }
1915 }
1916 
// Populate m_IdSet, if it is still blank, from the user GI list or
// else from the negative list. At most one id type is adopted: GIs are
// preferred over TIs, and (negative list only) TIs over string ids.
// NOTE(review): listing line 1917 (the function header) is missing from
// this extract.
1918 {
1919  if (m_IdSet.Blank()) {
1920  if (! m_UserGiList.Empty()) {
1921  // Note: this returns a 'blank' IdSet list for positive
1922  // lists that specify filtering using CSeq-id objects.
1923 
1924  if (m_UserGiList->GetNumGis()) {
1925  vector<TGi> gis;
1926  m_UserGiList->GetGiList(gis);
1927 
1928  CSeqDBIdSet new_ids(gis, CSeqDBIdSet::eGi);
1929  m_IdSet = new_ids;
1930  } else if (m_UserGiList->GetNumTis()) {
1931  vector<TTi> tis;
1932  m_UserGiList->GetTiList(tis);
1933 
1934  CSeqDBIdSet new_ids(tis, CSeqDBIdSet::eTi);
1935  m_IdSet = new_ids;
1936  }
1937  } else if (! m_NegativeList.Empty()) {
1938  const vector<TGi> & ngis = m_NegativeList->GetGiList();
1939  const vector<TTi> & ntis = m_NegativeList->GetTiList();
1940  const vector<string> & stis = m_NegativeList->GetSiList();
1941 
 // Id sets built from the negative list pass `false` as a third
 // constructor argument; the positive-list sets above omit it.
1942  if (! ngis.empty()) {
1943  CSeqDBIdSet new_ids(ngis, CSeqDBIdSet::eGi, false);
1944  m_IdSet = new_ids;
1945  } else if (! ntis.empty()) {
1946  CSeqDBIdSet new_ids(ntis, CSeqDBIdSet::eTi, false);
1947  m_IdSet = new_ids;
1948  } else if (!stis.empty()) {
1949  CSeqDBIdSet new_ids(stis, CSeqDBIdSet::eSi, false);
1950  m_IdSet = new_ids;
1951  }
1952  }
1953  }
1954 }
1955 
// Return the stored m_IdSet member.
// NOTE(review): listing line 1956 (the function header) is missing from
// this extract.
1957 {
1958  return m_IdSet;
1959 }
1960 
1961 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1962  (!defined(NCBI_COMPILER_MIPSPRO)) )
1963 void CSeqDBImpl::ListColumns(vector<string> & titles)
1964 {
1965  CHECK_MARKER();
1966  CSeqDBLockHold locked(m_Atlas);
1967  m_Atlas.Lock(locked);
1968 
1969  set<string> all;
1970 
1971  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1972  m_VolSet.GetVolNonConst(vol_idx)->ListColumns(all, locked);
1973  }
1974 
1975  titles.assign(all.begin(), all.end());
1976 }
1977 
1978 int CSeqDBImpl::GetColumnId(const string & title)
1979 {
1980  CHECK_MARKER();
1981  CSeqDBLockHold locked(m_Atlas);
1982 
1983  return x_GetColumnId(title, locked);
1984 }
1985 
1986 int CSeqDBImpl::x_GetColumnId(const string & title,
1987  CSeqDBLockHold & locked)
1988 {
1989  m_Atlas.Lock(locked);
1990 
1991  int col_id = SeqDB_MapFind(m_ColumnTitleMap, title, (int) kUnknownTitle);
1992 
1993  if (col_id == kUnknownTitle) {
1994  vector<int> vol_ids;
1995 
1996  bool found = false;
1997 
1998  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
1999  CSeqDBVol * volp = m_VolSet.GetVolNonConst(vol_idx);
2000  int id = volp->GetColumnId(title, locked);
2001 
2002  vol_ids.push_back(id);
2003 
2004  if (id >= 0) {
2005  found = true;
2006  }
2007  }
2008 
2009  if (found) {
2010  CRef<CSeqDB_ColumnEntry> obj(new CSeqDB_ColumnEntry(vol_ids));
2011 
2012  col_id = static_cast<int>(m_ColumnInfo.size());
2013  m_ColumnInfo.push_back(obj);
2014  } else {
2015  col_id = kColumnNotFound;
2016  }
2017 
2018  // Cache this lookup even if it failed (-1).
2019 
2020  m_ColumnTitleMap[title] = col_id;
2021  }
2022 
2023  return col_id;
2024 }
2025 
// Return the merged meta data map of one column. On first use the map is
// built by copying every key/value pair from each volume that has the
// column (later volumes overwrite via SetMapValue); afterwards the stored
// map is returned directly.
// NOTE(review): listing line 2027 (function name and the `column_id`
// parameter) and listing line 2038 are missing from this extract.
2026 const map<string,string> &
2028 {
2029  CHECK_MARKER();
2030  CSeqDBLockHold locked(m_Atlas);
2031  m_Atlas.Lock(locked);
2032 
 // column_id is used as an index into m_ColumnInfo.
2033  _ASSERT(column_id >= 0);
2034  _ASSERT(column_id < (int)m_ColumnInfo.size());
2035  CSeqDB_ColumnEntry & entry = *m_ColumnInfo[column_id];
2036 
2037  if (! entry.HaveMap()) {
2039 
2040  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
2041  int vol_col_id = entry.GetVolumeIndex(vol_idx);
2042 
 // A negative id means this volume does not have the column.
2043  if (vol_col_id < 0)
2044  continue;
2045 
2046  CSeqDBVol * volp = m_VolSet.GetVolNonConst(vol_idx);
2047  const TStringMap & volmap =
2048  volp->GetColumnMetaData(vol_col_id, locked);
2049 
2050  ITERATE(TStringMap, iter, volmap) {
2051  entry.SetMapValue(iter->first, iter->second);
2052  }
2053  }
2054 
2055  entry.SetHaveMap();
2056  }
2057 
2058  return entry.GetMap();
2059 }
2060 
/// Get the meta data of one column as stored in one named volume.
///
/// @param column_id  Index into m_ColumnInfo. [in]
/// @param volname    Compared against each volume's GetVolName(). [in]
/// @return The map returned by the first volume whose name matches.
/// @throws An eArgErr exception if no volume name matches.
2061 const map<string,string> &
2062 CSeqDBImpl::GetColumnMetaData(int column_id, const string & volname)
2063 {
2064  CHECK_MARKER();
2065  CSeqDBLockHold locked(m_Atlas);
2066  m_Atlas.Lock(locked);
2067 
2068  _ASSERT(column_id >= 0);
2069  _ASSERT(column_id < (int)m_ColumnInfo.size());
2070  CSeqDB_ColumnEntry & entry = *m_ColumnInfo[column_id];
2071 
2072  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
2073  CSeqDBVol * volp = m_VolSet.GetVolNonConst(vol_idx);
2074 
2075  if (volname != volp->GetVolName()) {
2076  continue;
2077  }
2078 
2079  // Found it.
2080 
 // NOTE(review): vol_col_id is not checked for a negative value here,
 // unlike the loops elsewhere in this file - confirm the volume
 // tolerates that.
2081  int vol_col_id = entry.GetVolumeIndex(vol_idx);
2082  return volp->GetColumnMetaData(vol_col_id, locked);
2083  }
2084 
 // NOTE(review): listing line 2085 (the start of this throw statement)
 // is missing from this extract.
2086  eArgErr,
2087  "This column ID was not found.");
2088 }
2089 
// Fetch the blob stored in one column for one OID. The blob is cleared
// first; it is filled only if a volume holds the OID and that volume
// has the column (volume column id >= 0), otherwise it stays cleared.
// NOTE(review): listing line 2090 (function name and the `col_id`
// parameter) is missing from this extract.
2091  int oid,
2092  bool keep,
2093  CBlastDbBlob & blob)
2094 {
2095  CHECK_MARKER();
2096 
2097  // This Clear() must be done outside of the Lock()ed section below
2098  // to avoid possible self-deadlock. In general, do not clear or
2099  // allow the destruction of a blob that may have an attached
2100  // 'lifetime' object while the atlas lock is held.
2101 
2102  blob.Clear();
2103 
2104  CSeqDBLockHold locked(m_Atlas);
2105  m_Atlas.Lock(locked);
2106 
2107  _ASSERT(col_id >= 0);
2108  _ASSERT(col_id < (int)m_ColumnInfo.size());
2109  CSeqDB_ColumnEntry & entry = *m_ColumnInfo[col_id];
2110 
2111  // Find the volume for this OID.
2112 
2113  int vol_idx = -1, vol_oid = -1;
2114 
2115  if (const CSeqDBVol * vol = m_VolSet.FindVol(oid, vol_oid, vol_idx)) {
2116  int vol_col_id = entry.GetVolumeIndex(vol_idx);
2117 
2118  if (vol_col_id >= 0) {
 // FindVol() hands back a const volume; the blob read needs a
 // non-const one.
2119  const_cast<CSeqDBVol *>(vol)->
2120  GetColumnBlob(vol_col_id, vol_oid, blob, keep, locked);
2121  }
2122  }
2123 }
2124 
// Return m_MaskDataColumn, which is filled in from the column titled
// "BlastDb/MaskData" via x_GetColumnId().
// NOTE(review): listing lines 2125 (the function header), 2129 (the
// condition guarding the lookup) and 2133 are missing from this extract.
2126 {
2127  m_Atlas.Lock(locked);
2128 
2130  m_MaskDataColumn = x_GetColumnId("BlastDb/MaskData", locked);
2131  }
2132 
2134 
2135  return m_MaskDataColumn;
2136 }
2137 #endif
2138 
2139 
/// Test whether an associative container holds a key.
///
/// @param c  Container providing find() and end(). [in]
/// @param k  Key to search for. [in]
/// @return true if c.find(k) locates an element.
template<class K, class C>
bool s_Contains(const C & c, const K & k)
{
    const typename C::const_iterator position = c.find(k);
    return !(position == c.end());
}
2145 
2146 static bool s_IsNumericId(const string &id) {
2147  Int4 nid(-1);
2148  return NStr::StringToNumeric(id, &nid, NStr::fConvErr_NoThrow, 10);
2149 }
2150 
/// Look for two map entries whose values compare as duplicates.
///
/// A value that splits into exactly four ':'-separated fields is
/// compared by its third field only; any other value is compared whole.
///
/// @param m  Map whose values are checked. [in]
/// @return Pointer to the (full) value of the first entry found to
///         repeat an earlier one, or NULL if there is none.
2151 static const string* s_CheckUniqueValues(const map<string, string> & m)
2152 {
 // NOTE(review): listing line 2153 is missing from this extract;
 // presumably it declares the TStringMap type used by ITERATE below.
2154 
2155  set<string> seen;
2156 
2157  ITERATE(TStringMap, iter, m) {
2158  string v = iter->second;
2159  vector<string> items;
2160  NStr::Split(v, ":", items);
2161 
2162  if (items.size() == 4) {
2163  v = items[2];
2164  }
2165 
2166  if (s_Contains(seen, v)) {
2167  return & iter->second;
2168  }
2169 
2170  seen.insert(v);
2171  }
2172 
2173  return NULL;
2174 }
2175 
// Constructor: synthetic ids start at 100, the object starts out empty,
// and the cached algorithm id starts at -1.
// NOTE(review): listing line 2176 (the constructor header) is missing
// from this extract. m_CacheVolIndex and m_CacheVolAlgo are not in this
// initializer list - confirm they are initialized elsewhere.
2177  : m_NextId(100), m_Empty(true), m_CacheRealAlgo(-1)
2178 {
2179 }
2180 
2181 void CSeqDB_IdRemapper::GetIdList(vector<int> & algorithms)
2182 {
2183  typedef map<int,string> TIdMap;
2184 
2185  ITERATE(TIdMap, iter, m_IdToDesc) {
2186  algorithms.push_back(iter->first);
2187  }
2188 }
2189 
2190 void CSeqDB_IdRemapper::AddMapping(int vol_id, int id, const string & desc)
2191 {
2192  string real_desc = desc;
2193  vector<string> items;
2194  NStr::Split(desc, ":", items);
2195  if (items.size() == 4) {
2196  real_desc = items[2];
2197  }
2198  bool found_desc = s_Contains(m_DescToId, real_desc);
2199  bool found_id = s_Contains(m_IdToDesc, id);
2200 
2201  int real_id = id;
2202 
2203  if (found_desc) {
2204  if ((! found_id) || (m_DescToId[real_desc] != id)) {
2205  // This description is mapped to a different ID.
2206  real_id = m_DescToId[real_desc];
2207  }
2208  } else {
2209  // New description.
2210 
2211  if (found_id) {
2212  // Pick a 'synthetic' ID for this description,
2213  // i.e. one that is not actually used by any of
2214  // the existing volumes so far.
2215 
2216  while(s_Contains(m_IdToDesc, m_NextId)) {
2217  m_NextId++;
2218  }
2219 
2220  real_id = m_NextId;
2221  }
2222 
2223  // Add the new description.
2224 
2225  m_IdToDesc[real_id] = desc;
2226  m_DescToId[real_desc] = real_id;
2227  }
2228 
2229  m_RealIdToVolumeId[vol_id][real_id] = id;
2230 }
2231 
2232 bool CSeqDB_IdRemapper::GetDesc(int algorithm_id, string & desc)
2233 {
2234  if (! s_Contains(m_IdToDesc, algorithm_id)) {
2235  return false;
2236  }
2237 
2238  desc = m_IdToDesc[algorithm_id];
2239  return true;
2240 }
2241 
2242 int CSeqDB_IdRemapper::GetVolAlgo(int vol_idx, int algo_id)
2243 {
2244  if (algo_id != m_CacheRealAlgo || vol_idx != m_CacheVolIndex) {
2245  m_CacheVolIndex = vol_idx;
2246  m_CacheRealAlgo = algo_id;
2247  m_CacheVolAlgo = RealToVol(vol_idx, algo_id);
2248  }
2249  return m_CacheVolAlgo;
2250 }
2251 
2252 int CSeqDB_IdRemapper::RealToVol(int vol_idx, int algo_id)
2253 {
2254  if (! s_Contains(m_RealIdToVolumeId, vol_idx)) {
2255  NCBI_THROW(CSeqDBException, eArgErr,
2256  "Cannot find volume in algorithm map.");
2257  }
2258 
2259  map<int,int> & trans = m_RealIdToVolumeId[vol_idx];
2260 
2261  if (! s_Contains(trans, algo_id)) {
2262  NCBI_THROW(CSeqDBException, eArgErr,
2263  "Cannot find volume algorithm in algorithm map.");
2264  }
2265 
2266  return trans[algo_id];
2267 }
2268 
2269 int CSeqDB_IdRemapper::GetAlgoId(const string & id)
2270 {
2271  if (! s_Contains(m_DescToId, id)) {
2272  NCBI_THROW(CSeqDBException, eArgErr,
2273  "Cannot find string algorithm id in algorithm map.");
2274  }
2275 
2276  return m_DescToId[id];
2277 }
2278 
2279 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2280  (!defined(NCBI_COMPILER_MIPSPRO)) )
2281 void CSeqDBImpl::GetAvailableMaskAlgorithms(vector<int> & algorithms)
2282 {
2283  if (m_UseGiMask) {
2284  m_GiMask->GetAvailableMaskAlgorithms(algorithms);
2285  return;
2286  }
2287 
2288  CHECK_MARKER();
2289  CSeqDBLockHold locked(m_Atlas);
2290  m_Atlas.Lock(locked);
2291 
2292  if (m_AlgorithmIds.Empty()) {
2293  x_BuildMaskAlgorithmList(locked);
2294  }
2295 
2296  algorithms.resize(0);
2297  m_AlgorithmIds.GetIdList(algorithms);
2298 }
2299 
// Translate the value `algo` into a masking algorithm id: via m_GiMask
// when GI masks are in use, otherwise via m_AlgorithmIds (building the
// algorithm list first if it is still empty).
// NOTE(review): listing line 2300 (the function header declaring `algo`)
// is missing from this extract.
2301 {
2302  if (m_UseGiMask) {
2303  return m_GiMask->GetAlgorithmId(algo);
2304  }
2305 
2306  CHECK_MARKER();
2307  CSeqDBLockHold locked(m_Atlas);
2308  m_Atlas.Lock(locked);
2309 
2310  if (m_AlgorithmIds.Empty()) {
2311  x_BuildMaskAlgorithmList(locked);
2312  }
2313 
2314  return m_AlgorithmIds.GetAlgoId(algo);
2315 }
2316 
// Build a printable table of the available masking algorithms with the
// columns "Algorithm ID", "Algorithm name" and "Algorithm options".
// Returns kEmptyStr when there are no algorithms.
// NOTE(review): listing line 2317 (the function header) is missing from
// this extract.
2318 {
2319  vector<int> algorithms;
2320  GetAvailableMaskAlgorithms(algorithms);
2321  if (algorithms.empty()) {
2322  return kEmptyStr;
2323  }
2324 
2325  CNcbiOstrstream retval;
2326  retval << endl
2327  << "Available filtering algorithms applied to database sequences:"
2328  << endl << endl;
2329 
2330  retval << setw(13) << left << "Algorithm ID"
2331  << setw(40) << left << "Algorithm name"
2332  << setw(40) << left << "Algorithm options" << endl;
2333  ITERATE(vector<int>, algo_id, algorithms) {
2334  string algo, algo_opts, algo_name;
2335  GetMaskAlgorithmDetails(*algo_id, algo, algo_name, algo_opts);
2336  if (algo_opts.empty()) {
2337  algo_opts.assign("default options used");
2338  }
 // When the program field is purely numeric the name column shows
 // algo_name; otherwise it shows the program field itself.
2339  if (s_IsNumericId(algo)) {
2340  retval << setw(13) << left << (*algo_id)
2341  << setw(40) << left << algo_name
2342  << setw(40) << left << algo_opts << endl;
2343  } else {
2344  retval << setw(13) << left << (*algo_id)
2345  << setw(40) << left << algo
2346  << setw(40) << left << algo_opts << endl;
2347  }
2348  }
2349  return CNcbiOstrstreamToString(retval);
2350 }
2351 
2352 static const string s_RestoreColon(const string &in) {
2353  const char l = 0x1;
2354  return NStr::Replace(in, string(l,1), ":");
2355 }
2356 
/// Split a stored mask algorithm description into its parts.
///
/// Two layouts are accepted, distinguished by the number of
/// ':'-separated fields:
///  - 2 fields: a numeric program id and the options; the program name
///    is looked up from the EBlast_filter_program enumeration.
///  - 4 fields: program, options and program name are taken from fields
///    0, 1 and 2 (field 3 is not used here).
///
/// @param desc          Stored description string. [in]
/// @param program       Receives the program field. [out]
/// @param program_name  Receives the program name. [out]
/// @param algo_opts     Receives the options. [out]
/// @throws CSeqDBException (eArgErr) for any other field count.
2357 static
2358 void s_GetDetails(const string & desc,
2359  string & program,
2360  string & program_name,
2361  string & algo_opts)
2362 {
 // The enum type info is fetched once and kept in a function static.
2363  static const CEnumeratedTypeValues* enum_type_vals = NULL;
2364  if (enum_type_vals == NULL) {
2365  enum_type_vals = GetTypeInfo_enum_EBlast_filter_program();
2366  }
2367  _ASSERT(enum_type_vals);
2368 
2369  vector<string> items;
2370  NStr::Split(desc, ":", items);
2371 
2372  if (items.size() == 2) {
 // NOTE(review): listing line 2373 is missing from this extract;
 // presumably it declares `pid`.
2374  pid = (EBlast_filter_program) NStr::StringToInt(items[0]);
2375  program.assign(items[0]);
2376  program_name.assign(enum_type_vals->FindName(pid, false));
2377  algo_opts.assign(s_RestoreColon(items[1]));
2378  } else if (items.size() == 4) {
2379  program.assign(s_RestoreColon(items[0]));
2380  program_name.assign(s_RestoreColon(items[2]));
2381  algo_opts.assign(s_RestoreColon(items[1]));
2382  } else {
2383  NCBI_THROW(CSeqDBException, eArgErr,
2384  "Error in stored mask algorithm description data.");
2385  }
2386 }
2387 
// Look up the stored description of one masking algorithm and split it
// into program, program name and options via s_GetDetails().
// NOTE(review): listing line 2388 (function name and the `algorithm_id`
// parameter) and listing lines 2415-2416 (the end of the not-found
// branch, presumably the throw) are missing from this extract.
2389  string & program,
2390  string & program_name,
2391  string & algo_opts)
2392 {
2393  CHECK_MARKER();
2394  CSeqDBLockHold locked(m_Atlas);
2395  m_Atlas.Lock(locked);
2396 
2397  string s;
2398  bool found;
2399 
2400  if (m_UseGiMask) {
2401  // TODO: to get description s
 // The GI mask path always reports the description as found.
2402  s = m_GiMask->GetDesc(algorithm_id, locked);
2403  found = true;
2404  } else {
2405  if (m_AlgorithmIds.Empty()) {
2406  x_BuildMaskAlgorithmList(locked);
2407  }
2408  found = m_AlgorithmIds.GetDesc(algorithm_id, s);
2409  }
2410 
2411  if (found == false) {
2412  CNcbiOstrstream oss;
2413  oss << "Filtering algorithm ID " << algorithm_id
2414  << " is not supported." << endl;
2417  }
2418 
2419  s_GetDetails(s, program, program_name, algo_opts);
2420 }
2421 
// Fill m_AlgorithmIds from the mask data column's meta data of every
// volume that has that column. Does nothing if m_AlgorithmIds already
// has content or if there is no mask data column.
// NOTE(review): listing lines 2422 (the function header), 2440 and 2478
// are missing from this extract.
2423 {
2424  m_Atlas.Lock(locked);
2425 
2426  if (! m_AlgorithmIds.Empty()) {
2427  return;
2428  }
2429 
2430  int col_id = x_GetMaskDataColumn(locked);
2431 
2432  if (col_id < 0) {
2433  // No mask data column exists, therefore, the algorithms list
2434  // is empty, and we are done.
2435  return;
2436  }
2437 
2438  CSeqDB_ColumnEntry & entry = *m_ColumnInfo[col_id];
2439 
2441 
2442  // Results needed:
2443  // 1. Map global ids to desc.
2444  // 2. Map local id+vol -> global id
2445 
2446  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
2447  // Get volume column #.
2448  int vol_col_id = entry.GetVolumeIndex(vol_idx);
2449 
2450  if (vol_col_id < 0) {
2451  continue;
2452  }
2453 
2454  CSeqDBVol * volp = m_VolSet.GetVolNonConst(vol_idx);
2455  const TStringMap & volmap =
2456  volp->GetColumnMetaData(vol_col_id, locked);
2457 
2458  // Check for identical algorithm descriptions (should not happen.)
2459 
2460  const string * dup = s_CheckUniqueValues(volmap);
2461 
2462  if (dup != NULL) {
2463  ostringstream oss;
2464  oss << "Error: volume (" << volp->GetVolName()
2465  << ") mask data has duplicates value (" << *dup << ")";
2466 
2467  NCBI_THROW(CSeqDBException, eArgErr, oss.str());
2468  }
2469 
 // Each meta data key is parsed as an integer algorithm id; the value
 // is its description.
2470  ITERATE(TStringMap, iter, volmap) {
2471  int id1 = NStr::StringToInt(iter->first);
2472  const string & desc1 = iter->second;
2473 
2474  m_AlgorithmIds.AddMapping(vol_idx, id1, desc1);
2475  }
2476  }
2477 
2479 }
2480 
2481 struct SReadInt4 {
2482  enum { numeric_size = 4 };
2483 
2484  static int Read(CBlastDbBlob & blob)
2485  {
2486  return blob.ReadInt4();
2487  }
2488 
2489  static void Read(CBlastDbBlob & blob, int n,
2490  CSeqDB::TSequenceRanges & ranges)
2491  {
2492  const void * src = (const void *) blob.ReadRaw(n*8);
2493  ranges.append(src, n);
2494  }
2495 };
2496 
2497 template<class TRead>
2498 void s_ReadRanges(int vol_algo,
2499  CSeqDB::TSequenceRanges & ranges,
2500  CBlastDbBlob & blob)
2501 {
2502  int num_ranges = TRead::Read(blob);
2503 
2504  for(int rng = 0; rng < num_ranges; rng++) {
2505  int algo = TRead::Read(blob);
2506  int num_pairs = TRead::Read(blob);
2507  if (algo == vol_algo) {
2508  TRead::Read(blob, num_pairs, ranges);
2509  break;
2510  }
2511  int skip_amt = num_pairs * 2 * TRead::numeric_size;
2512  blob.SeekRead(blob.GetReadOffset() + skip_amt);
2513  }
2514 }
2515 
// Get the masked ranges of one sequence for one masking algorithm.
// `ranges` is cleared first and stays empty when the volume has no mask
// blob for the OID or does not know the algorithm.
// NOTE(review): listing line 2516 (function name and the `oid`
// parameter) and listing line 2543 (the statement executed when no
// volume holds the OID) are missing from this extract.
2517  int algo_id,
2518  CSeqDB::TSequenceRanges & ranges)
2519 {
2520  CHECK_MARKER();
2521 
2522  // This reads the data written by CWriteDB_Impl::SetMaskData
2523  ranges.clear();
2524 
2525  CSeqDBLockHold locked(m_Atlas);
2526  m_Atlas.Lock(locked);
2527 
 // With GI masks in use the lookup is keyed by the sequence's GI.
2528  if (m_UseGiMask) {
2529  m_GiMask->GetMaskData(algo_id, x_GetSeqGI(oid, locked), ranges, locked);
2530  return;
2531  }
2532 
2533  if (m_AlgorithmIds.Empty()) {
2534  x_BuildMaskAlgorithmList(locked);
2535  }
2536 
2537  int vol_oid = 0, vol_idx = -1;
2538 
2539  CSeqDBVol * vol = const_cast<CSeqDBVol*>
2540  (m_VolSet.FindVol(oid, vol_oid, vol_idx));
2541 
2542  if (! vol) {
2544  }
2545 
2546  // Get the data.
2547 
2548  CBlastDbBlob blob;
2549  vol->GetColumnBlob(x_GetMaskDataColumn(locked), vol_oid, blob, false, locked);
2550 
2551  if (blob.Size() != 0) {
2552  // If there actually is mask data, then we need to do the
2553  // algorithm translation.
2554 
2555  int vol_algo_id = -1;
2556  try {
2557  vol_algo_id = m_AlgorithmIds.GetVolAlgo(vol_idx, algo_id);
2558  } // indicates that masking algo not in this volume (should not be fatal)
2559  catch(CSeqDBException & e) {
2560  return;
2561  }
2562 
2563  s_ReadRanges<SReadInt4>(vol_algo_id, ranges, blob);
2564  }
2565 
2566  //int seq_length = 0;
2567 }
2568 #endif
2569 
/// Resize the per-thread sequence buffer pool.
///
/// The requested count is normalized first: values below 1 become 0,
/// and exactly 1 becomes 0 unless force_mt is set.  The cache-id
/// bookkeeping (m_CacheID, m_NextCacheID) is reset on every call.
///
/// @param num_threads  Requested number of threads. [in]
/// @param force_mt     Keep a single-thread request at 1. [in]
2570 void CSeqDBImpl::SetNumberOfThreads(int num_threads, bool force_mt)
2571 {
2572  CSeqDBLockHold locked(m_Atlas);
2573  m_Atlas.Lock(locked);
2574 
2575  if (num_threads < 1) {
2576  num_threads = 0;
2577  } else if (num_threads == 1) {
2578  num_threads = force_mt ? 1 : 0;
2579  }
2580 
2581  if (num_threads > m_NumThreads ) {
2582 
 // Growing: add one buffer per additional thread, then have every
 // volume open its sequence file.
2583  for (int thread = m_NumThreads; thread < num_threads; ++thread) {
2584  m_CachedSeqs.push_back(new SSeqResBuffer());
2585  }
2586 
2587  for(int vol_idx = 0; vol_idx < m_VolSet.GetNumVols(); vol_idx++) {
2588  m_VolSet.GetVol(vol_idx)->OpenSeqFile(locked);
2589  }
2590  //m_Atlas.SetSliceSize();
2591 
2592  } else if (num_threads < m_NumThreads) {
2593 
 // Shrinking: drop buffers from the back of the pool.
2594  for (int thread = num_threads; thread < m_NumThreads; ++thread) {
2595  SSeqResBuffer * buffer = m_CachedSeqs.back();
 // NOTE(review): listing line 2596 (a statement using `buffer`
 // before it is removed) is missing from this extract.
2597  m_CachedSeqs.pop_back();
2598  delete buffer;
2599  }
2600  }
2601 
2602  m_CacheID.clear();
2603  m_NextCacheID = 0;
2604  m_NumThreads = num_threads;
2605 }
2606 
// Map the calling thread (CThread::GetSelf()) to a cache id. Ids are
// handed out in increasing order under the atlas lock; once
// m_NextCacheID reaches m_NumThreads it is set to -1, and from then on
// the id is read from m_CacheID without taking the lock.
// NOTE(review): listing line 2607 (the function header declaring
// `locked`) is missing from this extract.
2608 {
2609  int threadID = CThread::GetSelf();
2610 
 // Lock-free path, taken once every id has been assigned.
2611  if (m_NextCacheID < 0)
2612  return m_CacheID[threadID];
2613 
2614  int retval;
2615  m_Atlas.Lock(locked);
2616 
2617  if (m_CacheID.find(threadID) == m_CacheID.end()) {
2618  m_CacheID[threadID] = m_NextCacheID++;
2619  }
2620  retval = m_CacheID[threadID];
2621  if (m_NextCacheID == m_NumThreads) {
2622  m_NextCacheID = -1;
2623  }
2624 
2625  m_Atlas.Unlock(locked);
2626  return retval;
2627 }
2628 
// Pass `mbit` to SetMemBit() on every volume of the volume set.
// NOTE(review): listing line 2629 (the function header declaring `mbit`)
// is missing from this extract.
2630 {
2631  int nvols = m_VolSet.GetNumVols();
2632  for (int vol = 0; vol < nvols; ++vol) {
2633  m_VolSet.GetVolNonConst(vol)->SetMemBit(mbit);
2634  }
2635 }
2636 
2637 void CSeqDBImpl::SetVolsOidMaskType(int oid_mask_type)
2638 {
2639  int nvols = m_VolSet.GetNumVols();
2640  for (int vol = 0; vol < nvols; ++vol) {
2641  m_VolSet.GetVolNonConst(vol)->SetOidMaskType(oid_mask_type);
2642  }
2643 }
2644 
/// Write this object's state to a debug dump context.
///
/// Sets the frame name, dumps the CObject base, then logs the members
/// listed below one by one, in this order.
///
/// @param ddc    Dump context to write to. [in]
/// @param depth  Passed to the base class dump and to the Log() calls
///               that take a depth argument. [in]
2645 void CSeqDBImpl::DebugDump(CDebugDumpContext ddc, unsigned int depth) const
2646 {
2647  ddc.SetFrame("CSeqDBImpl");
2648  CObject::DebugDump(ddc, depth);
2649  ddc.Log("m_DBNames", m_DBNames);
2650  ddc.Log("m_Aliases", &m_Aliases, depth);
2651  ddc.Log("m_OIDList", m_OIDList, depth);
2652  ddc.Log("m_RestrictBegin", m_RestrictBegin);
2653  ddc.Log("m_RestrictEnd", m_RestrictEnd);
2654  ddc.Log("m_NextChunkOID", m_NextChunkOID);
2655  ddc.Log("m_NumSeqs", m_NumSeqs);
2656  ddc.Log("m_NumSeqsStats", m_NumSeqsStats);
2657  ddc.Log("m_NumOIDs", m_NumOIDs);
2658  ddc.Log("m_TotalLength", m_TotalLength);
2659  ddc.Log("m_ExactTotalLength", m_ExactTotalLength);
2660  ddc.Log("m_TotalLengthStats", m_TotalLengthStats);
2661  ddc.Log("m_VolumeLength", m_VolumeLength);
2662  ddc.Log("m_MaxLength", m_MaxLength);
2663  ddc.Log("m_MinLength", m_MinLength);
 // m_SeqType is a single character; it is logged as a one-character string.
2664  ddc.Log("m_SeqType", string(1, m_SeqType));
2665  ddc.Log("m_OidListSetup", m_OidListSetup);
2666  ddc.Log("m_NeedTotalsScan", m_NeedTotalsScan);
2667  ddc.Log("m_Date", m_Date);
2668  ddc.Log("m_UseGiMask", m_UseGiMask);
2669  ddc.Log("m_GiMask", m_GiMask);
2670  ddc.Log("m_NumThreads", m_NumThreads);
2671  ddc.Log("m_NextCacheID", m_NextCacheID);
2672 }
2673 
// NOTE(review): listing lines 2674 (the function header) and 2676 (the
// only statement of the body) are missing from this extract; nothing
// about this function can be stated from what is visible.
2675 {
2677 
2678 }
2679 
// Return the number of volumes reported by m_VolSet.
// NOTE(review): listing line 2680 (the function header) is missing from
// this extract.
2681 {
2682  return m_VolSet.GetNumVols();
2683 }
2684 
2685 void CSeqDBImpl::GetLMDBFileNames(vector<string> & lmdb_list) const
2686 {
2687  m_LMDBSet.GetLMDBFileNames(lmdb_list);
2688 }
2689 
2690 
2691 void CSeqDBImpl::x_GetTaxIdsForSeqId(const CSeq_id & seq_id, int oid, CBlast_def_line::TTaxIds & taxid_set)
2692 {
2693 
2694  CSeqDBLockHold locked(m_Atlas);
2695  CRef<CBlast_def_line_set> defline_set = x_GetHdr(oid, locked);
2696 
2697  if ((! defline_set.Empty()) && defline_set->CanGet()) {
2698  ITERATE(list< CRef<CBlast_def_line> >, defline, defline_set->Get()) {
2699  if (! (*defline)->CanGetSeqid()) {
2700  continue;
2701  }
2702 
2703  ITERATE(list< CRef<CSeq_id> >, df_seqid, (*defline)->GetSeqid()) {
2704  if((*df_seqid)->Match(seq_id)) {
2705  CBlast_def_line::TTaxIds df_taxids = (*defline)->GetTaxIds();
2706  if(!df_taxids.empty()) {
2707  taxid_set.insert(df_taxids.begin(), df_taxids.end());
2708  }
2709  return;
2710  }
2711  }
2712  }
2713  }
2714 }
2715 
2716 void CSeqDBImpl::GetTaxIdsForSeqId(const CSeq_id & seq_id, vector<TTaxId> & taxids)
2717 {
2718  vector<int> oids;
2719  SeqidToOids(seq_id, oids, true);
2720  taxids.clear();
2721  CBlast_def_line::TTaxIds taxid_set;
2722  for (unsigned int i=0; i < oids.size(); i++) {
2723  x_GetTaxIdsForSeqId(seq_id, oids[i], taxid_set);
2724  }
2725 
2726  if (!taxid_set.empty()) {
2727  taxids.insert(taxids.begin(), taxid_set.begin(), taxid_set.end());
2728  }
2729 }
2730 
2732 
Declaration of ADT to retrieve sequences for the BLAST engine.
#define BLAST_SEQSRC_MINLENGTH
Default minimal sequence length.
Definition: blast_seqsrc.h:205
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
int GetReadOffset() const
Get the current read pointer offset.
Definition: seqdbblob.cpp:557
Int4 ReadInt4()
Read a 4 byte integer at the pointer (and move the pointer).
Definition: seqdbblob.cpp:139
int Size() const
Get size of blob contents.
Definition: seqdbblob.cpp:518
void Clear()
Clear all owned data and reference an empty string.
Definition: seqdbblob.cpp:58
void SeekRead(int offset)
Move the read pointer to a specific location.
Definition: seqdbblob.cpp:547
const char * ReadRaw(int size)
Read raw data (moving the read pointer).
Definition: seqdbblob.cpp:416
Definition: Dbtag.hpp:53
void SetFrame(const string &frame)
Definition: ddumpable.cpp:137
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
Definition: ddumpable.cpp:151
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CSeqDBAliasFile class.
Definition: seqdbalias.hpp:982
void GetAliasFileValues(TAliasFileValues &afv, const CSeqDBVolSet &volset)
Get Name/Value Data From Alias Files.
Int8 GetNumSeqsStats(const CSeqDBVolSet &volset) const
Get the number of sequences available.
Uint8 GetTotalLength(const CSeqDBVolSet &volset) const
Get the total length of the set of databases.
Uint8 GetTotalLengthStats(const CSeqDBVolSet &volset) const
Get the total length of the set of databases.
bool NeedTotalsScan(const CSeqDBVolSet &volset) const
Check whether a db scan is needed to compute correct totals.
Int4 GetMinLength(const CSeqDBVolSet &volset) const
Get the minimum sequence length.
Int8 GetNumOIDs(const CSeqDBVolSet &volset) const
Get the size of the OID range.
void GetMaskList(vector< string > &mask_list)
Get Gi-based Mask Names From Alias Files.
void FindVolumePaths(vector< string > &vols, vector< string > *alias, bool recursive) const
Find the base names of volumes.
bool HasFilters()
Check if any volume filtering exists.
string GetTitle(const CSeqDBVolSet &volset) const
Get the title.
Int8 GetNumSeqs(const CSeqDBVolSet &volset) const
Get the number of sequences available.
CRef< CSeqDB_FilterTree > GetFilterTree()
Get filtering tree for all volumes.
Guard object for the SeqDBAtlas singleton.
Definition: seqdbatlas.hpp:632
CSeqDBAtlas & Get()
Get the CSeqDBAtlas object.
Definition: seqdbatlas.cpp:331
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:298
static void RetRegion(const char *datap)
Free allocated memory.
Definition: seqdbatlas.cpp:226
Uint8 GetSliceSize()
Get the current slice size.
Definition: seqdbatlas.hpp:497
void Lock(CSeqDBLockHold &locked)
Lock the atlas.
Definition: seqdbatlas.hpp:463
void Unlock(CSeqDBLockHold &locked)
Unlock the atlas.
Definition: seqdbatlas.hpp:480
CSeqDBException.
Definition: seqdbcommon.hpp:73
CSeqDBGiList.
int GetNumGis() const
Get the number of GIs in the array.
void GetGiList(vector< TGi > &gis) const
Get the gi list.
void GetTiList(vector< TTi > &tis) const
Get the ti list.
int GetNumTis() const
Get the number of TIs in the array.
CSeqDBGiMask class.
Definition: seqdbgimask.hpp:57
void GetMaskData(int algo_id, TGi gi, CSeqDB::TSequenceRanges &ranges, CSeqDBLockHold &locked)
Get the mask data for GI.
Definition: seqdbgimask.cpp:66
int GetAlgorithmId(const string &algo_name) const
Get the mask algorithm id for a string id.
const string & GetDesc(int algo_id, CSeqDBLockHold &locked)
Get the mask description for algo id.
Definition: seqdbgimask.cpp:57
void GetAvailableMaskAlgorithms(vector< int > &algo) const
Get the available mask algorithm ids.
Definition: seqdbgimask.hpp:99
SeqDB ID list for performing boolean set operations.
bool Blank() const
Check if an ID list is blank.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist)
Get gi to taxid map for an OID.
Definition: seqdbimpl.cpp:448
int m_NumSeqsStats
Number of sequences in the overall database.
Definition: seqdbimpl.hpp:1313
void GetDBTaxIds(set< TTaxId > &tax_ids)
Get all unique tax ids from db.
Definition: seqdbimpl.cpp:1315
CSeqDBAliasFile m_Aliases
Alias node hierarchy management object.
Definition: seqdbimpl.hpp:1287
char GetSeqType() const
Get the sequence type.
Definition: seqdbimpl.cpp:994
int x_GetSeqBuffer(SSeqResBuffer *buffer, int oid, const char **seq) const
Get sequence from buffer.
Definition: seqdbimpl.cpp:670
CRef< CSeqDBGiList > m_UserGiList
The User GI list for the entire CSeqDB object.
Definition: seqdbimpl.hpp:1343
void x_InitIdSet()
Initialize Id Set.
Definition: seqdbimpl.cpp:1917
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the OID corresponding to the offset given in residues, into the database as a whole.
Definition: seqdbimpl.cpp:1471
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
Definition: seqdbimpl.cpp:1672
int GetMinLength() const
Returns the length of the smallest sequence in the database.
Definition: seqdbimpl.cpp:1081
int m_NumThreads
number of thread clients
Definition: seqdbimpl.hpp:1388
void x_RetSeqBuffer(SSeqResBuffer *buffer) const
Return sequence to buffer.
Definition: seqdbimpl.cpp:659
CSeqDBIdSet m_IdSet
The positive or negative ID list for the entire CSeqDB object.
Definition: seqdbimpl.hpp:1349
int x_GetCacheID(CSeqDBLockHold &locked) const
Get local cache ID for current thread.
Definition: seqdbimpl.cpp:2607
int x_GetMinLength() const
Returns the shortest sequence lengths of all volumes.
Definition: seqdbimpl.cpp:982
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist)
Get gi to taxid map for an OID.
Definition: seqdbimpl.cpp:525
~CSeqDBImpl()
Destructor.
Definition: seqdbimpl.cpp:204
string m_Date
Cached most recent date string for GetDate().
Definition: seqdbimpl.hpp:1355
void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids)
Definition: seqdbimpl.cpp:1355
CRef< CSeqDBOIDList > m_OIDList
The list of included OIDs (construction is deferred).
Definition: seqdbimpl.hpp:1295
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
Definition: seqdbimpl.cpp:2674
CSeqDBImpl(const string &db_name_list, char prot_nucl, int oid_begin, int oid_end, CSeqDBGiList *gi_list, CSeqDBNegativeList *neg_list, CSeqDBIdSet idset, bool use_atlas_lock)
Standard Constructor.
Definition: seqdbimpl.cpp:45
void x_BuildMaskAlgorithmList(CSeqDBLockHold &locked)
Get a list of algorithm IDs for which mask data exists.
Definition: seqdbimpl.cpp:2422
int m_MaskDataColumn
Column ID for mask data column.
Definition: seqdbimpl.hpp:1382
int GetMaskAlgorithmId(const string &algo_name)
Get the numeric ID for a algorithm name.
Definition: seqdbimpl.cpp:2300
CFastMutex m_OIDLock
Mutex which synchronizes access to the OID list.
Definition: seqdbimpl.hpp:1304
int GetColumnId(const string &title)
Get an ID number for a given column title.
Definition: seqdbimpl.cpp:1978
bool GiToOidwFilterCheck(TGi gi, int &oid)
GiToOid is meant to simply return the OID for a GI if one exists. This method finds the OID and checks if...
Definition: seqdbimpl.cpp:1198
void GetColumnBlob(int col_id, int oid, bool keep, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
Definition: seqdbimpl.cpp:2090
void GetStringBounds(string *low_id, string *high_id, int *count)
Get String Bounds.
Definition: seqdbimpl.cpp:1806
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
Definition: seqdbimpl.cpp:183
TGi x_GetSeqGI(int oid, CSeqDBLockHold &locked)
Look up for the GI of a sequence.
Definition: seqdbimpl.cpp:894
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Definition: seqdbimpl.cpp:839
Uint8 m_TotalLength
Total length of database (in bases).
Definition: seqdbimpl.hpp:1319
void GetMaskAlgorithmDetails(int algorithm_id, string &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
Definition: seqdbimpl.cpp:2388
bool m_UseGiMask
Which type of masks are we using?
Definition: seqdbimpl.hpp:1376
CSeqDBAtlas & m_Atlas
Reference to memory management layer.
Definition: seqdbimpl.hpp:1281
CRef< CSeqDBNegativeList > m_NegativeList
The Negative ID list for the entire CSeqDB object.
Definition: seqdbimpl.hpp:1346
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Invoke the garbage collector to free up memory.
Definition: seqdbimpl.cpp:2570
TGi GetSeqGI(int oid)
Look up for the GI of a sequence.
Definition: seqdbimpl.cpp:820
int GetMaxLength() const
Returns the length of the largest sequence in the database.
Definition: seqdbimpl.cpp:1075
int m_RestrictEnd
Ending OID as provided to the constructor.
Definition: seqdbimpl.hpp:1301
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv)
Get Oid list for input tax ids.
Definition: seqdbimpl.cpp:1293
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
Definition: seqdbimpl.cpp:653
void FlushOffsetRangeCache()
Flush all offset ranges cached.
Definition: seqdbimpl.cpp:1854
void GetAllTaxIDs(int oid, set< TTaxId > &taxids)
Get all tax ids (leaf and non-leaf) for an oid.
Definition: seqdbimpl.cpp:509
int GetNumOfVols() const
Definition: seqdbimpl.cpp:2680
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
Definition: seqdbimpl.cpp:876
void GetLMDBFileNames(vector< string > &lmdb_list) const
Definition: seqdbimpl.cpp:2685
const string & GetDBNameList() const
Get list of database names.
Definition: seqdbimpl.cpp:1087
void x_FillSeqBuffer(SSeqResBuffer *buffer, int oid) const
Fill up the buffer.
Definition: seqdbimpl.cpp:687
CObjectIStreamAsnBinary * reusable_inpstr
Definition: seqdbimpl.hpp:1425
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
Definition: seqdbimpl.cpp:1865
char m_SeqType
Type of sequences used by this instance.
Definition: seqdbimpl.hpp:1337
void x_GetTaxIdsForSeqId(const CSeq_id &seq_id, int oid, CBlast_def_line::TTaxIds &taxid_set)
Definition: seqdbimpl.cpp:2691
CRef< CBioseq > GetBioseq(int oid, TGi target_gi, const CSeq_id *target_seq_id, bool seqdata)
Get a CBioseq for a sequence.
Definition: seqdbimpl.cpp:606
static void FindVolumePaths(const string &dbname, char prot_nucl, vector< string > &paths, vector< string > *alias_paths, bool recursive, bool expand_links)
Find volume paths.
Definition: seqdbimpl.cpp:1526
void ListColumns(vector< string > &titles)
List columns titles found in this database.
Definition: seqdbimpl.cpp:1963
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
Definition: seqdbimpl.cpp:776
bool m_OidListSetup
True if OID list setup is done (or was not required).
Definition: seqdbimpl.hpp:1340
Uint8 m_TotalLengthStats
Total length of database (in bases).
Definition: seqdbimpl.hpp:1325
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
Definition: seqdbimpl.cpp:1133
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
Definition: seqdbimpl.cpp:1556
Uint8 x_GetTotalLength() const
Returns the sum of the lengths of all available sequences.
Definition: seqdbimpl.cpp:958
Uint8 GetExactTotalLength()
Returns the exact sum of the lengths of all available sequences.
Definition: seqdbimpl.cpp:851
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
Definition: seqdbimpl.cpp:2716
CRef< CSeqDBGiMask > m_GiMask
Gi-based mask.
Definition: seqdbimpl.hpp:1379
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
Definition: seqdbimpl.cpp:736
map< string, int > m_ColumnTitleMap
Map string column titles to global column IDs.
Definition: seqdbimpl.hpp:1364
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
Definition: seqdbimpl.cpp:2027
CRef< CBlast_def_line_set > x_GetHdr(int oid, CSeqDBLockHold &locked)
Get the sequence header data.
Definition: seqdbimpl.cpp:1055
Uint8 x_GetVolumeLength() const
Returns the sum of the lengths of all volumes.
Definition: seqdbimpl.cpp:970
int GetSequence(int oid, const char **buffer) const
Get the sequence data for a sequence.
Definition: seqdbimpl.cpp:718
void x_GetOidList(CSeqDBLockHold &locked)
Build the OID list.
Definition: seqdbimpl.cpp:229
int m_NextChunkOID
"Bookmark" for multithreaded chunk-type OID iteration.
Definition: seqdbimpl.hpp:1307
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
Definition: seqdbimpl.cpp:1622
int x_GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Definition: seqdbimpl.cpp:937
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
Definition: seqdbimpl.cpp:632
string GetAvailableMaskAlgorithmDescriptions()
Returns a formatted string with the list of available masking algorithms in this database for display...
Definition: seqdbimpl.cpp:2317
bool CheckOrFindOID(int &next_oid)
Find an included OID, incrementing next_oid if necessary.
Definition: seqdbimpl.cpp:265
void GetPigBounds(int *low_id, int *high_id, int *count)
Get PIG Bounds.
Definition: seqdbimpl.cpp:1775
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids, bool multi)
Translate a CSeq-id to a list of OIDs.
Definition: seqdbimpl.cpp:1389
int GetNumSeqsStats() const
Returns the number of sequences available.
Definition: seqdbimpl.cpp:833
@ kUnknownTitle
This column is not heard of yet.
Definition: seqdbimpl.hpp:1369
@ kColumnNotFound
This column does not exist (we checked).
Definition: seqdbimpl.hpp:1372
int x_GetMaxLength() const
Returns the longest sequence lengths of all volumes.
Definition: seqdbimpl.cpp:976
void SetVolsOidMaskType(int oid_mask_type)
Definition: seqdbimpl.cpp:2637
int GetAmbigSeq(int oid, char **buffer, int nucl_code, SSeqDBSlice *region, ESeqDBAllocType strategy, CSeqDB::TSequenceRanges *masks=NULL) const
Get a pointer to a range of sequence data with ambiguities.
Definition: seqdbimpl.cpp:754
int m_MaxLength
Longest database sequence.
Definition: seqdbimpl.hpp:1331
int m_RestrictBegin
Starting OID as provided to the constructor.
Definition: seqdbimpl.hpp:1298
int m_NextCacheID
Definition: seqdbimpl.hpp:1392
CSeqDBLMDBSet m_LMDBSet
Definition: seqdbimpl.hpp:1292
void GetGiBounds(TGi *low_id, TGi *high_id, int *count)
Get GI Bounds.
Definition: seqdbimpl.cpp:1743
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
Definition: seqdbimpl.cpp:2281
int GetSeqLengthApprox(int oid) const
Get the approximate sequence length.
Definition: seqdbimpl.cpp:429
Uint8 m_VolumeLength
Total length of all database volumes combined (in bases).
Definition: seqdbimpl.hpp:1328
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids)
Definition: seqdbimpl.cpp:1343
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Dump debug information for this object.
Definition: seqdbimpl.cpp:2645
Uint8 x_GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
Definition: seqdbimpl.cpp:964
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx)
Returns the sum of the sequence lengths.
Definition: seqdbimpl.cpp:1632
CSeqDB_IdRemapper m_AlgorithmIds
Algorithm ID mapping.
Definition: seqdbimpl.hpp:1385
bool x_CheckOrFindOID(int &next_oid, CSeqDBLockHold &locked)
Get the next included oid.
Definition: seqdbimpl.cpp:272
int m_MinLength
Shortest database sequence.
Definition: seqdbimpl.hpp:1334
void GetMaskData(int oid, int algo_id, CSeqDB::TSequenceRanges &ranges)
Get masked ranges of a sequence.
Definition: seqdbimpl.cpp:2516
int x_GetColumnId(const string &title, CSeqDBLockHold &locked)
Get the Column ID for the column with the specified title.
Definition: seqdbimpl.cpp:1986
bool TiToOid(Int8 ti, int &oid)
Translate a TI to an OID.
Definition: seqdbimpl.cpp:1146
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
Definition: seqdbimpl.cpp:870
int x_GetSeqLength(int oid) const
Get the sequence length.
Definition: seqdbimpl.cpp:412
void SetVolsMemBit(int mbit)
Set the membership bit of all volumes.
Definition: seqdbimpl.cpp:2629
Uint8 m_ExactTotalLength
Total length of database (in bases).
Definition: seqdbimpl.hpp:1322
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
Definition: seqdbimpl.cpp:1167
void HashToOids(unsigned hash, vector< int > &oids)
Get the OIDs for a given sequence hash.
Definition: seqdbimpl.cpp:1881
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
Definition: seqdbimpl.cpp:1836
list< CRef< CSeq_id > > GetSeqIDs(int oid)
Gets a list of sequence identifiers.
Definition: seqdbimpl.cpp:797
void AccessionToOids(const string &acc, vector< int > &oids)
Find OIDs matching the specified string.
Definition: seqdbimpl.cpp:1234
int x_GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdbimpl.cpp:882
string m_DBNames
The list of database names provided to the constructor.
Definition: seqdbimpl.hpp:1284
void FlushSeqMemory()
Flush unnecessarily held memory.
Definition: seqdbimpl.cpp:1114
CRef< CBlast_def_line_set > GetHdr(int oid)
Get the sequence header data.
Definition: seqdbimpl.cpp:1046
vector< SSeqResBuffer * > m_CachedSeqs
Cached sequences.
Definition: seqdbimpl.hpp:1411
int GetNumSeqs() const
Returns the number of sequences available.
Definition: seqdbimpl.cpp:827
vector< CRef< CSeqDB_ColumnEntry > > m_ColumnInfo
Map assigned global column IDs to column information.
Definition: seqdbimpl.hpp:1360
int x_GetNumSeqsStats() const
Returns the number of sequences available.
Definition: seqdbimpl.cpp:925
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
Definition: seqdbimpl.cpp:845
std::map< int, int > m_CacheID
mapping thread ID to storage ID
Definition: seqdbimpl.hpp:1391
int x_GetMaskDataColumn(CSeqDBLockHold &locked)
Open the mask data column (if necessary) and return its id.
Definition: seqdbimpl.cpp:2125
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
Definition: seqdbimpl.cpp:1119
CSeqDB::EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state)
Return a chunk of OIDs, and update the OID bookmark.
Definition: seqdbimpl.cpp:301
string GetDate() const
Returns the construction date of the database.
Definition: seqdbimpl.cpp:1003
CSeqDBVolSet m_VolSet
Set of volumes used by this database instance.
Definition: seqdbimpl.hpp:1290
string GetTitle() const
Returns the database title.
Definition: seqdbimpl.cpp:988
void ResetInternalChunkBookmark()
Restart chunk iteration at the beginning of the database.
Definition: seqdbimpl.cpp:398
string x_FixString(const string &s) const
Adjust string length to offset of first embedded NUL byte.
Definition: seqdbimpl.cpp:1101
int m_NumOIDs
Size of databases OID range.
Definition: seqdbimpl.hpp:1316
bool m_NeedTotalsScan
True if this configuration cannot deduce totals without a scan.
Definition: seqdbimpl.hpp:1352
int m_NumSeqs
Number of sequences in the overall database.
Definition: seqdbimpl.hpp:1310
void x_ScanTotals(bool approx, int *seq_count, Uint8 *base_count, int *max_count, int *min_count, CSeqDBLockHold &locked)
Compute totals via iteration.
Definition: seqdbimpl.cpp:1565
int GetSeqLength(int oid) const
Get the sequence length.
Definition: seqdbimpl.cpp:405
CSeqDBIdSet GetIdSet()
Get IdSet list attached to this database.
Definition: seqdbimpl.cpp:1956
bool OidToGi(int oid, TGi &gi)
Translate an OID to a GI.
Definition: seqdbimpl.cpp:1216
void AccessionToOids(const string &acc, vector< TOid > &oids) const
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const
void GetLMDBFileNames(vector< string > &lmdb_list) const
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void GetDBTaxIds(set< TTaxId > &tax_ids) const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
CSeqDBLockHold.
Definition: seqdbatlas.hpp:167
CSeqDBNegativeList.
const vector< string > & GetSiList()
const vector< TTi > & GetTiList()
Build ID set for this negative list.
const vector< TGi > & GetGiList()
Build ID set for this negative list.
CSeqDBOIDList.
void UnLease()
Deallocate the memory ranges owned by this object.
bool CheckOrFindOID(TOID &next_oid) const
Find an included oid from the specified point.
static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)
Get the taxonomy names for a given tax id.
Definition: seqdbtax.cpp:219
void OptimizeGiLists()
Optimize the GI list configuration.
void UnLease()
Return storage held by the volumes.
const CSeqDBVol * GetVol(int i) const
Find a volume by index.
Uint8 GetVolumeSetLength() const
Find total volume length for all volumes.
CSeqDBVol * GetVolNonConst(int i)
Find a volume by index.
int GetNumVols() const
Get the number of volumes.
int GetMaxLength() const
CSeqDBVol * FindVol(int oid, int &vol_oid) const
Find a volume by OID.
int GetNumOIDs() const
Get the size of the OID range.
int GetVolOIDStart(int i) const
Get the first OID in a volume.
CSeqDBVol class.
Definition: seqdbvol.hpp:169
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
Definition: seqdbvol.cpp:2787
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
Definition: seqdbvol.cpp:2773
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
Definition: seqdbvol.cpp:3258
const string & GetVolName() const
Get the volume name.
Definition: seqdbvol.hpp:452
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
Definition: seqdbvol.cpp:296
void OpenSeqFile(CSeqDBLockHold &locked) const
Open sequence file.
Definition: seqdbvol.cpp:111
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
Definition: seqdbvol.cpp:3376
int GetNumOIDs() const
Get the number of OIDs for this volume.
Definition: seqdbvol.cpp:2370
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
Definition: seqdbvol.cpp:3093
void FlushOffsetRangeCache()
Flush all offset ranges cached.
Definition: seqdbvol.cpp:3183
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
Definition: seqdbvol.cpp:3293
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
Definition: seqdbvol.cpp:319
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
Definition: seqdbvol.cpp:280
void SetOidMaskType(int oid_masks) const
Definition: seqdbvol.hpp:887
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
Definition: seqdbvol.cpp:2471
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
Definition: seqdbvol.cpp:3068
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
Definition: seqdbvol.cpp:2822
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
Definition: seqdbvol.cpp:3277
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
Definition: seqdbvol.cpp:2395
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
Definition: seqdbvol.cpp:2438
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
Definition: seqdbvol.hpp:879
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
Definition: seqdbvol.cpp:1880
string GetDate() const
Get the formatting date of the volume.
Definition: seqdbvol.cpp:2380
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
Definition: seqdbvol.cpp:3116
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
Definition: seqdbvol.cpp:3233
Something else yet again etc.
Definition: seqdbfilter.hpp:51
int GetEnd() const
Get OID after last included OID.
@ eOidRange
OID Range [start, end).
Definition: seqdbfilter.hpp:59
int GetBegin() const
Get first included OID.
Database-wide column information.
Definition: seqdbcol.hpp:247
const map< string, string > & GetMap()
Get the metadata map.
Definition: seqdbcol.hpp:283
void SetHaveMap()
Indicate that the metadata map is now complete.
Definition: seqdbcol.hpp:270
int GetVolumeIndex(int volnum)
Get a volume-specific column ID.
Definition: seqdbcol.hpp:256
bool HaveMap()
Determine if we have the metadata map yet.
Definition: seqdbcol.hpp:264
void SetMapValue(const string &k, const string &v)
Add a meta-data key/value association.
Definition: seqdbcol.cpp:368
bool HasFilter() const
Check whether this tree represents any volume filtering.
Definition: seqdbfilter.cpp:81
const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const
Get child nodes attached to this node.
vector< CRef< CSeqDB_AliasMask > > TFilters
Type used to store lists of filters found here.
int m_CacheRealAlgo
Cached list of real algorithms for BuildVolAlgos.
Definition: seqdbimpl.hpp:122
int GetVolAlgo(int vol_idx, int algo_id)
Build a list of volume algorithm IDs.
Definition: seqdbimpl.cpp:2242
bool GetDesc(int algorithm_id, string &desc)
Is this object populated?
Definition: seqdbimpl.cpp:2232
void AddMapping(int vol_id, int id, const string &desc)
Register a volume's algorithm definition.
Definition: seqdbimpl.cpp:2190
map< int, string > m_IdToDesc
Map of real IDs to descriptions.
Definition: seqdbimpl.hpp:110
void GetIdList(vector< int > &algorithms)
Get a list of user (real) IDs available here.
Definition: seqdbimpl.cpp:2181
map< string, int > m_DescToId
Map of descriptions to real IDs.
Definition: seqdbimpl.hpp:113
CSeqDB_IdRemapper()
Constructor.
Definition: seqdbimpl.cpp:2176
int GetAlgoId(const string &id)
Translate a string algorithm ID to a numeric algorithm ID.
Definition: seqdbimpl.cpp:2269
int m_CacheVolAlgo
Cached list of volume algorithms for BuildVolAlgos.
Definition: seqdbimpl.hpp:128
int RealToVol(int vol_idx, int algo_id)
Translate a real algorithm ID to a volume algorithm ID.
Definition: seqdbimpl.cpp:2252
void SetNotEmpty()
Is this object populated?
Definition: seqdbimpl.hpp:82
map< int, map< int, int > > m_RealIdToVolumeId
Map of volume# to map of real id to volume-based id.
Definition: seqdbimpl.hpp:116
int m_CacheVolIndex
Cached volume index for BuildVolAlgos.
Definition: seqdbimpl.hpp:125
bool Empty()
Is this object populated?
Definition: seqdbimpl.hpp:73
int m_NextId
Next unassigned synthetic ID.
Definition: seqdbimpl.hpp:107
EOidListType
Indicates how block of OIDs was returned.
Definition: seqdb.hpp:167
@ eOidRange
Definition: seqdb.hpp:169
@ eOidList
Definition: seqdb.hpp:168
ESummaryType
Types of summary information available.
Definition: seqdb.hpp:183
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
Definition: seqdb.hpp:185
@ eFilteredRange
Sum of included sequences with OIDs within the iteration range.
Definition: seqdb.hpp:191
@ eFilteredAll
Values from alias files, or summation over all included sequences.
Definition: seqdb.hpp:188
static const string kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
Definition: seqdb.hpp:316
CStringException –.
Definition: ncbistr.hpp:4505
CTime –.
Definition: ncbitime.hpp:296
void clear()
Definition: map.hpp:169
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
void clear()
Definition: set.hpp:153
bool empty() const
Definition: set.hpp:133
const_iterator end() const
Definition: set.hpp:136
static unsigned char depth[2 *(256+1+29)+1]
#define C(s)
Definition: common.h:231
#define INVALID_GI
Definition: ncbimisc.hpp:1089
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
Definition: enumerated.cpp:146
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
Definition: serial.cpp:60
const string AsFastaString(void) const
Definition: Seq_id.cpp:2265
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
Definition: ncbiobj.cpp:988
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
Definition: ncbistr.hpp:330
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3310
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
static TID GetSelf(void)
Definition: ncbithr.cpp:515
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
bool CanGet(void) const
Check if it is safe to call Get method.
const Tdata & Get(void) const
Get the member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
Definition: Dbtag_.hpp:214
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
bool IsPrf(void) const
Check if variant Prf is selected.
Definition: Seq_id_.hpp:916
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsPir(void) const
Check if variant Pir is selected.
Definition: Seq_id_.hpp:853
@ e_General
for other databases
Definition: Seq_id_.hpp:105
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
yy_size_t n
int len
static MDB_envinfo info
Definition: mdb_load.c:37
CMSAToolJob::TIdMap TIdMap
const TYPE & Get(const CNamedParameterList *param)
map< string, string > TStringMap
Definition: nc_utils.hpp:59
ESERV_Algo algo
#define INT4_MAX
largest number represented by signed int
Definition: ncbi_std.h:141
bool approx(T x_, T y_, T eps_)
Definition: njn_approx.hpp:73
T max(T x_, T y_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
static char tmp[2048]
Definition: utf8.c:42
static pcre_uint8 * buffer
Definition: pcretest.c:1051
#define K
#define INIT_CLASS_MARK()
Marker initializer for constructor.
Definition: seqdbatlas.hpp:108
#define CHECK_MARKER()
Assertion to verify the marker.
Definition: seqdbatlas.hpp:111
#define BREAK_MARKER()
Make the marker of this class invalid.
Definition: seqdbatlas.hpp:122
ESeqDBAllocType
Certain methods have an "Alloc" version.
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
Definition: seqdbcommon.hpp:58
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
bool IsStringId(const CSeq_id &id)
Determine if id is a string id.
unsigned SeqDB_SequenceHash(const char *sequence, int length)
Returns a hash value computed from a sequence of the given length.
Definition: seqdbobj.cpp:146
EBlastDbVersion
BLAST database version.
Definition: seqdbcommon.hpp:51
@ eBDB_Version4
Definition: seqdbcommon.hpp:52
@ eBDB_Version5
Definition: seqdbcommon.hpp:53
const U & SeqDB_MapFind(const std::map< T, U > &m, const T &k, const U &dflt)
Find a map value or return a default.
static const string * s_CheckUniqueValues(const map< string, string > &m)
Definition: seqdbimpl.cpp:2151
void s_ReadRanges(int vol_algo, CSeqDB::TSequenceRanges &ranges, CBlastDbBlob &blob)
Definition: seqdbimpl.cpp:2498
static bool s_IsNumericId(const string &id)
Definition: seqdbimpl.cpp:2146
static void s_GetDetails(const string &desc, string &program, string &program_name, string &algo_opts)
Definition: seqdbimpl.cpp:2358
bool s_Contains(const C &c, const K &k)
Definition: seqdbimpl.cpp:2141
void s_AccumulateMinMaxCount(TId low_in, TId high_in, int count_in, TId *low_out, TId *high_out, int *count_out, bool set_all)
Accumulate optional min, max, and count.
Definition: seqdbimpl.cpp:1713
static const string s_RestoreColon(const string &in)
Definition: seqdbimpl.cpp:2352
The top level of the private implementation layer for SeqDB.
Structure to buffer multiple TSeqRes.
Definition: seqdbimpl.hpp:1401
Structure to keep sequence retrieval results.
Definition: seqdbimpl.hpp:1395
const char * address
Definition: seqdbimpl.hpp:1397
List of sequence offset ranges.
Definition: seqdb.hpp:236
void append(const void *src, size_type num_elements)
Append extra elements at the end.
Definition: seqdb.hpp:302
static void Read(CBlastDbBlob &blob, int n, CSeqDB::TSequenceRanges &ranges)
Definition: seqdbimpl.cpp:2489
static int Read(CBlastDbBlob &blob)
Definition: seqdbimpl.cpp:2484
OID-Range type to simplify interfaces.
SSeqDBTaxInfo.
Definition: _hash_fun.h:40
#define _ASSERT
Modified on Fri Dec 01 04:46:59 2023 by modify_doxy.py rev. 669887