NCBI C++ ToolKit
seqdbgilistset.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqdbgilistset.cpp 91383 2020-10-21 16:07:12Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /// @file seqdbgilistset.cpp
31 /// Implementation for the CSeqDBVol class, which provides an
32 /// interface for all functionality of one database volume.
33 #include <ncbi_pch.hpp>
34 #include "seqdbgilistset.hpp"
35 #include <algorithm>
36 
38 
39 
40 /// Defines a pair of integers and a sort order.
41 ///
42 /// This struct stores a pair of integers, the volume index and the
43 /// oid count. The ordering is by the oid count, descending.
44 
46 public:
47  /// Index of the volume in the volume set.
48  int m_Index;
49 
50  /// Number of OIDs associated with this volume.
51  int m_Count;
52 
53  /// Less than operator, where elements with larger allows sorting.
54  /// Elements are sorted by number of OIDs in descending order.
55  /// @param rhs
56  /// The right hand side of the less than. [in]
57  bool operator < (const SSeqDB_IndexCountPair & rhs) const
58  {
59  return m_Count > rhs.m_Count;
60  }
61 };
62 
63 
64 /// CSeqDBNodeIdList
65 ///
66 /// This class defines a simple CSeqDBGiList subclass which is read
67 /// from a gi list file using the CSeqDBAtlas. It uses the atlas for
68 /// file access and registers the memory used by the vector with the
69 /// atlas layer.
70 
72 public:
73  /// Build a GI,TI, or SI list from a memory mapped file.
74  ///
75  /// Given an ID list file mapped into a region of memory, this
76  /// class reads the GIs or TIs from the file.
77  ///
78  /// @param atlas
79  /// The memory management layer object. [in]
80  /// @param fname
81  /// The filename of this ID list. [in]
82  /// @param list_type
83  /// The type of ID [in]
84  /// @param locked
85  /// Lock holder object for this thread. [in]
87  const CSeqDB_Path & fname,
89  CSeqDBLockHold & locked)
90  : m_VectorMemory(atlas)
91  {
92  CSeqDBAtlas::TIndx file_size(0);
93 
94  CSeqDBFileMemMap memlease(atlas,fname.GetPathS());
95  atlas.GetFileSizeL(fname.GetPathS(), file_size);
96 
97  const char * fbeginp = memlease.GetFileDataPtr(0);
98 
99  const char * fendp = fbeginp + file_size;
100 
101  try {
102  bool in_order = false;
103 
104  switch(list_type) {
106  SeqDB_ReadMemoryGiList(fbeginp, fendp, m_GisOids, & in_order);
107  break;
109  SeqDB_ReadMemoryTiList(fbeginp, fendp, m_TisOids, & in_order);
110  break;
112  SeqDB_ReadMemorySiList(fbeginp, fendp, m_SisOids, & in_order);
113  break;
115  SeqDB_ReadMemoryPigList(fbeginp, fendp, m_PigsOids, & in_order);
116  break;
117  }
118 
119  if (in_order) {
121  }
122  }
123  catch(...) {
124  memlease.Clear();
125  throw;
126  }
127 
128  //memlease.Clear();
129 
130  int vector_size =
131  (int(m_GisOids.size() * sizeof(m_GisOids[0])) +
132  int(m_TisOids.size() * sizeof(m_TisOids[0])));
133 
134  atlas.RegisterExternal(m_VectorMemory, vector_size, locked);
135  // TODO m_VectorMemory for seqid_list
136  }
137 
138  /// Destructor
140  {
141  }
142 
143 private:
144  /// Memory associated with the m_GisOids vector.
146 };
147 
148 void s_ProcessSeqIDsForV5 (vector<string> & idlist)
149 {
150  vector<string> tmplist;
151  tmplist.reserve(idlist.size());
152  for(unsigned int i=0; i < idlist.size(); i++) {
153  try {
155  if(seqid.IsGi()) {
156  continue;
157  }
158 
159  if(seqid.IsPir() || seqid.IsPrf()) {
160  string id = seqid.AsFastaString();
161  tmplist.push_back(id);
162  continue;
163  }
164  tmplist.push_back(seqid.GetSeqIdString(true));
165  } catch (CException & e) {
166  ERR_POST(e.GetMsg());
167  NCBI_THROW(CSeqDBException, eArgErr, "Invalid seq id: " + idlist[i]);
168 
169  }
170  }
171 
172  if (tmplist.size() == 0) {
173  ERR_POST(Warning << "Empty seqid list");
174  }
175  else {
176  sort(tmplist.begin(), tmplist.end());
177  vector<string>::iterator it = unique (tmplist.begin(), tmplist.end());
178  tmplist.resize(distance(tmplist.begin(),it));
179  }
180  idlist.swap(tmplist);
181 }
182 
184 {
185  SBlastSeqIdListInfo newInfo = user_list->GetListInfo();
186  newInfo.is_v4 = false;
187  user_list->SetListInfo (newInfo);
188 
189  vector<string> idlist;
190  user_list->GetSiList(idlist);
191  s_ProcessSeqIDsForV5(idlist);
192  user_list->SetSiList(idlist);
193 }
194 
196 {
197  SBlastSeqIdListInfo newInfo = user_list->GetListInfo();
198  newInfo.is_v4 = false;
199  user_list->SetListInfo (newInfo);
200 
201  vector<string> idlist = user_list->GetSiList();
202  s_ProcessSeqIDsForV5 (idlist);
203  user_list->SetSiList(idlist);
204 }
205 
206 bool s_VerifySeqidlist(const SBlastSeqIdListInfo & list_info, const CSeqDBVolSet & volset, const CSeqDBLMDBSet & lmdb_set)
207 {
208  if(list_info.is_v4 && lmdb_set.IsBlastDBVersion5()) {
209  ERR_POST(Warning << "To obtain better run time performance, " \
210  "please run blastdb_aliastool -seqid_file_in <INPUT_FILE_NAME> " \
211  "-seqid_file_out <OUT_FILE_NAME> and use <OUT_FILE_NAME> as the argument to -seqidlist");
212  return true;
213  }
214 
215  if((!list_info.is_v4) && (!lmdb_set.IsBlastDBVersion5())) {
216  NCBI_THROW(CSeqDBException, eArgErr,
217  "Seqidlist is not in BLAST db v4 format");
218  }
219 
220  if((list_info.db_vol_length > 0) &&
221  (list_info.db_vol_length != volset.GetVolumeSetLength())) {
222  ERR_POST(Warning << "Seqidlist file db info does not match input db");
223  }
224 
225  return false;
226 }
227 
228 void
230  const CSeqDBVolSet & volset,
231  CRef<CSeqDBGiList> user_list,
232  CSeqDBLockHold & locked,
233  const CSeqDBLMDBSet & lmdb_set)
234 {
235  if (m_UserList.NotEmpty() && m_UserList->NotEmpty()) {
236  if(user_list->GetNumSis() > 0) {
237  if(s_VerifySeqidlist(user_list->GetListInfo(), volset, lmdb_set)) {
238  s_ProcessPositiveSeqIDsForV5(user_list);
239  }
240  }
241 
242  if((user_list->GetNumTaxIds() > 0) && !(lmdb_set.IsBlastDBVersion5())) {
243  NCBI_THROW(CSeqDBException, eArgErr, "Taxonomy filtering is not supported in v4 BLAST dbs");
244  }
245  if (lmdb_set.IsBlastDBVersion5()) {
246  if(user_list->GetNumSis() > 0) {
247  vector<string> accs;
248  vector<blastdb::TOid> oids;
249  user_list->GetSiList(accs);
250  lmdb_set.AccessionsToOids(accs, oids);
251  for(unsigned int i=0; i < accs.size(); i++) {
252  user_list->SetSiTranslation(i, oids[i]);
253  }
254  }
255  if(user_list->GetNumTaxIds() > 0) {
256  vector<blastdb::TOid> & oids = user_list->SetOidsForTaxIdsList();
257  set<TTaxId> & tax_ids = user_list->GetTaxIdsList();
258  lmdb_set.TaxIdsToOids(tax_ids, oids);
259  }
260  if((user_list->GetNumGis() == 0) && (user_list->GetNumTis() == 0) &&
261  (user_list->GetNumPigs() == 0)) {
262  return;
263  }
264  }
265  if((user_list->GetNumSis() > 0) && !(lmdb_set.IsBlastDBVersion5())) {
266 
267  user_list->PreprocessIdsForISAMSiLookup();
268  }
269 
270  typedef SSeqDB_IndexCountPair TIndexCount;
271  vector<TIndexCount> OidsPerVolume;
272 
273  // Build a list of volumes sorted by OID count.
274 
275  for(int i = 0; i < volset.GetNumVols(); i++) {
276  const CSeqDBVolEntry * vol = volset.GetVolEntry(i);
277 
278  TIndexCount vol_oids;
279  vol_oids.m_Index = i;
280  vol_oids.m_Count = vol->OIDEnd() - vol->OIDStart();
281 
282  OidsPerVolume.push_back(vol_oids);
283  }
284 
285  // The largest volumes should be used first, to minimize the
286  // number of failed GI->OID conversion attempts. Searching input
287  // GIs against larger volumes first should eliminate most of the
288  // GIs by the time smaller volumes are searched, thus reducing the
289  // total number of lookups.
290 
291  std::sort(OidsPerVolume.begin(), OidsPerVolume.end());
292 
293  for(int i = 0; i < (int)OidsPerVolume.size(); i++) {
294  int vol_idx = OidsPerVolume[i].m_Index;
295 
296  const CSeqDBVolEntry * vol = volset.GetVolEntry(vol_idx);
297 
298  // Note: The implied ISAM lookups will sort by GI/TI.
299 
300  vol->Vol()->IdsToOids(*m_UserList, locked);
301  }
302  }
303 }
304 
305 
306 void
308  const CSeqDBVolSet & volset,
309  CRef<CSeqDBNegativeList> neg_list,
310  CSeqDBLockHold & locked,
311  const CSeqDBLMDBSet & lmdb_set)
312 {
314  // We don't bother to sort these since every ISAM mapping must
315  // be examined for the negative ID list case.
316  if(m_NegativeList->GetNumSis() > 0) {
317  if (s_VerifySeqidlist(m_NegativeList->GetListInfo(), volset, lmdb_set)) {
319  }
320  }
321 
322  if((m_NegativeList->GetNumTaxIds() > 0) && !(lmdb_set.IsBlastDBVersion5())) {
323  NCBI_THROW(CSeqDBException, eArgErr, "Taxonomy filtering is not supported in v4 BLAST dbs");
324  }
325 
326  if(lmdb_set.IsBlastDBVersion5()) {
327 
328  if(m_NegativeList->GetNumSis() > 0) {
329  vector<blastdb::TOid> & oids = m_NegativeList->SetExcludedOids();
330  lmdb_set.NegativeSeqIdsToOids(m_NegativeList->GetSiList(), oids);
331 
332  }
333  if(m_NegativeList->GetNumTaxIds() > 0) {
334  vector<blastdb::TOid> & oids = m_NegativeList->SetExcludedOids();
336  lmdb_set.NegativeTaxIdsToOids(tax_ids, oids);
337  }
338 
339  if((m_NegativeList->GetNumGis() == 0) && (m_NegativeList->GetNumTis() == 0) &&
340  (m_NegativeList->GetNumPigs() == 0)) {
341  return;
342  }
343  }
344 
345  if((m_NegativeList->GetNumSis() > 0) && !(lmdb_set.IsBlastDBVersion5())) {
347  }
348 
349  if (m_NegativeList->GetNumPigs() > 0) {
350  CSeqDBGiList pigs;
352  ITERATE(vector<TPig>, p, m_NegativeList->GetPigList()){
353  pigs.AddPig(*p);
354  }
355  for(int v = 0; v < volset.GetNumVols(); v++) {
356  const CSeqDBVolEntry * vol = volset.GetVolEntry(v);
357  vol->Vol()->IdsToOids(pigs, locked);
358  }
359 
360  vector<blastdb::TOid> & exclude_oid_list = m_NegativeList->SetExcludedOids();
361  for(int o=0; o < pigs.GetNumPigs(); o++) {
362  const CSeqDBGiList::SPigOid & pig = pigs.GetPigOid(o);
363  if(pig.oid != -1) {
364  exclude_oid_list.push_back(pig.oid);
365  }
366  }
367 
368  }
369  for(int i = 0; i < volset.GetNumVols(); i++) {
370  const CSeqDBVolEntry * vol = volset.GetVolEntry(i);
371 
372  // Note: The implied ISAM lookups will sort by GI/TI.
373 
374  vol->Vol()->IdsToOids(*m_NegativeList, locked);
375  }
376  }
377 }
378 
380  const CSeqDBVolSet & volset,
381  CRef<CSeqDBGiList> user_list,
382  CRef<CSeqDBNegativeList> neg_list,
383  CSeqDBLockHold & locked,
384  const CSeqDBLMDBSet & lmdb_set)
385  : m_Atlas (atlas),
386  m_UserList (user_list),
387  m_NegativeList (neg_list)
388 {
389  x_ResolvePositiveList(atlas, volset, user_list, locked, lmdb_set);
390  x_ResolveNegativeList(atlas, volset, neg_list, locked, lmdb_set);
391 }
392 
395  const CSeqDBVol * volp,
396  EGiListType list_type,
397  CSeqDBLockHold & locked)
398 {
399  // Note: possibly the atlas should have a method to add and
400  // subtract pseudo allocations from the memory bound; this would
401  // allow GI list vectors to share the memory bound with memory
402  // mapped file ranges.
403 
404  //m_Atlas.Lock(locked);
405 
406  // Seperate indices are used for TIs and GIs. (Attempting to use
407  // the same file for both should also produce an error when the
408  // binary file is read, as the magic number is different.)
409 
410  TNodeListMap& map_ref = (list_type == eGiList) ? m_GINodeListMap :
411  ((list_type == eTiList) ? m_TINodeListMap : m_SINodeListMap);
412  CRef<CSeqDBGiList> gilist = map_ref[filename.GetPathS()];
413 
414  if (gilist.Empty()) {
416  filename,
417  list_type,
418  locked));
419 
420  if (m_UserList.NotEmpty()) {
421  // Note: translates the GIs and TIs, but ignores Seq-ids.
422  x_TranslateFromUserList(*gilist);
423  }
424 
425  map_ref[filename.GetPathS()] = gilist;
426  }
427 
428  // Note: in pure-GI mode, it might be more efficient (in some
429  // cases) to translate all sub-lists, then translate the main list
430  // in terms of those, rather than the other way round. It might
431  // be a good idea to investigate this -- it should only be done if
432  // the sub-lists are (collectively) smaller than the user list.
433 
434  // If there are Seq-ids, we need to translate all GI lists from
435  // the volume data, because some of the Seq-ids may refer to the
436  // same sequences as GIs in those lists. More sophisticated
437  // methods are possible; for example, we could try to convert all
438  // Seq-ids to GIs. If this worked (it would for those databases
439  // where GIs are available for all sequences), then we would be
440  // able to continue processing as if the User GI list were
441  // strictly GI based.
442 
443  // The ideal solution might be to build a conceptual map of all
444  // data sources and estimate the time needed for different
445  // techniques. This has not been done.
446 
447  bool mixed_ids = m_UserList.Empty() || (!! m_UserList->GetNumSis())|| (m_UserList->GetNumTaxIds());
448 
449  if (! mixed_ids) {
450  if ((m_UserList->GetNumTis() && gilist->GetNumGis()) ||
451  (m_UserList->GetNumGis() && gilist->GetNumTis())) {
452 
453  mixed_ids = true;
454  }
455  }
456 
457  if (m_UserList.Empty() || mixed_ids) {
458  if(gilist->GetNumSis() > 0 ) {
460  }
461  volp->IdsToOids(*gilist, locked);
462  }
463 
464  // If there is a volume GI list, it will also be attached to the
465  // volume, and replaces the user GI list attachment (if there was
466  // one). There can be one user GI list or multiple volume GI
467  // lists for each volume.
468 
469  volp->AttachVolumeGiList(gilist);
470 
471  return gilist;
472 }
473 
475 {
477  CSeqDBGiList & target = gilist;
478 
479  source.InsureOrder(CSeqDBGiList::eGi);
481 
482  int source_num = source.GetNumGis();
483  int target_num = target.GetNumGis();
484 
485  int source_index = 0;
486  int target_index = 0;
487 
488  while(source_index < source_num && target_index < target_num) {
489  TGi source_gi = source.GetGiOid(source_index).gi;
490  TGi target_gi = target.GetGiOid(target_index).gi;
491 
492  // Match; translate if needed
493 
494  if (source_gi == target_gi) {
495  if (target.GetGiOid(target_index).oid == -1) {
496  target.SetGiTranslation(target_index, source.GetGiOid(source_index).oid);
497  }
498  target_index++;
499  source_index++;
500  } else if (source_gi > target_gi) {
501  target_index ++;
502 
503  // Search target using expanding jumps
504  int jump = 2;
505  int test = target_index + jump;
506 
507  while(test < target_num && target.GetGiOid(test).gi < source_gi) {
508  target_index = test;
509  jump *= 2;
510  test = target_index + jump;
511  }
512  } else /* source_gi < target_gi */ {
513  source_index ++;
514 
515  // Search source using expanding jumps
516  int jump = 2;
517  int test = source_index + jump;
518 
519  while(test < source_num && source.GetGiOid(test).gi < target_gi) {
520  source_index = test;
521  jump *= 2;
522  test = source_index + jump;
523  }
524  }
525  }
526 }
527 
529 {
531  CSeqDBGiList & target = gilist;
532 
533  source.InsureOrder(CSeqDBGiList::eGi);
535 
536  int source_num = source.GetNumTis();
537  int target_num = target.GetNumTis();
538 
539  int source_index = 0;
540  int target_index = 0;
541 
542  while(source_index < source_num && target_index < target_num) {
543  TTi source_ti = source.GetTiOid(source_index).ti;
544  TTi target_ti = target.GetTiOid(target_index).ti;
545 
546  // Match; translate if needed
547 
548  if (source_ti == target_ti) {
549  if (target.GetTiOid(target_index).oid == -1) {
550  target.SetTiTranslation(target_index,
551  source.GetTiOid(source_index).oid);
552  }
553 
554  target_index++;
555  source_index++;
556  } else if (source_ti > target_ti) {
557  target_index ++;
558 
559  // Search target using expanding jumps
560  int jump = 2;
561  int test = target_index + jump;
562 
563  while(test < target_num &&
564  target.GetTiOid(test).ti < source_ti) {
565 
566  target_index = test;
567  jump *= 2;
568  test = target_index + jump;
569  }
570  } else /* source_ti < target_ti */ {
571  source_index ++;
572 
573  // Search source using expanding jumps
574  int jump = 2;
575  int test = source_index + jump;
576 
577  while(test < source_num &&
578  source.GetTiOid(test).ti < target_ti) {
579 
580  source_index = test;
581  jump *= 2;
582  test = source_index + jump;
583  }
584  }
585  }
586 }
587 
589 {
592 }
593 
595 
CSeqDBAtlas class.
Definition: seqdbatlas.hpp:297
bool GetFileSizeL(const string &fname, TIndx &length)
Get size of a file.
Definition: seqdbatlas.cpp:160
void RegisterExternal(CSeqDBMemReg &memreg, size_t bytes, CSeqDBLockHold &locked)
Register externally allocated memory.
Definition: seqdbatlas.cpp:264
CNcbiStreamoff TIndx
The type used for file offsets.
Definition: seqdbatlas.hpp:301
CSeqDBException.
Definition: seqdbcommon.hpp:73
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
Definition: seqdbatlas.hpp:755
void Clear()
Clears the memory mapobject.
Definition: seqdbatlas.hpp:735
EGiListType
Gilist types.
TNodeListMap m_GINodeListMap
Map of filenames to alias node specified GI lists.
void x_ResolveNegativeList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volset, CRef< CSeqDBNegativeList > neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
void x_ResolvePositiveList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volset, CRef< CSeqDBGiList > user_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
CSeqDBGiListSet(CSeqDBAtlas &atlas, const CSeqDBVolSet &vol_set, TListRef user_list, TNegativeRef neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Constructor.
TListRef m_UserList
User-specified GI list.
TNodeListMap m_SINodeListMap
Map of filenames to alias node specified SI lists.
TNegativeRef m_NegativeList
User-specified Negative GI list.
void x_TranslateFromUserList(CSeqDBGiList &gilist)
Translate a volume gilist from the user gilist.
CSeqDBAtlas & m_Atlas
Memory management layer object.
TNodeListMap m_TINodeListMap
Map of filenames to alias node specified TI lists.
void x_TranslateTisFromUserList(CSeqDBGiList &gilist)
Translate a volume gilists's TIs from the user gilist's TIs.
void x_TranslateGisFromUserList(CSeqDBGiList &gilist)
Translate a volume gilists's GIs from the user gilist's GIs.
TListRef GetNodeIdList(const CSeqDB_Path &filename, const CSeqDBVol *volp, EGiListType list_type, CSeqDBLockHold &locked)
Get a reference to a named GI list.
CSeqDBGiList.
vector< SGiOid > m_GisOids
Pairs of GIs and OIDs.
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
int GetNumTaxIds() const
vector< SPigOid > m_PigsOids
int GetNumSis() const
Get the number of Seq-ids in the array.
const SPigOid & GetPigOid(int index) const
int GetNumTis() const
Get the number of TIs in the array.
vector< STiOid > m_TisOids
Pairs of GIs and OIDs.
int GetNumPigs() const
void SetSiTranslation(int index, int oid)
Specify the correct OID for a Seq-id.
void SetSiList(const vector< string > &new_list)
void AddPig(TPig pig)
void SetTiTranslation(int index, int oid)
Specify the correct OID for a TI.
void GetSiList(vector< string > &sis) const
TODO Get the seqid list?
void SetListInfo(const SBlastSeqIdListInfo &list_info)
TODO Reserve space for seqids?
@ eGi
The array is sorted by GI.
vector< blastdb::TOid > & SetOidsForTaxIdsList()
bool NotEmpty() const
Return true if there are elements present.
const SBlastSeqIdListInfo & GetListInfo()
set< TTaxId > & GetTaxIdsList()
void ReservePigs(size_t n)
void PreprocessIdsForISAMSiLookup()
Preprocess ids for ISAM string id lookup.
const STiOid & GetTiOid(int index) const
Access an element of the array.
void SetGiTranslation(int index, int oid)
Specify the correct OID for a GI.
void InsureOrder(ESortOrder order)
Sort if necessary to insure order of elements.
vector< SSiOid > m_SisOids
Pairs of Seq-ids and OIDs.
ESortOrder m_CurrentOrder
Indicates the current sort order, if any, of this container.
CSeqDBLMDBSet.
void NegativeTaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
void NegativeSeqIdsToOids(const vector< string > &ids, vector< blastdb::TOid > &rv) const
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
CSeqDBLockHold.
Definition: seqdbatlas.hpp:166
CSeqDBMemReg.
Definition: seqdbatlas.hpp:215
int GetNumTaxIds() const
const vector< string > & GetSiList()
const SBlastSeqIdListInfo & GetListInfo()
int GetNumTis() const
Get the number of TIs in the array.
bool NotEmpty() const
Return true if there are elements present.
void SetSiList(const vector< string > &new_list)
void PreprocessIdsForISAMSiLookup()
int GetNumGis() const
Get the number of GIs in the array.
int GetNumPigs() const
const vector< TPig > & GetPigList()
void SetListInfo(const SBlastSeqIdListInfo &list_info)
vector< blastdb::TOid > & SetExcludedOids()
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
CSeqDBNodeIdList.
CSeqDBMemReg m_VectorMemory
Memory associated with the m_GisOids vector.
virtual ~CSeqDBNodeFileIdList()
Destructor.
CSeqDBNodeFileIdList(CSeqDBAtlas &atlas, const CSeqDB_Path &fname, CSeqDBGiListSet::EGiListType list_type, CSeqDBLockHold &locked)
Build a GI,TI, or SI list from a memory mapped file.
CSeqDBVolEntry.
Definition: seqdbvolset.hpp:59
int OIDStart() const
Get the starting OID in this volume's range.
int OIDEnd() const
Get the ending OID in this volume's range.
CSeqDBVol * Vol()
Get a pointer to the underlying volume object.
CSeqDBVolSet.
const CSeqDBVolEntry * GetVolEntry(int i) const
Find a volume entry by index.
Uint8 GetVolumeSetLength() const
Find total volume length for all volumes.
int GetNumVols() const
Get the number of volumes.
CSeqDBVol class.
Definition: seqdbvol.hpp:169
void IdsToOids(CSeqDBGiList &gis, CSeqDBLockHold &locked) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
Definition: seqdbvol.cpp:2483
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
Definition: seqdbvol.hpp:639
CSeqDB_Path.
const string & GetPathS() const
Get the path as a string.
#define test(a, b, c, d, e)
Definition: numeric.c:170
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
@ fParse_RawText
Try to ID raw non-numeric accessions.
Definition: Seq_id.hpp:81
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
Definition: Seq_id.hpp:80
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
Definition: Seq_id.hpp:90
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
bool IsPrf(void) const
Check if variant Prf is selected.
Definition: Seq_id_.hpp:916
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsPir(void) const
Check if variant Pir is selected.
Definition: Seq_id_.hpp:853
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
constexpr auto sort(_Init &&init)
const CharType(& source)[N]
Definition: pointer.h:1149
void SeqDB_ReadMemorySiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order=0)
Read a text SeqID list from an area of memory.
void SeqDB_ReadMemoryGiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order=0)
Read a text or binary GI list from an area of memory.
Uint8 TTi
void SeqDB_ReadMemoryTiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::STiOid > &tis, bool *in_order=0)
Read a text or binary TI list from an area of memory.
void SeqDB_ReadMemoryPigList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order=0)
void s_ProcessSeqIDsForV5(vector< string > &idlist)
bool s_VerifySeqidlist(const SBlastSeqIdListInfo &list_info, const CSeqDBVolSet &volset, const CSeqDBLMDBSet &lmdb_set)
void s_ProcessNegativeSeqIDsForV5(CRef< CSeqDBNegativeList > &user_list)
void s_ProcessPositiveSeqIDsForV5(CRef< CSeqDBGiList > &user_list)
Defines set of GI lists.
TGi gi
The GI or 0 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
TTi ti
The TI or 0 if unknown.
Blast DB v5 seqid list info.
Defines a pair of integers and a sort order.
bool operator<(const SSeqDB_IndexCountPair &rhs) const
Less than operator, where elements with larger allows sorting.
int m_Index
Index of the volume in the volume set.
int m_Count
Number of OIDs associated with this volume.
Modified on Fri Sep 20 14:57:53 2024 by modify_doxy.py rev. 669887