NCBI C++ ToolKit
data_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: data_loader.cpp 98529 2022-11-30 18:45:10Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Data loader base class for object manager
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/data_loader.hpp>
38 #include <objmgr/annot_name.hpp>
40 #include <objmgr/impl/tse_info.hpp>
44 #include <objmgr/scope.hpp>
45 #include <objmgr/bioseq_handle.hpp>
47 
48 
51 
52 
53 void CDataLoader::RegisterInObjectManager(
55  CLoaderMaker_Base& loader_maker,
56  CObjectManager::EIsDefault is_default,
57  CObjectManager::TPriority priority)
58 {
59  om.RegisterDataLoader(loader_maker, is_default, priority);
60 }
61 
62 
64 {
65  m_Name = NStr::PtrToString(this);
66  return;
67 }
68 
69 
70 CDataLoader::CDataLoader(const string& loader_name)
71  : m_Name(loader_name)
72 {
73  if (loader_name.empty())
74  {
75  m_Name = NStr::PtrToString(this);
76  }
77 }
78 
79 
81 {
82  return;
83 }
84 
85 
87 {
88  m_DataSource = &data_source;
89 }
90 
91 
93 {
94  return m_DataSource;
95 }
96 
97 
98 void CDataLoader::SetName(const string& loader_name)
99 {
100  m_Name = loader_name;
101 }
102 
103 
104 string CDataLoader::GetName(void) const
105 {
106  return m_Name;
107 }
108 
109 
111 {
112 }
113 
114 
115 void CDataLoader::GC(void)
116 {
117 }
118 
119 
122  EChoice /*choice*/)
123 {
124  NCBI_THROW(CLoaderException, eNotImplemented,
125  "CDataLoader::GetRecords() is not implemented in subclass");
126 }
127 
128 
131  EChoice choice)
132 {
133  try {
134  return GetRecords(idh, choice);
135  }
136  catch ( CBlobStateException& /* ignored */ ) {
137  return TTSE_LockSet();
138  }
139 }
140 
141 
144  const SRequestDetails& details)
145 {
146  return GetRecords(idh, DetailsToChoice(details));
147 }
148 
149 
152 {
153  TTSE_LockSet ret;
154  ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
155  if ( GetBlobId(*it) ) {
156  // correct id is found
157  TTSE_LockSet ret2 = GetRecords(*it, eExtAnnot);
158  ret.swap(ret2);
159  break;
160  }
161  }
162  return ret;
163 }
164 
165 
168  const SAnnotSelector* /*sel*/)
169 {
170  return GetRecords(idh, eOrphanAnnot);
171 }
172 
173 
176  const SAnnotSelector* /*sel*/)
177 {
178  return GetRecords(idh, eExtAnnot);
179 }
180 
181 
184  const SAnnotSelector* sel)
185 {
186  TTSE_LockSet ret;
187  ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
188  if ( !CanGetBlobById() || GetBlobId(*it) ) {
189  // correct id is found
190  TTSE_LockSet ret2 = GetExternalAnnotRecords(*it, sel);
191  if ( !ret2.empty() ) {
192  ret.swap(ret2);
193  break;
194  }
195  }
196  }
197  return ret;
198 }
199 
200 
202 {
203  return sel && sel->IsIncludedAnyNamedAnnotAccession();
204 }
205 
206 
207 bool CDataLoader::IsRequestedNA(const string& na,
208  const SAnnotSelector* sel)
209 {
210  return sel && sel->IsIncludedNamedAnnotAccession(na);
211 }
212 
213 
214 bool CDataLoader::IsProcessedNA(const string& na,
215  const TProcessedNAs* processed_nas)
216 {
217  return processed_nas && processed_nas->find(na) == processed_nas->end();
218 }
219 
220 
221 void CDataLoader::SetProcessedNA(const string& na,
222  TProcessedNAs* processed_nas)
223 {
224  if ( processed_nas ) {
225  processed_nas->insert(na);
226  }
227 }
228 
229 
232  const SAnnotSelector* sel,
233  TProcessedNAs* /*processed_nas*/)
234 {
235  // as a backup call old method that cannot report processed NAs
236  return GetOrphanAnnotRecords(idh, sel);
237 }
238 
239 
242  const SAnnotSelector* sel,
243  TProcessedNAs* processed_nas)
244 {
246  ITERATE(TSeq_idSet, id_it, ids) {
247  CDataLoader::TTSE_LockSet tse_set2 =
248  GetOrphanAnnotRecordsNA(*id_it, sel, processed_nas);
249  if (!tse_set2.empty()) {
250  if (tse_set.empty()) {
251  tse_set.swap(tse_set2);
252  }
253  else {
254  tse_set.insert(tse_set2.begin(), tse_set2.end());
255  }
256  }
257  }
258  return tse_set;
259 }
260 
261 
264  const SAnnotSelector* sel,
265  TProcessedNAs* /*processed_nas*/)
266 {
267  // as a backup call old method that cannot report processed NAs
268  return GetExternalAnnotRecords(idh, sel);
269 }
270 
271 
274  const SAnnotSelector* sel,
275  TProcessedNAs* /*processed_nas*/)
276 {
277  // as a backup call old method that cannot report processed NAs
278  return GetExternalAnnotRecords(bioseq, sel);
279 }
280 
281 
283 {
284  return false;
285 }
286 
287 
289 {
290  NCBI_THROW(CLoaderException, eNotImplemented,
291  "CDataLoader::GetBlobById() is not implemented in subclass");
292 }
293 
295 {
296  NCBI_THROW(CLoaderException, eNotImplemented,
297  "CDataLoader::GetBlobIdFromString(str) is not implemented in subclass");
298 }
299 
300 
302 {
304  ITERATE(TTSE_LockSet, it, locks) {
305  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
306  if ( bs_info ) {
307  ids = bs_info->GetId();
308  break;
309  }
310  }
311 }
312 
313 
315 {
316  // check if sequence exists
317  TIds ids;
318  GetIds(idh, ids);
319  return !ids.empty();
320 }
321 
322 
324 {
325  // default implementation based on GetIds();
326  TIds ids;
327  GetIds(idh, ids);
328  if ( ids.empty() ) {
330  "CDataLoader::GetAccVer() sequence not found");
331  }
333  if ( !acc ) {
334  NCBI_THROW(CLoaderException, eNoData,
335  "CDataLoader::GetAccVer() sequence doesn't have accession");
336  }
337  return acc;
338 }
339 
340 
343 {
344  // default implementation based on GetAccVer() and GetIds()
345  SAccVerFound ret;
346  try {
347  ret.acc_ver = GetAccVer(idh);
348  ret.sequence_found = ret.acc_ver || SequenceExists(idh);
349  }
350  catch ( CLoaderException& exc ) {
351  if ( exc.GetErrCode() == exc.eNotFound ) {
352  // no sequence
353  }
354  else if ( exc.GetErrCode() == exc.eNoData ) {
355  // sequence is known, but there is no accession
356  ret.sequence_found = true;
357  }
358  else {
359  // problem
360  throw;
361  }
362  }
363  return ret;
364 }
365 
366 
368 {
369  // default implementation based on GetIds();
370  TIds ids;
371  GetIds(idh, ids);
372  if ( ids.empty() ) {
374  "CDataLoader::GetGi() sequence not found");
375  }
376  TGi gi = CScope::x_GetGi(ids);
377  if ( gi == ZERO_GI ) {
378  NCBI_THROW(CLoaderException, eNoData,
379  "CDataLoader::GetGi() sequence doesn't have GI");
380  }
381  return gi;
382 }
383 
384 
386 {
387  // default implementation based on GetGi() and GetIds()
388  SGiFound ret;
389  try {
390  ret.gi = GetGi(idh);
391  ret.sequence_found = ret.gi != ZERO_GI || SequenceExists(idh);
392  }
393  catch ( CLoaderException& exc ) {
394  if ( exc.GetErrCode() == exc.eNotFound ) {
395  // no sequence
396  }
397  else if ( exc.GetErrCode() == exc.eNoData ) {
398  // sequence is known, but there is no GI
399  ret.sequence_found = true;
400  }
401  else {
402  // problem
403  throw;
404  }
405  }
406  return ret;
407 }
408 
409 
411 {
412  // default implementation based on GetIds();
413  TIds ids;
414  GetIds(idh, ids);
415  if ( ids.empty() ) {
416  return string();
417  }
418  return objects::GetLabel(ids);
419 }
420 
421 
423 {
424  // default implementation based on GetRecordsNoBlobState();
426  ITERATE(TTSE_LockSet, it, locks) {
427  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
428  if ( bs_info ) {
429  return bs_info->GetTaxId();
430  }
431  }
432  return INVALID_TAX_ID;
433 }
434 
435 
437 {
438  // default implementation based on GetRecordsNoBlobState()
440  ITERATE(TTSE_LockSet, it, locks) {
441  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
442  if ( bs_info ) {
443  return bs_info->GetBioseqLength();
444  }
445  }
446  return kInvalidSeqPos;
447 }
448 
449 
451 {
452  // default implementation based on GetRecordsNoBlobState()
454  ITERATE(TTSE_LockSet, it, locks) {
455  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
456  if ( bs_info ) {
457  CSeq_inst::TMol type = bs_info->GetInst_Mol();
458  if ( type == CSeq_inst::eMol_not_set ) {
459  NCBI_THROW(CLoaderException, eNoData,
460  "CDataLoader::GetSequenceType() type not set");
461  }
462  return type;
463  }
464  }
466  "CDataLoader::GetSequenceType() sequence not found");
467 }
468 
469 
472 {
473  // default implementation based on GetSequenceType() and GetIds()
474  STypeFound ret;
475  try {
476  ret.type = GetSequenceType(idh);
477  ret.sequence_found =
479  }
480  catch ( CLoaderException& exc ) {
481  if ( exc.GetErrCode() == exc.eNotFound ) {
482  // no sequence
483  }
484  else if ( exc.GetErrCode() == exc.eNoData ) {
485  // sequence is known, but there is no type
486  ret.sequence_found = true;
487  }
488  else {
489  // problem
490  throw;
491  }
492  }
493  return ret;
494 }
495 
496 
498 {
499  try {
500  TTSE_LockSet locks = GetRecords(idh, eBioseqCore);
501  ITERATE(TTSE_LockSet, it, locks) {
502  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
503  if ( bs_info ) {
504  return (*it)->GetBlobState();
505  }
506  }
508  }
509  catch ( CBlobStateException& exc ) {
510  return exc.GetBlobState();
511  }
512 }
513 
514 
516 {
517  if ( SequenceExists(idh) ) {
518  NCBI_THROW(CLoaderException, eNoData,
519  "CDataLoader::GetSequenceHash() sequence hash not set");
520  }
522  "CDataLoader::GetSequenceHash() sequence not found");
523 }
524 
525 
528 {
529  // default implementation based on GetSequenceHash() and GetIds()
530  SHashFound ret;
531  try {
532  ret.hash = GetSequenceHash(idh);
533  if ( !ret.hash ) {
534  // hash = 0, we don't know what causes it:
535  // absence of sequence, unknown hash, or the hash happens to be 0.
536  ret.sequence_found = SequenceExists(idh);
537  }
538  else {
539  ret.sequence_found = true;
540  ret.hash_known = true;
541  }
542  }
543  catch ( CLoaderException& exc ) {
544  if ( exc.GetErrCode() == exc.eNotFound ) {
545  // no sequence found
546  }
547  else if ( exc.GetErrCode() == exc.eNoData ) {
548  // sequence exists
549  ret.sequence_found = true;
550  }
551  else {
552  // problem
553  throw;
554  }
555  }
556  return ret;
557 }
558 
559 
560 void CDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
561 {
562  size_t count = ids.size();
563  _ASSERT(ids.size() == loaded.size());
564  _ASSERT(ids.size() == ret.size());
565  TIds seq_ids;
566  for ( size_t i = 0; i < count; ++i ) {
567  if ( loaded[i] ) {
568  continue;
569  }
570  SAccVerFound data = GetAccVerFound(ids[i]);
571  if ( data.sequence_found ) {
572  ret[i] = data.acc_ver;
573  loaded[i] = true;
574  }
575  }
576 }
577 
578 
579 void CDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
580 {
581  size_t count = ids.size();
582  _ASSERT(ids.size() == loaded.size());
583  _ASSERT(ids.size() == ret.size());
584  TIds seq_ids;
585  for ( size_t i = 0; i < count; ++i ) {
586  if ( loaded[i] ) {
587  continue;
588  }
589  SGiFound data = GetGiFound(ids[i]);
590  if ( data.sequence_found ) {
591  ret[i] = data.gi;
592  loaded[i] = true;
593  }
594  }
595 }
596 
597 
598 void CDataLoader::GetLabels(const TIds& ids, TLoaded& loaded, TLabels& ret)
599 {
600  size_t count = ids.size();
601  _ASSERT(ids.size() == loaded.size());
602  _ASSERT(ids.size() == ret.size());
603  for ( size_t i = 0; i < count; ++i ) {
604  if ( loaded[i] ) {
605  continue;
606  }
607  string label = GetLabel(ids[i]);
608  if ( !label.empty() ) {
609  ret[i] = label;
610  loaded[i] = true;
611  }
612  }
613 }
614 
615 
616 void CDataLoader::GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret)
617 {
618  size_t count = ids.size();
619  _ASSERT(ids.size() == loaded.size());
620  _ASSERT(ids.size() == ret.size());
621  for ( size_t i = 0; i < count; ++i ) {
622  if ( loaded[i] ) {
623  continue;
624  }
625 
626  TTaxId taxid = GetTaxId(ids[i]);
627  if ( taxid != INVALID_TAX_ID ) {
628  ret[i] = taxid;
629  loaded[i] = true;
630  }
631  }
632 }
633 
634 
636  TSequenceLengths& ret)
637 {
638  size_t count = ids.size();
639  _ASSERT(ids.size() == loaded.size());
640  _ASSERT(ids.size() == ret.size());
641  for ( size_t i = 0; i < count; ++i ) {
642  if ( loaded[i] ) {
643  continue;
644  }
645 
646  TSeqPos len = GetSequenceLength(ids[i]);
647  if ( len != kInvalidSeqPos ) {
648  ret[i] = len;
649  loaded[i] = true;
650  }
651  }
652 }
653 
654 
655 void CDataLoader::GetSequenceTypes(const TIds& ids, TLoaded& loaded,
656  TSequenceTypes& ret)
657 {
658  size_t count = ids.size();
659  _ASSERT(ids.size() == loaded.size());
660  _ASSERT(ids.size() == ret.size());
661  for ( size_t i = 0; i < count; ++i ) {
662  if ( loaded[i] ) {
663  continue;
664  }
665 
666  STypeFound data = GetSequenceTypeFound(ids[i]);
667  if ( data.sequence_found ) {
668  ret[i] = data.type;
669  loaded[i] = true;
670  }
671  }
672 }
673 
674 
675 void CDataLoader::GetSequenceStates(const TIds& ids, TLoaded& loaded,
676  TSequenceStates& ret)
677 {
678  const int kNotFound = (CBioseq_Handle::fState_not_found |
680 
681  size_t count = ids.size();
682  _ASSERT(ids.size() == loaded.size());
683  _ASSERT(ids.size() == ret.size());
684  for ( size_t i = 0; i < count; ++i ) {
685  if ( loaded[i] ) {
686  continue;
687  }
688 
689  int state = GetSequenceState(ids[i]);
690  if ( state != kNotFound ) {
691  ret[i] = state;
692  loaded[i] = true;
693  }
694  }
695 }
696 
697 
698 void CDataLoader::GetSequenceHashes(const TIds& ids, TLoaded& loaded,
699  TSequenceHashes& ret, THashKnown& known)
700 {
701  size_t count = ids.size();
702  _ASSERT(ids.size() == loaded.size());
703  _ASSERT(ids.size() == ret.size());
704  for ( size_t i = 0; i < count; ++i ) {
705  if ( loaded[i] ) {
706  continue;
707  }
708 
709  SHashFound data = GetSequenceHashFound(ids[i]);
710  if ( data.sequence_found ) {
711  ret[i] = data.hash;
712  loaded[i] = true;
713  known[i] = data.hash_known;
714  }
715  }
716 }
717 
718 
719 void CDataLoader::GetCDDAnnots(const TSeqIdSets& id_sets, TLoaded& loaded, TCDD_Locks& ret)
720 {
721  SAnnotSelector sel;
722  sel.AddNamedAnnots("CDD")
725  for (size_t i = 0; i < id_sets.size(); ++i) {
726  for (auto id : id_sets[i]) {
728  CConstRef<CBioseq_Info> bs_info;
729  ITERATE(TTSE_LockSet, it, locks) {
730  bs_info = (*it)->FindMatchingBioseq(id);
731  if (!bs_info) continue;
732  TProcessedNAs nas;
733  const auto& id_locks = GetExternalAnnotRecordsNA(*bs_info, &sel, &nas);
734  if (id_locks.empty()) continue;
735  ret[i] = *id_locks.begin();
736  loaded[i] = true;
737  break;
738  }
739  if (loaded[i]) break;
740  }
741  }
742 }
743 
744 
746 {
747  NON_CONST_ITERATE(TTSE_LockSets, tse_set, tse_sets) {
748  tse_set->second = GetRecords(tse_set->first, eBlob);
749  }
750 }
751 
752 
755 {
756  EChoice ret = eCore;
757  ITERATE ( SRequestDetails::TAnnotSet, i, annots ) {
758  ITERATE ( SRequestDetails::TAnnotTypesSet, j, i->second ) {
759  EChoice cur = eCore;
760  switch ( j->GetAnnotType() ) {
762  cur = eFeatures;
763  break;
765  cur = eGraph;
766  break;
768  cur = eAlign;
769  break;
771  return eAnnot;
772  default:
773  break;
774  }
775  if ( cur != eCore && cur != ret ) {
776  if ( ret != eCore ) return eAnnot;
777  ret = cur;
778  }
779  }
780  }
781  return ret;
782 }
783 
784 
787 {
788  EChoice ret = DetailsToChoice(details.m_NeedAnnots);
789  switch ( details.m_AnnotBlobType ) {
791  // no annotations
792  ret = eCore;
793  break;
795  // no change
796  break;
798  // shift from internal to external annotations
799  _ASSERT(ret >= eFeatures && ret <= eAnnot);
800  ret = EChoice(ret + eExtFeatures - eFeatures);
801  _ASSERT(ret >= eExtFeatures && ret <= eExtAnnot);
802  break;
804  // all orphan annots
805  ret = eOrphanAnnot;
806  break;
807  default:
808  // all other cases -> eAll
809  ret = eAll;
810  break;
811  }
812  if ( !details.m_NeedSeqMap.Empty() || !details.m_NeedSeqData.Empty() ) {
813  // include sequence
814  if ( ret == eCore ) {
815  ret = eSequence;
816  }
817  else if ( ret >= eFeatures && ret <= eAnnot ) {
818  // only internal annot + sequence -> whole blob
819  ret = eBlob;
820  }
821  else {
822  // all blobs
823  ret = eAll;
824  }
825  }
826  return ret;
827 }
828 
829 
831 {
832  SRequestDetails details;
834  bool sequence = false;
835  switch ( choice ) {
836  case eAll:
837  sequence = true;
838  // from all blobs
840  break;
841  case eBlob:
842  case eBioseq:
843  case eBioseqCore:
844  sequence = true;
845  // internal only
847  break;
848  case eSequence:
849  sequence = true;
850  break;
851  case eAnnot:
852  // internal only
854  break;
855  case eGraph:
857  // internal only
859  break;
860  case eFeatures:
862  // internal only
864  break;
865  case eAlign:
867  // internal only
869  break;
870  case eExtAnnot:
871  // external only
873  break;
874  case eExtGraph:
876  // external only
878  break;
879  case eExtFeatures:
881  // external only
883  break;
884  case eExtAlign:
886  // external only
888  break;
889  case eOrphanAnnot:
890  // orphan annotations only
892  break;
893  default:
894  break;
895  }
896  if ( sequence ) {
899  }
902  }
903  return details;
904 }
905 
906 
907 void CDataLoader::GetChunk(TChunk /*chunk_info*/)
908 {
909  NCBI_THROW(CLoaderException, eNotImplemented,
910  "CDataLoader::GetChunk() is not implemented in subclass");
911 }
912 
913 
915 {
916  ITERATE ( TChunkSet, it, chunks ) {
917  GetChunk(*it);
918  }
919 }
920 
921 
924  const TTSE_LockSet& /*tse_set*/)
925 {
926  return TTSE_Lock();
927 }
928 
929 
931 {
932  return TBlobId();
933 }
934 
935 
937 {
938  return 0;
939 }
940 
942 {
943  return TEditSaver();
944 }
945 
946 
948 {
950 }
951 
952 
954 {
955  return 32000; // assume 32KB chunk size
956 }
957 
958 
959 double CDataLoader::EstimateLoadSeconds(const CTSE_Chunk_Info& /*chunk*/, Uint4 bytes) const
960 {
961  return bytes*1e-7+0.001; // assume 10MB/s transfer speed and 1ms overhead
962 }
963 
964 
966 {
967  return kMax_UInt;
968 }
969 
971 {
972  return false;
973 }
974 
975 
976 /////////////////////////////////////////////////////////////////////////////
977 // CBlobId
978 
980 {
981 }
982 
983 bool CBlobId::LessByTypeId(const CBlobId& id2) const
984 {
985  return typeid(*this).before(typeid(id2));
986 }
987 
988 bool CBlobId::operator==(const CBlobId& id) const
989 {
990  return !(*this < id || id < *this);
991 }
992 
993 
TInst_Mol GetInst_Mol(void) const
vector< CSeq_id_Handle > TId
Definition: bioseq_info.hpp:91
TTaxId GetTaxId(void) const
const TId & GetId(void) const
TSeqPos GetBioseqLength(void) const
Blob state exceptions, used by GenBank loader.
Data loader exceptions, used by GenBank loader.
CObjectManager –.
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
void swap(this_type &m)
Definition: set.hpp:102
static int type
Definition: getdata.c:31
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:687
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
string GetLabel(const CSeq_id &id)
virtual int GetSequenceState(const CSeq_id_Handle &idh)
Request for a state of a sequence.
virtual CObjectManager::TPriority GetDefaultPriority(void) const
virtual void GetSequenceHashes(const TIds &ids, TLoaded &loaded, TSequenceHashes &ret, THashKnown &known)
Bulk request for hashes of a set of sequences.
void SetName(const string &loader_name)
Definition: data_loader.cpp:98
virtual TTSE_LockSet GetOrphanAnnotRecords(const CSeq_id_Handle &idh, const SAnnotSelector *sel)
old Get*AnnotRecords() methods
CRef< IEditSaver > TEditSaver
virtual SRequestDetails ChoiceToDetails(EChoice choice) const
void SetTargetDataSource(CDataSource &data_source)
Specify datasource to send loaded data to.
Definition: data_loader.cpp:86
virtual ~CDataLoader(void)
Definition: data_loader.cpp:80
vector< TSeqPos > TSequenceLengths
CDataLoader(void)
Definition: data_loader.cpp:63
virtual int GetSequenceHash(const CSeq_id_Handle &idh)
Request for a sequence hash.
virtual EChoice DetailsToChoice(const SRequestDetails::TAnnotSet &annots) const
vector< CTSE_Lock > TCDD_Locks
vector< CSeq_id_Handle > TIds
virtual TTSE_LockSet GetOrphanAnnotRecordsNA(const CSeq_id_Handle &idh, const SAnnotSelector *sel, TProcessedNAs *processed_nas)
new Get*AnnotRecords() methods
CSeq_inst::TMol type
TBlobState GetBlobState(void)
CBlobIdKey TBlobId
virtual TBlobId GetBlobIdFromString(const string &str) const
static void SetProcessedNA(const string &na, TProcessedNAs *processed_nas)
vector< bool > THashKnown
string GetName(void) const
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
virtual TTSE_Lock ResolveConflict(const CSeq_id_Handle &id, const TTSE_LockSet &tse_set)
Resolve TSE conflict: select the best TSE from the set of dead TSEs.
vector< TGi > TGis
virtual void GetTaxIds(const TIds &ids, TLoaded &loaded, TTaxIds &ret)
Bulk request for taxonomy ids of a set of sequences.
virtual bool CanGetBlobById(void) const
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Request for a list of all Seq-ids of a sequence.
TRange m_NeedSeqData
Definition: data_loader.hpp:96
EChoice
The main blob is the blob with the sequence; all other blobs are external and contain external annotations.
TTSE_LockSet GetRecordsNoBlobState(const CSeq_id_Handle &idh, EChoice choice)
The same as GetRecords() but always returns empty TSE lock set instead of throwing CBlobStateExceptio...
virtual TGi GetGi(const CSeq_id_Handle &idh)
Request for a gi of a sequence.
static bool IsRequestedAnyNA(const SAnnotSelector *sel)
vector< CSeq_inst::TMol > TSequenceTypes
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
virtual TTSE_LockSet GetDetailedRecords(const CSeq_id_Handle &idh, const SRequestDetails &details)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
vector< vector< CSeq_id_Handle > > TSeqIdSets
static bool IsRequestedNA(const string &na, const SAnnotSelector *sel)
virtual bool operator==(const CBlobId &id) const
virtual TTaxId GetTaxId(const CSeq_id_Handle &idh)
Request for a taxonomy id of a sequence.
virtual string GetLabel(const CSeq_id_Handle &idh)
Request for a label string of a sequence.
bool SequenceExists(const CSeq_id_Handle &idh)
helper function to check if sequence exists, uses GetIds()
virtual double EstimateLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
virtual CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh)
Request for an accession.version Seq-id of a sequence.
static bool IsProcessedNA(const string &na, const TProcessedNAs *processed_nas)
virtual STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh)
static CSeq_id_Handle x_GetAccVer(const TIds &ids)
Definition: scope.cpp:425
virtual void GetSequenceTypes(const TIds &ids, TLoaded &loaded, TSequenceTypes &ret)
Bulk request for types of a set of sequences.
virtual void GetCDDAnnots(const TSeqIdSets &id_sets, TLoaded &loaded, TCDD_Locks &ret)
virtual void GetBlobs(TTSE_LockSets &tse_sets)
virtual SAccVerFound GetAccVerFound(const CSeq_id_Handle &idh)
virtual Uint4 EstimateLoadBytes(const CTSE_Chunk_Info &chunk) const
TAnnotSet m_NeedAnnots
Definition: data_loader.hpp:97
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Request for a length of a sequence.
virtual void GC(void)
vector< bool > TLoaded
Bulk loading interface for small pieces of information per id.
virtual ~CBlobId(void)
virtual SHashFound GetSequenceHashFound(const CSeq_id_Handle &idh)
virtual SGiFound GetGiFound(const CSeq_id_Handle &idh)
virtual void GetSequenceLengths(const TIds &ids, TLoaded &loaded, TSequenceLengths &ret)
Bulk request for lengths of a set of sequences.
virtual TTSE_LockSet GetExternalAnnotRecords(const CSeq_id_Handle &idh, const SAnnotSelector *sel)
virtual void DropTSE(CRef< CTSE_Info > tse_info)
virtual void GetGis(const TIds &ids, TLoaded &loaded, TGis &ret)
Bulk request for gis of a set of sequences.
static TGi x_GetGi(const TIds &ids)
Definition: scope.cpp:448
vector< string > TLabels
TAnnotBlobType m_AnnotBlobType
Definition: data_loader.hpp:98
virtual TTSE_LockSet GetExternalAnnotRecordsNA(const CSeq_id_Handle &idh, const SAnnotSelector *sel, TProcessedNAs *processed_nas)
virtual void GetLabels(const TIds &ids, TLoaded &loaded, TLabels &ret)
Bulk request for label strings of a set of sequences.
virtual TBlobVersion GetBlobVersion(const TBlobId &id)
virtual void GetChunks(const TChunkSet &chunks)
virtual CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle &idh)
Request for a type of a sequence. Returns CSeq_inst::eMol_not_set if sequence is not known.
virtual bool GetTrackSplitSeq() const
virtual TEditSaver GetEditSaver() const
virtual unsigned GetDefaultBlobCacheSizeLimit() const
virtual void GetSequenceStates(const TIds &ids, TLoaded &loaded, TSequenceStates &ret)
Bulk request for states of a set of sequences.
virtual void GetAccVers(const TIds &ids, TLoaded &loaded, TIds &ret)
Bulk request for accession.version Seq-ids of a set of sequences.
virtual TTSE_LockSet GetExternalRecords(const CBioseq_Info &bioseq)
Request from a datasource set of blobs with external annotations.
vector< TTaxId > TTaxIds
vector< int > TSequenceStates
vector< int > TSequenceHashes
vector< TChunk > TChunkSet
virtual void GetChunk(TChunk chunk_info)
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
CDataSource * m_DataSource
CTSE_Lock TTSE_Lock
bool LessByTypeId(const CBlobId &id2) const
set< TTSE_Lock > TTSE_LockSet
@ kPriority_Loader
Default priority for main loaders.
@ eExtFeatures
external features
@ eExtAnnot
all external annotations
@ eExtAlign
external aligns
@ eAll
all blobs (main and external)
@ eSequence
seq data
@ eBlob
whole main
@ eOrphanAnnot
all external annotations if no Bioseq exists
@ eGraph
graph annotations from main blob
@ eCore
?only seq-entry core?
@ eAnnot
all annotations from main blob
@ eBioseq
main blob with complete bioseq
@ eAlign
aligns from main blob
@ eBioseqCore
main blob with bioseq core (no seqdata and annots)
@ eExtGraph
external graph annotations
@ eFeatures
features from main blob
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
bool IsIncludedAnyNamedAnnotAccession(void) const
check if any named annot accession is included in the search
bool IsIncludedNamedAnnotAccession(const string &acc) const
check if named annot accession is included in the search
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_UInt
Definition: ncbi_limits.h:185
bool Empty(void) const
Definition: range.hpp:148
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static void PtrToString(string &out_str, const void *ptr)
Convert pointer to string.
Definition: ncbistr.cpp:2771
static const char label[]
E_Choice
Choice variants.
Definition: Seq_annot_.hpp:131
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
int i
int len
@ eNotFound
Not found.
CRef< objects::CObjectManager > om
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
Better replacement of GetSequenceHash(), this method should be defined in data loaders,...
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SAnnotSelector –.
Definition: type.c:6
#define _ASSERT
Modified on Wed Nov 29 02:20:53 2023 by modify_doxy.py rev. 669887