NCBI C++ ToolKit
data_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: data_loader.cpp 101834 2024-02-20 19:05:51Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Data loader base class for object manager
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/data_loader.hpp>
38 #include <objmgr/annot_name.hpp>
40 #include <objmgr/impl/tse_info.hpp>
44 #include <objmgr/scope.hpp>
45 #include <objmgr/bioseq_handle.hpp>
47 
48 
51 
52 
53 void CDataLoader::RegisterInObjectManager(
55  CLoaderMaker_Base& loader_maker,
56  CObjectManager::EIsDefault is_default,
57  CObjectManager::TPriority priority)
58 {
59  om.RegisterDataLoader(loader_maker, is_default, priority);
60 }
61 
62 
64 {
65  m_Name = NStr::PtrToString(this);
66  return;
67 }
68 
69 
70 CDataLoader::CDataLoader(const string& loader_name)
71  : m_Name(loader_name)
72 {
73  if (loader_name.empty())
74  {
75  m_Name = NStr::PtrToString(this);
76  }
77 }
78 
79 
81 {
82  return;
83 }
84 
85 
87 {
88  m_DataSource = &data_source;
89 }
90 
91 
93 {
94  return m_DataSource;
95 }
96 
97 
98 void CDataLoader::SetName(const string& loader_name)
99 {
100  m_Name = loader_name;
101 }
102 
103 
104 string CDataLoader::GetName(void) const
105 {
106  return m_Name;
107 }
108 
109 
111 {
112 }
113 
114 
115 void CDataLoader::GC(void)
116 {
117 }
118 
119 
122  EChoice /*choice*/)
123 {
124  NCBI_THROW(CLoaderException, eNotImplemented,
125  "CDataLoader::GetRecords() is not implemented in subclass");
126 }
127 
128 
131  EChoice choice)
132 {
133  try {
134  return GetRecords(idh, choice);
135  }
136  catch ( CBlobStateException& /* ignored */ ) {
137  return TTSE_LockSet();
138  }
139 }
140 
141 
144  const SRequestDetails& details)
145 {
146  return GetRecords(idh, DetailsToChoice(details));
147 }
148 
149 
152 {
153  TTSE_LockSet ret;
154  ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
155  if ( GetBlobId(*it) ) {
156  // correct id is found
157  TTSE_LockSet ret2 = GetRecords(*it, eExtAnnot);
158  ret.swap(ret2);
159  break;
160  }
161  }
162  return ret;
163 }
164 
165 
168  const SAnnotSelector* /*sel*/)
169 {
170  return GetRecords(idh, eOrphanAnnot);
171 }
172 
173 
176  const SAnnotSelector* /*sel*/)
177 {
178  return GetRecords(idh, eExtAnnot);
179 }
180 
181 
184  const SAnnotSelector* sel)
185 {
186  TTSE_LockSet ret;
187  ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
188  if ( !CanGetBlobById() || GetBlobId(*it) ) {
189  // correct id is found
190  TTSE_LockSet ret2 = GetExternalAnnotRecords(*it, sel);
191  if ( !ret2.empty() ) {
192  ret.swap(ret2);
193  break;
194  }
195  }
196  }
197  return ret;
198 }
199 
200 
202 {
203  return sel && sel->IsIncludedAnyNamedAnnotAccession();
204 }
205 
206 
207 bool CDataLoader::IsRequestedNA(const string& na,
208  const SAnnotSelector* sel)
209 {
210  return sel && sel->IsIncludedNamedAnnotAccession(na);
211 }
212 
213 
214 bool CDataLoader::IsProcessedNA(const string& na,
215  const TProcessedNAs* processed_nas)
216 {
217  return processed_nas && processed_nas->find(na) == processed_nas->end();
218 }
219 
220 
221 void CDataLoader::SetProcessedNA(const string& na,
222  TProcessedNAs* processed_nas)
223 {
224  if ( processed_nas ) {
225  processed_nas->insert(na);
226  }
227 }
228 
229 
232  const SAnnotSelector* sel,
233  TProcessedNAs* /*processed_nas*/)
234 {
235  // as a backup call old method that cannot report processed NAs
236  return GetOrphanAnnotRecords(idh, sel);
237 }
238 
239 
242  const SAnnotSelector* sel,
243  TProcessedNAs* processed_nas)
244 {
246  ITERATE(TSeq_idSet, id_it, ids) {
247  CDataLoader::TTSE_LockSet tse_set2 =
248  GetOrphanAnnotRecordsNA(*id_it, sel, processed_nas);
249  if (!tse_set2.empty()) {
250  if (tse_set.empty()) {
251  tse_set.swap(tse_set2);
252  }
253  else {
254  tse_set.insert(tse_set2.begin(), tse_set2.end());
255  }
256  }
257  }
258  return tse_set;
259 }
260 
261 
264  const SAnnotSelector* sel,
265  TProcessedNAs* /*processed_nas*/)
266 {
267  // as a backup call old method that cannot report processed NAs
268  return GetExternalAnnotRecords(idh, sel);
269 }
270 
271 
274  const SAnnotSelector* sel,
275  TProcessedNAs* /*processed_nas*/)
276 {
277  // as a backup call old method that cannot report processed NAs
278  return GetExternalAnnotRecords(bioseq, sel);
279 }
280 
281 
283 {
284  return false;
285 }
286 
287 
289 {
290  NCBI_THROW(CLoaderException, eNotImplemented,
291  "CDataLoader::GetBlobById() is not implemented in subclass");
292 }
293 
295 {
296  NCBI_THROW(CLoaderException, eNotImplemented,
297  "CDataLoader::GetBlobIdFromString(str) is not implemented in subclass");
298 }
299 
300 
302 {
304  ITERATE(TTSE_LockSet, it, locks) {
305  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
306  if ( bs_info ) {
307  ids = bs_info->GetId();
308  break;
309  }
310  }
311 }
312 
313 
315 {
316  // check if sequence exists
317  TIds ids;
318  GetIds(idh, ids);
319  return !ids.empty();
320 }
321 
322 
324 {
325  // default implementation based on GetIds();
326  TIds ids;
327  GetIds(idh, ids);
328  if ( ids.empty() ) {
330  "CDataLoader::GetAccVer() sequence not found");
331  }
333  if ( !acc ) {
334  NCBI_THROW(CLoaderException, eNoData,
335  "CDataLoader::GetAccVer() sequence doesn't have accession");
336  }
337  return acc;
338 }
339 
340 
343 {
344  // default implementation based on GetAccVer() and GetIds()
345  SAccVerFound ret;
346  try {
347  ret.acc_ver = GetAccVer(idh);
348  ret.sequence_found = ret.acc_ver || SequenceExists(idh);
349  }
350  catch ( CLoaderException& exc ) {
351  if ( exc.GetErrCode() == exc.eNotFound ) {
352  // no sequence
353  }
354  else if ( exc.GetErrCode() == exc.eNoData ) {
355  // sequence is known, but there is no accession
356  ret.sequence_found = true;
357  }
358  else {
359  // problem
360  throw;
361  }
362  }
363  return ret;
364 }
365 
366 
368 {
369  // default implementation based on GetIds();
370  TIds ids;
371  GetIds(idh, ids);
372  if ( ids.empty() ) {
374  "CDataLoader::GetGi() sequence not found");
375  }
376  TGi gi = CScope::x_GetGi(ids);
377  if ( gi == ZERO_GI ) {
378  NCBI_THROW(CLoaderException, eNoData,
379  "CDataLoader::GetGi() sequence doesn't have GI");
380  }
381  return gi;
382 }
383 
384 
386 {
387  // default implementation based on GetGi() and GetIds()
388  SGiFound ret;
389  try {
390  ret.gi = GetGi(idh);
391  ret.sequence_found = ret.gi != ZERO_GI || SequenceExists(idh);
392  }
393  catch ( CLoaderException& exc ) {
394  if ( exc.GetErrCode() == exc.eNotFound ) {
395  // no sequence
396  }
397  else if ( exc.GetErrCode() == exc.eNoData ) {
398  // sequence is known, but there is no GI
399  ret.sequence_found = true;
400  }
401  else {
402  // problem
403  throw;
404  }
405  }
406  return ret;
407 }
408 
409 
411 {
412  // default implementation based on GetIds();
413  TIds ids;
414  GetIds(idh, ids);
415  if ( ids.empty() ) {
416  return string();
417  }
418  return objects::GetLabel(ids);
419 }
420 
421 
423 {
424  // default implementation based on GetRecordsNoBlobState();
426  ITERATE(TTSE_LockSet, it, locks) {
427  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
428  if ( bs_info ) {
429  return bs_info->GetTaxId();
430  }
431  }
432  return INVALID_TAX_ID;
433 }
434 
435 
437 {
438  // default implementation based on GetRecordsNoBlobState()
440  ITERATE(TTSE_LockSet, it, locks) {
441  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
442  if ( bs_info ) {
443  return bs_info->GetBioseqLength();
444  }
445  }
446  return kInvalidSeqPos;
447 }
448 
449 
451 {
452  // default implementation based on GetRecordsNoBlobState()
454  ITERATE(TTSE_LockSet, it, locks) {
455  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
456  if ( bs_info ) {
457  CSeq_inst::TMol type = bs_info->GetInst_Mol();
458  if ( type == CSeq_inst::eMol_not_set ) {
459  NCBI_THROW(CLoaderException, eNoData,
460  "CDataLoader::GetSequenceType() type not set");
461  }
462  return type;
463  }
464  }
466  "CDataLoader::GetSequenceType() sequence not found");
467 }
468 
469 
472 {
473  // default implementation based on GetSequenceType() and GetIds()
474  STypeFound ret;
475  try {
476  ret.type = GetSequenceType(idh);
477  ret.sequence_found =
479  }
480  catch ( CLoaderException& exc ) {
481  if ( exc.GetErrCode() == exc.eNotFound ) {
482  // no sequence
483  }
484  else if ( exc.GetErrCode() == exc.eNoData ) {
485  // sequence is known, but there is no type
486  ret.sequence_found = true;
487  }
488  else {
489  // problem
490  throw;
491  }
492  }
493  return ret;
494 }
495 
496 
498 {
499  try {
500  TTSE_LockSet locks = GetRecords(idh, eBioseqCore);
501  ITERATE(TTSE_LockSet, it, locks) {
502  CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
503  if ( bs_info ) {
504  return (*it)->GetBlobState();
505  }
506  }
508  }
509  catch ( CBlobStateException& exc ) {
510  return exc.GetBlobState();
511  }
512 }
513 
514 
516 {
517  if ( SequenceExists(idh) ) {
518  NCBI_THROW(CLoaderException, eNoData,
519  "CDataLoader::GetSequenceHash() sequence hash not set");
520  }
522  "CDataLoader::GetSequenceHash() sequence not found");
523 }
524 
525 
528 {
529  // default implementation based on GetSequenceHash() and GetIds()
530  SHashFound ret;
531  try {
532  ret.hash = GetSequenceHash(idh);
533  if ( !ret.hash ) {
534  // hash = 0, we don't know what causes it:
535  // absence of sequence, unknown hash, or the hash happens to be 0.
536  ret.sequence_found = SequenceExists(idh);
537  }
538  else {
539  ret.sequence_found = true;
540  ret.hash_known = true;
541  }
542  }
543  catch ( CLoaderException& exc ) {
544  if ( exc.GetErrCode() == exc.eNotFound ) {
545  // no sequence found
546  }
547  else if ( exc.GetErrCode() == exc.eNoData ) {
548  // sequence exists
549  ret.sequence_found = true;
550  }
551  else {
552  // problem
553  throw;
554  }
555  }
556  return ret;
557 }
558 
559 
560 void CDataLoader::GetBulkIds(const TIds& ids, TLoaded& loaded, TBulkIds& ret)
561 {
562  size_t count = ids.size();
563  _ASSERT(ids.size() == loaded.size());
564  _ASSERT(ids.size() == ret.size());
565  TIds seq_ids;
566  for ( size_t i = 0; i < count; ++i ) {
567  if ( loaded[i] ) {
568  continue;
569  }
570  ret[i].clear();
571  GetIds(ids[i], ret[i]);
572  if ( !ret[i].empty() ) {
573  loaded[i] = true;
574  }
575  }
576 }
577 
578 
579 void CDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
580 {
581  size_t count = ids.size();
582  _ASSERT(ids.size() == loaded.size());
583  _ASSERT(ids.size() == ret.size());
584  TIds seq_ids;
585  for ( size_t i = 0; i < count; ++i ) {
586  if ( loaded[i] ) {
587  continue;
588  }
590  if ( data.sequence_found ) {
591  ret[i] = data.acc_ver;
592  loaded[i] = true;
593  }
594  }
595 }
596 
597 
598 void CDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
599 {
600  size_t count = ids.size();
601  _ASSERT(ids.size() == loaded.size());
602  _ASSERT(ids.size() == ret.size());
603  TIds seq_ids;
604  for ( size_t i = 0; i < count; ++i ) {
605  if ( loaded[i] ) {
606  continue;
607  }
608  SGiFound data = GetGiFound(ids[i]);
609  if ( data.sequence_found ) {
610  ret[i] = data.gi;
611  loaded[i] = true;
612  }
613  }
614 }
615 
616 
617 void CDataLoader::GetLabels(const TIds& ids, TLoaded& loaded, TLabels& ret)
618 {
619  size_t count = ids.size();
620  _ASSERT(ids.size() == loaded.size());
621  _ASSERT(ids.size() == ret.size());
622  for ( size_t i = 0; i < count; ++i ) {
623  if ( loaded[i] ) {
624  continue;
625  }
626  string label = GetLabel(ids[i]);
627  if ( !label.empty() ) {
628  ret[i] = label;
629  loaded[i] = true;
630  }
631  }
632 }
633 
634 
635 void CDataLoader::GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret)
636 {
637  size_t count = ids.size();
638  _ASSERT(ids.size() == loaded.size());
639  _ASSERT(ids.size() == ret.size());
640  for ( size_t i = 0; i < count; ++i ) {
641  if ( loaded[i] ) {
642  continue;
643  }
644 
645  TTaxId taxid = GetTaxId(ids[i]);
646  if ( taxid != INVALID_TAX_ID ) {
647  ret[i] = taxid;
648  loaded[i] = true;
649  }
650  }
651 }
652 
653 
655  TSequenceLengths& ret)
656 {
657  size_t count = ids.size();
658  _ASSERT(ids.size() == loaded.size());
659  _ASSERT(ids.size() == ret.size());
660  for ( size_t i = 0; i < count; ++i ) {
661  if ( loaded[i] ) {
662  continue;
663  }
664 
665  TSeqPos len = GetSequenceLength(ids[i]);
666  if ( len != kInvalidSeqPos ) {
667  ret[i] = len;
668  loaded[i] = true;
669  }
670  }
671 }
672 
673 
674 void CDataLoader::GetSequenceTypes(const TIds& ids, TLoaded& loaded,
675  TSequenceTypes& ret)
676 {
677  size_t count = ids.size();
678  _ASSERT(ids.size() == loaded.size());
679  _ASSERT(ids.size() == ret.size());
680  for ( size_t i = 0; i < count; ++i ) {
681  if ( loaded[i] ) {
682  continue;
683  }
684 
686  if ( data.sequence_found ) {
687  ret[i] = data.type;
688  loaded[i] = true;
689  }
690  }
691 }
692 
693 
694 void CDataLoader::GetSequenceStates(const TIds& ids, TLoaded& loaded,
695  TSequenceStates& ret)
696 {
697  const int kNotFound = (CBioseq_Handle::fState_not_found |
699 
700  size_t count = ids.size();
701  _ASSERT(ids.size() == loaded.size());
702  _ASSERT(ids.size() == ret.size());
703  for ( size_t i = 0; i < count; ++i ) {
704  if ( loaded[i] ) {
705  continue;
706  }
707 
708  int state = GetSequenceState(ids[i]);
709  if ( state != kNotFound ) {
710  ret[i] = state;
711  loaded[i] = true;
712  }
713  }
714 }
715 
716 
717 void CDataLoader::GetSequenceHashes(const TIds& ids, TLoaded& loaded,
718  TSequenceHashes& ret, THashKnown& known)
719 {
720  size_t count = ids.size();
721  _ASSERT(ids.size() == loaded.size());
722  _ASSERT(ids.size() == ret.size());
723  for ( size_t i = 0; i < count; ++i ) {
724  if ( loaded[i] ) {
725  continue;
726  }
727 
729  if ( data.sequence_found ) {
730  ret[i] = data.hash;
731  loaded[i] = true;
732  known[i] = data.hash_known;
733  }
734  }
735 }
736 
737 
738 void CDataLoader::GetCDDAnnots(const TSeqIdSets& id_sets, TLoaded& loaded, TCDD_Locks& ret)
739 {
740  SAnnotSelector sel;
741  sel.AddNamedAnnots("CDD")
744  for (size_t i = 0; i < id_sets.size(); ++i) {
745  for (auto id : id_sets[i]) {
747  CConstRef<CBioseq_Info> bs_info;
748  ITERATE(TTSE_LockSet, it, locks) {
749  bs_info = (*it)->FindMatchingBioseq(id);
750  if (!bs_info) continue;
751  TProcessedNAs nas;
752  const auto& id_locks = GetExternalAnnotRecordsNA(*bs_info, &sel, &nas);
753  if (id_locks.empty()) continue;
754  ret[i] = *id_locks.begin();
755  loaded[i] = true;
756  break;
757  }
758  if (loaded[i]) break;
759  }
760  }
761 }
762 
763 
765 {
766  NON_CONST_ITERATE(TTSE_LockSets, tse_set, tse_sets) {
767  tse_set->second = GetRecords(tse_set->first, eBlob);
768  }
769 }
770 
771 
774 {
775  EChoice ret = eCore;
776  ITERATE ( SRequestDetails::TAnnotSet, i, annots ) {
777  ITERATE ( SRequestDetails::TAnnotTypesSet, j, i->second ) {
778  EChoice cur = eCore;
779  switch ( j->GetAnnotType() ) {
781  cur = eFeatures;
782  break;
784  cur = eGraph;
785  break;
787  cur = eAlign;
788  break;
790  return eAnnot;
791  default:
792  break;
793  }
794  if ( cur != eCore && cur != ret ) {
795  if ( ret != eCore ) return eAnnot;
796  ret = cur;
797  }
798  }
799  }
800  return ret;
801 }
802 
803 
806 {
807  EChoice ret = DetailsToChoice(details.m_NeedAnnots);
808  switch ( details.m_AnnotBlobType ) {
810  // no annotations
811  ret = eCore;
812  break;
814  // no change
815  break;
817  // shift from internal to external annotations
818  _ASSERT(ret >= eFeatures && ret <= eAnnot);
819  ret = EChoice(ret + eExtFeatures - eFeatures);
820  _ASSERT(ret >= eExtFeatures && ret <= eExtAnnot);
821  break;
823  // all orphan annots
824  ret = eOrphanAnnot;
825  break;
826  default:
827  // all other cases -> eAll
828  ret = eAll;
829  break;
830  }
831  if ( !details.m_NeedSeqMap.Empty() || !details.m_NeedSeqData.Empty() ) {
832  // include sequence
833  if ( ret == eCore ) {
834  ret = eSequence;
835  }
836  else if ( ret >= eFeatures && ret <= eAnnot ) {
837  // only internal annot + sequence -> whole blob
838  ret = eBlob;
839  }
840  else {
841  // all blobs
842  ret = eAll;
843  }
844  }
845  return ret;
846 }
847 
848 
850 {
851  SRequestDetails details;
853  bool sequence = false;
854  switch ( choice ) {
855  case eAll:
856  sequence = true;
857  // from all blobs
859  break;
860  case eBlob:
861  case eBioseq:
862  case eBioseqCore:
863  sequence = true;
864  // internal only
866  break;
867  case eSequence:
868  sequence = true;
869  break;
870  case eAnnot:
871  // internal only
873  break;
874  case eGraph:
876  // internal only
878  break;
879  case eFeatures:
881  // internal only
883  break;
884  case eAlign:
886  // internal only
888  break;
889  case eExtAnnot:
890  // external only
892  break;
893  case eExtGraph:
895  // external only
897  break;
898  case eExtFeatures:
900  // external only
902  break;
903  case eExtAlign:
905  // external only
907  break;
908  case eOrphanAnnot:
909  // orphan annotations only
911  break;
912  default:
913  break;
914  }
915  if ( sequence ) {
918  }
921  }
922  return details;
923 }
924 
925 
926 void CDataLoader::GetChunk(TChunk /*chunk_info*/)
927 {
928  NCBI_THROW(CLoaderException, eNotImplemented,
929  "CDataLoader::GetChunk() is not implemented in subclass");
930 }
931 
932 
934 {
935  ITERATE ( TChunkSet, it, chunks ) {
936  GetChunk(*it);
937  }
938 }
939 
940 
943  const TTSE_LockSet& /*tse_set*/)
944 {
945  return TTSE_Lock();
946 }
947 
948 
950 {
951  return TBlobId();
952 }
953 
954 
956 {
957  return 0;
958 }
959 
961 {
962  return TEditSaver();
963 }
964 
965 
967 {
969 }
970 
971 
973 {
974  return 32000; // assume 32KB chunk size
975 }
976 
977 
978 double CDataLoader::EstimateLoadSeconds(const CTSE_Chunk_Info& /*chunk*/, Uint4 bytes) const
979 {
980  return bytes*1e-7+0.001; // assume 10MB/s transfer speed and 1ms overhead
981 }
982 
983 
985 {
986  return kMax_UInt;
987 }
988 
990 {
991  return false;
992 }
993 
994 
995 /////////////////////////////////////////////////////////////////////////////
996 // CBlobId
997 
999 {
1000 }
1001 
1002 bool CBlobId::LessByTypeId(const CBlobId& id2) const
1003 {
1004  return typeid(*this).before(typeid(id2));
1005 }
1006 
1007 bool CBlobId::operator==(const CBlobId& id) const
1008 {
1009  return !(*this < id || id < *this);
1010 }
1011 
1012 
TInst_Mol GetInst_Mol(void) const
vector< CSeq_id_Handle > TId
Definition: bioseq_info.hpp:91
TTaxId GetTaxId(void) const
const TId & GetId(void) const
TSeqPos GetBioseqLength(void) const
Blob state exceptions, used by GenBank loader.
Data loader exceptions, used by GenBank loader.
CObjectManager –.
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
void swap(this_type &m)
Definition: set.hpp:102
static int type
Definition: getdata.c:31
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:690
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
string GetLabel(const CSeq_id &id)
virtual int GetSequenceState(const CSeq_id_Handle &idh)
Request for a state of a sequence.
virtual CObjectManager::TPriority GetDefaultPriority(void) const
virtual void GetSequenceHashes(const TIds &ids, TLoaded &loaded, TSequenceHashes &ret, THashKnown &known)
Bulk request for hashes of a set of sequences.
void SetName(const string &loader_name)
Definition: data_loader.cpp:98
virtual TTSE_LockSet GetOrphanAnnotRecords(const CSeq_id_Handle &idh, const SAnnotSelector *sel)
old Get*AnnotRecords() methods
CRef< IEditSaver > TEditSaver
virtual SRequestDetails ChoiceToDetails(EChoice choice) const
void SetTargetDataSource(CDataSource &data_source)
Specify datasource to send loaded data to.
Definition: data_loader.cpp:86
virtual ~CDataLoader(void)
Definition: data_loader.cpp:80
vector< TSeqPos > TSequenceLengths
CDataLoader(void)
Definition: data_loader.cpp:63
virtual int GetSequenceHash(const CSeq_id_Handle &idh)
Request for a sequence hash.
virtual EChoice DetailsToChoice(const SRequestDetails::TAnnotSet &annots) const
vector< CTSE_Lock > TCDD_Locks
vector< CSeq_id_Handle > TIds
virtual TTSE_LockSet GetOrphanAnnotRecordsNA(const CSeq_id_Handle &idh, const SAnnotSelector *sel, TProcessedNAs *processed_nas)
new Get*AnnotRecords() methods
CSeq_inst::TMol type
TBlobState GetBlobState(void)
CBlobIdKey TBlobId
virtual TBlobId GetBlobIdFromString(const string &str) const
static void SetProcessedNA(const string &na, TProcessedNAs *processed_nas)
vector< bool > THashKnown
string GetName(void) const
CDataSource * GetDataSource(void) const
Definition: data_loader.cpp:92
virtual TTSE_Lock ResolveConflict(const CSeq_id_Handle &id, const TTSE_LockSet &tse_set)
Resolve TSE conflict: select the best TSE from the set of dead TSEs.
vector< TGi > TGis
virtual void GetTaxIds(const TIds &ids, TLoaded &loaded, TTaxIds &ret)
Bulk request for taxonomy ids of a set of sequences.
virtual void GetBulkIds(const TIds &ids, TLoaded &loaded, TBulkIds &ret)
Bulk request for all Seq-ids of a set of sequences.
virtual bool CanGetBlobById(void) const
virtual TTSE_Lock GetBlobById(const TBlobId &blob_id)
virtual void GetIds(const CSeq_id_Handle &idh, TIds &ids)
Request for a list of all Seq-ids of a sequence.
TRange m_NeedSeqData
Definition: data_loader.hpp:96
EChoice
main blob is blob with sequence all other blobs are external and contain external annotations
vector< TIds > TBulkIds
TTSE_LockSet GetRecordsNoBlobState(const CSeq_id_Handle &idh, EChoice choice)
The same as GetRecords() but always returns empty TSE lock set instead of throwing CBlobStateExceptio...
virtual TGi GetGi(const CSeq_id_Handle &idh)
Request for a gi of a sequence.
static bool IsRequestedAnyNA(const SAnnotSelector *sel)
vector< CSeq_inst::TMol > TSequenceTypes
virtual TTSE_LockSet GetRecords(const CSeq_id_Handle &idh, EChoice choice)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
virtual TTSE_LockSet GetDetailedRecords(const CSeq_id_Handle &idh, const SRequestDetails &details)
Request from a datasource using handles and ranges instead of seq-loc The TSEs loaded in this call wi...
vector< vector< CSeq_id_Handle > > TSeqIdSets
static bool IsRequestedNA(const string &na, const SAnnotSelector *sel)
virtual bool operator==(const CBlobId &id) const
virtual TTaxId GetTaxId(const CSeq_id_Handle &idh)
Request for a taxonomy id of a sequence.
virtual string GetLabel(const CSeq_id_Handle &idh)
Request for a label string of a sequence.
bool SequenceExists(const CSeq_id_Handle &idh)
helper function to check if sequence exists, uses GetIds()
virtual double EstimateLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
virtual CSeq_id_Handle GetAccVer(const CSeq_id_Handle &idh)
Request for an accession.version Seq-id of a sequence.
static bool IsProcessedNA(const string &na, const TProcessedNAs *processed_nas)
virtual STypeFound GetSequenceTypeFound(const CSeq_id_Handle &idh)
static CSeq_id_Handle x_GetAccVer(const TIds &ids)
Definition: scope.cpp:425
virtual void GetSequenceTypes(const TIds &ids, TLoaded &loaded, TSequenceTypes &ret)
Bulk request for types of a set of sequences.
virtual void GetCDDAnnots(const TSeqIdSets &id_sets, TLoaded &loaded, TCDD_Locks &ret)
virtual void GetBlobs(TTSE_LockSets &tse_sets)
virtual SAccVerFound GetAccVerFound(const CSeq_id_Handle &idh)
virtual Uint4 EstimateLoadBytes(const CTSE_Chunk_Info &chunk) const
TAnnotSet m_NeedAnnots
Definition: data_loader.hpp:97
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Request for a length of a sequence.
virtual void GC(void)
vector< bool > TLoaded
Bulk loading interface for small pieces of information per id.
virtual ~CBlobId(void)
virtual SHashFound GetSequenceHashFound(const CSeq_id_Handle &idh)
virtual SGiFound GetGiFound(const CSeq_id_Handle &idh)
virtual void GetSequenceLengths(const TIds &ids, TLoaded &loaded, TSequenceLengths &ret)
Bulk request for lengths of a set of sequences.
virtual TTSE_LockSet GetExternalAnnotRecords(const CSeq_id_Handle &idh, const SAnnotSelector *sel)
virtual void DropTSE(CRef< CTSE_Info > tse_info)
virtual void GetGis(const TIds &ids, TLoaded &loaded, TGis &ret)
Bulk request for gis of a set of sequences.
static TGi x_GetGi(const TIds &ids)
Definition: scope.cpp:448
vector< string > TLabels
TAnnotBlobType m_AnnotBlobType
Definition: data_loader.hpp:98
virtual TTSE_LockSet GetExternalAnnotRecordsNA(const CSeq_id_Handle &idh, const SAnnotSelector *sel, TProcessedNAs *processed_nas)
virtual void GetLabels(const TIds &ids, TLoaded &loaded, TLabels &ret)
Bulk request for label strings of a set of sequences.
virtual TBlobVersion GetBlobVersion(const TBlobId &id)
virtual void GetChunks(const TChunkSet &chunks)
virtual CSeq_inst::TMol GetSequenceType(const CSeq_id_Handle &idh)
Request for a type of a sequence. Returns CSeq_inst::eMol_not_set if sequence is not known.
virtual bool GetTrackSplitSeq() const
virtual TEditSaver GetEditSaver() const
virtual unsigned GetDefaultBlobCacheSizeLimit() const
virtual void GetSequenceStates(const TIds &ids, TLoaded &loaded, TSequenceStates &ret)
Bulk request for states of a set of sequences.
virtual void GetAccVers(const TIds &ids, TLoaded &loaded, TIds &ret)
Bulk request for accession.version Seq-ids of a set of sequences.
virtual TTSE_LockSet GetExternalRecords(const CBioseq_Info &bioseq)
Request from a datasource set of blobs with external annotations.
vector< TTaxId > TTaxIds
vector< int > TSequenceStates
vector< int > TSequenceHashes
vector< TChunk > TChunkSet
virtual void GetChunk(TChunk chunk_info)
virtual TBlobId GetBlobId(const CSeq_id_Handle &idh)
CDataSource * m_DataSource
CTSE_Lock TTSE_Lock
bool LessByTypeId(const CBlobId &id2) const
set< TTSE_Lock > TTSE_LockSet
@ kPriority_Loader
Default priority for main loaders.
@ eExtFeatures
external features
@ eExtAnnot
all external annotations
@ eExtAlign
external aligns
@ eAll
all blobs (main and external)
@ eSequence
seq data
@ eBlob
whole main
@ eOrphanAnnot
all external annotations if no Bioseq exists
@ eGraph
graph annotations from main blob
@ eCore
?only seq-entry core?
@ eAnnot
all annotations from main blob
@ eBioseq
main blob with complete bioseq
@ eAlign
aligns from main blob
@ eBioseqCore
main blob with bioseq core (no seqdata and annots)
@ eExtGraph
external graph annotations
@ eFeatures
features from main blob
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
bool IsIncludedAnyNamedAnnotAccession(void) const
check if any named annot accession is included in the search
bool IsIncludedNamedAnnotAccession(const string &acc) const
check if named annot accession is included in the search
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_UInt
Definition: ncbi_limits.h:185
bool Empty(void) const
Definition: range.hpp:148
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static void PtrToString(string &out_str, const void *ptr)
Convert pointer to string.
Definition: ncbistr.cpp:2762
static const char label[]
E_Choice
Choice variants.
Definition: Seq_annot_.hpp:131
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
@ eMol_not_set
> cdna = rna
Definition: Seq_inst_.hpp:109
int i
int len
constexpr bool empty(list< Ts... >) noexcept
#define count
@ eNotFound
Not found.
CRef< objects::CObjectManager > om
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
Better replacement of GetSequenceHash(), this method should be defined in data loaders,...
Better replacement of GetSequenceType(), this method should be defined in data loaders,...
SAnnotSelector –.
Definition: type.c:6
#define _ASSERT
Modified on Fri Sep 20 14:57:19 2024 by modify_doxy.py rev. 669887