NCBI C++ ToolKit
snpread.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: snpread.cpp 100333 2023-07-20 14:36:26Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko
27  *
28  * File Description:
29  * Access to SNP files
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbi_param.hpp>
48 #include <sra/error_codes.hpp>
49 #include <unordered_map>
50 
52 #include <ncbi/ncbi.h>
53 #include <insdc/insdc.h>
54 
56 
57 template<>
59 {
60  size_t operator()(ncbi::CTempString val) const
61  {
62  unsigned long __h = 5381;
63  for ( auto c : val ) {
64  __h = __h*17 + c;
65  }
66  return size_t(__h);
67  }
68 };
69 
71 
73 
74 #define NCBI_USE_ERRCODE_X SNPReader
76 
78 
79 
80 #define RC_NO_MORE_ALIGNMENTS RC(rcApp, rcQuery, rcSearching, rcRow, rcNotFound)
81 
82 static const TSeqPos kPageSize = 5000;
83 static const TSeqPos kMaxSNPLength = 256;
85 static const TSeqPos kCoverageZoom = 100;
86 
87 static const char kDefaultAnnotName[] = "SNP";
88 
89 static const char kFeatSubtypesToChars[] = "U-VSMLDIR";
90 
91 static const bool kPreloadSeqList = false;
92 static const bool kPage2FeatErrorWorkaround = true;
93 
94 
95 /////////////////////////////////////////////////////////////////////////////
96 // CSNPDb_Impl
97 /////////////////////////////////////////////////////////////////////////////
98 
99 
101  explicit SSeqTableCursor(const CVDBTable& table);
102 
104 
107 };
108 
109 
111  explicit STrackTableCursor(const CVDBTable& table);
112 
114 
118 };
119 
120 
122  explicit SGraphTableCursor(const CVDBTable& table);
123 
125 
126  DECLARE_VDB_COLUMN_AS(TVDBRowId, FILTER_ID_ROW_NUM);
131 };
132 
133 
135  explicit SPageTableCursor(const CVDBTable& table);
136 
138 
141  DECLARE_VDB_COLUMN_AS(TVDBRowId, FEATURE_ROW_FROM);
142  DECLARE_VDB_COLUMN_AS(TVDBRowCount, FEATURE_ROWS_COUNT);
143 };
144 
145 
147  explicit SFeatTableCursor(const CVDBTable& table);
148 
150 
155  DECLARE_VDB_COLUMN_AS(Uint4, FEAT_SUBTYPE);
156  DECLARE_VDB_COLUMN_AS(Uint4, FEAT_ID_PREFIX);
157  DECLARE_VDB_COLUMN_AS(Uint8, FEAT_ID_VALUE);
161 };
162 
163 
165  explicit SExtraTableCursor(const CVDBTable& table);
166 
168 
170 };
171 
172 
174  : m_Cursor(table),
175  INIT_VDB_COLUMN(ACCESSION),
177 {
178 }
179 
180 
182  : m_Cursor(table),
186 {
187 }
188 
189 
191  : m_Cursor(table),
192  INIT_VDB_COLUMN(FILTER_ID_ROW_NUM),
193  INIT_VDB_COLUMN(SEQ_ID_ROW_NUM),
194  INIT_VDB_COLUMN(BLOCK_FROM),
195  INIT_VDB_COLUMN(GR_TOTAL),
196  INIT_VDB_COLUMN(GR_ZOOM)
197 {
198 }
199 
200 
202  : m_Cursor(table),
203  INIT_VDB_COLUMN(SEQ_ID_ROW_NUM),
204  INIT_VDB_COLUMN(PAGE_FROM),
205  INIT_VDB_COLUMN(FEATURE_ROW_FROM),
206  INIT_VDB_COLUMN(FEATURE_ROWS_COUNT)
207 {
208 }
209 
210 
212  : m_Cursor(table),
213  INIT_VDB_COLUMN(SEQ_ID_ROW_NUM),
216  INIT_VDB_COLUMN(FEAT_TYPE),
217  INIT_VDB_COLUMN(FEAT_SUBTYPE),
218  INIT_VDB_COLUMN(FEAT_ID_PREFIX),
219  INIT_VDB_COLUMN(FEAT_ID_VALUE),
220  INIT_VDB_COLUMN(BIT_FLAGS),
221  INIT_VDB_COLUMN(EXTRA_ROW_FROM),
222  INIT_VDB_COLUMN(EXTRA_ROWS_COUNT)
223 {
224 }
225 
226 
228  : m_Cursor(table),
229  INIT_VDB_COLUMN(RS_ALLELE)
230 {
231 }
232 
233 
235 {
236  CRef<SSeqTableCursor> curs = m_Seq.Get(row);
237  if ( !curs ) {
238  curs = new SSeqTableCursor(SeqTable());
239  }
240  return curs;
241 }
242 
243 
245 {
246  CRef<SGraphTableCursor> curs = m_Graph.Get(row);
247  if ( !curs ) {
248  curs = new SGraphTableCursor(GraphTable());
249  }
250  return curs;
251 }
252 
253 
255 {
256  CRef<SPageTableCursor> curs = m_Page.Get(row);
257  if ( !curs ) {
258  curs = new SPageTableCursor(PageTable());
259  }
260  return curs;
261 }
262 
263 
265 {
266  CRef<SFeatTableCursor> curs = m_Feat.Get(row);
267  if ( !curs ) {
268  curs = new SFeatTableCursor(FeatTable());
269  }
270  return curs;
271 }
272 
273 
275 {
276  CRef<SExtraTableCursor> curs = m_Extra.Get(row);
277  if ( !curs ) {
278  if ( const CVDBTable& table = ExtraTable() ) {
279  curs = new SExtraTableCursor(table);
280  }
281  }
282  return curs;
283 }
284 
285 
287 {
288  m_Seq.Put(curs, row);
289 }
290 
291 
293 {
294  m_Graph.Put(curs, row);
295 }
296 
297 
299 {
300  m_Page.Put(curs, row);
301 }
302 
303 
305 {
306  m_Feat.Put(curs, row);
307 }
308 
309 
311 {
312  m_Extra.Put(curs, row);
313 }
314 
315 
317  CTempString path_or_acc)
318  : m_Mgr(mgr),
319  m_DbPath(path_or_acc)
320 {
322  // SNP VDB are multi-table VDB objects.
323  // However, there could be other VDBs in the same namespace (NA*)
324  // so we have to check this situation and return normal eNotFoundDb error.
325  try {
326  m_Db = CVDB(m_Mgr, path_or_acc);
327  }
328  catch ( CSraException& exc ) {
329  bool another_vdb_table = false;
330  if ( exc.GetErrCode() != exc.eNotFoundDb ) {
331  // check if the accession refers some other VDB object
332  try {
333  CVDBTable table(mgr, path_or_acc);
334  another_vdb_table = true;
335  }
336  catch ( CSraException& /*exc2*/ ) {
337  }
338  }
339  if ( another_vdb_table ) {
340  // It's some other VDB table object
341  // report eNotFoundDb with original rc
342  NCBI_THROW2_FMT(CSraException, eNotFoundDb,
343  "Cannot open VDB: "<<path_or_acc,
344  exc.GetRC());
345  }
346  else {
347  // neither VDB nor another VDB table
348  // report original exception
349  throw;
350  }
351  }
352 
353  {
354  // load track list
355  CVDBTable track_table(m_Db, "TRACK_FILTER");
356  STrackTableCursor cur(track_table);
357  size_t track_count = cur.m_Cursor.GetMaxRowId();
358  m_TrackList.resize(track_count);
359  for ( size_t i = 0; i < track_count; ++i ) {
361  TVDBRowId row = i+1;
362  info.m_Name = *cur.NAME(row);
363  NStr::TrimSuffixInPlace(info.m_Name, "\r");
364  info.m_Filter.m_Filter = *cur.BITS(row);
365  info.m_Filter.m_FilterMask = *cur.MASK(row);
366  m_TrackMapByName[info.m_Name] = i;
367  }
368  if ( track_count == 0 ) {
369  // default track without filtering
370  m_TrackList.resize(1);
371  }
372  m_TrackMapByName[""] = 0;
373  }
374 
375  // open tables
376  m_SeqTable = CVDBTable(m_Db, "SEQUENCE");
378  m_PageTable = CVDBTable(m_Db, "PAGE");
379  m_GraphTable = CVDBTable(m_Db, "COVERAGE_GRAPH");
380  m_FeatTable = CVDBTable(m_Db, "FEAT");
381  m_ExtraTable = CVDBTable(m_Db, "EXTRA");
382 
383  {
384  // load sequence list
385  CRef<SSeqTableCursor> cur = Seq();
386 
387  size_t seq_count = cur->m_Cursor.GetMaxRowId();
388  m_SeqList.resize(seq_count);
389  if ( kPreloadSeqList ) {
390  for ( size_t i = 0; i < seq_count; ++i ) {
391  SSeqInfo& info = m_SeqList[i];
392  TVDBRowId row = i+1;
393  CTempString ref_id = *cur->ACCESSION(row);
394  info.m_Seq_id = new CSeq_id(ref_id);
395  info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(*info.m_Seq_id);
396  info.m_SeqLength = *cur->LEN(row);
397  info.m_Circular = false;
398  // index
399  m_SeqMapBySeq_id[info.m_Seq_id_Handle] = i;
400  }
401  }
402 
403  Put(cur);
404  }
405 
406  if ( kPreloadSeqList ) {
407  // prepare page index
409 
410  for ( TVDBRowId row = 1, max_row = cur->m_Cursor.GetMaxRowId();
411  row <= max_row; ++row ) {
412  SSeqInfo& info = m_SeqList.at(size_t(*cur->SEQ_ID_ROW_NUM(row)-1));
413  SSeqInfo::SPageSet pset;
414  pset.m_SeqPos = *cur->PAGE_FROM(row);
415  pset.m_PageCount = 1;
416  pset.m_RowId = row;
417  if ( !info.m_PageSets.empty() ) {
418  SSeqInfo::SPageSet& prev_pset = info.m_PageSets.back();
419  if ( prev_pset.GetSeqPosEnd(kPageSize) == pset.m_SeqPos &&
420  prev_pset.GetRowIdEnd() == pset.m_RowId ) {
421  prev_pset.m_PageCount += 1;
422  continue;
423  }
424  }
425  info.m_PageSets.push_back(pset);
426  }
427 
428  Put(cur);
429  }
430 
431  // update length and graph positions
432  if ( kPreloadSeqList ) {
433  size_t track_count = m_TrackList.size();
434  TVDBRowId graph_row = 1;
435  for ( auto& info : m_SeqList ) {
436  if ( !info.m_SeqLength ) {
437  info.m_SeqLength = info.m_PageSets.back().GetSeqPosEnd(kPageSize);
438  }
439  info.m_GraphRowId = graph_row;
440  TSeqPos pages = (info.m_SeqLength - 1)/kPageSize + 1;
441  graph_row += pages * track_count;
442  }
443  }
444 }
445 
446 
448 {
449 }
450 
451 
453 {
454  return kPageSize;
455 }
456 
457 
459 {
460  return kOverviewZoom;
461 }
462 
463 
465 {
466  return kCoverageZoom;
467 }
468 
469 
470 CSNPDb_Impl::TTrackInfoList::const_iterator
471 CSNPDb_Impl::FindTrack(const string& name) const
472 {
474  if ( it == m_TrackMapByName.end() ) {
475  return m_TrackList.end();
476  }
477  else {
478  return m_TrackList.begin()+it->second;
479  }
480 }
481 
482 
483 CSNPDb_Impl::TSeqInfoList::const_iterator
484 CSNPDb_Impl::FindSeq(const string& accession, int version)
485 {
486  string prefix = accession+'.';
488  string ver = NStr::NumericToString(version);
489 
491  if ( !range.second ) {
492  // try to find NC_000001.011 format
493  ver = "000";
494  ver[0] += version/100;
495  ver[1] += (version/10)%10;
496  ver[2] += version%10;
498  }
499 
500  if ( !range.second ) {
501  return m_SeqList.end();
502  }
503  size_t index = size_t(range.first-1);
504  if ( index >= m_SeqList.size() ) {
505  return m_SeqList.end();
506  }
507  TSeqInfoList::const_iterator iter = m_SeqList.begin()+index;
508  x_Update(iter);
509  return iter;
510 }
511 
512 
513 CSNPDb_Impl::TSeqInfoList::const_iterator
515 {
516  if ( 1 ) {
517  CConstRef<CSeq_id> id = seq_id.GetSeqId();
518  if ( const CTextseq_id* text_id = id->GetTextseq_Id() ) {
519  if ( text_id->IsSetAccession() && text_id->IsSetVersion() &&
520  !text_id->IsSetName() && !text_id->IsSetRelease() ) {
521  return FindSeq(text_id->GetAccession(), text_id->GetVersion());
522  }
523  }
524  return m_SeqList.end();
525  }
527  if ( it == m_SeqMapBySeq_id.end() ) {
528  return m_SeqList.end();
529  }
530  else {
531  return m_SeqList.begin()+it->second;
532  }
533 }
534 
535 
536 void
537 CSNPDb_Impl::x_Update(TSeqInfoList::const_iterator seq)
538 {
539  _ASSERT(seq >= m_SeqList.begin() && seq < m_SeqList.end());
540  if ( seq->GetMainSeq_id() ) {
541  // already updated
542  return;
543  }
544  CMutexGuard guard(m_Mutex);
546  size_t seq_index = seq-m_SeqList.begin();
547  SSeqInfo& info = m_SeqList[seq_index];
548  if ( info.m_Seq_id ) {
549  // already updated
550  return;
551  }
552  TVDBRowId seq_row = TVDBRowId(seq_index+1);
553 
554  // update id and length
555  CRef<CSeq_id> seq_id;
556  {
557  CRef<SSeqTableCursor> cur = Seq();
558 
559  CTempString ref_id = *cur->ACCESSION(seq_row);
560  seq_id = new CSeq_id(ref_id);
561  info.m_Seq_id_Handle = CSeq_id_Handle::GetHandle(*seq_id);
562  info.m_SeqLength = *cur->LEN(seq_row);
563  info.m_Circular = false;
564 
565  Put(cur);
566  }
567 
568  // update page range
569  {
571 
572  TVDBRowId max_row = cur->m_Cursor.GetMaxRowId();
573  TVDBRowId row_before = 0; // < seq_row
574 
575  // binary search
576  {
577  // look for better bounds
578  auto iter = m_Seq2PageMap.lower_bound(seq_row);
579  if ( iter != m_Seq2PageMap.end() ) {
580  max_row = iter->second;
581  }
582  if ( iter != m_Seq2PageMap.begin() ) {
583  --iter;
584  auto& slot = *iter;
585  row_before = slot.second;
586  }
587  }
588 
589  TVDBRowId row = max_row; // >= seq_row
590  while ( row_before+1 < row ) {
591  TVDBRowId mid_row = row_before+(row-row_before)/2;
592  if ( *cur->SEQ_ID_ROW_NUM(mid_row) < seq_row ) {
593  row_before = mid_row;
594  }
595  else {
596  row = mid_row;
597  }
598  }
599  m_Seq2PageMap[seq_row] = row;
600 
601  info.m_PageSets.clear();
602  for ( ; row <= max_row; ++row ) {
603  if ( *cur->SEQ_ID_ROW_NUM(row) != seq_row ) {
604  break;
605  }
606 
607  SSeqInfo::SPageSet pset;
608  pset.m_SeqPos = *cur->PAGE_FROM(row);
609  pset.m_PageCount = 1;
610  pset.m_RowId = row;
611  if ( !info.m_PageSets.empty() ) {
612  SSeqInfo::SPageSet& prev_pset = info.m_PageSets.back();
613  if ( prev_pset.GetSeqPosEnd(kPageSize) == pset.m_SeqPos &&
614  prev_pset.GetRowIdEnd() == pset.m_RowId ) {
615  prev_pset.m_PageCount += 1;
616  continue;
617  }
618  }
619  info.m_PageSets.push_back(pset);
620  }
621 
622  Put(cur);
623  }
624 
625  // update graph range
626  {
627  if ( !info.m_SeqLength ) {
628  info.m_SeqLength = info.m_PageSets.back().GetSeqPosEnd(kPageSize);
629  }
630  info.m_GraphRowId = (info.m_PageSets.front().m_RowId-1) * m_TrackList.size() + 1;
631  }
632 
633  // final id assignment is an indication of initialization
634  info.m_Seq_id = seq_id;
635 }
636 
637 
639 CSNPDb_Impl::x_GetPageVDBRowRange(TSeqInfoList::const_iterator seq)
640 {
641  if ( seq == GetSeqInfoList().end() ) {
642  NCBI_THROW_FMT(CSraException, eInvalidIndex,
643  "Sequence index is out of bounds: "<<
644  GetDbPath());
645  }
646  return seq->GetPageVDBRowRange();
647 }
648 
649 
650 TVDBRowId
651 CSNPDb_Impl::x_GetGraphVDBRowId(TSeqInfoList::const_iterator seq,
652  TTrackInfoList::const_iterator track)
653 {
654  if ( seq == GetSeqInfoList().end() ) {
655  NCBI_THROW_FMT(CSraException, eInvalidIndex,
656  "Sequence index is out of bounds: "<<
657  GetDbPath());
658  }
659  if ( track == GetTrackInfoList().end() ) {
660  NCBI_THROW_FMT(CSraException, eInvalidIndex,
661  "Filter track index is out of bounds: "<<
662  GetDbPath());
663  }
664  TVDBRowId start = seq->m_GraphRowId;
665  TVDBRowId len = (seq->m_SeqLength-1)/kPageSize + 1;
666  start += (track - GetTrackInfoList().begin())*len;
667  return start;
668 }
669 
670 
671 /////////////////////////////////////////////////////////////////////////////
672 // CSNPDbTrackIterator
673 /////////////////////////////////////////////////////////////////////////////
674 
675 
677  : m_Db(db),
678  m_Iter(GetList().begin())
679 {
680 }
681 
682 
684  size_t track_index)
685  : m_Db(db)
686 {
687  if ( track_index >= GetList().size() ) {
688  NCBI_THROW_FMT(CSraException, eInvalidIndex,
689  "Track index is out of bounds: "<<
690  db->GetDbPath()<<"."<<track_index);
691  }
692  m_Iter = GetList().begin()+track_index;
693 }
694 
695 
697  const string& name)
698  : m_Db(db),
699  m_Iter(db->FindTrack(name))
700 {
701 }
702 
703 
705 {
706  if ( !*this ) {
707  NCBI_THROW(CSraException, eInvalidState,
708  "CSNPDbTrackIterator is invalid");
709  }
710  return *m_Iter;
711 }
712 
713 
715 {
716  m_Db.Reset();
717  m_Iter = TList::const_iterator();
718 }
719 
720 
721 /////////////////////////////////////////////////////////////////////////////
722 // CSNPDbSeqIterator
723 /////////////////////////////////////////////////////////////////////////////
724 
725 
727  : m_Db(db),
728  m_Iter(db->GetSeqInfoList().begin()),
729  m_TrackIter(db->GetTrackInfoList().begin())
730 {
731  if ( *this ) {
732  GetDb().x_Update(m_Iter);
733  }
734 }
735 
736 
738  size_t seq_index)
739 {
740  if ( seq_index >= db->m_SeqList.size() ) {
741  NCBI_THROW_FMT(CSraException, eInvalidIndex,
742  "Sequence index is out of bounds: "<<
743  db->GetDbPath()<<"."<<seq_index);
744  }
745  m_Db = db;
746  m_Iter = db->m_SeqList.begin()+seq_index;
747  m_TrackIter = db->GetTrackInfoList().begin();
748  if ( *this ) {
749  GetDb().x_Update(m_Iter);
750  }
751 }
752 
753 
755  const CSeq_id_Handle& seq_id)
756  : m_Db(db),
757  m_Iter(db.GetNCObject().FindSeq(seq_id)),
758  m_TrackIter(db->GetTrackInfoList().begin())
759 {
760 }
761 
762 
764  const string& accession,
765  int version)
766  : m_Db(db),
767  m_Iter(db.GetNCObject().FindSeq(accession, version)),
768  m_TrackIter(db->GetTrackInfoList().begin())
769 {
770 }
771 
772 
774 {
775  m_TrackIter = track.m_Iter;
776 }
777 
778 
780 {
781  if ( !*this ) {
782  NCBI_THROW(CSraException, eInvalidState,
783  "CSNPDbSeqIterator is invalid");
784  }
785  return *m_Iter;
786 }
787 
788 
790 {
791  m_Iter = CSNPDb_Impl::TSeqInfoList::const_iterator();
792  m_TrackIter = CSNPDb_Impl::TTrackInfoList::const_iterator();
793  m_Db.Reset();
794 }
795 
796 
798 {
799  ++m_Iter;
800  if ( *this ) {
801  GetDb().x_Update(m_Iter);
802  }
803  return *this;
804 }
805 
806 
808 {
809  return GetInfo().m_Circular;
810 }
811 
812 
814 {
815  return kMaxSNPLength;
816 }
817 
818 
820 {
823  TVDBRowId begin = *cur->FEATURE_ROW_FROM(row_ids.GetFrom());
824  TVDBRowId end = *cur->FEATURE_ROW_FROM(row_ids.GetTo());
825  end += *cur->FEATURE_ROWS_COUNT(row_ids.GetTo());
826  GetDb().Put(cur);
827  return end - begin;
828 }
829 
830 
832 {
833  return GetSNPCount();
834 }
835 
836 
838 {
839  const CSNPDb_Impl::SSeqInfo::TPageSets& psets = GetInfo().m_PageSets;
840  return COpenRange<TSeqPos>(psets.front().m_SeqPos,
841  psets.back().GetSeqPosEnd(kPageSize));
842 }
843 
844 
846 
847 
848 inline unsigned x_SetBitCount(Uint8 v)
849 {
850  v = (NCBI_CONST_UINT8(0x5555555555555555) & (v>>1)) +
851  (NCBI_CONST_UINT8(0x5555555555555555) & v);
852  v = (NCBI_CONST_UINT8(0x3333333333333333) & (v>>2)) +
853  (NCBI_CONST_UINT8(0x3333333333333333) & v);
854  v = (NCBI_CONST_UINT8(0x0f0f0f0f0f0f0f0f) & (v>>4)) +
855  (NCBI_CONST_UINT8(0x0f0f0f0f0f0f0f0f) & v);
856  v = (NCBI_CONST_UINT8(0x00ff00ff00ff00ff) & (v>>8)) +
857  (NCBI_CONST_UINT8(0x00ff00ff00ff00ff) & v);
858  v = (NCBI_CONST_UINT8(0x0000ffff0000ffff) & (v>>16)) +
859  (NCBI_CONST_UINT8(0x0000ffff0000ffff) & v);
860  return unsigned(v>>32)+unsigned(v);
861 }
862 
863 
864 inline void x_SetOS8(vector<char>& os, Uint8 data)
865 {
866  os.resize(8);
867  char* dst = os.data();
868  for ( int i = 0; i < 8; ++i ) {
869  dst[i] = char(data>>(8*i));
870  }
871 }
872 
873 
875  const CSNPDbSeqIterator& it)
876 {
877  range = range.IntersectionWith(it.GetSNPRange());
878 }
879 
880 
882 {
883  return pos - pos%step;
884 }
885 
886 
888 {
889  return x_RoundPos(pos+step-1, step);
890 }
891 
892 
894 {
895  range.SetFrom(x_RoundPos(range.GetFrom(), step));
896  range.SetToOpen(x_RoundPosUp(range.GetToOpen(), step));
897 }
898 
899 
901  const CSNPDbSeqIterator& it,
902  const TSeqPos comp)
903 {
904  x_AdjustRange(range, it);
905  x_RoundRange(range, comp);
906 }
907 
908 
909 struct SGraphMaker {
910  static const TSeqPos kMinGraphGap = 1000;
911 
912  enum EGraphSet {
915  };
916  enum EGapsType {
918  eNoGaps
919  };
920 
924  typedef list< CRef<CSeq_graph> > TGraphs;
931 
932  void Start(const CSNPDbSeqIterator& it,
934  TSeqPos comp,
935  EGraphSet graph_set = eMultipleGraphs,
936  EGapsType gaps_type = eAllowGaps)
937  {
938  m_GraphSet = graph_set;
939  m_GapsType = gaps_type;
940  m_Graph = null;
941  m_Graphs.clear();
942  m_Id = it.GetSeqId();
943  x_AdjustGraphRange(range, it, comp);
944  m_Range = range;
945  m_Comp = comp;
946  m_EmptyCount = 0;
947  m_MaxValue = 0;
948  _ASSERT(!range.Empty());
949  _ASSERT(range.GetFrom()%comp == 0);
950  _ASSERT(range.GetToOpen()%comp == 0);
951  }
952 
953  void x_NewGraph()
954  {
955  _ASSERT(!m_Graph);
956  m_Graph = new CSeq_graph();
957  m_MaxValue = 0;
958  }
959  void x_EndGraph(bool save = true)
960  {
961  _ASSERT(m_Graph);
962  CSeq_graph& graph = *m_Graph;
963  graph.SetTitle("SNP Density");
964  size_t count;
965  if ( m_MaxValue <= 255 ) {
966  auto& gr = graph.SetGraph().SetByte();
967  gr.SetMin(1);
968  gr.SetMax(m_MaxValue);
969  gr.SetAxis(0);
970  count = gr.GetValues().size();
971  }
972  else {
973  auto& gr = graph.SetGraph().SetInt();
974  gr.SetMin(1);
975  gr.SetMax(m_MaxValue);
976  gr.SetAxis(0);
977  count = gr.GetValues().size();
978  }
979  TSeqPos length = TSeqPos(count*m_Comp);
980  CSeq_interval& loc = graph.SetLoc().SetInt();
981  loc.SetId(*m_Id);
982  loc.SetFrom(m_Range.GetFrom());
983  loc.SetTo(m_Range.GetFrom()+length-1);
984  graph.SetComp(m_Comp);
985  graph.SetNumval(int(count));
986  m_Range.SetFrom(m_Range.GetFrom()+length);
987  if ( save ) {
988  m_Graphs.push_back(m_Graph);
989  }
990  m_Graph = null;
991  }
993  {
994  if ( !m_Graph ) {
995  x_NewGraph();
996  }
997  return *m_Graph;
998  }
999 
1001  {
1004  if ( m_Graph ) {
1005  x_EndGraph();
1006  }
1008  m_EmptyCount = 0;
1009  }
1010 
1012  {
1013  _ASSERT(count);
1014  CSeq_graph& graph = x_GetGraph();
1015  if ( m_MaxValue <= 255 ) {
1016  auto& vv = graph.SetGraph().SetByte().SetValues();
1017  vv.resize(vv.size() + count);
1018  }
1019  else {
1020  auto& vv = graph.SetGraph().SetInt().SetValues();
1021  vv.resize(vv.size() + count);
1022  }
1023  }
1024  void AddActualValues(TSeqPos count, const Uint4* values)
1025  {
1026  _ASSERT(count);
1027  if ( m_EmptyCount ) {
1028  if ( !m_Graph ||
1030  m_EmptyCount >= kMinGraphGap) ) {
1031  AddActualGap();
1032  }
1033  else {
1035  m_EmptyCount = 0;
1036  }
1037  }
1038  CSeq_graph& graph = x_GetGraph();
1039  m_MaxValue = max(m_MaxValue, *max_element(values, values+count));
1040  if ( m_MaxValue <= 255 ) {
1041  auto& vv = graph.SetGraph().SetByte().SetValues();
1042  vv.insert(vv.end(), values, values+count);
1043  return;
1044  }
1045  if ( graph.GetGraph().IsByte() ) {
1046  CConstRef<CByte_graph> old_data(&graph.GetGraph().GetByte());
1047  auto& old_vv = old_data->GetValues();
1048  auto& vv = graph.SetGraph().SetInt().SetValues();
1049  const Uint1* bb = reinterpret_cast<const Uint1*>(old_vv.data());
1050  vv.assign(bb, bb+old_vv.size());
1051  }
1052  auto& vv = graph.SetGraph().SetInt().SetValues();
1053  vv.insert(vv.end(), values, values+count);
1054  }
1056  {
1057  AddActualValues(1, &value);
1058  }
1059 
1060  void AddEmpty(TSeqPos count)
1061  {
1062  _ASSERT(count);
1063  if ( m_GapsType == eNoGaps ) {
1064  AddActualZeroes(count);
1065  }
1066  else {
1067  m_EmptyCount += count;
1068  }
1069  }
1070  void AddValues(TSeqPos count, const Uint4* values)
1071  {
1072  TSeqPos empty_before = 0;
1073  while ( count && *values == 0 ) {
1074  ++empty_before;
1075  --count;
1076  ++values;
1077  }
1078  if ( empty_before ) {
1079  AddEmpty(empty_before);
1080  }
1081  TSeqPos empty_after = 0;
1082  while ( count && values[count-1] == 0 ) {
1083  ++empty_after;
1084  --count;
1085  }
1086  if ( count ) {
1087  AddActualValues(count, values);
1088  }
1089  if ( empty_after ) {
1090  AddEmpty(empty_after);
1091  }
1092  }
1094  {
1095  if ( !value ) {
1096  AddEmpty(1);
1097  }
1098  else {
1100  }
1101  }
1103  {
1104  if ( m_Graph ) {
1105  x_EndGraph();
1106  }
1107  return m_Graphs;
1108  }
1110  {
1111  CRef<CSeq_graph> ret = m_Graph;
1112  if ( ret ) {
1113  x_EndGraph(false);
1114  }
1115  return ret;
1116  }
1117 };
1118 
1119 
1120 CRef<CSeq_annot> x_NewAnnot(const string& annot_name = kDefaultAnnotName)
1121 {
1122  CRef<CSeq_annot> annot(new CSeq_annot);
1123  annot->SetNameDesc(annot_name);
1124  return annot;
1125 }
1126 
1127 
1129  const CSNPDbSeqIterator& seq_it,
1131  SGraphMaker::EGraphSet graph_set,
1132  SGraphMaker::EGapsType gaps_type)
1133 {
1134  CVDBMgr::CRequestContextUpdater ctx_updater;
1135  g.Start(seq_it, range, kOverviewZoom, graph_set, gaps_type);
1136  for ( CSNPDbGraphIterator it(seq_it, range); it; ++it ) {
1137  g.AddValue(it.GetTotalValue());
1138  }
1139 }
1140 
1141 
1143  const CSNPDbSeqIterator& seq_it,
1145  SGraphMaker::EGraphSet graph_set)
1146 {
1147  CVDBMgr::CRequestContextUpdater ctx_updater;
1148  g.Start(seq_it, range, kCoverageZoom, graph_set);
1149  for ( CSNPDbGraphIterator it(seq_it, range); it; ++it ) {
1150  CRange<TSeqPos> page = it.GetPageRange();
1151  TSeqPos skip_beg = 0;
1152  if ( range.GetFrom() > page.GetFrom() ) {
1153  skip_beg = (range.GetFrom() - page.GetFrom())/kCoverageZoom;
1154  }
1155  TSeqPos skip_end = 0;
1156  if ( range.GetToOpen() < page.GetToOpen() ) {
1157  skip_end = (page.GetToOpen() - range.GetToOpen())/kCoverageZoom;
1158  }
1159  TSeqPos count = kPageSize/kCoverageZoom - skip_beg - skip_end;
1160  if ( !it.GetTotalValue() ) {
1161  g.AddEmpty(count);
1162  }
1163  else {
1164  CVDBValueFor<Uint4> values = it.GetCoverageValues();
1165  _ASSERT(values.size()*kCoverageZoom == kPageSize);
1166  g.AddValues(count, values.data()+skip_beg);
1167  }
1168  }
1169 }
1170 
1171 
1173 
1174 
1177  TFlags flags) const
1178 {
1179  SGraphMaker g;
1180  x_CollectOverviewGraph(g, *this, range,
1181  g.eSingleGraph,
1182  flags & fNoGaps? g.eNoGaps: g.eAllowGaps);
1183  return g.FinishGraph();
1184 }
1185 
1186 
1189  const string& annot_name,
1190  TFlags flags) const
1191 {
1192  SGraphMaker g;
1193  x_CollectOverviewGraph(g, *this, range,
1194  flags & fNoGaps? g.eSingleGraph: g.eMultipleGraphs,
1195  flags & fNoGaps? g.eNoGaps: g.eAllowGaps);
1196  CRef<CSeq_annot> annot = x_NewAnnot(annot_name);
1197  annot->SetData().SetGraph().swap(g.FinishAnnot());
1198  return annot;
1199 }
1200 
1201 
1204  TFlags flags) const
1205 {
1207 }
1208 
1209 
1212 {
1213  SGraphMaker g;
1214  x_CollectCoverageGraph(g, *this, range, g.eSingleGraph);
1215  return g.FinishGraph();
1216 }
1217 
1218 
1221  const string& annot_name,
1222  TFlags flags) const
1223 {
1224  SGraphMaker g;
1225  x_CollectCoverageGraph(g, *this, range, g.eMultipleGraphs);
1226  CRef<CSeq_annot> annot = x_NewAnnot(annot_name);
1227  annot->SetData().SetGraph().swap(g.FinishAnnot());
1228  return annot;
1229 }
1230 
1231 
1234  TFlags flags) const
1235 {
1237 }
1238 
1239 
1242  const SFilter& filter,
1243  TFlags flags) const
1244 {
1245  CRef<CSeq_annot> annot = x_NewAnnot();
1246  x_AdjustRange(range, *this);
1247  CSeq_annot::TData::TFtable& feats = annot->SetData().SetFtable();
1248  SSelector sel(eSearchByStart, filter);
1249  for ( CSNPDbFeatIterator it(*this, range, sel); it; ++it ) {
1250  feats.push_back(it.GetSeq_feat());
1251  }
1252  if ( feats.empty() ) {
1253  return null;
1254  }
1255  return annot;
1256 }
1257 
1258 
1261  TFlags flags) const
1262 {
1263  return GetFeatAnnot(range, GetFilter(), flags);
1264 }
1265 
1266 
1268 
1269 
1271  const char* name = 0)
1272 {
1274  col->SetHeader().SetField_id(id);
1275  if ( name ) {
1276  col->SetHeader().SetField_name(name);
1277  }
1278  return col;
1279 }
1280 
1281 
1283 {
1285  col->SetHeader().SetField_name(name);
1286  return col;
1287 }
1288 
1289 
1290 struct SColumn
1291 {
1292  int id;
1293  const char* name;
1294 
1296 
1297  SColumn(void)
1298  : id(-1),
1299  name(0)
1300  {
1301  }
1302  explicit
1304  const char* name = 0)
1305  : id(id),
1306  name(name)
1307  {
1308  }
1309 
1311  const char* name = 0)
1312  {
1313  this->id = id;
1314  this->name = name;
1315  }
1316 
1318  {
1319  if ( !column ) {
1320  _ASSERT(id >= 0);
1321  column =
1323  }
1324  return column;
1325  }
1327  {
1328  return Ref(x_GetColumn());
1329  }
1330 
1332  {
1333  if ( column ) {
1334  table.SetColumns().push_back(column);
1335  }
1336  }
1337 
1339 };
1340 
1341 struct SIntColumn : public SColumn
1342 {
1344 
1345  explicit
1347  : SColumn(id, name),
1348  values(0)
1349  {
1350  }
1351 
1352  void Add(int value)
1353  {
1354  if ( !values ) {
1355  values = &x_GetColumn()->SetData().SetInt();
1356  }
1357  values->push_back(value);
1358  }
1359 };
1360 
1361 
1362 struct SInt8Column : public SIntColumn
1363 {
1365 
1366  explicit
1368  : SIntColumn(id, name),
1369  values8(0)
1370  {
1371  }
1372 
1374  {
1375  if ( !values8 && int(value) == value ) {
1376  SIntColumn::Add(int(value));
1377  }
1378  else {
1379  if ( !values8 ) {
1380  CSeqTable_column* col = x_GetColumn();
1381  if ( col->IsSetData() ) {
1382  col->SetData().ChangeToInt8();
1383  }
1384  values8 = &col->SetData().SetInt8();
1385  }
1386  values8->push_back(value);
1387  }
1388  }
1389 };
1390 
1391 
1393 {
1396  int size;
1397 
1399  : column(column),
1400  indexes(0),
1401  size(0)
1402  {
1403  }
1404 
1405  void Add(int index)
1406  {
1407  if ( index != size && !indexes ) {
1408  indexes = &column.x_GetColumn()->SetSparse().SetIndexes();
1409  for ( int i = 0; i < size; ++i ) {
1410  indexes->push_back(i);
1411  }
1412  }
1413  if ( indexes ) {
1414  indexes->push_back(index);
1415  }
1416  ++size;
1417  }
1418 
1419  void Optimize(SIntColumn& column, const SIntColumn& backup_column)
1420  {
1421  _ASSERT(&column == &this->column);
1422  if ( !indexes ) {
1423  return;
1424  }
1425 
1426  size_t sparse_size = column.values->size();
1427  _ASSERT(sparse_size == indexes->size());
1428  size_t total_size = backup_column.values->size();
1429  _ASSERT(indexes->back() < total_size);
1430  if ( sparse_size >= total_size/3 ) {
1431  // sparse index is too big, replace with plain column
1433  values.reserve(total_size);
1434  for ( size_t i = 0, j = 0; i < total_size; ++i ) {
1435  TSeqPos to;
1436  if ( j < indexes->size() && i == (*indexes)[j] ) {
1437  to = (*column.values)[j++];
1438  }
1439  else {
1440  to = (*backup_column.values)[i];
1441  }
1442  values.push_back(to);
1443  }
1444  swap(values, *column.values);
1445  indexes = 0;
1447  }
1448  }
1449 };
1450 
1451 
1452 struct SCommonStrings : public SColumn
1453 {
1456  typedef unordered_map<CTempString, int> TIndex;
1458  list<string> index_strings;
1459 
1461  : values(0),
1462  indexes(0)
1463  {
1464  }
1465  explicit
1467  const char* name = 0)
1468  : SColumn(id, name),
1469  values(0),
1470  indexes(0)
1471  {
1472  }
1473 
1475  {
1476  if ( !values ) {
1477  CSeqTable_column* col = x_GetColumn();
1478  values = &col->SetData().SetCommon_string().SetStrings();
1479  indexes = &col->SetData().SetCommon_string().SetIndexes();
1480  }
1481  int ind;
1482  TIndex::const_iterator it = index.find(val);
1483  if ( it == index.end() ) {
1484  ind = int(values->size());
1485  values->push_back(val);
1486  index_strings.push_back(val);
1487  val = index_strings.back();
1488  index.insert(TIndex::value_type(val, ind));
1489  }
1490  else {
1491  ind = it->second;
1492  }
1493  indexes->push_back(ind);
1494  }
1495 
1497  {
1498  if ( values && values->size() == 1 ) {
1499  CSeqTable_column* col = x_GetColumn();
1500  col->SetDefault().SetString().swap(values->front());
1501  col->ResetData();
1502  values = 0;
1503  indexes = 0;
1504  }
1506  }
1507 };
1508 
1509 
// Column helper that accumulates 8-byte values in "common bytes" form: the
// column keeps a list of byte strings (`values`) plus one int per row
// (`indexes`) that selects an entry of that list.
// NOTE(review): several lines of this struct are absent from this listing
// (member declarations, the constructor name, the right-hand side of the
// `ins` initialisation and the header of the last method), so only the
// statements that are visible are described.
1510 struct SCommon8Bytes : public SColumn
1511 {
1516 
1517  explicit
1519  const char* name = 0)
1520  : SColumn(id, name),
1521  values(0),
1522  indexes(0)
1523  {
1524  }
1525 
      // Appends one row holding `val`.
      // Throws CSraException(eDataError) when the index of the value does not
      // fit into an int.
1526  void Add(Uint8 val)
1527  {
      // the column data is created lazily, on the first added row
1528  if ( !values ) {
1529  CSeqTable_column* col = x_GetColumn();
1530  values = &col->SetData().SetCommon_bytes().SetBytes();
1531  indexes = &col->SetData().SetCommon_bytes().SetIndexes();
1532  }
      // presumably `ins` is the result of inserting `val` into an index map
      // (the initialiser line is missing here) -- TODO confirm
1533  pair<TIndex::iterator, bool> ins =
1535  if ( ins.second ) {
      // first occurrence of this value: it gets the next free slot of *values
1536  ins.first->second = values->size();
      // NOTE(review): allocated with a raw `new` and handed to *values as a
      // pointer; which object frees it is not visible here -- confirm
1537  vector<char>* data = new vector<char>();
1538  values->push_back(data);
1539  x_SetOS8(*data, val);
1540  }
1541  auto value_index = ins.first->second;
      // rows are stored as int, so larger slot numbers cannot be represented
1542  if ( value_index > kMax_Int ) {
1543  NCBI_THROW(CSraException, eDataError,
1544  "CSNPDbSeqIterator: common bytes table is too big");
1545  }
1546  indexes->push_back(int(value_index));
1547  }
1548 
      // (method header missing in this listing)
      // When exactly one distinct value was collected, it is moved into the
      // column default, the per-row data is dropped and the cached pointers
      // are cleared.
1550  {
1551  if ( values && values->size() == 1 ) {
1552  CSeqTable_column* col = x_GetColumn();
1553  col->SetDefault().SetBytes().swap(*values->front());
1554  col->ResetData();
1555  values = 0;
1556  indexes = 0;
1557  }
1559  }
1560 };
1561 
1562 
// Largest allele count of a feature that can go into a Seq-table; it is the
// bound of the per-allele column loops and of the second m_Tables index.
static constexpr int kMaxTableAlleles = 4;
1564 
1565 
1567 {
1568  SSeqTableContent(void);
1569 
1570  void Add(const CSNPDbFeatIterator& it);
1571 
1572  CRef<CSeq_annot> GetAnnot(const string& annot_name,
1573  CSeq_id& seq_id);
1574 
1576 
1577  // columns
1581 
1583 
1586 
1588 
1591  const string& value)
1592  {
1594  col->SetDefault().SetString(value);
1595  table.SetColumns().push_back(col);
1596  }
1597 
1600  CSeq_id& value)
1601  {
1603  col->SetDefault().SetId(value);
1604  table.SetColumns().push_back(col);
1605  }
1606 
1608  const char* name,
1609  CSeq_loc& value)
1610  {
1612  col->SetDefault().SetLoc(value);
1613  table.SetColumns().push_back(col);
1614  }
1615 
1617  const char* name,
1618  int value)
1619  {
1621  col->SetDefault().SetInt(value);
1622  table.SetColumns().push_back(col);
1623  }
1624 };
1625 
1626 
1628  : m_TableSize(0),
1629  col_from(CSeqTable_column_info::eField_id_location_from),
1630  col_to(CSeqTable_column_info::eField_id_location_to),
1631  ind_to(col_to),
1632  col_subtype(CSeqTable_column_info::eField_id_ext, "E.VariationClass"),
1633  col_bitfield(CSeqTable_column_info::eField_id_ext, "E.Bitfield"),
1634  col_dbxref(CSeqTable_column_info::eField_id_dbxref, "D.dbSNP")
1635 {
1636  for ( int i = 0; i < kMaxTableAlleles; ++i ) {
1638  "Q.replace");
1639  }
1640 }
1641 
1642 
1643 inline
1645 {
1646  TSeqPos from = it.GetSNPPosition();
1647  TSeqPos len = it.GetSNPLength();
1648 
1649  col_from.Add(from);
1650  if ( len != 1 ) {
1651  col_to.Add(from + len - 1);
1653  }
1654 
1656  for ( size_t i = 0; i < range.second; ++i ) {
1658  col_alleles[i].Add(it.GetAllele(range, i));
1659  }
1660 
1663 
1664  col_dbxref.Add(it.GetFeatId());
1665 
1666  ++m_TableSize;
1667 }
1668 
1669 
1671  CSeq_id& seq_id)
1672 {
1673  if ( !m_TableSize ) {
1674  return null;
1675  }
1676 
1677  CRef<CSeq_annot> table_annot = x_NewAnnot(annot_name);
1678 
1679  CSeq_table& table = table_annot->SetData().SetSeq_table();
1680  table.SetFeat_type(CSeqFeatData::e_Imp);
1681  table.SetFeat_subtype(CSeqFeatData::eSubtype_variation);
1682  table.SetNum_rows(m_TableSize);
1683 
1686  "variation");
1687 
1688  if ( 1 ) {
1689  _ASSERT(is_sorted(col_from.values->begin(), col_from.values->end()));
1690  TSeqPos total_from = col_from.values->front();
1691  TSeqPos total_to = col_from.values->back();
1692  TSeqPos max_len = 1;
1693  if ( col_to.values ) {
1694  max_len = kMaxSNPLength;
1695  total_to += max_len-1;
1696  }
1697  CRef<CSeq_loc> total_loc(new CSeq_loc);
1698  total_loc->SetInt().SetId(seq_id);
1699  total_loc->SetInt().SetFrom(total_from);
1700  total_loc->SetInt().SetTo(total_to);
1702  "Seq-table location",
1703  *total_loc);
1704 
1706  "Sorted, max length",
1707  max_len);
1708 
1711  seq_id);
1712  }
1713 
1716  col_to.Attach(table);
1717  for ( int i = 0; i < kMaxTableAlleles; ++i ) {
1719  }
1720 
1721  if ( col_subtype || col_bitfield ) {
1724  "dbSnpQAdata");
1725  if ( col_subtype ) {
1727  }
1728  if ( col_bitfield ) {
1730  }
1731  }
1732 
1734 
1735  return table_annot;
1736 }
1737 
1738 
1740 {
1742 
1743  bool AddToTable(const CSNPDbFeatIterator& it);
1744 
1746 
1747  void Add(const CSNPDbFeatIterator& it);
1748 
1749  vector< CRef<CSeq_annot> > GetAnnots(const string& annot_name);
1750 
1753 };
1754 
1755 
1757  : m_Seq_id(it.GetSeqId())
1758 {
1759 }
1760 
1761 
1762 vector< CRef<CSeq_annot> >
1763 SSeqTableConverter::GetAnnots(const string& annot_name)
1764 {
1765  vector< CRef<CSeq_annot> > ret;
1766  for ( int k = 0; k < 2; ++k ) {
1767  for ( int i = 0; i < kMaxTableAlleles; ++i ) {
1768  CRef<CSeq_annot> annot =
1769  m_Tables[k][i].GetAnnot(annot_name, *m_Seq_id);
1770  if ( annot ) {
1771  ret.push_back(annot);
1772  }
1773  }
1774  }
1775  if ( m_RegularAnnot ) {
1776  m_RegularAnnot->SetNameDesc(annot_name);
1777  ret.push_back(m_RegularAnnot);
1778  }
1779  return ret;
1780 }
1781 
1782 
1783 inline
1785 {
      // Tries to put the current feature of `it` into one of the Seq-tables.
      // Returns true when it was added, false when it has to be stored in
      // another way.
      // `range` is declared on a line that is missing from this listing;
      // presumably it is the extra (allele) range of the feature -- TODO confirm.
      // last_index is unsigned: with range.second == 0 it wraps to the largest
      // size_t value and is rejected by the bound check below.
1787  size_t last_index = range.second - 1;
1788  if ( last_index >= kMaxTableAlleles ) {
1789  return false;
1790  }
      // first index: 0 for length-1 features, 1 for all other lengths
1791  m_Tables[it.GetSNPLength() != 1][last_index].Add(it);
1792  return true;
1793 }
1794 
1795 
1796 inline
1798 {
1799  if ( AddToTable(it) ) {
1800  return;
1801  }
1802  if ( !m_RegularAnnot ) {
1804  }
1805  m_RegularAnnot->SetData().SetFtable().push_back(it.GetSeq_feat());
1806 }
1807 
1808 
1810 
1811 
1814  const string& annot_name,
1815  const SFilter& filter,
1816  TFlags flags) const
1817 {
1818  CVDBMgr::CRequestContextUpdater ctx_updater;
1819  x_AdjustRange(range, *this);
1820  SSeqTableConverter cvt(*this);
1821  SSelector sel(eSearchByStart, filter);
1822  for ( CSNPDbFeatIterator it(*this, range, sel); it; ++it ) {
1823  cvt.Add(it);
1824  }
1825  return cvt.GetAnnots(annot_name);
1826 }
1827 
1828 
1831  const SFilter& filter,
1832  TFlags flags) const
1833 {
1834  return GetTableFeatAnnots(range, kDefaultAnnotName, filter, flags);
1835 }
1836 
1837 
1840  const string& annot_name,
1841  TFlags flags) const
1842 {
1843  return GetTableFeatAnnots(range, annot_name, GetFilter(), flags);
1844 }
1845 
1846 
1849  TFlags flags) const
1850 {
1852 }
1853 
1854 
1855 /////////////////////////////////////////////////////////////////////////////
1856 // CSNPDbPageIterator
1857 /////////////////////////////////////////////////////////////////////////////
1858 
1859 
1861 {
1862  if ( m_Cur ) {
1864  _ASSERT(!m_Cur);
1865  }
1866  if ( m_GraphCur ) {
1868  _ASSERT(!m_Cur);
1869  }
1870  m_SeqIter.Reset();
1872 }
1873 
1874 
1876  : m_CurrPageSet(0),
1877  m_CurrPageRowId(0),
1878  m_CurrPagePos(kInvalidSeqPos),
1879  m_SearchMode(eSearchByOverlap)
1880 {
1881 }
1882 
1883 
1885  const CSeq_id_Handle& ref_id,
1886  TSeqPos ref_pos,
1887  TSeqPos window,
1888  ESearchMode search_mode)
1889  : m_SeqIter(db, ref_id)
1890 {
1891  TSeqPos ref_end = window? ref_pos+window: kInvalidSeqPos;
1892  Select(COpenRange<TSeqPos>(ref_pos, ref_end), search_mode);
1893 }
1894 
1895 
1897  const CSeq_id_Handle& ref_id,
1899  ESearchMode search_mode)
1900  : m_SeqIter(db, ref_id)
1901 {
1902  Select(range, search_mode);
1903 }
1904 
1905 
1908  ESearchMode search_mode)
1909  : m_SeqIter(seq)
1910 {
1911  Select(range, search_mode);
1912 }
1913 
1914 
1916 {
1917  *this = iter;
1918 }
1919 
1920 
1923 {
1924  if ( this != &iter ) {
1925  Reset();
1926  m_SeqIter = iter.m_SeqIter;
1927  m_Cur = iter.m_Cur;
1928  m_GraphCur = iter.m_GraphCur;
1934  m_SearchMode = iter.m_SearchMode;
1935  }
1936  return *this;
1937 }
1938 
1939 
1941 {
1942  Reset();
1943 }
1944 
1945 
1948  ESearchMode search_mode)
1949 {
      // Positions the iterator on the first page that can contain features
      // for `ref_range` and returns *this.  The head of the signature and a
      // few statements are missing from this listing.
1950  m_SearchRange = ref_range;
1951  m_SearchMode = search_mode;
1952 
      // nothing to iterate without a sequence or with an empty range
      // (the statement executed before the return is not visible here)
1953  if ( !m_SeqIter || ref_range.Empty() ) {
1955  return *this;
1956  }
1957 
1958  TSeqPos pos = ref_range.GetFrom();
1959  if ( m_SearchMode == eSearchByOverlap ) {
1960  // SNP may start before requested position
      // step back by kMaxSNPLength-1, clamped so that pos cannot go below 0
1961  pos = pos < kMaxSNPLength? 0: pos - (kMaxSNPLength-1);
1962  }
1963 
1964  const CSNPDb_Impl::SSeqInfo::TPageSets& psets = m_SeqIter->m_PageSets;
      // take the first page set that still has a page at or after `pos`
1965  for ( m_CurrPageSet = 0; m_CurrPageSet < psets.size(); ++m_CurrPageSet ) {
1966  const CSNPDb_Impl::SSeqInfo::SPageSet& pset = psets[m_CurrPageSet];
      // number of whole pages of this set that lie before `pos`
1967  TSeqPos skip = pos<pset.m_SeqPos? 0: (pos-pset.m_SeqPos)/kPageSize;
1968  if ( skip < pset.m_PageCount ) {
1969  m_CurrPageRowId = pset.m_RowId + skip;
1970  m_CurrPagePos = pset.m_SeqPos + skip * kPageSize;
1971  if ( !m_Cur ) {
1973  }
1974  return *this;
1975  }
1976  }
      // no page set reaches `pos`
1977  m_CurrPageRowId = TVDBRowId(-1);
1979  return *this;
1980 }
1981 
1982 
1984 {
1985  x_CheckValid("CSNPDbPageIterator::operator++");
1986 
1987  const CSNPDb_Impl::SSeqInfo::TPageSets& psets = m_SeqIter->m_PageSets;
1988  if ( ++m_CurrPageRowId < psets[m_CurrPageSet].GetRowIdEnd() ) {
1989  // next page in the set
1991  return;
1992  }
1993 
1994  // no more pages in the set, next page set
1995  if ( ++m_CurrPageSet < psets.size() ) {
1996  // first page in the next set
1997  m_CurrPageRowId = psets[m_CurrPageSet].m_RowId;
1998  m_CurrPagePos = psets[m_CurrPageSet].m_SeqPos;
1999  return;
2000  }
2001 
2002  // no more page sets
2004 }
2005 
2006 
2007 void CSNPDbPageIterator::x_ReportInvalid(const char* method) const
2008 {
2009  NCBI_THROW_FMT(CSraException, eInvalidState,
2010  "CSNPDbPageIterator::"<<method<<"(): "
2011  "Invalid iterator state");
2012 }
2013 
2014 
2016 {
2017  x_CheckValid("CSNPDbPageIterator::GetFirstFeatRowId");
2018  return *Cur().FEATURE_ROW_FROM(GetPageRowId());
2019 }
2020 
2021 
2023 {
2024  x_CheckValid("CSNPDbPageIterator::GetFeatCount");
2025  return *Cur().FEATURE_ROWS_COUNT(GetPageRowId());
2026 }
2027 
2028 
2029 /////////////////////////////////////////////////////////////////////////////
2030 // CSNPDbGraphIterator
2031 /////////////////////////////////////////////////////////////////////////////
2032 
2033 
2035 {
2036  if ( m_Cur ) {
2038  _ASSERT(!m_Cur);
2039  }
2040  m_Db.Reset();
2042 }
2043 
2044 
2046  : m_CurrPageRowId(0),
2047  m_CurrPagePos(kInvalidSeqPos)
2048 {
2049 }
2050 
2051 
2053  COpenRange<TSeqPos> ref_range)
2054 {
2055  Select(seq, ref_range);
2056 }
2057 
2058 
2060 {
2061  *this = iter;
2062 }
2063 
2064 
2067 {
2068  if ( this != &iter ) {
2069  Reset();
2070  m_Db = iter.m_Db;
2071  m_Cur = iter.m_Cur;
2072  m_SeqRowId = iter.m_SeqRowId;
2073  m_TrackRowId = iter.m_TrackRowId;
2077  }
2078  return *this;
2079 }
2080 
2081 
2083 {
2084  Reset();
2085 }
2086 
2087 
2090  COpenRange<TSeqPos> ref_range)
2091 {
      // Binds the iterator to the database and rows of `iter` and positions it
      // on the page that contains the start of `ref_range`; returns *this.
      // The head of the signature and two statements are missing from this
      // listing.
2092  m_Db = iter.m_Db;
      // NOTE(review): m_Db is dereferenced here, before the `!iter` check
      // below -- presumably iter.m_Db is non-null even for an invalid `iter`;
      // confirm, otherwise this is a null dereference.
2093  m_SeqRowId = m_Db->x_GetSeqVDBRowId(iter.x_GetSeqIter());
2094  m_TrackRowId = m_Db->x_GetTrackVDBRowId(iter.x_GetTrackIter());
2095  m_SearchRange = ref_range;
2096 
      // (the statement executed before this return is not visible here)
2097  if ( !iter || ref_range.Empty() ) {
2099  return *this;
2100  }
2101 
      // graph rows are addressed as base row + page number
2102  TSeqPos page = ref_range.GetFrom()/kPageSize;
2103  m_CurrPageRowId = iter.GetGraphVDBRowId() + page;
2104  m_CurrPagePos = page*kPageSize;
2105  if ( !m_Cur ) {
2107  }
2108  return *this;
2109 }
2110 
2111 
2113 {
2114  x_CheckValid("CSNPDbGraphIterator::operator++");
2115 
2116  if ( ++m_CurrPageRowId > m_Cur->m_Cursor.GetMaxRowId() ||
2117  *m_Cur->FILTER_ID_ROW_NUM(m_CurrPageRowId) != m_TrackRowId ||
2118  *m_Cur->SEQ_ID_ROW_NUM(m_CurrPageRowId) != m_SeqRowId ) {
2119  // end of track
2121  return;
2122  }
2123 
2124  m_CurrPagePos = *m_Cur->BLOCK_FROM(m_CurrPageRowId);
2125  if ( m_CurrPagePos >= m_SearchRange.GetToOpen() ) {
2126  // out of range
2128  return;
2129  }
2130 }
2131 
2132 
2133 void CSNPDbGraphIterator::x_ReportInvalid(const char* method) const
2134 {
2135  NCBI_THROW_FMT(CSraException, eInvalidState,
2136  "CSNPDbGraphIterator::"<<method<<"(): "
2137  "Invalid iterator state");
2138 }
2139 
2140 
2142 {
2143  x_CheckValid("CSNPDbGraphIterator::GetTotalValue");
2144  return *m_Cur->GR_TOTAL(m_CurrPageRowId);
2145 }
2146 
2147 
2149 {
2150  x_CheckValid("CSNPDbGraphIterator::GetCoverageValues");
2151  return m_Cur->GR_ZOOM(m_CurrPageRowId);
2152 }
2153 
2154 
2155 /////////////////////////////////////////////////////////////////////////////
2156 // CSNPDbFeatIterator
2157 /////////////////////////////////////////////////////////////////////////////
2158 
2159 
2161 {
2162  if ( m_Graph ) {
2164  }
2165  if ( m_Extra ) {
2167  }
2168  if ( m_Feat ) {
2170  }
2171  m_PageIter.Reset();
2173 }
2174 
2175 
2176 inline
2178 {
      // Sets up the feature row window [m_CurrFeatId, m_FirstBadFeatId) for
      // the page the page iterator currently points to.  The window stays
      // empty (both ids 0) when there is no page, when the track graph reports
      // a zero total for the page, or when the page has no features.
2179  m_CurrFeatId = 0;
2180  m_FirstBadFeatId = 0;
2181  if ( m_PageIter ) {
2182  if ( m_Graph && !*m_Graph->GR_TOTAL(x_GetGraphVDBRowId()) ) {
2183  // track graph says there's no matching features on current page
2184  return;
2185  }
2186  if ( TVDBRowCount count = GetPageIter().GetFeatCount() ) {
      // the feature cursor is obtained on first use
2187  if ( !m_Feat ) {
2188  m_Feat = GetDb().Feat();
2189  }
      // `first` is declared on a line that is missing from this listing;
      // presumably it is the first feature row of the page -- TODO confirm
2191  if ( kPage2FeatErrorWorkaround ) {
      // only for the page at position 0: skip rows whose sequence row is
      // lower than the row of the current sequence
2192  if ( GetPageIter().GetPagePos() == 0 ) {
2193  TVDBRowId seq_row = GetRefIter().GetVDBRowId();
2194  TVDBRowId max_row = m_Feat->m_Cursor.GetMaxRowId();
2195  while ( first < max_row && *m_Feat->SEQ_ID_ROW_NUM(first) < seq_row ) {
2196  ++first;
2197  }
2198  }
2199  }
2200  m_CurrFeatId = first;
      // NOTE(review): the end of the window moves together with `first` when
      // the workaround above skipped rows -- confirm this is intended
2201  m_FirstBadFeatId = first + count;
2202  }
2203  }
2204 }
2205 
2206 
2208  : m_CurrFeatId(0),
2209  m_FirstBadFeatId(0)
2210 {
2211 }
2212 
2213 
2215  const CSeq_id_Handle& ref_id,
2216  TSeqPos ref_pos,
2217  TSeqPos window,
2218  const SSelector& sel)
2219  : m_PageIter(db, ref_id, ref_pos, window, sel.m_SearchMode)
2220 {
2221  x_SetFilter(sel);
2222  x_InitPage();
2223  x_Settle();
2224 }
2225 
2226 
2228  const CSeq_id_Handle& ref_id,
2230  const SSelector& sel)
2231  : m_PageIter(db, ref_id, range, sel.m_SearchMode)
2232 {
2233  x_SetFilter(sel);
2234  x_InitPage();
2235  x_Settle();
2236 }
2237 
2238 
2241  const SSelector& sel)
2242  : m_PageIter(seq, range, sel.m_SearchMode)
2243 {
2244  x_SetFilter(sel);
2245  x_InitPage();
2246  x_Settle();
2247 }
2248 
2249 
2251 {
2252  *this = iter;
2253 }
2254 
2255 
2258 {
2259  if ( this != &iter ) {
2260  Reset();
2261  // params
2262  m_CurRange = iter.m_CurRange;
2263  m_Filter = iter.m_Filter;
2264  // page iter
2265  m_PageIter = iter.m_PageIter;
2266  // feat iter
2267  m_Feat = iter.m_Feat;
2268  m_CurrFeatId = iter.m_CurrFeatId;
2270  // extra iter
2271  m_Extra = iter.m_Extra;
2272  m_ExtraRowId = iter.m_ExtraRowId;
2273  // page track
2274  m_Graph = iter.m_Graph;
2276  }
2277  return *this;
2278 }
2279 
2280 
2282 {
2283  Reset();
2284 }
2285 
2286 
2288 {
2290 }
2291 
2292 
2294 {
      // Takes the filter of `sel`, normalizes it and, when a filter mask is
      // set, looks for the track that fits the requested filter best.
      // The signature line and two lines of the final `if` are missing from
      // this listing.
2295  m_Filter = sel.m_Filter;
2296  m_Filter.Normalize();
2297  m_GraphBaseRowId = 0;
2298  if ( m_Filter.m_FilterMask ) {
2299  // find best track for page filtering
2300  Uint8 best_bits_count = 0;
      // left unassigned until a track qualifies; it is read only when
      // best_bits_count is non-zero
2301  CSNPDb_Impl::TTrackInfoList::const_iterator best_track;
2302  ITERATE ( CSNPDb_Impl::TTrackInfoList, it, GetDb().GetTrackInfoList() ) {
2303  TFilter mask = it->m_Filter.m_FilterMask;
2304  if ( mask & ~m_Filter.m_FilterMask ) {
2305  // track filter by other bits than requested
2306  continue;
2307  }
2308  if ( !m_Filter.Matches(it->m_Filter.m_Filter) ) {
2309  // track's bits differ from requested
2310  continue;
2311  }
      // the track with the highest x_SetBitCount(mask) wins; on a tie the
      // earlier track is kept
2312  Uint8 bits_count = x_SetBitCount(mask);
2313  if ( bits_count > best_bits_count ) {
2314  best_bits_count = bits_count;
2315  best_track = it;
2316  }
2317  }
2318  if ( best_bits_count ) {
      // presumably the row id returned here becomes m_GraphBaseRowId (the
      // surrounding lines are not visible) -- TODO confirm
2320  GetDb().x_GetGraphVDBRowId(x_GetSeqIter(), best_track);
2322  }
2323  }
2324 }
2325 
2326 
2329  const SSelector& sel)
2330 {
2331  m_PageIter.Select(ref_range, sel.m_SearchMode);
2332  x_SetFilter(sel);
2333  x_InitPage();
2334  x_Settle();
2335  return *this;
2336 }
2337 
2338 
2340 {
2341  x_CheckValid("CSNPDbFeatIterator::GetFeatType");
2342  return *Cur().FEAT_TYPE(m_CurrFeatId);
2343 }
2344 
2345 
2347 {
2348  x_CheckValid("CSNPDbFeatIterator::GetFeatSubtype");
2349  return EFeatSubtype(*Cur().FEAT_SUBTYPE(m_CurrFeatId));
2350 }
2351 
2352 
2354 {
2355  return kFeatSubtypesToChars[subtype];
2356 }
2357 
2358 
2360 {
2361  return CTempString(kFeatSubtypesToChars+subtype, 1);
2362 }
2363 
2364 
2366 {
2368 }
2369 
2370 
2372 {
2373  return *Cur().FROM(m_CurrFeatId);
2374 }
2375 
2376 
2378 {
2379  return *Cur().LEN(m_CurrFeatId);
2380 }
2381 
2382 
2383 inline
2385 {
      // Classifies the current feature against the search range and filter:
      //   ePassedTheRegion - it starts at or after the end of the range,
      //   eExluded         - it is outside the range or rejected by the filter,
      //   eIncluded        - it matches; m_CurRange is set to its interval.
2386  TSeqPos ref_pos = x_GetFrom();
2387  if ( ref_pos >= GetSearchRange().GetToOpen() ) {
2388  // no more
2389  return ePassedTheRegion;
2390  }
      // in by-start mode the feature start itself must be inside the range
2391  if ( GetSearchMode() == eSearchByStart &&
2392  ref_pos < GetSearchRange().GetFrom() ) {
2393  return eExluded;
2394  }
2395  TSeqPos ref_len = x_GetLength();
2396  if ( ref_len == 0 ) { // insertion SNP
2397  // make 2-base interval with insertion point in the middle, if possible
      // NOTE(review): at position 0 the length stays 0, so ref_end == ref_pos
      // and the feature fails the overlap test below whenever the range
      // starts at 0 -- confirm this is intended
2398  if ( ref_pos > 0 ) {
2399  --ref_pos;
2400  ref_len = 2;
2401  }
2402  }
2403  TSeqPos ref_end = ref_pos + ref_len;
      // the feature ends before the range starts
2404  if ( ref_end <= GetSearchRange().GetFrom() ) {
2405  return eExluded;
2406  }
2407  if ( m_Filter.IsSet() ) {
2408  if ( !m_Filter.Matches(GetBitfield()) ) {
2409  return eExluded;
2410  }
2411  }
      // for a widened insertion this is the 2-base interval, not the raw one
2412  m_CurRange.SetFrom(ref_pos);
2413  m_CurRange.SetToOpen(ref_end);
2414  return eIncluded;
2415 }
2416 
2417 
2419 {
      // Moves forward until the current feature is one that x_Excluded()
      // reports as eIncluded, or until the page iterator becomes invalid.
      // The current feature itself is tested first, so nothing moves when it
      // already matches.
2420  while ( m_PageIter ) {
2421  while ( m_CurrFeatId < m_FirstBadFeatId ) {
2422  EExcluded exc = x_Excluded();
2423  if ( exc == eIncluded ) {
2424  // found
2425  return;
2426  }
2427  if ( exc == ePassedTheRegion ) {
2428  // passed the region
      // the rest of this page is not examined; the next page is still tried
2429  break;
2430  }
2431  // next feat in page
2432  ++m_CurrFeatId;
2433  }
2434 
      // page exhausted (or abandoned): set up the window of the next page
2435  ++m_PageIter;
2436  x_InitPage();
2437  }
2438 }
2439 
2440 
2442 {
2443  x_CheckValid("CSNPDbFeatIterator::operator++");
2444  ++m_CurrFeatId;
2445  x_Settle();
2446 }
2447 
2448 
2449 void CSNPDbFeatIterator::x_ReportInvalid(const char* method) const
2450 {
2451  NCBI_THROW_FMT(CSraException, eInvalidState,
2452  "CSNPDbFeatIterator::"<<method<<"(): "
2453  "Invalid iterator state");
2454 }
2455 
2456 
2458 {
2459  x_CheckValid("CSNPDbFeatIterator::GetFeatIdPrefix");
2460  return *Cur().FEAT_ID_PREFIX(m_CurrFeatId);
2461 }
2462 
2463 
2465 {
2466  x_CheckValid("CSNPDbFeatIterator::GetFeatId");
2467  return *Cur().FEAT_ID_VALUE(m_CurrFeatId);
2468 }
2469 
2470 
2472 {
      // Returns (first extra row, number of extra rows) of the current
      // feature; the pair is (0, 0) when the feature has no extra rows.
      // Throws through x_CheckValid() when the iterator is not valid.
      // (The signature line is missing from this listing.)
2473  x_CheckValid("CSNPDbFeatIterator::x_GetExtraRange");
2474  TVDBRowId first = 0;
2475  TVDBRowCount count = *Cur().EXTRA_ROWS_COUNT(m_CurrFeatId);
2476  if ( count ) {
2477  first = *Cur().EXTRA_ROW_FROM(m_CurrFeatId);
      // the extra cursor is obtained on first use
2478  if ( !m_Extra ) {
2479  m_Extra = GetDb().Extra(first);
2480  }
      // remember the last extra row of this feature
2481  m_ExtraRowId = first + count - 1;
2482  }
2483  return TExtraRange(first, count);
2484 }
2485 
2486 
2488  size_t index) const
2489 {
2490  _ASSERT(index < range.second);
2491  return *m_Extra->RS_ALLELE(range.first + index);
2492 }
2493 
2494 
2496 {
2497  x_CheckValid("CSNPDbFeatIterator::GetBitfield");
2498  return *Cur().BIT_FLAGS(m_CurrFeatId);
2499 }
2500 
2501 
2502 void CSNPDbFeatIterator::GetBitfieldOS(vector<char>& os) const
2503 {
2504  x_SetOS8(os, GetBitfield());
2505 }
2506 
2507 
// Tells whether `str` equals the string literal `value`.
// ValueSize is the size of the literal's array, i.e. its length plus the
// terminating NUL, so strings of a different length are rejected before any
// characters are compared.
template<size_t ValueSize>
static inline
bool x_IsStringConstant(const string& str, const char (&value)[ValueSize])
{
    const size_t literal_length = ValueSize - 1;
    if ( str.size() != literal_length ) {
        return false;
    }
    return str == value;
}
2514 
// Sets (obj).Set<Field>(value) unless the field is already set to exactly the
// string literal `value`, which avoids rewriting an object that already holds
// the constant.
// The body is wrapped in do { } while ( 0 ) so that the macro, followed by
// its semicolon, is a single statement and can be used safely in an unbraced
// if/else.
#define x_SetStringConstant(obj, Field, value)                              \
    do {                                                                    \
        if ( !(obj).NCBI_NAME2(IsSet,Field)() ||                            \
             !x_IsStringConstant((obj).NCBI_NAME2(Get,Field)(), value) ) {  \
            (obj).NCBI_NAME2(Set,Field)((value));                           \
        }                                                                   \
    } while ( 0 )
2520 
2521 
2522 template<class T>
2523 static inline
2525 {
2526  T* ptr = ref.GetPointerOrNull();
2527  if ( !ptr || !ptr->ReferencedOnlyOnce() ) {
2528  ref = ptr = new T;
2529  }
2530  return *ptr;
2531 }
2532 
2533 
2547 
2548 #define ALLELE_CACHE
2549 #ifdef ALLELE_CACHE
2556 #endif
2557 
2559  {
2560  CGb_qual* qual = cache.GetPointerOrNull();
2561  if ( !qual ) {
2562  cache = qual = new CGb_qual;
2563  qual->SetQual("replace");
2564  qual->SetVal(val);
2565  }
2566  return *qual;
2567  }
2569  {
2570  CGb_qual& qual = x_GetPrivate(cache);
2571  x_SetStringConstant(qual, Qual, "replace");
2572  qual.SetVal(val);
2573  return qual;
2574  }
2576  {
2577 #ifdef ALLELE_CACHE
2578  if ( val.size() == 1 ) {
2579  switch ( val[0] ) {
2580  case 'A': return x_GetCommonAllele(m_AlleleCacheA, val);
2581  case 'C': return x_GetCommonAllele(m_AlleleCacheC, val);
2582  case 'G': return x_GetCommonAllele(m_AlleleCacheG, val);
2583  case 'T': return x_GetCommonAllele(m_AlleleCacheT, val);
2584  case '-': return x_GetCommonAllele(m_AlleleCache_minus, val);
2585  default: break;
2586  }
2587  }
2588  if ( val.size() == 0 ) {
2590  }
2591 #endif
2592  return x_GetCachedAllele(cache, val);
2593  }
2594 };
2595 
2596 
2597 inline
2599 {
2600  if ( !m_CreateCache ) {
2602  }
2603  return *m_CreateCache;
2604 }
2605 
2606 
2607 static inline
2608 CObject_id& x_GetObject_id(CRef<CObject_id>& cache, const char* name)
2609 {
2610  if ( !cache ) {
2611  cache = new CObject_id();
2612  cache->SetStr(name);
2613  }
2614  return *cache;
2615 }
2616 
2617 
2619 {
2620  x_CheckValid("CSNPDbFeatIterator::GetSeq_feat");
2621 
2622  if ( !(flags & fUseSharedObjects) ) {
2623  m_CreateCache.reset();
2624  }
2625  SCreateCache& cache = x_GetCreateCache();
2626  CSeq_feat& feat = x_GetPrivate(cache.m_Feat);
2627  {{
2628  CSeqFeatData& data = feat.SetData();
2629  data.Reset();
2630  CImp_feat& imp = x_GetPrivate(cache.m_Imp);
2631  x_SetStringConstant(imp, Key, "variation");
2632  imp.ResetLoc();
2633  imp.ResetDescr();
2634  data.SetImp(imp);
2635  }}
2636  {{
2637  CSeq_loc& loc = feat.SetLocation();
2638  TSeqPos len = GetSNPLength();
2639  loc.Reset();
2640  if ( len == 1 ) {
2641  CSeq_point& loc_pnt = x_GetPrivate(cache.m_LocPnt);
2642  loc_pnt.SetId(*GetSeqId());
2643  TSeqPos pos = GetSNPPosition();
2644  loc_pnt.SetPoint(pos);
2645  loc.SetPnt(loc_pnt);
2646  }
2647  else {
2648  CSeq_interval& loc_int = x_GetPrivate(cache.m_LocInt);
2649  loc_int.SetId(*GetSeqId());
2650  TSeqPos pos = GetSNPPosition();
2651  loc_int.SetFrom(pos);
2652  loc_int.SetTo(pos+len-1);
2653  loc.SetInt(loc_int);
2654  }
2655  }}
2656  if ( flags & fIncludeAlleles ) {
2657  CSeq_feat::TQual& quals = feat.SetQual();
2658  pair<TVDBRowId, size_t> range = GetExtraRange();
2659  quals.assign(range.second, null);
2660  for ( size_t i = 0; i < range.second; ++i ) {
2661  CTempString allele = GetAllele(range, i);
2662  size_t cache_index = min(i, ArraySize(cache.m_Allele)-1);
2663  CGb_qual& qual =
2664  cache.GetAllele(cache.m_Allele[cache_index], allele);
2665  quals[i] = &qual;
2666  }
2667  }
2668  else {
2669  feat.ResetQual();
2670  }
2671  if ( flags & fIncludeRsId ) {
2672  CSeq_feat::TDbxref& dbxref = feat.SetDbxref();
2673  dbxref.resize(1);
2674  dbxref[0] = null;
2675  CDbtag& dbtag = x_GetPrivate(cache.m_Dbtag);
2676  x_SetStringConstant(dbtag, Db, "dbSNP");
2677  Uint8 feat_id = GetFeatId();
2678  switch ( GetFeatIdPrefix() ) {
2679  case eFeatIdPrefix_rs:
2680  dbtag.SetTag().SetStr("rs"+NStr::NumericToString(feat_id));
2681  break;
2682  case eFeatIdPrefix_ss:
2683  dbtag.SetTag().SetStr("ss"+NStr::NumericToString(feat_id));
2684  break;
2685  default:
2686  dbtag.SetTag().SetId8(feat_id);
2687  break;
2688  }
2689  dbxref[0] = &dbtag;
2690  }
2691  else {
2692  feat.ResetDbxref();
2693  }
2694  feat.ResetExt();
2695  TFlags ext_flags = fIncludeBitfield | fIncludeNeighbors | fIncludeSubtype;
2696  if ( flags & ext_flags ) {
2697  CUser_object& ext = x_GetPrivate(cache.m_Ext);
2699  "dbSnpQAdata"));
2700  CUser_object::TData& data = ext.SetData();
2701  data.clear();
2702  if ( flags & fIncludeNeighbors ) {
2703  }
2704  if ( flags & fIncludeSubtype ) {
2705  CUser_field& field = x_GetPrivate(cache.m_Subtype);
2707  "VariationClass"));
2708  field.SetData().SetStr(GetFeatSubtypeString());
2709  ext.SetData().push_back(Ref(&field));
2710  }
2711  if ( flags & fIncludeBitfield ) {
2712  CUser_field& field = x_GetPrivate(cache.m_Bitfield);
2714  "Bitfield"));
2715  GetBitfieldOS(field.SetData().SetOs());
2716  ext.SetData().push_back(Ref(&field));
2717  }
2718  feat.SetExt(ext);
2719  }
2720  return Ref(&feat);
2721 }
2722 
2723 
2724 /////////////////////////////////////////////////////////////////////////////
2725 
2726 // Generation of split-info and chunks
2727 
2729 
2730 // splitter parameters for SNPs and graphs
// Bioseq-set id used for the split-info skeleton and for the chunk data
static constexpr int kTSEId = 1;
// A chunk id is built as index*kChunkIdMul + chunk type and is taken apart
// again with % and / by kChunkIdMul.
static constexpr int kChunkIdFeat = 0;  // chunk type: SNP features
static constexpr int kChunkIdGraph = 1; // chunk type: coverage graph
static constexpr int kChunkIdMul = 2;   // number of chunk types
2735 
2736 // split_version=0 : feat_chunk_pages = kDefaultFeatChunkPages
2737 // 0 < split_version < kDefaultFeatChunkPages : feat_chunk_pages = split_version
2738 // graph_chunk_pages = feat_chunk_pages*kFeatChunksPerGraphChunk
2739 static const TSeqPos kDefaultFeatChunkPages = 200;
2741 static const TSeqPos kTargetFeatsPerChunk = 20000;
2742 
// Returns true when at least one of values[index .. index+count) is non-zero.
// The window is clipped to the end of `values`; a window that starts at or
// past the end yields false.  The clipping is done in size_t so that
// index+count cannot wrap around in TSeqPos.
template<class Values>
bool sx_HasNonZero(const Values& values, TSeqPos index, TSeqPos count)
{
    const size_t size = values.size();
    if ( index >= size ) {
        return false;
    }
    const size_t end = index + min(size_t(count), size - index);
    for ( size_t i = index; i < end; ++i ) {
        if ( values[i] ) {
            return true;
        }
    }
    return false;
}


// Marks in `bits` every group of `bit_values` consecutive value slots that
// contains a non-zero entry of `values`.
//   bits       - one flag per group; flags are only ever switched on
//   bit_values - number of value slots per flag
//   pos_index  - slot number of values[0]; it may fall inside a group, in
//                which case the first group is only partially covered
//   values     - the values to scan
// A non-zero value that belongs to a group past the end of `bits` makes
// `bits` grow (zero-filled) instead of writing out of bounds.  Nothing is
// done when bit_values is 0, as no slot can be mapped to a flag then.
template<class TValues>
void sx_AddBits2(vector<char>& bits,
                 TSeqPos bit_values,
                 TSeqPos pos_index,
                 const TValues& values)
{
    if ( bit_values == 0 ) {
        return;
    }
    size_t dst_ind = pos_index / bit_values;
    TSeqPos src_ind = 0;
    // the first group may be partial, all following ones are full
    TSeqPos group_size = bit_values - pos_index % bit_values;
    while ( src_ind < values.size() ) {
        const bool already_set = dst_ind < bits.size() && bits[dst_ind];
        if ( !already_set && sx_HasNonZero(values, src_ind, group_size) ) {
            if ( dst_ind >= bits.size() ) {
                bits.resize(dst_ind + 1);
            }
            bits[dst_ind] = 1;
        }
        ++dst_ind;
        src_ind += group_size;
        group_size = bit_values;
    }
}
2780 
2781 
2782 static
2783 void sx_AddBits(vector<char>& bits,
2785  const CSeq_graph& graph)
2786 {
2787  TSeqPos comp = graph.GetComp();
2788  _ASSERT(kChunkSize % comp == 0);
2789  TSeqPos bit_values = kChunkSize / comp;
2790  const CSeq_interval& loc = graph.GetLoc().GetInt();
2791  TSeqPos pos = loc.GetFrom();
2792  _ASSERT(pos % comp == 0);
2793  _ASSERT(graph.GetNumval()*comp == loc.GetLength());
2794  TSeqPos pos_index = pos/comp;
2795  if ( graph.GetGraph().IsByte() ) {
2796  auto& values = graph.GetGraph().GetByte().GetValues();
2797  _ASSERT(values.size() == graph.GetNumval());
2798  sx_AddBits2(bits, bit_values, pos_index, values);
2799  }
2800  else {
2801  auto& values = graph.GetGraph().GetInt().GetValues();
2802  _ASSERT(values.size() == graph.GetNumval());
2803  sx_AddBits2(bits, bit_values, pos_index, values);
2804  }
2805 }
2806 
2807 
2809 {
      // Estimates how many pages one feature chunk should span so that it
      // holds about kTargetFeatsPerChunk features; the result is at least 1
      // and at most kDefaultFeatChunkPages.
      // (The signature line is missing from this listing; `it` is the object
      // the statistics are read from.)
2810  // get statistics
2811  Uint8 total_feat_count = it.GetSNPCount();
      // NOTE(review): used as a divisor below -- assumes the page size is
      // never 0; confirm
2812  const TSeqPos page_size = it.GetPageSize();
2813  CRange<TSeqPos> total_range = it.GetSNPRange();
2814 
2815  // all calculations are approximate, 1 is added to avoid zero division
2816  TSeqPos page_count = total_range.GetLength()/page_size+1;
2817  Uint8 feat_per_page = total_feat_count/page_count+1;
2818  Uint8 chunk_pages = kTargetFeatsPerChunk/feat_per_page+1;
2819 
2820  // final formula with only one division is
2821  // chunk_pages = (kTargetFeatsPerChunk*total_range.GetLength())/(total_feat_count*page_size)
      // never more pages per chunk than the default
2822  return TSeqPos(min(Uint8(kDefaultFeatChunkPages), chunk_pages));
2823 }
2824 
2825 
2826 inline string sx_CombineWithZoomLevel(const string& acc, int zoom_level)
2827 {
2828  return CSeq_annot::CombineWithZoomLevel(acc, zoom_level);
2829 }
2830 
2831 
// Appends a newly created, default-constructed object to `cont` and returns
// a reference to it.
// Cont is a container of reference holders: its value_type names the object
// type as TObjectType, is constructible from a pointer to it and
// dereferences to the object.
template<class Cont>
typename Cont::value_type::TObjectType& sx_AddNew(Cont& cont)
{
    typedef typename Cont::value_type TRef;
    typedef typename TRef::TObjectType TObject;
    cont.push_back(TRef(new TObject));
    return *cont.back();
}
2839 
2840 
2841 void sx_SetZoomLevel(CSeq_annot& annot, int zoom_level)
2842 {
2843  CUser_object& obj = sx_AddNew(annot.SetDesc().Set()).SetUser();
2844  obj.SetType().SetStr("AnnotationTrack");
2845  obj.AddField("ZoomLevel", zoom_level);
2846 }
2847 
2848 
2850  CSNPDbSeqIterator::TFlags flags,
2851  const string& annot_name,
2852  int overview_zoom)
2853 {
2854  annot.SetNameDesc(annot_name);
2857  (annot_name != kDefaultAnnotName)) ) {
2858  sx_SetZoomLevel(annot, overview_zoom);
2859  }
2860 }
2861 
2863 
2864 
2866 CSNPDbSeqIterator::GetEntry(const string& base_name,
2867  TFlags flags) const
2868 {
2869  CRef<CSeq_entry> entry(new CSeq_entry);
2870  entry->SetSet().SetSeq_set();
2871  auto& annots = entry->SetSet().SetAnnot();
2872 
2873  CRange<TSeqPos> total_range = GetSNPRange();
2874  TSeqPos kFeatChunkPages = sx_CalcFeatChunkPages(*this);
2875  _ASSERT(kFeatChunkPages <= kDefaultFeatChunkPages);
2876  TSeqPos kFeatChunkSize = kFeatChunkPages*GetPageSize();
2878 
2879  vector<char> feat_chunks(total_range.GetTo()/kFeatChunkSize+1);
2880 
2881  // overview graphs is necessary for feature chunk distribution
2882  if ( CRef<CSeq_annot> annot = GetOverviewAnnot(total_range) ) {
2883  for ( auto& g : annot->GetData().GetGraph() ) {
2884  sx_AddBits(feat_chunks, kFeatChunkSize, *g);
2885  }
2886  if ( !(flags & fNoOverviewGraph) ) {
2887  sx_SetOverviewName(*annot, flags, base_name, GetOverviewZoom());
2888  annots.push_back(annot);
2889  }
2890  }
2891  if ( !(flags & fNoCoverageGraph) ) {
2892  // coverage graphs
2893  for ( TSeqPos i = 0; i*kGraphChunkSize < total_range.GetToOpen(); ++i ) {
2895  continue;
2896  }
2898  range.SetFrom(i*kGraphChunkSize);
2899  range.SetToOpen(min(total_range.GetToOpen(), (i+1)*kGraphChunkSize));
2900  if ( auto annot = GetCoverageAnnot(range, base_name) ) {
2901  sx_SetZoomLevel(*annot, GetCoverageZoom());
2902  annots.push_back(annot);
2903  }
2904  }
2905  }
2906  if ( !(flags & fNoSNPFeat) ) {
2907  for ( TSeqPos i = 0; i*kFeatChunkSize < total_range.GetToOpen(); ++i ) {
2909  range.SetFrom(i*kFeatChunkSize);
2910  range.SetToOpen(min(total_range.GetToOpen(), (i+1)*kFeatChunkSize));
2911  for ( auto& annot : GetTableFeatAnnots(range, base_name) ) {
2912  annots.push_back(annot);
2913  }
2914  }
2915  }
2916  return entry;
2917 }
2918 
2919 
2920 pair<CRef<CID2S_Split_Info>, CSNPDbSeqIterator::TSplitVersion>
2922  TFlags flags) const
2923 {
2924  CRef<CID2S_Split_Info> split_info(new CID2S_Split_Info);
2925  split_info->SetChunks();
2926  CBioseq_set& skeleton = split_info->SetSkeleton().SetSet();
2927  skeleton.SetId().SetId(kTSEId);
2928  skeleton.SetSeq_set();
2929 
2930  CRange<TSeqPos> total_range = GetSNPRange();
2931  TSeqPos kFeatChunkPages = sx_CalcFeatChunkPages(*this);
2932  _ASSERT(kFeatChunkPages <= kDefaultFeatChunkPages);
2933  TSplitVersion split_version = kFeatChunkPages == kDefaultFeatChunkPages? 0: kFeatChunkPages;
2934  TSeqPos kFeatChunkSize = kFeatChunkPages*GetPageSize();
2936 
2937  vector<char> feat_chunks(total_range.GetTo()/kFeatChunkSize+1);
2938 
2939  // overview graphs is necessary for feature chunk distribution
2940  if ( CRef<CSeq_annot> annot = GetOverviewAnnot(total_range) ) {
2941  for ( auto& g : annot->GetData().GetGraph() ) {
2942  sx_AddBits(feat_chunks, kFeatChunkSize, *g);
2943  }
2944  if ( !(flags & fNoOverviewGraph) ) {
2945  sx_SetOverviewName(*annot, flags, base_name, GetOverviewZoom());
2946  skeleton.SetAnnot().push_back(annot);
2947  }
2948  }
2949  if ( !(flags & fNoCoverageGraph) ) {
2950  // coverage graphs
2951  string graph_annot_name = sx_CombineWithZoomLevel(base_name, GetCoverageZoom());
2952  for ( int i = 0; i*kGraphChunkSize < total_range.GetToOpen(); ++i ) {
2954  continue;
2955  }
2956 
2957  int chunk_id = i*kChunkIdMul+kChunkIdGraph;
2958  CID2S_Chunk_Info& chunk = sx_AddNew(split_info->SetChunks());
2959  chunk.SetId().Set(chunk_id);
2960  CID2S_Seq_annot_Info& annot_info = sx_AddNew(chunk.SetContent()).SetSeq_annot();
2961  annot_info.SetName(graph_annot_name);
2962  annot_info.SetGraph();
2963  CID2S_Seq_id_Interval& interval = annot_info.SetSeq_loc().SetSeq_id_interval();
2964  interval.SetSeq_id(*GetSeqId());
2965  interval.SetStart(i*kGraphChunkSize);
2966  interval.SetLength(kGraphChunkSize);
2967  }
2968  }
2969  if ( !(flags & fNoSNPFeat) ) {
2970  // features
2971  TSeqPos overflow = GetMaxSNPLength()-1;
2972  for ( int i = 0; i*kFeatChunkSize < total_range.GetToOpen(); ++i ) {
2973  if ( !feat_chunks[i] ) {
2974  continue;
2975  }
2976  int chunk_id = i*kChunkIdMul+kChunkIdFeat;
2977  CID2S_Chunk_Info& chunk = sx_AddNew(split_info->SetChunks());
2978  chunk.SetId().Set(chunk_id);
2979  CID2S_Seq_annot_Info& annot_info = sx_AddNew(chunk.SetContent()).SetSeq_annot();
2980  annot_info.SetName(base_name);
2981  CID2S_Feat_type_Info& feat_type = sx_AddNew(annot_info.SetFeat());
2982  feat_type.SetType(CSeqFeatData::e_Imp);
2983  feat_type.SetSubtypes().push_back(CSeqFeatData::eSubtype_variation);
2984  CID2S_Seq_id_Interval& interval = annot_info.SetSeq_loc().SetSeq_id_interval();
2985  interval.SetSeq_id(*GetSeqId());
2986  interval.SetStart(i*kFeatChunkSize);
2987  interval.SetLength(kFeatChunkSize+overflow);
2988  }
2989  }
2990  return make_pair(split_info, split_version);
2991 }
2992 
2993 
2995  TChunkId chunk_id,
2996  TSplitVersion split_version) const
2997 {
2998  if ( TSeqPos(split_version) >= kDefaultFeatChunkPages ) {
2999  NCBI_THROW_FMT(CSraException, eInvalidArg,
3000  "CSNPDbSeqIterator::GetChunkForVersion("<<chunk_id<<", "<<split_version<<")"
3001  ": invalid split version");
3002  }
3003 
3004  CRef<CID2S_Chunk> chunk(new CID2S_Chunk);
3005  CID2S_Chunk_Data& data = sx_AddNew(chunk->SetData());
3006  int chunk_type = chunk_id%kChunkIdMul;
3007  int i = chunk_id/kChunkIdMul;
3008  data.SetId().SetBioseq_set(kTSEId);
3009 
3010  TSeqPos kFeatChunkPages = split_version? split_version: kDefaultFeatChunkPages;
3011  TSeqPos kFeatChunkSize = kFeatChunkPages*GetPageSize();
3012  if ( chunk_type == kChunkIdFeat ) {
3014  range.SetFrom(i*kFeatChunkSize);
3015  range.SetToOpen((i+1)*kFeatChunkSize);
3016  for ( auto annot : GetTableFeatAnnots(range, base_name) ) {
3017  data.SetAnnots().push_back(annot);
3018  }
3019  }
3020  else if ( chunk_type == kChunkIdGraph ) {
3023  range.SetFrom(i*kGraphChunkSize);
3024  range.SetToOpen((i+1)*kGraphChunkSize);
3025  if ( auto annot = GetCoverageAnnot(range, base_name) ) {
3026  sx_SetZoomLevel(*annot, GetCoverageZoom());
3027  data.SetAnnots().push_back(annot);
3028  }
3029  }
3030  return chunk;
3031 }
3032 
3033 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define NAME
ncbi::TMaskedQueryRegions mask
Definition: Dbtag.hpp:53
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
CID2S_Chunk_Data –.
CID2S_Chunk_Info –.
CID2S_Chunk –.
Definition: ID2S_Chunk.hpp:66
CID2S_Feat_type_Info –.
CID2S_Seq_annot_Info –.
CID2S_Seq_id_Interval –.
CID2S_Split_Info –.
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
CObject –.
Definition: ncbiobj.hpp:180
CRange –.
Definition: Range.hpp:68
CRef –.
Definition: ncbiobj.hpp:618
void GetBitfieldOS(vector< char > &octet_stream) const
Definition: snpread.cpp:2502
AutoPtr< SCreateCache > m_CreateCache
Definition: snpread.hpp:1019
TFilter GetBitfield(void) const
Definition: snpread.cpp:2495
CTempString GetAllele(const TExtraRange &range, size_t index) const
Definition: snpread.cpp:2487
static char GetFeatSubtypeChar(EFeatSubtype subtype)
Definition: snpread.cpp:2353
void x_InitPage(void)
Definition: snpread.cpp:2177
~CSNPDbFeatIterator(void)
Definition: snpread.cpp:2281
TVDBRowId m_GraphBaseRowId
Definition: snpread.hpp:1009
const CSNPDb_Impl::SFeatTableCursor & Cur(void) const
Definition: snpread.hpp:965
CSNPDb_Impl & GetDb(void) const
Definition: snpread.hpp:962
CSNPDbFeatIterator(void)
Definition: snpread.cpp:2207
TVDBRowId m_FirstBadFeatId
Definition: snpread.hpp:1014
TSeqPos x_GetFrom(void) const
Definition: snpread.cpp:2371
ESearchMode GetSearchMode(void) const
Definition: snpread.hpp:910
COpenRange< TSeqPos > m_CurRange
Definition: snpread.hpp:1011
pair< TVDBRowId, size_t > TExtraRange
Definition: snpread.hpp:890
Uint4 GetFeatIdPrefix(void) const
Definition: snpread.cpp:2457
TSeqPos GetSNPLength(void) const
Definition: snpread.hpp:917
EExcluded x_Excluded(void)
Definition: snpread.cpp:2384
EFeatSubtype GetFeatSubtype(void) const
Definition: snpread.cpp:2346
CSNPDbFeatIterator & Select(COpenRange< TSeqPos > ref_range, const SSelector &sel=SSelector())
Definition: snpread.cpp:2328
const CSNPDbSeqIterator & GetRefIter(void) const
Definition: snpread.hpp:897
void x_Next(void)
Definition: snpread.cpp:2441
TVDBRowId m_ExtraRowId
Definition: snpread.hpp:1007
TSeqPos GetSNPPosition(void) const
Definition: snpread.hpp:914
Uint8 GetFeatId(void) const
Definition: snpread.cpp:2464
CRef< CSNPDb_Impl::SGraphTableCursor > m_Graph
Definition: snpread.hpp:1008
TVDBRowId m_CurrFeatId
Definition: snpread.hpp:1014
TExtraRange GetExtraRange(void) const
Definition: snpread.cpp:2471
TSeqPos x_GetLength(void) const
Definition: snpread.cpp:2377
CRef< CSNPDb_Impl::SExtraTableCursor > m_Extra
Definition: snpread.hpp:1006
void x_CheckValid(const char *method) const
Definition: snpread.hpp:979
CSNPDb_Impl::TSeqInfoList::const_iterator x_GetSeqIter() const
Definition: snpread.hpp:999
const CRange< TSeqPos > & GetSearchRange(void) const
Definition: snpread.hpp:907
CTempString GetFeatType(void) const
Definition: snpread.cpp:2339
CRef< CSeq_feat > GetSeq_feat(TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:2618
CTempString GetFeatSubtypeString(void) const
Definition: snpread.cpp:2365
CSNPDbPageIterator m_PageIter
Definition: snpread.hpp:1004
CRef< CSNPDb_Impl::SFeatTableCursor > m_Feat
Definition: snpread.hpp:1005
TVDBRowId x_GetGraphVDBRowId() const
Definition: snpread.cpp:2287
void Reset(void)
Definition: snpread.cpp:2160
void x_Settle(void)
Definition: snpread.cpp:2418
SCreateCache & x_GetCreateCache(void) const
Definition: snpread.cpp:2598
CSNPDbFeatIterator & operator=(const CSNPDbFeatIterator &iter)
Definition: snpread.cpp:2257
void x_ReportInvalid(const char *method) const
Definition: snpread.cpp:2449
CRef< CSeq_id > GetSeqId(void) const
Definition: snpread.hpp:901
const CSNPDbPageIterator & GetPageIter(void) const
Definition: snpread.hpp:894
void x_SetFilter(const SSelector &sel)
Definition: snpread.cpp:2293
TVDBRowId m_SeqRowId
Definition: snpread.hpp:841
CVDBValueFor< Uint4 > GetCoverageValues(void) const
Definition: snpread.cpp:2148
~CSNPDbGraphIterator(void)
Definition: snpread.cpp:2082
Uint4 GetTotalValue(void) const
Definition: snpread.cpp:2141
void Reset(void)
Definition: snpread.cpp:2034
TVDBRowId m_CurrPageRowId
Definition: snpread.hpp:845
CSNPDb_Impl & GetDb(void) const
Definition: snpread.hpp:821
CRef< CSNPDb_Impl::SGraphTableCursor > m_Cur
Definition: snpread.hpp:839
TSeqPos m_CurrPagePos
Definition: snpread.hpp:846
CSNPDbGraphIterator & Select(const CSNPDbSeqIterator &iter, COpenRange< TSeqPos > ref_range)
Definition: snpread.cpp:2089
void x_CheckValid(const char *method) const
Definition: snpread.hpp:829
CSNPDbGraphIterator & operator=(const CSNPDbGraphIterator &iter)
Definition: snpread.cpp:2066
TVDBRowId m_TrackRowId
Definition: snpread.hpp:842
CRange< TSeqPos > m_SearchRange
Definition: snpread.hpp:843
void x_ReportInvalid(const char *method) const
Definition: snpread.cpp:2133
void x_Next(void)
Definition: snpread.cpp:2112
TVDBRowId GetFirstFeatRowId(void) const
Definition: snpread.cpp:2015
TVDBRowId GetPageRowId(void) const
Definition: snpread.hpp:700
TSeqPos GetPagePos(void) const
Definition: snpread.hpp:692
CSNPDbPageIterator & Select(COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
Definition: snpread.cpp:1947
const CSNPDb_Impl::SPageTableCursor & Cur(void) const
Definition: snpread.hpp:721
void x_Next(void)
Definition: snpread.cpp:1983
size_t m_CurrPageSet
Definition: snpread.hpp:764
TVDBRowCount GetFeatCount(void) const
Definition: snpread.cpp:2022
TVDBRowId m_LastGraphRowId
Definition: snpread.hpp:760
TSeqPos m_CurrPagePos
Definition: snpread.hpp:766
CSNPDbPageIterator(void)
Definition: snpread.cpp:1875
CSNPDbSeqIterator m_SeqIter
Definition: snpread.hpp:756
CSNPDbPageIterator & operator=(const CSNPDbPageIterator &iter)
Definition: snpread.cpp:1922
void x_ReportInvalid(const char *method) const
Definition: snpread.cpp:2007
ESearchMode m_SearchMode
Definition: snpread.hpp:768
CSNPDb_Impl & GetDb(void) const
Definition: snpread.hpp:718
void x_CheckValid(const char *method) const
Definition: snpread.hpp:747
~CSNPDbPageIterator(void)
Definition: snpread.cpp:1940
CRef< CSNPDb_Impl::SPageTableCursor > m_Cur
Definition: snpread.hpp:758
TVDBRowId m_CurrPageRowId
Definition: snpread.hpp:765
CRange< TSeqPos > m_SearchRange
Definition: snpread.hpp:762
void Reset(void)
Definition: snpread.cpp:1860
CRef< CSNPDb_Impl::SGraphTableCursor > m_GraphCur
Definition: snpread.hpp:759
CRef< CSeq_graph > GetCoverageGraph(CRange< TSeqPos > range) const
Definition: snpread.cpp:1211
CRef< CSeq_annot > GetOverviewAnnot(CRange< TSeqPos > range, const string &annot_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:1188
const SFilter & GetFilter() const
Definition: snpread.hpp:610
Uint8 GetSNPCount(CRange< TSeqPos > range) const
Definition: snpread.cpp:831
TList::const_iterator m_Iter
Definition: snpread.hpp:634
CSNPDb_Impl::TTrackInfoList::const_iterator m_TrackIter
Definition: snpread.hpp:635
CSNPDbSeqIterator & operator++(void)
Definition: snpread.cpp:797
CRef< CSeq_id > GetSeqId(void) const
Definition: snpread.hpp:500
pair< CRef< CID2S_Split_Info >, TSplitVersion > GetSplitInfoAndVersion(const string &base_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:2921
vector< CRef< CSeq_annot > > TAnnotSet
Definition: snpread.hpp:580
TSeqPos GetCoverageZoom(void) const
Definition: snpread.hpp:519
TSeqPos GetOverviewZoom(void) const
Definition: snpread.hpp:516
TSeqPos GetMaxSNPLength(void) const
Definition: snpread.cpp:813
TSeqPos GetPageSize(void) const
Definition: snpread.hpp:513
CRef< CSeq_annot > GetCoverageAnnot(CRange< TSeqPos > range, const string &annot_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:1220
void Reset(void)
Definition: snpread.cpp:789
void SetTrack(const CSNPDbTrackIterator &track)
Definition: snpread.cpp:773
CSNPDb_Impl & GetDb(void) const
Definition: snpread.hpp:606
CRef< CSeq_annot > GetFeatAnnot(CRange< TSeqPos > range, const SFilter &filter, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:1241
CSNPDbSeqIterator(void)
Definition: snpread.hpp:479
CRef< CSeq_entry > GetEntry(const string &base_name, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:2866
TVDBRowId GetVDBRowId(void) const
Definition: snpread.hpp:530
TAnnotSet GetTableFeatAnnots(CRange< TSeqPos > range, const string &annot_name, const SFilter &filter, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:1813
const TInfo & GetInfo() const
Definition: snpread.cpp:779
TVDBRowId GetGraphVDBRowId() const
Definition: snpread.hpp:539
TList::value_type TInfo
Definition: snpread.hpp:477
Uint8 GetSNPCount(void) const
Definition: snpread.cpp:819
CRange< TVDBRowId > GetPageVDBRowRange(void) const
Definition: snpread.hpp:536
CRef< CSeq_graph > GetOverviewGraph(CRange< TSeqPos > range, TFlags flags=fDefaultFlags) const
Definition: snpread.cpp:1176
CRange< TSeqPos > GetSNPRange(void) const
Definition: snpread.cpp:837
TList::const_iterator x_GetSeqIter() const
Definition: snpread.hpp:625
CRef< CID2S_Chunk > GetChunkForVersion(const string &base_name, TChunkId chunk_id, TSplitVersion split_version) const
Definition: snpread.cpp:2994
CSNPDb_Impl::TTrackInfoList::const_iterator x_GetTrackIter() const
Definition: snpread.hpp:628
bool IsCircular(void) const
Definition: snpread.cpp:807
void Reset(void)
Definition: snpread.cpp:714
const TInfo & GetInfo() const
Definition: snpread.cpp:704
TList::const_iterator m_Iter
Definition: snpread.hpp:468
CSNPDbTrackIterator(void)
Definition: snpread.hpp:409
TList::value_type TInfo
Definition: snpread.hpp:407
const TList & GetList() const
Definition: snpread.hpp:458
CVDBObjectCache< SGraphTableCursor > m_Graph
Definition: snpread.hpp:354
CVDBTable m_PageTable
Definition: snpread.hpp:348
CVDBTable m_ExtraTable
Definition: snpread.hpp:350
CRef< SExtraTableCursor > Extra(TVDBRowId row=0)
Definition: snpread.cpp:274
const CVDBTable & ExtraTable(void)
Definition: snpread.hpp:295
TTrackInfoMapByName m_TrackMapByName
Definition: snpread.hpp:365
CRef< SPageTableCursor > Page(TVDBRowId row=0)
Definition: snpread.cpp:254
TSeqInfoList::const_iterator FindSeq(const string &accession, int version)
Definition: snpread.cpp:484
const CVDBTable & PageTable(void)
Definition: snpread.hpp:287
const string & GetDbPath(void) const
Definition: snpread.hpp:233
const TTrackInfoList & GetTrackInfoList(void) const
Definition: snpread.hpp:244
CMutex m_Mutex
Definition: snpread.hpp:366
CSNPDb_Impl(CVDBMgr &mgr, CTempString path_or_acc)
Definition: snpread.cpp:316
const CVDBTable & FeatTable(void)
Definition: snpread.hpp:291
const TSeqInfoList & GetSeqInfoList(void) const
Definition: snpread.hpp:237
TTrackInfoList m_TrackList
Definition: snpread.hpp:364
TSeqPos GetOverviewZoom(void) const
Definition: snpread.cpp:458
const CVDBTable & SeqTable(void)
Definition: snpread.hpp:279
TSeqPos GetPageSize(void) const
Definition: snpread.cpp:452
TSeqPos GetCoverageZoom(void) const
Definition: snpread.cpp:464
CRef< SFeatTableCursor > Feat(TVDBRowId row=0)
Definition: snpread.cpp:264
CVDBMgr m_Mgr
Definition: snpread.hpp:343
CVDBTable m_FeatTable
Definition: snpread.hpp:349
void x_Update(TSeqInfoList::const_iterator seq)
Definition: snpread.cpp:537
string m_DbPath
Definition: snpread.hpp:344
CRef< SGraphTableCursor > Graph(TVDBRowId row=0)
Definition: snpread.cpp:244
virtual ~CSNPDb_Impl(void)
Definition: snpread.cpp:447
CVDBTable m_GraphTable
Definition: snpread.hpp:347
TVDBRowId x_GetGraphVDBRowId(TSeqInfoList::const_iterator seq, TTrackInfoList::const_iterator track)
Definition: snpread.cpp:651
CVDBObjectCache< SExtraTableCursor > m_Extra
Definition: snpread.hpp:357
CVDBObjectCache< SFeatTableCursor > m_Feat
Definition: snpread.hpp:356
CVDBTableIndex m_SeqAccIndex
Definition: snpread.hpp:353
CVDBObjectCache< SSeqTableCursor > m_Seq
Definition: snpread.hpp:352
CVDBTable m_SeqTable
Definition: snpread.hpp:346
CRange< TVDBRowId > x_GetPageVDBRowRange(TSeqInfoList::const_iterator seq)
Definition: snpread.cpp:639
TSeq2PageMap m_Seq2PageMap
Definition: snpread.hpp:362
CVDBObjectCache< SPageTableCursor > m_Page
Definition: snpread.hpp:355
vector< STrackInfo > TTrackInfoList
Definition: snpread.hpp:230
CRef< SSeqTableCursor > Seq(TVDBRowId row=0)
Definition: snpread.cpp:234
TSeqInfoList m_SeqList
Definition: snpread.hpp:359
const CVDBTable & GraphTable(void)
Definition: snpread.hpp:283
void Put(CRef< SSeqTableCursor > &curs, TVDBRowId row=0)
Definition: snpread.cpp:286
TTrackInfoList::const_iterator FindTrack(const string &name) const
Definition: snpread.cpp:471
TSeqInfoMapBySeq_id m_SeqMapBySeq_id
Definition: snpread.hpp:360
static string CombineWithZoomLevel(const string &acc, int zoom_level)
Combine accession string and zoom level into a string with separator.
Definition: Seq_annot.cpp:254
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
TSeqPos GetLength(void) const
@ eNotFoundDb
DB main file not found.
Definition: exception.hpp:92
virtual TErrCode GetErrCode(void) const
Definition: sraread.cpp:163
rc_t GetRC(void) const
Definition: exception.hpp:144
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
TVDBRowId GetMaxRowId(void) const
Definition: vdbread.cpp:1388
TVDBRowIdRange Find(const string &value) const
Definition: vdbread.cpp:1276
const TValue * data() const
Definition: vdbread.hpp:956
size_t size(void) const
Definition: vdbread.hpp:690
size_type size() const
Definition: map.hpp:148
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator lower_bound(const key_type &key) const
Definition: map.hpp:154
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
The NCBI C++ standard methods for dealing with std::string.
static const unsigned kGraphChunkSize
uint32_t INSDC_coord_len
Definition: csraread.hpp:49
#define BITS
Definition: ct_nlmzip_i.h:135
static ulg bb
static uch flags
#define T(s)
Definition: common.h:230
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
#define LEN(ptr)
static const char * str(char *buf, int n)
Definition: stats.c:84
#define MASK(n)
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:986
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_Int
Definition: ncbi_limits.h:184
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetFrom(position_type from)
Definition: range.hpp:170
TThisType & SetToOpen(position_type toOpen)
Definition: range.hpp:175
position_type GetToOpen(void) const
Definition: range.hpp:138
position_type GetFrom(void) const
Definition: range.hpp:134
bool Empty(void) const
Definition: range.hpp:148
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
Definition: ncbistr.cpp:3278
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
BEGIN_STD_SCOPE bool is_sorted(Iterator iter1, Iterator iter2)
is_sorted is provided by some implementations of the STL and may be included in future releases of al...
Definition: ncbiutil.hpp:274
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
vector< CRef< CUser_field > > TData
EField_id
identification of the column data in the objects described by the table known column data types posit...
void SetHeader(THeader &value)
Assign a value to Header data member.
void ResetSparse(void)
Reset Sparse data member.
void ResetData(void)
Reset Data data member.
void SetSparse(TSparse &value)
Assign a value to Sparse data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
row data Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrings
vector< vector< char > * > TBytes
void SetDefault(TDefault &value)
Assign a value to Default data member.
@ eField_id_qual
field-name must be "Q.xxx", see below
@ eField_id_ext_type
extra fields, see also special values for str below
@ eField_id_data_imp_key
various data fields
void SetName(const TName &value)
Assign a value to Name data member.
void SetType(TType value)
Assign a value to Type data member.
TSubtypes & SetSubtypes(void)
Assign a value to Subtypes data member.
void SetSkeleton(TSkeleton &value)
Assign a value to Skeleton data member.
void SetId(const TId &value)
Assign a value to Id data member.
void SetSeq_loc(TSeq_loc &value)
Assign a value to Seq_loc data member.
TChunks & SetChunks(void)
Assign a value to Chunks data member.
void SetStart(TStart value)
Assign a value to Start data member.
TData & SetData(void)
Assign a value to Data data member.
TFeat & SetFeat(void)
Assign a value to Feat data member.
TContent & SetContent(void)
Assign a value to Content data member.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
void SetGraph(void)
Set NULL data member (assign 'NULL' value to Graph data member).
void SetLength(TLength value)
Assign a value to Length data member.
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
Definition: Seq_feat_.hpp:1339
void ResetDescr(void)
Reset Descr data member.
Definition: Imp_feat_.cpp:62
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void ResetLoc(void)
Reset Loc data member.
Definition: Imp_feat_.cpp:56
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_feat_.cpp:153
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void ResetExt(void)
Reset Ext data member.
Definition: Seq_feat_.cpp:148
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
void ResetDbxref(void)
Reset Dbxref data member.
Definition: Seq_feat_.cpp:188
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
void ResetQual(void)
Reset Qual data member.
Definition: Seq_feat_.cpp:136
void SetTo(TTo value)
Assign a value to To data member.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
virtual void Reset(void)
Reset the whole object.
Definition: Seq_loc_.cpp:59
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_graph_.hpp:784
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_graph_.cpp:131
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
const TGraph & GetGraph(void) const
Get the Graph member data.
const TValues & GetValues(void) const
Get the Values member data.
Definition: Int_graph_.hpp:425
const TByte & GetByte(void) const
Get the variant data.
Definition: Seq_graph_.cpp:153
void SetGraph(TGraph &value)
Assign a value to Graph data member.
Definition: Seq_graph_.cpp:250
const TValues & GetValues(void) const
Get the Values member data.
bool IsByte(void) const
Check if variant Byte is selected.
Definition: Seq_graph_.hpp:757
void SetLoc(TLoc &value)
Assign a value to Loc data member.
Definition: Seq_graph_.cpp:224
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
TNumval GetNumval(void) const
Get the Numval member data.
TComp GetComp(void) const
Get the Comp member data.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void SetId(TId &value)
Assign a value to Id data member.
Definition: Bioseq_set_.cpp:93
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
static const TSeqPos kFeatChunkSize
Definition of all error codes used in SRA C++ support libraries.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
static int version
Definition: mdb_load.c:29
const size_t kChunkSize
Definition: na_utils.cpp:587
range(_Ty, _Ty) -> range< _Ty >
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define NCBI_CONST_UINT8(v)
Definition: ncbi_std.h:196
T max(T x_, T y_)
T min(T x_, T y_)
static const char * prefix[]
Definition: pcregrep.c:405
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
CRef< CSeqTable_column > x_MakeColumn(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1270
void x_CollectOverviewGraph(SGraphMaker &g, const CSNPDbSeqIterator &seq_it, CRange< TSeqPos > range, SGraphMaker::EGraphSet graph_set, SGraphMaker::EGapsType gaps_type)
Definition: snpread.cpp:1128
void x_AdjustGraphRange(CRange< TSeqPos > &range, const CSNPDbSeqIterator &it, const TSeqPos comp)
Definition: snpread.cpp:900
NCBI_DEFINE_ERR_SUBCODE_X(1)
static const TSeqPos kDefaultFeatChunkPages
Definition: snpread.cpp:2739
BEGIN_STD_NAMESPACE
Definition: snpread.cpp:55
static bool x_IsStringConstant(const string &str, const char(&value)[ValueSize])
Definition: snpread.cpp:2510
TSeqPos sx_CalcFeatChunkPages(const CSNPDbSeqIterator &it)
Definition: snpread.cpp:2808
BEGIN_NAMESPACE(objects)
void x_RoundRange(CRange< TSeqPos > &range, TSeqPos step)
Definition: snpread.cpp:893
unsigned x_SetBitCount(Uint8 v)
Definition: snpread.cpp:848
bool sx_HasNonZero(const Values &values, TSeqPos index, TSeqPos count)
Definition: snpread.cpp:2744
static const char kDefaultAnnotName[]
Definition: snpread.cpp:87
static const TSeqPos kTargetFeatsPerChunk
Definition: snpread.cpp:2741
static const TSeqPos kPageSize
Definition: snpread.cpp:82
static const TSeqPos kFeatChunksPerGraphChunk
Definition: snpread.cpp:2740
BEGIN_LOCAL_NAMESPACE
Definition: snpread.cpp:845
void sx_SetZoomLevel(CSeq_annot &annot, int zoom_level)
Definition: snpread.cpp:2841
END_STD_NAMESPACE
Definition: snpread.cpp:70
static const TSeqPos kOverviewZoom
Definition: snpread.cpp:84
void x_SetOS8(vector< char > &os, Uint8 data)
Definition: snpread.cpp:864
static void sx_AddBits(vector< char > &bits, TSeqPos kChunkSize, const CSeq_graph &graph)
Definition: snpread.cpp:2783
void sx_SetOverviewName(CSeq_annot &annot, CSNPDbSeqIterator::TFlags flags, const string &annot_name, int overview_zoom)
Definition: snpread.cpp:2849
static const TSeqPos kMaxSNPLength
Definition: snpread.cpp:83
static const char kFeatSubtypesToChars[]
Definition: snpread.cpp:89
END_LOCAL_NAMESPACE
Definition: snpread.cpp:1172
void x_CollectCoverageGraph(SGraphMaker &g, const CSNPDbSeqIterator &seq_it, CRange< TSeqPos > range, SGraphMaker::EGraphSet graph_set)
Definition: snpread.cpp:1142
static const int kChunkIdMul
Definition: snpread.cpp:2734
static const int kMaxTableAlleles
Definition: snpread.cpp:1563
static const bool kPreloadSeqList
Definition: snpread.cpp:91
CRef< CSeq_annot > x_NewAnnot(const string &annot_name=kDefaultAnnotName)
Definition: snpread.cpp:1120
END_NCBI_NAMESPACE
Definition: snpread.cpp:3035
static const TSeqPos kCoverageZoom
Definition: snpread.cpp:85
static CObject_id & x_GetObject_id(CRef< CObject_id > &cache, const char *name)
Definition: snpread.cpp:2608
static const bool kPage2FeatErrorWorkaround
Definition: snpread.cpp:92
static T & x_GetPrivate(CRef< T > &ref)
Definition: snpread.cpp:2524
END_NAMESPACE(objects)
BEGIN_NCBI_NAMESPACE
Definition: snpread.cpp:72
string sx_CombineWithZoomLevel(const string &acc, int zoom_level)
Definition: snpread.cpp:2826
static const int kChunkIdFeat
Definition: snpread.cpp:2732
TSeqPos x_RoundPos(TSeqPos pos, TSeqPos step)
Definition: snpread.cpp:881
#define x_SetStringConstant(obj, Field, value)
Definition: snpread.cpp:2515
static const int kChunkIdGraph
Definition: snpread.cpp:2733
TSeqPos x_RoundPosUp(TSeqPos pos, TSeqPos step)
Definition: snpread.cpp:887
void x_AdjustRange(CRange< TSeqPos > &range, const CSNPDbSeqIterator &it)
Definition: snpread.cpp:874
void sx_AddBits2(vector< char > &bits, TSeqPos bit_values, TSeqPos pos_index, const TValues &values)
Definition: snpread.cpp:2757
Cont::value_type::TObjectType & sx_AddNew(Cont &cont)
Definition: snpread.cpp:2833
static const int kTSEId
Definition: snpread.cpp:2731
#define NCBI_THROW2_FMT(exception_class, err_code, message, extra)
Definition: exception.hpp:176
#define row(bind, expected)
Definition: string_bind.c:73
CRef< CUser_field > m_Bitfield
Definition: snpread.cpp:2545
CRef< CGb_qual > m_AlleleCache_empty
Definition: snpread.cpp:2550
CRef< CGb_qual > m_AlleleCache_minus
Definition: snpread.cpp:2551
CRef< CGb_qual > m_AlleleCacheG
Definition: snpread.cpp:2554
CRef< CSeq_interval > m_LocInt
Definition: snpread.cpp:2537
CGb_qual & x_GetCommonAllele(CRef< CGb_qual > &cache, CTempString val)
Definition: snpread.cpp:2558
CRef< CGb_qual > m_AlleleCacheT
Definition: snpread.cpp:2555
CRef< CUser_field > m_Subtype
Definition: snpread.cpp:2546
CRef< CObject_id > m_ObjectIdBitfield
Definition: snpread.cpp:2543
CRef< CSeq_point > m_LocPnt
Definition: snpread.cpp:2538
CRef< CGb_qual > m_Allele[4]
Definition: snpread.cpp:2539
CGb_qual & GetAllele(CRef< CGb_qual > &cache, CTempString val)
Definition: snpread.cpp:2575
CRef< CUser_object > m_Ext
Definition: snpread.cpp:2541
CGb_qual & x_GetCachedAllele(CRef< CGb_qual > &cache, CTempString val)
Definition: snpread.cpp:2568
CRef< CGb_qual > m_AlleleCacheC
Definition: snpread.cpp:2553
CRef< CGb_qual > m_AlleleCacheA
Definition: snpread.cpp:2552
CRef< CObject_id > m_ObjectIdQAdata
Definition: snpread.cpp:2542
CRef< CObject_id > m_ObjectIdSubtype
Definition: snpread.cpp:2544
SExtraTableCursor(const CVDBTable &table)
Definition: snpread.cpp:227
DECLARE_VDB_COLUMN_AS(Uint4, FEAT_ID_PREFIX)
DECLARE_VDB_COLUMN_AS(Uint4, FEAT_SUBTYPE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, SEQ_ID_ROW_NUM)
SFeatTableCursor(const CVDBTable &table)
Definition: snpread.cpp:211
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, LEN)
DECLARE_VDB_COLUMN_AS(TVDBRowCount, EXTRA_ROWS_COUNT)
DECLARE_VDB_COLUMN_AS(TVDBRowId, EXTRA_ROW_FROM)
DECLARE_VDB_COLUMN_AS(Uint8, BIT_FLAGS)
DECLARE_VDB_COLUMN_AS(Uint8, FEAT_ID_VALUE)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, FROM)
DECLARE_VDB_COLUMN_AS(Uint4, GR_ZOOM)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FILTER_ID_ROW_NUM)
SGraphTableCursor(const CVDBTable &table)
Definition: snpread.cpp:190
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, BLOCK_FROM)
DECLARE_VDB_COLUMN_AS(Uint4, GR_TOTAL)
DECLARE_VDB_COLUMN_AS(TVDBRowId, SEQ_ID_ROW_NUM)
SPageTableCursor(const CVDBTable &table)
Definition: snpread.cpp:201
DECLARE_VDB_COLUMN_AS(TVDBRowCount, FEATURE_ROWS_COUNT)
DECLARE_VDB_COLUMN_AS(TVDBRowId, SEQ_ID_ROW_NUM)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEATURE_ROW_FROM)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, PAGE_FROM)
TSeqPos GetSeqPosEnd(TSeqPos page_size) const
Definition: snpread.hpp:205
TVDBRowId GetRowIdEnd(void) const
Definition: snpread.hpp:208
vector< SPageSet > TPageSets
Definition: snpread.hpp:217
SSeqTableCursor(const CVDBTable &table)
Definition: snpread.cpp:173
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, LEN)
DECLARE_VDB_COLUMN_AS_STRING(ACCESSION)
STrackTableCursor(const CVDBTable &table)
Definition: snpread.cpp:181
SColumn(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1303
CSeqTable_column * x_GetColumn(void)
Definition: snpread.cpp:1317
int id
Definition: snpread.cpp:1292
const char * name
Definition: snpread.cpp:1293
DECLARE_OPERATOR_BOOL_REF(column)
void Attach(CSeq_table &table)
Definition: snpread.cpp:1331
CRef< CSeqTable_column > column
Definition: snpread.cpp:1295
void Init(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1310
CRef< CSeqTable_column > GetColumn(void)
Definition: snpread.cpp:1326
SColumn(void)
Definition: snpread.cpp:1297
CCommonBytes_table::TIndexes * indexes
Definition: snpread.cpp:1513
void Attach(CSeq_table &table)
Definition: snpread.cpp:1549
TIndex index
Definition: snpread.cpp:1515
map< Uint8, size_t > TIndex
Definition: snpread.cpp:1514
CCommonBytes_table::TBytes * values
Definition: snpread.cpp:1512
SCommon8Bytes(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1518
void Add(Uint8 val)
Definition: snpread.cpp:1526
SCommonStrings(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1466
CCommonString_table::TIndexes * indexes
Definition: snpread.cpp:1455
void Attach(CSeq_table &table)
Definition: snpread.cpp:1496
list< string > index_strings
Definition: snpread.cpp:1458
unordered_map< CTempString, int > TIndex
Definition: snpread.cpp:1456
void Add(CTempString val)
Definition: snpread.cpp:1474
CCommonString_table::TStrings * values
Definition: snpread.cpp:1454
CRef< CSeq_id > m_Id
Definition: snpread.cpp:926
EGapsType m_GapsType
Definition: snpread.cpp:922
void Start(const CSNPDbSeqIterator &it, CRange< TSeqPos > &range, TSeqPos comp, EGraphSet graph_set=eMultipleGraphs, EGapsType gaps_type=eAllowGaps)
Definition: snpread.cpp:932
CRef< CSeq_graph > m_Graph
Definition: snpread.cpp:923
EGraphSet m_GraphSet
Definition: snpread.cpp:921
void AddValue(Uint4 value)
Definition: snpread.cpp:1093
TGraphs m_Graphs
Definition: snpread.cpp:925
CRange< TSeqPos > m_Range
Definition: snpread.cpp:927
TSeqPos m_Comp
Definition: snpread.cpp:928
list< CRef< CSeq_graph > > TGraphs
Definition: snpread.cpp:924
static const TSeqPos kMinGraphGap
Definition: snpread.cpp:910
CRef< CSeq_graph > FinishGraph()
Definition: snpread.cpp:1109
void x_NewGraph()
Definition: snpread.cpp:953
void AddValues(TSeqPos count, const Uint4 *values)
Definition: snpread.cpp:1070
void AddActualValues(TSeqPos count, const Uint4 *values)
Definition: snpread.cpp:1024
@ eMultipleGraphs
Definition: snpread.cpp:913
TGraphs & FinishAnnot()
Definition: snpread.cpp:1102
CSeq_graph & x_GetGraph()
Definition: snpread.cpp:992
Uint4 m_MaxValue
Definition: snpread.cpp:930
void AddActualValue(Uint4 value)
Definition: snpread.cpp:1055
void x_EndGraph(bool save=true)
Definition: snpread.cpp:959
void AddActualZeroes(TSeqPos count)
Definition: snpread.cpp:1011
TSeqPos m_EmptyCount
Definition: snpread.cpp:929
void AddEmpty(TSeqPos count)
Definition: snpread.cpp:1060
void AddActualGap()
Definition: snpread.cpp:1000
SInt8Column(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1367
CSeqTable_multi_data::TInt8 * values8
Definition: snpread.cpp:1364
void Add(Int8 value)
Definition: snpread.cpp:1373
SIntColumn(CSeqTable_column_info::EField_id id, const char *name=0)
Definition: snpread.cpp:1346
void Add(int value)
Definition: snpread.cpp:1352
CSeqTable_multi_data::TInt * values
Definition: snpread.cpp:1343
bool Matches(TFilter bits) const
Definition: snpread.hpp:132
bool IsSet(void) const
Definition: snpread.hpp:128
void Normalize(void)
Definition: snpread.hpp:123
ESearchMode m_SearchMode
Definition: snpread.hpp:171
int TChunkId
Definition: snpread.hpp:176
Uint8 TFilter
Definition: snpread.hpp:96
@ eSearchByOverlap
Definition: snpread.hpp:69
@ eSearchByStart
Definition: snpread.hpp:70
int TSplitVersion
Definition: snpread.hpp:175
SCommon8Bytes col_bitfield
Definition: snpread.cpp:1585
static void AddFixedInt(CSeq_table &table, const char *name, int value)
Definition: snpread.cpp:1616
void Add(const CSNPDbFeatIterator &it)
Definition: snpread.cpp:1644
SSparseIndex ind_to
Definition: snpread.cpp:1580
CRef< CSeq_annot > GetAnnot(const string &annot_name, CSeq_id &seq_id)
Definition: snpread.cpp:1670
SIntColumn col_from
Definition: snpread.cpp:1578
static void AddFixedSeq_id(CSeq_table &table, CSeqTable_column_info::EField_id id, CSeq_id &value)
Definition: snpread.cpp:1598
SSeqTableContent(void)
Definition: snpread.cpp:1627
SCommonStrings col_subtype
Definition: snpread.cpp:1584
SCommonStrings col_alleles[kMaxTableAlleles]
Definition: snpread.cpp:1582
static void AddFixedSeq_loc(CSeq_table &table, const char *name, CSeq_loc &value)
Definition: snpread.cpp:1607
SIntColumn col_to
Definition: snpread.cpp:1579
SInt8Column col_dbxref
Definition: snpread.cpp:1587
static void AddFixedString(CSeq_table &table, CSeqTable_column_info::EField_id id, const string &value)
Definition: snpread.cpp:1589
bool AddToTable(const CSNPDbFeatIterator &it)
Definition: snpread.cpp:1784
SSeqTableContent m_Tables[2][kMaxTableAlleles]
Definition: snpread.cpp:1745
SSeqTableConverter(const CSNPDbSeqIterator &it)
Definition: snpread.cpp:1756
CRef< CSeq_id > m_Seq_id
Definition: snpread.cpp:1751
vector< CRef< CSeq_annot > > GetAnnots(const string &annot_name)
Definition: snpread.cpp:1763
CRef< CSeq_annot > m_RegularAnnot
Definition: snpread.cpp:1752
void Add(const CSNPDbFeatIterator &it)
Definition: snpread.cpp:1797
SSparseIndex(SColumn &column)
Definition: snpread.cpp:1398
CSeqTable_sparse_index::TIndexes * indexes
Definition: snpread.cpp:1395
void Optimize(SIntColumn &column, const SIntColumn &backup_column)
Definition: snpread.cpp:1419
void Add(int index)
Definition: snpread.cpp:1405
SColumn & column
Definition: snpread.cpp:1394
size_t operator()(ncbi::CTempString val) const
Definition: snpread.cpp:60
Definition: _hash_fun.h:40
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
#define INIT_VDB_COLUMN(name)
Definition: vdbread.hpp:593
uint64_t TVDBRowCount
Definition: vdbread.hpp:82
pair< TVDBRowId, TVDBRowCount > TVDBRowIdRange
Definition: vdbread.hpp:83
int64_t TVDBRowId
Definition: vdbread.hpp:79
int32_t INSDC_coord_zero
Definition: wgsread.hpp:55
Modified on Wed Apr 17 13:08:33 2024 by modify_doxy.py rev. 669887