NCBI C++ ToolKit
Dense_seg.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Dense_seg.cpp 100562 2023-08-10 21:37:30Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the data definition file
34  * 'seqalign.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <algorithm>
41 
42 // generated includes
44 
47 #include <serial/objistr.hpp>
48 #include <corelib/ncbi_param.hpp>
49 
50 // generated classes
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 
63 {
64  /// do the base copy
65  CSerialObject::Assign(obj, how);
66 
67  /// copy our specific items
68  if (GetTypeInfo() == obj.GetThisTypeInfo()) {
69  const CDense_seg& other = static_cast<const CDense_seg&>(obj);
70  m_set_State1[0] = other.m_set_State1[0];
71  m_Widths = other.m_Widths;
72  }
73 }
74 
75 // Implemented as a macro so any assertion messages will be more meaningful
//
// ASSERT_CONSISTENCY() checks the internal bookkeeping of the current
// CDense_seg. When _DEBUG is defined it expands to a do/while(false)
// block that asserts:
//   - numseg and dim are non-negative;
//   - lens holds exactly numseg entries;
//   - starts holds exactly numseg * dim entries;
//   - strands, if set, has the same number of entries as starts;
//   - ids holds exactly dim entries.
// Without _DEBUG it expands to NCBI_EAT_SEMICOLON(), so the call site can
// still be written with a trailing semicolon.
76 #ifdef _DEBUG
77 # define ASSERT_CONSISTENCY() \
78  do { \
79  TNumseg numseg = GetNumseg(); \
80  TDim dim = GetDim(); \
81  _ASSERT(numseg >= 0); \
82  _ASSERT(dim >= 0); \
83  _ASSERT(size_t(numseg) == GetLens().size()); \
84  _ASSERT(size_t(numseg) * dim == GetStarts().size()); \
85  _ASSERT(!IsSetStrands() || GetStarts().size() == GetStrands().size()); \
86  _ASSERT(size_t(dim) == GetIds().size()); \
87  } while (false)
88 #else
89 # define ASSERT_CONSISTENCY() NCBI_EAT_SEMICOLON()
90 #endif
91 
93 {
94  const TDim dim = GetDim();
95  if (dim < 0) {
96  NCBI_THROW(CSeqalignException, eInvalidAlignment,
97  "CDense_seg::CheckNumRows(): "
98  "negative dim");
99  }
100  if (size_t(dim) != GetIds().size()) {
101  NCBI_THROW(CSeqalignException, eInvalidAlignment,
102  "CDense_seg::CheckNumRows(): "
103  "ids.size is inconsistent with dim");
104  }
105  return dim;
106 }
107 
108 
110 {
111  const CDense_seg::TStarts& starts = GetStarts();
112  const CDense_seg::TStrands& strands = GetStrands();
113  const CDense_seg::TLens& lens = GetLens();
114  const CDense_seg::TWidths& widths = GetWidths();
115 
116  const TDim numrows = GetDim();
117  const TNumseg numsegs = GetNumseg();
118  const size_t num = static_cast<size_t>(numrows) * numsegs;
119 
120  if (numrows < 0) {
121  NCBI_THROW(CSeqalignException, eInvalidAlignment,
122  "CDense_seg::CheckNumSegs(): "
123  "negative dim");
124  }
125  if (numsegs < 0) {
126  NCBI_THROW(CSeqalignException, eInvalidAlignment,
127  "CDense_seg::CheckNumSegs(): "
128  "negative numseg");
129  }
130  if (starts.size() != num) {
131  NCBI_THROW(CSeqalignException, eInvalidAlignment,
132  "CDense_seg::CheckNumSegs(): "
133  "starts.size is inconsistent with dim * numseg");
134  }
135  if (lens.size() != size_t(numsegs)) {
136  NCBI_THROW(CSeqalignException, eInvalidAlignment,
137  "CDense_seg::CheckNumSegs(): "
138  "lens.size is inconsistent with numseg");
139  }
140  if (strands.size() && strands.size() != num) {
141  NCBI_THROW(CSeqalignException, eInvalidAlignment,
142  "CDense_seg::CheckNumSegs(): "
143  "strands.size is inconsistent with dim * numseg");
144  }
145  if (widths.size() && widths.size() != size_t(numrows)) {
146  NCBI_THROW(CSeqalignException, eInvalidAlignment,
147  "CDense_seg::CheckNumSegs(): "
148  "widths.size is inconsistent with dim");
149  }
150  return numsegs;
151 }
152 
153 
155 {
156  if ( IsSetIds() && (size_t)row < GetIds().size()) {
157  return *GetIds()[row];
158  }
159  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
160  "CDense_seg::GetSeq_id(): "
161  "can not get seq-id for the row requested.");
162 }
163 
164 
166 {
167  const TDim& dim = GetDim();
168  const TNumseg& numseg = CheckNumSegs();
169  const TStarts& starts = GetStarts();
170 
171  if (row < 0 || row >= dim) {
172  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
173  "CDense_seg::GetSeqStart(): "
174  "Invalid row number");
175  }
176 
177  TSignedSeqPos start;
179  TNumseg seg = numseg;
180  int pos = (seg - 1) * dim + row;
181  while (seg--) {
182  if ((start = starts[pos]) >= 0) {
183  return start;
184  }
185  pos -= dim;
186  }
187  } else {
188  TNumseg seg = -1;
189  int pos = row;
190  while (++seg < numseg) {
191  if ((start = starts[pos]) >= 0) {
192  return start;
193  }
194  pos += dim;
195  }
196  }
197  NCBI_THROW(CSeqalignException, eInvalidAlignment,
198  "CDense_seg::GetSeqStart(): "
199  "Row is empty");
200 }
201 
202 
204 {
205  const TDim& dim = GetDim();
206  const TNumseg& numseg = CheckNumSegs();
207  const TStarts& starts = GetStarts();
208 
209  if (row < 0 || row >= dim) {
210  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
211  "CDense_seg::GetSeqStop(): "
212  "Invalid row number");
213  }
214 
215  TSignedSeqPos start;
217  TNumseg seg = -1;
218  int pos = row;
219  while (++seg < numseg) {
220  if ((start = starts[pos]) >= 0) {
221  return start + GetLens()[seg] - 1;
222  }
223  pos += dim;
224  }
225  } else {
226  TNumseg seg = numseg;
227  int pos = (seg - 1) * dim + row;
228  while (seg--) {
229  if ((start = starts[pos]) >= 0) {
230  return start + GetLens()[seg] - 1;
231  }
232  pos -= dim;
233  }
234  }
235  NCBI_THROW(CSeqalignException, eInvalidAlignment,
236  "CDense_seg::GetSeqStop(): "
237  "Row is empty");
238 }
239 
240 
242 {
243  const size_t& strands_size = GetStrands().size();
244 
245  if ( !strands_size ) {
246  return eNa_strand_plus;
247  } else {
248  TDim dim = CheckNumRows();
249 
250  if (strands_size < (size_t) dim) {
251  // The ASN.1 spec technically requires numrows x numsegs
252  // strands, however in practice NCBI assumes that
253  // Dense-seg's strands are fixed per row. Since we will
254  // obtain the strand from the first segment (assuming they
255  // are fixed) and for efficiency (to eliminate unnecessary
256  // multiplication) we don't check for the full numrows x
257  // numsegs size here
258  NCBI_THROW(CSeqalignException, eInvalidAlignment,
259  "CDense_seg::GetSeqStrand(): "
260  "Invalid strands size");
261  }
262 
263  if (row < 0 || row >= dim) {
264  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
265  "CDense_seg::GetSeqStrand(): "
266  "Invalid row number");
267  }
268 
269  return GetStrands()[row];
270  }
271 }
272 
273 
274 void CDense_seg::Validate(bool full_test) const
275 {
276  const CDense_seg::TStarts& starts = GetStarts();
277  const CDense_seg::TStrands& strands = GetStrands();
278  const CDense_seg::TLens& lens = GetLens();
279  const CDense_seg::TWidths& widths = GetWidths();
280 
281  const size_t numrows = CheckNumRows();
282  const size_t numsegs = CheckNumSegs();
283 
284  if (numsegs == 0) {
285  return;
286  }
287 
288  if (full_test) {
289  const size_t max = numrows * (numsegs -1);
290 
291  bool strands_exist = !strands.empty();
292 
293  size_t numseg = 0, numrow = 0, offset = 0;
294  for (numrow = 0; numrow < numrows; numrow++) {
295  TSignedSeqPos min_start = -1, start;
296  bool plus = strands_exist ?
297  strands[numrow] != eNa_strand_minus:
298  true;
299 
300  if (plus) {
301  offset = 0;
302  } else {
303  offset = max;
304  }
305 
306  for (numseg = 0; numseg < numsegs; numseg++) {
307  start = starts[offset + numrow];
308  if (start >= 0) {
309  if (start < min_start) {
310  string errstr = string("CDense_seg::Validate():")
311  + " Starts are not consistent!"
312  + " Row=" + NStr::SizetToString(numrow) +
313  " Seg=" + NStr::SizetToString(plus ? numseg :
314  numsegs - 1 - numseg) +
315  " MinStart=" + NStr::NumericToString(min_start) +
316  " Start=" + NStr::NumericToString(start);
317 
318  NCBI_THROW(CSeqalignException, eInvalidAlignment,
319  errstr);
320  }
321  min_start = start +
322  lens[plus ? numseg : numsegs - 1 - numseg] *
323  (widths.size() == numrows ?
324  widths[numrow] : 1);
325  }
326  if (plus) {
327  offset += numrows;
328  } else {
329  offset -= numrows;
330  }
331  }
332  if (min_start == -1) {
333  string errstr = string("CDense_seg::Validate():")
334  + " Row " + NStr::SizetToString(numrow) +
335  " is empty.";
336  NCBI_THROW(CSeqalignException, eInvalidAlignment,
337  errstr);
338  }
339  }
340  }
341 }
342 
343 
345 {
347 
348  list<TSignedSeqRange> delete_ranges;
349  int i;
350  int j;
351 
352  /// leading gap segments first
353  for (i = 0; i < GetNumseg(); ++i) {
354  int count_gaps = 0;
355  for (j = 0; j < GetDim(); ++j) {
356  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
357  if (this_start == -1) {
358  ++count_gaps;
359  }
360  }
361 
362  if (GetDim() - count_gaps > 1) {
363  /// no can do
364  break;
365  }
366  }
367 
368  if (i == GetNumseg() + 1) {
369  /// error case - all gapped, so don't bother
370  return;
371  }
372  if (i != 0) {
373  delete_ranges.push_back(TSignedSeqRange(0, i));
374  }
375 
376  /// trailing gap segments next
377  for (i = GetNumseg() - 1; i >= 0; --i) {
378  int count_gaps = 0;
379  for (j = 0; j < GetDim(); ++j) {
380  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
381  if (this_start == -1) {
382  ++count_gaps;
383  }
384  }
385 
386  if (GetDim() - count_gaps > 1) {
387  /// no can do
388  break;
389  }
390  }
391 
392  if (i != GetNumseg() - 1) {
393  delete_ranges.push_back(TSignedSeqRange(i + 1, GetNumseg()));
394  }
395 
396  list<TSignedSeqRange>::reverse_iterator iter = delete_ranges.rbegin();
397  list<TSignedSeqRange>::reverse_iterator end = delete_ranges.rend();
398  for ( ; iter != end; ++iter) {
399  TSignedSeqRange r = *iter;
400  if (r.GetLength() == 0) {
401  continue;
402  }
403 
404  /// we can trim the first i segments
405  if (IsSetStrands()) {
406  _ASSERT(static_cast<int>(GetStrands().size())
407  >= r.GetTo() * GetDim());
408  SetStrands().erase(SetStrands().begin() + r.GetFrom() * GetDim(),
409  SetStrands().begin() + r.GetTo() * GetDim());
410  }
411  if (IsSetStarts()) {
412  _ASSERT(static_cast<int>(GetStarts().size())
413  >= r.GetTo() * GetDim());
414  SetStarts().erase(SetStarts().begin() + r.GetFrom() * GetDim(),
415  SetStarts().begin() + r.GetTo() * GetDim());
416  }
417  if (IsSetLens()) {
418  _ASSERT(static_cast<int>(GetLens().size())
419  >= r.GetTo());
420  SetLens().erase(SetLens().begin() + r.GetFrom(),
421  SetLens().begin() + r.GetTo());
422  }
423  }
424 
425  /// fix our number of segments
426  SetNumseg(static_cast<TNumseg>(GetLens().size()));
427 
429 }
430 
431 
433 {
435 
436  int i;
437  int j;
438  vector<bool> can_merge(GetNumseg() - 1, true); // start off with all true
439  unsigned int merge_count = 0;
440  for (i = 0; i < GetNumseg() - 1; ++i) {
441 
442  for (j = 0; j < GetDim(); ++j) {
443  TSignedSeqPos this_start = GetStarts()[i * GetDim() + j];
444  TSignedSeqPos next_start = GetStarts()[(i + 1) * GetDim() + j];
445 
446  /// check to make sure there is not a gap mismatch
447  if ( (this_start == -1 && next_start != -1)
448  || (this_start != -1 && next_start == -1) ) {
449  can_merge[i] = false;
450  break;
451  }
452 
453  /// check to make sure there is no unaligned space
454  /// between this segment and the next
455  if (this_start != -1 && next_start != -1) {
456  TSignedSeqPos seg_len = GetLens()[i];
457  if (IsSetStrands() &&
458  GetStrands()[i * GetDim() + j] == eNa_strand_minus) {
459  seg_len = GetLens()[i + 1];
460  seg_len = -seg_len;
461  }
462 
463  if (this_start + seg_len != next_start) {
464  can_merge[i] = false;
465  break;
466  }
467  }
468 
469  /// check that the strands all agree between this segment
470  /// and the next (although it is rare, it is legal for them
471  /// to be different)
472  if (IsSetStrands()) {
473  if (GetStrands()[i * GetDim() + j]
474  != GetStrands()[(i + 1) * GetDim() + j]) {
475  can_merge[i] = false;
476  break;
477  }
478  }
479  }
480  if (can_merge[i]) {
481  ++merge_count;
482  }
483  }
484 
485  if (merge_count == 0) {
486  // nothing needs to be done
487  return;
488  }
489 
490  CDense_seg::TStarts new_starts;
491  CDense_seg::TLens new_lens;
492  CDense_seg::TStrands new_strands;
493  new_starts.reserve((GetNumseg() - merge_count) * GetDim());
494  new_lens.reserve(GetNumseg() - merge_count);
495  if (IsSetStrands()) {
496  new_strands.reserve((GetNumseg() - merge_count) * GetDim());
497  }
498  for (i = 0; i < GetNumseg(); ++i) {
499  if (i > 0 && can_merge[i - 1]) {
500  // merge this segment into the last one
501  new_lens.back() += GetLens()[i];
502  if (IsSetStrands()) {
503  for (j = 0; j < GetDim(); ++j) {
504  if (GetStrands()[i * GetDim() + j] == eNa_strand_minus) {
505  new_starts[new_starts.size() - GetDim() + j] =
506  GetStarts()[i * GetDim() + j];
507  }
508  }
509  }
510  } else {
511  // just copy the original segment i onto the end of the new stuff
512  new_lens.push_back(GetLens()[i]);
513  for (j = 0; j < GetDim(); ++j) {
514  new_starts.push_back(GetStarts()[i * GetDim() + j]);
515  if (IsSetStrands()) {
516  new_strands.push_back(GetStrands()[i * GetDim() + j]);
517  }
518  }
519  }
520  }
521 
522  SetStarts().swap(new_starts);
523  SetLens().swap(new_lens);
524  if (IsSetStrands()) {
525  SetStrands().swap(new_strands);
526  }
527 
528  SetNumseg(static_cast<TNumseg>(GetLens().size()));
529 
531 }
532 
533 
535 {
536  #define IDX(_x,_y) (((_x)*GetDim())+(_y))
537  bool swaps_made = false;
538  do {
539  swaps_made = false;
540  for(int i=0; i < GetNumseg()-1; ++i) {
541 
542  bool curr_gap = false, next_gap = false;
543  int curr_seq_row = GetDim()+1, next_seq_row = GetDim()+1;
544  for (int j=0; j < GetDim(); ++j) {
545  if (GetStarts()[IDX(i,j)] == -1)
546  curr_gap = true;
547  else
548  curr_seq_row = min(curr_seq_row, j);
549  if (GetStarts()[IDX(i+1,j)] == -1)
550  next_gap = true;
551  else
552  next_seq_row = min(next_seq_row, j);
553  }
554  if(!(curr_gap & next_gap))
555  continue;
556 
557  // if this Seg and next Seg are both gaps,
558  // and the first row with sequence is not curr
559  // swap the two Segs
560  if(next_seq_row < curr_seq_row) {
561  for(int j=0; j < GetDim(); ++j) {
562  swap(SetStarts()[IDX(i,j)], SetStarts()[IDX(i+1,j)]);
563  if (GetStrands().size() > (size_t)IDX(i+1,j))
564  swap(SetStrands()[IDX(i,j)], SetStrands()[IDX(i+1,j)]);
565  }
566  swap(SetLens()[i], SetLens()[i+1]);
567  swaps_made = true;
568  }
569  }
570  } while(swaps_made);
571 }
572 
573 
575 {
577 
578  // consistency checks
579  TDim dim = CheckNumRows();
580  TNumseg numseg = CheckNumSegs();
581 
582  int i;
583  int j;
584  vector<bool> remove(numseg, true); // start out with all true
585  unsigned int remove_count = 0;
586  for (i = 0; i < numseg; ++i) {
587 
588  for (j = 0; j < dim; ++j) {
589  if (GetStarts()[i * dim + j] != -1) {
590  // not a gap
591  remove[i] = false;
592  break;
593  }
594  }
595  if (remove[i]) {
596  ++remove_count;
597  }
598  }
599 
600  if (remove_count == 0) {
601  // nothing to remove; leave unchanged
602  return;
603  }
604 
605  CDense_seg::TStarts new_starts;
606  CDense_seg::TLens new_lens;
607  CDense_seg::TStrands new_strands;
608  new_starts.reserve((numseg - remove_count) * dim);
609  new_lens.reserve(numseg - remove_count);
610  if (IsSetStrands()) {
611  new_strands.reserve((numseg - remove_count) * dim);
612  }
613  for (i = 0; i < numseg; ++i) {
614  if (!remove[i]) {
615  // copy the original segment i onto the end of the new stuff
616  new_lens.push_back(GetLens()[i]);
617  for (j = 0; j < dim; ++j) {
618  new_starts.push_back(GetStarts()[i * dim + j]);
619  if (IsSetStrands()) {
620  new_strands.push_back(GetStrands()[i * dim + j]);
621  }
622  }
623  }
624  }
625 
626  SetStarts().swap(new_starts);
627  SetLens().swap(new_lens);
628  if (IsSetStrands()) {
629  SetStrands().swap(new_strands);
630  }
631 
632  SetNumseg(static_cast<TNumseg>(GetLens().size()));
633 
634 #ifdef _DEBUG
635  Validate(true);
636 #endif
637 
638 }
639 
640 
641 //-----------------------------------------------------------------------------
642 // PRE : none
643 // POST: same alignment, opposite orientation
645 {
646  //flip strands
647  if (IsSetStrands()) {
649  switch (*i) {
650  case eNa_strand_plus: *i = eNa_strand_minus; break;
651  case eNa_strand_minus: *i = eNa_strand_plus; break;
652  default: break;//do nothing if not + or -
653  }
654  }
655  } else {
656  // Interpret unset strands as plus strands.
657  // Since we're reversing, set them all to minus.
658  SetStrands().resize(GetStarts().size(), eNa_strand_minus);
659  }
660 
661  //reverse list o' lengths
662  {
663  CDense_seg::TLens::iterator f = SetLens().begin();
664  CDense_seg::TLens::iterator r = SetLens().end();
665  while (f < r) {
666  swap(*(f++), *(--r));
667  }
668  }
669 
670  //reverse list o' starts
671  CDense_seg::TStarts &starts = SetStarts();
672  int f = 0;
673  int r = (GetNumseg() - 1) * GetDim();
674  while (f < r) {
675  for (int i = 0; i < GetDim(); ++i) {
676  swap(starts[f+i], starts[r+i]);
677  }
678  f += GetDim();
679  r -= GetDim();
680  }
681 }
682 
683 //-----------------------------------------------------------------------------
684 // PRE : numbers of the rows to swap
685 // POST: alignment rearranged with row1 where row2 used to be & vice versa
687 {
688  if (row1 >= GetDim() || row1 < 0 ||
689  row2 >= GetDim() || row2 < 0) {
690  NCBI_THROW(CSeqalignException, eOutOfRange,
691  "Row numbers supplied to CDense_seg::SwapRows must be "
692  "in the range [0, dim)");
693  }
694 
695  //swap ids
696  swap(SetIds()[row1], SetIds()[row2]);
697 
698  int idxStop = GetNumseg()*GetDim();
699 
700  //swap starts
701  for(int i = 0; i < idxStop; i += GetDim()) {
702  swap(SetStarts()[i+row1], SetStarts()[i+row2]);
703  }
704 
705  //swap strands
706  if (IsSetStrands()) {
707  for(int i = 0; i < idxStop; i += GetDim()) {
708  swap(SetStrands()[i+row1], SetStrands()[i+row2]);
709  }
710  }
711 }
712 
713 
714 /*---------------------------------------------------------------------------*/
715 // PRE : this is a validated dense seg; row & sequence position on row in
716 // alignment
717 // POST: the number of the segment in which this sequence position falls
720 {
721  bool found = false;
723  for (seg = 0; seg < GetNumseg() && !found; ++seg) {
724  TSignedSeqPos start = GetStarts()[seg * GetDim() + row];
725  TSignedSeqPos len = GetLens()[seg];
726  if (start != -1) {
727  if (pos >= start && pos < start + len) {
728  found = true;
729  }
730  }
731  }
732  if (!found) {
733  NCBI_THROW(CSeqalignException, eInvalidAlignment,
734  "CDense_seg::x_FindSegment(): "
735  "Can't find a segment containing position " +
736  NStr::NumericToString(pos));
737  }
738 
739  return seg - 1;
740 }
741 
742 
743 //-----------------------------------------------------------------------------
744 // PRE : range on a row in the alignment
745 // POST: dst Dense_seg reset &
747 ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
748 {
749  if (row < 0 || row >= GetDim()) {
750  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
751  "CDense_seg::ExtractSlice():"
752  " Invalid row number ("
753  + NStr::NumericToString(row) + ")");
754  }
755 
756  if (from > to) {
757  swap(from, to);
758  }
759  if (from < GetSeqStart(row)) {
760  NCBI_THROW(CSeqalignException, eOutOfRange,
761  "CDense_seg::ExtractSlice(): "
762  "start position (" + NStr::NumericToString(from) +
763  ") off end of alignment");
764  }
765  if (to > GetSeqStop(row)) {
766  NCBI_THROW(CSeqalignException, eOutOfRange,
767  "CDense_seg::ExtractSlice(): "
768  "stop position (" + NStr::NumericToString(to) +
769  ") off end of alignment");
770  }
771 
772 
773  CRef<CDense_seg> ds(new CDense_seg);
774  ds->SetDim(GetDim());
775  ds->SetNumseg(0);
776  ITERATE(CDense_seg::TIds, idI, GetIds()) {
777  CSeq_id *si = new CSeq_id;
778  si->Assign(**idI);
779  ds->SetIds().push_back(CRef<CSeq_id>(si));
780  }
781 
782  //find start/stop segments
783  CDense_seg::TNumseg startSeg = x_FindSegment(row, from);
784  CDense_seg::TNumseg stopSeg = x_FindSegment(row, to);
785 
786  TSeqPos startOffset = from - GetStarts()[startSeg * GetDim() + row];
787  TSeqPos stopOffset = GetStarts()[stopSeg * GetDim() + row] +
788  GetLens()[stopSeg] - 1 - to;
789  if (IsSetStrands() && GetStrands()[row] == eNa_strand_minus) {
790  swap(startOffset, stopOffset);
791  swap(startSeg, stopSeg); // make sure startSeg is first
792  }
793 
794  for (CDense_seg::TNumseg seg = startSeg; seg <= stopSeg; ++seg) {
795  //starts
796  for (CDense_seg::TDim dim = 0; dim < GetDim(); ++dim) {
797  TSignedSeqPos start = GetStarts()[seg * GetDim() + dim];
798  if (start != -1) {
799  if (seg == startSeg && (!IsSetStrands() ||
800  GetStrands()[seg * GetDim() + dim] == eNa_strand_plus)) {
801  start += startOffset;
802  }
803  if (seg == stopSeg && IsSetStrands() &&
804  GetStrands()[seg * GetDim() + dim] == eNa_strand_minus) {
805  start += stopOffset;
806  }
807  }
808  ds->SetStarts().push_back(start);
809  }
810 
811  //len
812  TSeqPos len = GetLens()[seg];
813  if (seg == startSeg) {
814  len -= startOffset;
815  }
816  if (seg == stopSeg) {
817  len -= stopOffset;
818  }
819  ds->SetLens().push_back(len);
820 
821  //strands
822  if (IsSetStrands()) {
823  for (CDense_seg::TDim dim = 0; dim < GetDim(); ++dim) {
824  ds->SetStrands().push_back(GetStrands()[seg * GetDim() + dim]);
825  }
826  }
827  ++ds->SetNumseg();
828  }
829 
830 #ifdef _DEBUG
831  ds->Validate(true);
832 #endif
833  return ds;
834 }
835 
836 
837 CRef<CDense_seg> CDense_seg::ExtractRows(const vector<TDim>& rows) const
838 {
839  // consistency checks
840  TDim dim = CheckNumRows();
841  TNumseg numseg = CheckNumSegs();
842 
843  CRef<CDense_seg> new_ds(new CDense_seg);
844  new_ds->SetDim(static_cast<TDim>(rows.size()));
845  new_ds->SetNumseg(GetNumseg());
846 
847  // reserve for efficiency
848  new_ds->SetIds().reserve(rows.size());
849  new_ds->SetStarts().reserve(rows.size() * GetNumseg());
850  new_ds->SetLens().reserve(GetNumseg());
851  if (IsSetStrands()) {
852  new_ds->SetStrands().reserve(rows.size() * GetNumseg());
853  }
854 
855  ITERATE (vector<TDim>, row, rows) {
856  // sole check that rows are not out of range
857  if (*row < 0 || *row >= dim) {
858  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
859  "CDense_seg::ExtractRows():"
860  " Invalid row number ("
861  + NStr::NumericToString(*row) + ")");
862  }
863  // *copy* the ID (don't just make a reference to it)
864  CRef<CSeq_id> id_copy(new CSeq_id);
865  id_copy->Assign(*GetIds()[*row]);
866  new_ds->SetIds().push_back(id_copy);
867  }
868  for (TNumseg segnum = 0; segnum < numseg; ++segnum) {
869  new_ds->SetLens().push_back(GetLens()[segnum]);
870  ITERATE (vector<TDim>, row, rows) {
871  int idx = segnum * dim + *row;
872  new_ds->SetStarts().push_back(GetStarts()[idx]);
873  if (IsSetStrands()) {
874  new_ds->SetStrands().push_back(GetStrands()[idx]);
875  }
876  }
877  }
878 
879  new_ds->Compact(); // even if original was compact, new one may not be
880 
881 #ifdef _DEBUG
882  new_ds->Validate(true);
883 #endif
884 
885  // Scores are not propagated
886  return new_ds;
887 }
888 
889 
892 {
893  if (offset == 0) return;
894 
895  // Check for out-of-range negative offset
896  if (offset < 0) {
897  for (TNumseg seg = 0, pos = row;
898  seg < GetNumseg();
899  ++seg, pos += GetDim()) {
900 
901  if (GetStarts()[pos] >= 0) {
902  if (GetStarts()[pos] < -offset) {
903  NCBI_THROW(CSeqalignException, eOutOfRange,
904  "Negative offset greater than seq position");
905  }
906  }
907  }
908  }
909 
910  // Modify positions
911  for (TNumseg seg = 0, pos = row;
912  seg < GetNumseg();
913  ++seg, pos += GetDim()) {
914  if (GetStarts()[pos] >= 0) {
915  SetStarts()[pos] += offset;
916  }
917  }
918 }
919 
920 
921 /// @deprecated
923  bool ignore_strand)
924 {
925  if (loc.IsWhole()) {
926  return;
927  }
928 
929  TSeqPos row_stop = GetSeqStop(row);
930 
931  TSeqPos ttl_loc_len = 0;
932  {{
933  CSeq_loc_CI seq_loc_i(loc);
934  do {
935  ttl_loc_len += seq_loc_i.GetRange().GetLength();
936  } while (++seq_loc_i);
937  }}
938 
939  // check the validity of the seq-loc
940  if (ttl_loc_len < row_stop + 1) {
941  string errstr("CDense_seg::RemapToLoc():"
942  " Seq-loc is not long enough to"
943  " cover the alignment!"
944  " Maximum row seq pos is ");
945  errstr += NStr::NumericToString(row_stop);
946  errstr += ". The total seq-loc len is only ";
947  errstr += NStr::SizetToString(ttl_loc_len);
948  errstr += ", it should be at least ";
949  errstr += NStr::NumericToString(row_stop+1);
950  errstr += " (= max seq pos + 1).";
951  NCBI_THROW(CSeqalignException, eOutOfRange, errstr);
952  }
953 
954  const CDense_seg::TStarts& starts = GetStarts();
955  const CDense_seg::TStrands& strands = GetStrands();
956  const CDense_seg::TLens& lens = GetLens();
957 
958  TDim numrows = CheckNumRows();
959  TNumseg numsegs = CheckNumSegs();
960 
961  CSeq_loc_CI seq_loc_i(loc);
962 
963  TSeqPos start, loc_len, len, len_so_far;
964  start = seq_loc_i.GetRange().GetFrom();
965  len = loc_len = seq_loc_i.GetRange().GetLength();
966  len_so_far = 0;
967 
968  bool row_plus = !strands.size() || strands[row] != eNa_strand_minus;
969  bool loc_plus = seq_loc_i.GetStrand() != eNa_strand_minus;
970 
971  // iterate through segments
972  size_t idx = loc_plus ? row : (numsegs - 1) * numrows + row;
973  TNumseg seg = loc_plus ? 0 : numsegs - 1;
974  while (loc_plus ? seg < GetNumseg() : seg >= 0) {
975  if (starts[idx] == -1) {
976  // ignore gaps in our sequence
977  if (loc_plus) {
978  idx += numrows; seg++;
979  } else {
980  idx -= numrows; seg--;
981  }
982  continue;
983  }
984 
985  // iterate the seq-loc if needed
986  if ((loc_plus == row_plus ?
987  starts[idx] : ttl_loc_len - starts[idx] - lens[seg])
988  > len_so_far + loc_len) {
989 
990  if (++seq_loc_i) {
991  len_so_far += len;
992  len = seq_loc_i.GetRange().GetLength();
993  start = seq_loc_i.GetRange().GetFrom();
994  } else {
995  NCBI_THROW(CSeqalignException, eInvalidInputData,
996  "CDense_seg::RemapToLoc():"
997  " Internal error");
998  }
999 
1000  // assert the strand is the same
1001  if (loc_plus != (seq_loc_i.GetStrand() != eNa_strand_minus)) {
1002  NCBI_THROW(CSeqalignException, eInvalidInputData,
1003  "CDense_seg::RemapToLoc():"
1004  " The strand should be the same accross"
1005  " the input seq-loc");
1006  }
1007  }
1008 
1009  // offset for the starting position
1010  if (loc_plus == row_plus) {
1011  SetStarts()[idx] += start - len_so_far;
1012  } else {
1013  SetStarts()[idx] =
1014  start - len_so_far + ttl_loc_len - starts[idx] - lens[seg];
1015  }
1016 
1017  if (lens[seg] > len) {
1018  TSignedSeqPos len_diff = lens[seg] - len;
1019  while (1) {
1020  // move to the next loc part that extends beyond our length
1021  ++seq_loc_i;
1022  if (seq_loc_i) {
1023  start = seq_loc_i.GetRange().GetFrom();
1024  } else {
1025  NCBI_THROW(CSeqalignException, eOutOfRange,
1026  "CDense_seg::RemapToLoc():"
1027  " Internal error");
1028  }
1029 
1030  // split our segment
1031  SetLens().insert(SetLens().begin() +
1032  (loc_plus ? seg : seg + 1),
1033  len);
1034  SetLens()[loc_plus ? seg + 1 : seg] = len_diff;
1035 
1036  // insert new data to account for our split segment
1037  TStarts temp_starts(numrows, -1);
1038  for (int row_i = 0, tmp_idx = seg * numrows;
1039  row_i < numrows; ++row_i, ++tmp_idx) {
1040  TSignedSeqPos& this_start = SetStarts()[tmp_idx];
1041  if (this_start != -1) {
1042  temp_starts[row_i] = this_start;
1043  if (loc_plus == (strands[row_i] != eNa_strand_minus)) {
1044  if (row == row_i) {
1045  temp_starts[row_i] = start;
1046  } else {
1047  temp_starts[row_i] += len;
1048  }
1049  } else {
1050  this_start += len_diff;
1051  }
1052  }
1053  }
1054 
1055  len_so_far += loc_len;
1056  len = loc_len = seq_loc_i.GetRange().GetLength();
1057 
1058  SetStarts().insert(SetStarts().begin() +
1059  (loc_plus ? seg + 1 : seg) * numrows,
1060  temp_starts.begin(), temp_starts.end());
1061 
1062  if (strands.size()) {
1063  SetStrands().insert
1064  (SetStrands().begin(),
1065  strands.begin(), strands.begin() + numrows);
1066  }
1067 
1068  SetNumseg()++;
1069 
1070  if ((len_diff = lens[seg] - len) > 0) {
1071  if (loc_plus) {
1072  idx += numrows; seg++;
1073  } else {
1074  idx -= numrows; seg--;
1075  }
1076  } else {
1077  break;
1078  }
1079  }
1080  } else {
1081  len -= lens[seg];
1082  }
1083 
1084  if (loc_plus) {
1085  idx += numrows; seg++;
1086  } else {
1087  idx -= numrows; seg--;
1088  }
1089  } // while iterating through segments
1090 
1091  // finally, modify the strands if different
1092  if ( !ignore_strand ) {
1093  if (loc_plus != row_plus) {
1094  if (!strands.size()) {
1095  // strands do not exist, create them
1096  SetStrands().resize(GetNumseg() * GetDim(), eNa_strand_plus);
1097  }
1098  for (seg = 0, idx = row;
1099  seg < GetNumseg(); seg++, idx += numrows) {
1100  SetStrands()[idx] = loc_plus ? eNa_strand_plus : eNa_strand_minus;
1101  }
1102  }
1103  }
1104 
1105 }
1106 
1107 
1109 {
1110  // this dense-seg
1111  const CDense_seg::TStarts& starts = GetStarts();
1112  const CDense_seg::TStrands& strands = GetStrands();
1113  const CDense_seg::TLens& lens = GetLens();
1114  const CDense_seg::TIds& ids = GetIds();
1115 
1116  TDim numrows = CheckNumRows();
1117  TNumseg numsegs = CheckNumSegs();
1118 
1119  bool strands_exist = !strands.empty();
1120 
1121  TNumseg seg = 0;
1122  TDim row = 0;
1123 
1124 
1125  // extra segments
1126  CDense_seg::TStarts extra_starts;
1127  CDense_seg::TLens extra_lens;
1128  TNumseg extra_numsegs = 0;
1129  TNumseg extra_seg = 0;
1130  vector<TNumseg> extra_segs;
1131 
1132 
1133  // new dense-seg
1134  CRef<CDense_seg> new_ds(new CDense_seg);
1135  CDense_seg::TStarts& new_starts = new_ds->SetStarts();
1136  CDense_seg::TStrands& new_strands = new_ds->SetStrands();
1137  CDense_seg::TLens& new_lens = new_ds->SetLens();
1138  CDense_seg::TIds& new_ids = new_ds->SetIds();
1139 
1140  // dimentions
1141  new_ds->SetDim(numrows);
1142  TNumseg& new_numsegs = new_ds->SetNumseg();
1143  new_numsegs = numsegs; // initialize
1144 
1145  // ids
1146  new_ids.resize(numrows);
1147  for (row = 0; row < numrows; row++) {
1148  CRef<CSeq_id> id(new CSeq_id);
1149  SerialAssign(*id, *ids[row]);
1150  new_ids[row] = id;
1151  }
1152 
1153  TNumseg new_seg = 0;
1154 
1155  // temporary data
1156  vector<TSignedSeqPos> expected_positions;
1157  expected_positions.resize(numrows, -1);
1158 
1159  vector<bool> plus;
1160  plus.resize(numrows, true);
1161  if (strands_exist) {
1162  for (row = 0; row < numrows; row++) {
1163  if (strands[row] == eNa_strand_minus) {
1164  plus[row] = false;
1165  }
1166  }
1167  }
1168 
1169  TSignedSeqPos extra_len = 0;
1170  TNumseg idx = 0, new_idx = 0, extra_idx = 0;
1171 
1172  // main loop through segments
1173  for (seg = 0; seg < numsegs; seg++) {
1174  const TSeqPos& len = lens[seg];
1175  for (row = 0; row < numrows; row++) {
1176 
1177  const TSignedSeqPos& start = starts[idx++];
1178 
1179  TSignedSeqPos& expected_pos = expected_positions[row];
1180 
1181  if (start >= 0) {
1182 
1183  if (expected_pos >= 0) {
1184  // check if there's an unaligned insert
1185 
1186  if (plus[row]) {
1187  extra_len = start - expected_pos;
1188  } else {
1189  extra_len = expected_pos - start - len;
1190  }
1191 
1192  if (extra_len < 0) {
1193  string errstr("CDense_seg::AddUnalignedSegments():"
1194  " Illegal overlap at Row ");
1195  errstr += NStr::SizetToString(row);
1196  errstr += " Segment ";
1197  errstr += NStr::SizetToString(seg);
1198  errstr += ".";
1199  NCBI_THROW(CSeqalignException, eInvalidAlignment,
1200  errstr);
1201  } else if (extra_len > 0) {
1202  // insert new segment
1203  extra_segs.push_back(seg);
1204  extra_lens.push_back(extra_len);
1205  extra_starts.resize(extra_idx + numrows, -1);
1206  extra_starts[extra_idx + row] =
1207  plus[row] ? expected_pos : start + len;
1208 
1209  extra_idx += numrows;
1210  ++extra_numsegs;
1211  }
1212  }
1213 
1214  // set the new expected_pos
1215  if (plus[row]) {
1216  expected_pos = start + len;
1217  } else {
1218  expected_pos = start;
1219  }
1220  }
1221  }
1222  }
1223 
1224  // lens & starts
1225  new_numsegs = numsegs + extra_numsegs;
1226  new_lens.resize(numsegs + extra_numsegs);
1227  new_starts.resize(numrows * new_numsegs);
1228  for (seg = 0, new_seg = 0, extra_seg = 0,
1229  idx = 0, new_idx = 0, extra_idx = 0;
1230  seg < numsegs;
1231  seg++, new_seg++) {
1232 
1233  // insert extra segments
1234  if (extra_numsegs) {
1235  for ( ; size_t(extra_seg) < extra_segs.size() && extra_segs[extra_seg] == seg; ++new_seg, ++extra_seg) {
1236  new_lens[new_seg] = extra_lens[extra_seg];
1237  for (row = 0; row < numrows; ++row, ++new_idx, ++extra_idx) {
1238  new_starts[new_idx] = extra_starts[extra_idx];
1239  }
1240  }
1241  }
1242 
1243  // add the existing segment
1244  new_lens[new_seg] = lens[seg];
1245  for (row = 0; row < numrows; row++) {
1246  new_starts[new_idx++] = starts[idx++];
1247  }
1248  }
1249 
1250 
1251  // strands
1252  new_strands.resize(numrows * new_numsegs, eNa_strand_plus);
1253  if (strands_exist) {
1254  new_idx = 0;
1255  for (new_seg = 0; new_seg < new_numsegs; new_seg++) {
1256  for (row = 0; row < numrows; row++, new_idx++) {
1257  if ( !plus[row] ) {
1258  new_strands[new_idx] = eNa_strand_minus;
1259  }
1260  }
1261  }
1262  }
1263 
1264  return new_ds;
1265 }
1266 
1267 
1268 //-----------------------------------------------------------------------------
1269 // PRE : alignment transcript (RLE or not) and start coordinates
1270 // POST: Starts, lens and strands. Ids and scores not affected.
1271 
1272 // initialize from pairwise alignment transcript
1273 void CDense_seg::FromTranscript(TSeqPos query_start, ENa_strand query_strand,
1274  TSeqPos subj_start, ENa_strand subj_strand,
1275  const string& transcript )
1276 {
1277  // check that strands are specific
1278  bool query_strand_specific =
1279  query_strand == eNa_strand_plus || query_strand == eNa_strand_minus;
1280  bool subj_strand_specific =
1281  subj_strand == eNa_strand_plus || subj_strand == eNa_strand_minus;
1282 
1283  if(!query_strand_specific || !subj_strand_specific) {
1284  NCBI_THROW(CSeqalignException, eInvalidInputData, "Unknown strand");
1285  }
1286 
1287  TStarts &starts = SetStarts();
1288  starts.clear();
1289  TLens &lens = SetLens();
1290  lens.clear();
1291  TStrands &strands = SetStrands();
1292  strands.clear();
1293 
1294  SetDim(2);
1295 
1296  // iterate through the transcript
1297  TNumseg seg_count = 0;
1298 
1299  TSeqPos start1 = 0, pos1 = 0; // relative to exon start in mrna
1300  TSeqPos start2 = 0, pos2 = 0; // and genomic
1301  TSeqPos seg_len = 0;
1302 
1303  string::const_iterator ib = transcript.begin();
1304  string::const_iterator ie = transcript.end();
1305  string::const_iterator ii = ib;
1306  unsigned char seg_type;
1307  static const char* badsymerr = "Unknown or unsupported transcript symbol";
1308  char c = ii != ie ? *ii++ : 0;
1309  if(c == 'M' || c == 'R') {
1310  seg_type = 0;
1311  ++pos1;
1312  ++pos2;
1313  }
1314  else if (c == 'I') {
1315  seg_type = 1;
1316  ++pos2;
1317  }
1318  else if (c == 'D') {
1319  seg_type = 2;
1320  ++pos1;
1321  }
1322  else {
1323  if (c == 0) {
1324  NCBI_THROW(CSeqalignException, eInvalidInputData,
1325  "Empty transcript");
1326  }
1327  else {
1328  NCBI_THROW(CSeqalignException, eInvalidInputData, badsymerr);
1329  }
1330  }
1331 
1332  while(ii < ie) {
1333 
1334  c = *ii;
1335  if(isalpha((unsigned char) c)) {
1336 
1337  if(seg_type == 0 && (c == 'M' || c == 'R')) {
1338  ++pos1;
1339  ++pos2;
1340  }
1341  else if(seg_type == 1 && c == 'I') {
1342  ++pos2;
1343  }
1344  else if(seg_type == 2 && c == 'D') {
1345  ++pos1;
1346  }
1347  else {
1348 
1349  // close current seg
1350  TSeqPos query_close = query_strand == eNa_strand_plus?
1351  start1: 1 - pos1;
1352  starts.push_back(seg_type == 1? (TSeqPos)-1: query_start + query_close);
1353  strands.push_back(query_strand);
1354 
1355  TSeqPos subj_close = subj_strand == eNa_strand_plus?
1356  start2: 1- pos2;
1357  starts.push_back(seg_type == 2? (TSeqPos)-1: subj_start + subj_close);
1358  strands.push_back(subj_strand);
1359 
1360  switch(seg_type) {
1361  case 0: seg_len = pos1 - start1; break;
1362  case 1: seg_len = pos2 - start2; break;
1363  case 2: seg_len = pos1 - start1; break;
1364  }
1365  lens.push_back(seg_len);
1366  ++seg_count;
1367 
1368  // start a new seg
1369  start1 = pos1;
1370  start2 = pos2;
1371 
1372  if(c == 'M' || c == 'R'){
1373  seg_type = 0; // matches and mismatches
1374  ++pos1;
1375  ++pos2;
1376  }
1377  else if (c == 'I') {
1378  seg_type = 1; // inserts
1379  ++pos2;
1380  }
1381  else if (c == 'D') {
1382  seg_type = 2; // dels
1383  ++pos1;
1384  }
1385  else {
1386 
1387  NCBI_THROW(CSeqalignException, eInvalidInputData,
1388  badsymerr);
1389  }
1390  }
1391  ++ii;
1392  }
1393  else {
1394 
1395  if(!isdigit((unsigned char) c)) {
1396 
1397  NCBI_THROW(CSeqalignException, eInvalidInputData,
1398  "Alignment transcript corrupt");
1399  }
1400 
1401  TSeqPos len = 0;
1402  while(ii < ie && isdigit((unsigned char)(*ii))) {
1403  len = 10*len + *ii - '0';
1404  ++ii;
1405  }
1406  --len;
1407  switch(seg_type) {
1408  case 0: pos1 += len; pos2 += len; break;
1409  case 1: pos2 += len; break;
1410  case 2: pos1 += len; break;
1411  }
1412  }
1413  }
1414 
1415  TSeqPos query_close = query_strand == eNa_strand_plus? start1: 1 - pos1;
1416  starts.push_back(seg_type == 1? (TSeqPos)-1: query_start + query_close);
1417  strands.push_back(query_strand);
1418 
1419  TSeqPos subj_close = subj_strand == eNa_strand_plus? start2: 1 - pos2;
1420  starts.push_back(seg_type == 2? (TSeqPos)-1: subj_start + subj_close);
1421  strands.push_back(subj_strand);
1422 
1423  switch(seg_type) {
1424 
1425  case 0: seg_len = pos1 - start1; break;
1426  case 1: seg_len = pos2 - start2; break;
1427  case 2: seg_len = pos1 - start1; break;
1428  }
1429  lens.push_back(seg_len);
1430  ++seg_count;
1431 
1432  SetNumseg(seg_count);
1433 }
1434 
1435 
1437 {
1438  if (GetDim() <= row) {
1439  NCBI_THROW(CSeqalignException, eInvalidRowNumber,
1440  "Invalid row number in CreateRowSeq_interval(): " +
1442  }
1444  TSeqPos from = kInvalidSeqPos;
1445  TSeqPos to = 0;
1446  TSeqPos plus_len = 0;
1447  TSeqPos minus_len = 0;
1448  ret->SetId().Assign(*GetIds()[row]);
1449  for (int seg = 0; seg < GetNumseg(); seg++) {
1450  int idx = seg*GetDim() + row;
1451  int start = GetStarts()[idx];
1452  if (start < 0) {
1453  continue;
1454  }
1455  if (TSeqPos(start) < from) {
1456  from = TSeqPos(start);
1457  }
1458  TSeqPos len = GetLens()[seg];
1459  if (start + len > to) {
1460  to = start + len;
1461  }
1462  if ( !IsSetStrands() || !IsReverse(GetStrands()[idx]) ) {
1463  plus_len += len;
1464  }
1465  else {
1466  minus_len += len;
1467  }
1468  }
1469  if (from == kInvalidSeqPos || to == 0) {
1470  NCBI_THROW(CSeqalignException, eOutOfRange,
1471  "Can not convert row to seq-interval - invalid from/to value");
1472  }
1473  ret->SetFrom(from);
1474  ret->SetTo(to - 1);
1475  if ( IsSetStrands() ) {
1476  // If more than 2/3 of the segment is on plus, use plus strand
1477  if (plus_len >= minus_len*2) {
1478  ret->SetStrand(eNa_strand_plus);
1479  }
1480  // If more than 2/3 is on minus, use minus strand
1481  else if (plus_len*2 < minus_len) {
1483  }
1484  // Otherwise set strand to 'both'
1485  else {
1486  ret->SetStrand(eNa_strand_both);
1487  }
1488  }
1489  return ret;
1490 }
1491 
1492 
// Boolean configuration parameter DENSE_SEG_RESERVE in section OBJECTS.
// The fourth argument (true) is presumably its default value and the last
// one the name of the environment variable that overrides it - confirm
// against the NCBI_PARAM documentation.  eParam_NoThread: per-thread
// values are not used.  The pre-read hook that follows reads this
// parameter and returns without reserving anything when it is false.
1493 NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE);
1494 NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true,
1495  eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE);
1496 
1498  const CObjectInfoMI& member)
1499 {
1500  static CSafeStatic<NCBI_PARAM_TYPE(OBJECTS, DENSE_SEG_RESERVE)> s_Reserve;
1501 
1502  if ( !s_Reserve->Get() ) {
1503  return;
1504  }
1506  size_t numseg = ds.GetNumseg();
1507  try {
1508  switch ( member.GetMemberIndex() ) {
1509  case 4: // "starts"
1510  ds.SetStarts().reserve(ds.GetDim()*numseg);
1511  break;
1512  case 5: // "lens"
1513  ds.SetLens().reserve(numseg);
1514  break;
1515  case 6: // "strands"
1516  ds.SetStrands().reserve(ds.GetDim()*numseg);
1517  break;
1518  default:
1519  break;
1520  }
1521  }
1522  catch ( bad_alloc& /*ignored*/ ) {
1523  // ignore insufficient memory exception from advisory reserve()
1524  }
1525 }
1526 
1527 
1528 /////////////////////////////////////////////////////////////////////////////
1529 // Read hooks to reserve memory of Dense-seg vector<> to estimated size.
1530 /////////////////////////////////////////////////////////////////////////////
1531 
1532 
1534 {
1538 }
1539 
1540 
1542 {
1546 }
1547 
1548 
1550  const CObjectInfoMI& member)
1551 {
1553  size_t size = ds.GetDim()*ds.GetNumseg();
1555  try {
1556  array.reserve(size);
1557  }
1558  catch ( bad_alloc& /*ignored*/ ) {
1559  // ignore insufficient memory exception from advisory reserve()
1560  }
1561  DefaultRead(in, member);
1562 }
1563 
1564 
1566 {
1568  return type.FindMember("starts");
1569 }
1570 
1571 
1573 {
1575  x_GetMember().SetLocalReadHook(in, hook);
1576 }
1577 
1578 
1580 {
1582  x_GetMember().SetGlobalReadHook(hook);
1583 }
1584 
1585 
1587  const CObjectInfoMI& member)
1588 {
1590  size_t size = ds.GetNumseg();
1592  try {
1593  array.reserve(size);
1594  }
1595  catch ( bad_alloc& /*ignored*/ ) {
1596  // ignore insufficient memory exception from advisory reserve()
1597  }
1598  DefaultRead(in, member);
1599 }
1600 
1601 
1603 {
1605  return type.FindMember("lens");
1606 }
1607 
1608 
1610 {
1612  x_GetMember().SetLocalReadHook(in, hook);
1613 }
1614 
1615 
1617 {
1619  x_GetMember().SetGlobalReadHook(hook);
1620 }
1621 
1622 
1624  const CObjectInfoMI& member)
1625 {
1627  size_t size = ds.GetDim()*ds.GetNumseg();
1629  try {
1630  array.reserve(size);
1631  }
1632  catch ( bad_alloc& /*ignored*/ ) {
1633  // ignore insufficient memory exception from advisory reserve()
1634  }
1635  DefaultRead(in, member);
1636 }
1637 
1638 
1640 {
1642  return type.FindMember("strands");
1643 }
1644 
1645 
1647 {
1649  x_GetMember().SetLocalReadHook(in, hook);
1650 }
1651 
1652 
1654 {
1656  x_GetMember().SetGlobalReadHook(hook);
1657 }
1658 
1659 
1660 END_objects_SCOPE // namespace ncbi::objects::
1661 
#define ASSERT_CONSISTENCY()
Definition: Dense_seg.cpp:77
NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE)
#define IDX(_x, _y)
NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true, eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE)
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
static char * s_Reserve(size_t size, CSimpleBufferT< char > &buffer)
Definition: bgzf.cpp:285
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1609
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1616
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1602
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at the appropriate time when the object of requested type is to be read.
Definition: Dense_seg.cpp:1586
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at the appropriate time when the object of requested type is to be read.
Definition: Dense_seg.cpp:1549
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1572
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1579
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1565
static void SetHook(CObjectIStream &in)
Definition: Dense_seg.cpp:1646
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at the appropriate time when the object of requested type is to be read.
Definition: Dense_seg.cpp:1623
static CObjectTypeInfoMI x_GetMember(void)
Definition: Dense_seg.cpp:1639
static void SetGlobalHook(void)
Definition: Dense_seg.cpp:1653
virtual void PreReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
Return true to invoke default reading method afterwards.
Definition: Dense_seg.cpp:1497
void TrimEndGaps()
Trim leading/trailing gaps if possible.
Definition: Dense_seg.cpp:344
CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const
Extract specified rows of the alignment, in specified order.
Definition: Dense_seg.cpp:837
const CSeq_id & GetSeq_id(TDim row) const
Definition: Dense_seg.cpp:154
TWidths m_Widths
Definition: Dense_seg.hpp:177
~CDense_seg(void)
Definition: Dense_seg.cpp:57
ENa_strand GetSeqStrand(TDim row) const
Definition: Dense_seg.cpp:241
CRef< CSeq_interval > CreateRowSeq_interval(TDim row) const
Definition: Dense_seg.cpp:1436
TSeqPos GetSeqStop(TDim row) const
Definition: Dense_seg.cpp:203
void Reverse(void)
Reverse the segments' orientation.
Definition: Dense_seg.cpp:644
static void SetGlobalReserveHooks(void)
Definition: Dense_seg.cpp:1541
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Dense_seg.cpp:890
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used)
Definition: Dense_seg.cpp:574
TNumseg CheckNumSegs(void) const
Definition: Dense_seg.cpp:109
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
Definition: Dense_seg.cpp:686
TSeqPos GetSeqStart(TDim row) const
Definition: Dense_seg.cpp:165
void OrderAdjacentGaps()
Order adjacent gaps, so that the side with sequence is in row-descending order.
Definition: Dense_seg.cpp:534
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
Definition: Dense_seg.cpp:1273
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:747
TDim CheckNumRows(void) const
Definition: Dense_seg.cpp:92
TNumseg x_FindSegment(TDim row, TSignedSeqPos pos) const
Definition: Dense_seg.cpp:719
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
Definition: Dense_seg.cpp:1108
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
Definition: Dense_seg.cpp:432
Uint4 m_set_State1[1]
Definition: Dense_seg.hpp:176
vector< int > TWidths
Definition: Dense_seg.hpp:73
void Validate(bool full_test=false) const
Definition: Dense_seg.cpp:274
void RemapToLoc(TDim row, const CSeq_loc &loc, bool ignore_strand=false)
Definition: Dense_seg.cpp:922
static void SetReserveHooks(CObjectIStream &in)
Definition: Dense_seg.cpp:1533
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
Definition: Dense_seg.cpp:62
const TWidths & GetWidths(void) const
Definition: Dense_seg.hpp:210
CObjectIStream –.
Definition: objistr.hpp:93
CObjectInfoMI –.
Definition: objectiter.hpp:432
CObjectTypeInfoMI –.
Definition: objectiter.hpp:246
CObjectTypeInfo –.
Definition: objectinfo.hpp:94
CSafeStatic<>::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
Base class for all serializable objects.
Definition: serialbase.hpp:150
static const char si[8][64]
Definition: des.c:146
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:90
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
Definition: serialdef.hpp:191
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
Definition: serialbase.hpp:482
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
static C * Get(const CTypesIterator &it)
Definition: objecttype.hpp:116
TMemberIndex GetMemberIndex(void) const
Get index of the member in the class.
const CObjectInfo & GetClassObject(void) const
Get containing class data.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
@ eParam_NoThread
Do not use per-thread values.
Definition: ncbi_param.hpp:418
position_type GetLength(void) const
Definition: range.hpp:158
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLens(void) const
lengths in ids order within segs Check if a value has been assigned to Lens data member.
Definition: Dense_seg_.hpp:543
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
Definition: Dense_seg_.hpp:568
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
TDim & SetDim(void)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:434
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TNumseg & SetNumseg(void)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:481
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
bool IsSetStarts(void) const
start OFFSETS in ids order within segs Check if a value has been assigned to Starts data member.
Definition: Dense_seg_.hpp:518
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
Definition: Dense_seg_.hpp:574
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
bool IsSetIds(void) const
sequences in order Check if a value has been assigned to Ids data member.
Definition: Dense_seg_.hpp:493
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
int i
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
T plus(T x_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define row(bind, expected)
Definition: string_bind.c:73
Definition: type.c:6
#define _ASSERT
Modified on Tue Apr 23 07:40:49 2024 by modify_doxy.py rev. 669887