NCBI C++ ToolKit
align_range_coll.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL___ALIGN_RANGE_COLL__HPP
2 #define UTIL___ALIGN_RANGE_COLL__HPP
3 
4 /* $Id: align_range_coll.hpp 95264 2021-10-29 12:37:02Z grichenk $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Andrey Yazhuk
30  *
31  * File Description:
32  * class CAlignRangeCollection - sorted collection of CAlignRange-s
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbistl.hpp>
37 #include <corelib/ncbiexpt.hpp>
38 
39 #include <util/range.hpp>
41 
42 #include <algorithm>
43 
44 
46 
48 {
49 public:
51  {
52  }
53 
54  virtual const char* GetErrCodeString(void) const override
55  {
56  return "CAlignRangeCollection - operation resulted in invalid state.";
57  }
58 };
59 
60 template<class TAlnRange> class CAlignRangeCollection;
61 template<class TAlnRange> class CAlignRangeCollectionList;
62 
63 ///////////////////////////////////////////////////////////////////////////////
64 /// class CAlignRangeCollection<TAlignRange> represent a sorted collection of
65 /// TAlignRange.
66 /// The collection has two coordinate spaces: "First" - primary, usually
67 /// represents alignment or consensus space, "Second" - represents a coordinate
68 /// space associated with the aligned sequence.
69 /// policies - do not overlap on master
70 /// - do not overlap on aligned
71 /// When collection is normalized all segments are sorted by "From" position,
72 /// and all policies are enforced.
73 /// If policy check fails
74 template<class TAlnRange>
76 {
77 public:
78  typedef TAlnRange TAlignRange;
81  typedef vector<TAlignRange> TAlignRangeVector;
82  typedef typename TAlignRangeVector::const_iterator const_iterator;
83  typedef typename TAlignRangeVector::const_reverse_iterator const_reverse_iterator;
84  typedef typename TAlignRangeVector::size_type size_type;
85 
86  enum EFlags {
87  /// Policies:
88  fKeepNormalized = 0x0001, /// enforce all policies after any modification
89  /// if normalization is not possible exception will be thrown
90  fAllowMixedDir = 0x0002, /// allow segments with different orientation
91  fAllowOverlap = 0x0004, /// allow segments overlapping on the first sequence
92  fAllowAbutting = 0x0008, /// allows segments not separated by gaps
93  fIgnoreInsertions = 0x0010, /// do not store insertions
94 
96  fDefaultPoicy = fDefaultPolicy, /// Keep compatible with SC-8.0
97 
98  fPolicyMask = 0x001f,
99 
100  /// State flags:
101  fNotValidated = 0x0100, /// collection was modified and not validated
102  fInvalid = 0x0200, /// one or more policies violated
103 
104  fUnsorted = 0x010000,
105  fDirect = 0x020000, /// contains at least one direct range
106  fReversed = 0x040000, /// contains at least one reversed range
108  fOverlap = 0x080000,
109  fAbutting = 0x100000
110 
111  /// if fUnsorted is set then there fOverlap and fAbutting are undefined (test is expensive)
112  /// if fOverlap is set fAbutting is undefined
113  };
114  /// adding empty ranges is considered valid, they are simply ignored
115 
121  eRight
122  };
123 
125  : m_Flags(flags)
126  {
127  }
128 
130  int flags)
131  : m_Flags(flags)
132  {
133  m_Ranges = v;
134  }
135 
136  /// @name Container Interface
137  /// @{
138  /// immitating vector, but providing only "const" access to elements
139  void reserve(int count)
140  {
141  m_Ranges.reserve(count);
142  }
143 
145 
147  {
148  return m_Ranges.begin();
149  }
150 
152  {
153  return m_Ranges.end();
154  }
155 
157  {
158  return m_Ranges.rbegin();
159  }
160 
162  {
163  return m_Ranges.rend();
164  }
165 
166  size_type size() const
167  {
168  return m_Ranges.size();
169  }
170 
171  bool empty() const
172  {
173  return m_Ranges.empty();
174  }
175 
177  {
179  return std::lower_bound(begin(), end(), r.GetFirstFrom(), p);
180  }
181 
183  {
184  if (r.GetLength() > 0) {
185  const_iterator it = end();
186 
187  if (IsSet(fKeepNormalized)) { // find insertion point
188  it = find_insertion_point(r);
189  }
190 
191  return insert(it, r);
192  }
193  return end();
194  }
195 
196  // insert(where, r)() will add a specified range into a specified position
197  // if insertion violates policies the segment will be inserted and
198  // appropriate flags set (exception thrown)
200  {
201  const_iterator it_ins = end(); // insertion point (return value)
202  if (r.GetLength() > 0) {
203  iterator it = begin_nc() + (where - begin());
204 
205  int r_dir = r.IsDirect() ? fDirect : fReversed;
206  m_Flags |= r_dir;
207 
208  if (IsSet(fKeepNormalized)) {
209  if (it != begin_nc()) { // check left
210  TAlignRange& r_left = *(it - 1);
211  if (r_left.IsAbutting(r)) {
212  if (IsSet(fAllowAbutting)) {
213  m_Flags |= fAbutting;
214  }
215  else {
216  r_left.CombineWithAbutting(r);
217  it_ins = it - 1;
218  }
219  }
220  else { // can overlap or be mixed
221  int flags = ValidateRanges(r_left, r);
222  m_Flags |= flags;
223  }
224  }
225  if (it != end_nc()) { // check right
226  TAlignRange& r_right = *it;
227  if (r_right.IsAbutting(r)) {
228  if (IsSet(fAllowAbutting)) {
229  m_Flags |= fAbutting;
230  }
231  else { // merge
232  if (it_ins != end()) {
233  TAlignRange& r_left = *(it - 1);
234  r_left.CombineWithAbutting(r_right);
235  m_Ranges.erase(it);
236  }
237  else {
238  r_right.CombineWithAbutting(r);
239  it_ins = it;
240  }
241  }
242  }
243  else { // can overlap or be mixed
244  int flags = ValidateRanges(r, r_right);
245  m_Flags |= flags;
246  }
247  }
248 
249  if (it_ins == end()) {
250  it_ins = m_Ranges.insert(it, r);
251  }
252  x_ValidateFlags(); // validate and throw exception if needed
253  }
254  else {
256  it_ins = m_Ranges.insert(it, r);
257  }
258  }
259  return it_ins;
260  }
261 
263  {
264  iterator it_del = begin_nc() + (it - begin());
265  const_iterator it_res = end();
266 
267  if (it_del >= begin_nc() && it_del < end_nc()) {
268  it_res = m_Ranges.erase(it_del);
269 
270  if (empty()) {
273  }
274  else {
275  if (IsSet(fInvalid)) {
276  x_SetFlags(fNotValidated); // erasing probably fixed the problem
277  }
278  }
279  return it_res;
280  }
281  return it_del;
282  }
283 
284  void push_back(const TAlignRange& r)
285  {
286  insert(end(), r);
287  }
288 
289  void pop_back()
290  {
291  const_iterator it = end();
292  erase(--it);
293  }
294 
295  const TAlignRange& operator[](size_type pos) const
296  {
297  return m_Ranges[pos];
298  }
299 
301  {
302  return m_Ranges[pos];
303  }
304 
305  void clear()
306  {
307  m_Ranges.clear();
308 
311  }
312 
313  // returns an iterator pointing to a range containing "pos" or end() if
314  // such a range does not exist
316  {
317  pair<const_iterator, bool> res = find_2(pos);
318  return res.second ? res.first : m_Ranges.end();
319  }
320 
321  /// returns an iterator pointing to a range containing "pos"; if such a
322  /// range does not exists an iterator points to a first range that has
323  /// ToOpen > pos; the bool element of pair specifies whether the range
324  /// contains the position.
325  pair<const_iterator, bool> find_2(position_type pos) const
326  {
328  const_iterator it = std::lower_bound(begin(), end(), pos, p); /* NCBI_FAKE_WARNING: WorkShop */
329  bool b_contains = (it != end() && it->GetFirstFrom() <= pos);
330  return make_pair(it, b_contains);
331  }
332 
334  {
336  return std::lower_bound(begin(), end(), pos, p);
337  }
338 
340  {
342  return std::upper_bound(begin(), end(), pos, p);
343  }
344 
345  /// @}
346 
348  {
349  if (!m_Ranges.empty()) {
350  return begin()->GetFirstFrom();
351  }
352  else {
353  return TAlignRange::GetEmptyFrom();
354  }
355  }
356 
358  {
359  if (!m_Ranges.empty()) {
360  return rbegin()->GetFirstToOpen();
361  }
362  else {
363  return TAlignRange::GetEmptyToOpen();
364  }
365  }
366 
368  {
369  if (!m_Ranges.empty()) {
370  return rbegin()->GetFirstTo();
371  }
372  else {
373  return TAlignRange::GetEmptyTo();
374  }
375  }
376 
378  {
379  if (!m_Ranges.empty()) {
380  position_type from = begin()->GetFirstFrom();
381  return rbegin()->GetFirstToOpen() - from;
382  }
383  else {
384  return 0;
385  }
386  }
387 
389  {
391  }
392 
393  int GetFlags() const {
394  return m_Flags;
395  }
396 
397  bool IsSet(int flags) const {
398  return (m_Flags & flags) == flags;
399  }
400 
401  int GetPolicyFlags() const {
402  return m_Flags & fPolicyMask;
403  }
404 
405  int GetStateFlags() const {
406  return m_Flags & ~fPolicyMask;
407  }
408 
410  ESearchDirection dir = eNone) const
411  {
412  pair<const_iterator, bool> res = find_2(pos);
413 
414  // when translating from first to second eForward = eRight, eBackwards = eLeft
415  if (!res.second) { // pos does not belong to a segment - adjust
416  if (dir == eRight || dir == eForward) {
417  if (res.first != end()) {
418  res.second = true;
419  pos = res.first->GetFirstFrom();
420  }
421  }
422  else if (dir == eLeft || dir == eBackwards) {
423  if (res.first != begin()) {
424  --res.first;
425  res.second = true;
426  pos = res.first->GetFirstTo();
427  }
428  }
429  }
430  return res.second ? res.first->GetSecondPosByFirstPos(pos)
431  : -1;
432  }
433 
435  ESearchDirection dir = eNone) const
436  {
437  ESearchDirection dir_direct = eNone, dir_reversed = eNone;
438  if (dir == eLeft) {
439  dir_direct = eBackwards;
440  dir_reversed = eForward;
441  }
442  else if (dir == eRight) {
443  dir_direct = eForward;
444  dir_reversed = eBackwards;
445  }
446 
447  const_iterator it_closest = end();
448  TSignedSeqPos min_dist = -1, min_pos = -1;
449 
450  for (const_iterator it = begin(); it != end(); it++) {
451  const TAlignRange& r = *it;
452  TSignedSeqPos res = r.GetFirstPosBySecondPos(pos);
453  if(res != -1) {
454  return res;
455  }
456  else if(dir != eNone) {
457  // it does not contain pos - check the distance between "r" and "pos"
458  int dist = -1;
459  int res_pos = -1;
460  ESearchDirection dir_2 = r.IsDirect() ? dir_direct : dir_reversed;
461  if (dir == eForward || dir_2 == eForward) {
462  res_pos = r.GetSecondFrom();
463  dist = res_pos - pos;
464  }
465  else if (dir == eBackwards || dir_2 == eBackwards) {
466  res_pos = it->GetSecondTo();
467  dist = pos - res_pos;
468  }
469  if (dist > 0 && (min_dist == -1 || min_dist > dist)) {
470  it_closest = it;
471  min_dist = dist;
472  min_pos = res_pos;
473  }
474  }
475  }
476 
477  return (it_closest == end()) ? -1 : it_closest->GetFirstPosBySecondPos(min_pos);
478  }
479 
480  void Sort()
481  {
482  std::sort(m_Ranges.begin(), m_Ranges.end(),
484  SortInsertions();
485 
488  }
489 
490  void SortInsertions(void)
491  {
492  std::sort(m_Insertions.begin(), m_Insertions.end(),
494  }
495 
496  /// merge adjacent segments together, merging changes collection size and invalidates
497  /// iterators
499  {
500  if(IsSet(fUnsorted) || IsSet(fOverlap)) { // operation cannot be performed
501  throw CAlignRangeCollException();
502  }
503 
504  vector<bool>* dead = NULL; // vector of indexes for elements that need to be removed
505  size_t range_size = m_Ranges.size();
506 
507  for (size_t i = range_size - 1; i > 0; --i) {
508  if (m_Ranges[i - 1].IsAbutting(m_Ranges[i])) {
509  if (dead == NULL) { // allocate temp vector
510  dead = new vector<bool>(range_size, false);
511  dead->reserve(range_size);
512  }
513  m_Ranges[i - 1].CombineWithAbutting(m_Ranges[i]); // merge i into i-1
514  (*dead)[i] = true;
515  }
516  }
517  if (dead) { // there are deal elements - need to compress
518  size_t shift = 0;
519  for (size_t i = 0; i < range_size; i++ ) {
520  if ((*dead)[i]) {
521  ++shift;
522  }
523  else if(shift > 0) {
524  m_Ranges[i - shift] = m_Ranges[i];
525  }
526  }
527  delete dead;
528  m_Ranges.resize(range_size - shift);
529  }
532  }
533 
534  /// analyses segements and updates flags
535  void Validate()
536  {
537  if (IsSet(fNotValidated) || IsSet(fInvalid)) {
538  int complete = fUnsorted | fMixedDir | fAbutting | fOverlap;
539  int flags = 0;
540  for (size_t i = 0; i < m_Ranges.size()-1; i++) {
542  if(flags == complete) {
543  break;
544  }
545  }
546  if (flags == 0) {
548  }
549  else {
551  }
553  }
554  }
555 
556  /// ensures that segments are sorted, if fAllowAdjust is not set - merges adjacent segments
557  void Normalize()
558  {
559  Sort();
560  CombineAbutting();
561  Validate();
562  }
563 
564  /// determine conflicts between two ranges
565  static int ValidateRanges(const TAlignRange& r_1, const TAlignRange& r_2)
566  {
567  _ASSERT(r_1.NotEmpty() && r_2.NotEmpty());
568 
569  const TAlignRange* r_left = &r_1;
570  const TAlignRange* r_right = &r_2;
571  int flags = 0;
572 
573  if (r_1.IsDirect() != r_2.IsDirect()) {
574  flags = fMixedDir;
575  }
576  if (r_2.GetFirstFrom() < r_1.GetFirstFrom()) {
577  flags |= fUnsorted;
578  swap(r_left, r_right);
579  }
580  if (r_left->GetFirstToOpen() > r_right->GetFirstFrom()) {
581  flags |= fOverlap;
582  }
583  else if (r_1.IsAbutting(r_2)) {
584  flags |= fAbutting;
585  }
586  return flags;
587  }
588 
590  {
591  if ( IsSet(fIgnoreInsertions) ) {
592  return;
593  }
594  m_Insertions.push_back(r);
595  }
596 
597  void AddInsertions(const TAlignRangeVector& insertions)
598  {
599  if ( IsSet(fIgnoreInsertions) ) {
600  return;
601  }
602  ITERATE(typename TAlignRangeVector, it, insertions) {
603  m_Insertions.push_back(*it);
604  }
605  SortInsertions();
606  }
607 
608  void AddInsertions(const TThisType& collection)
609  {
610  if ( IsSet(fIgnoreInsertions) ) {
611  return;
612  }
613  ITERATE(typename TAlignRangeVector, it, collection) {
614  m_Insertions.push_back(*it);
615  }
616  SortInsertions();
617  }
618 
619  /// Each insertion shows where the 'first' sequence has a gap
620  /// while the 'second' sequence has the insertion of the specified
621  /// length. Direction of the insertion is always 'direct'.
623  {
624  return m_Insertions;
625  }
626 
630  }
631 
635  }
636 
637 protected:
638  void x_SetFlags(int flags)
639  {
640  m_Flags |= flags;
641  }
642  void x_ResetFlags(int flags)
643  {
644  m_Flags &= ~flags;
645  }
647  {
648  if (IsSet(fKeepNormalized)) {
649  int invalid_flags = m_Flags & (fMixedDir | fOverlap | fAbutting);
650  if (IsSet(fAllowMixedDir)) {
651  invalid_flags &= ~fMixedDir;
652  }
653  if (IsSet(fAllowOverlap)) {
654  invalid_flags &= ~fOverlap;
655  }
656  if (IsSet(fAllowAbutting)) {
657  invalid_flags &= ~fAbutting;
658  }
659  if ((invalid_flags & fMixedDir) == fMixedDir ||
660  (invalid_flags & (fOverlap | fAbutting))) {
662  throw CAlignRangeCollException(); // copy collection to exception?
663  }
664  }
665  }
666 
667  typedef typename TAlignRangeVector::iterator iterator;
668  typedef typename TAlignRangeVector::reverse_iterator reverse_iterator;
669 
671  {
672  return m_Ranges.begin();
673  }
675  {
676  return m_Ranges.end();
677  }
678  bool x_Equals(const TThisType &c) const
679  {
680  if (&c == this) {
681  return true;
682  }
683  else {
684  return m_Ranges == c.m_Ranges;
685  }
686  }
687 
689  {
690  for ( auto& rg : m_Ranges ) {
691  rg.SetFirstFrom(rg.GetFirstFrom()*3);
692  rg.SetSecondFrom(rg.GetSecondFrom()*3);
693  rg.SetLength(rg.GetLength()*3);
694  }
695  for ( auto& rg : m_Insertions ) {
696  rg.SetFirstFrom(rg.GetFirstFrom()*3);
697  rg.SetSecondFrom(rg.GetSecondFrom()*3);
698  rg.SetLength(rg.GetLength()*3);
699  }
700  }
701 
703  typename TAlignRangeVector::iterator it = ranges.begin();
704  while (it != ranges.end()) {
705  if (!it->IntersectingWith(range)) {
706  typename TAlignRangeVector::iterator del = it;
707  ++it;
708  ranges.erase(del);
709  continue;
710  }
711  if (it->GetFirstFrom() < range.GetFrom() || it->GetFirstTo() > range.GetTo()) {
712  it->IntersectWith(range);
713  }
714  ++it;
715  }
716  }
717 
719  typename TAlignRangeVector::iterator it = ranges.begin();
720  while (it != ranges.end()) {
721  if (it->GetSecondTo() < range.GetFrom() || it->GetSecondFrom() > range.GetTo()) {
722  typename TAlignRangeVector::iterator del = it;
723  ++it;
724  ranges.erase(del);
725  continue;
726  }
727  if (it->GetSecondFrom() < range.GetFrom() || it->GetSecondTo() > range.GetTo()) {
728  it->IntersectSecondWith(range);
729  }
730  ++it;
731  }
732  }
733 
736  int m_Flags; /// combination of EFlags
737 };
738 
739 
740 ///////////////////////////////////////////////////////////////////////////////
741 /// class CAlignRangeCollectionList<TAlignRange> represent a sorted collection of
742 /// TAlignRange.
743 /// The collection has two coordinate spaces: "First" - primary, usually
744 /// represents alignment or consensus space, "Second" - represents a coordinate
745 /// space associated with the aligned sequence.
746 /// policies - do not overlap on master
747 /// - do not overlap on aligned
748 /// When collection is normalized all segments are sorted by "From" position,
749 /// and all policies are enforced.
750 /// If policy check fails
751 template<class TAlnRange>
753 {
754 public:
755  typedef TAlnRange TAlignRange;
758  typedef list<TAlignRange> TAlignRangeList;
759  typedef vector<TAlignRange> TInsertions;
760  /// Use iterators on TAlignRangeList or TInsertions for insertions
761  typedef vector<TAlignRange> TAlignRangeVectorImpl;
762  NCBI_DEPRECATED typedef vector<TAlignRange> TAlignRangeVector;
763 
764  typedef typename TAlignRangeList::iterator TRawIterator;
766  {
767  public:
769  typedef ptrdiff_t difference_type;
770  typedef const TAlignRange* pointer;
771  typedef const TAlignRange& reference;
772  typedef bidirectional_iterator_tag iterator_category;
773 
775  {
776  }
778  : m_Iter(iter)
779  {
780  }
781 
783  {
784  advance(m_Iter, delta);
785  return *this;
786  }
788  {
789  return *this += -delta;
790  }
792  {
793  ++m_Iter;
794  return *this;
795  }
797  {
798  --m_Iter;
799  return *this;
800  }
802  {
803  const_iterator ret = *this;
804  ++m_Iter;
805  return ret;
806  }
808  {
809  const_iterator ret = *this;
810  --m_Iter;
811  return ret;
812  }
814  {
815  return const_iterator(*this) += delta;
816  }
818  {
819  return const_iterator(*this) -= delta;
820  }
821 
822  bool operator==(const const_iterator& iter) const
823  {
824  return m_Iter == iter.m_Iter;
825  }
826  bool operator!=(const const_iterator& iter) const
827  {
828  return !(*this == iter);
829  }
830 
832  {
833  return *m_Iter;
834  }
836  {
837  return &*m_Iter;
838  }
839 
840  protected:
841  friend TThisType;
842 
844  };
845  class iterator : public const_iterator
846  {
847  public:
850 
852  {
853  }
855  : const_iterator(i)
856  {
857  }
858 
860  {
862  return *this;
863  }
865  {
866  return *this += -delta;
867  }
869  {
870  return iterator(*this) += delta;
871  }
873  {
874  return iterator(*this) -= delta;
875  }
876 
878  {
879  return *this->m_Iter;
880  }
881  pointer operator->() const
882  {
883  return &*this->m_Iter;
884  }
885  };
886 
887  typedef typename TAlignRangeList::const_reverse_iterator const_reverse_iterator;
888  typedef typename TAlignRangeList::size_type size_type;
889 
890  enum EFlags {
891  /// Policies:
892  fKeepNormalized = 0x0001, /// enforce all policies after any modification
893  /// if normalization is not possible exception will be thrown
894  fAllowMixedDir = 0x0002, /// allow segments with different orientation
895  fAllowOverlap = 0x0004, /// allow segments overlapping on the first sequence
896  fAllowAbutting = 0x0008, /// allows segments not separated by gaps
897  fIgnoreInsertions = 0x0010, /// do not store insertions
898 
900  fDefaultPoicy = fDefaultPolicy, /// Keep compatible with SC-8.0
901 
902  fPolicyMask = 0x001f,
903 
904  /// State flags:
905  fNotValidated = 0x0100, /// collection was modified and not validated
906  fInvalid = 0x0200, /// one or more policies violated
907 
908  fUnsorted = 0x010000,
909  fDirect = 0x020000, /// contains at least one direct range
910  fReversed = 0x040000, /// contains at least one reversed range
912  fOverlap = 0x080000,
913  fAbutting = 0x100000
914 
915  /// if fUnsorted is set then there fOverlap and fAbutting are undefined (test is expensive)
916  /// if fOverlap is set fAbutting is undefined
917  };
918  /// adding empty ranges is considered valid, they are simply ignored
919 
925  eRight
926  };
927 
929  : m_Flags(flags)
930  {
931  }
932 
934  int flags)
935  : m_Flags(flags)
936  {
937  m_RangesList.assign(v.begin(), v.end());
938  x_IndexAll();
939  }
940 
942  int flags)
943  : m_Flags(flags)
944  {
945  m_RangesList = v;
946  x_IndexAll();
947  }
948 
949  /// @name Container Interface
950  /// @{
951  /// immitating vector, but providing only "const" access to elements
952  void reserve(int count)
953  {
954  m_RangesList.reserve(count);
955  m_RangesVector.reserve(count);
956  m_IndexByFirst.reserve(count);
957  m_IndexBySecond.reserve(count);
958  }
959 
960  const_iterator begin() const
961  {
962  return const_cast<TAlignRangeList&>(m_RangesList).begin();
963  }
964 
965  const_iterator end() const
966  {
967  return const_cast<TAlignRangeList&>(m_RangesList).end();
968  }
969 
971  {
972  return m_RangesList.rbegin();
973  }
974 
976  {
977  return m_RangesList.rend();
978  }
979 
980  size_type size() const
981  {
982  return m_RangesList.size();
983  }
984 
985  bool empty() const
986  {
987  return m_RangesList.empty();
988  }
989 
992  using is_transparent = void;
993  bool operator()(const TIndexKey& k1, const TIndexKey& k2) const
994  {
995  return k1->GetFirstFrom() < k2->GetFirstFrom();
996  }
997  bool operator()(const TIndexKey& k1, position_type pos) const
998  {
999  return k1->GetFirstFrom() < pos;
1000  }
1001  bool operator()(position_type pos, const TIndexKey& k2) const
1002  {
1003  return pos < k2->GetFirstFrom();
1004  }
1005  };
1007  using is_transparent = void;
1008  bool operator()(const TIndexKey& k1, const TIndexKey& k2) const
1009  {
1010  return k1->GetSecondFrom() < k2->GetSecondFrom();
1011  }
1012  bool operator()(const TIndexKey& k1, position_type pos) const
1013  {
1014  return k1->GetSecondFrom() < pos;
1015  }
1016  bool operator()(position_type pos, const TIndexKey& k2) const
1017  {
1018  return pos < k2->GetSecondFrom();
1019  }
1020  };
1023 
1025  {
1026  return m_IndexByFirst;
1027  }
1029  {
1030  return m_IndexBySecond;
1031  }
1032 
1033  const_iterator insert(const TAlignRange& r)
1034  {
1035  if (r.GetLength() > 0) {
1036  iterator it = end_nc();
1037 
1038  if (IsSet(fKeepNormalized)) { // find insertion point
1039  it = x_FindInsertionPlace(r);
1040  }
1041 
1042  return insert(it, r);
1043  }
1044  return end();
1045  }
1046 
1047  // insert(where, r)() will add a specified range into a specified position
1048  // if insertion violates policies the segment will be inserted and
1049  // appropriate flags set (exception thrown)
1050  const_iterator insert(const_iterator it, const TAlignRange& arg_r)
1051  {
1052  TAlignRange r = arg_r;
1053  const_iterator it_ins = end(); // insertion point (return value)
1054  if (r.GetLength() > 0) {
1055  int r_dir = r.IsDirect() ? fDirect : fReversed;
1056  m_Flags |= r_dir;
1057 
1058  if (IsSet(fKeepNormalized)) {
1059  if (it != begin_nc()) { // check left
1060  auto it_left = prev(it);
1061  if (it_left->IsAbutting(r)) {
1062  if (IsSet(fAllowAbutting)) {
1063  // indicate that we have abutting ranges
1064  m_Flags |= fAbutting;
1065  }
1066  else {
1067  // merge with existing abutting range and remove it from the set
1068  r.CombineWithAbutting(*it_left);
1069  x_Erase(it_left.m_Iter);
1070  }
1071  }
1072  else { // can overlap or be mixed
1073  int flags = ValidateRanges(*it_left, r);
1074  m_Flags |= flags;
1075  }
1076  }
1077  if (it != end_nc()) { // check right
1078  if (it->IsAbutting(r)) {
1079  if (IsSet(fAllowAbutting)) {
1080  m_Flags |= fAbutting;
1081  }
1082  else { // merge
1083  r.CombineWithAbutting(*it);
1084  x_Erase((it++).m_Iter);
1085  }
1086  }
1087  else { // can overlap or be mixed
1088  int flags = ValidateRanges(r, *it);
1089  m_Flags |= flags;
1090  }
1091  }
1092 
1093  it_ins = x_Insert(it.m_Iter, r);
1094  x_ValidateFlags(); // validate and throw exception if needed
1095  }
1096  else {
1098  it_ins = x_Insert(it.m_Iter, r);
1099  }
1100  }
1101  return it_ins;
1102  }
1103 
1104  const_iterator erase(const_iterator it_del)
1105  {
1106  const_iterator it_res = end();
1107 
1108  if (it_del != it_res) {
1109  it_res = next(it_del);
1110  x_Erase(it_del.m_Iter);
1111 
1112  if (empty()) {
1115  }
1116  else {
1117  if (IsSet(fInvalid)) {
1118  x_SetFlags(fNotValidated); // erasing probably fixed the problem
1119  }
1120  }
1121  }
1122 
1123  return it_res;
1124  }
1125 
1126  void push_back(const TAlignRange& r)
1127  {
1128  insert(end(), r);
1129  }
1130 
1131  void pop_back()
1132  {
1133  erase(prev(end()));
1134  }
1135 
1136  /// Use iterator access
1138  {
1139  return x_GetAlignRangeVector()[pos];
1140  }
1141  /*
1142  TAlignRange& operator[](size_type pos)
1143  {
1144  return x_GetAlignRangeVector()[pos];
1145  }
1146  */
1147 
1148  void clear()
1149  {
1150  x_UnindexAll();
1151  m_RangesList.clear();
1152  m_RangesVector.clear();
1153 
1156  }
1157 
1158  // returns an iterator pointing to a range containing "pos" or end() if
1159  // such a range does not exist
1160  const_iterator find(position_type pos) const
1161  {
1162  pair<const_iterator, bool> res = find_2(pos);
1163  return res.second ? res.first : end();
1164  }
1165 
1166  /// returns an iterator pointing to a range containing "pos"; if such a
1167  /// range does not exists an iterator points to a first range that has
1168  /// ToOpen > pos; the bool element of pair specifies whether the range
1169  /// contains the position.
1170  pair<const_iterator, bool> find_2(position_type pos) const
1171  {
1172  const_iterator it = x_FindRangeContaining(pos);
1173  bool b_contains = (it != end() && it->GetFirstFrom() <= pos);
1174  return make_pair(it, b_contains);
1175  }
1176 
1177 
1178  // returns an iterator pointing to a range containing "pos" or end() if
1179  // such a range does not exist
1180  const_iterator find_by_second(position_type pos) const
1181  {
1182  pair<pair<const_iterator, const_iterator>, bool> res = x_FindRangesBySecondContaining(pos);
1183  return res.second? res.first.first: end();
1184  }
1185 
1186  /// returns an iterator pointing to a range containing "pos"; if such a
1187  /// range does not exists an iterator points to a first range that has
1188  /// ToOpen > pos; the bool element of pair specifies whether the range
1189  /// contains the position.
1190  pair<pair<const_iterator, const_iterator>, bool> find_2_by_second(position_type pos) const
1191  {
1192  return x_FindRangesBySecondContaining(pos);
1193  }
1194 
1195 
1196  // returns an iterator pointing to a place for new range to be inserted
1197  const_iterator find_insertion_point(const TAlignRange& range) const
1198  {
1199  return const_cast<TThisType*>(this)->x_FindInsertionPlace(range);
1200  }
1201 
1202 #if 0
1203  const_iterator lower_bound(position_type pos) const
1204  {
1206  return std::lower_bound(begin(), end(), pos, p);
1207  }
1208 
1209  const_iterator upper_bound(position_type pos) const
1210  {
1212  return std::upper_bound(begin(), end(), pos, p);
1213  }
1214 #endif
1215 
1216  /// @}
1217 
1219  {
1220  if ( empty() ) {
1221  return TAlignRange::GetEmptyFrom();
1222  }
1223  else {
1224  return begin()->GetFirstFrom();
1225  }
1226  }
1227 
1229  {
1230  if ( empty() ) {
1231  return TAlignRange::GetEmptyToOpen();
1232  }
1233  else {
1234  return rbegin()->GetFirstToOpen();
1235  }
1236  }
1237 
1239  {
1240  if ( empty() ) {
1241  return TAlignRange::GetEmptyTo();
1242  }
1243  else {
1244  return rbegin()->GetFirstTo();
1245  }
1246  }
1247 
1249  {
1250  if ( empty() ) {
1251  return 0;
1252  }
1253  else {
1254  position_type from = begin()->GetFirstFrom();
1255  return rbegin()->GetFirstToOpen() - from;
1256  }
1257  }
1258 
1260  {
1262  }
1263 
1264  int GetFlags() const {
1265  return m_Flags;
1266  }
1267 
1268  bool IsSet(int flags) const {
1269  return (m_Flags & flags) == flags;
1270  }
1271 
1272  int GetPolicyFlags() const {
1273  return m_Flags & fPolicyMask;
1274  }
1275 
1276  int GetStateFlags() const {
1277  return m_Flags & ~fPolicyMask;
1278  }
1279 
1281  ESearchDirection dir = eNone) const
1282  {
1283  pair<const_iterator, bool> res = find_2(pos);
1284 
1285  // when translating from first to second eForward = eRight, eBackwards = eLeft
1286  if ( !res.second ) { // pos does not belong to a segment - adjust
1287  if (dir == eRight || dir == eForward) {
1288  if ( res.first != end() ) {
1289  res.second = true;
1290  pos = res.first->GetFirstFrom();
1291  }
1292  }
1293  else if (dir == eLeft || dir == eBackwards) {
1294  if ( res.first != begin() ) {
1295  --res.first;
1296  res.second = true;
1297  pos = res.first->GetFirstTo();
1298  }
1299  }
1300  }
1301  return res.second ? res.first->GetSecondPosByFirstPos(pos)
1302  : -1;
1303  }
1304 
1306  ESearchDirection dir = eNone) const
1307  {
1308  ESearchDirection dir_direct = eNone, dir_reversed = eNone;
1309  if ( dir == eLeft ) {
1310  dir_direct = eBackwards;
1311  dir_reversed = eForward;
1312  }
1313  else if ( dir == eRight ) {
1314  dir_direct = eForward;
1315  dir_reversed = eBackwards;
1316  }
1317 
1318  if ( 1 ) {
1319  const_iterator it_closest = end();
1320  TSignedSeqPos min_dist = -1, min_pos = -1;
1321 
1322  pair<pair<const_iterator, const_iterator>, bool> res = find_2_by_second(pos);
1323  if ( !res.second ) {
1324  // it does not contain pos - check the distance between "r" and "pos"
1325  if ( res.first.first != end() ) {
1326  // check next range
1327  const TAlignRange& r = *res.first.first;
1328  _ASSERT(r.GetSecondFrom() > pos);
1329  ESearchDirection dir_2 = r.IsDirect() ? dir_direct : dir_reversed;
1330  if ( dir == eForward || dir_2 == eForward ) {
1331  it_closest = res.first.first;
1332  min_pos = r.GetSecondFrom();
1333  min_dist = r.GetSecondFrom() - pos;
1334  }
1335  }
1336  if ( res.first.second != end() ) {
1337  // check previous range
1338  const TAlignRange& r = *res.first.second;
1339  _ASSERT(r.GetSecondTo() < pos);
1340  ESearchDirection dir_2 = r.IsDirect() ? dir_direct : dir_reversed;
1341  if ( dir == eBackwards || dir_2 == eBackwards ) {
1342  int dist = pos - r.GetSecondTo();
1343  if ( min_dist < 0 || dist < min_dist ) {
1344  it_closest = res.first.second;
1345  min_pos = r.GetSecondTo();
1346  min_dist = dist;
1347  }
1348  }
1349  }
1350  if ( min_dist >= 0 ) {
1351  pos = min_pos;
1352  }
1353  }
1354  else {
1355  it_closest = res.first.first;
1356  min_dist = 0;
1357  }
1358  return min_dist >= 0? it_closest->GetFirstPosBySecondPos(pos): -1;
1359  }
1360 
1361  const_iterator it_closest = end();
1362  TSignedSeqPos min_dist = -1, min_pos = -1;
1363 
1364  for (const_iterator it = begin(); it != end(); it++) {
1365  const TAlignRange& r = *it;
1366  TSignedSeqPos res = r.GetFirstPosBySecondPos(pos);
1367  if(res != -1) {
1368  return res;
1369  }
1370  else if(dir != eNone) {
1371  // it does not contain pos - check the distance between "r" and "pos"
1372  int dist = -1;
1373  int res_pos = -1;
1374  ESearchDirection dir_2 = r.IsDirect() ? dir_direct : dir_reversed;
1375  if (dir == eForward || dir_2 == eForward) {
1376  res_pos = r.GetSecondFrom();
1377  dist = res_pos - pos;
1378  }
1379  else if (dir == eBackwards || dir_2 == eBackwards) {
1380  res_pos = it->GetSecondTo();
1381  dist = pos - res_pos;
1382  }
1383  if (dist > 0 && (min_dist == -1 || min_dist > dist)) {
1384  it_closest = it;
1385  min_dist = dist;
1386  min_pos = res_pos;
1387  }
1388  }
1389  }
1390 
1391  return (it_closest == end()) ? -1 : it_closest->GetFirstPosBySecondPos(min_pos);
1392  }
1393 
1394  void Sort()
1395  {
1396  if ( !empty() ) {
1397  // reorder list of ranges by index
1398  auto where = m_RangesList.begin();
1399  for ( auto& next : m_IndexByFirst ) {
1400  if ( next == where ) {
1401  ++where;
1402  }
1403  else {
1404  m_RangesList.splice(where, m_RangesList, next);
1405  }
1406  }
1407  _ASSERT(where == m_RangesList.end());
1408  }
1409  SortInsertions();
1410 
1413  }
1414 
1415  void SortInsertions(void)
1416  {
1418  //m_Insertions.sort(PAlignRangeFromLess<TAlignRange>());
1419  }
1420 
1421  /// merge adjacent segments together, merging changes collection size and invalidates
1422  /// iterators
1424  {
1425  if(IsSet(fUnsorted) || IsSet(fOverlap)) { // operation cannot be performed
1426  throw CAlignRangeCollException();
1427  }
1428 
1429  if ( !empty() ) {
1430  for ( auto i = m_RangesList.begin(), i_next = next(i);
1431  i_next != m_RangesList.end();
1432  i = i_next, ++i_next ) {
1433  if ( i->IsAbutting(*i_next) ) {
1434  x_Unindex(i_next);
1435  i_next->CombineWithAbutting(*i);
1436  x_Erase(i);
1437  x_Index(i_next);
1438  }
1439  }
1440  }
1441 
1444  }
1445 
1446  /// analyses segements and updates flags
1447  void Validate()
1448  {
1449  if (IsSet(fNotValidated) || IsSet(fInvalid)) {
1450  int complete = fUnsorted | fMixedDir | fAbutting | fOverlap;
1451  int flags = 0;
1452  if ( !empty() ) {
1453  for ( auto i = m_RangesList.begin(), i_next = next(i);
1454  i_next != m_RangesList.end();
1455  i = i_next, ++i_next ) {
1456  flags |= ValidateRanges(*i, *i_next);
1457  if(flags == complete) {
1458  break;
1459  }
1460  }
1461  }
1462  if (flags == 0) {
1464  }
1465  else {
1467  }
1469  }
1470  }
1471 
1472  /// ensures that segments are sorted, if fAllowAdjust is not set - merges adjacent segments
1473  void Normalize()
1474  {
1475  Sort();
1476  CombineAbutting();
1477  Validate();
1478  }
1479 
1480  /// determine conflicts between two ranges
1481  static int ValidateRanges(const TAlignRange& r_1, const TAlignRange& r_2)
1482  {
1483  _ASSERT(r_1.NotEmpty() && r_2.NotEmpty());
1484 
1485  const TAlignRange* r_left = &r_1;
1486  const TAlignRange* r_right = &r_2;
1487  int flags = 0;
1488 
1489  if (r_1.IsDirect() != r_2.IsDirect()) {
1490  flags = fMixedDir;
1491  }
1492  if (r_2.GetFirstFrom() < r_1.GetFirstFrom()) {
1493  flags |= fUnsorted;
1494  swap(r_left, r_right);
1495  }
1496  if (r_left->GetFirstToOpen() > r_right->GetFirstFrom()) {
1497  flags |= fOverlap;
1498  }
1499  else if (r_1.IsAbutting(r_2)) {
1500  flags |= fAbutting;
1501  }
1502  return flags;
1503  }
1504 
1506  {
1507  if ( IsSet(fIgnoreInsertions) ) {
1508  return;
1509  }
1510  m_Insertions.push_back(r);
1511  }
1512 
1513  void AddInsertions(const TInsertions& insertions)
1514  {
1515  if ( IsSet(fIgnoreInsertions) ) {
1516  return;
1517  }
1518  for ( auto& i : insertions) {
1519  m_Insertions.push_back(i);
1520  }
1521  SortInsertions();
1522  }
1523 
1524  void AddInsertions(const TThisType& collection)
1525  {
1526  if ( IsSet(fIgnoreInsertions) ) {
1527  return;
1528  }
1529  for ( auto& i : collection ) {
1530  m_Insertions.push_back(i);
1531  }
1532  SortInsertions();
1533  }
1534 
1535  /// Each insertion shows where the 'first' sequence has a gap
1536  /// while the 'second' sequence has the insertion of the specified
1537  /// length. Direction of the insertion is always 'direct'.
1539  {
1540  return m_Insertions;
1541  }
1542 
1546  }
1547 
1551  }
1552 
1553 protected:
1554  void x_SetFlags(int flags)
1555  {
1556  m_Flags |= flags;
1557  }
1559  {
1560  m_Flags &= ~flags;
1561  }
1563  {
1564  if (IsSet(fKeepNormalized)) {
1565  int invalid_flags = m_Flags & (fMixedDir | fOverlap | fAbutting);
1566  if (IsSet(fAllowMixedDir)) {
1567  invalid_flags &= ~fMixedDir;
1568  }
1569  if (IsSet(fAllowOverlap)) {
1570  invalid_flags &= ~fOverlap;
1571  }
1572  if (IsSet(fAllowAbutting)) {
1573  invalid_flags &= ~fAbutting;
1574  }
1575  if ((invalid_flags & fMixedDir) == fMixedDir ||
1576  (invalid_flags & (fOverlap | fAbutting))) {
1578  throw CAlignRangeCollException(); // copy collection to exception?
1579  }
1580  }
1581  }
1582 
1583  iterator begin_nc()
1584  {
1585  return m_RangesList.begin();
1586  }
1587  iterator end_nc()
1588  {
1589  return m_RangesList.end();
1590  }
1591  bool x_Equals(const TThisType &c) const
1592  {
1593  return this == &c || m_RangesList == c.m_RangesList;
1594  }
1595 
1597  {
1598  if ( m_RangesVector.empty() ) {
1599  m_RangesVector.assign(m_RangesList.begin(), m_RangesList.end());
1600  }
1601  _ASSERT(m_RangesVector.size() == m_RangesList.size());
1602  return m_RangesVector;
1603  }
1604 
1605  void x_Index(TIndexKey iter)
1606  {
1607  m_IndexByFirst.insert(iter);
1608  m_IndexBySecond.insert(iter);
1609  }
1611  {
1612  auto pos = iter->GetFirstFrom();
1613  for ( auto it = m_IndexByFirst.lower_bound(pos); it != m_IndexByFirst.end() && (*it)->GetFirstFrom() == pos; ++it ) {
1614  if ( *it == iter ) {
1615  m_IndexByFirst.erase(it);
1616  return;
1617  }
1618  }
1619  _ASSERT(0&&"Cannot find range in m_IndexByFirst");
1620  }
1622  {
1623  auto pos = iter->GetSecondFrom();
1624  for ( auto it = m_IndexBySecond.lower_bound(pos); it != m_IndexBySecond.end() && (*it)->GetSecondFrom() == pos; ++it ) {
1625  if ( *it == iter ) {
1626  m_IndexBySecond.erase(it);
1627  return;
1628  }
1629  }
1630  _ASSERT(0&&"Cannot find range in m_IndexBySecond");
1631  }
1633  {
1634  x_UnindexByFirst(iter);
1635  x_UnindexBySecond(iter);
1636  }
1637  void x_IndexAll()
1638  {
1639  for ( auto it = m_RangesList.begin(); it != m_RangesList.end(); ++it ) {
1640  x_Index(it);
1641  }
1642  }
1644  {
1647  }
1649  {
1650  if ( where != m_RangesList.end() ) {
1651  m_RangesVector.clear();
1652  }
1653  else if ( !m_RangesVector.empty() ) {
1654  _ASSERT(m_RangesVector.size() == m_RangesList.size());
1655  m_RangesVector.push_back(r);
1656  }
1657  auto new_iter = m_RangesList.insert(where, r);
1658  x_Index(new_iter);
1659  return new_iter;
1660  }
1662  {
1663  x_Unindex(iter);
1664  if ( next(iter) != m_RangesList.end() ) {
1665  m_RangesVector.clear();
1666  }
1667  else if ( !m_RangesVector.empty() ) {
1668  _ASSERT(m_RangesVector.size() == m_RangesList.size());
1669  _ASSERT(m_RangesVector.back() == *iter);
1670  m_RangesVector.pop_back();
1671  }
1672  m_RangesList.erase(iter);
1673  }
1674 
1675  const_iterator x_FindRangeContaining(position_type pos) const
1676  {
1677  auto iter = m_IndexByFirst.upper_bound(pos);
1678  if ( iter != m_IndexByFirst.begin() ) {
1679  _ASSERT(iter == m_IndexByFirst.end() || (*iter)->GetFirstFrom() > pos);
1680  auto prev_iter = prev(iter);
1681  _ASSERT((*prev_iter)->GetFirstFrom() <= pos);
1682  if ( (*prev_iter)->GetFirstToOpen() > pos ) {
1683  return *prev_iter;
1684  }
1685  }
1686  if ( iter != m_IndexByFirst.end() ) {
1687  return *iter;
1688  }
1689  return end();
1690  }
1691  // find range containing pos on its second row
1692  // if such range exists then:
1693  // result.second = true
1694  // result.first.first = the range
1695  // result.first.second = end()
1696  // if no such range exist then:
1697  // result.second = false
1698  // result.first.first is the next range by second row coordinates or end() if none
1699  // result.first.second is previous range by second row coordinates or end() if none
1700  pair<pair<const_iterator, const_iterator>, bool>
1702  {
1703  pair<pair<const_iterator, const_iterator>, bool> ret;
1704  auto iter = m_IndexBySecond.upper_bound(pos);
1705  _ASSERT(iter == m_IndexBySecond.end() || (*iter)->GetSecondFrom() > pos);
1706  if ( iter != m_IndexBySecond.begin() ) {
1707  auto prev_iter = prev(iter);
1708  _ASSERT((*prev_iter)->GetSecondFrom() <= pos);
1709  if ( (*prev_iter)->GetSecondToOpen() > pos ) {
1710  ret.second = true;
1711  ret.first.first = *prev_iter;
1712  ret.first.second = end();
1713  return ret;
1714  }
1715  ret.first.second = *prev_iter;
1716  }
1717  else {
1718  ret.first.second = end();
1719  }
1720  if ( iter != m_IndexBySecond.end() ) {
1721  ret.first.first = *iter;
1722  }
1723  else {
1724  ret.first.first = end();
1725  }
1726  return ret;
1727  }
1729  {
1730  _ASSERT(!r.Empty());
1731  auto iter = m_IndexByFirst.lower_bound(r.GetFirstFrom());
1732  if ( iter == m_IndexByFirst.end() ) {
1733  return end_nc();
1734  }
1735  else {
1736  return *iter;
1737  }
1738  }
1739 
1741  {
1742  for ( auto& rg : m_RangesVector ) {
1743  rg.SetFirstFrom(rg.GetFirstFrom()*3);
1744  rg.SetSecondFrom(rg.GetSecondFrom()*3);
1745  rg.SetLength(rg.GetLength()*3);
1746  }
1747  for ( auto& rg : m_RangesList ) {
1748  rg.SetFirstFrom(rg.GetFirstFrom()*3);
1749  rg.SetSecondFrom(rg.GetSecondFrom()*3);
1750  rg.SetLength(rg.GetLength()*3);
1751  }
1752  for ( auto& rg : m_Insertions ) {
1753  rg.SetFirstFrom(rg.GetFirstFrom()*3);
1754  rg.SetSecondFrom(rg.GetSecondFrom()*3);
1755  rg.SetLength(rg.GetLength()*3);
1756  }
1757  }
1758 
1759  template<class TRanges>
1761  typename TRanges::iterator it = ranges.begin();
1762  while (it != ranges.end()) {
1763  if (!it->IntersectingWith(range)) {
1764  typename TRanges::iterator del = it;
1765  ++it;
1766  ranges.erase(del);
1767  continue;
1768  }
1769  if (it->GetFirstFrom() < range.GetFrom() || it->GetFirstTo() > range.GetTo()) {
1770  it->IntersectWith(range);
1771  }
1772  ++it;
1773  }
1774  }
1775 
1776  template<class TRanges>
1778  typename TRanges::iterator it = ranges.begin();
1779  while (it != ranges.end()) {
1780  if (it->GetSecondTo() < range.GetFrom() || it->GetSecondFrom() > range.GetTo()) {
1781  typename TRanges::iterator del = it;
1782  ++it;
1783  ranges.erase(del);
1784  continue;
1785  }
1786  if (it->GetSecondFrom() < range.GetFrom() || it->GetSecondTo() > range.GetTo()) {
1787  it->IntersectSecondWith(range);
1788  }
1789  ++it;
1790  }
1791  }
1792 
1793  mutable TAlignRangeVectorImpl m_RangesVector; // vector of align ranges for indexed access
1794  TAlignRangeList m_RangesList; // main align range storage
1795  TInsertions m_Insertions; // insertions storage
1796  int m_Flags; /// combination of EFlags
1797 
1798  TIndexByFirst m_IndexByFirst; // index of ranges by coordinate on first sequence
1799  TIndexBySecond m_IndexBySecond; // index of ranges by coordinate on second sequence
1800 };
1801 
1802 
1803 ///////////////////////////////////////////////////////////////////////////////
1804 /// CAlignRangeCollExtender
1805 /// this is an adapter for a collection that perfroms some of the operations
1806 /// faster due to an additional index built on top of the collection.
1807 
1808 template <class TColl>
1810 {
1811 public:
1813  typedef typename TColl::position_type position_type;
1814  typedef typename TColl::ESearchDirection TSearchDir;
1815 
1818 
1819  CAlignRangeCollExtender(const TColl& coll)
1820  {
1821  Init(coll);
1822  }
1823 
1824  void Init(const TColl& coll)
1825  {
1826  m_Coll = &coll;
1827  m_Ranges.clear(); // we are lazy and do not rebuild the index right now
1828  m_MapDirty = true;
1829  }
1830 
1831  void Clear()
1832  {
1833  m_Ranges.clear();
1834  m_MapDirty = true;
1835  }
1836 
1837  void UpdateIndex() const
1838  {
1839  if (m_MapDirty) {
1840  _ASSERT(m_Coll);
1841 
1842  m_Ranges.clear();
1843  ITERATE(typename TColl, it, *m_Coll) {
1844  const TAlignRange* r = &*it;
1845 
1846  if (m_Ranges.empty()) {
1847  m_From = r->GetSecondFrom();
1848  m_ToOpen = r->GetSecondToOpen();
1849  }
1850  else {
1851  m_From = min(m_From, r->GetSecondFrom());
1852  m_ToOpen = max(m_ToOpen, r->GetSecondToOpen());
1853  }
1854 
1856  (r->GetSecondFrom(), r));
1857  }
1858 
1859  m_MapDirty = false;
1860  }
1861  }
1862 
1864  {
1865  return m_Ranges.begin();
1866  }
1867 
1869  {
1870  return m_Ranges.end();
1871  }
1872 
1874  {
1875  UpdateIndex();
1876  return m_From;
1877  }
1878 
1880  {
1881  UpdateIndex();
1882  return m_ToOpen;
1883  }
1884 
1886  {
1887  return GetSecondToOpen() - 1;
1888  }
1889 
1891  {
1892  return m_ToOpen - m_From;
1893  }
1894 
1896  {
1898  }
1899 
1900  /// finds a segment at logarithmic time (linear time on first call)
1901  /// returns an iterator to the fisrt segment containing "pos"
1903  {
1904  typename TFrom2Range::const_iterator it =
1905  std::lower_bound(m_Ranges.begin(), m_Ranges.end(), pos, PItLess());
1906  if (it != m_Ranges.end()) {
1907  const TAlignRange& r = *it->second;
1908  _ASSERT(r.GetSecondTo() >= pos);
1909  return r.SecondContains(pos) ? it : m_Ranges.end();
1910  }
1911  return it;
1912  }
1913 
1914 protected:
1915  struct PItLess
1916  {
1917  typedef typename TAlignRange::position_type position_type;
1918  bool operator()(const typename TFrom2Range::value_type& p,
1919  position_type pos)
1920  {
1921  return p.second->GetSecondTo() < pos;
1922  }
1924  const typename TFrom2Range::value_type& p)
1925  {
1926  return pos < p.second->GetSecondTo();
1927  }
1928  bool operator()(const typename TFrom2Range::value_type& p1,
1929  const typename TFrom2Range::value_type& p2)
1930  {
1931  return p1.second->GetSecondTo() < p2.second->GetSecondTo();
1932  }
1933  };
1934 
1935  const TColl* m_Coll;
1936 
1937  mutable bool m_MapDirty; // need to update map
1941 };
1942 
1943 
1944 template<class TAlnRange>
1945 inline
1947 {
1948  m_Flags = src.GetFlags();
1949  m_Ranges.assign(src.begin(), src.end());
1950  m_Insertions.assign(src.GetInsertions().begin(), src.GetInsertions().end());
1951 }
1952 
1953 
1955 
1956 #endif // UTIL___ALIGN_RANGE_COLL__HPP
vector< TRangeWithFuzz > TRanges
Definition: Seq_loc.cpp:4277
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
CAlignRangeCollExtender this is an adapter for a collection that perfroms some of the operations fast...
position_type GetSecondTo() const
position_type GetSecondToOpen() const
void Init(const TColl &coll)
CAlignRangeCollExtender(const TColl &coll)
multimap< position_type, const TAlignRange * > TFrom2Range
position_type GetSecondFrom() const
TColl::ESearchDirection TSearchDir
CRange< position_type > GetSecondRange() const
const_iterator FindOnSecond(position_type pos) const
finds a segment at logarithmic time (linear time on first call) returns an iterator to the fisrt segm...
const_iterator end() const
TFrom2Range::const_iterator const_iterator
TColl::position_type position_type
position_type GetSecondLength(void) const
TColl::TAlignRange TAlignRange
const_iterator begin() const
bidirectional_iterator_tag iterator_category
bool operator==(const const_iterator &iter) const
bool operator!=(const const_iterator &iter) const
class CAlignRangeCollectionList<TAlignRange> represent a sorted collection of TAlignRange.
const_iterator find_insertion_point(const TAlignRange &range) const
bool IsSet(int flags) const
static int ValidateRanges(const TAlignRange &r_1, const TAlignRange &r_2)
determine conflicts between two ranges
const_iterator x_FindRangeContaining(position_type pos) const
CAlignRangeCollectionList(const TAlignRangeVectorImpl &v, int flags)
const_iterator begin() const
const_iterator insert(const TAlignRange &r)
ESearchDirection
adding empty ranges is considered valid, they are simply ignored
CAlignRangeCollectionList< TAlignRange > TThisType
void x_Unindex(TIndexKey iter)
const_iterator insert(const_iterator it, const TAlignRange &arg_r)
TAlignRangeVectorImpl m_RangesVector
void x_UnindexBySecond(TIndexKey iter)
const TAlignRange & operator[](size_type pos) const
Use iterator access.
void IntersectFirst(const CRange< position_type > &range)
list< TAlignRange > TAlignRangeList
pair< const_iterator, bool > find_2(position_type pos) const
returns an iterator pointing to a range containing "pos"; if such a range does not exists an iterator...
const_reverse_iterator rbegin() const
position_type GetFirstLength(void) const
TSignedSeqPos GetSecondPosByFirstPos(position_type pos, ESearchDirection dir=eNone) const
void IntersectSecond(const CRange< position_type > &range)
void Normalize()
ensures that segments are sorted, if fAllowAdjust is not set - merges adjacent segments
pair< pair< const_iterator, const_iterator >, bool > find_2_by_second(position_type pos) const
returns an iterator pointing to a range containing "pos"; if such a range does not exists an iterator...
void push_back(const TAlignRange &r)
void x_IntersectFirst(TRanges &ranges, const CRange< position_type > &range)
TAlignRangeList::size_type size_type
bool x_Equals(const TThisType &c) const
iterator x_FindInsertionPlace(const TAlignRange &r)
position_type GetFirstFrom() const
TAlignRangeList::const_reverse_iterator const_reverse_iterator
const TIndexByFirst & GetIndexByFirst() const
CRange< position_type > GetFirstRange() const
@ fMixedDir
contains at least one reversed range
@ fPolicyMask
Keep compatible with SC-8.0.
@ fAllowOverlap
allow segments with different orientation
@ fAllowAbutting
allow segments overlapping on the first sequence
@ fInvalid
collection was modified and not validated
@ fIgnoreInsertions
allows segments not separated by gaps
@ fAllowMixedDir
enforce all policies after any modification
@ fReversed
contains at least one direct range
@ fDefaultPolicy
do not store insertions
@ fUnsorted
one or more policies violated
TSignedSeqPos GetFirstPosBySecondPos(position_type pos, ESearchDirection dir=eNone) const
TAlignRangeList::iterator TRawIterator
CAlignRangeCollectionList(const TAlignRangeList &v, int flags)
const TIndexBySecond & GetIndexBySecond() const
const_iterator find(position_type pos) const
TIndexByFirst m_IndexByFirst
combination of EFlags
void CombineAbutting()
merge adjacent segments together, merging changes collection size and invalidates iterators
const TInsertions & GetInsertions() const
Each insertion shows where the 'first' sequence has a gap while the 'second' sequence has the inserti...
vector< TAlignRange > TInsertions
const_reverse_iterator rend() const
TRawIterator x_Insert(TRawIterator where, const TAlignRange &r)
void x_Erase(TRawIterator iter)
multiset< TIndexKey, PIndexBySecondLess > TIndexBySecond
void x_Index(TIndexKey iter)
const_iterator end() const
multiset< TIndexKey, PIndexByFirstLess > TIndexByFirst
vector< TAlignRange > TAlignRangeVector
position_type GetFirstTo() const
vector< TAlignRange > TAlignRangeVectorImpl
Use iterators on TAlignRangeList or TInsertions for insertions.
position_type GetFirstToOpen() const
void x_UnindexByFirst(TIndexKey iter)
const_iterator find_by_second(position_type pos) const
const_iterator erase(const_iterator it_del)
void Validate()
analyses segements and updates flags
void AddInsertion(const TAlignRange &r)
TAlignRangeVectorImpl & x_GetAlignRangeVector() const
void x_IntersectSecond(TRanges &ranges, const CRange< position_type > &range)
TAlignRange::position_type position_type
void AddInsertions(const TInsertions &insertions)
void AddInsertions(const TThisType &collection)
CAlignRangeCollectionList(int flags=fDefaultPolicy)
pair< pair< const_iterator, const_iterator >, bool > x_FindRangesBySecondContaining(position_type pos) const
class CAlignRangeCollection<TAlignRange> represent a sorted collection of TAlignRange.
position_type GetFirstTo() const
void Normalize()
ensures that segments are sorted, if fAllowAdjust is not set - merges adjacent segments
void x_IntersectSecond(TAlignRangeVector &ranges, const CRange< position_type > &range)
TAlignRangeVector m_Ranges
const_reverse_iterator rend() const
TAlignRangeVector::const_reverse_iterator const_reverse_iterator
size_type size() const
TAlignRangeVector::const_iterator const_iterator
vector< TAlignRange > TAlignRangeVector
const_iterator insert(const_iterator where, const TAlignRange &r)
bool IsSet(int flags) const
CAlignRangeCollection(const TAlignRangeVector &v, int flags)
void AddInsertions(const TThisType &collection)
TSignedSeqPos GetFirstPosBySecondPos(position_type pos, ESearchDirection dir=eNone) const
void push_back(const TAlignRange &r)
void CombineAbutting()
merge adjacent segments together, merging changes collection size and invalidates iterators
CRange< position_type > GetFirstRange() const
position_type GetFirstToOpen() const
const_iterator lower_bound(position_type pos) const
@ fAllowOverlap
allow segments with different orientation
@ fMixedDir
contains at least one reversed range
@ fPolicyMask
Keep compatible with SC-8.0.
@ fIgnoreInsertions
allows segments not separated by gaps
@ fAllowAbutting
allow segments overlapping on the first sequence
@ fAllowMixedDir
enforce all policies after any modification
@ fInvalid
collection was modified and not validated
@ fUnsorted
one or more policies violated
@ fDefaultPolicy
do not store insertions
@ fReversed
contains at least one direct range
void AddInsertions(const TAlignRangeVector &insertions)
position_type GetFirstFrom() const
void Validate()
analyses segements and updates flags
const_iterator insert(const TAlignRange &r)
void x_SetFlags(int flags)
const_iterator upper_bound(position_type pos) const
TSignedSeqPos GetSecondPosByFirstPos(position_type pos, ESearchDirection dir=eNone) const
const_iterator erase(const_iterator it)
bool x_Equals(const TThisType &c) const
void IntersectSecond(const CRange< position_type > &range)
const TAlignRange & operator[](size_type pos) const
position_type GetFirstLength(void) const
const_iterator find(position_type pos) const
void x_ResetFlags(int flags)
ESearchDirection
adding empty ranges is considered valid, they are simply ignored
CAlignRangeCollection(int flags=fDefaultPolicy)
TAlignRange::position_type position_type
const_iterator end() const
const_reverse_iterator rbegin() const
TAlignRangeVector m_Insertions
void AddInsertion(const TAlignRange &r)
TAlignRangeVector::size_type size_type
void x_IntersectFirst(TAlignRangeVector &ranges, const CRange< position_type > &range)
void Assign(const CAlignRangeCollectionList< TAlignRange > &src)
pair< const_iterator, bool > find_2(position_type pos) const
returns an iterator pointing to a range containing "pos"; if such a range does not exists an iterator...
CAlignRangeCollection< TAlignRange > TThisType
const TAlignRangeVector & GetInsertions() const
Each insertion shows where the 'first' sequence has a gap while the 'second' sequence has the inserti...
const_iterator begin() const
TAlignRangeVector::reverse_iterator reverse_iterator
const_iterator find_insertion_point(const TAlignRange &r)
static int ValidateRanges(const TAlignRange &r_1, const TAlignRange &r_2)
determine conflicts between two ranges
TAlignRange & operator[](size_type pos)
void IntersectFirst(const CRange< position_type > &range)
CAlignRange Represents an element of pairwise alignment of two sequences.
Definition: align_range.hpp:63
CRange –.
Definition: Range.hpp:68
void clear()
Definition: map.hpp:309
const_iterator end() const
Definition: map.hpp:292
iterator insert(const value_type &val)
Definition: map.hpp:305
const_iterator begin() const
Definition: map.hpp:291
bool empty() const
Definition: map.hpp:289
const_iterator begin() const
Definition: set.hpp:286
const_iterator upper_bound(const key_type &key) const
Definition: set.hpp:290
iterator insert(const value_type &val)
Definition: set.hpp:300
const_iterator end() const
Definition: set.hpp:287
void clear()
Definition: set.hpp:304
void erase(iterator pos)
Definition: set.hpp:302
const_iterator lower_bound(const key_type &key) const
Definition: set.hpp:289
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
pair< TRange, CRef< CSeq_align > > TAlignRange
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_DEPRECATED
bool IsAbutting(const TThisType &r) const
position_type GetFirstToOpen(void) const
bool IsDirect() const
Definition: align_range.hpp:96
bool NotEmpty(void) const
TThisType & CombineWithAbutting(const TThisType &r)
Position position_type
Definition: align_range.hpp:65
position_type GetFirstFrom(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
int i
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Defines NCBI C++ exception handling.
The NCBI C++/STL use hints.
T max(T x_, T y_)
T min(T x_, T y_)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define count
bool operator()(const typename TFrom2Range::value_type &p1, const typename TFrom2Range::value_type &p2)
TAlignRange::position_type position_type
bool operator()(const typename TFrom2Range::value_type &p, position_type pos)
bool operator()(position_type pos, const typename TFrom2Range::value_type &p)
bool operator()(position_type pos, const TIndexKey &k2) const
bool operator()(const TIndexKey &k1, const TIndexKey &k2) const
bool operator()(const TIndexKey &k1, position_type pos) const
bool operator()(position_type pos, const TIndexKey &k2) const
bool operator()(const TIndexKey &k1, const TIndexKey &k2) const
bool operator()(const TIndexKey &k1, position_type pos) const
#define _ASSERT
Modified on Fri Sep 20 14:57:39 2024 by modify_doxy.py rev. 669887