NCBI C++ ToolKit
handle_range.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: handle_range.cpp 99486 2023-04-04 20:35:05Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * CHandleRange:
30 * Internal class to be used instead of CSeq_loc
31 * for better performance.
32 *
33 */
34 
35 #include <ncbi_pch.hpp>
37 
38 #include <algorithm>
39 
42 
43 ////////////////////////////////////////////////////////////////////
44 //
45 //
46 //
47 
48 
50  : m_TotalRanges_plus(TRange::GetEmpty()),
51  m_TotalRanges_minus(TRange::GetEmpty()),
52  m_IsCircular(false),
53  m_IsSingleStrand(true),
54  m_MoreBefore(false),
55  m_MoreAfter(false)
56 {
    // Constructor that takes no source data (presumably CHandleRange(void);
    // the signature line is not present in this listing).
    // Initial state: both total ranges empty, not circular, treated as
    // single-strand, and neither "more before" nor "more after" flagged.
57 }
58 
59 
61  : m_TotalRanges_plus(TRange::GetEmpty()),
62  m_TotalRanges_minus(TRange::GetEmpty()),
63  m_IsCircular(false),
64  m_IsSingleStrand(true),
65  m_MoreBefore(false),
66  m_MoreAfter(false)
67 {
    // Filtering constructor (signature line not present in this listing):
    // starts from the same empty state as the default constructor, then
    // walks every (range, strand) pair of `src` and re-adds those that
    // intersect `filter`, passing `range & filter` and the original strand
    // to AddRange() so that all bookkeeping is rebuilt from scratch.
    // NOTE(review): `&` on TRange presumably yields the intersection - confirm.
68  ITERATE ( CHandleRange, it, src ) {
69  if ( it->first.IntersectingWith(filter) ) {
70  AddRange(it->first & filter, it->second);
71  }
72  }
73 }
74 
75 
77 {
    // Intentionally empty body. Presumably the destructor ~CHandleRange(void);
    // the signature line is not present in this listing.
78 }
79 
80 
82 {
    // Builds a TTotalRangeFlags bitmask (eStrandPlus / eStrandMinus) telling
    // which strands this handle range covers. Presumably GetStrandsFlag();
    // the signature line is not present in this listing.
    // Returns 0 when no ranges are stored.
83  TTotalRangeFlags ret = 0;
84  if ( m_Ranges.empty() ) {
85  return ret;
86  }
87  if ( !m_IsCircular ) {
    // Non-circular: a strand counts if its total range is non-empty or
    // if the strand of the first stored range includes it.
88  if ( !m_TotalRanges_plus.Empty() ||
89  x_IncludesPlus(m_Ranges.front().second) ) {
90  ret |= eStrandPlus;
91  }
92  if ( !m_TotalRanges_minus.Empty() ||
93  x_IncludesMinus(m_Ranges.front().second) ) {
94  ret |= eStrandMinus;
95  }
96  }
97  else {
    // Circular: only the strand of the first stored range is consulted;
    // the two total-range members are not examined in this branch.
98  if ( x_IncludesPlus(m_Ranges.front().second) ) {
99  ret |= eStrandPlus;
100  }
101  if ( x_IncludesMinus(m_Ranges.front().second) ) {
102  ret |= eStrandMinus;
103  }
104  }
105  return ret;
106 }
107 
108 
110 {
    // Convenience overload (signature line not present in this listing):
    // forwards to the longer AddRange() overload with both the more_before
    // and more_after arguments set to false.
111  AddRange(range, strand, false, false);
112 }
113 
114 
116  bool more_before, bool more_after, bool circular_rna)
117 {
    // Appends (range, strand) to m_Ranges and updates the bookkeeping:
    // single-strand / circular state, the more-before / more-after flags
    // and the total ranges.
    // NOTE(review): the first signature line and several statements are
    // missing from this listing (the Doxygen line numbers jump); the notes
    // below mark the gaps and do not guess at their content.
118  if ( !m_Ranges.empty() && m_IsSingleStrand ) {
119  if ( strand != m_Ranges.front().second ) {
120  // The new strand differs from the strand of the first stored range
121  if ( m_IsCircular ) {
122  // Different strands, the location can not be circular
123  // Reorganize total ranges by strand
124  TRange total_range = m_TotalRanges_plus;
125  total_range += m_TotalRanges_minus;
126  if ( x_IncludesPlus(m_Ranges.front().second) ) {
127  m_TotalRanges_plus = total_range;
128  }
129  else {
    // NOTE(review): the statement of this branch (listing line 130) is missing.
131  }
132  if ( x_IncludesMinus(m_Ranges.front().second) ) {
133  m_TotalRanges_minus = total_range;
134  }
135  else {
    // NOTE(review): the statement of this branch (listing line 136) is missing.
137  }
138  m_IsCircular = false;
139  }
140  m_IsSingleStrand = false;
141  }
142  else {
143  // Same strand, but location may become circular
144  if ( !m_IsCircular && !circular_rna ) {
145  // Check if location becomes circular
    // NOTE(review): the loop header (listing line 146) is missing; the
    // body below uses an iterator `it` over previously stored ranges.
147  // compare with last non-empty range
148  if ( !it->first.Empty() ) {
    // On a plus-including strand the location is flagged circular when the
    // new range starts before the compared range; otherwise when it
    // starts after it.
149  if ( x_IncludesPlus(strand) ) {
150  m_IsCircular =
151  range.GetFrom() < it->first.GetFrom();
152  }
153  else {
154  m_IsCircular =
155  range.GetFrom() > it->first.GetFrom();
156  }
157  break;
158  }
159  }
160  if ( m_IsCircular ) {
161  // Reorganize total ranges.
162  // First part (everything already collected)
163  // goes to m_TotalRanges_plus,
164  // second part (all new ranges)
165  // will go to m_TotalRanges_minus.
166 
167  // Verify that until now all ranges are on the same strand.
    // NOTE(review): the statements for this step (listing lines 168-172) are missing.
173  }
174  else {
176  _ASSERT(!m_Ranges.empty());
177  //_ASSERT(more_before);
178  //_ASSERT(m_MoreAfter);
179  if ( more_after ) {
180  m_MoreAfter = true;
181  }
182  }
183  }
184  }
185  }
186  else {
    // Reached when this is the first range or the location is already
    // multi-strand: the caller's flags are only ever set here, never cleared.
187  if ( more_before ) {
188  m_MoreBefore = true;
189  }
190  if ( more_after ) {
191  m_MoreAfter = true;
192  }
193  }
194  m_Ranges.push_back(TRanges::value_type(range, strand));
195  if ( !m_IsCircular ) {
196  // Regular location
197  if ( x_IncludesPlus(strand) ) {
    // NOTE(review): statement (listing line 198) is missing.
199  }
200  if ( x_IncludesMinus(strand) ) {
    // NOTE(review): statement (listing line 201) is missing.
202  }
203  }
204  else {
205  // Circular location, second part
    // NOTE(review): statement (listing line 206) is missing.
207  }
208 }
209 
210 
212 {
    // Appends every (range, strand) pair of `hr` to this object via the
    // two-argument AddRange(), in hr's iteration order. Presumably
    // AddRanges(const CHandleRange&); the signature line is not present
    // in this listing.
213  ITERATE ( CHandleRange, it, hr ) {
214  AddRange(it->first, it->second);
215  }
216 }
217 
218 
220 {
    // Strand compatibility test for str1 / str2 (presumably
    // x_IntersectingStrands(); the signature line is not present in this
    // listing). Returns true if either strand is eNa_strand_unknown or if
    // both are equal. No other strand value is treated as a wildcard here
    // (see the open question in the trailing comment).
221  return
222  str1 == eNa_strand_unknown // str1 includes anything
223  ||
224  str2 == eNa_strand_unknown // str2 includes anything
225  ||
226  str1 == str2; // accept only equal strands
227  //### Not sure about "eNa_strand_both includes eNa_strand_plus" etc.
228 }
229 
230 
232 {
    // Presumably IntersectingWithTotalRange(hr); the signature line is not
    // present in this listing. The result is true as soon as either handle
    // range is circular.
    // NOTE(review): the remaining operands of the expression (listing
    // lines 234-235) are missing here.
233  return IsCircular() || hr.IsCircular() ||
236 }
237 
238 
240 {
    // Pairwise test of every stored range against every range of `hr`
    // (presumably IntersectingWithSubranges(); the signature line is not
    // present in this listing). Returns true on the first pair whose
    // intervals intersect AND whose strands pass x_IntersectingStrands();
    // returns false if no such pair exists.
241  ITERATE(TRanges, it1, m_Ranges) {
242  ITERATE(TRanges, it2, hr.m_Ranges) {
243  if ( it1->first.IntersectingWith(it2->first) ) {
244  if ( x_IntersectingStrands(it1->second, it2->second) ) {
245  return true;
246  }
247  }
248  }
249  }
250  return false;
251 }
252 
253 
255 {
    // NOTE(review): presumably IntersectingWith(const CHandleRange&). Both
    // the signature line and the single body statement (listing line 256)
    // are missing from this listing, so the behavior cannot be documented
    // from here.
257 }
258 
259 
261 {
    // Strand-agnostic pairwise intersection test against `hr` (presumably
    // IntersectingWith_NoStrand(); the signature line is not present in
    // this listing).
    // NOTE(review): the condition guarding the early `return false`
    // (listing line 262) is missing here.
263  return false;
264  }
    // True on the first pair of intervals that intersect; strand values
    // are not consulted in this loop.
265  ITERATE(TRanges, it1, m_Ranges) {
266  ITERATE(TRanges, it2, hr.m_Ranges) {
267  if ( it1->first.IntersectingWith(it2->first) ) {
268  return true;
269  }
270  }
271  }
272  return false;
273 }
274 
275 
277 {
    // Folds `range` together with every stored non-empty range that
    // intersects or directly abuts it (presumably MergeRange(range, strand);
    // the signature line is not present in this listing). Each absorbed
    // range is erased from m_Ranges and accumulated into `range` with +=.
    // The iterator advances only when nothing was erased, because erase()
    // already returns the next element.
278  for ( TRanges::iterator it = m_Ranges.begin(); it != m_Ranges.end(); ) {
279  // Find intersecting intervals, discard strand information
280  // Also merge adjacent ranges, prevent merging whole-to + whole-from
281  if ( !it->first.Empty() &&
282  (it->first.IntersectingWith(range) ||
283  it->first.GetFrom() == range.GetToOpen() ||
284  it->first.GetToOpen() == range.GetFrom()) ) {
285  // Remove the intersecting interval, update the merged range.
286  // We assume that WholeFrom is less than any non-whole value
287  // and WholeTo is greater than any non-whole value.
288  range += it->first;
289  it = m_Ranges.erase(it);
290  }
291  else {
292  ++it;
293  }
294  }
    // NOTE(review): the final statement (listing line 295), which presumably
    // stores the merged range, is missing from this listing.
296 }
297 
298 
300 {
    // Presumably GetLeft(); the signature line is not present in this
    // listing. For a non-circular location the existing comment says the
    // result is the minimum of the 'from' coordinates. For a circular one
    // the result depends on whether the first range's strand is reverse.
    // NOTE(review): both return expressions (listing lines 304 and 307) are
    // missing here.
301  if ( !m_IsCircular ) {
302  // Since empty ranges have extremely large 'from' coordinate it's
303  // ok to simply return min of 'from' coordinates.
305  }
306  return IsReverse(m_Ranges.front().second) ?
308 }
309 
310 
312 {
    // Presumably GetRight(); the signature line is not present in this
    // listing. For a non-circular location: if one total range is empty,
    // the 'to' coordinate of the other is returned, so the empty range does
    // not take part in the result.
    // NOTE(review): the both-non-empty return (listing line 322) and the
    // operands of the circular-case conditional (listing line 326) are
    // missing here.
313  if ( !m_IsCircular ) {
314  // A bit more logic is required to check empty ranges.
315  if ( m_TotalRanges_minus.Empty() ) {
316  return m_TotalRanges_plus.GetTo();
317  }
318  else if ( m_TotalRanges_plus.Empty() ) {
319  return m_TotalRanges_minus.GetTo();
320  }
321  else {
323  }
324  }
325  return IsReverse(m_Ranges.front().second) ?
327 }
328 
329 
332 {
    // Computes one TRange covering the location for the strands selected by
    // `flags` (presumably GetOverlappingRange(TTotalRangeFlags); the
    // signature lines are not present in this listing).
333  TRange ret = TRange::GetEmpty();
334  if (m_IsCircular) {
    // Circular location: the answer is all-or-nothing. It is the whole
    // range if `flags` selects the circular strand, else empty.
    // NOTE(review): the operands choosing circular_strand (listing line
    // 337) are missing here.
335  ETotalRangeFlags circular_strand =
336  IsReverse(m_Ranges.front().second) ?
338  if (flags & circular_strand) {
339  ret = TRange::GetWhole();
340  }
341  return ret;
342  }
    // Non-circular: accumulate the total range of each requested strand.
343  if (flags & eStrandPlus) { // == eCircularStart
344  ret += m_TotalRanges_plus;
345  }
346  if (flags & eStrandMinus) { // == eCircularEnd
347  ret += m_TotalRanges_minus;
348  }
    // A single-strand location flagged as continuing before or after its
    // stored ranges gets the result stretched to the matching extreme.
349  if ( m_IsSingleStrand && (m_MoreBefore || m_MoreAfter) ) {
350  _ASSERT(!m_Ranges.empty());
351  if ( x_IncludesPlus(m_Ranges.front().second) ) {
352  if ( (flags & eStrandPlus) ||
353  x_IncludesMinus(m_Ranges.front().second) ) {
354  if ( m_MoreBefore ) {
    // NOTE(review): statement (listing line 355) is missing.
356  }
357  if ( m_MoreAfter ) {
358  ret.SetTo(TRange::GetWholeTo());
359  }
360  }
361  }
362  else {
    // First range does not include plus: here "before" maps to the
    // 'to' end, i.e. the roles are swapped relative to the branch above.
363  if ( (flags & eStrandMinus) ) {
364  if ( m_MoreAfter ) {
    // NOTE(review): statement (listing line 365) is missing.
366  }
367  if ( m_MoreBefore ) {
368  ret.SetTo(TRange::GetWholeTo());
369  }
370  }
371  }
372  }
373  return ret;
374 }
375 
376 
378 CHandleRange::GetCircularRangeStart(bool include_origin) const
379 {
    // Returns a range `ret`, optionally widened to include the cut point
    // of the circular location.
    // NOTE(review): the return-type line and the statements that create
    // `ret` (listing lines 380-381) are missing from this listing.
382  if ( include_origin ) {
383  // Adjust start/stop to include cut point
384  if ( !IsReverse(m_Ranges.front().second) ) {
385  // Include end
386  ret.SetTo(TRange::GetWholeTo());
387  }
388  else {
389  // Include start
    // NOTE(review): statement (listing line 390) is missing.
391  }
392  }
393  return ret;
394 }
395 
396 
398 CHandleRange::GetCircularRangeEnd(bool include_origin) const
399 {
    // Returns a range `ret`, optionally widened to include the cut point
    // of the circular location. The visible SetTo() call sits in the
    // reverse-strand branch here.
    // NOTE(review): the return-type line and the statements that create
    // `ret` (listing lines 400-401) are missing from this listing.
402  if ( include_origin ) {
403  // Adjust start/stop to include cut point
404  if ( !IsReverse(m_Ranges.front().second) ) {
405  // Include end
    // NOTE(review): statement (listing line 406) is missing.
407  }
408  else {
409  // Include start
410  ret.SetTo(TRange::GetWholeTo());
411  }
412  }
413  return ret;
414 }
415 
416 
419 {
    // Accumulates, with +=, the intersection of `range` with every stored
    // range and returns the result; strands are not consulted. Returns an
    // empty range when `range` itself is empty. Presumably an overload of
    // GetOverlappingRange(); the signature lines are not present in this
    // listing.
420  TRange ret = TRange::GetEmpty();
421  if ( !range.Empty() ) {
422  ITERATE ( TRanges, it, m_Ranges ) {
423  ret += it->first.IntersectionWith(range);
424  }
425  }
426  return ret;
427 }
428 
429 
431  ENa_strand strand) const
432 {
    // Tests a single (range, strand) against the stored ranges: true on the
    // first stored range that intersects `range` and whose strand passes
    // x_IntersectingStrands() with `strand`. An empty `range` never
    // intersects anything. The first signature line is not present in this
    // listing.
433  if ( !range.Empty() ) {
434  ITERATE ( TRanges, it, m_Ranges ) {
435  if ( range.IntersectingWith(it->first) &&
436  x_IntersectingStrands(strand, it->second) ) {
437  return true;
438  }
439  }
440  }
441  return false;
442 }
443 
444 
445 bool CHandleRange::HasGaps(void) const
446 {
    // Reports true when more than one range is stored, or when the location
    // was flagged as continuing before or after the stored ranges.
447  return m_Ranges.size() > 1 || m_MoreBefore || m_MoreAfter;
448 }
449 
450 
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
CHandleRange(void)
TTotalRangeFlags GetStrandsFlag(void) const
TSeqPos GetRight(void) const
bool x_IncludesMinus(const ENa_strand &strand) const
~CHandleRange(void)
bool IsCircular(void) const
TSeqPos GetLeft(void) const
bool IntersectingWithSubranges(const CHandleRange &hr) const
void AddRange(TRange range, ENa_strand strand)
bool IntersectingWith(const CHandleRange &hr) const
TRanges m_Ranges
TRange GetCircularRangeStart(bool include_origin=true) const
static bool x_IntersectingStrands(ENa_strand str1, ENa_strand str2)
TRange m_TotalRanges_minus
bool x_IncludesPlus(const ENa_strand &strand) const
void AddRanges(const CHandleRange &hr)
TRange m_TotalRanges_plus
bool IntersectingWithTotalRange(const CHandleRange &hr) const
vector< TRangeWithStrand > TRanges
bool IntersectingWith_NoStrand(const CHandleRange &hr) const
TRange GetCircularRangeEnd(bool include_origin=true) const
bool HasGaps(void) const
TRange GetOverlappingRange(TTotalRangeFlags flags=eStrandAny) const
unsigned int TTotalRangeFlags
void MergeRange(TRange range, ENa_strand strand)
static uch flags
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_REVERSE_ITERATE(Type, Var, Cont)
Non constant version of REVERSE_ITERATE macro.
Definition: ncbimisc.hpp:834
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
static TThisType GetEmpty(void)
Definition: range.hpp:306
bool Empty(void) const
Definition: range.hpp:148
static position_type GetWholeFrom(void)
Definition: range.hpp:256
static TThisType GetWhole(void)
Definition: range.hpp:272
static position_type GetWholeTo(void)
Definition: range.hpp:264
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
range(_Ty, _Ty) -> range< _Ty >
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
T max(T x_, T y_)
T min(T x_, T y_)
#define _ASSERT
Modified on Wed May 29 18:43:50 2024 by modify_doxy.py rev. 669887