NCBI C++ ToolKit
handle_range_map.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: handle_range_map.cpp 99486 2023-04-04 20:35:05Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * CHandleRangeMap is a substitute for seq-loc to make searching
30 * over locations more efficient.
31 *
32 */
33 
34 #include <ncbi_pch.hpp>
36 #include <objects/seq/seq__.hpp>
42 #include <objmgr/seq_map_ci.hpp>
44 #include <objmgr/impl/tse_info.hpp>
45 
48 
49 
50 ////////////////////////////////////////////////////////////////////
51 //
52 // CHandleRangeMap::
53 //
54 
55 
57 {
58 }
59 
60 
62 {
63 }
64 
65 
67 {
68  m_LocMap.clear();
69 }
70 
71 
74 
79 };
80 
81 
83  ETransSplicing trans_splcing)
84 {
86  state.m_TransSplicing = trans_splcing;
87  AddLocation(loc, state);
88 }
89 
90 
93 {
94  switch ( loc.Which() ) {
96  case CSeq_loc::e_Null:
97  {
98  return;
99  }
100  case CSeq_loc::e_Empty:
101  {
104  return;
105  }
106  case CSeq_loc::e_Whole:
107  {
110  return;
111  }
112  case CSeq_loc::e_Int:
113  {
114  const CSeq_interval& i = loc.GetInt();
115  AddRange(i.GetId(),
116  i.GetFrom(),
117  i.GetTo(),
118  i.IsSetStrand()? i.GetStrand(): eNa_strand_unknown,
119  state);
120  return;
121  }
122  case CSeq_loc::e_Pnt:
123  {
124  const CSeq_point& p = loc.GetPnt();
125  AddRange(p.GetId(),
126  p.GetPoint(),
127  p.GetPoint(),
129  state);
130  return;
131  }
133  {
134  // extract each range
135  const CPacked_seqint& pi = loc.GetPacked_int();
136  ITERATE( CPacked_seqint::Tdata, ii, pi.Get() ) {
137  const CSeq_interval& i = **ii;
138  AddRange(i.GetId(),
139  i.GetFrom(),
140  i.GetTo(),
141  i.IsSetStrand()? i.GetStrand(): eNa_strand_unknown,
142  state);
143  }
144  return;
145  }
147  {
148  // extract each point
149  const CPacked_seqpnt& pp = loc.GetPacked_pnt();
151  ENa_strand strand =
154  AddRange(idh, CRange<TSeqPos>(*pi, *pi), strand, state);
155  }
156  return;
157  }
158  case CSeq_loc::e_Mix:
159  {
160  // extract sub-locations
161  ITERATE ( CSeq_loc_mix::Tdata, li, loc.GetMix().Get() ) {
162  AddLocation(**li, state);
163  }
164  return;
165  }
166  case CSeq_loc::e_Equiv:
167  {
168  // extract sub-locations
169  bool first = true; // allow intron only before the first sub-location
170  ITERATE ( CSeq_loc_equiv::Tdata, li, loc.GetEquiv().Get() ) {
171  if ( first ) {
172  // the remaining gaps between sub-location aren't introns
173  first = false;
174  }
175  else {
176  // there's no intron between equiv sub-locations
177  state.m_PrevId.Reset();
178  }
179  AddLocation(**li, state);
180  }
181  return;
182  }
183  case CSeq_loc::e_Bond:
184  {
185  const CSeq_bond& bond = loc.GetBond();
186  const CSeq_point& pa = bond.GetA();
187  AddRange(pa.GetId(),
188  pa.GetPoint(),
189  pa.GetPoint(),
191  state);
192  if ( bond.IsSetB() ) {
193  const CSeq_point& pb = bond.GetB();
194  AddRange(pb.GetId(),
195  pb.GetPoint(),
196  pb.GetPoint(),
197  pb.IsSetStrand()? pb.GetStrand(): eNa_strand_unknown,
198  state);
199  }
200  return;
201  }
202  case CSeq_loc::e_Feat:
203  {
204  //### Not implemented (do we need it?)
205  return;
206  }
207  } // switch
208 }
209 
210 
212  const TRange& range, ENa_strand strand)
213 {
215  state.m_TransSplicing = eNoTransSplicing;
216  AddRange(h, range, strand, state);
217 }
218 
219 
221  const TRange& range, ENa_strand strand)
222 {
224  state.m_TransSplicing = eNoTransSplicing;
225  AddRange(id, range, strand, state);
226 }
227 
228 
230  TSeqPos from, TSeqPos to, ENa_strand strand,
231  SAddState& state)
232 {
233  AddRange(id, TRange(from, to), strand, state);
234 }
235 
236 
238  TSeqPos from, TSeqPos to, ENa_strand strand)
239 {
241  state.m_TransSplicing = eNoTransSplicing;
242  AddRange(id, from, to, strand, state);
243 }
244 
245 
247  const TRange& range,
248  ENa_strand strand,
249  SAddState& state)
250 {
251  CHandleRange& hr = m_LocMap[h];
252  if ( state.m_TransSplicing == eNoTransSplicing &&
253  state.m_PrevId && h && state.m_PrevId != h ) {
254  m_LocMap[state.m_PrevId].m_MoreAfter = true;
255  hr.m_MoreBefore = true;
256  if ( m_MasterSeq ) {
257  int pos1 = m_MasterSeq->FindSeg(state.m_PrevId);
258  int pos2 = m_MasterSeq->FindSeg(h);
259  if ( pos1 >= 0 && pos2 >= 0 && abs(pos2-pos1) > 1 ) {
260  bool minus1 = m_MasterSeq->GetMinusStrand(pos1);
261  bool minus2 = m_MasterSeq->GetMinusStrand(pos2);
262  bool backw = pos2 < pos1;
263  bool backw1 = IsReverse(state.m_PrevStrand) != minus1;
264  bool backw2 = IsReverse(strand) != minus2;
265  if ( backw1 == backw && backw2 == backw ) {
266  ENa_strand strand2 = backw? Reverse(strand): strand;
267  int dir = backw ? -1: 1;
268  for ( int pos = pos1+dir; pos != pos2; pos += dir ) {
269  CHandleRange& mhr =
271  mhr.AddRange(TRange::GetEmpty(), strand2, true, true);
272  }
273  }
274  }
275  }
276  }
277  hr.AddRange(range, strand, false, false, state.m_TransSplicing == eCircularRNA);
278  state.m_PrevId = h;
279  state.m_PrevStrand = strand;
280  state.m_PrevRange = range;
281 }
282 
283 
285  const TRange& range,
286  ENa_strand strand,
287  SAddState& state)
288 {
290 }
291 
292 
294  const CHandleRange& hr)
295 {
296  m_LocMap[h].AddRanges(hr);
297 }
298 
299 
301 {
302  return m_LocMap[h];
303 }
304 
305 
307 {
308  CHandleRangeMap rmap;
309  rmap.AddLocation(loc);
310  return IntersectingWithMap(rmap);
311 }
312 
313 
315 {
316  if ( rmap.m_LocMap.size() > m_LocMap.size() ) {
317  return rmap.IntersectingWithMap(*this);
318  }
319  ITERATE ( CHandleRangeMap, it1, rmap ) {
320  const_iterator it2 = m_LocMap.find(it1->first);
321  if ( it2 != end() && it1->second.IntersectingWith(it2->second) ) {
322  return true;
323  }
324  }
325  return false;
326 }
327 
328 
330 {
331  if ( rmap.m_LocMap.size() > m_LocMap.size() ) {
332  return rmap.TotalRangeIntersectingWith(*this);
333  }
334  ITERATE ( CHandleRangeMap, it1, rmap ) {
335  TLocMap::const_iterator it2 = m_LocMap.find(it1->first);
336  if ( it2 != end() && it1->second.GetOverlappingRange()
337  .IntersectingWith(it2->second.GetOverlappingRange()) ) {
338  return true;
339  }
340  }
341  return false;
342 }
343 
344 
345 /////////////////////////////////////////////////////////////////////////////
346 // CMasterSeqSegments
347 /////////////////////////////////////////////////////////////////////////////
348 
350 {
351 }
352 
353 
355 {
356 }
357 
358 
360 {
361  AddSegments(master.GetSeqMap());
362  for ( int idx = 0; idx < GetSegmentCount(); ++idx ) {
363  const CSeq_id_Handle& h = GetHandle(idx);
365  master.GetTSE_Info().FindMatchingBioseq(h);
366  if ( seg ) {
367  AddSegmentIds(idx, seg->GetId());
368  }
369  }
370 }
371 
372 
373 int CMasterSeqSegments::AddSegment(const CSeq_id_Handle& id, bool minus_strand)
374 {
375  int idx = GetSegmentCount();
376  m_SegSet.push_back(TSeg(id, minus_strand));
377  AddSegmentId(idx, id);
378  return idx;
379 }
380 
381 
383 {
384  m_Id2Seg[id] = idx;
385 }
386 
387 
388 void CMasterSeqSegments::AddSegmentIds(int idx, const TIds& ids)
389 {
390  ITERATE ( TIds, it, ids ) {
391  AddSegmentId(idx, *it);
392  }
393 }
394 
395 
396 void CMasterSeqSegments::AddSegmentIds(int idx, const TIds2& ids)
397 {
398  ITERATE ( TIds2, it, ids ) {
400  }
401 }
402 
403 
405 {
406  ITERATE ( TIds, it, ids ) {
407  int idx = FindSeg(*it);
408  if ( idx >= 0 ) {
409  AddSegmentIds(idx, ids);
410  return;
411  }
412  }
413 }
414 
415 
417 {
418  ITERATE ( TIds2, it, ids ) {
419  int idx = FindSeg(CSeq_id_Handle::GetHandle(**it));
420  if ( idx >= 0 ) {
421  AddSegmentIds(idx, ids);
422  return;
423  }
424  }
425 }
426 
427 
429 {
430  for ( CSeqMap_CI it(ConstRef(&seq), 0, CSeqMap::fFindRef); it; ++it ) {
431  AddSegment(it.GetRefSeqid(), it.GetRefMinusStrand());
432  }
433 }
434 
435 
437 {
439  return it == m_Id2Seg.end()? -1: it->second;
440 }
441 
442 
444 {
445  _ASSERT(size_t(seg) < m_SegSet.size());
446  return m_SegSet[seg].first;
447 }
448 
449 
451 {
452  _ASSERT(size_t(seg) < m_SegSet.size());
453  return m_SegSet[seg].second;
454 }
455 
456 
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
ENa_strand Reverse(ENa_strand s)
Definition: Na_strand.hpp:90
User-defined methods of the data storage class.
const CSeqMap & GetSeqMap(void) const
const TId & GetId(void) const
CHandleRange::TRange TRange
void AddLocation(const CSeq_loc &loc, ETransSplicing trans_splcing=eNoTransSplicing)
void AddRanges(const CSeq_id_Handle &h, const CHandleRange &hr)
bool TotalRangeIntersectingWith(const CHandleRangeMap &rmap) const
bool IntersectingWithMap(const CHandleRangeMap &rmap) const
void AddRange(const CSeq_id_Handle &h, const TRange &range, ENa_strand strand)
bool IntersectingWithLoc(const CSeq_loc &loc) const
CConstRef< CMasterSeqSegments > m_MasterSeq
TLocMap::const_iterator const_iterator
const_iterator end(void) const
void AddRange(TRange range, ENa_strand strand)
int AddSegment(const CSeq_id_Handle &id, bool minus_strand)
bool GetMinusStrand(int seg) const
list< CRef< CSeq_id > > TIds2
vector< CSeq_id_Handle > TIds
int GetSegmentCount(void) const
void AddSegments(const CSeqMap &seq)
void AddSegmentIds(int idx, const TIds &ids)
void AddSegmentId(int idx, const CSeq_id_Handle &id)
int FindSeg(const CSeq_id_Handle &h) const
const CSeq_id_Handle & GetHandle(int seg) const
pair< CSeq_id_Handle, bool > TSeg
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
const CTSE_Info & GetTSE_Info(void) const
CConstRef< CBioseq_Info > FindMatchingBioseq(const CSeq_id_Handle &id) const
Definition: tse_info.cpp:776
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
struct parameters_t * pb[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const float pi
Definition: math.hpp:54
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fFindRef
Definition: seq_map.hpp:137
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
static TThisType GetEmpty(void)
Definition: range.hpp:306
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
const TB & GetB(void) const
Get the B member data.
Definition: Seq_bond_.hpp:243
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TStrand GetStrand(void) const
Get the Strand member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
list< CRef< CSeq_loc > > Tdata
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
const Tdata & Get(void) const
Get the member data.
const TEquiv & GetEquiv(void) const
Get the variant data.
Definition: Seq_loc_.cpp:304
list< CRef< CSeq_loc > > Tdata
vector< TSeqPos > TPoints
const TA & GetA(void) const
Get the A member data.
Definition: Seq_bond_.hpp:213
const TEmpty & GetEmpty(void) const
Get the variant data.
Definition: Seq_loc_.cpp:150
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_point_.hpp:331
const TPoints & GetPoints(void) const
Get the Points member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
bool IsSetB(void) const
other end may not be available Check if a value has been assigned to B data member.
Definition: Seq_bond_.hpp:231
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
const TBond & GetBond(void) const
Get the variant data.
Definition: Seq_loc_.cpp:326
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
@ e_Equiv
equivalent sets of locations
Definition: Seq_loc_.hpp:106
@ e_Empty
to NULL one Seq-id in a collection
Definition: Seq_loc_.hpp:99
@ e_Feat
indirect, through a Seq-feat
Definition: Seq_loc_.hpp:108
@ e_Int
from to
Definition: Seq_loc_.hpp:101
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
int i
range(_Ty, _Ty) -> range< _Ty >
#define abs(a)
Definition: ncbi_heapmgr.c:130
CHandleRange::TRange TRange
#define _ASSERT
Modified on Sat Dec 02 09:22:17 2023 by modify_doxy.py rev. 669887