NCBI C++ ToolKit
id_range.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: id_range.cpp 100544 2023-08-10 16:44:49Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description:
29 * Utility class for collecting ranges of sequences
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
42 #include <objmgr/error_codes.hpp>
43 
60 
61 
62 #define NCBI_USE_ERRCODE_X ObjMgr_IdRange
63 
65 
67 
69 
70 
72 {
73  Add(range.GetTotalRange());
74 }
75 
76 
78 {
80 }
81 
82 
83 void COneSeqRange::Add(TSeqPos start, TSeqPos stop_exclusive)
84 {
85  Add(COpenRange<TSeqPos>(start, stop_exclusive));
86 }
87 
88 
90 {
92 }
93 
94 
96 {
97 }
98 
99 
101 {
102 }
103 
104 
106 {
    // Write every entry of m_Ranges to 'out' as a comma-separated list.
    // Each entry is printed as the id string, followed by "(from-to)"
    // unless the entry's total range equals TRange::GetWhole().
107  ITERATE ( TRanges, it, m_Ranges ) {
    // Separator goes before every entry except the first one.
108  if ( it != m_Ranges.begin() ) {
109  out << ',';
110  }
111  TRange range = it->second.GetTotalRange();
112  out << it->first.AsString();
    // A whole-sequence range is printed as the bare id.
113  if ( range != TRange::GetWhole() ) {
114  out << "(" << range.GetFrom() << "-" << range.GetTo() << ")";
115  }
116  }
    // Return the stream so that calls can be chained.
117  return out;
118 }
119 
120 
122 {
    // Return the id of the only entry when m_Ranges holds exactly one
    // entry; otherwise return a default-constructed CSeq_id_Handle.
123  CSeq_id_Handle ret;
124  if ( m_Ranges.size() == 1 ) {
125  ret = m_Ranges.begin()->first;
126  }
127  return ret;
128 }
129 
130 
131 void CSeqsRange::Add(const CSeq_id_Handle& id, const COneSeqRange& loc)
132 {
133  m_Ranges[id].Add(loc);
134 }
135 
136 
138 {
139  m_Ranges[id].Add(range);
140 }
141 
142 
144 {
145  ITERATE ( CSeqsRange, it, range ) {
146  m_Ranges[it->first].Add(it->second);
147  }
148 }
149 
150 
152 {
153  ITERATE ( CHandleRangeMap, it, hrmap ) {
154  m_Ranges[it->first].Add(it->second);
155  }
156 }
157 
158 
160 {
161  CHandleRangeMap hrmap;
162  hrmap.SetMasterSeq(impl.GetMaster());
163  hrmap.AddLocation(loc);
164  Add(hrmap);
165 }
166 
167 
169 {
170  Add(obj.GetLocation(), impl);
171  if ( obj.IsSetProduct() ) {
172  Add(obj.GetProduct(), impl);
173  }
174 }
175 
176 
178 {
179  const CSeq_align::C_Segs& segs = obj.GetSegs();
180  switch ( segs.Which() ) {
183  Add(**it, impl);
184  }
185  break;
187  Add(segs.GetDenseg(), impl);
188  break;
190  ITERATE ( CSeq_align::C_Segs::TStd, it, segs.GetStd() ) {
191  ITERATE ( CStd_seg::TLoc, it_loc, (*it)->GetLoc() ) {
192  Add(**it_loc, impl);
193  }
194  }
195  break;
197  Add(segs.GetPacked(), impl);
198  break;
200  ITERATE ( CSeq_align_set::Tdata, it, segs.GetDisc().Get() ) {
201  Add(**it, impl);
202  }
203  break;
205  Add(segs.GetSpliced(), impl);
206  break;
208  Add(segs.GetSparse(), impl);
209  break;
210  default:
211  break;
212  }
213 }
214 
215 
216 void CSeqsRange::Add(const CDense_seg& denseg,
217  const CBlobSplitterImpl& /*impl*/)
218 {
219  size_t dim = denseg.GetDim();
220  size_t numseg = denseg.GetNumseg();
221  // claimed dimension may not be accurate :-/
222  if ( numseg != denseg.GetLens().size() ) {
223  ERR_POST_X(1, Warning << "Invalid 'lens' size in denseg");
224  numseg = min(numseg, denseg.GetLens().size());
225  }
226  if ( dim != denseg.GetIds().size() ) {
227  ERR_POST_X(2, Warning << "Invalid 'ids' size in denseg");
228  dim = min(dim, denseg.GetIds().size());
229  }
230  if ( dim*numseg != denseg.GetStarts().size() ) {
231  ERR_POST_X(3, Warning << "Invalid 'starts' size in denseg");
232  dim = min(dim*numseg, denseg.GetStarts().size()) / numseg;
233  }
234  CDense_seg::TStarts::const_iterator it_start = denseg.GetStarts().begin();
235  CDense_seg::TLens::const_iterator it_len = denseg.GetLens().begin();
236  for ( size_t seg = 0; seg < numseg; seg++, ++it_len) {
237  CDense_seg::TIds::const_iterator it_id = denseg.GetIds().begin();
238  for ( size_t seq = 0; seq < dim; seq++, ++it_start, ++it_id) {
239  if ( *it_start >= 0 ) {
241  m_Ranges[idh].Add(*it_start, *it_start + *it_len);
242  }
243  }
244  }
245 }
246 
247 
// Collect the ranges covered by a Dense-diag: one range of length
// diag.GetLen() per row, starting at the row's entry in 'starts'.
// The declared dimension is clamped (with a warning) to the sizes of
// 'ids' and 'starts'.  The splitter implementation argument is unused.
248 void CSeqsRange::Add(const CDense_diag& diag,
249  const CBlobSplitterImpl& /*impl*/)
250 {
251  size_t dim = diag.GetDim();
    // Never iterate past the end of 'ids' ...
252  if ( dim != diag.GetIds().size() ) {
253  ERR_POST_X(4, Warning << "Invalid 'ids' size in dendiag");
254  dim = min(dim, diag.GetIds().size());
255  }
    // ... nor past the end of 'starts'.
256  if ( dim != diag.GetStarts().size() ) {
257  ERR_POST_X(5, Warning << "Invalid 'starts' size in dendiag");
258  dim = min(dim, diag.GetStarts().size());
259  }
260  TSeqPos len = diag.GetLen();
261  for ( size_t i = 0; i < dim; ++i ) {
    // NOTE(review): 'idh' is used below but its declaration is not among
    // the visible lines; presumably the handle of diag.GetIds()[i] - confirm.
263  TSeqPos start = diag.GetStarts()[i];
    // Second argument is the exclusive end of the range.
264  m_Ranges[idh].Add(start, start + len);
265  }
266 }
267 
268 
// Collect the ranges covered by a Packed-seg alignment.
// For every segment the 'present' flags of 'dim' rows are examined and a
// range [start, start + segment length) is added for each flagged row.
// The splitter implementation argument is unused.
269 void CSeqsRange::Add(const CPacked_seg& packed,
270  const CBlobSplitterImpl& /*impl*/)
271 {
272  size_t dim = packed.GetDim();
273  size_t numseg = packed.GetNumseg();
274  // claimed dimension may not be accurate :-/
    // The divisions below are only reached when dim * numseg exceeds a
    // container size, which implies numseg != 0.
275  if ( dim * numseg > packed.GetStarts().size() ) {
276  dim = packed.GetStarts().size() / numseg;
277  }
278  if ( dim * numseg > packed.GetPresent().size() ) {
279  dim = packed.GetPresent().size() / numseg;
280  }
    // NOTE(review): 'dim' is clamped against the size of 'lens', yet it_len
    // is stepped once per segment (numseg times) below, and 'dim' is never
    // checked against the size of 'ids' - confirm that numseg <= lens.size()
    // and dim <= ids.size() are guaranteed elsewhere.
281  if ( dim > packed.GetLens().size() ) {
282  dim = packed.GetLens().size();
283  }
284  CPacked_seg::TStarts::const_iterator it_start = packed.GetStarts().begin();
285  CPacked_seg::TLens::const_iterator it_len = packed.GetLens().begin();
286  CPacked_seg::TPresent::const_iterator it_pres= packed.GetPresent().begin();
287  for ( size_t seg = 0; seg < numseg; seg++, ++it_len ) {
    // Ids restart from the first one for every segment.
288  CPacked_seg::TIds::const_iterator it_id = packed.GetIds().begin();
289  for ( size_t seq = 0; seq < dim; seq++, ++it_pres) {
290  if ( *it_pres ) {
    // NOTE(review): 'idh' is used below but its declaration is not among
    // the visible lines; presumably the handle of *it_id - confirm.
    // Second argument is the exclusive end of the range.
292  m_Ranges[idh].Add(*it_start, *it_start + *it_len);
    // NOTE(review): it_id and it_start advance only for rows flagged as
    // present, so after an absent row they no longer correspond to the row
    // index 'seq' - confirm this matches the layout of 'ids' and 'starts'.
293  ++it_id;
294  ++it_start;
295  }
296  }
297  }
298 }
299 
300 
// Collect the genomic and product ranges of every exon of a Spliced-seg.
// An exon's own genomic/product id takes precedence; otherwise the id set
// on the Spliced-seg itself is used, and when neither is set that side of
// the exon is skipped.  The splitter implementation argument is unused.
301 void CSeqsRange::Add(const CSpliced_seg& spliced,
302  const CBlobSplitterImpl& /*impl*/)
303 {
    // Alignment-level ids, used as fallback for exons without their own
    // id; null when not set.
304  const CSeq_id* gen_id = spliced.IsSetGenomic_id() ?
305  &spliced.GetGenomic_id() : 0;
306  const CSeq_id* prod_id = spliced.IsSetProduct_id() ?
307  &spliced.GetProduct_id() : 0;
308  ITERATE ( CSpliced_seg::TExons, it, spliced.GetExons() ) {
309  const CSpliced_exon& ex = **it;
    // Genomic side of the exon.
310  const CSeq_id* ex_gen_id = ex.IsSetGenomic_id() ?
311  &ex.GetGenomic_id() : gen_id;
312  if ( ex_gen_id ) {
313  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*ex_gen_id);
    // NOTE(review): the second parameter of COneSeqRange::Add(TSeqPos,
    // TSeqPos) is named stop_exclusive - confirm whether GetGenomic_end()
    // is an inclusive position, in which case the last base is not covered.
314  m_Ranges[idh].Add(ex.GetGenomic_start(), ex.GetGenomic_end());
315  }
    // Product side of the exon.
316  const CSeq_id* ex_prod_id = ex.IsSetProduct_id() ?
317  &ex.GetProduct_id() : prod_id;
318  if ( ex_prod_id ) {
319  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*ex_prod_id);
    // Product positions are either nucleotide positions or protein
    // positions; for protein positions the amino acid index (GetAmin) is
    // used for the end.  NOTE(review): part of this expression is not among
    // the visible lines - the start is presumably selected the same way.
320  m_Ranges[idh].Add(ex.GetProduct_start().IsNucpos() ?
323  ex.GetProduct_end().IsNucpos() ?
325  : ex.GetProduct_end().GetProtpos().GetAmin());
326  }
327  }
328 }
329 
330 
331 void CSeqsRange::Add(const CSparse_seg& sparse,
332  const CBlobSplitterImpl& /*impl*/)
333 {
334  ITERATE ( CSparse_seg::TRows, it, sparse.GetRows() ) {
335  const CSparse_align& aln_row = **it;
336  size_t numseg = aln_row.GetNumseg();
337  if (numseg != aln_row.GetFirst_starts().size()) {
338  ERR_POST_X(6, Warning <<
339  "Invalid size of 'first-starts' in sparse-align");
340  numseg = min(numseg, aln_row.GetFirst_starts().size());
341  }
342  if (numseg != aln_row.GetSecond_starts().size()) {
343  ERR_POST_X(7, Warning <<
344  "Invalid size of 'second-starts' in sparse-align");
345  numseg = min(numseg, aln_row.GetSecond_starts().size());
346  }
347  if (numseg != aln_row.GetLens().size()) {
348  ERR_POST_X(8, Warning <<
349  "Invalid size of 'lens' in sparse-align");
350  numseg = min(numseg, aln_row.GetLens().size());
351  }
352  if (aln_row.IsSetSecond_strands() &&
353  numseg != aln_row.GetSecond_strands().size()) {
354  ERR_POST_X(9, Warning <<
355  "Invalid size of 'second-strands' in sparse-align");
356  numseg = min(numseg, aln_row.GetSecond_strands().size());
357  }
358 
359  for (size_t seg = 0; seg < numseg; ++seg) {
360  TSeqPos len = aln_row.GetLens()[seg];
361  CSeq_id_Handle idh =
363  m_Ranges[idh].Add(aln_row.GetFirst_starts()[seg],
364  aln_row.GetFirst_starts()[seg] + len - 1);
365  idh = CSeq_id_Handle::GetHandle(aln_row.GetSecond_id());
366  m_Ranges[idh].Add(aln_row.GetSecond_starts()[seg],
367  aln_row.GetSecond_starts()[seg] + len - 1);
368  }
369  }
370 }
371 
372 
374 {
375  Add(obj.GetLoc(), impl);
376 }
377 
378 
380 {
382  if ( info->IsFeatTable() ) {
383  Add(info->GetLocation(), table, impl);
384  Add(info->GetProduct(), table, impl);
385  }
386  else {
387  CConstRef<CSeq_loc> loc = info->GetTableLocation();
388  if ( loc ) {
389  Add(*loc, impl);
390  }
391  }
392 }
393 
394 
396  const CBlobSplitterImpl& impl)
397 {
398  if ( !loc.IsSet() ) {
399  return;
400  }
401  size_t num_rows = table.GetNum_rows();
402  if ( loc.IsRealLoc() ) { // full Seq-loc object
403  for ( size_t row = 0; row < num_rows; ++row ) {
404  Add(*loc.GetLoc(row), impl);
405  }
406  }
407  else { // simplified Seq-loc object
409  SAnnotObject_Index index;
410  for ( size_t row = 0; row < num_rows; ++row ) {
411  loc.SetTableKeyAndIndex(row, key, index);
412  Add(key.m_Handle, key.m_Range);
413  }
414  }
415 }
416 
417 
418 int CSeqsRange::Compare(const CSeqsRange& other) const
419 {
420  for ( TRanges::const_iterator i1(m_Ranges.begin()), i2(other.m_Ranges.begin());
421  i1 != m_Ranges.end() || i2 != other.m_Ranges.end(); ++i1, ++i2 ) {
422  if ( int cmp = (i1 != m_Ranges.end()) - (i2 != other.m_Ranges.end()) ) {
423  return cmp;
424  }
425  if ( i1->first != i2->first ) {
426  string s1 = i1->first.AsString();
427  string s2 = i2->first.AsString();
428  if ( int cmp = NStr::CompareNocase(s1, s2) ) {
429  return cmp;
430  }
431  }
432  auto r1 = i1->second.GetTotalRange();
433  auto r2 = i2->second.GetTotalRange();
434  if ( r1.GetFrom() < r2.GetFrom() ) {
435  return -1;
436  }
437  if ( r1.GetFrom() > r2.GetFrom() ) {
438  return 1;
439  }
440  if ( r1.GetTo() < r2.GetTo() ) {
441  return 1;
442  }
443  if ( r1.GetTo() > r2.GetTo() ) {
444  return -1;
445  }
446  }
447  return 0;
448 }
449 
450 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void AddLocation(const CSeq_loc &loc, ETransSplicing trans_splcing=eNoTransSplicing)
void SetMasterSeq(const CMasterSeqSegments *master_seq)
TRange GetOverlappingRange(TTotalRangeFlags flags=eStrandAny) const
TRange m_TotalRange
Definition: id_range.hpp:92
void Add(const COneSeqRange &range)
Definition: id_range.cpp:71
CPacked_seg –.
Definition: Packed_seg.hpp:66
bool IsSet(void) const
void SetTableKeyAndIndex(size_t row, SAnnotObject_Key &key, SAnnotObject_Index &index) const
bool IsRealLoc(void) const
CConstRef< CSeq_loc > GetLoc(size_t row) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
~CSeqsRange(void)
Definition: id_range.cpp:100
CSeq_id_Handle GetSingleId(void) const
Definition: id_range.cpp:121
CSeqsRange(void)
Definition: id_range.cpp:95
TRanges m_Ranges
Definition: id_range.hpp:153
CNcbiOstream & Print(CNcbiOstream &out) const
Definition: id_range.cpp:105
int Compare(const CSeqsRange &other) const
Definition: id_range.cpp:418
void Add(const CSeq_id_Handle &id, const COneSeqRange &loc)
Definition: id_range.cpp:131
size_type size() const
Definition: map.hpp:148
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
const TProtpos & GetProtpos(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
vector< CRef< CSeq_loc > > TLoc
Definition: Std_seg_.hpp:93
vector< CRef< CSparse_align > > TRows
Definition: Sparse_seg_.hpp:99
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
const TStarts & GetStarts(void) const
Get the Starts member data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
TNumseg GetNumseg(void) const
Get the Numseg member data.
const TFirst_id & GetFirst_id(void) const
Get the First_id member data.
TLen GetLen(void) const
Get the Len member data.
TAmin GetAmin(void) const
Get the Amin member data.
Definition: Prot_pos_.hpp:220
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
TDim GetDim(void) const
Get the Dim member data.
const TIds & GetIds(void) const
Get the Ids member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
const TLens & GetLens(void) const
Get the Lens member data.
const TSecond_starts & GetSecond_starts(void) const
Get the Second_starts member data.
const TPacked & GetPacked(void) const
Get the variant data.
Definition: Seq_align_.cpp:175
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
const TLens & GetLens(void) const
Get the Lens member data.
const TIds & GetIds(void) const
Get the Ids member data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsSetSecond_strands(void) const
Check if a value has been assigned to Second_strands data member.
const TFirst_starts & GetFirst_starts(void) const
Get the First_starts member data.
const TDendiag & GetDendiag(void) const
Get the variant data.
Definition: Seq_align_.hpp:726
const TPresent & GetPresent(void) const
Get the Present member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
const TSecond_strands & GetSecond_strands(void) const
Get the Second_strands member data.
const TSecond_id & GetSecond_id(void) const
Get the Second_id member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const TStarts & GetStarts(void) const
Get the Starts member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TSparse & GetSparse(void) const
Get the variant data.
Definition: Seq_align_.cpp:241
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
TDim GetDim(void) const
Get the Dim member data.
bool IsNucpos(void) const
Check if variant Nucpos is selected.
const TRows & GetRows(void) const
Get the Rows member data.
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
list< CRef< CDense_diag > > TDendiag
Definition: Seq_align_.hpp:194
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
NCBI_DEFINE_ERR_SUBCODE_X(9)
Definition of all error codes used in objmgr libraries (xobjmgr.lib, xobjutil.lib and others).
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::KEY key
T min(T x_, T y_)
#define const
Definition: zconf.h:230
Modified on Tue Dec 05 02:06:31 2023 by modify_doxy.py rev. 669887