NCBI C++ ToolKit
wiggle_reader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: wiggle_reader.hpp 94247 2021-07-12 15:10:19Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * WIGGLE file reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___WIGGLEREADER__HPP
34 #define OBJTOOLS_READERS___WIGGLEREADER__HPP
35 
36 #include <corelib/ncbistd.hpp>

#include <limits>
#include <utility>
40 
42 
44 
45 BEGIN_objects_SCOPE // namespace ncbi::objects::
46 
47 // ============================================================================
49 // ============================================================================
50  string mChrom;
54 
55  void Reset() {
56  mChrom.clear();
57  mStart = mStep = 0;
58  mSpan = 1;
59  }
61  Reset();
62  }
63 };
64 
65 // ============================================================================
// SVarStepInfo bundles a chromosome name (mChrom) with a span (mSpan).
// Presumably these are the parameters parsed from a "variableStep"
// declaration line -- TODO confirm against xGetVariableStepInfo().
//
// NOTE(review): this listing skips two lines inside the struct (listing
// numbers 69 and 75). mSpan is assigned in Reset() but its declaration is
// not visible here, and the "Reset(); }" fragment near the end has no
// visible header -- presumably it is the body of the default constructor.
// Confirm both against the real header before relying on this text.
66 struct SVarStepInfo {
67 // ============================================================================
68  string mChrom;
70 
    // Clears the chromosome name and sets the span to 1.
71  void Reset() {
72  mChrom.clear();
73  mSpan =1;
74  }
    // The only visible action of this fragment is a call to Reset().
76  Reset();
77  }
78 };
79 
80 // ============================================================================
81 struct SValueInfo {
82 // ============================================================================
83  string m_Chrom;
86  double m_Value;
87 
88  SValueInfo(): m_Pos(0), m_Span(1), m_Value(0.0) {};
89 
90  TSeqPos GetEnd(void) const {
91  return m_Pos + m_Span;
92  }
93  bool operator<(const SValueInfo& v) const {
94  if (m_Chrom != v.m_Chrom) {
95  return m_Chrom < v.m_Chrom;
96  }
97  return m_Pos < v.m_Pos;
98  }
99 };
100 
101 // ============================================================================
102 struct SWiggleStat {
103 // ============================================================================
109 
111  : m_FixedSpan(true),
112  m_HaveGaps(false),
113  m_IntValues(true),
114  m_Span(1),
115  m_Min(0),
116  m_Max(0),
117  m_Step(1),
118  m_StepMul(1)
119  {
120  }
122  {
123  m_FixedSpan = true;
124  m_Span = span;
125  }
126  void AddSpan(TSeqPos span)
127  {
128  if ( span != m_Span ) {
129  m_FixedSpan = false;
130  }
131  }
132  void SetFirstValue(double v)
133  {
134  m_Min = m_Max = v;
135  m_IntValues = v == int(v);
136  }
137  void AddValue(double v)
138  {
139  if ( v < m_Min ) {
140  m_Min = v;
141  }
142  if ( v > m_Max ) {
143  m_Max = v;
144  }
145  if ( m_IntValues && v != int(v) ) {
146  m_IntValues = false;
147  }
148  }
149  int AsByte(double v) const
150  {
151  return int((v-m_Min)*m_StepMul+.5);
152  }
153 };
154 
155 // ----------------------------------------------------------------------------
157 // ----------------------------------------------------------------------------
158 {
159 public:
161  CSeq_id& id,
162  unsigned int start,
163  unsigned int span,
164  double value)
165  {
167  m_pInterval->SetId(id);
168  m_pInterval->SetFrom(start-1);
169  m_pInterval->SetTo(start-1+span-1);
170  m_value = value;
171  };
172 
174 
175  void Dump(
176  CNcbiOstream& ostr) const
177  {
178  ostr << " [CRawWiggleRecord ";
179  ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" ";
180  ostr << "start=" << m_pInterval->GetFrom() << " ";
181  ostr << "stop=" << m_pInterval->GetTo() << " ";
182  ostr << "value=" << m_value << "]" << endl;
183  }
184 
185 public:
187  double m_value;
188 };
189 
190 // ----------------------------------------------------------------------------
192 // ----------------------------------------------------------------------------
193 {
194 public:
197 
198 public:
199  void Reset()
200  {
201  m_pId.Reset();
202  m_Records.clear();
203  }
204 
205  void Dump(
206  CNcbiOstream& ostr) const
207  {
208  ostr << "[CRawWiggleTrack" << endl;
209  for (vector<CRawWiggleRecord>::const_iterator it = m_Records.begin();
210  it != m_Records.end(); ++it) {
211  it->Dump(ostr);
212  }
213  ostr << "]" << std::endl;
214  }
215 
216  void AddRecord(
217  CRawWiggleRecord record)
218  {
219  m_Records.push_back(record);
220  }
221 
222  const vector<CRawWiggleRecord>& Records() const
223  {
224  return m_Records;
225  }
226 
227  bool HasData() const
228  {
229  return (!m_Records.empty());
230  }
231 
232 public:
234  vector<CRawWiggleRecord> m_Records;
235 };
236 
237 // ----------------------------------------------------------------------------
239 // ----------------------------------------------------------------------------
240  : public CReaderBase
241 {
242 public:
243  typedef vector<SValueInfo> TValues;
244 
245 public:
247  int = fDefaults,
248  const string& = "",
249  const string& = "",
250  CReaderListener* = nullptr);
251 
252  virtual ~CWiggleReader();
253 
254  //
255  // object interface:
256  //
257 public:
259  fDefaults = 0,
260  fJoinSame = 1<<8,
261  fAsByte = 1<<9,
262  fAsGraph = 1<<10,
263  fDumpStats = 1<<11,
264  fAsRaw = 1<<12,
265  };
266  typedef int TFlags;
267 
269  ReadSeqAnnot(
270  ILineReader&,
271  ILineErrorListener* =nullptr ) override;
272 
273  virtual bool
274  ReadTrackData(
275  ILineReader&,
277  ILineErrorListener* =nullptr );
278 
279  //
280  // helpers:
281  //
282 protected:
283  void xGetData(
284  ILineReader&,
285  TReaderData&) override;
286 
287  void xProcessData(
288  const TReaderData&,
289  CSeq_annot&) override;
290 
291  void xPostProcessAnnot(
292  CSeq_annot&) override;
293 
294  bool
296  const string&,
297  CSeq_annot&) override;
298 
299  bool
301  const string&) override;
302 
303  bool
304  xProcessFixedStepData(
305  TReaderData::const_iterator&,
306  const TReaderData&);
307 
308  void
309  xGetFixedStepInfo(
310  const string&,
311  SFixedStepInfo&);
312 
313  void
314  xReadFixedStepData(
315  const SFixedStepInfo&,
316  TReaderData::const_iterator&,
317  const TReaderData&);
318 
319  bool
320  xReadFixedStepDataRaw(
321  const SFixedStepInfo&,
322  TReaderData::const_iterator&,
323  const TReaderData&,
324  CRawWiggleTrack&);
325 
326  bool
327  xProcessVariableStepData(
328  TReaderData::const_iterator&,
329  const TReaderData&);
330 
331  bool
332  xProcessBedData(
333  TReaderData::const_iterator&,
334  const TReaderData&);
335 
336  void
337  xGetVariableStepInfo(
338  const string&,
339  SVarStepInfo&);
340 
341  void
342  xReadVariableStepData(
343  const SVarStepInfo&,
344  TReaderData::const_iterator&,
345  const TReaderData&);
346 
347  bool
348  xReadVariableStepDataRaw(
349  const SVarStepInfo&,
350  TReaderData::const_iterator&,
351  const TReaderData&,
352  CRawWiggleTrack&);
353 
354  string
355  xGetWord(
356  string&);
357 
358  bool
359  xSkipWS(
360  string&);
361 
362  string
363  xGetParamName(
364  string&);
365 
366  string
367  xGetParamValue(
368  string&);
369 
370  void
371  xGetPos(
372  string&,
373  TSeqPos& v);
374 
375  bool
376  xTryGetDoubleSimple(
377  string&,
378  double& v);
379 
380  void
381  xGetDouble(
382  string& line,
383  double& v);
384 
386  xMakeChromId();
387 
389  xMakeTable();
390 
392  xMakeGraph();
393 
394  void
395  xPreprocessValues(
396  SWiggleStat&);
397 
398  void
400  if ( !m_OmitZeros || value.m_Value != 0 ) {
401  m_Values.push_back(value);
402  }
403  }
404 
405  double
406  xEstimateSize(
407  size_t rows,
408  bool fixed_span) const;
409 
410  void
411  xSetTotalLoc(
412  CSeq_loc& loc,
413  CSeq_id& chrom_id);
414 
415  void
416  xDumpChromValues();
417 
418  void
419  xSetChrom(
420  const string& chrom);
421 
422  bool
423  xValuesAreFromSingleSequence() const;
424 
425  //
426  // data:
427  //
428 protected:
429  string m_ChromId;
431  double m_GapValue;
434 
435  enum ETrackType {
438  eTrackType_bedGraph
439  };
442 };
443 
444 END_objects_SCOPE
446 
447 #endif // OBJTOOLS_READERS___WIGGLEREADER__HPP
CRawWiggleRecord(CSeq_id &id, unsigned int start, unsigned int span, double value)
CRef< CSeq_interval > m_pInterval
void Dump(CNcbiOstream &ostr) const
void Dump(CNcbiOstream &ostr) const
bool HasData() const
CRef< CSeq_id > m_pId
const vector< CRawWiggleRecord > & Records() const
void AddRecord(CRawWiggleRecord record)
vector< CRawWiggleRecord > m_Records
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
virtual void xProcessData(const TReaderData &, CSeq_annot &)
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
virtual void xPostProcessAnnot(CSeq_annot &)
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
virtual bool xParseTrackLine(const string &)
virtual void xGetData(ILineReader &, TReaderData &)
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
vector< SValueInfo > TValues
CRef< CSeq_annot > m_Annot
ETrackType m_TrackType
void xAddValue(const SValueInfo &value)
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
void SetTo(TTo value)
Assign a value to To data member.
const TId & GetId(void) const
Get the Id member data.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
TSeqPos m_Span
double m_Value
TSeqPos m_Pos
string m_Chrom
TSeqPos GetEnd(void) const
bool operator<(const SValueInfo &v) const
int AsByte(double v) const
void AddValue(double v)
void SetFirstSpan(TSeqPos span)
void SetFirstValue(double v)
void AddSpan(TSeqPos span)
Modified on Fri Sep 20 14:58:12 2024 by modify_doxy.py rev. 669887