NCBI C++ ToolKit
bed_reader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bed_reader.hpp 93699 2021-05-13 21:23:28Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * BED file reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___BEDREADER__HPP
34 #define OBJTOOLS_READERS___BEDREADER__HPP
35 
36 #include <corelib/ncbistd.hpp>
44 
45 class CLinePreBuffer;
46 class CBedAutoSql;
47 class CBedColumnData;
48 
49 // ----------------------------------------------------------------------------
51 // ----------------------------------------------------------------------------
52 {
53 public:
54  CRawBedRecord(): m_score(-1) {};
55 
56  virtual ~CRawBedRecord() {};
57 
58  void SetInterval(
59  CSeq_id& id,
60  unsigned int start,
61  unsigned int stop,
62  ENa_strand strand);
63 
64  void SetScore(
65  unsigned int score);
66 
67  void Dump(
68  CNcbiOstream& ostr) const;
69 
70 public:
72  int m_score;
73 };
74 
75 
76 // ----------------------------------------------------------------------------
78 // ----------------------------------------------------------------------------
79 {
80 public:
83 
84 public:
85  void Dump(
86  CNcbiOstream& ostr) const;
87 
88  void Reset() { m_Records.clear(); };
89  void AddRecord(
90  CRawBedRecord& record) { m_Records.push_back(record); };
91  const vector<CRawBedRecord>& Records() const { return m_Records; };
92  bool HasData() const { return (!m_Records.empty()); };
93 
94 public:
96  vector<CRawBedRecord> m_Records;
97 };
98 
99 
100 // ----------------------------------------------------------------------------
101 /// CReaderBase implementation that reads BED data files, either a single object
102 /// or all objects found. For the purpose of CBedReader, an object consists of
103 /// a run of records all with the same ID (BED column 1), and all contained
104 /// within a single track.
105 ///
107 // ----------------------------------------------------------------------------
108  : public CReaderBase
109 {
110  //
111  // object management:
112  //
113 public:
114  CBedReader(
115  int = fNormal,
116  const string& = "",
117  const string& = "",
118  CReaderListener* = nullptr);
119  virtual ~CBedReader();
120 
121  //
122  // object interface:
123  //
124 public:
125  enum EBedFlags {
126  fThreeFeatFormat = 1<<8,
127  fDirectedFeatureModel = 1<<9,
128  fAutoSql = 1<<10,
129  fAddDefaultColumns = 1<<11,
130  };
131  typedef int TFlags;
132 
133  /// Read a single object from given line reader containing BED data. The
134  /// resulting Seq-annot will contain a feature table.
135  /// @param lr
136  /// line reader to read from.
137  /// @param pErrors
138  /// pointer to optional error container object.
139  ///
141  ReadSeqAnnot(
142  ILineReader& lr,
143  ILineErrorListener* pErrors=nullptr ) override;
144 
145  virtual bool
146  ReadTrackData(
147  ILineReader&,
148  CRawBedTrack&,
149  ILineErrorListener* =nullptr );
150 
151  virtual bool
152  SetAutoSql(
153  const string&);
154 
155  virtual bool
156  SetAutoSql(
157  CNcbiIstream&);
158 
159 protected:
160  CRef<CSeq_annot> xCreateSeqAnnot() override;
161 
162  void xGetData(
163  ILineReader&,
164  TReaderData&) override;
165 
166  void xProcessData(
167  const TReaderData&,
168  CSeq_annot&) override;
169 
170  virtual bool xDetermineLikelyColumnCount(
173 
174  bool xParseTrackLine(
175  const string&) override;
176 
177  bool xParseFeature(
178  const SReaderLine&,
179  CSeq_annot&,
181 
182  bool xParseFeatureAutoSql(
183  const CBedColumnData&,
184  CSeq_annot&,
186 
187  bool xParseFeatureUserFormat(
188  const CBedColumnData&,
189  CSeq_annot&,
191 
192  bool xParseFeatureThreeFeatFormat(
193  const CBedColumnData&,
194  CSeq_annot&,
196 
197  bool xParseFeatureGeneModelFormat(
198  const CBedColumnData&,
199  CSeq_annot&,
201 
202  bool xAppendFeatureChrom(
203  const CBedColumnData&,
204  CSeq_annot&,
205  unsigned int,
207 
208  bool xAppendFeatureThick(
209  const CBedColumnData&,
210  CSeq_annot&,
211  unsigned int,
213 
214  bool xAppendFeatureBlock(
215  const CBedColumnData&,
216  CSeq_annot&,
217  unsigned int,
219 
220  CRef<CSeq_feat> xAppendFeatureGene(
221  const CBedColumnData&,
222  CSeq_annot&,
223  unsigned int,
225 
226  CRef<CSeq_feat> xAppendFeatureRna(
227  const CBedColumnData&,
228  CSeq_annot&,
229  unsigned int,
231 
232  CRef<CSeq_feat> xAppendFeatureCds(
233  const CBedColumnData&,
234  CSeq_annot&,
235  unsigned int,
237 
238  void xSetFeatureLocation(
240  const CBedColumnData&);
241  void xSetFeatureLocationChrom(
243  const CBedColumnData&);
244  void xSetFeatureLocationGene(
246  const CBedColumnData&);
247  void xSetFeatureLocationThick(
249  const CBedColumnData&);
250  void xSetFeatureLocationCds(
252  const CBedColumnData&);
253  void xSetFeatureLocationBlock(
255  const CBedColumnData&);
256  void xSetFeatureLocationRna(
258  const CBedColumnData&);
259  void xSetFeatureIdsChrom(
261  const CBedColumnData&,
262  unsigned int);
263  void xSetFeatureIdsGene(
265  const CBedColumnData&,
266  unsigned int);
267  void xSetFeatureIdsThick(
269  const CBedColumnData&,
270  unsigned int);
271  void xSetFeatureIdsCds(
273  const CBedColumnData&,
274  unsigned int);
275  void xSetFeatureIdsBlock(
277  const CBedColumnData&,
278  unsigned int);
279  void xSetFeatureIdsRna(
281  const CBedColumnData&,
282  unsigned int);
283  void xSetFeatureBedData(
285  const CBedColumnData&,
287  void xSetFeatureTitle(
289  const CBedColumnData&);
290  void xSetFeatureScore(
292  const CBedColumnData&);
293  void xSetFeatureColor(
295  const CBedColumnData&,
297 
298  void xSetFeatureColorFromItemRgb(
300  const string&,
302  void xSetFeatureColorFromScore(
304  const string&);
305  void xSetFeatureColorByStrand(
307  const string&,
308  ENa_strand,
310  void xSetFeatureColorDefault(
312 
313  bool xContainsThickFeature(
314  const CBedColumnData&) const;
315 
316  bool xContainsBlockFeature(
317  const CBedColumnData&) const;
318 
319  bool xContainsRnaFeature(
320  const CBedColumnData&) const;
321 
322  bool xContainsCdsFeature(
323  const CBedColumnData&) const;
324 
325  ENa_strand xGetStrand(
326  const CBedColumnData&) const;
327 
328  virtual void xAssignBedColumnCount(
329  CSeq_annot&);
330 
331  void xSetFeatureDisplayData(
333  const CBedColumnData&);
334 
335  void xPostProcessAnnot(
336  CSeq_annot&) override;
337 
338  bool
339  xReadBedDataRaw(
340  ILineReader&,
341  CRawBedTrack&,
343 
344  bool
345  xReadBedRecordRaw(
346  const string&,
347  CRawBedRecord&,
349 
350  static void xCleanColumnValues(
351  vector<string>&);
352 
353  //
354  // data:
355  //
356 protected:
357  string m_currentId;
360  vector<string>::size_type mRealColumnCount;
361  vector<string>::size_type mValidColumnCount;
363  unsigned int m_CurrentFeatureCount;
365  unsigned int m_CurBatchSize;
366  const unsigned int m_MaxBatchSize;
367  unique_ptr<CLinePreBuffer> mLinePreBuffer;
368 
369  unique_ptr<CBedAutoSql> mpAutoSql;
370 };
371 
374 
375 #endif // OBJTOOLS_READERS___BEDREADER__HPP
CReaderBase implementation that reads BED data files, either a single object or all objects found.
Definition: bed_reader.hpp:109
unique_ptr< CBedAutoSql > mpAutoSql
Definition: bed_reader.hpp:369
unique_ptr< CLinePreBuffer > mLinePreBuffer
Definition: bed_reader.hpp:367
string m_currentId
Definition: bed_reader.hpp:357
string mColumnSeparator
Definition: bed_reader.hpp:358
bool mAssumeErrorsAreRecordLevel
Definition: bed_reader.hpp:362
bool m_usescore
Definition: bed_reader.hpp:364
unsigned int m_CurBatchSize
Definition: bed_reader.hpp:365
vector< string >::size_type mValidColumnCount
Definition: bed_reader.hpp:361
const unsigned int m_MaxBatchSize
Definition: bed_reader.hpp:366
unsigned int m_CurrentFeatureCount
Definition: bed_reader.hpp:363
NStr::TSplitFlags mColumnSplitFlags
Definition: bed_reader.hpp:359
vector< string >::size_type mRealColumnCount
Definition: bed_reader.hpp:360
virtual ~CRawBedRecord()
Definition: bed_reader.hpp:56
CRef< CSeq_interval > m_pInterval
Definition: bed_reader.hpp:71
void Reset()
Definition: bed_reader.hpp:88
vector< CRawBedRecord > m_Records
Definition: bed_reader.hpp:96
CRef< CSeq_id > m_pId
Definition: bed_reader.hpp:92
bool HasData() const
Definition: bed_reader.hpp:92
const vector< CRawBedRecord > & Records() const
Definition: bed_reader.hpp:91
void AddRecord(CRawBedRecord &record)
Definition: bed_reader.hpp:89
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
int TSplitFlags
Bitwise OR of ESplitFlags.
Definition: ncbistr.hpp:2512
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
void Dump(CSplitCacheApp *app, const C &obj, ESerialDataFormat format, const string &key, const string &suffix=kEmptyStr)
Modified on Fri Sep 20 14:58:14 2024 by modify_doxy.py rev. 669887