NCBI C++ ToolKit
wiggle_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: wiggle_reader.cpp 102775 2024-07-10 16:05:26Z gotvyans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig, leaning heavily on code lifted from the wig2table
27  * project, by Aaron Ucko.
28  *
29  * File Description:
30  * WIGGLE file reader
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 
37 #include <util/line_reader.hpp>
38 
44 
47 
49 BEGIN_objects_SCOPE
50 
51 // ----------------------------------------------------------------------------
52 bool
54 // ----------------------------------------------------------------------------
55 {
56  if (m_Values.empty()) {
57  return false;
58  }
59  TValues::const_iterator cit = m_Values.begin();
60  string first = cit->m_Chrom;
61  for (cit++; cit != m_Values.end(); ++cit) {
62  if (cit->m_Chrom != first) {
63  return false;
64  }
65  }
66  return true;
67 }
68 
69 // ----------------------------------------------------------------------------
71  int iFlags,
72  const string& name,
73  const string& title,
74  CReaderListener* pRl):
75 // ----------------------------------------------------------------------------
76  CReaderBase(iFlags, name, title, CReadUtil::AsSeqId, pRl),
77  m_OmitZeros(false),
78  m_TrackType(eTrackType_invalid)
79 {
80  m_uLineNumber = 0;
81  m_GapValue = 0.0;
82 }
83 
84 // ----------------------------------------------------------------------------
86 // ----------------------------------------------------------------------------
87 {
88 }
89 
90 // ----------------------------------------------------------------------------
93  ILineReader& lr,
94  ILineErrorListener* pEL)
95 // ----------------------------------------------------------------------------
96 {
97  m_ChromId.clear();
98  m_Values.clear();
100  m_ChromId.clear();
101  m_Values.clear();
102  xParseTrackLine("track type=wiggle_0");
103  }
104 
105  xProgressInit(lr);
106 
107  m_uDataCount = 0;
109 
110  TReaderData readerData;
111  xGuardedGetData(lr, readerData, pEL);
112  if (readerData.empty()) {
113  pAnnot.Reset();
114  return pAnnot;
115  }
116  xGuardedProcessData(readerData, *pAnnot, pEL);
117  xPostProcessAnnot(*pAnnot);
118  return pAnnot;
119 }
120 
121 // ----------------------------------------------------------------------------
122 void
124  ILineReader& lr,
125  TReaderData& readerData)
126 // ----------------------------------------------------------------------------
127 {
128  // Goal: Extract one wiggle graph object (including all its data lines) at
129  // once
130  bool haveData = false;
131  readerData.clear();
132  string line;
133  while (xGetLine(lr, line)) {
134  bool isMeta = NStr::StartsWith(line, "fixedStep") ||
135  NStr::StartsWith(line, "variableStep") ||
136  xIsTrackLine(line) ||
137  xIsBrowserLine(line);
138  if (isMeta) {
139  if (haveData) {
140  xUngetLine(lr);
141  break;
142  }
143  }
144  readerData.push_back(TReaderLine{m_uLineNumber, line});
145  if (!isMeta) {
146  haveData = true;
147  }
148  ++m_uDataCount;
149  }
150 }
151 
152 // ----------------------------------------------------------------------------
153 void
155  const TReaderData& readerData,
156  CSeq_annot& annot)
157 // ----------------------------------------------------------------------------
158 {
159  for (auto curData = readerData.begin(); curData != readerData.end(); curData++) {
160  auto line = curData->mData;
161  if (xParseBrowserLine(line, annot)) {
162  continue;
163  }
164  if (xParseTrackLine(line)) {
165  continue;
166  }
167 
168  if (xProcessFixedStepData(curData, readerData)) {
169  break;
170  }
171  if (xProcessVariableStepData(curData, readerData)) {
172  break;
173  }
174  xProcessBedData(curData, readerData);
175  break;
176  }
177 }
178 
179 
180 // ----------------------------------------------------------------------------
181 bool
183  ILineReader& lr,
184  CRawWiggleTrack& rawdata,
185  ILineErrorListener* pMessageListener)
186 // ----------------------------------------------------------------------------
187 {
188  TReaderData readerData;
189  xGuardedGetData(lr, readerData, pMessageListener);
190  auto curIt = readerData.cbegin();
191  while (curIt != readerData.end()) {
192  auto firstLine(curIt->mData);
193  if (NStr::StartsWith(firstLine, "fixedStep")) {
194  SFixedStepInfo fixedStepInfo;
195  xGetFixedStepInfo(firstLine, fixedStepInfo);
196  return xReadFixedStepDataRaw(
197  fixedStepInfo, ++curIt, readerData, rawdata);
198  }
199  if (NStr::StartsWith(firstLine, "variableStep")) {
200  SVarStepInfo varStepInfo;
201  xGetVariableStepInfo(firstLine, varStepInfo);
203  varStepInfo, ++curIt, readerData, rawdata);
204  }
205  ++curIt;
206  }
207  return false;
208 }
209 
210 // ----------------------------------------------------------------------------
211 bool
213  const SFixedStepInfo& fixedStepInfo,
214  TReaderData::const_iterator& curIt,
215  const TReaderData& readerData,
216  CRawWiggleTrack& rawdata)
217 // ----------------------------------------------------------------------------
218 {
219  rawdata.Reset();
220 
221  CRef<CSeq_id> id =
222  CReadUtil::AsSeqId(fixedStepInfo.mChrom, m_iFlags);
223 
224  unsigned int pos(fixedStepInfo.mStart);
225  while (curIt != readerData.end()) {
226  auto line(curIt->mData);
227  double value(0);
228  xGetDouble(line, value);
229  rawdata.AddRecord(
230  CRawWiggleRecord(*id, pos, fixedStepInfo.mSpan, value));
231  pos += fixedStepInfo.mStep;
232  curIt++;
233  }
234  return rawdata.HasData();
235 }
236 
237 // ----------------------------------------------------------------------------
238 bool
240  const SVarStepInfo& varStepInfo,
241  TReaderData::const_iterator& curIt,
242  const TReaderData& readerData,
243  CRawWiggleTrack& rawdata)
244 // ----------------------------------------------------------------------------
245 {
246  rawdata.Reset();
247 
248  CRef<CSeq_id> id =
249  CReadUtil::AsSeqId(varStepInfo.mChrom, m_iFlags);
250 
251  while (curIt != readerData.end()) {
252  auto line(curIt->mData);
253  unsigned int pos(0);
254  xGetPos(line, pos);
255  xSkipWS(line);
256  double value(0);
257  xGetDouble(line, value);
258  rawdata.AddRecord(
259  CRawWiggleRecord(*id, pos, varStepInfo.mSpan, value));
260  curIt++;
261  }
262  return rawdata.HasData();
263 }
264 
265 
266 // ----------------------------------------------------------------------------
267 double CWiggleReader::xEstimateSize(size_t rows, bool fixed_span) const
268 // ----------------------------------------------------------------------------
269 {
270  double ret = 0;
271  ret += rows*4;
272  if ( !fixed_span )
273  ret += rows*4;
274  (m_iFlags & fAsByte) ? (ret += rows) : (ret += 8*rows);
275  return ret;
276 }
277 
278 // ----------------------------------------------------------------------------
280 // ----------------------------------------------------------------------------
281 {
282  bool sorted = true;
283  size_t size = m_Values.size();
284  if ( size ) {
285  stat.SetFirstSpan(m_Values[0].m_Span);
286  stat.SetFirstValue(m_Values[0].m_Value);
287 
288  for ( size_t i = 1; i < size; ++i ) {
289  stat.AddSpan(m_Values[i].m_Span);
290  stat.AddValue(m_Values[i].m_Value);
291  if ( sorted ) {
292  if ( m_Values[i].m_Pos < m_Values[i-1].m_Pos ) {
293  sorted = false;
294  }
295  if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
296  stat.m_HaveGaps = true;
297  }
298  }
299  }
300  }
301  if ( !sorted ) {
302  sort(m_Values.begin(), m_Values.end());
303  stat.m_HaveGaps = false;
304  for ( size_t i = 1; i < size; ++i ) {
305  if ( m_Values[i].m_Pos != m_Values[i-1].GetEnd() ) {
306  stat.m_HaveGaps = true;
307  break;
308  }
309  }
310  }
311  if ( (m_iFlags & fAsGraph) && stat.m_HaveGaps ) {
312  stat.AddValue(m_GapValue);
313  }
314 
315  const int range = 255;
316  if ( stat.m_Max > stat.m_Min &&
317  (!stat.m_IntValues || stat.m_Max-stat.m_Min > range) ) {
318  stat.m_Step = (stat.m_Max-stat.m_Min)/range;
319  stat.m_StepMul = 1/stat.m_Step;
320  }
321 
322  if ( !(m_iFlags & fAsGraph) && (m_iFlags & fJoinSame) && size ) {
323  TValues nv;
324  nv.reserve(size);
325  nv.push_back(m_Values[0]);
326  for ( size_t i = 1; i < size; ++i ) {
327  if ( m_Values[i].m_Pos == nv.back().GetEnd() &&
328  m_Values[i].m_Value == nv.back().m_Value ) {
329  nv.back().m_Span += m_Values[i].m_Span;
330  }
331  else {
332  nv.push_back(m_Values[i]);
333  }
334  }
335  if ( nv.size() != size ) {
336  double s = xEstimateSize(size, stat.m_FixedSpan);
337  double ns = xEstimateSize(nv.size(), false);
338  if ( ns < s*.75 ) {
339  m_Values.swap(nv);
340  size = m_Values.size();
341  LOG_POST("Joined size: "<<size);
342  stat.m_FixedSpan = false;
343  }
344  }
345  }
346 
347  if ( (m_iFlags & fAsGraph) && !stat.m_FixedSpan ) {
348  stat.m_Span = 1;
349  stat.m_FixedSpan = true;
350  }
351 }
352 
353 // ----------------------------------------------------------------------------
355 // ----------------------------------------------------------------------------
356 {
357  CRef<CSeq_id> id =
359  return id;
360 }
361 
362 // ----------------------------------------------------------------------------
364 // ----------------------------------------------------------------------------
365 {
366  if ( m_Values.empty() ) {
367  loc.SetEmpty(chrom_id);
368  }
369  else {
370  CSeq_interval& interval = loc.SetInt();
371  interval.SetId(chrom_id);
372  interval.SetFrom(m_Values.front().m_Pos);
373  interval.SetTo(m_Values.back().GetEnd()-1);
374  }
375 }
376 
377 // ----------------------------------------------------------------------------
379 // ----------------------------------------------------------------------------
380 {
381  size_t size = m_Values.size();
382 
384  table->SetFeat_type(0);
385 
386  CRef<CSeq_id> chrom_id = xMakeChromId();
387 
388  CRef<CSeq_loc> table_loc(new CSeq_loc);
389  { // Seq-table location
391  table->SetColumns().push_back(col_id);
392  col_id->SetHeader().SetField_name("Seq-table location");
393  col_id->SetDefault().SetLoc(*table_loc);
394  }
395 
396  { // Seq-id
398  table->SetColumns().push_back(col_id);
400  //col_id->SetDefault().SetId(*chrom_id);
401  if (this->xValuesAreFromSingleSequence()) {
402  CRef<CSeq_id> pId = CReadUtil::AsSeqId(m_Values.front().m_Chrom, m_iFlags);
403  col_id->SetDefault().SetId(*pId);
404  }
405  else {
406  CSeqTable_multi_data::TId& seq_id = col_id->SetData().SetId();
407  seq_id.reserve(size);
408  for (TValues::const_iterator cit = m_Values.begin(); cit != m_Values.end(); ++cit) {
409  CRef<CSeq_id> pId = CReadUtil::AsSeqId(cit->m_Chrom, m_iFlags);
410  seq_id.push_back(pId);
411  }
412  }
413  }
414 
415  // position
417  table->SetColumns().push_back(col_pos);
419  CSeqTable_multi_data::TInt& pos = col_pos->SetData().SetInt();
420 
421  SWiggleStat stat;
422  xPreprocessValues(stat);
423 
424  xSetTotalLoc(*table_loc, *chrom_id);
425 
426  table->SetNum_rows(static_cast<TSeqPos>(size));
427  pos.reserve(size);
428 
429  CSeqTable_multi_data::TInt* span_ptr = 0;
430  { // span
432  table->SetColumns().push_back(col_span);
433  col_span->SetHeader().SetField_name("span");
434  if ( stat.m_FixedSpan ) {
435  col_span->SetDefault().SetInt(stat.m_Span);
436  }
437  else {
438  span_ptr = &col_span->SetData().SetInt();
439  span_ptr->reserve(size);
440  }
441  }
442 
443  if ( stat.m_HaveGaps ) {
445  table->SetColumns().push_back(col_step);
446  col_step->SetHeader().SetField_name("value_gap");
447  col_step->SetDefault().SetReal(m_GapValue);
448  }
449 
450  if (m_iFlags & fAsByte) { // values
452  table->SetColumns().push_back(col_min);
453  col_min->SetHeader().SetField_name("value_min");
454  col_min->SetDefault().SetReal(stat.m_Min);
455 
457  table->SetColumns().push_back(col_step);
458  col_step->SetHeader().SetField_name("value_step");
459  col_step->SetDefault().SetReal(stat.m_Step);
460 
462  table->SetColumns().push_back(col_val);
463  col_val->SetHeader().SetField_name("values");
464 
465  CSeqTable_multi_data::TInt2& values = col_val->SetData().SetInt2();
466  values.reserve(size);
467  ITERATE ( TValues, it, m_Values ) {
468  pos.push_back(it->m_Pos);
469  if ( span_ptr ) {
470  span_ptr->push_back(it->m_Span);
471  }
472  values.push_back(stat.AsByte(it->m_Value));
473  }
474  }
475  else {
477  table->SetColumns().push_back(col_val);
478  col_val->SetHeader().SetField_name("values");
479  CSeqTable_multi_data::TReal& values = col_val->SetData().SetReal();
480  values.reserve(size);
481 
482  ITERATE ( TValues, it, m_Values ) {
483  pos.push_back(it->m_Pos);
484  if ( span_ptr ) {
485  span_ptr->push_back(it->m_Span);
486  }
487  values.push_back(it->m_Value);
488  }
489  }
490  return table;
491 }
492 
493 // ----------------------------------------------------------------------------
495 // ----------------------------------------------------------------------------
496 {
497  CRef<CSeq_graph> graph(new CSeq_graph);
498 
499  CRef<CSeq_id> chrom_id = xMakeChromId();
500 
501  CRef<CSeq_loc> graph_loc(new CSeq_loc);
502  graph->SetLoc(*graph_loc);
503 
504  SWiggleStat stat;
505  xPreprocessValues(stat);
506 
507  xSetTotalLoc(*graph_loc, *chrom_id);
508 
509  string trackName = m_pTrackDefaults->Name();
510  if (!trackName.empty()) {
511  graph->SetTitle(trackName);
512  }
513 
514  graph->SetComp(stat.m_Span);
515  graph->SetA(stat.m_Step);
516  graph->SetB(stat.m_Min);
517 
518  CByte_graph& b_graph = graph->SetGraph().SetByte();
519  b_graph.SetMin(stat.AsByte(stat.m_Min));
520  b_graph.SetMax(stat.AsByte(stat.m_Max));
521  b_graph.SetAxis(0);
522  CByte_graph::TValues& bytes = b_graph.SetValues();
523 
524  if ( m_Values.empty() ) {
525  graph->SetNumval(0);
526  }
527  else {
528  _ASSERT(stat.m_FixedSpan);
529  TSeqPos start = m_Values[0].m_Pos;
530  TSeqPos end = m_Values.back().GetEnd();
531  size_t size = (end-start)/stat.m_Span;
532  graph->SetNumval(static_cast<TSeqPos>(size));
533  bytes.resize(size, stat.AsByte(m_GapValue));
534  ITERATE ( TValues, it, m_Values ) {
535  TSeqPos pos = it->m_Pos - start;
536  TSeqPos span = it->m_Span;
537  _ASSERT(pos % stat.m_Span == 0);
538  _ASSERT(span % stat.m_Span == 0);
539  size_t i = pos / stat.m_Span;
540  int v = stat.AsByte(it->m_Value);
541  for ( ; span > 0; span -= stat.m_Span, ++i ) {
542  bytes[i] = v;
543  }
544  }
545  }
546  return graph;
547 }
548 
549 // ----------------------------------------------------------------------------
551  string& line)
552 // ----------------------------------------------------------------------------
553 {
554  const char* ptr = line.c_str();
555  size_t skip = 0;
556  for ( size_t len = line.size(); skip < len; ++skip ) {
557  char c = ptr[skip];
558  if ( c != ' ' && c != '\t' ) {
559  break;
560  }
561  }
562  line = line.substr(skip);
563  return !line.empty();
564 }
565 
566 // ----------------------------------------------------------------------------
568  string& line)
569 // ----------------------------------------------------------------------------
570 {
571  const char* ptr = line.c_str();
572  size_t skip = 0;
573  for ( size_t len = line.size(); skip < len; ++skip ) {
574  char c = ptr[skip];
575  if ( c == ' ' || c == '\t' ) {
576  break;
577  }
578  }
579  if ( skip == 0 ) {
582  "Identifier expected");
583  throw error;
584  }
585  string word(ptr, skip);
586  line = line.substr(skip);
587  return word;
588 }
589 
590 // ----------------------------------------------------------------------------
592  string& line)
593 // ----------------------------------------------------------------------------
594 {
595  const char* ptr = line.c_str();
596  size_t skip = 0;
597  for ( size_t len = line.size(); skip < len; ++skip ) {
598  char c = ptr[skip];
599  if ( c == '=' ) {
600  string name(ptr, skip);
601  line = line.substr(skip+1);
602  return name;
603  }
604  if ( c == ' ' || c == '\t' ) {
605  break;
606  }
607  }
610  "\"=\" expected");
611  throw error;
612 }
613 
614 // ----------------------------------------------------------------------------
616  string& line)
617 // ----------------------------------------------------------------------------
618 {
619  const char* ptr = line.c_str();
620  size_t len = line.size();
621  if ( len && *ptr == '"' ) {
622  size_t pos = 1;
623  for ( ; pos < len; ++pos ) {
624  char c = ptr[pos];
625  if ( c == '"' ) {
626  string value(ptr, pos);
627  line = line.substr(pos+1);
628  return value;
629  }
630  }
633  "Open quotes");
634  throw error;
635  }
636  return xGetWord(line);
637 }
638 
639 // ----------------------------------------------------------------------------
641  string& line,
642  TSeqPos& v)
643 // ----------------------------------------------------------------------------
644 {
647  "Integer value expected");
648 
649  char c = line.c_str()[0];
650  if ( c < '0' || c > '9' ) {
651  throw error;
652  }
653 
654  TSeqPos ret = 0;
655  const char* ptr = line.c_str();
656  for ( size_t skip = 0; ; ++skip ) {
657  char c = ptr[skip];
658  if ( c >= '0' && c <= '9' ) {
659  ret = ret*10 + (c-'0');
660  }
661  else if ( (c == ' ' || c == '\t' || c == '\0') && skip ) {
662  line = line.substr(skip);
663  v = ret;
664  return;
665  }
666  else {
667  throw error;
668  }
669  }
670 }
671 
672 // ----------------------------------------------------------------------------
674  string& line,
675  double& v)
676 // ----------------------------------------------------------------------------
677 {
678  double ret = 0;
679  const char* ptr = line.c_str();
680  size_t skip = 0;
681  bool negate = false, digits = false;
682  for ( ; ; ++skip ) {
683  char c = ptr[skip];
684  if ( !skip ) {
685  if ( c == '-' ) {
686  negate = true;
687  continue;
688  }
689  if ( c == '+' ) {
690  continue;
691  }
692  }
693  if ( c >= '0' && c <= '9' ) {
694  digits = true;
695  ret = ret*10 + (c-'0');
696  }
697  else if ( c == '.' ) {
698  ++skip;
699  break;
700  }
701  else if ( c == '\0' ) {
702  if ( !digits ) {
703  return false;
704  }
705  line.clear();
706  if ( negate ) {
707  ret = -ret;
708  }
709  v = ret;
710  return true;
711  }
712  else {
713  return false;
714  }
715  }
716  double digit_mul = 1;
717  for ( ; ; ++skip ) {
718  char c = ptr[skip];
719  if ( c >= '0' && c <= '9' ) {
720  digits = true;
721  digit_mul *= .1;
722  ret += (c-'0')*digit_mul;
723  }
724  else if ( (c == ' ' || c == '\t' || c == '\0') && digits ) {
725  line.clear();
726  v = (negate ? -ret : ret);
727  return true;
728  }
729  else {
730  return false;
731  }
732  }
733 }
734 
735 // ----------------------------------------------------------------------------
737  string& line,
738  double& v)
739 // ----------------------------------------------------------------------------
740 {
741  if (xTryGetDoubleSimple(line, v)) {
742  return;
743  }
744  const char* ptr = line.c_str();
745  char* endptr = 0;
746  v = strtod(ptr, &endptr);
747  if ( endptr == ptr ) {
750  "Floating point value expected");
751  throw error;
752  }
753  if ( *endptr ) {
756  "Extra text on line");
757  throw error;
758  }
759  line.clear();
760 }
761 
762 // ----------------------------------------------------------------------------
764  CSeq_annot& annot)
765 // ----------------------------------------------------------------------------
766 {
767  if ( m_ChromId.empty() ) {
768  return;
769  }
770  if (m_iFlags & fAsGraph) {
771  annot.SetData().SetGraph().push_back(xMakeGraph());
772  }
773  else {
774  annot.SetData().SetSeq_table(*xMakeTable());
775  }
776  if (annot.GetData().Which() != CSeq_annot::TData::e_not_set) {
777  xAssignTrackData(annot);
778  }
779  m_ChromId.clear();
780 }
781 
782 // ----------------------------------------------------------------------------
784 // ----------------------------------------------------------------------------
785 {
786  if ( m_ChromId.empty() ) {
787  return;
788  }
789  if ( !m_Annot ) {
791  }
792  if (m_iFlags & fAsGraph) {
793  m_Annot->SetData().SetGraph().push_back(xMakeGraph());
794  }
795  else {
796  m_Annot->SetData().SetSeq_table(*xMakeTable());
797  }
798 }
799 
800 // ----------------------------------------------------------------------------
802  const string& chrom)
803 // ----------------------------------------------------------------------------
804 {
805  if ( chrom != m_ChromId ) {
808  m_Values.clear();
809  }
810  m_ChromId = chrom;
811  }
812 }
813 
814 // ----------------------------------------------------------------------------
816  const string& line,
817  CSeq_annot&)
818 // ----------------------------------------------------------------------------
819 {
820  if (!NStr::StartsWith(line, "browser")) {
821  return false;
822  }
823  return true;
824 }
825 
826 // ----------------------------------------------------------------------------
828  const string& line)
829 // ----------------------------------------------------------------------------
830 {
831  if (!xIsTrackLine(line)) {
832  return false;
833  }
835 
837  if (m_pTrackDefaults->Type() == "wiggle_0") {
839  return true;
840  }
841  if (m_pTrackDefaults->Type() == "bedGraph") {
843  return true;
844  }
847  "Invalid track type");
848  throw error;
849 }
850 
851 // ----------------------------------------------------------------------------
852 bool
854  TReaderData::const_iterator& curIt,
855  const TReaderData& readerData)
856 // ----------------------------------------------------------------------------
857 {
858  auto firstLine(curIt->mData);
859  if (!NStr::StartsWith(firstLine, "fixedStep")) {
860  return false;
861  }
862 
863  SFixedStepInfo fixedStepInfo;
864  xGetFixedStepInfo(firstLine, fixedStepInfo);
865  ++curIt;
866  xReadFixedStepData(fixedStepInfo, curIt, readerData);
867  return true;
868 }
869 
870 
871 // ----------------------------------------------------------------------------
872 bool
874  TReaderData::const_iterator& curIt,
875  const TReaderData& readerData)
876 // ----------------------------------------------------------------------------
877 {
878  auto firstLine(curIt->mData);
879  if (!NStr::StartsWith(firstLine, "variableStep")) {
880  return false;
881  }
882 
883  SVarStepInfo variableStepInfo;
884  xGetVariableStepInfo(firstLine, variableStepInfo);
885  ++curIt;
886  xReadVariableStepData(variableStepInfo, curIt, readerData);
887  return true;
888 }
889 
890 
891 // ----------------------------------------------------------------------------
893  const string& directive,
894  SFixedStepInfo& fixedStepInfo)
895 // ----------------------------------------------------------------------------
896 {
897  if ( m_TrackType != eTrackType_wiggle_0 ) {
898  if ( m_TrackType != eTrackType_invalid ) {
901  "Track \"type=wiggle_0\" is required");
902  throw error;
903  }
904  else {
906  }
907  }
908 
909  auto line(directive.substr(string("fixedStep").size() + 1));
911  fixedStepInfo.Reset();
912  while (xSkipWS(line)) {
913  string name = xGetParamName(line);
914  string value = xGetParamValue(line);
915  if (name == "chrom") {
916  fixedStepInfo.mChrom = value;
917  continue;
918  }
919  if (name == "start") {
920  fixedStepInfo.mStart = NStr::StringToUInt(value);
921  if (0 == fixedStepInfo.mStart) {
924  "Bad start value: must be positive. Assuming \"start=1\"");
925  m_pMessageHandler->Report(warning);
926  fixedStepInfo.mStart = 1;
927  }
928  continue;
929  }
930  if (name == "step") {
931  fixedStepInfo.mStep = NStr::StringToUInt(value);
932  continue;
933  }
934  if (name == "span") {
935  fixedStepInfo.mSpan = NStr::StringToUInt(value);
936  continue;
937  }
940  "Bad parameter name. Ignored");
941  m_pMessageHandler->Report(warning);
942  }
943  if (fixedStepInfo.mChrom.empty()) {
946  "Missing chrom parameter");
947  throw error;
948  }
949  if (fixedStepInfo.mStart == 0) {
952  "Missing start parameter");
953  throw error;
954  }
955  if (fixedStepInfo.mStep == 0) {
958  "Missing step parameter");
959  throw error;
960  }
961 }
962 
963 // ----------------------------------------------------------------------------
965  const SFixedStepInfo& fixedStepInfo,
966  TReaderData::const_iterator& curIt,
967  const TReaderData& readerData)
968 // ----------------------------------------------------------------------------
969 {
970  xSetChrom(fixedStepInfo.mChrom);
972  value.m_Chrom = fixedStepInfo.mChrom;
973  value.m_Pos = fixedStepInfo.mStart-1;
974  value.m_Span = fixedStepInfo.mSpan;
975  while (curIt != readerData.end()) {
976  auto line(curIt->mData);
977  xGetDouble(line, value.m_Value);
978  xAddValue(value);
979  value.m_Pos += fixedStepInfo.mStep;
980  curIt++;
981  }
982 }
983 
984 // ----------------------------------------------------------------------------
986  const string& directive,
987  SVarStepInfo& varStepInfo)
988 // ----------------------------------------------------------------------------
989 {
990  if ( m_TrackType != eTrackType_wiggle_0 ) {
991  if ( m_TrackType != eTrackType_invalid ) {
994  "Track \"type=wiggle_0\" is required");
995  throw error;
996  }
997  else {
999  }
1000  }
1001 
1002  varStepInfo.Reset();
1003  auto line(directive.substr(string("variableStep").size() + 1));
1004  while (xSkipWS(line)) {
1005  string name = xGetParamName(line);
1006  string value = xGetParamValue(line);
1007  if ( name == "chrom" ) {
1008  varStepInfo.mChrom = value;
1009  }
1010  else if ( name == "span" ) {
1011  varStepInfo.mSpan = NStr::StringToUInt(value);
1012  }
1013  else {
1014  CReaderMessage warning(eDiag_Warning,
1015  m_uLineNumber,
1016  "Bad parameter name. Ignored");
1017  m_pMessageHandler->Report(warning);
1018  }
1019  }
1020  if ( varStepInfo.mChrom.empty() ) {
1022  m_uLineNumber,
1023  "Missing chrom parameter");
1024  throw error;
1025  }
1026 }
1027 
1028 // ----------------------------------------------------------------------------
1030  const SVarStepInfo& varStepInfo,
1031  TReaderData::const_iterator& curIt,
1032  const TReaderData& readerData)
1033 // ----------------------------------------------------------------------------
1034 {
1035  xSetChrom(varStepInfo.mChrom);
1036  SValueInfo value;
1037  value.m_Chrom = varStepInfo.mChrom;
1038  value.m_Span = varStepInfo.mSpan;
1039  while (curIt != readerData.end()) {
1040  string line(curIt->mData);
1041  xGetPos(line, value.m_Pos);
1042  xSkipWS(line);
1043  xGetDouble(line, value.m_Value);
1044  value.m_Pos -= 1;
1045  xAddValue(value);
1046  curIt++;
1047  }
1048 }
1049 
1050 // =========================================================================
1052  TReaderData::const_iterator& curIt,
1053  const TReaderData& readerData)
1054 // =========================================================================
1055 {
1056  while (curIt != readerData.end()) {
1057  auto line(curIt->mData);
1058  auto chrom = xGetWord(line);
1059  xSetChrom(chrom);
1060 
1061  SValueInfo value;
1062  xSkipWS(line);
1063  xGetPos(line, value.m_Pos);
1064  xSkipWS(line);
1065  xGetPos(line, value.m_Span);
1066  xSkipWS(line);
1067  xGetDouble(line, value.m_Value);
1068  value.m_Span -= value.m_Pos;
1069  xAddValue(value);
1070 
1071  curIt++;
1072  }
1073  return true;
1074 }
1075 
1076 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CByte_graph –.
Definition: Byte_graph.hpp:66
bool HasData() const
void AddRecord(CRawWiggleRecord record)
Common file reader utility functions.
Definition: read_util.hpp:47
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based on the given customization flags.
Definition: read_util.cpp:89
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
unique_ptr< CReaderMessageHandler > m_pMessageHandler
void xGuardedGetData(ILineReader &, TReaderData &, ILineErrorListener *)
virtual bool xUngetLine(ILineReader &)
virtual CRef< CSeq_annot > xCreateSeqAnnot()
unsigned int m_uDataCount
unsigned int m_uLineNumber
unique_ptr< CTrackData > m_pTrackDefaults
virtual bool xProgressInit(ILineReader &istr)
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
virtual bool xGetLine(ILineReader &, string &)
virtual void xGuardedProcessData(const TReaderData &, CSeq_annot &, ILineErrorListener *)
TReaderFlags m_iFlags
virtual bool xIsBrowserLine(const CTempString &)
virtual bool xParseTrackLine(const string &)
virtual void xAssignTrackData(CSeq_annot &)
struct SReaderLine { SReaderLine(unsigned int line, string data):mLine(line), mData(data) {} TReaderLine
Definition: reader_base.hpp:66
virtual bool xIsTrackLine(const CTempString &)
void xGetData(ILineReader &, TReaderData &) override
bool xProcessVariableStepData(TReaderData::const_iterator &, const TReaderData &)
bool xProcessFixedStepData(TReaderData::const_iterator &, const TReaderData &)
virtual bool ReadTrackData(ILineReader &, CRawWiggleTrack &, ILineErrorListener *=nullptr)
string xGetParamValue(string &)
bool xReadVariableStepDataRaw(const SVarStepInfo &, TReaderData::const_iterator &, const TReaderData &, CRawWiggleTrack &)
virtual ~CWiggleReader()
void xDumpChromValues()
CRef< CSeq_table > xMakeTable()
string xGetParamName(string &)
string xGetWord(string &)
vector< SValueInfo > TValues
bool xTryGetDoubleSimple(string &, double &v)
void xProcessData(const TReaderData &, CSeq_annot &) override
void xSetChrom(const string &chrom)
void xSetTotalLoc(CSeq_loc &loc, CSeq_id &chrom_id)
double xEstimateSize(size_t rows, bool fixed_span) const
void xGetPos(string &, TSeqPos &v)
void xGetVariableStepInfo(const string &, SVarStepInfo &)
void xPostProcessAnnot(CSeq_annot &) override
void xPreprocessValues(SWiggleStat &)
void xReadVariableStepData(const SVarStepInfo &, TReaderData::const_iterator &, const TReaderData &)
CRef< CSeq_annot > m_Annot
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &, ILineErrorListener *=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
void xGetFixedStepInfo(const string &, SFixedStepInfo &)
ETrackType m_TrackType
bool xReadFixedStepDataRaw(const SFixedStepInfo &, TReaderData::const_iterator &, const TReaderData &, CRawWiggleTrack &)
void xReadFixedStepData(const SFixedStepInfo &, TReaderData::const_iterator &, const TReaderData &)
bool xParseTrackLine(const string &) override
void xAddValue(const SValueInfo &value)
bool xSkipWS(string &)
bool xValuesAreFromSingleSequence() const
bool xProcessBedData(TReaderData::const_iterator &, const TReaderData &)
void xGetDouble(string &line, double &v)
bool xParseBrowserLine(const string &, CSeq_annot &) override
CRef< CSeq_id > xMakeChromId()
CRef< CSeq_graph > xMakeGraph()
CWiggleReader(int=fDefaults, const string &="", const string &="", CReaderListener *=nullptr)
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
void SetHeader(THeader &value)
Assign a value to Header data member.
void SetData(TData &value)
Assign a value to Data data member.
vector< CRef< CSeq_id > > TId
void SetDefault(TDefault &value)
Assign a value to Default data member.
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_graph_.hpp:784
vector< char > TValues
Definition: Byte_graph_.hpp:89
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
Definition: Seq_graph_.cpp:250
void SetB(TB value)
Assign a value to B data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
Definition: Seq_graph_.cpp:224
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_annot_.hpp:586
@ e_not_set
No variant selected.
Definition: Seq_annot_.hpp:132
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
int AsByte(double v) const
void AddValue(double v)
void SetFirstSpan(TSeqPos span)
void SetFirstValue(double v)
void AddSpan(TSeqPos span)
#define _ASSERT
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:23 2024 by modify_doxy.py rev. 669887