NCBI C++ ToolKit
bed_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bed_reader.cpp 94401 2021-08-02 13:21:31Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * BED file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
45 
47 #include "bed_autosql.hpp"
49 #include "bed_column_data.hpp"
50 
51 #include <algorithm>
52 #include <deque>
53 
55 BEGIN_objects_SCOPE // namespace ncbi::objects::
56 
57 // ============================================================================
59 // ============================================================================
60 {
61 public:
62  using LinePreIt = deque<string>::const_iterator;
63 
65  ILineReader& lineReader):
66  mLineReader(lineReader),
67  mLineNumber(0)
68  {};
69 
70  virtual ~CLinePreBuffer() {};
71 
// Pre-loads lines from mLineReader into mBuffer.
// Reading continues until `numLines` non-comment lines have been buffered or
// the reader reports EOF. Comment/blank lines (as judged by IsCommentLine) are
// buffered as well but do not count against `numLines`.
// Always returns true.
72  bool FillBuffer(
73  size_t numLines)
74  {
75  string line;
76  while (numLines && !mLineReader.AtEOF()) {
77  line = *++mLineReader;
// NOTE(review): listing line 78 is not present in this view; whatever it does
// to `line` happens before the line is buffered.
79  mBuffer.push_back(line);
80  if (!IsCommentLine(line)) {
// Only data lines consume the quota.
81  --numLines;
82  }
83  }
84  return true;
85  }
86 
87  virtual bool IsCommentLine(
88  const CTempString& line)
89  {
90  if (NStr::StartsWith(line, "#")) {
91  return true;
92  }
93  if (NStr::IsBlank(line)) {
94  return true;
95  }
96  return false;
97  };
98 
// Delivers the next non-comment line in `line`.
// Lines already held in mBuffer are consumed first; once the buffer is empty,
// lines are pulled straight from mLineReader. Lines for which IsCommentLine()
// is true are discarded.
// Returns true when a line was delivered, false when both the buffer and the
// reader are exhausted (`line` is left untouched in that case).
// NOTE(review): mLineNumber is incremented only for delivered lines, so it
// counts data lines handed out, not physical lines of the input.
99  bool GetLine(
100  string& line)
101  {
102  while (!mBuffer.empty() || !mLineReader.AtEOF()) {
103  string temp;
104  if (!mBuffer.empty()) {
105  temp = mBuffer.front();
106  mBuffer.pop_front();
107  }
108  else {
109  temp = *++mLineReader;
// NOTE(review): listing line 110 is not present in this view.
111  }
112  if (!IsCommentLine(temp)) {
113  line = temp;
114  ++mLineNumber;
115  return true;
116  }
117  }
118  return false;
119  };
120 
121  bool UngetLine(
122  const string& line)
123  {
124  mBuffer.push_front(line);
125  --mLineNumber;
126  return true;
127  }
128 
129  int LineNumber() const
130  {
131  return mLineNumber;
132  };
133 
135  {
136  return mBuffer.begin();
137  };
138 
140  {
141  return mBuffer.end();
142  };
143 
144  void
146  ILineReader& lineReader) {
147  if (&mLineReader != &lineReader) {
148  mLineReader = lineReader;
149  mBuffer.clear();
150  mLineNumber = 0;
151  }
152  };
153 
// Removes leading and trailing ' ' characters from `str` in place.
// Only the space character is stripped; other whitespace is left alone.
// An empty string is returned unchanged.
// NOTE(review): a non-empty string made up entirely of spaces is not handled.
// The backward scan keeps decrementing newLast past index 0; newLast has the
// unsigned type of str.length(), so it wraps around and str[newLast] is read
// out of range. Bound the backward scan by newFirst (or use
// find_first_not_of / find_last_not_of) when this is next touched.
// NOTE(review): newFirst is deduced as int from the literal 0 and is then
// mixed with the unsigned newLast in the substr() arguments.
154  static void
156  string& str)
157  {
158  if (str.empty()) {
159  return;
160  }
161  auto newFirst = 0;
162  while (str[newFirst] == ' ') {
163  ++newFirst;
164  }
165  auto newLast = str.length() - 1;
166  while (str[newLast] == ' ') {
167  --newLast;
168  }
169  str = str.substr(newFirst, newLast - newFirst + 1);
170  };
171 
172 protected:
174  deque<string> mBuffer;
176 };
177 
178 
179 // ----------------------------------------------------------------------------
180 void
182 // ----------------------------------------------------------------------------
183  CSeq_id& id,
184  unsigned int start,
185  unsigned int stop,
186  ENa_strand strand)
187 {
189  m_pInterval->SetId(id);
190  m_pInterval->SetFrom(start);
191  m_pInterval->SetTo(stop-1);
192  m_pInterval->SetStrand(strand);
193 };
194 
195 // ----------------------------------------------------------------------------
196 void
198  unsigned int score)
199 // ----------------------------------------------------------------------------
200 {
201  m_score = score;
202 };
203 
204 // ----------------------------------------------------------------------------
205 void
207  CNcbiOstream& ostr) const
208 // ----------------------------------------------------------------------------
209 {
210  ostr << " [CRawBedRecord" << endl;
211  ostr << "id=\"" << m_pInterval->GetId().AsFastaString() << "\" ";
212  ostr << "start=" << m_pInterval->GetFrom() << " ";
213  ostr << "stop=" << m_pInterval->GetTo() << " ";
214  ostr << "strand=" <<
215  (m_pInterval->GetStrand() == eNa_strand_minus ? "-" : "+") << " ";
216  if (m_score >= 0) {
217  ostr << "score=" << m_score << " ";
218  }
219  ostr << "]" << endl;
220 };
221 
222 // ----------------------------------------------------------------------------
223 void
225  CNcbiOstream& ostr) const
226 // ----------------------------------------------------------------------------
227 {
228  ostr << "[CRawBedTrack" << endl;
229  for (vector<CRawBedRecord>::const_iterator it = m_Records.begin();
230  it != m_Records.end(); ++it) {
231  it->Dump(ostr);
232  }
233  ostr << "]" << std::endl;
234 }
235 
236 // ----------------------------------------------------------------------------
238  int flags,
239  const string& annotName,
240  const string& annotTitle,
241  CReaderListener* pRL ) :
242 // ----------------------------------------------------------------------------
243  CReaderBase(flags, annotName, annotTitle, CReadUtil::AsSeqId, pRL),
244  m_currentId(""),
245  mColumnSeparator(""),
246  mColumnSplitFlags(0),
247  mRealColumnCount(0),
248  mValidColumnCount(0),
249  mAssumeErrorsAreRecordLevel(true),
250  m_CurrentFeatureCount(0),
251  m_usescore(false),
252  m_CurBatchSize(0),
253  m_MaxBatchSize(10000),
254  mLinePreBuffer(nullptr),
255  mpAutoSql(new CBedAutoSql(flags))
256 {
257 }
258 
259 // ----------------------------------------------------------------------------
261 // ----------------------------------------------------------------------------
262 {
263 }
264 
265 // ----------------------------------------------------------------------------
268  ILineReader& lineReader,
269  ILineErrorListener* pEC )
270 // ----------------------------------------------------------------------------
271 {
273  return CReaderBase::ReadSeqAnnot(lineReader, pEC);
274 }
275 
276 // ----------------------------------------------------------------------------
// Opens the file named `fileName` and hands the resulting stream to
// SetAutoSql(istr), returning that call's result.
// Returns false if opening the file raises a CException (its message is
// written to cerr first).
277 bool
279  const string& fileName)
280 // ----------------------------------------------------------------------------
281 {
282  CNcbiIfstream istr;
283  try {
// Temporarily make the stream throw on failbit so a failed open() is
// reported as an exception; the previous mask is restored afterwards.
284  auto origExceptions = istr.exceptions();
285  istr.exceptions(std::istream::failbit);
286  istr.open(fileName);
287  istr.exceptions(origExceptions);
288  }
// NOTE(review): the handler catches CException only. Confirm that the
// exception raised by the stream on a failed open is a CException; if it is
// the standard stream failure type instead, it bypasses this handler and
// propagates to the caller rather than producing `false`.
289  catch (CException& e) {
290  cerr << e.GetMsg() << endl;
291  return false;
292  }
294  return SetAutoSql(istr);
295 }
296 
297 // ----------------------------------------------------------------------------
298 bool
300  CNcbiIstream& istr)
301 // ----------------------------------------------------------------------------
302 {
303  return mpAutoSql->Load(istr, *m_pMessageHandler);
304 }
305 
306 // ----------------------------------------------------------------------------
309 // ----------------------------------------------------------------------------
310 {
311  CRef<CSeq_annot> pAnnot(new CSeq_annot);
312  if (!m_AnnotName.empty()) {
313  pAnnot->SetNameDesc(m_AnnotName);
314  }
315  if (!m_AnnotTitle.empty()) {
316  pAnnot->SetTitleDesc(m_AnnotTitle);
317  }
318  CRef<CAnnot_descr> pDescr(new CAnnot_descr);
319  pAnnot->SetDesc(*pDescr);
320  return pAnnot;
321 }
322 
323 // ----------------------------------------------------------------------------
324 void
326  ILineReader& lr,
327  TReaderData& readerData)
328 // ----------------------------------------------------------------------------
329 {
330  if (!mLinePreBuffer) {
331  mLinePreBuffer.reset(new CLinePreBuffer(lr));
332  }
333  if (mRealColumnCount == 0) {
335  }
336 
337  readerData.clear();
338  string line;
339  if (!mLinePreBuffer->GetLine(line)) {
340  return;
341  }
342  bool isBrowserLine = NStr::StartsWith(line, "browser ");
343  bool isTrackLine = NStr::StartsWith(line, "track ");
344  if (xIsTrackLine(line) && m_uDataCount != 0) {
345  mLinePreBuffer->UngetLine(line);
346  return;
347  }
348  m_uLineNumber = mLinePreBuffer->LineNumber();
349  readerData.push_back(TReaderLine{m_uLineNumber, line});
350  if (!isBrowserLine && !isTrackLine) {
351  ++m_uDataCount;
352  }
353 }
354 
355 // ----------------------------------------------------------------------------
356 void
358  const TReaderData& readerData,
359  CSeq_annot& annot)
360 // ----------------------------------------------------------------------------
361 {
362  for (const auto& lineData: readerData) {
363  string line = lineData.mData;
364  if (xParseTrackLine(line)) {
365  return;
366  }
367  if (xParseBrowserLine(line, annot)) {
368  return;
369  }
370  xParseFeature(lineData, annot, nullptr);
372  }
373 }
374 
375 // ----------------------------------------------------------------------------
377  CLinePreBuffer& preBuffer,
378  ILineErrorListener* pEc)
379 // ----------------------------------------------------------------------------
380 {
381  if (this->m_iFlags & fAutoSql) {
382  mValidColumnCount = mRealColumnCount = mpAutoSql->ColumnCount();;
383  return true;
384  }
385 
386  using LineIt = CLinePreBuffer::LinePreIt;
387  int bufferLineNumber = 0;
388  CReaderMessage fatalColumns(
389  eDiag_Fatal,
390  0,
391  "Bad data line: Inconsistent column count.");
392 
393  CReaderMessage fatalChroms(
394  eDiag_Fatal,
395  0,
396  "Bad data line: Invalid chrom boundaries.");
397 
398  const size_t MIN_SAMPLE_SIZE = 50;
399  preBuffer.FillBuffer(MIN_SAMPLE_SIZE);
400 
402  vector<string>::size_type realColumnCount = 0;
403  vector<string>::size_type validColumnCount = 0;
404  for (LineIt lineIt = preBuffer.begin(); lineIt != preBuffer.end(); ++lineIt) {
405  bufferLineNumber++;
406  const auto& line = *lineIt;
407  if (preBuffer.IsCommentLine(line)) {
408  continue;
409  }
410  if (this->xIsTrackLine(line)) {
411  continue;
412  }
413  if (this->xIsBrowserLine(line)) {
414  continue;
415  }
416 
417  CBedColumnData columnData(SReaderLine(bufferLineNumber, line));
418  if (realColumnCount == 0 ) {
419  realColumnCount = columnData.ColumnCount();
420  }
421  if (realColumnCount != columnData.ColumnCount()) {
422  fatalColumns.SetLineNumber(bufferLineNumber);
423  throw(fatalColumns);
424  }
425 
426  if (validColumnCount == 0) {
427  validColumnCount = realColumnCount;
428  if (validColumnCount > 12) {
429  validColumnCount = 12;
430  }
431  }
432  unsigned long chromStart = 0, chromEnd = 0;
433  try {
434  chromStart = NStr::StringToULong(columnData[1]);
435  chromEnd = NStr::StringToULong(columnData[2]);
436  }
437  catch (CException&) {
438  fatalChroms.SetLineNumber(bufferLineNumber);
439  throw(fatalChroms);
440  }
441  if (validColumnCount >= 7) {
442  try {
443  auto thickStart = NStr::StringToULong(columnData[6]);
444  if (thickStart < chromStart || chromEnd < thickStart) {
445  validColumnCount = 6;
446  }
447  }
448  catch(CException&) {
449  validColumnCount = 6;
450  }
451  }
452  if (validColumnCount >= 8) {
453  try {
454  auto thickEnd = NStr::StringToULong(columnData[7]);
455  if (thickEnd < chromStart || chromEnd < thickEnd) {
456  validColumnCount = 6;
457  }
458  }
459  catch(CException&) {
460  validColumnCount = 6;
461  }
462  }
463 
464  int blockCount;
465  if (validColumnCount >= 10) {
466  try {
467  blockCount = NStr::StringToInt(
468  columnData[9], NStr::fDS_ProhibitFractions);
469  if (blockCount < 1) {
470  validColumnCount = 9;
471  }
472  }
473  catch(CException&) {
474  validColumnCount = 9;
475  }
476  }
477  if (validColumnCount >= 11) {
478  vector<string> blockSizes;
479  auto col10 = columnData[10];
480  if (NStr::EndsWith(col10, ",")) {
481  col10 = col10.substr(0, col10.size()-1);
482  }
483  NStr::Split(col10, ",", blockSizes, NStr::fSplit_MergeDelimiters);
484  if (blockSizes.size() != blockCount) {
485  validColumnCount = 9;
486  }
487  else {
488  try {
489  for (auto blockSize: blockSizes) {
490  NStr::StringToULong(blockSize);
491  }
492  }
493  catch(CException&) {
494  validColumnCount = 9;
495  }
496  }
497  }
498  if (validColumnCount >= 12) {
499  vector<string> blockStarts;
500  auto col11 = columnData[11];
501  if (NStr::EndsWith(col11, ",")) {
502  col11 = col11.substr(0, col11.size()-1);
503  }
504  NStr::Split(col11, ",", blockStarts, NStr::fSplit_MergeDelimiters);
505  if (blockStarts.size() != blockCount) {
506  validColumnCount = 9;
507  }
508  else {
509  try {
510  for (auto blockStart: blockStarts) {
511  NStr::StringToULong(blockStart);
512  }
513  }
514  catch(CException&) {
515  validColumnCount = 9;
516  }
517  }
518  }
519  }
520  mRealColumnCount = realColumnCount;
521  mValidColumnCount = validColumnCount;
523  validColumnCount == realColumnCount &&
524  validColumnCount != 7 &&
525  validColumnCount != 10 &&
526  validColumnCount != 11);
527 
528  return true;
529 }
530 
531 // ----------------------------------------------------------------------------
533  CSeq_annot& annot)
534 // ----------------------------------------------------------------------------
535 {
536  xAddConversionInfo(annot, nullptr);
537  xAssignTrackData(annot);
538  xAssignBedColumnCount(annot);
539 }
540 
541 // ----------------------------------------------------------------------------
542 bool
544  const string& strLine)
545 // ----------------------------------------------------------------------------
546 {
547  CReaderMessage warning(
550  "Bad track line: Expected \"track key1=value1 key2=value2 ...\". Ignored.");
551 
552  if ( ! NStr::StartsWith( strLine, "track" ) ) {
553  return false;
554  }
555  vector<string> parts;
556  CReadUtil::Tokenize( strLine, " \t", parts );
557  if (parts.size() >= 3) {
558  const string digits("0123456789");
559  bool col2_is_numeric =
560  (string::npos == parts[1].find_first_not_of(digits));
561  bool col3_is_numeric =
562  (string::npos == parts[2].find_first_not_of(digits));
563  if (col2_is_numeric && col3_is_numeric) {
564  return false;
565  }
566  }
567  m_currentId.clear();
568  if (!CReaderBase::xParseTrackLine(strLine)) {
569  m_pMessageHandler->Report(warning);
570  }
571  return true;
572 }
573 
574 // ----------------------------------------------------------------------------
575 bool
577  const SReaderLine& lineData,
578  CSeq_annot& annot,
579  ILineErrorListener* pEC)
580 // ----------------------------------------------------------------------------
581 {
582  CBedColumnData columnData(lineData);
583  if (columnData.ColumnCount()!= mRealColumnCount) {
585  eDiag_Error,
587  "Bad data line: Inconsistent column count.");
588  throw error;
589  }
590 
592  return xParseFeatureThreeFeatFormat(columnData, annot, pEC);
593  }
595  return xParseFeatureGeneModelFormat(columnData, annot, pEC);
596  }
597  else if (m_iFlags & CBedReader::fAutoSql) {
598  return xParseFeatureAutoSql(columnData, annot, pEC);
599  }
600  else {
601  return xParseFeatureUserFormat(columnData, annot, pEC);
602  }
603  return false;
604 }
605 
606 // ----------------------------------------------------------------------------
608  const CBedColumnData& columnData,
609  CSeq_annot& annot,
610  ILineErrorListener* pEC)
611 // ----------------------------------------------------------------------------
612 {
613  unsigned int baseId = 3*m_CurrentFeatureCount;
614 
615  if (!xAppendFeatureChrom(columnData, annot, baseId, pEC)) {
616  return false;
617  }
618  if (xContainsThickFeature(columnData) &&
619  !xAppendFeatureThick(columnData, annot, baseId, pEC)) {
620  return false;
621  }
622  if (xContainsBlockFeature(columnData) &&
623  !xAppendFeatureBlock(columnData, annot, baseId, pEC)) {
624  return false;
625  }
626  return true;
627 }
628 
629 // ----------------------------------------------------------------------------
631  const CBedColumnData& columnData,
632  CSeq_annot& annot,
633  ILineErrorListener* pEC)
634 // ----------------------------------------------------------------------------
635 {
636  unsigned int baseId = 3*m_CurrentFeatureCount;
637 
638  CRef<CSeq_feat> pGene = xAppendFeatureGene(columnData, annot, baseId, pEC);
639  if (!pGene) {
640  return false;
641  }
642 
643  CRef<CSeq_feat> pRna;
644  if (xContainsRnaFeature(columnData)) {//blocks
645  pRna = xAppendFeatureRna(columnData, annot, baseId, pEC);
646  if (!pRna) {
647  return false;
648  }
649  }
650 
651  CRef<CSeq_feat> pCds;
652  if (xContainsCdsFeature(columnData)) {//thick
653  pCds = xAppendFeatureCds(columnData, annot, baseId, pEC);
654  if (!pCds) {
655  return false;
656  }
657  }
658 
659  if (pRna && pCds) {
660  CRef<CSeq_loc> pRnaLoc(new CSeq_loc);
661  CRef<CSeq_loc> pClippedLoc = pRna->GetLocation().Intersect(pCds->GetLocation(), 0, 0);
662  pCds->SetLocation(*pClippedLoc);
663  }
664  return true;
665 }
666 
667 // ----------------------------------------------------------------------------
669  const CBedColumnData& columnData,
670  CSeq_annot& annot,
671  unsigned int baseId,
672  ILineErrorListener* pEC)
673 // ----------------------------------------------------------------------------
674 {
675  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
676  CRef<CSeq_feat> feature;
677  feature.Reset(new CSeq_feat);
678 
679  xSetFeatureLocationChrom(feature, columnData);
680  xSetFeatureIdsChrom(feature, columnData, baseId);
681  xSetFeatureBedData(feature, columnData, pEC);
682 
683  ftable.push_back(feature);
684  m_currentId = columnData[0];
685  return true;
686 }
687 
688 // ----------------------------------------------------------------------------
690  const CBedColumnData& columnData,
691  CSeq_annot& annot,
692  unsigned int baseId,
693  ILineErrorListener* pEC)
694 // ----------------------------------------------------------------------------
695 {
696  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
697  CRef<CSeq_feat> feature;
698  feature.Reset(new CSeq_feat);
699 
700  xSetFeatureLocationGene(feature, columnData);
701  xSetFeatureIdsGene(feature, columnData, baseId);
702  xSetFeatureBedData(feature, columnData, pEC);
703 
704  ftable.push_back(feature);
705  m_currentId = columnData[0];
706  return feature;
707 }
708 
709 // ----------------------------------------------------------------------------
711  const CBedColumnData& columnData,
712  CSeq_annot& annot,
713  unsigned int baseId,
714  ILineErrorListener* pEC)
715 // ----------------------------------------------------------------------------
716 {
717  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
718  CRef<CSeq_feat> feature;
719  feature.Reset(new CSeq_feat);
720 
721  xSetFeatureLocationThick(feature, columnData);
722  xSetFeatureIdsThick(feature, columnData, baseId);
723  xSetFeatureBedData(feature, columnData, pEC);
724 
725  ftable.push_back(feature);
726  return true;
727 }
728 
729 // ----------------------------------------------------------------------------
731  const CBedColumnData& columnData,
732  CSeq_annot& annot,
733  unsigned int baseId,
734  ILineErrorListener* pEC)
735 // ----------------------------------------------------------------------------
736 {
737  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
738  CRef<CSeq_feat> feature;
739  feature.Reset(new CSeq_feat);
740 
741  xSetFeatureLocationCds(feature, columnData);
742  xSetFeatureIdsCds(feature, columnData, baseId);
743  xSetFeatureBedData(feature, columnData, pEC);
744 
745  ftable.push_back(feature);
746  return feature;
747 }
748 
749 // ----------------------------------------------------------------------------
751  const CBedColumnData& columnData,
752  CSeq_annot& annot,
753  unsigned int baseId,
754  ILineErrorListener* pEC)
755 // ----------------------------------------------------------------------------
756 {
757  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
758  CRef<CSeq_feat> feature;
759  feature.Reset(new CSeq_feat);
760 
761  xSetFeatureLocationBlock(feature, columnData);
762  xSetFeatureIdsBlock(feature, columnData, baseId);
763  xSetFeatureBedData(feature, columnData, pEC);
764 
765  ftable.push_back(feature);
766  return true;
767 }
768 
769 // ----------------------------------------------------------------------------
771  const CBedColumnData& columnData,
772  CSeq_annot& annot,
773  unsigned int baseId,
774  ILineErrorListener* pEC)
775 // ----------------------------------------------------------------------------
776 {
777  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
778  CRef<CSeq_feat> feature;
779  feature.Reset(new CSeq_feat);
780 
781  xSetFeatureLocationRna(feature, columnData);
782  xSetFeatureIdsRna(feature, columnData, baseId);
783  xSetFeatureBedData(feature, columnData, pEC);
784 
785  ftable.push_back(feature);
786  return feature;
787 }
788 
789 
790 // ----------------------------------------------------------------------------
792  const CBedColumnData& columnData,
793  CSeq_annot& annot,
794  ILineErrorListener* pEC)
795 // ----------------------------------------------------------------------------
796 {
797  // assign
798  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
799  CRef<CSeq_feat> feature;
800  feature.Reset( new CSeq_feat );
801 
802  xSetFeatureTitle(feature, columnData);
803  xSetFeatureLocation(feature, columnData);
804  xSetFeatureDisplayData(feature, columnData);
805 
806  ftable.push_back( feature );
807  m_currentId = columnData[0];
808  return true;
809 }
810 
811 // ----------------------------------------------------------------------------
813  const CBedColumnData& columnData,
814  CSeq_annot& annot,
815  ILineErrorListener* pEC)
816 // ----------------------------------------------------------------------------
817 {
818  CRef<CSeq_feat> pFeat(new CSeq_feat);;
819  if (!mpAutoSql->ReadSeqFeat(columnData, *pFeat, *m_pMessageHandler)) {
820  return false;
821  }
822  CSeq_annot::C_Data::TFtable& ftable = annot.SetData().SetFtable();
823  ftable.push_back(pFeat);
824  m_currentId = columnData[0];
825  return true;
826 }
827 
828 
829 // ----------------------------------------------------------------------------
831  CRef<CSeq_feat>& feature,
832  const CBedColumnData& columnData)
833 // ----------------------------------------------------------------------------
834 {
835  CRef<CUser_object> display_data( new CUser_object );
836  display_data->SetType().SetStr( "Display Data" );
837  if (mValidColumnCount >= 4) {
838  display_data->AddField( "name", columnData[3] );
839  }
840  else {
841  display_data->AddField( "name", string("") );
842  feature->SetData().SetUser( *display_data );
843  return;
844  }
845  if (mValidColumnCount >= 5) {
846  if ( !m_usescore ) {
847  display_data->AddField(
848  "score",
849  NStr::StringToInt(columnData[4],
851  feature->AddOrReplaceQualifier("score", columnData[4]);
852  }
853  else {
854  display_data->AddField(
855  "greylevel",
856  NStr::StringToInt(columnData[4],
858  }
859  }
860  if (mValidColumnCount >= 7) {
861  display_data->AddField(
862  "thickStart",
864  }
865  if (mValidColumnCount >= 8) {
866  display_data->AddField(
867  "thickEnd",
868  NStr::StringToInt(columnData[7], NStr::fDS_ProhibitFractions) - 1 );
869  }
870  if (mValidColumnCount >= 9) {
871  display_data->AddField(
872  "itemRGB",
873  columnData[8]);
874  }
875  if (mValidColumnCount >= 10) {
876  display_data->AddField(
877  "blockCount",
879  }
880  if (mValidColumnCount >= 11) {
881  display_data->AddField( "blockSizes", columnData[10] );
882  }
883  if (mValidColumnCount >= 12) {
884  display_data->AddField( "blockStarts", columnData[11] );
885  }
886  feature->SetData().SetUser( *display_data );
887 }
888 
889 // ----------------------------------------------------------------------------
891  CRef<CSeq_feat>& feature,
892  const CBedColumnData& columnData)
893 // ----------------------------------------------------------------------------
894 {
895  xSetFeatureLocation(feature, columnData);
896 
897  CRef<CUser_object> pBed(new CUser_object());
898  pBed->SetType().SetStr("BED");
899  pBed->AddField("location", "chrom");
900  CSeq_feat::TExts& exts = feature->SetExts();
901  exts.push_back(pBed);
902 }
903 
904 // ----------------------------------------------------------------------------
906  CRef<CSeq_feat>& feature,
907  const CBedColumnData& columnData)
908 // ----------------------------------------------------------------------------
909 {
910  xSetFeatureLocation(feature, columnData);
911 
912  CRef<CUser_object> pBed(new CUser_object());
913  pBed->SetType().SetStr("BED");
914  pBed->AddField("location", "chrom");
915  CSeq_feat::TExts& exts = feature->SetExts();
916  exts.push_back(pBed);
917 }
918 
919 // ----------------------------------------------------------------------------
921  CRef<CSeq_feat>& feature,
922  const CBedColumnData& columnData)
923 // ----------------------------------------------------------------------------
924 {
926  int from, to;
927  from = to = -1;
928 
929  //already established: We got at least three columns
930  try {
931  from = NStr::StringToInt(columnData[6]);
932  }
933  catch (std::exception&) {
935  eDiag_Error,
937  "Invalid data line: Bad \"ThickStart\" value.");
938  throw error;
939  }
940  try {
941  to = NStr::StringToInt(columnData[7]) - 1;
942  }
943  catch (std::exception&) {
945  eDiag_Error,
947  "Invalid data line: Bad \"ThickStop\" value.");
948  throw error;
949  }
950  if (from == to) {
951  location->SetPnt().SetPoint(from);
952  }
953  else if (from < to) {
954  location->SetInt().SetFrom(from);
955  location->SetInt().SetTo(to);
956  }
957  else if (from > to) {
958  //below: flip commenting to switch from null locations to impossible
959  // intervals
960  //location->SetInt().SetFrom(from);
961  //location->SetInt().SetTo(to);
962  location->SetNull();
963  }
964 
965  if (!location->IsNull()) {
966  location->SetStrand(xGetStrand(columnData));
967  }
968  CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
969  location->SetId(*id);
970  feature->SetLocation(*location);
971 
972  CRef<CUser_object> pBed(new CUser_object());
973  pBed->SetType().SetStr("BED");
974  pBed->AddField("location", "thick");
975  CSeq_feat::TExts& exts = feature->SetExts();
976  exts.push_back(pBed);
977 }
978 
979 // ----------------------------------------------------------------------------
981  CRef<CSeq_feat>& feature,
982  const CBedColumnData& columnData)
983 // ----------------------------------------------------------------------------
984 {
986  int from, to;
987  from = to = -1;
988 
989  //already established: We got at least three columns
990  try {
991  from = NStr::StringToInt(columnData[6]);
992  }
993  catch (std::exception&) {
995  eDiag_Error,
997  "Invalid data line: Bad \"ThickStart\" value.");
998  throw error;
999  }
1000  try {
1001  to = NStr::StringToInt(columnData[7]) - 1;
1002  }
1003  catch (std::exception&) {
1005  eDiag_Error,
1006  m_uLineNumber,
1007  "Invalid data line: Bad \"ThickStop\" value.");
1008  throw error;
1009  }
1010  if (from == to) {
1011  location->SetPnt().SetPoint(from);
1012  }
1013  else if (from < to) {
1014  location->SetInt().SetFrom(from);
1015  location->SetInt().SetTo(to);
1016  }
1017  else if (from > to) {
1018  //below: flip commenting to switch from null locations to impossible
1019  // intervals
1020  //location->SetInt().SetFrom(from);
1021  //location->SetInt().SetTo(to);
1022  location->SetNull();
1023  }
1024 
1025  if (!location->IsNull()) {
1026  location->SetStrand(xGetStrand(columnData));
1027  }
1028  CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1029  location->SetId(*id);
1030  feature->SetLocation(*location);
1031 
1032  CRef<CUser_object> pBed(new CUser_object());
1033  pBed->SetType().SetStr("BED");
1034  pBed->AddField("location", "thick");
1035  CSeq_feat::TExts& exts = feature->SetExts();
1036  exts.push_back(pBed);
1037 }
1038 
1039 // ----------------------------------------------------------------------------
1041  const CBedColumnData& columnData) const
1042 // ----------------------------------------------------------------------------
1043 {
// Derives the strand for a record from its strand column.
// The strand is read from column 5, or from column 4 when the record has
// exactly five columns and column 4 is "+" or "-".
// If that column exists and holds anything other than "+", "-" or ".", an
// `error` object reporting "Invalid strand character" for m_uLineNumber is
// thrown (the line declaring `error` is not visible in this view).
// Returns eNa_strand_minus for "-" and eNa_strand_plus for everything else,
// so "." is reported as plus.
1044  size_t strand_field = 5;
1045  if (columnData.ColumnCount() == 5 &&
1046  (columnData[4] == "-" || columnData[4] == "+")) {
1047  strand_field = 4;
1048  }
1049  if (strand_field < columnData.ColumnCount()) {
1050  string strand = columnData[strand_field];
1051  if (strand != "+" && strand != "-" && strand != ".") {
1053  eDiag_Error,
1054  m_uLineNumber,
1055  "Invalid data line: Invalid strand character.");
1056  throw error;
1057  }
1058  }
// NOTE(review): this lookup is not covered by the bounds check above. For a
// record with fewer than six columns (and no strand in column 4),
// strand_field is >= ColumnCount() here. Confirm that
// CBedColumnData::operator[] tolerates an index past the last column, or
// guard this return.
1059  return (columnData[strand_field] == "-" ? eNa_strand_minus : eNa_strand_plus);
1060 }
1061 
1062 // ----------------------------------------------------------------------------
1064  CRef<CSeq_feat>& feature,
1065  const CBedColumnData& columnData)
1066 // ----------------------------------------------------------------------------
1067 {
1068  //already established: there are sufficient columns to do this
1069  size_t blockCount = NStr::StringToUInt(columnData[9]);
1070  vector<size_t> blockSizes;
1071  vector<size_t> blockStarts;
1072  {{
1073  blockSizes.reserve(blockCount);
1074  vector<string> vals;
1075  NStr::Split(columnData[10], ",", vals);
1076  if (vals.back() == "") {
1077  vals.erase(vals.end()-1);
1078  }
1079  if (vals.size() != blockCount) {
1081  eDiag_Error,
1082  columnData.LineNo(),
1083  "Invalid data line: Bad value count in \"blockSizes\".");
1084  throw error;
1085  }
1086  try {
1087  for (size_t i=0; i < blockCount; ++i) {
1088  blockSizes.push_back(NStr::StringToUInt(vals[i]));
1089  }
1090  }
1091  catch (std::exception&) {
1093  eDiag_Error,
1094  columnData.LineNo(),
1095  "Invalid data line: Malformed \"blockSizes\" column.");
1096  throw error;
1097  }
1098  }}
1099  {{
1100  blockStarts.reserve(blockCount);
1101  vector<string> vals;
1102  size_t baseStart = NStr::StringToUInt(columnData[1]);
1103  NStr::Split(columnData[11], ",", vals);
1104  if (vals.back() == "") {
1105  vals.erase(vals.end()-1);
1106  }
1107  if (vals.size() != blockCount) {
1109  eDiag_Error,
1110  columnData.LineNo(),
1111  "Invalid data line: Bad value count in \"blockStarts\".");
1112  throw error;
1113  }
1114  try {
1115  for (size_t i=0; i < blockCount; ++i) {
1116  blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1117  }
1118  }
1119  catch (std::exception&) {
1121  eDiag_Error,
1122  columnData.LineNo(),
1123  "Invalid data line: Malformed \"blockStarts\" column.");
1124  throw error;
1125  }
1126  }}
1127 
1128  CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1129  ENa_strand strand = xGetStrand(columnData);
1130  CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1131 
1132  bool negative = columnData[5] == "-";
1133 
1135 
1136  for (size_t i=0; i < blockCount; ++i) {
1137  CRef<CSeq_interval> pInterval(new CSeq_interval);
1138  pInterval->SetId(*pId);
1139  pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1140  pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1141  blockStarts[i] + blockSizes[i] - 1));
1142  pInterval->SetStrand(strand);
1143  if (negative)
1144  blocks.insert(blocks.begin(), pInterval);
1145  else
1146  blocks.push_back(pInterval);
1147  }
1148 
1149  CRef<CUser_object> pBed(new CUser_object());
1150  pBed->SetType().SetStr("BED");
1151  pBed->AddField("location", "block");
1152  CSeq_feat::TExts& exts = feature->SetExts();
1153  exts.push_back(pBed);
1154 }
1155 
1156 // ----------------------------------------------------------------------------
1158  CRef<CSeq_feat>& feature,
1159  const CBedColumnData& columnData)
1160 // ----------------------------------------------------------------------------
1161 {
1162  //already established: there are sufficient columns to do this
1163  size_t blockCount = NStr::StringToUInt(columnData[9]);
1164  vector<size_t> blockSizes;
1165  vector<size_t> blockStarts;
1166  {{
1167  blockSizes.reserve(blockCount);
1168  vector<string> vals;
1169  NStr::Split(columnData[10], ",", vals);
1170  if (vals.back() == "") {
1171  vals.erase(vals.end()-1);
1172  }
1173  if (vals.size() != blockCount) {
1175  eDiag_Error,
1176  columnData.LineNo(),
1177  "Invalid data line: Bad value count in \"blockSizes\".");
1178  throw error;
1179  }
1180  try {
1181  for (size_t i=0; i < blockCount; ++i) {
1182  blockSizes.push_back(NStr::StringToUInt(vals[i]));
1183  }
1184  }
1185  catch (std::exception&) {
1187  eDiag_Error,
1188  columnData.LineNo(),
1189  "Invalid data line: Malformed \"blockSizes\" column.");
1190  throw error;
1191  }
1192  }}
1193  {{
1194  blockStarts.reserve(blockCount);
1195  vector<string> vals;
1196  size_t baseStart = NStr::StringToUInt(columnData[1]);
1197  NStr::Split(columnData[11], ",", vals);
1198  if (vals.back() == "") {
1199  vals.erase(vals.end()-1);
1200  }
1201  if (vals.size() != blockCount) {
1203  eDiag_Error,
1204  columnData.LineNo(),
1205  "Invalid data line: Bad value count in \"blockStarts\".");
1206  throw error;
1207  }
1208  try {
1209  for (size_t i=0; i < blockCount; ++i) {
1210  blockStarts.push_back(baseStart + NStr::StringToUInt(vals[i]));
1211  }
1212  }
1213  catch (std::exception&) {
1215  eDiag_Error,
1216  columnData.LineNo(),
1217  "Invalid data line: Malformed \"blockStarts\" column.");
1218  throw error;
1219  }
1220  }}
1221 
1222  CPacked_seqint& location = feature->SetLocation().SetPacked_int();
1223  ENa_strand strand = xGetStrand(columnData);
1224  CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1225 
1226  bool negative = columnData[5] == "-";
1227 
1229 
1230  for (size_t i=0; i < blockCount; ++i) {
1231  CRef<CSeq_interval> pInterval(new CSeq_interval);
1232  pInterval->SetId(*pId);
1233  pInterval->SetFrom(static_cast<CSeq_interval::TFrom>(blockStarts[i]));
1234  pInterval->SetTo(static_cast<CSeq_interval::TTo>(
1235  blockStarts[i] + blockSizes[i] -1));
1236  pInterval->SetStrand(strand);
1237  if (negative)
1238  blocks.insert(blocks.begin(), pInterval);
1239  else
1240  blocks.push_back(pInterval);
1241  }
1242 
1243  CRef<CUser_object> pBed(new CUser_object());
1244  pBed->SetType().SetStr("BED");
1245  pBed->AddField("location", "block");
1246  CSeq_feat::TExts& exts = feature->SetExts();
1247  exts.push_back(pBed);
1248 }
1249 
1250 // ----------------------------------------------------------------------------
1252  CRef<CSeq_feat>& feature,
1253  const CBedColumnData& columnData,
1254  unsigned int baseId)
1255 // ----------------------------------------------------------------------------
1256 {
1257  baseId++; //0-based to 1-based
1258  feature->SetId().SetLocal().SetId(baseId);
1259 
1260  if (xContainsThickFeature(columnData)) {
1261  CRef<CFeat_id> pIdThick(new CFeat_id);
1262  pIdThick->SetLocal().SetId(baseId+1);
1263  CRef<CSeqFeatXref> pXrefThick(new CSeqFeatXref);
1264  pXrefThick->SetId(*pIdThick);
1265  feature->SetXref().push_back(pXrefThick);
1266  }
1267 
1268  if (xContainsBlockFeature(columnData)) {
1269  CRef<CFeat_id> pIdBlock(new CFeat_id);
1270  pIdBlock->SetLocal().SetId(baseId+2);
1271  CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1272  pXrefBlock->SetId(*pIdBlock);
1273  feature->SetXref().push_back(pXrefBlock);
1274  }
1275 }
1276 
1277 // ----------------------------------------------------------------------------
1279  CRef<CSeq_feat>& feature,
1280  const CBedColumnData& columnData,
1281  unsigned int baseId)
1282 // ----------------------------------------------------------------------------
1283 {
1284  baseId++; //0-based to 1-based
1285  feature->SetId().SetLocal().SetId(baseId);
1286 }
1287 
1288 // ----------------------------------------------------------------------------
1290  CRef<CSeq_feat>& feature,
1291  const CBedColumnData& columnData,
1292  unsigned int baseId)
1293 // ----------------------------------------------------------------------------
1294 {
1295  baseId++; //0-based to 1-based
1296  feature->SetId().SetLocal().SetId(baseId+1);
1297 
1298  CRef<CFeat_id> pIdChrom(new CFeat_id);
1299  pIdChrom->SetLocal().SetId(baseId);
1300  CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1301  pXrefChrom->SetId(*pIdChrom);
1302  feature->SetXref().push_back(pXrefChrom);
1303 
1304  if (xContainsBlockFeature(columnData)) {
1305  CRef<CFeat_id> pIdBlock(new CFeat_id);
1306  pIdBlock->SetLocal().SetId(baseId+2);
1307  CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1308  pXrefBlock->SetId(*pIdBlock);
1309  feature->SetXref().push_back(pXrefBlock);
1310  }
1311 }
1312 
1313 // ----------------------------------------------------------------------------
1315  CRef<CSeq_feat>& feature,
1316  const CBedColumnData& columnData,
1317  unsigned int baseId)
1318 // ----------------------------------------------------------------------------
1319 {
1320  baseId++; //0-based to 1-based
1321  feature->SetId().SetLocal().SetId(baseId+1);
1322 
1323  if (xContainsBlockFeature(columnData)) {
1324  CRef<CFeat_id> pIdBlock(new CFeat_id);
1325  pIdBlock->SetLocal().SetId(baseId+2);
1326  CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1327  pXrefBlock->SetId(*pIdBlock);
1328  feature->SetXref().push_back(pXrefBlock);
1329  }
1330  else {
1331  CRef<CFeat_id> pIdChrom(new CFeat_id);
1332  pIdChrom->SetLocal().SetId(baseId);
1333  CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1334  pXrefChrom->SetId(*pIdChrom);
1335  feature->SetXref().push_back(pXrefChrom);
1336  }
1337 }
1338 
1339 // ----------------------------------------------------------------------------
1341  CRef<CSeq_feat>& feature,
1342  const CBedColumnData& columnData,
1343  unsigned int baseId)
1344 // ----------------------------------------------------------------------------
1345 {
1346  baseId++; //0-based to 1-based
1347  feature->SetId().SetLocal().SetId(baseId+2);
1348 
1349  CRef<CFeat_id> pIdChrom(new CFeat_id);
1350  pIdChrom->SetLocal().SetId(baseId);
1351  CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1352  pXrefChrom->SetId(*pIdChrom);
1353  feature->SetXref().push_back(pXrefChrom);
1354 
1355  if (xContainsThickFeature(columnData)) {
1356  CRef<CFeat_id> pIdThick(new CFeat_id);
1357  pIdThick->SetLocal().SetId(baseId+1);
1358  CRef<CSeqFeatXref> pXrefBlock(new CSeqFeatXref);
1359  pXrefBlock->SetId(*pIdThick);
1360  feature->SetXref().push_back(pXrefBlock);
1361  }
1362 }
1363 
1364 // ----------------------------------------------------------------------------
1366  CRef<CSeq_feat>& feature,
1367  const CBedColumnData& columnData,
1368  unsigned int baseId)
1369 // ----------------------------------------------------------------------------
1370 {
1371  baseId++; //0-based to 1-based
1372  feature->SetId().SetLocal().SetId(baseId+2);
1373 
1374  CRef<CFeat_id> pIdChrom(new CFeat_id);
1375  pIdChrom->SetLocal().SetId(baseId);
1376  CRef<CSeqFeatXref> pXrefChrom(new CSeqFeatXref);
1377  pXrefChrom->SetId(*pIdChrom);
1378  feature->SetXref().push_back(pXrefChrom);
1379 }
1380 
1381 // ----------------------------------------------------------------------------
1383  CRef<CSeq_feat>& feature,
1384  const CBedColumnData& columnData)
1385 // ----------------------------------------------------------------------------
1386 {
1387  if (columnData.ColumnCount() >= 4 &&
1388  !columnData[3].empty() && columnData[3] != ".") {
1389  feature->SetTitle(columnData[0]);
1390  }
1391  else {
1392  feature->SetTitle(string("line_") + NStr::IntToString(m_uLineNumber));
1393  }
1394 }
1395 
1396 
1397 // ----------------------------------------------------------------------------
1399  CRef<CUser_object> pDisplayData,
1400  const CBedColumnData& columnData)
1401 // ----------------------------------------------------------------------------
1402 {
1404  eDiag_Error,
1405  columnData.LineNo(),
1406  "Invalid data line: Bad \"score\" value.");
1407 
1408  string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1409  if (columnData.ColumnCount() < 5 || trackUseScore == "1") {
1410  //record does not carry score information
1411  return;
1412  }
1413 
1414  int int_score = NStr::StringToInt(columnData[4], NStr::fConvErr_NoThrow );
1415  double d_score = 0;
1416 
1417  if (int_score == 0 && columnData[4].compare("0") != 0) {
1418  try {
1419  d_score = NStr::StringToDouble(columnData[4]);
1420  }
1421  catch(std::exception&) {
1422  throw error;
1423  }
1424  }
1425 
1426  if (d_score < 0 || int_score < 0) {
1427  throw error;
1428  }
1429  else if (d_score > 0) {
1430  pDisplayData->AddField("score", d_score);
1431  }
1432  else {
1433  pDisplayData->AddField("score", int_score);
1434  }
1435 }
1436 
1437 
1438 // ----------------------------------------------------------------------------
1440  CRef<CUser_object> pDisplayData,
1441  const CBedColumnData& columnData,
1442  ILineErrorListener* pEC )
1443 // ----------------------------------------------------------------------------
1444 {
1445  //1: if track line itemRgb is set, try that first:
1446  string trackItemRgb = m_pTrackDefaults->ValueOf("itemRgb");
1447  if (trackItemRgb == "On" && columnData.ColumnCount() >= 9) {
1448  string featItemRgb = columnData[8];
1449  if (featItemRgb != ".") {
1450  xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1451  return;
1452  }
1453  }
1454 
1455  //2: if track useScore is set, try that next:
1456  string trackUseScore = m_pTrackDefaults->ValueOf("useScore");
1457  if (trackUseScore == "1" && columnData.ColumnCount() >= 5) {
1458  string featScore = columnData[4];
1459  if (featScore != ".") {
1460  xSetFeatureColorFromScore(pDisplayData, featScore);
1461  return;
1462  }
1463  }
1464 
1465  //3: if track colorByStrand is set, try that next:
1466  string trackColorByStrand = m_pTrackDefaults->ValueOf("colorByStrand");
1467  if (!trackColorByStrand.empty() && columnData.ColumnCount() >= 6) {
1468  ENa_strand strand =
1469  (columnData[5] == "-") ? eNa_strand_minus : eNa_strand_plus;
1470  xSetFeatureColorByStrand(pDisplayData, trackColorByStrand, strand, pEC);
1471  return;
1472  }
1473  //4: if none of the track color attributes are set, attempt feature itemRgb:
1474  if (columnData.ColumnCount() >= 9) {
1475  string featItemRgb = columnData[8];
1476  if (featItemRgb != ".") {
1477  xSetFeatureColorFromItemRgb(pDisplayData, featItemRgb, pEC);
1478  return;
1479  }
1480  }
1481 
1482  //5: if still here, assign default color:
1483  xSetFeatureColorDefault(pDisplayData);
1484 }
1485 
1486 // ----------------------------------------------------------------------------
1488  CRef<CUser_object> pDisplayData)
1489 // ----------------------------------------------------------------------------
1490 {
1491  const string colorDefault("0 0 0");
1492  pDisplayData->AddField("color", colorDefault);
1493 }
1494 
1495 // ----------------------------------------------------------------------------
1497  CRef<CUser_object> pDisplayData,
1498  const string& trackColorByStrand,
1499  ENa_strand strand,
1500  ILineErrorListener* pEC)
1501 // ----------------------------------------------------------------------------
1502 {
1503  try {
1504  string colorPlus, colorMinus;
1505  NStr::SplitInTwo(trackColorByStrand, " ", colorPlus, colorMinus);
1506  string useColor = (strand == eNa_strand_minus) ? colorMinus : colorPlus;
1507  xSetFeatureColorFromItemRgb(pDisplayData, useColor, pEC);
1508  }
1509  catch (std::exception&) {
1511  eDiag_Error,
1512  m_uLineNumber,
1513  "Invalid track line: Bad colorByStrand value.");
1514  throw error;
1515  }
1516 }
1517 
1518 // ----------------------------------------------------------------------------
1520  CRef<CUser_object> pDisplayData,
1521  const string& featScore )
1522 // ----------------------------------------------------------------------------
1523 {
1525  eDiag_Error,
1526  m_uLineNumber,
1527  "Invalid data line: Bad score value to be used for color.");
1528 
1529  int score = 0;
1530  try {
1531  score = static_cast<int>(NStr::StringToDouble(featScore));
1532  }
1533  catch (const std::exception&) {
1534  throw error;
1535  }
1536  if (score < 0 || 1000 < score) {
1537  throw error;
1538  }
1539  string greyValue = NStr::DoubleToString(255 - (score/4));
1540  vector<string> srgb{ greyValue, greyValue, greyValue};
1541  string rgbValue = NStr::Join(srgb, " ");
1542  pDisplayData->AddField("color", rgbValue);
1543 }
1544 
1545 // ----------------------------------------------------------------------------
// Add a "color" field of the form "R G B" (blank separated) to pDisplayData,
// derived from the itemRgb text.
//
// Accepted inputs, as far as the visible code shows:
//   - "0"                      -> "0 0 0"
//   - three comma separated values, each required to lie in [0, 255]
//   - a single value, decimal by default or hexadecimal when prefixed with
//     "0x" or "#"; its three low-order bytes become red, green and blue
// Any other input, or a failed conversion, reports the "Bad color value"
// warning through m_pMessageHandler and stores "0 0 0".
//
// pEC is not referenced anywhere in the visible body.
//
// NOTE(review): this copy is a source listing with its own line numbers in
// front of every line; the definition line (listing line 1546) and the
// statement inside the first `try` that assigns `test` (listing line 1572)
// are absent from it.
1547  CRef<CUser_object> pDisplayData,
1548  const string& itemRgb,
1549  ILineErrorListener* pEC )
1550 // ----------------------------------------------------------------------------
1551 {
1552  CReaderMessage warning(
1553  eDiag_Warning,
1554  m_uLineNumber,
1555  "Bad color value - converted to BLACK.");
1556  const string rgbDefault = "0 0 0";
1557 
1558  //optimization for common case:
1559  if (itemRgb == "0") {
1560  pDisplayData->AddField("color", rgbDefault);
1561  return;
1562  }
1563 
1564  vector<string> srgb;
1565  NStr::Split(itemRgb, ",", srgb);
1566 
// Case "r,g,b": every component has to convert and fall into [0, 255].
1567  if (srgb.size() == 3) {
1568  auto valuesOk = true;
1569  for (auto i=0; i<3; ++i) {
1570  int test;
1571  try {
1573  }
1574  catch(CException&) {
1575  valuesOk = false;
1576  break;
1577  }
1578  if ((test < 0) || (256 <= test)) {
1579  valuesOk = false;
1580  break;
1581  }
1582  }
1583  if (!valuesOk) {
1584  m_pMessageHandler->Report(warning);
1585  pDisplayData->AddField("color", rgbDefault);
1586  return;
1587  }
// The original component strings are reused, only the separator changes.
1588  auto outValue = srgb[0] + " " + srgb[1] + " " + srgb[2];
1589  pDisplayData->AddField("color", outValue);
1590  return;
1591  }
1592 
// Case single value: packed color, decimal unless a hex prefix is present.
1593  if (srgb.size() == 1) {
1594  auto assumeHex = false;
1595  string itemRgbCopy(itemRgb);
1596  if (NStr::StartsWith(itemRgbCopy, "0x")) {
1597  assumeHex = true;
1598  itemRgbCopy = itemRgb.substr(2);
1599  }
1600  else if (NStr::StartsWith(itemRgbCopy, "#")) {
1601  assumeHex = true;
1602  itemRgbCopy = itemRgbCopy.substr(1);
1603  }
1604  unsigned long colorValue;
1605  int radix = (assumeHex ? 16 : 10);
1606  try {
1607  colorValue = NStr::StringToULong(
1608  itemRgbCopy, NStr::fDS_ProhibitFractions, radix);
1609  }
1610  catch (CStringException&) {
1611  m_pMessageHandler->Report(warning);
1612  pDisplayData->AddField("color", rgbDefault);
1613  return;
1614  }
// Unpack the three low-order bytes; any higher bits are ignored.
1615  int blue = colorValue & 0xFF;
1616  colorValue >>= 8;
1617  int green = colorValue & 0xFF;
1618  colorValue >>= 8;
1619  int red = colorValue & 0xFF;
1620  auto outValue = NStr::IntToString(red) + " " + NStr::IntToString(green) +
1621  " " + NStr::IntToString(blue);
1622  pDisplayData->AddField("color", outValue);
1623  return;
1624  }
1625 
// Neither one nor three components: warn and fall back to black.
1626  m_pMessageHandler->Report(warning);
1627  pDisplayData->AddField("color", rgbDefault);
1628  return;
1629 }
1630 
1631 // ----------------------------------------------------------------------------
1633  CRef<CSeq_feat>& feature,
1634  const CBedColumnData& columnData,
1635  ILineErrorListener* pEc )
1636 // ----------------------------------------------------------------------------
1637 {
1638  CSeqFeatData& data = feature->SetData();
1639  if (columnData.ColumnCount() >= 4 && columnData[3] != ".") {
1640  data.SetRegion() = columnData[3];
1641  }
1642  else {
1643  data.SetRegion() = columnData[0];
1644  }
1645 
1646  CRef<CUser_object> pDisplayData(new CUser_object());
1647 
1648  CSeq_feat::TExts& exts = feature->SetExts();
1649  pDisplayData->SetType().SetStr("DisplaySettings");
1650  exts.push_front(pDisplayData);
1651 
1652  xSetFeatureScore(pDisplayData, columnData);
1653  xSetFeatureColor(pDisplayData, columnData, pEc);
1654 }
1655 
1656 // ----------------------------------------------------------------------------
// Set the feature location from columns 0 (sequence id), 1 (start) and
// 2 (stop), plus the strand column when one is present.
//
// A one-base range becomes a point, a longer range an interval. A
// CReaderMessage error is thrown when start or stop cannot be converted, when
// stop lies before start, or when the strand column holds anything other than
// "+", "-" or ".".
//
// NOTE(review): this copy is a source listing with its own line numbers in
// front of every line; the definition line (1657), the declaration of
// `location` (1670), the `CReaderMessage error(` openers (1679, 1689, 1703,
// 1718) and the second half of the strand ternary (1725) are absent from it.
1658  CRef<CSeq_feat>& feature,
1659  const CBedColumnData& columnData )
1660 // ----------------------------------------------------------------------------
1661 {
1662  //
1663  // Note:
1664  // BED convention for specifying intervals is 0-based, first in, last out.
1665  // ASN convention for specifying intervals is 0-based, first in, last in.
1666  // Hence, conversion BED->ASN leaves the "from" coordinate
1667  // unchanged, and decrements the "to" coordinate by one.
1668  //
1669 
1671  int from, to;
1672  from = to = -1;
1673 
1674  //already established: We got at least three columns
1675  try {
1676  from = NStr::StringToInt(columnData[1]);
1677  }
1678  catch(std::exception&) {
1680  eDiag_Error,
1681  columnData.LineNo(),
1682  "Invalid data line: Bad \"SeqStart\" value.");
1683  throw error;
1684  }
1685  try {
1686  to = NStr::StringToInt(columnData[2]) - 1;
1687  }
1688  catch(std::exception&) {
1690  eDiag_Error,
1691  columnData.LineNo(),
1692  "Invalid data line: Bad \"SeqStop\" value.");
1693  throw error;
1694  }
// `to` is already inclusive here, so equality means a single base.
1695  if (from == to) {
1696  location->SetPnt().SetPoint(from);
1697  }
1698  else if (from < to) {
1699  location->SetInt().SetFrom(from);
1700  location->SetInt().SetTo(to);
1701  }
1702  else {
1704  eDiag_Error,
1705  columnData.LineNo(),
1706  "Invalid data line: \"SeqStop\" less than \"SeqStart\".");
1707  throw error;
1708  }
1709 
// The strand is normally read from column 5; a five-column line whose last
// column is "+" or "-" is taken to carry the strand in column 4 instead.
1710  size_t strand_field = 5;
1711  if (columnData.ColumnCount() == 5 &&
1712  (columnData[4] == "-" || columnData[4] == "+")) {
1713  strand_field = 4;
1714  }
1715  if (strand_field < columnData.ColumnCount()) {
1716  string strand = columnData[strand_field];
1717  if (strand != "+" && strand != "-" && strand != ".") {
1719  eDiag_Error,
1720  columnData.LineNo(),
1721  "Invalid data line: Invalid strand character.");
1722  throw error;
1723  }
1724  location->SetStrand(( columnData[strand_field] == "+" ) ?
1726  }
1727 
1728  CRef<CSeq_id> id = CReadUtil::AsSeqId(columnData[0], m_iFlags, false);
1729  location->SetId(*id);
1730  feature->SetLocation(*location);
1731 }
1732 
1733 // ----------------------------------------------------------------------------
1734 bool
1736  ILineReader& lr,
1737  CRawBedTrack& rawdata,
1738  ILineErrorListener* pMessageListener)
1739 // ----------------------------------------------------------------------------
1740 {
1741  if (m_CurBatchSize == m_MaxBatchSize) {
1742  m_CurBatchSize = 0;
1743  return xReadBedDataRaw(lr, rawdata, pMessageListener);
1744  }
1745 
1746  string line;
1747  while (xGetLine(lr, line)) {
1748  m_CurBatchSize = 0;
1749  if (line == "browser" || NStr::StartsWith(line, "browser ")) {
1750  continue;
1751  }
1752  if (line == "track" || NStr::StartsWith(line, "track ")) {
1753  continue;
1754  }
1755  //data line
1756  lr.UngetLine();
1757  return xReadBedDataRaw(lr, rawdata, pMessageListener);
1758  }
1759  return false;
1760 }
1761 
1762 // ----------------------------------------------------------------------------
// Parse one text line into a raw BED record.
//
// Returns false, leaving `record` untouched, for "browser" and "track" lines,
// for a line whose column count differs from that of the first data line
// seen (remembered in mRealColumnCount), and for unparsable start, stop or
// score values. In the last two groups an error is reported through
// m_pMessageHandler first. Returns true once interval and, if present, score
// have been stored in `record`.
//
// pMessageListener is not referenced anywhere in the visible body.
// xCleanColumnValues() is called without a surrounding try, so whatever it
// throws propagates to the caller.
//
// NOTE(review): this copy is a source listing with its own line numbers in
// front of every line; the definition line (1764), the `CReaderMessage
// error(` openers (1790, 1806, 1819, 1834) and the second argument of the
// score conversion (1831) are absent from it.
1763 bool
1765  const string& line,
1766  CRawBedRecord& record,
1767  ILineErrorListener* pMessageListener)
1768 // ----------------------------------------------------------------------------
1769 {
1770  if (line == "browser" || NStr::StartsWith(line, "browser ")
1771  || NStr::StartsWith(line, "browser\t")) {
1772  return false;
1773  }
1774  if (line == "track" || NStr::StartsWith(line, "track ")
1775  || NStr::StartsWith(line, "track\t")) {
1776  return false;
1777  }
1778 
1779  vector<string> columns;
1780  string linecopy = line;
1781  NStr::TruncateSpacesInPlace(linecopy);
1782 
1783  // parse
1784  NStr::Split(linecopy, " \t", columns, NStr::fSplit_MergeDelimiters);
1785  xCleanColumnValues(columns);
// The first data line seen fixes the column count for all later lines.
1786  if (mRealColumnCount == 0) {
1787  mRealColumnCount = columns.size();
1788  }
1789  if (columns.size() != mRealColumnCount) {
1791  eDiag_Error,
1792  m_uLineNumber,
1793  "Invalid data line: Inconsistent column count.");
1794  m_pMessageHandler->Report(error);
1795  return false;
1796  }
1797 
1798  //assign columns to record:
1799  CRef<CSeq_id> id = CReadUtil::AsSeqId(columns[0], m_iFlags, false);
1800 
// NOTE(review): the signed conversion result is stored in an unsigned
// variable, so a negative coordinate is not rejected here -- confirm.
1801  unsigned int start;
1802  try {
1803  start = NStr::StringToInt(columns[1]);
1804  }
1805  catch(std::exception&) {
1807  eDiag_Error,
1808  m_uLineNumber,
1809  "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1810  m_pMessageHandler->Report(error);
1811  return false;
1812  }
1813 
1814  unsigned int stop;
1815  try {
1816  stop = NStr::StringToInt(columns[2]);
1817  }
1818  catch(std::exception&) {
1820  eDiag_Error,
1821  m_uLineNumber,
1822  "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
1823  m_pMessageHandler->Report(error);
1824  return false;
1825  }
1826 
// -1 marks "no score"; only a non-negative score is stored further down.
// NOTE(review): the guard uses mValidColumnCount rather than columns.size()
// before indexing columns[4] -- presumably the two agree; verify.
1827  int score(-1);
1828  if (mValidColumnCount >= 5 && columns[4] != ".") {
1829  try {
1830  score = NStr::StringToInt(columns[4],
1832  }
1833  catch(std::exception&) {
1835  eDiag_Error,
1836  m_uLineNumber,
1837  "Invalid data line: Invalid \"Score\" (column 5) value.");
1838  m_pMessageHandler->Report(error);
1839  return false;
1840  }
1841  }
// Plus strand unless column 5 says "-".
1842  ENa_strand strand = eNa_strand_plus;
1843  if (mValidColumnCount >= 6) {
1844  if (columns[5] == "-") {
1845  strand = eNa_strand_minus;
1846  }
1847  }
1848  record.SetInterval(*id, start, stop, strand);
1849  if (score >= 0) {
1850  record.SetScore(score);
1851  }
1852  return true;
1853 }
1854 
1855 // ----------------------------------------------------------------------------
1856 bool
1858  const CBedColumnData& columnData) const
1859 // ----------------------------------------------------------------------------
1860 {
1861  if (columnData.ColumnCount() < 8 || mValidColumnCount < 8) {
1862  return false;
1863  }
1864 
1865  int start = -1, from = -1, to = -1;
1866  try {
1867  start = NStr::StringToInt(columnData[1]);
1868  from = NStr::StringToInt(columnData[6]);
1869  to = NStr::StringToInt(columnData[7]);
1870  }
1871  catch (std::exception&) {
1873  eDiag_Error,
1874  columnData.LineNo(),
1875  "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1876  throw error;
1877  }
1878  if (start == from && from == to) {
1879  return false;
1880  }
1881  return true;
1882 }
1883 
1884 
1885 // ----------------------------------------------------------------------------
1886 bool
1888  const CBedColumnData& columnData) const
1889 // ----------------------------------------------------------------------------
1890 {
1891  if (columnData.ColumnCount() < 12 || mValidColumnCount < 12) {
1892  return false;
1893  }
1894 
1895  int start = -1, from = -1, to = -1;
1896  try {
1897  start = NStr::StringToInt(columnData[1]);
1898  from = NStr::StringToInt(columnData[6]);
1899  to = NStr::StringToInt(columnData[7]);
1900  }
1901  catch (std::exception&) {
1903  eDiag_Error,
1904  columnData.LineNo(),
1905  "Invalid data line: Bad \"Start/ThickStart/ThickStop\" values.");
1906  throw error;
1907  }
1908  if (start == from && from == to) {
1909  return false;
1910  }
1911  return true;
1912 }
1913 
1914 
1915 // ----------------------------------------------------------------------------
1916 bool
1918  const CBedColumnData& columnData) const
1919 // ----------------------------------------------------------------------------
1920 {
1921  return (columnData.ColumnCount() >= 12 && mValidColumnCount >= 12);
1922 }
1923 
1924 
1925 // ----------------------------------------------------------------------------
1926 bool
1928  const CBedColumnData& columnData) const
1929 // ----------------------------------------------------------------------------
1930 {
1931  return (columnData.ColumnCount() >= 8 && mValidColumnCount >= 8);
1932 }
1933 
1934 
1935 // ----------------------------------------------------------------------------
1936 bool
1938  ILineReader& lr,
1939  CRawBedTrack& rawdata,
1940  ILineErrorListener* pMessageListener)
1941 // ----------------------------------------------------------------------------
1942 {
1943  rawdata.Reset();
1944  string line;
1945  while (xGetLine(lr, line)) {
1946  CRawBedRecord record;
1947  if (!xReadBedRecordRaw(line, record, pMessageListener)) {
1948  lr.UngetLine();
1949  break;
1950  }
1951  rawdata.AddRecord(record);
1952  ++m_CurBatchSize;
1953  if (m_CurBatchSize == m_MaxBatchSize) {
1954  return rawdata.HasData();
1955  }
1956  }
1957 
1958  return rawdata.HasData();
1959 }
1960 
1961 // ----------------------------------------------------------------------------
1962 void
1964  vector<string>& columns)
1965 // ----------------------------------------------------------------------------
1966 {
1967  string fixup;
1968 
1969  if (NStr::EqualNocase(columns[0], "chr") && columns.size() > 1) {
1970  columns[1] = columns[0] + columns[1];
1971  columns.erase(columns.begin());
1972  }
1973  if (columns.size() < 3) {
1975  eDiag_Error,
1976  0,
1977  "Invalid data line: Insufficient column count.");
1978  throw error;
1979  }
1980 
1981  try {
1982  NStr::Replace(columns[1], ",", "", fixup);
1983  columns[1] = fixup;
1984  }
1985  catch(std::exception&) {
1987  eDiag_Error,
1988  0,
1989  "Invalid data line: Invalid \"SeqStart\" (column 2) value.");
1990  throw error;
1991  }
1992 
1993  try {
1994  NStr::Replace(columns[2], ",", "", fixup);
1995  columns[2] = fixup;
1996  }
1997  catch(std::exception&) {
1999  eDiag_Error,
2000  0,
2001  "Invalid data line: Invalid \"SeqStop\" (column 3) value.");
2002  throw error;
2003  }
2004 }
2005 
2006 // ----------------------------------------------------------------------------
2007 void
2009  CSeq_annot& annot)
2010 // ----------------------------------------------------------------------------
2011 {
2012  if(mValidColumnCount < 3) {
2013  return;
2014  }
2015  CRef<CUser_object> columnCountUser(new CUser_object());
2016  columnCountUser->SetType().SetStr("NCBI_BED_COLUMN_COUNT");
2017  columnCountUser->AddField("NCBI_BED_COLUMN_COUNT", int (mValidColumnCount));
2018 
2019  CRef<CAnnotdesc> userDesc(new CAnnotdesc());
2020  userDesc->SetUser().Assign(*columnCountUser);
2021  annot.SetDesc().Set().push_back(userDesc);
2022 }
2023 
2024 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
unsigned int LineNo() const
size_t ColumnCount() const
bool xParseTrackLine(const string &) override
Definition: bed_reader.cpp:543
virtual bool ReadTrackData(ILineReader &, CRawBedTrack &, ILineErrorListener *=nullptr)
void xSetFeatureIdsThick(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xSetFeatureBedData(CRef< CSeq_feat > &, const CBedColumnData &, ILineErrorListener *)
void xSetFeatureLocationCds(CRef< CSeq_feat > &, const CBedColumnData &)
Definition: bed_reader.cpp:980
bool xReadBedRecordRaw(const string &, CRawBedRecord &, ILineErrorListener *)
unique_ptr< CBedAutoSql > mpAutoSql
Definition: bed_reader.hpp:369
virtual bool SetAutoSql(const string &)
Definition: bed_reader.cpp:278
void xSetFeatureColorByStrand(CRef< CUser_object >, const string &, ENa_strand, ILineErrorListener *)
void xSetFeatureColorFromScore(CRef< CUser_object >, const string &)
virtual void xAssignBedColumnCount(CSeq_annot &)
bool xAppendFeatureChrom(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:668
ENa_strand xGetStrand(const CBedColumnData &) const
void xSetFeatureScore(CRef< CUser_object >, const CBedColumnData &)
unique_ptr< CLinePreBuffer > mLinePreBuffer
Definition: bed_reader.hpp:367
string m_currentId
Definition: bed_reader.hpp:357
void xSetFeatureLocationChrom(CRef< CSeq_feat > &, const CBedColumnData &)
Definition: bed_reader.cpp:890
CBedReader(int=fNormal, const string &="", const string &="", CReaderListener *=nullptr)
Definition: bed_reader.cpp:237
bool xReadBedDataRaw(ILineReader &, CRawBedTrack &, ILineErrorListener *)
bool mAssumeErrorsAreRecordLevel
Definition: bed_reader.hpp:362
void xSetFeatureLocationGene(CRef< CSeq_feat > &, const CBedColumnData &)
Definition: bed_reader.cpp:905
bool m_usescore
Definition: bed_reader.hpp:364
CRef< CSeq_feat > xAppendFeatureCds(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:730
virtual ~CBedReader()
Definition: bed_reader.cpp:260
unsigned int m_CurBatchSize
Definition: bed_reader.hpp:365
void xSetFeatureColor(CRef< CUser_object >, const CBedColumnData &, ILineErrorListener *)
bool xParseFeatureAutoSql(const CBedColumnData &, CSeq_annot &, ILineErrorListener *)
Definition: bed_reader.cpp:812
vector< string >::size_type mValidColumnCount
Definition: bed_reader.hpp:361
void xProcessData(const TReaderData &, CSeq_annot &) override
Definition: bed_reader.cpp:357
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read a single object from given line reader containing BED data.
Definition: bed_reader.cpp:267
void xSetFeatureLocationRna(CRef< CSeq_feat > &, const CBedColumnData &)
bool xAppendFeatureBlock(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:750
CRef< CSeq_feat > xAppendFeatureGene(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:689
static void xCleanColumnValues(vector< string > &)
bool xContainsCdsFeature(const CBedColumnData &) const
void xSetFeatureColorDefault(CRef< CUser_object >)
CRef< CSeq_feat > xAppendFeatureRna(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:770
const unsigned int m_MaxBatchSize
Definition: bed_reader.hpp:366
void xSetFeatureIdsGene(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xSetFeatureIdsBlock(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xSetFeatureLocation(CRef< CSeq_feat > &, const CBedColumnData &)
unsigned int m_CurrentFeatureCount
Definition: bed_reader.hpp:363
bool xContainsBlockFeature(const CBedColumnData &) const
bool xContainsThickFeature(const CBedColumnData &) const
bool xParseFeatureThreeFeatFormat(const CBedColumnData &, CSeq_annot &, ILineErrorListener *)
Definition: bed_reader.cpp:607
void xSetFeatureLocationBlock(CRef< CSeq_feat > &, const CBedColumnData &)
void xSetFeatureIdsRna(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xSetFeatureTitle(CRef< CSeq_feat > &, const CBedColumnData &)
bool xParseFeatureGeneModelFormat(const CBedColumnData &, CSeq_annot &, ILineErrorListener *)
Definition: bed_reader.cpp:630
bool xAppendFeatureThick(const CBedColumnData &, CSeq_annot &, unsigned int, ILineErrorListener *)
Definition: bed_reader.cpp:710
void xPostProcessAnnot(CSeq_annot &) override
Definition: bed_reader.cpp:532
void xSetFeatureIdsChrom(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xGetData(ILineReader &, TReaderData &) override
Definition: bed_reader.cpp:325
@ fDirectedFeatureModel
Definition: bed_reader.hpp:127
void xSetFeatureDisplayData(CRef< CSeq_feat > &, const CBedColumnData &)
Definition: bed_reader.cpp:830
virtual bool xDetermineLikelyColumnCount(CLinePreBuffer &, ILineErrorListener *)
Definition: bed_reader.cpp:376
void xSetFeatureIdsCds(CRef< CSeq_feat > &, const CBedColumnData &, unsigned int)
void xSetFeatureLocationThick(CRef< CSeq_feat > &, const CBedColumnData &)
Definition: bed_reader.cpp:920
bool xContainsRnaFeature(const CBedColumnData &) const
vector< string >::size_type mRealColumnCount
Definition: bed_reader.hpp:360
void xSetFeatureColorFromItemRgb(CRef< CUser_object >, const string &, ILineErrorListener *)
CRef< CSeq_annot > xCreateSeqAnnot() override
Definition: bed_reader.cpp:308
bool xParseFeatureUserFormat(const CBedColumnData &, CSeq_annot &, ILineErrorListener *)
Definition: bed_reader.cpp:791
bool xParseFeature(const SReaderLine &, CSeq_annot &, ILineErrorListener *)
Definition: bed_reader.cpp:576
CFeat_id –.
Definition: Feat_id.hpp:66
ILineReader & mLineReader
Definition: bed_reader.cpp:170
LinePreIt end()
Definition: bed_reader.cpp:139
deque< string >::const_iterator LinePreIt
Definition: bed_reader.cpp:62
virtual ~CLinePreBuffer()
Definition: bed_reader.cpp:70
deque< string > mBuffer
Definition: bed_reader.cpp:174
bool UngetLine(const string &line)
Definition: bed_reader.cpp:121
void AssignReader(ILineReader &lineReader)
Definition: bed_reader.cpp:145
static void StripSpaceCharsInPlace(string &str)
Definition: bed_reader.cpp:155
bool GetLine(string &line)
Definition: bed_reader.cpp:99
LinePreIt begin()
Definition: bed_reader.cpp:134
virtual bool IsCommentLine(const CTempString &line)
Definition: bed_reader.cpp:87
bool FillBuffer(size_t numLines)
Definition: bed_reader.cpp:72
int LineNumber() const
Definition: bed_reader.cpp:129
CLinePreBuffer(ILineReader &lineReader)
Definition: bed_reader.cpp:64
void Dump(CNcbiOstream &ostr) const
Definition: bed_reader.cpp:206
void SetInterval(CSeq_id &id, unsigned int start, unsigned int stop, ENa_strand strand)
Definition: bed_reader.cpp:181
CRef< CSeq_interval > m_pInterval
Definition: bed_reader.hpp:71
void SetScore(unsigned int score)
Definition: bed_reader.cpp:197
void Reset()
Definition: bed_reader.hpp:88
void Dump(CNcbiOstream &ostr) const
Definition: bed_reader.cpp:224
bool HasData() const
Definition: bed_reader.hpp:92
void AddRecord(CRawBedRecord &record)
Definition: bed_reader.hpp:89
Common file reader utility functions.
Definition: read_util.hpp:47
static void Tokenize(const string &instr, const string &delim, vector< string > &tokens)
Tokenize a given string, respecting quoted substrings as atomic units.
Definition: read_util.cpp:47
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based on the given customization flags.
Definition: read_util.cpp:89
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
unique_ptr< CReaderMessageHandler > m_pMessageHandler
unsigned int m_uDataCount
unsigned int m_uLineNumber
string m_AnnotTitle
unique_ptr< CTrackData > m_pTrackDefaults
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
virtual bool xGetLine(ILineReader &, string &)
virtual void xAddConversionInfo(CSeq_annot &, ILineErrorListener *)
TReaderFlags m_iFlags
virtual bool xIsBrowserLine(const CTempString &)
string m_AnnotName
virtual bool xParseTrackLine(const string &)
virtual void xAssignTrackData(CSeq_annot &)
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
struct SReaderLine { SReaderLine(unsigned int line, string data):mLine(line), mData(data) {} TReaderLine
Definition: reader_base.hpp:66
virtual bool xIsTrackLine(const CTempString &)
void SetLineNumber(int lineNumber)
void SetRegion(const TRegion &v)
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
void SetNameDesc(const string &name)
Definition: Seq_annot.cpp:66
void SetTitleDesc(const string &title)
Definition: Seq_annot.cpp:96
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void AddOrReplaceQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature, or replace the value for the first one if it already exists.
Definition: Seq_feat.cpp:299
CStringException –.
Definition: ncbistr.hpp:4506
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
static const char location[]
Definition: config.c:97
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
Definition: ncbidiag.hpp:655
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void UngetLine(void)=0
Unget current line, which must be valid.
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
Definition: Seq_loc.cpp:5183
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static unsigned long StringToULong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned long.
Definition: ncbistr.cpp:665
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3310
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
@ fDS_ProhibitFractions
StringToUInt8_DataSize(): Ignore any fraction part of a value, "1.2K" ~ "1K".
Definition: ncbistr.hpp:307
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
Definition: ncbistr.hpp:298
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
void SetType(TType &value)
Assign a value to Type data member.
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
list< CRef< CUser_object > > TExts
Definition: Seq_feat_.hpp:127
TExts & SetExts(void)
Assign a value to Exts data member.
Definition: Seq_feat_.hpp:1483
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_feat_.hpp:1181
TLocal & SetLocal(void)
Select the variant.
Definition: Feat_id_.cpp:140
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TUser & SetUser(void)
Select the variant.
Definition: Annotdesc_.cpp:190
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
int i
#define nullptr
Definition: ncbimisc.hpp:45
T negative(T x_)
static const char * str(char *buf, int n)
Definition: stats.c:84
static DP_BlockInfo * blocks
int test(int srctype, const void *srcdata, int srclen, int dsttype, int dstlen)
Definition: t0019.c:43
#define ftable
Definition: utilfeat.h:37
Modified on Fri Feb 23 11:45:26 2024 by modify_doxy.py rev. 669887