NCBI C++ ToolKit
bed_autosql_standard.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * $Id: bed_autosql_standard.cpp 99919 2023-05-22 11:51:29Z foleyjp $
3  *
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Frank Ludwig
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistr.hpp>
41 #include "bed_autosql_standard.hpp"
42 
45 
46 // ============================================================================
48 // ============================================================================
49  mColChrom(-1), mColSeqStart(-1), mColSeqStop(-1), mColStrand(-1),
50  mColName(-1), mColScore(-1), mNumFields(0)
51 {}
52 
53 // ============================================================================
54 bool
56  size_t colIndex,
57  const string& colName,
58  const string& colFormat)
59 // ============================================================================
60 {
61  ++mNumFields;
62  if (colName == "chrom" && colFormat == "string") {
63  mColChrom = colIndex;
64  return true;
65  }
66  if (colName == "chromStart" && colFormat == "uint") {
67  mColSeqStart = colIndex;
68  return true;
69  }
70  if (colName == "chromEnd" && colFormat == "uint") {
71  mColSeqStop = colIndex;
72  return true;
73  }
74  if (colName == "strand" && colFormat == "char[1]") {
75  mColStrand = colIndex;
76  return true;
77  }
78  if (colName == "name" && colFormat == "string") {
79  mColName = colIndex;
80  return true;
81  }
82  if (colName == "score" && colFormat == "uint") {
83  mColScore = colIndex;
84  return true;
85  }
86  --mNumFields;
87  return false;
88 }
89 
90 // ============================================================================
91 bool
93  const CBedColumnData& columnData,
94  int bedFlags,
95  CSeq_feat& feat,
96  CReaderMessageHandler& messageHandler) const
97 // ============================================================================
98 {
99  CRef<CSeq_id> pId = CReadUtil::AsSeqId(columnData[mColChrom], bedFlags, false);
100 
101  auto& location = feat.SetLocation().SetInt();
102  location.SetId(*pId);
103  try {
104  location.SetFrom(NStr::StringToUInt(columnData[mColSeqStart]));
105  }
106  catch (CStringException&) {
108  eDiag_Error,
109  columnData.LineNo(),
110  "BED: Invalid data for column \"chromStart\". Feature omitted");
111  messageHandler.Report(error);
112  return false;
113  }
114 
115  try {
116  location.SetTo(NStr::StringToUInt(columnData[mColSeqStop])-1);
117  }
118  catch (CStringException&) {
120  eDiag_Error,
121  columnData.LineNo(),
122  "BED: Invalid data for column \"chromEnd\". Feature omitted");
123  messageHandler.Report(error);
124  return false;
125  }
126 
127  if (mColStrand == -1) {
128  return true;
129  }
130 
131  CReaderMessage warning(
133  columnData.LineNo(),
134  "BED: Invalid data for column \"strand\". Defaulting to \"+\"");
135 
136  location.SetStrand(eNa_strand_plus);
137  auto strandStr = columnData[mColStrand];
138  if (strandStr.size() != 1) {
139  messageHandler.Report(warning);
140  }
141  else {
142  auto strandChar = strandStr[0];
143  if (string("+-.").find(strandChar) == string::npos) {
144  messageHandler.Report(warning);
145  }
146  else if (strandChar == '-') {
147  location.SetStrand(eNa_strand_minus);
148  }
149  }
150  return true;
151 }
152 
153 // ============================================================================
154 bool
156  const CBedColumnData& columnData,
157  CSeq_feat& feat,
158  CReaderMessageHandler& messageHandler) const
159 // ============================================================================
160 {
161  if (mColChrom == -1) {
162  return true;
163  }
164  feat.SetTitle(columnData[mColChrom]);
165  return true;
166 }
167 
168 
169 bool
171  const CBedColumnData& columnData,
172  CSeq_feat& feat,
173  CReaderMessageHandler& messageHandler) const
174 {
175  if (mColName >= 0) {
176  feat.SetData().SetRegion(columnData[mColName]);
177  return true;
178  }
179 
180  if (mColChrom >= 0) {
181  feat.SetData().SetRegion(columnData[mColName]);
182  return true;
183  }
184 
185  return false;
186 }
187 
188 
189 bool
191  const CBedColumnData& columnData,
192  int bedFlags,
193  CSeq_feat& feat,
194  CReaderMessageHandler& messageHandler) const
195 {
196  if (mColScore == -1) {
197  return true;
198  }
199 
200  auto pDisplayData = Ref(new CUser_object());
201  pDisplayData->SetType().SetStr("DisplaySettings");
202  pDisplayData->AddField("score",
203  NStr::StringToInt(columnData[mColScore],
205  feat.SetExts().push_back(pDisplayData);
206 
207  return true;
208 }
209 
210 // ============================================================================
211 bool
213  CReaderMessageHandler& messageHandler) const
214 // ============================================================================
215 {
216  //at issue: do we have enough information to make a Seq-loc
217  if (mColChrom == -1 || mColSeqStart == -1 || mColSeqStop == -1) {
220  0,
221  "AutoSql: Table does not contain enough information to set a feature location.");
222  messageHandler.Report(fatal);
223  return false;
224  }
225  return true;
226 }
227 
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based in given customization flags.
Definition: read_util.cpp:89
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CStringException –.
Definition: ncbistr.hpp:4506
The NCBI C++ standard methods for dealing with std::string.
static void fatal(const char *msg,...)
Definition: attributes.c:18
static const char location[]
Definition: config.c:97
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
Definition: ncbistr.hpp:298
TExts & SetExts(void)
Assign a value to Exts data member.
Definition: Seq_feat_.hpp:1483
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
bool SetLocation(const CBedColumnData &, int bedFlags, CSeq_feat &, CReaderMessageHandler &) const
bool Validate(CReaderMessageHandler &) const
bool SetRegion(const CBedColumnData &columnData, CSeq_feat &feat, CReaderMessageHandler &messageHandler) const
bool SetDisplayData(const CBedColumnData &columnData, int bedFlags, CSeq_feat &feat, CReaderMessageHandler &messageHandler) const
bool ProcessTableRow(size_t, const string &, const string &)
bool SetTitle(const CBedColumnData &, CSeq_feat &, CReaderMessageHandler &) const
Modified on Mon Apr 22 04:02:35 2024 by modify_doxy.py rev. 669887