1 #ifndef UTIL___ROW_READER_EXCEL_CSV__HPP
2 #define UTIL___ROW_READER_EXCEL_CSV__HPP
75 size_t current_index= 0;
76 size_t token_begin_index = 0;
77 size_t lines_read = 0;
78 bool in_quotes =
false;
83 while (current_index < data->
size()) {
84 auto current_char = (*data)[current_index];
85 if (current_char ==
',') {
87 m_Tokens.emplace_back(token_begin_index);
88 token_begin_index = current_index + 1;
90 }
else if (current_char ==
'"') {
91 if (token_begin_index == current_index) {
95 if (current_index + 1 < data->size() &&
96 (*data)[current_index + 1] ==
'"') {
117 m_Tokens.push_back(token_begin_index);
129 vector<CTempString>& tokens)
132 if (!raw_line.
empty()) {
135 field_no <
m_Tokens.size(); ++field_no) {
140 tokens.emplace_back(raw_line.
data() +
m_Tokens[field_no],
157 if (raw_line.
empty())
176 info.second.first,
info.second.second);
179 translated,
info.second.first,
info.second.second);
188 string& translated_value)
193 if (raw_value[0] ==
'=') {
194 size_t dbl_quote_cnt = 0;
195 for (
size_t index = 0; index < raw_value.
size(); ++index)
196 if (raw_value[index] ==
'"')
199 if (dbl_quote_cnt == 0) {
200 translated_value =
string(raw_value.
data() + 1,
201 raw_value.
size() - 1);
212 if (dbl_quote_cnt % 2 == 0) {
213 if (raw_value[1] ==
'"' &&
214 raw_value[raw_value.
size() - 1] ==
'"') {
218 translated_value =
string(raw_value.
data() + 2,
219 raw_value.
size() - 3);
231 if (raw_value[0] ==
'"') {
232 size_t match_index = 1;
233 for (; match_index < raw_value.
size(); ++match_index) {
234 if (raw_value[match_index] ==
'"') {
235 if (match_index + 1< raw_value.
size() &&
236 raw_value[match_index + 1] ==
'"')
245 if (match_index < raw_value.
size()) {
247 translated_value =
string(raw_value.
data() + 1,
250 if (match_index < raw_value.
size() - 1) {
252 translated_value.append(
253 raw_value.
data() + match_index + 1,
254 raw_value.
size() - match_index - 1);
258 translated_value =
string(raw_value.
data() + 1,
259 raw_value.
size() - 1);
264 if (!translated_value.empty()) {
265 if (translated_value[0] ==
'=') {
266 size_t dbl_quote_cnt = 0;
267 for (
size_t index = 0;
268 index < translated_value.size(); ++index)
269 if (translated_value[index] ==
'"')
272 if (dbl_quote_cnt > 0 && (dbl_quote_cnt % 2 == 0)) {
273 if (translated_value[1] ==
'"' &&
274 translated_value[translated_value.size() - 1] ==
'"') {
275 translated_value = translated_value.substr(2, translated_value.size() - 3);
291 GetMyStream().x_ClearTraitsProvidedFieldsInfo();
326 for (
const auto&
info : GetMyStream().GetFieldsMetaInfo()) {
327 if (
info.is_type_initialized) {
328 auto field_type =
info.type.GetType();
332 make_pair(field_type,
info.type.GetProps());
339 return raw_field_value.
empty() ||
345 while (!tokens.empty()) {
static void ValidateBasicTypeFieldValue(const CTempString &str_value, ERR_FieldType field_type, const string &props)
ERR_TranslationResult Translate(TFieldNo, const CTempString raw_value, string &translated_value)
CRowReaderStream_Excel_CSV()
string m_PreviousLineSeparator
vector< size_t > m_Tokens
map< size_t, pair< ERR_FieldType, string > > m_FieldsToValidate
ERR_Action OnNextLine(CTempString)
ERR_EventAction OnEvent(ERR_Event event, ERR_EventMode event_mode)
ERR_Action Tokenize(const CTempString raw_line, vector< CTempString > &tokens)
void x_ReadOneLine(CNcbiIstream &is, string *data, bool joining)
bool x_IsNull(const CTempString &raw_field_value)
RR_TRAITS_PARENT_STREAM(CRowReaderStream_Excel_CSV)
vector< CTempString > m_ValidationTokens
void x_StripTrailingNullFields(vector< CTempString > &tokens)
size_t ReadRowData(CNcbiIstream &is, string *data)
void x_GetFieldTypesToValidate(void)
ERR_Action Validate(CTempString raw_line, ERR_FieldValidationMode field_validation_mode)
Partial specialization of the CRowReaderStream_CharDelimited<...> template for the case when the data...
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
size_type size(void) const
Return the length of the represented array.
const struct ncbi::grid::netcache::search::fields::SIZE size
Uint4 TFieldNo
Field number (zero based)
ERR_FieldValidationMode
Whether to check validity of the fields (names and/or values)
@ eRR_NoFieldValidation
don't validate fields' value and name
ERR_TranslationResult
The Translate() callback result. It is used to translate field values.
@ eRR_UseOriginal
No translation done.
@ eRR_Translated
The value has been translated to another string.
@ eRR_Null
The value has been translated to NULL.
ERR_Action
Delimited stream traits use the ERR_Action members to instruct what should be done next.
@ eRR_Continue_Data
Continue processing this line, in full.
@ eRR_Skip
Skip this line.
ERR_EventAction
How to react to the potentially disruptive events.
@ eRR_EventAction_Default
Do some default action.
ERR_EventMode
Indicate whether the "ERR_Event" event (passed to the OnEvent() callback) occurred during regular read...
@ eRR_EventMode_Validating
We are performing data validation.
ERR_Event
CRowReader passes such events to the Traits via OnEvent() callback.
@ eRR_Event_SourceEnd
Data source has hit EOF.
@ eRR_Event_SourceBegin
Data source has started or been switched (no reads yet though).
@ eRR_Event_SourceError
Data source has hit an error on read.
const CTempString kNullFieldRepresentation
Note 1: Empty rows are allowed and treated as rows with 0 fields. Note 2: Both CRLF and LF are allowed. Note...