1 #ifndef UTIL___ROW_READER_IANA_CSV__HPP
2 #define UTIL___ROW_READER_IANA_CSV__HPP
63 return "eUnbalancedDoubleQuote";
65 return "eUnexpectedDoubleQuote";
99 size_t current_index= 0;
100 size_t token_begin_index = 0;
101 size_t lines_read = 0;
102 bool in_quotes =
false;
107 while (current_index < data->
size()) {
108 auto current_char = (*data)[current_index];
109 if (current_char ==
',') {
111 m_Tokens.emplace_back(token_begin_index);
112 token_begin_index = current_index + 1;
114 }
else if (current_char ==
'"') {
115 if (!in_quotes && token_begin_index != current_index) {
117 eUnexpectedDoubleQuote,
118 "Unexpected double quote. "
119 "If a field is not quoted then a "
120 "double quote may not appear "
126 if (current_index + 1 < data->size() &&
127 (*data)[current_index + 1] ==
'"') {
130 if (current_index + 1 < data->size() &&
131 (*data)[current_index + 1] !=
',')
133 eUnexpectedDoubleQuote,
134 "Unexpected double quote. "
135 "Closing double quote must be the "
136 "last in a line or be followed "
137 "by a comma character");
154 eUnbalancedDoubleQuote,
155 "Unbalanced double quote detected");
159 m_Tokens.push_back(token_begin_index);
165 if (raw_line.
empty())
173 vector<CTempString>& tokens)
176 if (GetMyStream().GetCurrentLineNo() == 0) {
188 tokens.emplace_back(raw_line.
data() +
m_Tokens[field_no],
202 if (raw_line.
empty())
221 info.second.first,
info.second.second);
224 translated,
info.second.first,
info.second.second);
233 string& translated_value)
235 if (!raw_value.
empty()) {
236 if (raw_value[0] ==
'"') {
237 translated_value =
string(raw_value.
data() + 1,
238 raw_value.
size() - 2);
258 GetMyStream().x_ClearTraitsProvidedFieldsInfo();
304 GetMyStream().x_SetFieldName(field_no,
305 string(raw_field_name.
data(),
306 raw_field_name.
size()));
308 GetMyStream().x_SetFieldName(field_no, translated);
315 for (
const auto&
info : GetMyStream().GetFieldsMetaInfo()) {
316 if (
info.is_type_initialized) {
317 auto field_type =
info.type.GetType();
321 make_pair(field_type,
info.type.GetProps());
Note 1: Empty rows are allowed and silently skipped. Note 2: Both CRLF and LF are allowed. Note 3: Numb...
NCBI_EXCEPTION_DEFAULT(CCRowReaderStream_IANA_CSV_Exception, CException)
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
static void ValidateBasicTypeFieldValue(const CTempString &str_value, ERR_FieldType field_type, const string &props)
vector< CTempString > m_ValidationTokens
RR_TRAITS_PARENT_STREAM(CRowReaderStream_IANA_CSV)
ERR_TranslationResult Translate(TFieldNo, const CTempString raw_value, string &translated_value)
ERR_EventAction OnEvent(ERR_Event event, ERR_EventMode event_mode)
void x_GetFieldTypesToValidate(void)
ERR_Action Validate(CTempString raw_line, ERR_FieldValidationMode field_validation_mode)
void x_ReadOneLine(CNcbiIstream &is, string *data, bool joining)
void x_SetFieldNames(const CTempString &raw_line)
map< size_t, pair< ERR_FieldType, string > > m_FieldsToValidate
void SetHasHeader(bool has_header)
Tell if the source has a header.
ERR_Action OnNextLine(CTempString raw_line)
ERR_Action Tokenize(const CTempString raw_line, vector< CTempString > &tokens)
string m_PreviousLineSeparator
size_t ReadRowData(CNcbiIstream &is, string *data)
CRowReaderStream_IANA_CSV()
vector< size_t > m_Tokens
Partial specialization of the CRowReaderStream_CharDelimited<...> template for the case when the data...
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
EErrCode
Error types that an application can generate.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
size_type size(void) const
Return the length of the represented array.
const struct ncbi::grid::netcache::search::fields::SIZE size
Uint4 TFieldNo
Field number (zero based)
ERR_FieldValidationMode
Whether to check validity of the fields (names and/or values)
@ eRR_NoFieldValidation
don't validate fields' value and name
ERR_TranslationResult
The Translate() callback result. It is used to translate field values.
@ eRR_UseOriginal
No translation done.
@ eRR_Translated
The value has been translated to another string.
ERR_Action
Delimited stream traits use the ERR_Action members to instruct what should be done next.
@ eRR_Continue_Data
Continue processing this line, in full.
@ eRR_Skip
Skip this line.
ERR_EventAction
How to react to the potentially disruptive events.
@ eRR_EventAction_Default
Do some default action.
ERR_EventMode
Indicate whether the "ERR_Event" event (passed to the OnEvent() callback) occurred during regular read...
@ eRR_EventMode_Validating
We are performing data validation.
ERR_Event
CRowReader passes such events to the Traits via OnEvent() callback.
@ eRR_Event_SourceEnd
Data source has hit EOF.
@ eRR_Event_SourceBegin
Data source has started or been switched (no reads yet though).
@ eRR_Event_SourceError
Data source has hit an error on read.