NCBI C++ ToolKit
cgi_entry_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cgi_entry_reader.cpp 81838 2018-04-09 17:42:02Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko
27 *
28 * File Description:
29 * Support classes for on-demand CGI input parsing.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
36 #include <cgi/cgi_exception.hpp>
37 #include <cgi/cgi_util.hpp>
38 #include <cgi/error_codes.hpp>
39 
40 #define NCBI_USE_ERRCODE_X Cgi_API
41 
43 
// Parameter tag searched for (case-insensitively) in a multipart/form-data
// content type; the text following it is taken as the part boundary.
44 static const char* kBoundaryTag = "boundary=";
// Part header field names recognized when reading multipart part headers;
// both are compared case-insensitively against the text before the colon.
45 static const char* kContentDisposition = "Content-Disposition";
46 static const char* kContentType = "Content-Type";
47 
// String-literal prefix pasted onto the front of the exception and
// diagnostic messages produced in this file.
48 #define CCER "CCgiEntryReader: "
49 
50 
51 static bool s_MatchesBoundary(const string& l, const string& b)
52 {
53  return (l == b || (l.size() == b.size() + 2 && NStr::StartsWith(l, b)
54  && NStr::EndsWith(l, "--")));
55 }
56 
57 
59 {
60  if ((m_State & fHitBoundary) == 0) {
61  x_Flush();
62  x_HitBoundary(false);
63  }
64 }
65 
66 
/// Copy up to @a count bytes of buffered entry data into @a buf.
///
/// If the internal buffer is empty it is first refilled through
/// x_FillBuffer(count).  The bytes handed out are removed from the buffer,
/// and their number is stored in *bytes_read when that pointer is non-null.
///
/// @return eRW_Eof when the boundary has already been hit and no bytes were
///         copied; eRW_Success otherwise (including when @a count is 0).
///
/// NOTE(review): the numbering of this listing skips from 68 to 71, so two
/// lines at the top of the body are not visible here -- confirm against the
/// repository copy before relying on this description.
67 ERW_Result CCgiEntryReader::Read(void* buf, size_t count, size_t* bytes_read)
68 {
71  if (count > 0) {
72  if (m_Buffer.empty()) {
73  x_FillBuffer(count);
74  }
// Hand out no more than what is buffered; n is 0 if the buffer is still
// empty after the refill attempt.
75  size_t n = min(m_Buffer.size(), count);
76  memcpy(buf, m_Buffer.data(), n);
77  m_Buffer.erase(0, n);
78  if (bytes_read) {
79  *bytes_read = n;
80  }
// Report end of data only once nothing is left to deliver.
81  if ((m_State & fHitBoundary) != 0 && !n ) {
82  return eRW_Eof;
83  }
84  } else if (bytes_read) {
85  *bytes_read = 0; // for the record
86  }
87  return eRW_Success;
88 }
89 
90 
92 {
// Reports through *count how many bytes can be handed out right away.
// NOTE(review): the header line of this definition is missing from the
// listing; the file's index shows it as
// "ERW_Result PendingCount(size_t *count)" -- confirm against the repository.
93  _ASSERT(count);
94  if ( !m_Buffer.empty() ) {
// Already-buffered data is available immediately.
95  *count = m_Buffer.size();
96  return eRW_Success;
97  } else if ((m_State & fHitBoundary) != 0) {
// Boundary already reached and the buffer is drained: nothing more to come.
98  *count = 0;
99  return eRW_Eof;
100  } else if (m_Context.m_In.rdbuf()->in_avail() <= 0) {
// The stream buffer reports nothing available; *count is left untouched.
101  return eRW_NotImplemented;
102  } else if ((m_State & fHitCRLF) == fHitCRLF
103  && CT_EQ_INT_TYPE(m_Context.m_In.peek(), CT_TO_INT_TYPE('-'))) {
104  return eRW_NotImplemented; // possible boundary
105  } else {
// Input is available and the next character is not a potential boundary
// start, so a single byte is reported.
106  *count = 1;
107  return eRW_Success;
108  }
109 }
110 
111 
113 {
114  if (count == 0 || (m_State & fHitBoundary) != 0) {
115  return;
116  }
117  string line;
118  SIZE_TYPE n_min = count == NPOS ? count : m_Context.m_Boundary.size() + 3;
119  while ((m_State & fHitBoundary) == 0 && count > m_Buffer.size()) {
120  int prev_state = m_State;
121  m_State &= ~fUnread;
122  // Ensure that the boundary will actually register if present.
123  SIZE_TYPE n = max(count - m_Buffer.size(), n_min);
124  switch (m_Context.x_DelimitedRead(line, n)) {
125  case TContext::eRT_EOF:
126  // virtual boundary -- no more entries!
127  x_HitBoundary(true);
128  if ((m_State & fHitCRLF) == fHitCRLF
130  return;
131  }
132  break;
133 
135  if ((m_State & fHitCRLF) == fHitCRLF
138  return; // refrain from adding line to buffer
139  }
140  m_State |= fHitCRLF;
141  break;
142 
144  m_State &= ~fHitCRLF;
145  break;
146 
148  m_State |= fHitCR;
149  m_State &= ~fHitLF;
150  break;
151  }
152  if (m_Buffer.size() + line.size() + 2 > m_Buffer.capacity()) {
153  m_Buffer.reserve(min(m_Buffer.capacity() * 2,
154  m_Buffer.size() + line.size() + 2));
155  }
156  if ((prev_state & (fUnread | fHitCR)) == fHitCR) {
157  m_Buffer += '\r';
158  if ((prev_state & fHitLF) != 0) {
159  m_Buffer += '\n';
160  }
161  }
162  m_Buffer += line;
163  }
164 }
165 
166 
168 {
170  if (m_Context.m_CurrentReader == this) {
172  }
173  if (final) {
175  }
176 }
177 
178 
180  TCgiEntries& out,
181  const string& content_type,
182  size_t content_length,
183  string* content_log)
184  : m_In(in),
185  m_Out(out),
186  m_OutIter(out.begin()),
187  m_OutIterated(true), // By default do not iterate existing entries
188  m_ContentTypeDeclared(!content_type.empty()),
189  m_ContentLength(content_length),
190  m_ContentLog(content_log),
191  m_Position(0),
192  m_BytePos(0),
193  m_CurrentEntry(NULL),
194  m_CurrentReader(NULL)
195 {
196  if (NStr::StartsWith(content_type, "multipart/form-data", NStr::eNocase)) {
197  SIZE_TYPE pos = NStr::FindNoCase(content_type, kBoundaryTag);
198  if (pos == NPOS) {
200  CCER "no boundary field in " + content_type);
201  }
203  m_Boundary = "--" + content_type.substr(pos + strlen(kBoundaryTag));
204  string line;
206  : m_In.peek());
207  // work around a bug in IE 8 null submission handling
208  if ( line.empty() && !CT_EQ_INT_TYPE(next, CT_EOF) ) {
209  next = (x_DelimitedRead(line) == eRT_EOF ? CT_EOF : m_In.peek());
210  }
211  if ( !s_MatchesBoundary(line, m_Boundary)
212  || (line == m_Boundary && CT_EQ_INT_TYPE(next, CT_EOF))) {
214  CCER "multipart opening line " + line
215  + " differs from declared boundary " + m_Boundary);
216  }
217  if (line != m_Boundary) { // null submission(!)
219  }
220  } else {
222  m_Boundary = "&"; // ";" never really caught on
223  }
224 }
225 
226 
228 {
230 }
231 
232 
234 {
235  if ( !m_OutIterated ) {
236  _ASSERT(m_OutIter != m_Out.end());
237  auto it = m_OutIter;
238  m_CurrentEntry = &it->second;
239  if (++m_OutIter == m_Out.end()) {
240  m_OutIterated = true;
241  }
242  return it;
243  }
244 
245  // Disable IncludePreparsedEntries() if started parsing input.
246  m_OutIter = m_Out.end();
247 
248  string name, value, filename, content_type;
249 
251 
252  switch (m_ContentType) {
253  case eCT_Null:
254  return m_Out.end();
255 
256  case eCT_URLEncoded:
258  break;
259 
260  case eCT_Multipart:
261  x_ReadMultipartHeaders(name, filename, content_type);
262  break;
263  }
264 
265  if (name.empty() && m_ContentType == eCT_Null) {
266  return m_Out.end();
267  }
268 
269  CCgiEntry entry(value, filename, ++m_Position, content_type);
271  if (m_ContentType == eCT_Multipart) {
272  m_CurrentEntry = &it->second;
273  it->second.SetValue(m_CurrentReader = new CCgiEntryReader(*this));
274  }
275  return it;
276 }
277 
278 
280 {
281  // Include preparsed entries (if any) only once.
282  if (!m_OutIterated || m_OutIter == m_Out.end()) return;
283  m_OutIterated = false;
284 }
285 
286 
288 {
289  if (m_CurrentReader) {
293  }
294 }
295 
296 
299 {
300  char delim = '\r';
301  CT_INT_TYPE delim_read = CT_EOF;
303 
304  switch (m_ContentType) {
305  case eCT_URLEncoded:
306  _ASSERT(n == NPOS);
307  delim = m_Boundary[0];
308  break;
309 
310  case eCT_Multipart:
311  break;
312 
313  default:
314  _TROUBLE;
315  }
316 
317  // Add 1 to n when not up against the content length to compensate
318  // for get()'s insistence on producing (and counting) a trailing
319  // NUL. (When up against the content length, the last byte may
320  // require more finesse.)
321  if (n != NPOS) {
322  ++n;
323  }
326  }
327 
328  if (n == NPOS) {
329  NcbiGetline(m_In, s, delim);
330  m_BytePos += s.size();
331  if (m_In.eof()) {
332  reason = eRT_EOF;
333  } else {
334  m_In.unget();
335  delim_read = m_In.get();
336  _ASSERT(CT_EQ_INT_TYPE(delim_read, CT_TO_INT_TYPE(delim)));
337  ++m_BytePos;
338  }
339  } else {
340  s.clear();
341  while (n > 1) {
342  char buffer[4097];
343  m_In.get(buffer, min(n, sizeof(buffer)), delim);
344  size_t n_read = m_In.gcount();
345  s.append(buffer, n_read);
346  m_BytePos += n_read;
347  n -= n_read;
348  if (m_In.eof()) {
349  break;
350  } else {
351  CT_INT_TYPE next = m_In.peek();
353  || CT_EQ_INT_TYPE(next, CT_TO_INT_TYPE(delim))) {
354  break;
355  }
356  }
357  }
359  && m_BytePos == m_ContentLength - 1 && !m_In.eof() ) {
360  CT_INT_TYPE next = m_In.peek();
361  if ( !CT_EQ_INT_TYPE(next, CT_EOF)
362  && !CT_EQ_INT_TYPE(next, CT_TO_INT_TYPE(delim))) {
363  _VERIFY(next == m_In.get());
364  s += CT_TO_CHAR_TYPE(next);
365  ++m_BytePos;
366  }
367  }
368  if (m_In.eof() || m_BytePos >= m_ContentLength) {
369  reason = eRT_EOF;
370  } else {
371  // NB: this is an ugly workaround for a buggy STL behavior that
372  // lets short reads (e.g. originating from reading pipes) get
373  // through to the user level, causing istream::read() to
374  // wrongly assert EOF...
375  m_In.clear();
376  delim_read = m_In.get();
377  _ASSERT( !CT_EQ_INT_TYPE(delim_read, CT_EOF) );
378  if (CT_EQ_INT_TYPE(delim_read, CT_TO_INT_TYPE(delim))) {
379  ++m_BytePos;
380  } else {
381  reason = eRT_LengthBound;
382  m_In.unget();
383  }
384  }
385  }
386 
387  if (m_ContentLog) {
388  *m_ContentLog += s;
389  if (reason == eRT_Delimiter) {
390  *m_ContentLog += delim;
391  }
392  }
393 
394  if (m_ContentType == eCT_Multipart && reason == eRT_Delimiter) {
395  delim_read = m_In.get();
396  if (CT_EQ_INT_TYPE(delim_read, CT_TO_INT_TYPE('\n'))) {
397  ++m_BytePos;
398  if (m_ContentLog) {
399  *m_ContentLog += '\n';
400  }
401  } else {
402  m_In.unget();
403  reason = eRT_PartialDelimiter;
404  }
405  }
406 
408  && reason == eRT_EOF) {
409  // discard terminal CRLF
410  s.resize(s.size() - 2);
411  }
412 
413  return reason;
414 }
415 
416 
418 {
419  if (x_DelimitedRead(name) == eRT_EOF || m_In.eof()) {
421  }
422  ITERATE (string, it, name) {
423  if (*it < ' ' || *it > '~') {
424  if (m_ContentTypeDeclared) {
425  ERR_POST(Warning << "Unescaped binary content in"
426  " URL-encoded form data: "
427  << NStr::PrintableString(string(1, *it)));
428  }
429  name.clear();
431  return;
432  }
433  }
434  SIZE_TYPE name_len = name.find('=');
435  if (name_len != NPOS) {
436  value = name.substr(name_len + 1);
437  name.resize(name_len);
438  }
441 }
442 
443 
444 static CTempString s_FindAttribute(const CTempString& str, const string& name,
445  CT_POS_TYPE input_pos, bool required)
446 {
447  SIZE_TYPE att_pos = str.find("; " + name + "=\"");
448  if (att_pos == NPOS) {
449  if (required) {
450  NCBI_THROW2(CCgiParseException, eAttribute, CCER
451  "part header lacks required attribute " + name + ": " + str,
452  (std::string::size_type) NcbiStreamposToInt8(input_pos));
453  } else {
454  return kEmptyStr;
455  }
456  }
457  SIZE_TYPE att_start = att_pos + name.size() + 4;
458  SIZE_TYPE att_end = str.find('\"', att_start);
459  if (att_end == NPOS) {
460  NCBI_THROW2(CCgiParseException, eAttribute,
461  CCER "part header contains unterminated attribute " + name +
462  ": " + str.substr(att_pos),
463  (std::string::size_type) NcbiStreamposToInt8(input_pos) +
464  att_start);
465  }
466  return str.substr(att_start, att_end - att_start);
467 }
468 
469 
471  string& filename,
472  string& content_type)
473 {
474  string line;
475  for (;;) {
476  SIZE_TYPE input_pos = m_BytePos;
477  switch (x_DelimitedRead(line)) {
478  case eRT_Delimiter:
479  break;
480 
481  case eRT_EOF:
483  CCER "Hit end of input while reading part headers",
484  input_pos);
485 
486  case eRT_LengthBound:
487  _TROUBLE;
488 
491  CCER "CR in part header not followed by LF", input_pos);
492  }
493 
494  if (line.empty()) {
495  break;
496  }
497 
498  SIZE_TYPE pos = line.find(':');
499  if (pos == NPOS) {
501  CCER "part header lacks colon: " + line, input_pos);
502  }
503  CTempString field_name(line, 0, pos);
504  if (NStr::EqualNocase(field_name, kContentDisposition)) {
505  if (NStr::CompareNocase(line, pos, 13, ": form-data; ") != 0) {
507  CCER "malformatted Content-Disposition header: "
508  + line,
509  input_pos);
510  }
511  name = s_FindAttribute(line, "name", input_pos, true);
512  filename = s_FindAttribute(line, "filename", input_pos, false);
513  } else if (NStr::EqualNocase(field_name, kContentType)) {
514  content_type = line.substr(pos + 2);
515  } else {
516  ERR_POST_X(4, Warning << CCER "ignoring unrecognized part header: "
517  + line);
518  }
519  }
520 }
521 
522 
static const char * kContentType
static const char * kBoundaryTag
#define CCER
static CTempString s_FindAttribute(const CTempString &str, const string &name, CT_POS_TYPE input_pos, bool required)
static bool s_MatchesBoundary(const string &l, const string &b)
static const char * kContentDisposition
Support classes for on-demand CGI input parsing.
Exception classes used by the NCBI CGI framework.
CCgiParseException –.
CCgiRequestException –.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
const_iterator end() const
Definition: map.hpp:292
iterator insert(const value_type &val)
Definition: map.hpp:305
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void x_ReadMultipartHeaders(string &name, string &filename, string &content_type)
EReadTerminator x_DelimitedRead(string &s, SIZE_TYPE n=NPOS)
TCgiEntries::iterator TCgiEntriesI
Definition: ncbicgi.hpp:639
CCgiEntryReaderContext(CNcbiIstream &in, TCgiEntries &out, const string &content_type, size_t content_length=CCgiRequest::kContentLengthUnknown, string *content_log=NULL)
string & SetValue()
Definition: ncbicgi.hpp:475
void x_HitBoundary(bool final)
ERW_Result PendingCount(size_t *count)
Via parameter "count" (which is guaranteed to be supplied non-NULL) return the number of bytes that a...
TCgiEntriesI GetNextEntry(void)
void x_ReadURLEncodedEntry(string &name, string &value)
ERW_Result Read(void *buf, size_t count, size_t *bytes_read)
Read as many as "count" bytes into a buffer pointed to by the "buf" argument.
static const size_t kContentLengthUnknown
Get content length using value of the property 'eCgi_ContentLength'.
Definition: ncbicgi.hpp:781
void x_FillBuffer(SIZE_TYPE count)
#define NULL
Definition: ncbistd.hpp:225
#define _VERIFY(expr)
Definition: ncbidbg.hpp:161
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
ERW_Result
Result codes for I/O operations.
Int8 NcbiStreamposToInt8(NCBI_NS_STD::char_traits< char >::pos_type stream_pos)
Convert stream position to 64-bit int.
Definition: ncbistre.hpp:771
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
#define CT_TO_CHAR_TYPE
Definition: ncbistre.hpp:735
#define CT_EOF
Definition: ncbistre.hpp:732
#define CT_TO_INT_TYPE
Definition: ncbistre.hpp:734
#define CT_INT_TYPE
Definition: ncbistre.hpp:728
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define CT_EQ_INT_TYPE
Definition: ncbistre.hpp:736
#define CT_POS_TYPE
Definition: ncbistre.hpp:730
@ eRW_NotImplemented
Action / information is not available.
@ eRW_Eof
End of data, should be considered permanent.
@ eRW_Success
Everything is okay, I/O completed.
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
Definition: ncbistr.cpp:3953
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static void URLDecodeInPlace(string &str, EUrlDecode flag=eUrlDec_All)
URL-decode string to itself.
Definition: ncbistr.cpp:6222
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
Definition of all error codes used in cgi (xcgi.lib).
char * buf
yy_size_t n
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define HTTP_EOL
Definition: ncbistre.hpp:120
T max(T x_, T y_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
#define _TROUBLE
#define _ASSERT
Modified on Sat Apr 13 11:46:34 2024 by modify_doxy.py rev. 669887