NCBI C++ ToolKit
pcretest.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4 
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9 
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13 
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16 
17  * Redistributions in binary form must reproduce the above copyright
18  notice, this list of conditions and the following disclaimer in the
19  documentation and/or other materials provided with the distribution.
20 
21  * Neither the name of the University of Cambridge nor the names of its
22  contributors may be used to endorse or promote products derived from
23  this software without specific prior written permission.
24 
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38 
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48 
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52 
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60 
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65 
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81 
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89 
/* Platform-specific I/O setup. INPUT_MODE and OUTPUT_MODE are the mode strings
used when pcretest opens its input and output files. */

#if defined(WIN32) || defined(_WIN32)

/* ----- Windows ----- */

#include <io.h>       /* For _setmode() */
#include <fcntl.h>    /* For _O_BINARY */

/* Input is opened without "b" on Windows; output keeps it. */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Map the plain names onto the underscore-prefixed Windows ones, unless the
environment has already supplied them as macros. */

#if !defined(isatty)
#define isatty _isatty
#endif

#if !defined(fileno)
#define fileno _fileno
#endif

/* A user-supplied fix for Borland Builder 5 under Windows: route _setmode()
to setmode(). */

#if defined(__BORLANDC__)
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#else

/* ----- Not Windows ----- */

#include <sys/time.h>       /* These two includes are needed */
#include <sys/resource.h>   /* for setrlimit(). */

#if !defined(NATIVE_ZOS)
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#else                       /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#endif

#endif
123 
/* VMS-only header and prototype; vms_setsymbol() itself is defined
elsewhere. */

#if defined(__VMS)
#include <ssdef.h>
void vms_setsymbol(char *, char *, int);
#endif


/* In this program PRIV() expands to its argument unchanged. */

#define PRIV(name) name
131 
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136 
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140 
141 #include "pcre.h"
142 #include "pcre_internal.h"
143 
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148 
/* One prototype per supported library width. Each function takes the compiled
pattern, the output stream, and a flag, and returns nothing. */

#if defined(SUPPORT_PCRE8)
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE16)
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

#if defined(SUPPORT_PCRE32)
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
158 
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162 
163 #define PCRE_INCLUDED
164 
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167 
168 /* The definition of the macro PRINTABLE, which determines whether to print an
169 output character as-is or as a hex value when showing compiled patterns, is
170 the same as in the printint.src file. We use it here in cases when the locale
171 has not been explicitly changed, so as to get consistent output from systems
172 that differ in their output from isprint() even in the "C" locale. */
173 
/* PRINTABLE(c): fixed, locale-independent range test. */

#if !defined(EBCDIC)
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif

/* PRINTOK(c): when locale_set is zero use the fixed PRINTABLE() range;
otherwise accept only values below 256 for which isprint() is true. */

#define PRINTOK(c) \
  (!locale_set ? PRINTABLE(c) : (((c) < 256) && isprint(c)))
181 
/* Without the 8-bit library (16 or 32 bit only mode) the POSIX interface is
not tested, so force NOPOSIX on unless it is already defined. */
#ifndef SUPPORT_PCRE8
#ifndef NOPOSIX
#define NOPOSIX
#endif
#endif
186 
187 /* It is possible to compile this test program without including support for
188 testing the POSIX interface, though this is not available via the standard
189 Makefile. */
190 
191 #if !defined NOPOSIX
192 #include "pcreposix.h"
193 #endif
194 
195 /* It is also possible, originally for the benefit of a version that was
196 imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
197 NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
198 automatically cut out the UTF support if PCRE is built without it. */
199 
/* No UTF support configured: define NOUTF unless it is already defined. */
#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
205 
206 /* To make the code a bit tidier for 8/16/32-bit support, we define macros
207 for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
208 only from one place and is handled differently). I couldn't dream up any way of
209 using a single macro to do this in a generic way, because of the many different
210 argument requirements. We know that at least one of SUPPORT_PCRE8,
211 SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
212 individual mode; then use these in the definitions of generic macros.
213 
214 **** Special note about the PCHARSxxx macros: the address of the string to be
215 printed is always given as two arguments: a base address followed by an offset.
216 The base address is cast to the correct data size for 8, 16 or 32 bit data; the
217 offset is in units of this size. If the string were given as base+offset in one
218 argument, the casting might be incorrectly applied. */
219 
220 #ifdef SUPPORT_PCRE8
221 
/* ----- 8-bit wrappers -----
Each macro forwards to the corresponding pcre_xxx function (or to a pcretest
helper such as pchars()), casting string arguments to char pointers. Macros
whose first parameter is a result variable (lv, rc, count, n, re, extra, p)
assign the call's return value to it. Arguments are deliberately pasted without
extra parentheses, so pass simple expressions. */

/* String printing helpers: base address and offset are separate arguments so
that the cast applies to the base only. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Only cn8 is used; cn16 and cn32 are accepted so that all widths share one
argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)p))

/* Global hook assignments. */

#define SET_PCRE_CALLOUT8(callout)          pcre_callout = callout

#define SET_PCRE_STACK_GUARD8(stack_guard)  pcre_stack_guard = stack_guard

/* Compile, study and execute. */

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

/* Release functions. */

#define PCRE_FREE_STUDY8(extra)              pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring)      pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr)   pcre_free_substring_list(listptr)

/* Substring extraction. */

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* NOTE: the rc parameter is unused, and `re` is not a parameter - it is taken
from the scope in which the macro is expanded. */

#define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* Miscellaneous. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* Alias giving the 8-bit function a width-suffixed name, like its 16- and
32-bit counterparts. */

#define pcre8_maketables pcre_maketables
302 
303 #endif /* SUPPORT_PCRE8 */
304 
305 /* -----------------------------------------------------------*/
306 
307 #ifdef SUPPORT_PCRE16
308 
/* ----- 16-bit wrappers -----
Each macro forwards to the corresponding pcre16_xxx function (or to a pcretest
helper such as pchars16()). Arguments are cast from the 8-bit types used by the
generic code to their pcre16 equivalents, and results are cast back. Buffer
sizes are divided by 2 before being passed on. Arguments are pasted without
extra parentheses, so pass simple expressions. */

/* String printing helpers: base address and offset are separate arguments so
that the cast applies to the base only. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Only cn16 is used; cn8 and cn32 are accepted so that all widths share one
argument list. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

/* Global hook assignments; the supplied function pointer is cast to the
16-bit hook type. */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))callout

#define SET_PCRE_STACK_GUARD16(stack_guard) \
  pcre16_stack_guard = (int (*)(void))stack_guard

/* Compile, study and execute. */

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)extra, \
    (pcre16_jit_callback)callback, userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
    tables)

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, size/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
    (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
    len, start_offset, options, offsets, size_offsets)

/* Release functions. */

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* Substring extraction; the output pointer goes through void* on its way to
the 16-bit pointer type. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* NOTE: the rc parameter is unused, and `re` is not a parameter - it is taken
from the scope in which the macro is expanded. */

#define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

/* Miscellaneous. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
    tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)stack)
394 
395 #endif /* SUPPORT_PCRE16 */
396 
397 /* -----------------------------------------------------------*/
398 
399 #ifdef SUPPORT_PCRE32
400 
/* ----- 32-bit wrappers -----
Each macro forwards to the corresponding pcre32_xxx function (or to a pcretest
helper such as pchars32()). Arguments are cast from the 8-bit types used by the
generic code to their pcre32 equivalents, and results are cast back. Buffer
sizes are divided by 4 before being passed on. Arguments are pasted without
extra parentheses, so pass simple expressions. */

/* String printing helpers: base address and offset are separate arguments so
that the cast applies to the base only. NOTE: `use_utf` is not a parameter - it
is taken from the scope in which the macro is expanded. */

#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

/* Only cn32 is used; cn8 and cn16 are accepted so that all widths share one
argument list. */

#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p))

/* Global hook assignments; the supplied function pointer is cast to the
32-bit hook type. */

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))callout

#define SET_PCRE_STACK_GUARD32(stack_guard) \
  pcre32_stack_guard = (int (*)(void))stack_guard

/* Compile, study and execute. */

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)extra, \
    (pcre32_jit_callback)callback, userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \
    tables)

#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/4)

#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_UCHAR32 *)cbuffer, size/4)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \
    (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \
    len, start_offset, options, offsets, size_offsets)

/* Release functions. */

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)extra)

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)substring)

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)listptr)

/* Substring extraction; the output pointer goes through void* on its way to
the 32-bit pointer type. */

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \
    count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr)

/* NOTE: the rc parameter is unused, and `re` is not a parameter - it is taken
from the scope in which the macro is expanded. */

#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \
  n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr)

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \
    (PCRE_SPTR32 *)(void*)subsptr)

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \
    (PCRE_SPTR32 **)(void*)listptr)

/* Miscellaneous. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \
    tables)

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error)

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)stack)
486 
487 #endif /* SUPPORT_PCRE32 */
488 
489 
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
493 
/* Library-width selectors compared against pcre_mode by the dispatch macros
below. The values are 0, 1 and 2, so CHAR_SIZE, defined as (1 << pcre_mode),
evaluates to 1, 2 and 4 respectively. */

enum {
  PCRE8_MODE,
  PCRE16_MODE,
  PCRE32_MODE
};
499 
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501  defined (SUPPORT_PCRE32)) >= 2
502 
/* Evaluates to 1 shifted left by the current value of pcre_mode. */
#define CHAR_SIZE (1 << (pcre_mode))
504 
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
508 
509 /* ----- All three modes supported ----- */
510 
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512 
/* Run-time dispatch on pcre_mode when all three widths are available: 32-bit
is tested first, then 16-bit, with 8-bit as the fallback. Statement-form macros
are wrapped in do { ... } while (0) so that each expands to exactly one
statement; follow every use with a semicolon. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCHARS32(lv, p, offset, len, f); } \
    else if (pcre_mode == PCRE16_MODE) { PCHARS16(lv, p, offset, len, f); } \
    else { PCHARS8(lv, p, offset, len, f); } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCHARSV32(p, offset, len, f); } \
    else if (pcre_mode == PCRE16_MODE) { PCHARSV16(p, offset, len, f); } \
    else { PCHARSV8(p, offset, len, f); } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      { READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); } \
    else if (pcre_mode == PCRE16_MODE) \
      { READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); } \
    else \
      { READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re); } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == PCRE32_MODE) { SET_PCRE_CALLOUT32(callout); } \
    else if (pcre_mode == PCRE16_MODE) { SET_PCRE_CALLOUT16(callout); } \
    else { SET_PCRE_CALLOUT8(callout); } \
  } while (0)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  do { \
    if (pcre_mode == PCRE32_MODE) { SET_PCRE_STACK_GUARD32(stack_guard); } \
    else if (pcre_mode == PCRE16_MODE) { SET_PCRE_STACK_GUARD16(stack_guard); } \
    else { SET_PCRE_STACK_GUARD8(stack_guard); } \
  } while (0)

/* Expression form: yields the length computed by the selected STRLENxx. */

#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      { PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); } \
    else if (pcre_mode == PCRE16_MODE) \
      { PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); } \
    else \
      { PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      { PCRE_COMPILE32(re, pat, options, error, erroffset, tables); } \
    else if (pcre_mode == PCRE16_MODE) \
      { PCRE_COMPILE16(re, pat, options, error, erroffset, tables); } \
    else \
      { PCRE_COMPILE8(re, pat, options, error, erroffset, tables); } \
  } while (0)
570 
/* pcre_config() is used directly here rather than dispatched on pcre_mode. */

#define PCRE_CONFIG pcre_config

/* The remaining macros in this group dispatch on pcre_mode: 32-bit first, then
16-bit, with 8-bit as the fallback. Each expands to a single statement; follow
every use with a semicolon. */

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else { \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    } else { \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
    } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else { \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else { \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_STUDY32(extra); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_STUDY16(extra); } \
    else { PCRE_FREE_STUDY8(extra); } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_SUBSTRING32(substring); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_SUBSTRING16(substring); } \
    else { PCRE_FREE_SUBSTRING8(substring); } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_FREE_SUBSTRING_LIST32(listptr); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_FREE_SUBSTRING_LIST16(listptr); } \
    else { PCRE_FREE_SUBSTRING_LIST8(listptr); } \
  } while (0)
640 
/* Further pcre_mode dispatchers: 32-bit first, then 16-bit, with 8-bit as the
fallback. Statement-form macros expand to a single do { ... } while (0)
statement; follow every use with a semicolon. */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_GET_STRINGNUMBER32(n, rc, ptr); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_GET_STRINGNUMBER16(n, rc, ptr); } \
    else { PCRE_GET_STRINGNUMBER8(n, rc, ptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (pcre_mode == PCRE32_MODE) { \
      PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
    } else if (pcre_mode == PCRE16_MODE) { \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    } else { \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)

/* Expression form: yields the value of the selected allocator wrapper. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? PCRE_JIT_STACK_ALLOC32(startsize, maxsize) : \
   pcre_mode == PCRE16_MODE ? PCRE_JIT_STACK_ALLOC16(startsize, maxsize) : \
   PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_JIT_STACK_FREE32(stack); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_JIT_STACK_FREE16(stack); } \
    else { PCRE_JIT_STACK_FREE8(stack); } \
  } while (0)

/* Expression form: calls the maketables function of the selected width. */

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : \
   pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); } \
    else if (pcre_mode == PCRE16_MODE) \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); } \
    else \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (pcre_mode == PCRE32_MODE) \
      { PCRE_PRINTINT32(re, outfile, debug_lengths); } \
    else if (pcre_mode == PCRE16_MODE) \
      { PCRE_PRINTINT16(re, outfile, debug_lengths); } \
    else \
      { PCRE_PRINTINT8(re, outfile, debug_lengths); } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (pcre_mode == PCRE32_MODE) { PCRE_STUDY32(extra, re, options, error); } \
    else if (pcre_mode == PCRE16_MODE) { PCRE_STUDY16(extra, re, options, error); } \
    else { PCRE_STUDY8(extra, re, options, error); } \
  } while (0)
718 
719 
720 /* ----- Two out of three modes are supported ----- */
721 
722 #else
723 
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
726 
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728 
/* BITONE is the wider and BITTWO the narrower of the two selected widths:
32/16 when both SUPPORT_PCRE32 and SUPPORT_PCRE16 are defined, 32/8 when
SUPPORT_PCRE32 and SUPPORT_PCRE8 are defined, and 16/8 otherwise. */

#if defined(SUPPORT_PCRE32) && \
    (defined(SUPPORT_PCRE16) || defined(SUPPORT_PCRE8))

#define BITONE 32

#if defined(SUPPORT_PCRE16)
#define BITTWO 16     /* ----- 32-bit and 16-bit ----- */
#else
#define BITTWO 8      /* ----- 32-bit and 8-bit ----- */
#endif

#else                 /* ----- 16-bit and 8-bit ----- */

#define BITONE 16
#define BITTWO 8

#endif

/* Token pasting in two steps, so that macro arguments such as BITONE are
expanded before being pasted. */

#define glue(a,b) a ## b
#define G(a,b)    glue(a,b)
748 
749 
750 /* ----- Common macros for two-mode cases ----- */
751 
/* Two-mode dispatch: G(G(PCRE,BITONE),_MODE) pastes together the mode constant
for the wider width, and G(NAME,BITONE) / G(NAME,BITTWO) select the per-width
macro. Statement-form macros expand to a single do { ... } while (0) statement;
follow every use with a semicolon. */

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(PCHARS,BITONE)(lv, p, offset, len, f); } \
    else \
      { G(PCHARS,BITTWO)(lv, p, offset, len, f); } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(PCHARSV,BITONE)(p, offset, len, f); } \
    else \
      { G(PCHARSV,BITTWO)(p, offset, len, f); } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); } \
    else \
      { G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re); } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(SET_PCRE_CALLOUT,BITONE)(callout); } \
    else \
      { G(SET_PCRE_CALLOUT,BITTWO)(callout); } \
  } while (0)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); } \
    else \
      { G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard); } \
  } while (0)

/* Expression form: yields the length computed by the selected STRLENxx. */

#define STRLEN(p) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); } \
    else \
      { G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata); } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      { G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); } \
    else \
      { G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables); } \
  } while (0)

/* The config function of the wider width is used unconditionally. */

#define PCRE_CONFIG G(G(pcre,BITONE),_config)
798 
799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800  namesptr, cbuffer, size) \
801  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802  G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803  namesptr, cbuffer, size); \
804  else \
805  G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806  namesptr, cbuffer, size)
807 
808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810  G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
811  else \
812  G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813 
814 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
815  offsets, size_offsets, workspace, size_workspace) \
816  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
817  G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
818  offsets, size_offsets, workspace, size_workspace); \
819  else \
820  G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
821  offsets, size_offsets, workspace, size_workspace)
822 
823 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
824  offsets, size_offsets) \
825  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
826  G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
827  offsets, size_offsets); \
828  else \
829  G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
830  offsets, size_offsets)
831 
/* Release and substring-extraction entry points. Same scheme as the other
two-mode macros: an if/else on pcre_mode selecting the BITONE or BITTWO
variant, with the terminating semicolon supplied by the invocation. */
832 #define PCRE_FREE_STUDY(extra) \
833  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
834  G(PCRE_FREE_STUDY,BITONE)(extra); \
835  else \
836  G(PCRE_FREE_STUDY,BITTWO)(extra)
837 
838 #define PCRE_FREE_SUBSTRING(substring) \
839  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
840  G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
841  else \
842  G(PCRE_FREE_SUBSTRING,BITTWO)(substring)
843 
844 #define PCRE_FREE_SUBSTRING_LIST(listptr) \
845  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
846  G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
847  else \
848  G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr)
849 
850 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
851  getnamesptr, subsptr) \
852  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
853  G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
854  getnamesptr, subsptr); \
855  else \
856  G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
857  getnamesptr, subsptr)
858 
859 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
860  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
861  G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
862  else \
863  G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr)
864 
865 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
866  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
867  G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
868  else \
869  G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr)
870 
871 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
872  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
873  G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
874  else \
875  G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr)
876 
/* Expression macro: yields the JIT stack allocated by the BITONE variant when
pcre_mode equals the BITONE mode constant, otherwise by the BITTWO variant.
The whole conditional is parenthesized so that operators surrounding an
invocation cannot bind into the else arm. */
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881 
/* Statement macro: frees a JIT stack through the BITONE variant when
pcre_mode equals the BITONE mode constant, otherwise through the BITTWO
variant. The invocation supplies the terminating semicolon. */
882 #define PCRE_JIT_STACK_FREE(stack) \
883  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
884  G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
885  else \
886  G(PCRE_JIT_STACK_FREE,BITTWO)(stack)
887 
/* Expression macro: calls the maketables function of the BITONE library when
pcre_mode equals the BITONE mode constant, otherwise that of the BITTWO
library, and yields its result. The whole conditional is parenthesized so
that operators surrounding a use cannot bind into the else arm. */
#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891 
/* Byte-order conversion, debug printing and study entry points. Each is an
if/else on pcre_mode that selects the BITONE or BITTWO variant; the invocation
supplies the terminating semicolon. */
892 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
893  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
894  G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
895  else \
896  G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables)
897 
898 #define PCRE_PRINTINT(re, outfile, debug_lengths) \
899  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
900  G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
901  else \
902  G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths)
903 
904 #define PCRE_STUDY(extra, re, options, error) \
905  if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
906  G(PCRE_STUDY,BITONE)(extra, re, options, error); \
907  else \
908  G(PCRE_STUDY,BITTWO)(extra, re, options, error)
909 
910 #endif /* Two out of three modes */
911 
912 /* ----- End of cases where more than one mode is supported ----- */
913 
914 
915 /* ----- Only 8-bit mode is supported ----- */
916 
917 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library compiled in there is nothing to dispatch on:
every generic name is a direct alias for its 8-suffixed variant (or for the
unsuffixed pcre_ library function). CHAR_SIZE is 1 - presumably the number of
bytes per code unit. */
918 #define CHAR_SIZE 1
919 #define PCHARS PCHARS8
920 #define PCHARSV PCHARSV8
921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
924 #define STRLEN STRLEN8
925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
926 #define PCRE_COMPILE PCRE_COMPILE8
927 #define PCRE_CONFIG pcre_config
928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
931 #define PCRE_EXEC PCRE_EXEC8
932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
/* PCRE_MAKETABLES includes the call parentheses, so it is used without "()". */
941 #define PCRE_MAKETABLES pcre_maketables()
942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
943 #define PCRE_PRINTINT PCRE_PRINTINT8
944 #define PCRE_STUDY PCRE_STUDY8
945 
946 /* ----- Only 16-bit mode is supported ----- */
947 
948 #elif defined SUPPORT_PCRE16
/* With only the 16-bit library compiled in, every generic name is a direct
alias for its 16-suffixed variant (or for the pcre16_ library function).
CHAR_SIZE is 2 - presumably the number of bytes per code unit. */
949 #define CHAR_SIZE 2
950 #define PCHARS PCHARS16
951 #define PCHARSV PCHARSV16
952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
955 #define STRLEN STRLEN16
956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
957 #define PCRE_COMPILE PCRE_COMPILE16
958 #define PCRE_CONFIG pcre16_config
959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
962 #define PCRE_EXEC PCRE_EXEC16
963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
/* PCRE_MAKETABLES includes the call parentheses, so it is used without "()". */
972 #define PCRE_MAKETABLES pcre16_maketables()
973 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
974 #define PCRE_PRINTINT PCRE_PRINTINT16
975 #define PCRE_STUDY PCRE_STUDY16
976 
977 /* ----- Only 32-bit mode is supported ----- */
978 
979 #elif defined SUPPORT_PCRE32
/* With only the 32-bit library compiled in, every generic name is a direct
alias for its 32-suffixed variant (or for the pcre32_ library function).
CHAR_SIZE is 4 - presumably the number of bytes per code unit. */
980 #define CHAR_SIZE 4
981 #define PCHARS PCHARS32
982 #define PCHARSV PCHARSV32
983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32
984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
986 #define STRLEN STRLEN32
987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
988 #define PCRE_COMPILE PCRE_COMPILE32
989 #define PCRE_CONFIG pcre32_config
990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32
993 #define PCRE_EXEC PCRE_EXEC32
994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32
995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
/* PCRE_MAKETABLES includes the call parentheses, so it is used without "()". */
1003 #define PCRE_MAKETABLES pcre32_maketables()
1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
1005 #define PCRE_PRINTINT PCRE_PRINTINT32
1006 #define PCRE_STUDY PCRE_STUDY32
1007 
1008 #endif
1009 
1010 /* ----- End of mode-specific function call macros ----- */
1011 
1012 
1013 /* Other parameters */
1014 
/* Fallback for systems whose headers do not define CLOCKS_PER_SEC: use
CLK_TCK when that exists, otherwise assume 100 ticks per second. */
1015 #ifndef CLOCKS_PER_SEC
1016 #ifdef CLK_TCK
1017 #define CLOCKS_PER_SEC CLK_TCK
1018 #else
1019 #define CLOCKS_PER_SEC 100
1020 #endif
1021 #endif
1022 
/* DFA_WS_DIMENSION is only defined when DFA support has not been disabled
with NODFA. Judging by the name it is the DFA workspace dimension - its use
is outside this excerpt. */
1023 #if !defined NODFA
1024 #define DFA_WS_DIMENSION 1000
1025 #endif
1026 
1027 /* This is the default loop count for timing. */
1028 
1029 #define LOOPREPEAT 500000
1030 
1031 /* Static variables */
1032 
/* File-scope state shared by the functions in this program.
NOTE(review): the embedded listing numbers jump 1036->1038, 1043->1045 and
1050->1053, so a few declarations appear to have been lost from this copy -
restore them from the upstream file before building. */
1033 static FILE *outfile;
1034 static int log_store = 0;
1035 static int callout_count;
1036 static int callout_extra;
1038 static int callout_fail_id;
1039 static int debug_lengths;
1040 static int first_callout;
/* Set to TRUE by jit_callback() each time the JIT stack callback runs. */
1041 static int jit_was_used;
1042 static int locale_set = 0;
1043 static int show_malloc;
1045 static int use_utf;
1046 static const unsigned char *last_callout_mark = NULL;
1047 
1048 /* The buffers grow automatically if very long input lines are encountered. */
1049 
/* Initial size for those buffers; the buffers themselves are not declared in
this excerpt. */
1050 static int buffer_size = 50000;
1053 
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055 
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1059 
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1063 
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1070 
/* 16-bit view of the size parameters. IMM2_SIZE becomes 1 and LINK_SIZE is
mapped 2 -> 1 and 3 or 4 -> 2 (presumably because sizes are now counted in
16-bit units rather than bytes). Any other LINK_SIZE is a build error. */
1071 #ifdef SUPPORT_PCRE16
1072 #undef IMM2_SIZE
1073 #define IMM2_SIZE 1
1074 
1075 #if LINK_SIZE == 2
1076 #undef LINK_SIZE
1077 #define LINK_SIZE 1
1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4
1079 #undef LINK_SIZE
1080 #define LINK_SIZE 2
1081 #else
1082 #error LINK_SIZE must be either 2, 3, or 4
1083 #endif
1084 
/* buffer16 is the conversion buffer filled by to16(); buffer16_size is its
current allocation in bytes (0 until first use). */
1085 static int buffer16_size = 0;
1086 static pcre_uint16 *buffer16 = NULL;
/* Expanded here, while the 16-bit IMM2_SIZE/LINK_SIZE values are in force. */
1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
1088 #endif /* SUPPORT_PCRE16 */
1089 
/* 32-bit view of the size parameters: IMM2_SIZE and LINK_SIZE are both forced
to 1. Because this block comes after the 16-bit one, LINK_SIZE is 1 for the
rest of the file whenever 32-bit support is compiled in. */
1090 #ifdef SUPPORT_PCRE32
1091 #undef IMM2_SIZE
1092 #define IMM2_SIZE 1
1093 #undef LINK_SIZE
1094 #define LINK_SIZE 1
1095 
/* 32-bit conversion buffer and its current allocation size (0 until first
use). */
1096 static int buffer32_size = 0;
1097 static pcre_uint32 *buffer32 = NULL;
/* Expanded here, while the 32-bit IMM2_SIZE/LINK_SIZE values are in force. */
1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
1099 #endif /* SUPPORT_PCRE32 */
1100 
1101 /* If we have 8-bit support, default to it; if there is also 16- or 32-bit
1102 support, it can be changed by an option. If there is no 8-bit support, there
1103 must be 16- or 32-bit support, so default to 16-bit mode when it is available
and to 32-bit mode otherwise. */
1104 
1105 #if defined SUPPORT_PCRE8
1106 static int pcre_mode = PCRE8_MODE;
1107 #elif defined SUPPORT_PCRE16
1108 static int pcre_mode = PCRE16_MODE;
1109 #elif defined SUPPORT_PCRE32
1110 static int pcre_mode = PCRE32_MODE;
1111 #endif
1112 
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1114 
1115 static int jit_study_bits[] =
1116  {
1125 };
1126 
/* Mask covering all three JIT study option bits (normal, soft-partial and
hard-partial compile). */
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129 
1130 /* Textual explanations for runtime error codes */
1131 
/* Message table for match-time error codes. Entry 0 means "no error" and
entry 1 is the NOMATCH slot, so entry i presumably describes PCRE error code
-i - TODO confirm against the lookup site, which is outside this excerpt. A
NULL entry marks a code that is reported by dedicated code rather than from
this table, or that the matching functions never return. */
1132 static const char *errtexts[] = {
1133  NULL, /* 0 is no error */
1134  NULL, /* NOMATCH is handled specially */
1135  "NULL argument passed",
1136  "bad option value",
1137  "magic number missing",
1138  "unknown opcode - pattern overwritten?",
1139  "no more memory",
1140  NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
1141  "match limit exceeded",
1142  "callout error code",
1143  NULL, /* BADUTF8/16 is handled specially */
1144  NULL, /* BADUTF8/16 offset is handled specially */
1145  NULL, /* PARTIAL is handled specially */
1146  "not used - internal error",
1147  "internal error - pattern overwritten?",
1148  "bad count value",
1149  "item unsupported for DFA matching",
1150  "backreference condition or recursion test not supported for DFA matching",
1151  "match limit not supported for DFA matching",
1152  "workspace size exceeded in DFA matching",
1153  "too much recursion for DFA matching",
1154  "recursion limit exceeded",
1155  "not used - internal error",
1156  "invalid combination of newline options",
1157  "bad offset value",
1158  NULL, /* SHORTUTF8/16 is handled specially */
1159  "nested recursion at the same subject position",
1160  "JIT stack limit reached",
1161  "pattern compiled in wrong mode: 8-bit/16-bit error",
1162  "pattern compiled with other endianness",
1163  "invalid data in workspace for DFA restart",
1164  "bad JIT option",
1165  "bad length"
1166 };
1167 
1168 
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1172 
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1177 
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1180 
/* Layout of tables0, in order: a 256-byte lower-casing table, a 256-byte
case-flipping table, ten 32-byte character-class bitmaps (320 bytes) and a
256-byte character-type table. */
1181 static const pcre_uint8 tables0[] = {
1182 
1183 /* This table is a lower casing table. */
1184 
1185  0, 1, 2, 3, 4, 5, 6, 7,
1186  8, 9, 10, 11, 12, 13, 14, 15,
1187  16, 17, 18, 19, 20, 21, 22, 23,
1188  24, 25, 26, 27, 28, 29, 30, 31,
1189  32, 33, 34, 35, 36, 37, 38, 39,
1190  40, 41, 42, 43, 44, 45, 46, 47,
1191  48, 49, 50, 51, 52, 53, 54, 55,
1192  56, 57, 58, 59, 60, 61, 62, 63,
1193  64, 97, 98, 99,100,101,102,103,
1194  104,105,106,107,108,109,110,111,
1195  112,113,114,115,116,117,118,119,
1196  120,121,122, 91, 92, 93, 94, 95,
1197  96, 97, 98, 99,100,101,102,103,
1198  104,105,106,107,108,109,110,111,
1199  112,113,114,115,116,117,118,119,
1200  120,121,122,123,124,125,126,127,
1201  128,129,130,131,132,133,134,135,
1202  136,137,138,139,140,141,142,143,
1203  144,145,146,147,148,149,150,151,
1204  152,153,154,155,156,157,158,159,
1205  160,161,162,163,164,165,166,167,
1206  168,169,170,171,172,173,174,175,
1207  176,177,178,179,180,181,182,183,
1208  184,185,186,187,188,189,190,191,
1209  192,193,194,195,196,197,198,199,
1210  200,201,202,203,204,205,206,207,
1211  208,209,210,211,212,213,214,215,
1212  216,217,218,219,220,221,222,223,
1213  224,225,226,227,228,229,230,231,
1214  232,233,234,235,236,237,238,239,
1215  240,241,242,243,244,245,246,247,
1216  248,249,250,251,252,253,254,255,
1217 
1218 /* This table is a case flipping table. */
1219 
1220  0, 1, 2, 3, 4, 5, 6, 7,
1221  8, 9, 10, 11, 12, 13, 14, 15,
1222  16, 17, 18, 19, 20, 21, 22, 23,
1223  24, 25, 26, 27, 28, 29, 30, 31,
1224  32, 33, 34, 35, 36, 37, 38, 39,
1225  40, 41, 42, 43, 44, 45, 46, 47,
1226  48, 49, 50, 51, 52, 53, 54, 55,
1227  56, 57, 58, 59, 60, 61, 62, 63,
1228  64, 97, 98, 99,100,101,102,103,
1229  104,105,106,107,108,109,110,111,
1230  112,113,114,115,116,117,118,119,
1231  120,121,122, 91, 92, 93, 94, 95,
1232  96, 65, 66, 67, 68, 69, 70, 71,
1233  72, 73, 74, 75, 76, 77, 78, 79,
1234  80, 81, 82, 83, 84, 85, 86, 87,
1235  88, 89, 90,123,124,125,126,127,
1236  128,129,130,131,132,133,134,135,
1237  136,137,138,139,140,141,142,143,
1238  144,145,146,147,148,149,150,151,
1239  152,153,154,155,156,157,158,159,
1240  160,161,162,163,164,165,166,167,
1241  168,169,170,171,172,173,174,175,
1242  176,177,178,179,180,181,182,183,
1243  184,185,186,187,188,189,190,191,
1244  192,193,194,195,196,197,198,199,
1245  200,201,202,203,204,205,206,207,
1246  208,209,210,211,212,213,214,215,
1247  216,217,218,219,220,221,222,223,
1248  224,225,226,227,228,229,230,231,
1249  232,233,234,235,236,237,238,239,
1250  240,241,242,243,244,245,246,247,
1251  248,249,250,251,252,253,254,255,
1252 
1253 /* This table contains bit maps for various character classes. Each map is 32
1254 bytes long and the bits run from the least significant end of each byte. The
1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
1256 graph, print, punct, and cntrl. Other classes are built from combinations. */
1257 
/* space: characters 9-13 and 32 */
1258  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
1259  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1260  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1261  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1262 
/* xdigit: 0-9, A-F, a-f */
1263  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1264  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
1265  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1266  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1267 
/* digit: 0-9 */
1268  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1269  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1270  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1271  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1272 
/* upper: A-Z */
1273  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1274  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
1275  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1276  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1277 
/* lower: a-z */
1278  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1279  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
1280  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1281  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1282 
/* word: 0-9, A-Z, a-z and '_' */
1283  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
1284  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
1285  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1286  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1287 
/* graph: characters 33-126 */
1288  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
1289  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1290  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1291  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1292 
/* print: characters 32-126 */
1293  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
1294  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
1295  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1296  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1297 
/* punct */
1298  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
1299  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
1300  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1301  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1302 
/* cntrl: characters 0-31 and 127 */
1303  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
1304  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
1305  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1306  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1307 
1308 /* This table identifies various classes of character by individual bits:
1309  0x01 white space character
1310  0x02 letter
1311  0x04 decimal digit
1312  0x08 hexadecimal digit
1313  0x10 alphanumeric or '_'
1314  0x80 regular expression metacharacter or binary zero
1315 */
1316 
1317  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
1318  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
1319  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
1320  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
1321  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
1322  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
1323  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
1324  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
1325  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
1326  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
1327  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
1328  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
1329  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
1330  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
1331  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
1332  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
1333  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
1334  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
1335  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
1336  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
1337  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
1338  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
1339  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
1340  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
1341  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
1342  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
1343  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
1344  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
1345  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
1346  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
1347  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
1348  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349 
1350 /* This is a set of tables that came originally from a Windows user. It seems
1351 to be at least an approximation of ISO 8859. In particular, there are
1352 characters greater than 128 that are marked as spaces, letters, etc. */
1353 
/* tables1 has the same four-part layout as tables0: 256-byte lower-casing
table, 256-byte case-flipping table, ten 32-byte class bitmaps, 256-byte
character-type table. Unlike tables0 it also classifies characters above
127. */
1354 static const pcre_uint8 tables1[] = {
/* Lower-casing table: A-Z map to a-z, and 192-222 map to 224-254 except 215,
which is unchanged. */
1355 0,1,2,3,4,5,6,7,
1356 8,9,10,11,12,13,14,15,
1357 16,17,18,19,20,21,22,23,
1358 24,25,26,27,28,29,30,31,
1359 32,33,34,35,36,37,38,39,
1360 40,41,42,43,44,45,46,47,
1361 48,49,50,51,52,53,54,55,
1362 56,57,58,59,60,61,62,63,
1363 64,97,98,99,100,101,102,103,
1364 104,105,106,107,108,109,110,111,
1365 112,113,114,115,116,117,118,119,
1366 120,121,122,91,92,93,94,95,
1367 96,97,98,99,100,101,102,103,
1368 104,105,106,107,108,109,110,111,
1369 112,113,114,115,116,117,118,119,
1370 120,121,122,123,124,125,126,127,
1371 128,129,130,131,132,133,134,135,
1372 136,137,138,139,140,141,142,143,
1373 144,145,146,147,148,149,150,151,
1374 152,153,154,155,156,157,158,159,
1375 160,161,162,163,164,165,166,167,
1376 168,169,170,171,172,173,174,175,
1377 176,177,178,179,180,181,182,183,
1378 184,185,186,187,188,189,190,191,
1379 224,225,226,227,228,229,230,231,
1380 232,233,234,235,236,237,238,239,
1381 240,241,242,243,244,245,246,215,
1382 248,249,250,251,252,253,254,223,
1383 224,225,226,227,228,229,230,231,
1384 232,233,234,235,236,237,238,239,
1385 240,241,242,243,244,245,246,247,
1386 248,249,250,251,252,253,254,255,
/* Case-flipping table: swaps A-Z with a-z and 192-222 with 224-254, leaving
215 and 247 unchanged. */
1387 0,1,2,3,4,5,6,7,
1388 8,9,10,11,12,13,14,15,
1389 16,17,18,19,20,21,22,23,
1390 24,25,26,27,28,29,30,31,
1391 32,33,34,35,36,37,38,39,
1392 40,41,42,43,44,45,46,47,
1393 48,49,50,51,52,53,54,55,
1394 56,57,58,59,60,61,62,63,
1395 64,97,98,99,100,101,102,103,
1396 104,105,106,107,108,109,110,111,
1397 112,113,114,115,116,117,118,119,
1398 120,121,122,91,92,93,94,95,
1399 96,65,66,67,68,69,70,71,
1400 72,73,74,75,76,77,78,79,
1401 80,81,82,83,84,85,86,87,
1402 88,89,90,123,124,125,126,127,
1403 128,129,130,131,132,133,134,135,
1404 136,137,138,139,140,141,142,143,
1405 144,145,146,147,148,149,150,151,
1406 152,153,154,155,156,157,158,159,
1407 160,161,162,163,164,165,166,167,
1408 168,169,170,171,172,173,174,175,
1409 176,177,178,179,180,181,182,183,
1410 184,185,186,187,188,189,190,191,
1411 224,225,226,227,228,229,230,231,
1412 232,233,234,235,236,237,238,239,
1413 240,241,242,243,244,245,246,215,
1414 248,249,250,251,252,253,254,223,
1415 192,193,194,195,196,197,198,199,
1416 200,201,202,203,204,205,206,207,
1417 208,209,210,211,212,213,214,247,
1418 216,217,218,219,220,221,222,255,
/* Character-class bitmaps, 32 bytes (four lines) each, least significant bit
first. */
/* space */
1419 0,62,0,0,1,0,0,0,
1420 0,0,0,0,0,0,0,0,
1421 32,0,0,0,1,0,0,0,
1422 0,0,0,0,0,0,0,0,
/* xdigit */
1423 0,0,0,0,0,0,255,3,
1424 126,0,0,0,126,0,0,0,
1425 0,0,0,0,0,0,0,0,
1426 0,0,0,0,0,0,0,0,
/* digit */
1427 0,0,0,0,0,0,255,3,
1428 0,0,0,0,0,0,0,0,
1429 0,0,0,0,0,0,12,2,
1430 0,0,0,0,0,0,0,0,
/* upper */
1431 0,0,0,0,0,0,0,0,
1432 254,255,255,7,0,0,0,0,
1433 0,0,0,0,0,0,0,0,
1434 255,255,127,127,0,0,0,0,
/* lower */
1435 0,0,0,0,0,0,0,0,
1436 0,0,0,0,254,255,255,7,
1437 0,0,0,0,0,4,32,4,
1438 0,0,0,128,255,255,127,255,
/* word */
1439 0,0,0,0,0,0,255,3,
1440 254,255,255,135,254,255,255,7,
1441 0,0,0,0,0,4,44,6,
1442 255,255,127,255,255,255,127,255,
/* graph */
1443 0,0,0,0,254,255,255,255,
1444 255,255,255,255,255,255,255,127,
1445 0,0,0,0,254,255,255,255,
1446 255,255,255,255,255,255,255,255,
/* print */
1447 0,2,0,0,255,255,255,255,
1448 255,255,255,255,255,255,255,127,
1449 0,0,0,0,255,255,255,255,
1450 255,255,255,255,255,255,255,255,
/* punct */
1451 0,0,0,0,254,255,0,252,
1452 1,0,0,248,1,0,0,120,
1453 0,0,0,0,254,255,255,255,
1454 0,0,128,0,0,0,128,0,
/* cntrl */
1455 255,255,255,255,0,0,0,0,
1456 0,0,0,0,0,0,0,128,
1457 255,255,255,255,0,0,0,0,
1458 0,0,0,0,0,0,0,0,
/* Character-type table, one byte per character, using the same bit values as
the corresponding table in tables0 (1 space, 2 letter, 4 digit, 8 hex digit,
16 alphanumeric or '_', 128 metacharacter or binary zero). */
1459 128,0,0,0,0,0,0,0,
1460 0,1,1,0,1,1,0,0,
1461 0,0,0,0,0,0,0,0,
1462 0,0,0,0,0,0,0,0,
1463 1,0,0,0,128,0,0,0,
1464 128,128,128,128,0,0,128,0,
1465 28,28,28,28,28,28,28,28,
1466 28,28,0,0,0,0,0,128,
1467 0,26,26,26,26,26,26,18,
1468 18,18,18,18,18,18,18,18,
1469 18,18,18,18,18,18,18,18,
1470 18,18,18,128,128,0,128,16,
1471 0,26,26,26,26,26,26,18,
1472 18,18,18,18,18,18,18,18,
1473 18,18,18,18,18,18,18,18,
1474 18,18,18,128,128,0,0,0,
1475 0,0,0,0,0,1,0,0,
1476 0,0,0,0,0,0,0,0,
1477 0,0,0,0,0,0,0,0,
1478 0,0,0,0,0,0,0,0,
1479 1,0,0,0,0,0,0,0,
1480 0,0,18,0,0,0,0,0,
1481 0,0,20,20,0,18,0,0,
1482 0,20,18,0,0,0,0,0,
1483 18,18,18,18,18,18,18,18,
1484 18,18,18,18,18,18,18,18,
1485 18,18,18,18,18,18,18,0,
1486 18,18,18,18,18,18,18,18,
1487 18,18,18,18,18,18,18,18,
1488 18,18,18,18,18,18,18,18,
1489 18,18,18,18,18,18,18,0,
1490 18,18,18,18,18,18,18,18
1491 };
1492 
1493 
1494 
1495 
1496 #ifndef HAVE_STRERROR
1497 /*************************************************
1498 * Provide strerror() for non-ANSI libraries *
1499 *************************************************/
1500 
1501 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1502 in their libraries, but can provide the same facility by this simple
1503 alternative function. */
1504 
1505 extern int sys_nerr;
1506 extern char *sys_errlist[];
1507 
1508 char *
1510 {
1511 if (n < 0 || n >= sys_nerr) return "unknown error number";
1512 return sys_errlist[n];
1513 }
1514 #endif /* HAVE_STRERROR */
1515 
1516 
1517 
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1521 
1522 /*
1523 Arguments:
1524  rc the return code from PCRE_CONFIG_NEWLINE
1525  isc TRUE if called from "-C newline"
1526 Returns: nothing
1527 */
1528 
1529 static void
1531 {
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1535  {
1536  case CHAR_CR: s = "CR"; break;
1537  case CHAR_LF: s = "LF"; break;
1538  case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539  case -1: s = "ANY"; break;
1540  case -2: s = "ANYCRLF"; break;
1541 
1542  default:
1543  printf("a non-standard value: 0x%04x\n", rc);
1544  return;
1545  }
1546 
1547 printf("%s\n", s);
1548 }
1549 
1550 
1551 
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1555 
1556 static pcre_jit_stack* jit_callback(void *arg)
1557 {
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1560 }
1561 
1562 
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1567 
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1570 
1571 Argument:
1572  utf8bytes a pointer to the byte vector
1573  vptr a pointer to an int to receive the value
1574 
1575 Returns: > 0 => the number of bytes consumed
1576  -6 to 0 => malformed UTF-8 character at offset = (-return)
1577 */
1578 
1579 static int
1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 {
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1585 
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587  {
1588  if ((d & 0x80) == 0) break;
1589  d <<= 1;
1590  }
1591 
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594 
1595 /* i now has a value in the range 1-5 */
1596 
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1599 
1600 for (j = 0; j < i; j++)
1601  {
1602  c = *utf8bytes++;
1603  if ((c & 0xc0) != 0x80) return -(j+1);
1604  s -= 6;
1605  d |= (c & 0x3f) << s;
1606  }
1607 
1608 /* Check that encoding was the correct unique one */
1609 
1610 for (j = 0; j < utf8_table1_size; j++)
1611  if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1613 
1614 /* Valid value */
1615 
1616 *vptr = d;
1617 return i+1;
1618 }
1619 #endif /* NOUTF || SUPPORT_PCRE16 */
1620 
1621 
1622 
1623 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 /*************************************************
1625 * Convert character value to UTF-8 *
1626 *************************************************/
1627 
1628 /* This function takes an integer value in the range 0 - 0x7fffffff
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1630 
1631 Arguments:
1632  cvalue the character value
1633  utf8bytes pointer to buffer for result - at least 6 bytes long
1634 
1635 Returns: number of characters placed in the buffer
1636 */
1637 
1638 static int
1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1640 {
1641 register int i, j;
1642 if (cvalue > 0x7fffffffu)
1643  return -1;
1644 for (i = 0; i < utf8_table1_size; i++)
1645  if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 utf8bytes += i;
1647 for (j = i; j > 0; j--)
1648  {
1649  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650  cvalue >>= 6;
1651  }
1652 *utf8bytes = utf8_table2[i] | cvalue;
1653 return i + 1;
1654 }
1655 #endif
1656 
1657 
1658 #ifdef SUPPORT_PCRE16
1659 /*************************************************
1660 * Convert a string to 16-bit *
1661 *************************************************/
1662 
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1667 result is always left in buffer16.
1668 
1669 Note that this function does not object to surrogate values. This is
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671 for the purpose of testing that they are correctly faulted.
1672 
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1675 
1676 Arguments:
1677  data TRUE if converting a data line; FALSE for a regex
1678  p points to a byte string
1679  utf true if UTF-8 (to be converted to UTF-16)
1680  len number of bytes in the string (excluding trailing zero)
1681 
1682 Returns: number of 16-bit data items used (excluding trailing zero)
1683  OR -1 if a UTF-8 string is malformed
1684  OR -2 if a value > 0x10ffff is encountered
1685  OR -3 if a value > 0xffff is encountered when not in UTF mode
1686 */
1687 
1688 static int
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1690 {
1691 pcre_uint16 *pp;
1692 
1693 if (buffer16_size < 2*len + 2)
1694  {
1695  if (buffer16 != NULL) free(buffer16);
1696  buffer16_size = 2*len + 2;
1697  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698  if (buffer16 == NULL)
1699  {
1700  fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701  exit(1);
1702  }
1703  }
1704 
1705 pp = buffer16;
1706 
1707 if (!utf && !data)
1708  {
1709  while (len-- > 0) *pp++ = *p++;
1710  }
1711 
1712 else
1713  {
1714  pcre_uint32 c = 0;
1715  while (len > 0)
1716  {
1717  int chlen = utf82ord(p, &c);
1718  if (chlen <= 0) return -1;
1719  if (c > 0x10ffff) return -2;
1720  p += chlen;
1721  len -= chlen;
1722  if (c < 0x10000) *pp++ = c; else
1723  {
1724  if (!utf) return -3;
1725  c -= 0x10000;
1726  *pp++ = 0xD800 | (c >> 10);
1727  *pp++ = 0xDC00 | (c & 0x3ff);
1728  }
1729  }
1730  }
1731 
1732 *pp = 0;
1733 return pp - buffer16;
1734 }
1735 #endif
1736 
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1741 
/* In non-UTF mode, the space needed for a 32-bit string is exactly four times
the 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than
four times the 8-bit size, because every UTF-8 character occupies at least one
byte and is converted into exactly one 32-bit value. The result is always left
in buffer32.
1747 
Note that surrogate values are rejected only when a UTF pattern is being
converted. Data lines may contain them; this is deliberate, as it makes it
possible to construct UTF-32 subject strings that are invalid, for the purpose
of testing that they are correctly faulted.
1751 
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1754 
1755 Arguments:
1756  data TRUE if converting a data line; FALSE for a regex
1757  p points to a byte string
1758  utf true if UTF-8 (to be converted to UTF-32)
1759  len number of bytes in the string (excluding trailing zero)
1760 
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762  OR -1 if a UTF-8 string is malformed
1763  OR -2 if a value > 0x10ffff is encountered
1764  OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765 */
1766 
1767 static int
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1769 {
1770 pcre_uint32 *pp;
1771 
1772 if (buffer32_size < 4*len + 4)
1773  {
1774  if (buffer32 != NULL) free(buffer32);
1775  buffer32_size = 4*len + 4;
1776  buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777  if (buffer32 == NULL)
1778  {
1779  fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780  exit(1);
1781  }
1782  }
1783 
1784 pp = buffer32;
1785 
1786 if (!utf && !data)
1787  {
1788  while (len-- > 0) *pp++ = *p++;
1789  }
1790 
1791 else
1792  {
1793  pcre_uint32 c = 0;
1794  while (len > 0)
1795  {
1796  int chlen = utf82ord(p, &c);
1797  if (chlen <= 0) return -1;
1798  if (utf)
1799  {
1800  if (c > 0x10ffff) return -2;
1801  if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802  }
1803 
1804  p += chlen;
1805  len -= chlen;
1806  *pp++ = c;
1807  }
1808  }
1809 
1810 *pp = 0;
1811 return pp - buffer32;
1812 }
1813 
1814 /* Check that a 32-bit character string is valid UTF-32.
1815 
1816 Arguments:
1817  string points to the string
1818  length length of string, or -1 if the string is zero-terminated
1819 
1820 Returns: TRUE if the string is a valid UTF-32 string
1821  FALSE otherwise
1822 */
1823 
1824 #ifdef NEVER /* Not used */
1825 #ifdef SUPPORT_UTF
1826 static BOOL
1827 valid_utf32(pcre_uint32 *string, int length)
1828 {
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1831 
1832 for (p = string; length-- > 0; p++)
1833  {
1834  c = *p;
1835  if (c > 0x10ffffu) return FALSE; /* Too big */
1836  if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837  }
1838 
1839 return TRUE;
1840 }
1841 #endif /* SUPPORT_UTF */
1842 #endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1844 
1845 
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1849 
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1858 
1859 Arguments:
1860  f the file to read
1861  start where in buffer to start (this *must* be within buffer)
1862  prompt for stdin or readline()
1863 
1864 Returns: pointer to the start of new data
1865  could be a copy of start, or could be moved
1866  NULL if no data read and EOF reached
1867 */
1868 
1869 static pcre_uint8 *
1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 {
1872 pcre_uint8 *here = start;
1873 
1874 for (;;)
1875  {
1876  size_t rlen = (size_t)(buffer_size - (here - buffer));
1877 
1878  if (rlen > 1000)
1879  {
1880  int dlen;
1881 
1882  /* If libreadline or libedit support is required, use readline() to read a
1883  line if the input is a terminal. Note that readline() removes the trailing
1884  newline, so we must put it back again, to be compatible with fgets(). */
1885 
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887  if (isatty(fileno(f)))
1888  {
1889  size_t len;
1890  char *s = readline(prompt);
1891  if (s == NULL) return (here == start)? NULL : start;
1892  len = strlen(s);
1893  if (len > 0) add_history(s);
1894  if (len > rlen - 1) len = rlen - 1;
1895  memcpy(here, s, len);
1896  here[len] = '\n';
1897  here[len+1] = 0;
1898  free(s);
1899  }
1900  else
1901 #endif
1902 
1903  /* Read the next line by normal means, prompting if the file is stdin. */
1904 
1905  {
1906  if (f == stdin) printf("%s", prompt);
1907  if (fgets((char *)here, rlen, f) == NULL)
1908  return (here == start)? NULL : start;
1909  }
1910 
1911  dlen = (int)strlen((char *)here);
1912  if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913  here += dlen;
1914  }
1915 
1916  else
1917  {
1918  int new_buffer_size = 2*buffer_size;
1919  pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920  pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921 
1922  if (new_buffer == NULL || new_pbuffer == NULL)
1923  {
1924  fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925  exit(1);
1926  }
1927 
1928  memcpy(new_buffer, buffer, buffer_size);
1929  memcpy(new_pbuffer, pbuffer, buffer_size);
1930 
1931  buffer_size = new_buffer_size;
1932 
1933  start = new_buffer + (start - buffer);
1934  here = new_buffer + (here - buffer);
1935 
1936  free(buffer);
1937  free(pbuffer);
1938 
1939  buffer = new_buffer;
1940  pbuffer = new_pbuffer;
1941  }
1942  }
1943 
1944 /* Control never gets here */
1945 }
1946 
1947 
1948 
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1952 
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1956 
1957 Arguments:
1958  str string to be converted
1959  endptr where to put the end pointer
1960 
Returns: the value read, as an int (0 if there are no digits)
1962 */
1963 
1964 static int
1966 {
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1972 }
1973 
1974 
1975 
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1979 
1980 /* Print a single character either literally, or as a hex escape. */
1981 
1982 static int pchar(pcre_uint32 c, FILE *f)
1983 {
1984 int n = 0;
1985 char tempbuffer[16];
1986 if (PRINTOK(c))
1987  {
1988  if (f != NULL) fprintf(f, "%c", c);
1989  return 1;
1990  }
1991 
1992 if (c < 0x100)
1993  {
1994  if (use_utf)
1995  {
1996  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1997  return 6;
1998  }
1999  else
2000  {
2001  if (f != NULL) fprintf(f, "\\x%02x", c);
2002  return 4;
2003  }
2004  }
2005 
2006 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2007  else n = sprintf(tempbuffer, "\\x{%02x}", c);
2008 
2009 return n >= 0 ? n : 0;
2010 }
2011 
2012 
2013 
2014 #ifdef SUPPORT_PCRE8
2015 /*************************************************
2016 * Print 8-bit character string *
2017 *************************************************/
2018 
2019 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2020 If handed a NULL file, just counts chars without printing. */
2021 
2022 static int pchars(pcre_uint8 *p, int length, FILE *f)
2023 {
2024 pcre_uint32 c = 0;
2025 int yield = 0;
2026 
2027 if (length < 0)
2028  length = strlen((char *)p);
2029 
2030 while (length-- > 0)
2031  {
2032 #if !defined NOUTF
2033  if (use_utf)
2034  {
2035  int rc = utf82ord(p, &c);
2036  if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
2037  {
2038  length -= rc - 1;
2039  p += rc;
2040  yield += pchar(c, f);
2041  continue;
2042  }
2043  }
2044 #endif
2045  c = *p++;
2046  yield += pchar(c, f);
2047  }
2048 
2049 return yield;
2050 }
2051 #endif
2052 
2053 
2054 
2055 #ifdef SUPPORT_PCRE16
2056 /*************************************************
2057 * Find length of 0-terminated 16-bit string *
2058 *************************************************/
2059 
2060 static int strlen16(PCRE_SPTR16 p)
2061 {
2062 PCRE_SPTR16 pp = p;
2063 while (*pp != 0) pp++;
2064 return (int)(pp - p);
2065 }
2066 #endif /* SUPPORT_PCRE16 */
2067 
2068 
2069 
2070 #ifdef SUPPORT_PCRE32
2071 /*************************************************
2072 * Find length of 0-terminated 32-bit string *
2073 *************************************************/
2074 
2075 static int strlen32(PCRE_SPTR32 p)
2076 {
2077 PCRE_SPTR32 pp = p;
2078 while (*pp != 0) pp++;
2079 return (int)(pp - p);
2080 }
2081 #endif /* SUPPORT_PCRE32 */
2082 
2083 
2084 
2085 #ifdef SUPPORT_PCRE16
2086 /*************************************************
2087 * Print 16-bit character string *
2088 *************************************************/
2089 
2090 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2091 If handed a NULL file, just counts chars without printing. */
2092 
2093 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
2094 {
2095 int yield = 0;
2096 
2097 if (length < 0)
2098  length = strlen16(p);
2099 
2100 while (length-- > 0)
2101  {
2102  pcre_uint32 c = *p++ & 0xffff;
2103 #if !defined NOUTF
2104  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2105  {
2106  int d = *p & 0xffff;
2107  if (d >= 0xDC00 && d <= 0xDFFF)
2108  {
2109  c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
2110  length--;
2111  p++;
2112  }
2113  }
2114 #endif
2115  yield += pchar(c, f);
2116  }
2117 
2118 return yield;
2119 }
2120 #endif /* SUPPORT_PCRE16 */
2121 
2122 
2123 
2124 #ifdef SUPPORT_PCRE32
2125 /*************************************************
2126 * Print 32-bit character string *
2127 *************************************************/
2128 
2129 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2130 If handed a NULL file, just counts chars without printing. */
2131 
2132 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2133 {
2134 int yield = 0;
2135 
2136 (void)(utf); /* Avoid compiler warning */
2137 
2138 if (length < 0)
2139  length = strlen32(p);
2140 
2141 while (length-- > 0)
2142  {
2143  pcre_uint32 c = *p++;
2144  yield += pchar(c, f);
2145  }
2146 
2147 return yield;
2148 }
2149 #endif /* SUPPORT_PCRE32 */
2150 
2151 
2152 
2153 #ifdef SUPPORT_PCRE8
2154 /*************************************************
2155 * Read a capture name (8-bit) and check it *
2156 *************************************************/
2157 
2158 static pcre_uint8 *
2159 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2160 {
2161 pcre_uint8 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2166  {
2167  fprintf(outfile, "no parentheses with name \"");
2168  PCHARSV(*pp, 0, -1, outfile);
2169  fprintf(outfile, "\"\n");
2170  }
2171 
2172 *pp = npp;
2173 return p;
2174 }
2175 #endif /* SUPPORT_PCRE8 */
2176 
2177 
2178 
2179 #ifdef SUPPORT_PCRE16
2180 /*************************************************
2181 * Read a capture name (16-bit) and check it *
2182 *************************************************/
2183 
2184 /* Note that the text being read is 8-bit. */
2185 
2186 static pcre_uint8 *
2187 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2188 {
2189 pcre_uint16 *npp = *pp;
2190 while (isalnum(*p)) *npp++ = *p++;
2191 *npp++ = 0;
2192 *npp = 0;
2193 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2194  {
2195  fprintf(outfile, "no parentheses with name \"");
2196  PCHARSV(*pp, 0, -1, outfile);
2197  fprintf(outfile, "\"\n");
2198  }
2199 *pp = npp;
2200 return p;
2201 }
2202 #endif /* SUPPORT_PCRE16 */
2203 
2204 
2205 
2206 #ifdef SUPPORT_PCRE32
2207 /*************************************************
2208 * Read a capture name (32-bit) and check it *
2209 *************************************************/
2210 
2211 /* Note that the text being read is 8-bit. */
2212 
2213 static pcre_uint8 *
2214 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2215 {
2216 pcre_uint32 *npp = *pp;
2217 while (isalnum(*p)) *npp++ = *p++;
2218 *npp++ = 0;
2219 *npp = 0;
2220 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2221  {
2222  fprintf(outfile, "no parentheses with name \"");
2223  PCHARSV(*pp, 0, -1, outfile);
2224  fprintf(outfile, "\"\n");
2225  }
2226 *pp = npp;
2227 return p;
2228 }
2229 #endif /* SUPPORT_PCRE32 */
2230 
2231 
2232 
2233 /*************************************************
2234 * Stack guard function *
2235 *************************************************/
2236 
2237 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2238 return when a count overflows. */
2239 
2240 static int stack_guard(void)
2241 {
2242 return stack_guard_return;
2243 }
2244 
2245 /*************************************************
2246 * Callout function *
2247 *************************************************/
2248 
2249 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2250 the match. Yield zero unless more callouts than the fail count, or the callout
2251 data is not zero. */
2252 
2254 {
2255 FILE *f = (first_callout | callout_extra)? outfile : NULL;
2256 int i, current_position, pre_start, post_start, subject_length;
2257 
2258 if (callout_extra)
2259  {
2260  fprintf(f, "Callout %d: last capture = %d\n",
2261  cb->callout_number, cb->capture_last);
2262 
2263  if (cb->offset_vector != NULL)
2264  {
2265  for (i = 0; i < cb->capture_top * 2; i += 2)
2266  {
2267  if (cb->offset_vector[i] < 0)
2268  fprintf(f, "%2d: <unset>\n", i/2);
2269  else
2270  {
2271  fprintf(f, "%2d: ", i/2);
2272  PCHARSV(cb->subject, cb->offset_vector[i],
2273  cb->offset_vector[i+1] - cb->offset_vector[i], f);
2274  fprintf(f, "\n");
2275  }
2276  }
2277  }
2278  }
2279 
2280 /* Re-print the subject in canonical form, the first time or if giving full
2281 datails. On subsequent calls in the same match, we use pchars just to find the
2282 printed lengths of the substrings. */
2283 
2284 if (f != NULL) fprintf(f, "--->");
2285 
2286 /* If a lookbehind is involved, the current position may be earlier than the
2287 match start. If so, use the match start instead. */
2288 
2289 current_position = (cb->current_position >= cb->start_match)?
2290  cb->current_position : cb->start_match;
2291 
2292 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
2293 PCHARS(post_start, cb->subject, cb->start_match,
2294  current_position - cb->start_match, f);
2295 
2296 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
2297 
2298 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);
2299 
2300 if (f != NULL) fprintf(f, "\n");
2301 
2302 /* Always print appropriate indicators, with callout number if not already
2303 shown. For automatic callouts, show the pattern offset. */
2304 
2305 if (cb->callout_number == 255)
2306  {
2307  fprintf(outfile, "%+3d ", cb->pattern_position);
2308  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
2309  }
2310 else
2311  {
2312  if (callout_extra) fprintf(outfile, " ");
2313  else fprintf(outfile, "%3d ", cb->callout_number);
2314  }
2315 
2316 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
2317 fprintf(outfile, "^");
2318 
2319 if (post_start > 0)
2320  {
2321  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
2322  fprintf(outfile, "^");
2323  }
2324 
2325 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
2326  fprintf(outfile, " ");
2327 
2328 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
2329  pbuffer + cb->pattern_position);
2330 
2331 fprintf(outfile, "\n");
2332 first_callout = 0;
2333 
2334 if (cb->mark != last_callout_mark)
2335  {
2336  if (cb->mark == NULL)
2337  fprintf(outfile, "Latest Mark: <unset>\n");
2338  else
2339  {
2340  fprintf(outfile, "Latest Mark: ");
2341  PCHARSV(cb->mark, 0, -1, outfile);
2342  putc('\n', outfile);
2343  }
2344  last_callout_mark = cb->mark;
2345  }
2346 
2347 if (cb->callout_data != NULL)
2348  {
2349  int callout_data = *((int *)(cb->callout_data));
2350  if (callout_data != 0)
2351  {
2352  fprintf(outfile, "Callout data = %d\n", callout_data);
2353  return callout_data;
2354  }
2355  }
2356 
2357 return (cb->callout_number != callout_fail_id)? 0 :
2358  (++callout_count >= callout_fail_count)? 1 : 0;
2359 }
2360 
2361 
2362 /*************************************************
2363 * Local malloc functions *
2364 *************************************************/
2365 
2366 /* Alternative malloc function, to test functionality and save the size of a
2367 compiled re, which is the first store request that pcre_compile() makes. The
2368 show_malloc variable is set only during matching. */
2369 
2370 static void *new_malloc(size_t size)
2371 {
2372 void *block = malloc(size);
2373 if (show_malloc)
2374  fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2375 return block;
2376 }
2377 
2378 static void new_free(void *block)
2379 {
2380 if (show_malloc)
2381  fprintf(outfile, "free %p\n", block);
2382 free(block);
2383 }
2384 
2385 /* For recursion malloc/free, to test stacking calls */
2386 
2387 static void *stack_malloc(size_t size)
2388 {
2389 void *block = malloc(size);
2390 if (show_malloc)
2391  fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2392 return block;
2393 }
2394 
2395 static void stack_free(void *block)
2396 {
2397 if (show_malloc)
2398  fprintf(outfile, "stack_free %p\n", block);
2399 free(block);
2400 }
2401 
2402 
2403 /*************************************************
2404 * Call pcre_fullinfo() *
2405 *************************************************/
2406 
2407 /* Get one piece of information from the pcre_fullinfo() function. When only
2408 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2409 value, but the code is defensive.
2410 
2411 Arguments:
2412  re compiled regex
2413  study study data
2414  option PCRE_INFO_xxx option
2415  ptr where to put the data
2416 
2417 Returns: 0 when OK, < 0 on error
2418 */
2419 
2420 static int
2421 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2422 {
2423 int rc;
2424 
2425 if (pcre_mode == PCRE32_MODE)
2426 #ifdef SUPPORT_PCRE32
2427  rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2428 #else
2429  rc = PCRE_ERROR_BADMODE;
2430 #endif
2431 else if (pcre_mode == PCRE16_MODE)
2432 #ifdef SUPPORT_PCRE16
2433  rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2434 #else
2435  rc = PCRE_ERROR_BADMODE;
2436 #endif
2437 else
2438 #ifdef SUPPORT_PCRE8
2439  rc = pcre_fullinfo(re, study, option, ptr);
2440 #else
2441  rc = PCRE_ERROR_BADMODE;
2442 #endif
2443 
2444 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2445  {
2446  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2447  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2448  if (rc == PCRE_ERROR_BADMODE)
2449  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2450  "%d-bit mode\n", 8 * CHAR_SIZE,
2451  8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2452  }
2453 
2454 return rc;
2455 }
2456 
2457 
2458 
2459 /*************************************************
2460 * Swap byte functions *
2461 *************************************************/
2462 
2463 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2464 value, respectively.
2465 
2466 Arguments:
2467  value any number
2468 
2469 Returns: the byte swapped value
2470 */
2471 
2472 static pcre_uint32
2473 swap_uint32(pcre_uint32 value)
2474 {
2475 return ((value & 0x000000ff) << 24) |
2476  ((value & 0x0000ff00) << 8) |
2477  ((value & 0x00ff0000) >> 8) |
2478  (value >> 24);
2479 }
2480 
2481 static pcre_uint16
2482 swap_uint16(pcre_uint16 value)
2483 {
2484 return (value >> 8) | (value << 8);
2485 }
2486 
2487 
2488 
2489 /*************************************************
2490 * Flip bytes in a compiled pattern *
2491 *************************************************/
2492 
2493 /* This function is called if the 'F' option was present on a pattern that is
2494 to be written to a file. We flip the bytes of all the integer fields in the
2495 regex data block and the study block. In 16-bit mode this also flips relevant
2496 bytes in the pattern itself. This is to make it possible to test PCRE's
2497 ability to reload byte-flipped patterns, e.g. those compiled on a different
2498 architecture. */
2499 
2500 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2501 static void
2502 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2503 {
2504 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2505 #ifdef SUPPORT_PCRE16
2506 int op;
2507 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2508 int length = re->name_count * re->name_entry_size;
2509 #ifdef SUPPORT_UTF
2510 BOOL utf = (re->options & PCRE_UTF16) != 0;
2511 BOOL utf16_char = FALSE;
2512 #endif /* SUPPORT_UTF */
2513 #endif /* SUPPORT_PCRE16 */
2514 
2515 /* Always flip the bytes in the main data block and study blocks. */
2516 
2518 re->size = swap_uint32(re->size);
2519 re->options = swap_uint32(re->options);
2520 re->flags = swap_uint32(re->flags);
2523 re->first_char = swap_uint16(re->first_char);
2524 re->req_char = swap_uint16(re->req_char);
2530 re->name_count = swap_uint16(re->name_count);
2531 re->ref_count = swap_uint16(re->ref_count);
2532 
2533 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2534  {
2535  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2536  rsd->size = swap_uint32(rsd->size);
2537  rsd->flags = swap_uint32(rsd->flags);
2538  rsd->minlength = swap_uint32(rsd->minlength);
2539  }
2540 
2541 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2542 in the name table, if present, and then in the pattern itself. */
2543 
2544 #ifdef SUPPORT_PCRE16
2545 if (pcre_mode != PCRE16_MODE) return;
2546 
2547 while(TRUE)
2548  {
2549  /* Swap previous characters. */
2550  while (length-- > 0)
2551  {
2552  *ptr = swap_uint16(*ptr);
2553  ptr++;
2554  }
2555 #ifdef SUPPORT_UTF
2556  if (utf16_char)
2557  {
2558  if ((ptr[-1] & 0xfc00) == 0xd800)
2559  {
2560  /* We know that there is only one extra character in UTF-16. */
2561  *ptr = swap_uint16(*ptr);
2562  ptr++;
2563  }
2564  }
2565  utf16_char = FALSE;
2566 #endif /* SUPPORT_UTF */
2567 
2568  /* Get next opcode. */
2569 
2570  length = 0;
2571  op = *ptr;
2572  *ptr++ = swap_uint16(op);
2573 
2574  switch (op)
2575  {
2576  case OP_END:
2577  return;
2578 
2579 #ifdef SUPPORT_UTF
2580  case OP_CHAR:
2581  case OP_CHARI:
2582  case OP_NOT:
2583  case OP_NOTI:
2584  case OP_STAR:
2585  case OP_MINSTAR:
2586  case OP_PLUS:
2587  case OP_MINPLUS:
2588  case OP_QUERY:
2589  case OP_MINQUERY:
2590  case OP_UPTO:
2591  case OP_MINUPTO:
2592  case OP_EXACT:
2593  case OP_POSSTAR:
2594  case OP_POSPLUS:
2595  case OP_POSQUERY:
2596  case OP_POSUPTO:
2597  case OP_STARI:
2598  case OP_MINSTARI:
2599  case OP_PLUSI:
2600  case OP_MINPLUSI:
2601  case OP_QUERYI:
2602  case OP_MINQUERYI:
2603  case OP_UPTOI:
2604  case OP_MINUPTOI:
2605  case OP_EXACTI:
2606  case OP_POSSTARI:
2607  case OP_POSPLUSI:
2608  case OP_POSQUERYI:
2609  case OP_POSUPTOI:
2610  case OP_NOTSTAR:
2611  case OP_NOTMINSTAR:
2612  case OP_NOTPLUS:
2613  case OP_NOTMINPLUS:
2614  case OP_NOTQUERY:
2615  case OP_NOTMINQUERY:
2616  case OP_NOTUPTO:
2617  case OP_NOTMINUPTO:
2618  case OP_NOTEXACT:
2619  case OP_NOTPOSSTAR:
2620  case OP_NOTPOSPLUS:
2621  case OP_NOTPOSQUERY:
2622  case OP_NOTPOSUPTO:
2623  case OP_NOTSTARI:
2624  case OP_NOTMINSTARI:
2625  case OP_NOTPLUSI:
2626  case OP_NOTMINPLUSI:
2627  case OP_NOTQUERYI:
2628  case OP_NOTMINQUERYI:
2629  case OP_NOTUPTOI:
2630  case OP_NOTMINUPTOI:
2631  case OP_NOTEXACTI:
2632  case OP_NOTPOSSTARI:
2633  case OP_NOTPOSPLUSI:
2634  case OP_NOTPOSQUERYI:
2635  case OP_NOTPOSUPTOI:
2636  if (utf) utf16_char = TRUE;
2637 #endif
2638  /* Fall through. */
2639 
2640  default:
2641  length = OP_lengths16[op] - 1;
2642  break;
2643 
2644  case OP_CLASS:
2645  case OP_NCLASS:
2646  /* Skip the character bit map. */
2647  ptr += 32/sizeof(pcre_uint16);
2648  length = 0;
2649  break;
2650 
2651  case OP_XCLASS:
2652  /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2653  if (LINK_SIZE > 1)
2654  length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2655  - (1 + LINK_SIZE + 1));
2656  else
2657  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2658 
2659  /* Reverse the size of the XCLASS instance. */
2660  *ptr = swap_uint16(*ptr);
2661  ptr++;
2662  if (LINK_SIZE > 1)
2663  {
2664  *ptr = swap_uint16(*ptr);
2665  ptr++;
2666  }
2667 
2668  op = *ptr;
2669  *ptr = swap_uint16(op);
2670  ptr++;
2671  if ((op & XCL_MAP) != 0)
2672  {
2673  /* Skip the character bit map. */
2674  ptr += 32/sizeof(pcre_uint16);
2675  length -= 32/sizeof(pcre_uint16);
2676  }
2677  break;
2678  }
2679  }
2680 /* Control should never reach here in 16 bit mode. */
2681 #endif /* SUPPORT_PCRE16 */
2682 }
2683 #endif /* SUPPORT_PCRE[8|16] */
2684 
2685 
2686 
2687 #if defined SUPPORT_PCRE32
2688 static void
2689 regexflip_32(pcre *ere, pcre_extra *extra)
2690 {
2691 real_pcre32 *re = (real_pcre32 *)ere;
2692 int op;
2693 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2694 int length = re->name_count * re->name_entry_size;
2695 
2696 /* Always flip the bytes in the main data block and study blocks. */
2697 
2699 re->size = swap_uint32(re->size);
2700 re->options = swap_uint32(re->options);
2701 re->flags = swap_uint32(re->flags);
2704 re->first_char = swap_uint32(re->first_char);
2705 re->req_char = swap_uint32(re->req_char);
2711 re->name_count = swap_uint16(re->name_count);
2712 re->ref_count = swap_uint16(re->ref_count);
2713 
2714 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2715  {
2716  pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2717  rsd->size = swap_uint32(rsd->size);
2718  rsd->flags = swap_uint32(rsd->flags);
2719  rsd->minlength = swap_uint32(rsd->minlength);
2720  }
2721 
2722 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2723 the pattern itself. */
2724 
2725 while(TRUE)
2726  {
2727  /* Swap previous characters. */
2728  while (length-- > 0)
2729  {
2730  *ptr = swap_uint32(*ptr);
2731  ptr++;
2732  }
2733 
2734  /* Get next opcode. */
2735 
2736  length = 0;
2737  op = *ptr;
2738  *ptr++ = swap_uint32(op);
2739 
2740  switch (op)
2741  {
2742  case OP_END:
2743  return;
2744 
2745  default:
2746  length = OP_lengths32[op] - 1;
2747  break;
2748 
2749  case OP_CLASS:
2750  case OP_NCLASS:
2751  /* Skip the character bit map. */
2752  ptr += 32/sizeof(pcre_uint32);
2753  length = 0;
2754  break;
2755 
2756  case OP_XCLASS:
2757  /* LINK_SIZE can only be 1 in 32-bit mode. */
2758  length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2759 
2760  /* Reverse the size of the XCLASS instance. */
2761  *ptr = swap_uint32(*ptr);
2762  ptr++;
2763 
2764  op = *ptr;
2765  *ptr = swap_uint32(op);
2766  ptr++;
2767  if ((op & XCL_MAP) != 0)
2768  {
2769  /* Skip the character bit map. */
2770  ptr += 32/sizeof(pcre_uint32);
2771  length -= 32/sizeof(pcre_uint32);
2772  }
2773  break;
2774  }
2775  }
2776 /* Control should never reach here in 32 bit mode. */
2777 }
2778 
2779 #endif /* SUPPORT_PCRE32 */
2780 
2781 
2782 
2783 static void
2785 {
2786 #if defined SUPPORT_PCRE32
2787  if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2788  regexflip_32(ere, extra);
2789 #endif
2790 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2791  if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2792  regexflip8_or_16(ere, extra);
2793 #endif
2794 }
2795 
2796 
2797 
2798 /*************************************************
2799 * Check match or recursion limit *
2800 *************************************************/
2801 
2802 static int
2804  int start_offset, int options, int *use_offsets, int use_size_offsets,
2805  int flag, unsigned long int *limit, int errnumber, const char *msg)
2806 {
2807 int count;
2808 int min = 0;
2809 int mid = 64;
2810 int max = -1;
2811 
2812 extra->flags |= flag;
2813 
2814 for (;;)
2815  {
2816  *limit = mid;
2817 
2818  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2819  use_offsets, use_size_offsets);
2820 
2821  if (count == errnumber)
2822  {
2823  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2824  min = mid;
2825  mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2826  }
2827 
2828  else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2829  count == PCRE_ERROR_PARTIAL)
2830  {
2831  if (mid == min + 1)
2832  {
2833  fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2834  break;
2835  }
2836  /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2837  max = mid;
2838  mid = (min + mid)/2;
2839  }
2840  else break; /* Some other error */
2841  }
2842 
2843 extra->flags &= ~flag;
2844 return count;
2845 }
2846 
2847 
2848 
2849 /*************************************************
2850 * Case-independent strncmp() function *
2851 *************************************************/
2852 
2853 /*
2854 Arguments:
2855  s first string
2856  t second string
2857  n number of characters to compare
2858 
2859 Returns: < 0, = 0, or > 0, according to the comparison
2860 */
2861 
2862 static int
2864 {
2865 while (n--)
2866  {
2867  int c = tolower(*s++) - tolower(*t++);
2868  if (c) return c;
2869  }
2870 return 0;
2871 }
2872 
2873 
2874 
2875 /*************************************************
2876 * Check multicharacter option *
2877 *************************************************/
2878 
2879 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2880 a message and return 0 if there is no match.
2881 
2882 Arguments:
2883  p points after the leading '<'
2884  f file for error message
2885  nl TRUE to check only for newline settings
2886  stype "modifier" or "escape sequence"
2887 
2888 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2889 */
2890 
2891 static int
2892 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2893 {
2894 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2895 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2896 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2898 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2899 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2900 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2901 
2902 if (!nl)
2903  {
2904  if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2905  }
2906 
2907 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2908 return 0;
2909 }
2910 
2911 
2912 
/*************************************************
*                Usage function                  *
*************************************************/

/* Writes the command-line help text to stdout. Which lines are included
depends on the build: SUPPORT_LIBREADLINE/SUPPORT_LIBEDIT select the readline
remark, SUPPORT_PCRE16 and SUPPORT_PCRE32 add the -16 and -32 lines, and the
-dfa and -p lines are left out when NODFA or NOPOSIX is defined. */

static void
usage(void)
{
/* The help text, one output line (or group of lines) per entry, in the order
in which it is written. None of the entries contains a '%', so writing them
with fputs() yields exactly the text itself. */

static const char *const help[] =
  {
  "Usage: pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  " -16 use the 16-bit library\n",
#endif
#ifdef SUPPORT_PCRE32
  " -32 use the 32-bit library\n",
#endif
  " -b show compiled code\n",
  " -C show PCRE compile-time options and exit\n",
  " -C arg show a specific compile-time option and exit\n",
  " with its value if numeric (else 0). The arg can be:\n",
  " linksize internal link size [2, 3, 4]\n",
  " pcre8 8 bit library support enabled [0, 1]\n",
  " pcre16 16 bit library support enabled [0, 1]\n",
  " pcre32 32 bit library support enabled [0, 1]\n",
  " utf Unicode Transformation Format supported [0, 1]\n",
  " ucp Unicode Properties supported [0, 1]\n",
  " jit Just-in-time compiler supported [0, 1]\n",
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n",
  " bsr \\R type [ANYCRLF, ANY]\n",
  " -d debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  " -dfa force DFA matching for all subjects\n",
#endif
  " -help show usage information\n",
  " -i show information about compiled patterns\n",
  " -M find MATCH_LIMIT minimum for each subject\n",
  " -m output memory used information\n",
  " -O set PCRE_NO_AUTO_POSSESS on each pattern\n",
  " -o <n> set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  " -p use POSIX interface\n",
#endif
  " -q quiet: do not output PCRE version number at start\n",
  " -S <n> set stack size to <n> megabytes\n",
  " -s force each pattern to be studied at basic level\n",
  " -s+ force each pattern to be studied, using JIT if available\n",
  " -s++ ditto, verifying when JIT was actually used\n",
  " -s+n force each pattern to be studied, using JIT if available,\n",
  " where 1 <= n <= 7 selects JIT options\n",
  " -s++n ditto, verifying when JIT was actually used\n",
  " -t time compilation and execution\n",
  " -t <n> time compilation and execution, repeating <n> times\n",
  " -tm time execution (matching) only\n",
  " -tm <n> time execution (matching) only, repeating <n> times\n",
  " -T same as -t, but show total times at the end\n",
  " -TM same as -tm, but show total time at the end\n"
  };
size_t i;

for (i = 0; i < sizeof(help)/sizeof(help[0]); i++)
  fputs(help[i], stdout);
}
2975 
2976 
2977 
2978 /*************************************************
2979 * Main Program *
2980 *************************************************/
2981 
2982 /* Read lines from named file or stdin and write to named file or stdout; lines
2983 consist of a regular expression, in delimiters and optionally followed by
2984 options, followed by a set of test data, terminated by an empty line. */
2985 
2986 int main(int argc, char **argv)
2987 {
2988 FILE *infile = stdin;
2989 const char *version;
2990 int options = 0;
2991 int study_options = 0;
2992 int default_find_match_limit = FALSE;
2993 pcre_uint32 default_options = 0;
2994 int op = 1;
2995 int timeit = 0;
2996 int timeitm = 0;
2997 int showtotaltimes = 0;
2998 int showinfo = 0;
2999 int showstore = 0;
3000 int force_study = -1;
3001 int force_study_options = 0;
3002 int quiet = 0;
3003 int size_offsets = 45;
3004 int size_offsets_max;
3005 int *offsets = NULL;
3006 int debug = 0;
3007 int done = 0;
3008 int all_use_dfa = 0;
3009 int verify_jit = 0;
3010 int yield = 0;
3011 int stack_size;
3012 pcre_uint8 *dbuffer = NULL;
3013 pcre_uint8 lockout[24] = { 0 };
3014 size_t dbuffer_size = 1u << 14;
3015 clock_t total_compile_time = 0;
3016 clock_t total_study_time = 0;
3017 clock_t total_match_time = 0;
3018 
3019 #if !defined NOPOSIX
3020 int posix = 0;
3021 #endif
3022 #if !defined NODFA
3023 int *dfa_workspace = NULL;
3024 #endif
3025 
3026 pcre_jit_stack *jit_stack = NULL;
3027 
3028 /* These vectors store, end-to-end, a list of zero-terminated captured
3029 substring names, each list itself being terminated by an empty name. Assume
3030 that 1024 is plenty long enough for the few names we'll be testing. It is
3031 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3032 for the actual memory, to ensure alignment. */
3033 
3034 pcre_uint32 copynames[1024];
3035 pcre_uint32 getnames[1024];
3036 
3037 #ifdef SUPPORT_PCRE32
3038 pcre_uint32 *cn32ptr;
3039 pcre_uint32 *gn32ptr;
3040 #endif
3041 
3042 #ifdef SUPPORT_PCRE16
3043 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3044 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3045 pcre_uint16 *cn16ptr;
3046 pcre_uint16 *gn16ptr;
3047 #endif
3048 
3049 #ifdef SUPPORT_PCRE8
3050 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3051 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3052 pcre_uint8 *cn8ptr;
3053 pcre_uint8 *gn8ptr;
3054 #endif
3055 
3056 /* Get buffers from malloc() so that valgrind will check their misuse when
3057 debugging. They grow automatically when very long lines are read. The 16-
3058 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3059 
3062 
3063 /* The outfile variable is static so that new_malloc can use it. */
3064 
3065 outfile = stdout;
3066 
3067 /* The following _setmode() stuff is some Windows magic that tells its runtime
3068 library to translate CRLF into a single LF character. At least, that's what
3069 I've been told: never having used Windows I take this all on trust. Originally
3070 it set 0x8000, but then I was advised that _O_BINARY was better. */
3071 
3072 #if defined(_WIN32) || defined(WIN32)
3073 _setmode( _fileno( stdout ), _O_BINARY );
3074 #endif
3075 
3076 /* Get the version number: both pcre_version() and pcre16_version() give the
3077 same answer. We just need to ensure that we call one that is available. */
3078 
3079 #if defined SUPPORT_PCRE8
3080 version = pcre_version();
3081 #elif defined SUPPORT_PCRE16
3083 #elif defined SUPPORT_PCRE32
3085 #endif
3086 
3087 /* Scan options */
3088 
3089 while (argc > 1 && argv[op][0] == '-')
3090  {
3091  pcre_uint8 *endptr;
3092  char *arg = argv[op];
3093 
3094  if (strcmp(arg, "-m") == 0) showstore = 1;
3095  else if (strcmp(arg, "-s") == 0) force_study = 0;
3096 
3097  else if (strncmp(arg, "-s+", 3) == 0)
3098  {
3099  arg += 3;
3100  if (*arg == '+') { arg++; verify_jit = TRUE; }
3101  force_study = 1;
3102  if (*arg == 0)
3103  force_study_options = jit_study_bits[6];
3104  else if (*arg >= '1' && *arg <= '7')
3105  force_study_options = jit_study_bits[*arg - '1'];
3106  else goto BAD_ARG;
3107  }
3108  else if (strcmp(arg, "-8") == 0)
3109  {
3110 #ifdef SUPPORT_PCRE8
3111  pcre_mode = PCRE8_MODE;
3112 #else
3113  printf("** This version of PCRE was built without 8-bit support\n");
3114  exit(1);
3115 #endif
3116  }
3117  else if (strcmp(arg, "-16") == 0)
3118  {
3119 #ifdef SUPPORT_PCRE16
3120  pcre_mode = PCRE16_MODE;
3121 #else
3122  printf("** This version of PCRE was built without 16-bit support\n");
3123  exit(1);
3124 #endif
3125  }
3126  else if (strcmp(arg, "-32") == 0)
3127  {
3128 #ifdef SUPPORT_PCRE32
3129  pcre_mode = PCRE32_MODE;
3130 #else
3131  printf("** This version of PCRE was built without 32-bit support\n");
3132  exit(1);
3133 #endif
3134  }
3135  else if (strcmp(arg, "-q") == 0) quiet = 1;
3136  else if (strcmp(arg, "-b") == 0) debug = 1;
3137  else if (strcmp(arg, "-i") == 0) showinfo = 1;
3138  else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3139  else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3140  else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3141 #if !defined NODFA
3142  else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3143 #endif
3144  else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3145  ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3146  *endptr == 0))
3147  {
3148  op++;
3149  argc--;
3150  }
3151  else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3152  strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3153  {
3154  int temp;
3155  int both = arg[2] == 0;
3156  showtotaltimes = arg[1] == 'T';
3157  if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3158  *endptr == 0))
3159  {
3160  timeitm = temp;
3161  op++;
3162  argc--;
3163  }
3164  else timeitm = LOOPREPEAT;
3165  if (both) timeit = timeitm;
3166  }
3167  else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3168  ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3169  *endptr == 0))
3170  {
3171 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3172  printf("PCRE: -S not supported on this OS\n");
3173  exit(1);
3174 #else
3175  int rc;
3176  struct rlimit rlim;
3177  getrlimit(RLIMIT_STACK, &rlim);
3178  rlim.rlim_cur = stack_size * 1024 * 1024;
3179  rc = setrlimit(RLIMIT_STACK, &rlim);
3180  if (rc != 0)
3181  {
3182  printf("PCRE: setrlimit() failed with error %d\n", rc);
3183  exit(1);
3184  }
3185  op++;
3186  argc--;
3187 #endif
3188  }
3189 #if !defined NOPOSIX
3190  else if (strcmp(arg, "-p") == 0) posix = 1;
3191 #endif
3192  else if (strcmp(arg, "-C") == 0)
3193  {
3194  int rc = 0;
3195  unsigned long int lrc;
3196 
3197  if (argc > 2)
3198  {
3199  if (strcmp(argv[op + 1], "linksize") == 0)
3200  {
3201  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3202  printf("%d\n", rc);
3203  yield = rc;
3204 
3205 #ifdef __VMS
3206  vms_setsymbol("LINKSIZE",0,yield );
3207 #endif
3208  }
3209  else if (strcmp(argv[op + 1], "pcre8") == 0)
3210  {
3211 #ifdef SUPPORT_PCRE8
3212  printf("1\n");
3213  yield = 1;
3214 #else
3215  printf("0\n");
3216  yield = 0;
3217 #endif
3218 #ifdef __VMS
3219  vms_setsymbol("PCRE8",0,yield );
3220 #endif
3221  }
3222  else if (strcmp(argv[op + 1], "pcre16") == 0)
3223  {
3224 #ifdef SUPPORT_PCRE16
3225  printf("1\n");
3226  yield = 1;
3227 #else
3228  printf("0\n");
3229  yield = 0;
3230 #endif
3231 #ifdef __VMS
3232  vms_setsymbol("PCRE16",0,yield );
3233 #endif
3234  }
3235  else if (strcmp(argv[op + 1], "pcre32") == 0)
3236  {
3237 #ifdef SUPPORT_PCRE32
3238  printf("1\n");
3239  yield = 1;
3240 #else
3241  printf("0\n");
3242  yield = 0;
3243 #endif
3244 #ifdef __VMS
3245  vms_setsymbol("PCRE32",0,yield );
3246 #endif
3247  }
3248  else if (strcmp(argv[op + 1], "utf") == 0)
3249  {
3250 #ifdef SUPPORT_PCRE8
3251  if (pcre_mode == PCRE8_MODE)
3252  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3253 #endif
3254 #ifdef SUPPORT_PCRE16
3255  if (pcre_mode == PCRE16_MODE)
3256  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3257 #endif
3258 #ifdef SUPPORT_PCRE32
3259  if (pcre_mode == PCRE32_MODE)
3260  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3261 #endif
3262  printf("%d\n", rc);
3263  yield = rc;
3264 #ifdef __VMS
3265  vms_setsymbol("UTF",0,yield );
3266 #endif
3267  }
3268  else if (strcmp(argv[op + 1], "ucp") == 0)
3269  {
3270  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3271  printf("%d\n", rc);
3272  yield = rc;
3273  }
3274  else if (strcmp(argv[op + 1], "jit") == 0)
3275  {
3276  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3277  printf("%d\n", rc);
3278  yield = rc;
3279  }
3280  else if (strcmp(argv[op + 1], "newline") == 0)
3281  {
3282  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3284  }
3285  else if (strcmp(argv[op + 1], "bsr") == 0)
3286  {
3287  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3288  printf("%s\n", rc? "ANYCRLF" : "ANY");
3289  }
3290  else if (strcmp(argv[op + 1], "ebcdic") == 0)
3291  {
3292 #ifdef EBCDIC
3293  printf("1\n");
3294  yield = 1;
3295 #else
3296  printf("0\n");
3297 #endif
3298  }
3299  else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3300  {
3301 #ifdef EBCDIC
3302  printf("0x%02x\n", CHAR_LF);
3303 #else
3304  printf("0\n");
3305 #endif
3306  }
3307  else
3308  {
3309  printf("Unknown -C option: %s\n", argv[op + 1]);
3310  }
3311  goto EXIT;
3312  }
3313 
3314  /* No argument for -C: output all configuration information. */
3315 
3316  printf("PCRE version %s\n", version);
3317  printf("Compiled with\n");
3318 
3319 #ifdef EBCDIC
3320  printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3321 #endif
3322 
3323 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3324 are set, either both UTFs are supported or both are not supported. */
3325 
3326 #ifdef SUPPORT_PCRE8
3327  printf(" 8-bit support\n");
3328  (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3329  printf (" %sUTF-8 support\n", rc ? "" : "No ");
3330 #endif
3331 #ifdef SUPPORT_PCRE16
3332  printf(" 16-bit support\n");
3333  (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3334  printf (" %sUTF-16 support\n", rc ? "" : "No ");
3335 #endif
3336 #ifdef SUPPORT_PCRE32
3337  printf(" 32-bit support\n");
3338  (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3339  printf (" %sUTF-32 support\n", rc ? "" : "No ");
3340 #endif
3341 
3342  (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3343  printf(" %sUnicode properties support\n", rc? "" : "No ");
3344  (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3345  if (rc)
3346  {
3347  const char *arch;
3348  (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3349  printf(" Just-in-time compiler support: %s\n", arch);
3350  }
3351  else
3352  printf(" No just-in-time compiler support\n");
3353  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3355  (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3356  printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3357  "all Unicode newlines");
3358  (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3359  printf(" Internal link size = %d\n", rc);
3360  (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3361  printf(" POSIX malloc threshold = %d\n", rc);
3362  (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3363  printf(" Parentheses nest limit = %ld\n", lrc);
3364  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3365  printf(" Default match limit = %ld\n", lrc);
3366  (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3367  printf(" Default recursion depth limit = %ld\n", lrc);
3368  (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3369  printf(" Match recursion uses %s", rc? "stack" : "heap");
3370  if (showstore)
3371  {
3372  PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3373  printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3374  }
3375  printf("\n");
3376  goto EXIT;
3377  }
3378  else if (strcmp(arg, "-help") == 0 ||
3379  strcmp(arg, "--help") == 0)
3380  {
3381  usage();
3382  goto EXIT;
3383  }
3384  else
3385  {
3386  BAD_ARG:
3387  printf("** Unknown or malformed option %s\n", arg);
3388  usage();
3389  yield = 1;
3390  goto EXIT;
3391  }
3392  op++;
3393  argc--;
3394  }
3395 
3396 /* Get the store for the offsets vector, and remember what it was */
3397 
3398 size_offsets_max = size_offsets;
3399 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3400 if (offsets == NULL)
3401  {
3402  printf("** Failed to get %d bytes of memory for offsets vector\n",
3403  (int)(size_offsets_max * sizeof(int)));
3404  yield = 1;
3405  goto EXIT;
3406  }
3407 
3408 /* Sort out the input and output files */
3409 
3410 if (argc > 1)
3411  {
3412  infile = fopen(argv[op], INPUT_MODE);
3413  if (infile == NULL)
3414  {
3415  printf("** Failed to open %s\n", argv[op]);
3416  yield = 1;
3417  goto EXIT;
3418  }
3419  }
3420 
3421 if (argc > 2)
3422  {
3423  outfile = fopen(argv[op+1], OUTPUT_MODE);
3424  if (outfile == NULL)
3425  {
3426  printf("** Failed to open %s\n", argv[op+1]);
3427  yield = 1;
3428  goto EXIT;
3429  }
3430  }
3431 
3432 /* Set alternative malloc function */
3433 
3434 #ifdef SUPPORT_PCRE8
3439 #endif
3440 
3441 #ifdef SUPPORT_PCRE16
3446 #endif
3447 
3448 #ifdef SUPPORT_PCRE32
3453 #endif
3454 
3455 /* Heading line unless quiet */
3456 
3457 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3458 
3459 /* Main loop */
3460 
3461 while (!done)
3462  {
3463  pcre *re = NULL;
3464  pcre_extra *extra = NULL;
3465 
3466 #if !defined NOPOSIX /* There are still compilers that require no indent */
3467  regex_t preg = { NULL, 0, 0} ;
3468  int do_posix = 0;
3469 #endif
3470 
3471  const char *error;
3472  pcre_uint8 *markptr;
3473  pcre_uint8 *p, *pp, *ppp;
3474  pcre_uint8 *to_file = NULL;
3475  const pcre_uint8 *tables = NULL;
3476  unsigned long int get_options;
3477  unsigned long int true_size, true_study_size = 0;
3478  size_t size;
3479  int do_allcaps = 0;
3480  int do_mark = 0;
3481  int do_study = 0;
3482  int no_force_study = 0;
3483  int do_debug = debug;
3484  int do_G = 0;
3485  int do_g = 0;
3486  int do_showinfo = showinfo;
3487  int do_showrest = 0;
3488  int do_showcaprest = 0;
3489  int do_flip = 0;
3490  int erroroffset, len, delimiter, poffset;
3491 
3492 #if !defined NODFA
3493  int dfa_matched = 0;
3494 #endif
3495 
3496  use_utf = 0;
3497  debug_lengths = 1;
3498  SET_PCRE_STACK_GUARD(NULL);
3499 
3500  if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3501  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3502  fflush(outfile);
3503 
3504  p = buffer;
3505  while (isspace(*p)) p++;
3506  if (*p == 0) continue;
3507 
3508  /* Handle option lock-out setting */
3509 
3510  if (*p == '<' && p[1] == ' ')
3511  {
3512  p += 2;
3513  while (isspace(*p)) p++;
3514  if (strncmp((char *)p, "forbid ", 7) == 0)
3515  {
3516  p += 7;
3517  while (isspace(*p)) p++;
3518  pp = lockout;
3519  while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3520  *pp++ = *p++;
3521  *pp = 0;
3522  }
3523  else
3524  {
3525  printf("** Unrecognized special command '%s'\n", p);
3526  yield = 1;
3527  goto EXIT;
3528  }
3529  continue;
3530  }
3531 
3532  /* See if the pattern is to be loaded pre-compiled from a file. */
3533 
3534  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3535  {
3536  pcre_uint32 magic;
3537  pcre_uint8 sbuf[8];
3538  FILE *f;
3539 
3540  p++;
3541  if (*p == '!')
3542  {
3543  do_debug = TRUE;
3544  do_showinfo = TRUE;
3545  p++;
3546  }
3547 
3548  pp = p + (int)strlen((char *)p);
3549  while (isspace(pp[-1])) pp--;
3550  *pp = 0;
3551 
3552  f = fopen((char *)p, "rb");
3553  if (f == NULL)
3554  {
3555  fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3556  continue;
3557  }
3558  if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3559 
3560  true_size =
3561  (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3562  true_study_size =
3563  (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3564 
3565  re = (pcre *)new_malloc(true_size);
3566  if (re == NULL)
3567  {
3568  printf("** Failed to get %d bytes of memory for pcre object\n",
3569  (int)true_size);
3570  yield = 1;
3571  goto EXIT;
3572  }
3573  if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3574 
3575  magic = REAL_PCRE_MAGIC(re);
3576  if (magic != MAGIC_NUMBER)
3577  {
3578  if (swap_uint32(magic) == MAGIC_NUMBER)
3579  {
3580  do_flip = 1;
3581  }
3582  else
3583  {
3584  fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3585  new_free(re);
3586  fclose(f);
3587  continue;
3588  }
3589  }
3590 
3591  /* We hide the byte-invert info for little and big endian tests. */
3592  fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3593  do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3594 
3595  /* Now see if there is any following study data. */
3596 
3597  if (true_study_size != 0)
3598  {
3599  pcre_study_data *psd;
3600 
3601  extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3602  extra->flags = PCRE_EXTRA_STUDY_DATA;
3603 
3604  psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3605  extra->study_data = psd;
3606 
3607  if (fread(psd, 1, true_study_size, f) != true_study_size)
3608  {
3609  FAIL_READ:
3610  fprintf(outfile, "Failed to read data from %s\n", p);
3611  if (extra != NULL)
3612  {
3613  PCRE_FREE_STUDY(extra);
3614  }
3615  new_free(re);
3616  fclose(f);
3617  continue;
3618  }
3619  fprintf(outfile, "Study data loaded from %s\n", p);
3620  do_study = 1; /* To get the data output if requested */
3621  }
3622  else fprintf(outfile, "No study data\n");
3623 
3624  /* Flip the necessary bytes. */
3625  if (do_flip)
3626  {
3627  int rc;
3628  PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3629  if (rc == PCRE_ERROR_BADMODE)
3630  {
3631  pcre_uint32 flags_in_host_byte_order;
3632  if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3633  flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3634  else
3635  flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3636  /* Simulate the result of the function call below. */
3637  fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3638  pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3640  fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3641  "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3642  new_free(re);
3643  fclose(f);
3644  continue;
3645  }
3646  }
3647 
3648  /* Need to know if UTF-8 for printing data strings. */
3649 
3650  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3651  {
3652  new_free(re);
3653  fclose(f);
3654  continue;
3655  }
3656  use_utf = (get_options & PCRE_UTF8) != 0;
3657 
3658  fclose(f);
3659  goto SHOW_INFO;
3660  }
3661 
3662  /* In-line pattern (the usual case). Get the delimiter and seek the end of
3663  the pattern; if it isn't complete, read more. */
3664 
3665  delimiter = *p++;
3666 
3667  if (isalnum(delimiter) || delimiter == '\\')
3668  {
3669  fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3670  goto SKIP_DATA;
3671  }
3672 
3673  pp = p;
3674  poffset = (int)(p - buffer);
3675 
3676  for(;;)
3677  {
3678  while (*pp != 0)
3679  {
3680  if (*pp == '\\' && pp[1] != 0) pp++;
3681  else if (*pp == delimiter) break;
3682  pp++;
3683  }
3684  if (*pp != 0) break;
3685  if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3686  {
3687  fprintf(outfile, "** Unexpected EOF\n");
3688  done = 1;
3689  goto CONTINUE;
3690  }
3691  if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3692  }
3693 
3694  /* The buffer may have moved while being extended; reset the start of data
3695  pointer to the correct relative point in the buffer. */
3696 
3697  p = buffer + poffset;
3698 
3699  /* If the first character after the delimiter is backslash, make
3700  the pattern end with backslash. This is purely to provide a way
3701  of testing for the error message when a pattern ends with backslash. */
3702 
3703  if (pp[1] == '\\') *pp++ = '\\';
3704 
3705  /* Terminate the pattern at the delimiter, and save a copy of the pattern
3706  for callouts. */
3707 
3708  *pp++ = 0;
3709  strcpy((char *)pbuffer, (char *)p);
3710 
3711  /* Look for modifiers and options after the final delimiter. */
3712 
3713  options = default_options;
3714  study_options = force_study_options;
3715  log_store = showstore; /* default from command line */
3716 
3717  while (*pp != 0)
3718  {
3719  /* Check to see whether this modifier has been locked out for this file.
3720  This is complicated for the multi-character options that begin with '<'.
3721  If there is no '>' in the lockout string, all multi-character modifiers are
3722  locked out. */
3723 
3724  if (strchr((char *)lockout, *pp) != NULL)
3725  {
3726  if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3727  {
3728  int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3729  if (x == 0) goto SKIP_DATA;
3730 
3731  for (ppp = lockout; *ppp != 0; ppp++)
3732  {
3733  if (*ppp == '<')
3734  {
3735  int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3736  if (y == 0)
3737  {
3738  printf("** Error in modifier forbid data - giving up.\n");
3739  yield = 1;
3740  goto EXIT;
3741  }
3742  if (x == y)
3743  {
3744  ppp = pp;
3745  while (*ppp != '>') ppp++;
3746  printf("** The %.*s modifier is locked out - giving up.\n",
3747  (int)(ppp - pp + 1), pp);
3748  yield = 1;
3749  goto EXIT;
3750  }
3751  }
3752  }
3753  }
3754 
3755  /* The single-character modifiers are straightforward. */
3756 
3757  else
3758  {
3759  printf("** The /%c modifier is locked out - giving up.\n", *pp);
3760  yield = 1;
3761  goto EXIT;
3762  }
3763  }
3764 
3765  /* The modifier is not locked out; handle it. */
3766 
3767  switch (*pp++)
3768  {
3769  case 'f': options |= PCRE_FIRSTLINE; break;
3770  case 'g': do_g = 1; break;
3771  case 'i': options |= PCRE_CASELESS; break;
3772  case 'm': options |= PCRE_MULTILINE; break;
3773  case 's': options |= PCRE_DOTALL; break;
3774  case 'x': options |= PCRE_EXTENDED; break;
3775 
3776  case '+':
3777  if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3778  break;
3779 
3780  case '=': do_allcaps = 1; break;
3781  case 'A': options |= PCRE_ANCHORED; break;
3782  case 'B': do_debug = 1; break;
3783  case 'C': options |= PCRE_AUTO_CALLOUT; break;
3784  case 'D': do_debug = do_showinfo = 1; break;
3785  case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3786  case 'F': do_flip = 1; break;
3787  case 'G': do_G = 1; break;
3788  case 'I': do_showinfo = 1; break;
3789  case 'J': options |= PCRE_DUPNAMES; break;
3790  case 'K': do_mark = 1; break;
3791  case 'M': log_store = 1; break;
3792  case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3793  case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3794 
3795 #if !defined NOPOSIX
3796  case 'P': do_posix = 1; break;
3797 #endif
3798 
3799  case 'Q':
3800  switch (*pp)
3801  {
3802  case '0':
3803  case '1':
3804  stack_guard_return = *pp++ - '0';
3805  break;
3806 
3807  default:
3808  fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3809  goto SKIP_DATA;
3810  }
3811  SET_PCRE_STACK_GUARD(stack_guard);
3812  break;
3813 
3814  case 'S':
3815  do_study = 1;
3816  for (;;)
3817  {
3818  switch (*pp++)
3819  {
3820  case 'S':
3821  do_study = 0;
3822  no_force_study = 1;
3823  break;
3824 
3825  case '!':
3827  break;
3828 
3829  case '+':
3830  if (*pp == '+')
3831  {
3832  verify_jit = TRUE;
3833  pp++;
3834  }
3835  if (*pp >= '1' && *pp <= '7')
3836  study_options |= jit_study_bits[*pp++ - '1'];
3837  else
3839  break;
3840 
3841  case '-':
3843  break;
3844 
3845  default:
3846  pp--;
3847  goto ENDLOOP;
3848  }
3849  }
3850  ENDLOOP:
3851  break;
3852 
3853  case 'U': options |= PCRE_UNGREEDY; break;
3854  case 'W': options |= PCRE_UCP; break;
3855  case 'X': options |= PCRE_EXTRA; break;
3856  case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3857  case 'Z': debug_lengths = 0; break;
3858  case '8': options |= PCRE_UTF8; use_utf = 1; break;
3859  case '9': options |= PCRE_NEVER_UTF; break;
3860  case '?': options |= PCRE_NO_UTF8_CHECK; break;
3861 
3862  case 'T':
3863  switch (*pp++)
3864  {
3865  case '0': tables = tables0; break;
3866  case '1': tables = tables1; break;
3867 
3868  case '\r':
3869  case '\n':
3870  case ' ':
3871  case 0:
3872  fprintf(outfile, "** Missing table number after /T\n");
3873  goto SKIP_DATA;
3874 
3875  default:
3876  fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3877  goto SKIP_DATA;
3878  }
3879  break;
3880 
3881  case 'L':
3882  ppp = pp;
3883  /* The '\r' test here is so that it works on Windows. */
3884  /* The '0' test is just in case this is an unterminated line. */
3885  while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3886  *ppp = 0;
3887  if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3888  {
3889  fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3890  goto SKIP_DATA;
3891  }
3892  locale_set = 1;
3893  tables = PCRE_MAKETABLES;
3894  pp = ppp;
3895  break;
3896 
3897  case '>':
3898  to_file = pp;
3899  while (*pp != 0) pp++;
3900  while (isspace(pp[-1])) pp--;
3901  *pp = 0;
3902  break;
3903 
3904  case '<':
3905  {
3906  int x = check_mc_option(pp, outfile, FALSE, "modifier");
3907  if (x == 0) goto SKIP_DATA;
3908  options |= x;
3909  while (*pp++ != '>');
3910  }
3911  break;
3912 
3913  case '\r': /* So that it works in Windows */
3914  case '\n':
3915  case ' ':
3916  break;
3917 
3918  default:
3919  fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3920  goto SKIP_DATA;
3921  }
3922  }
3923 
3924  /* Handle compiling via the POSIX interface, which doesn't support the
3925  timing, showing, or debugging options, nor the ability to pass over
3926  local character tables. Neither does it have 16-bit support. */
3927 
3928 #if !defined NOPOSIX
3929  if (posix || do_posix)
3930  {
3931  int rc;
3932  int cflags = 0;
3933 
3934  if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3935  if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3936  if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3937  if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3938  if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3939  if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3940  if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3941 
3942  rc = regcomp(&preg, (char *)p, cflags);
3943 
3944  /* Compilation failed; go back for another re, skipping to blank line
3945  if non-interactive. */
3946 
3947  if (rc != 0)
3948  {
3949  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3950  fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3951  goto SKIP_DATA;
3952  }
3953  }
3954 
3955  /* Handle compiling via the native interface */
3956 
3957  else
3958 #endif /* !defined NOPOSIX */
3959 
3960  {
3961  /* In 16- or 32-bit mode, convert the input. */
3962 
3963 #ifdef SUPPORT_PCRE16
3964  if (pcre_mode == PCRE16_MODE)
3965  {
3966  switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3967  {
3968  case -1:
3969  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3970  "converted to UTF-16\n");
3971  goto SKIP_DATA;
3972 
3973  case -2:
3974  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3975  "cannot be converted to UTF-16\n");
3976  goto SKIP_DATA;
3977 
3978  case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3979  fprintf(outfile, "**Failed: character value greater than 0xffff "
3980  "cannot be converted to 16-bit in non-UTF mode\n");
3981  goto SKIP_DATA;
3982 
3983  default:
3984  break;
3985  }
3986  p = (pcre_uint8 *)buffer16;
3987  }
3988 #endif
3989 
3990 #ifdef SUPPORT_PCRE32
3991  if (pcre_mode == PCRE32_MODE)
3992  {
3993  switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3994  {
3995  case -1:
3996  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3997  "converted to UTF-32\n");
3998  goto SKIP_DATA;
3999 
4000  case -2:
4001  fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4002  "cannot be converted to UTF-32\n");
4003  goto SKIP_DATA;
4004 
4005  case -3:
4006  fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4007  goto SKIP_DATA;
4008 
4009  default:
4010  break;
4011  }
4012  p = (pcre_uint8 *)buffer32;
4013  }
4014 #endif
4015 
4016  /* Compile many times when timing */
4017 
4018  if (timeit > 0)
4019  {
4020  register int i;
4021  clock_t time_taken;
4022  clock_t start_time = clock();
4023  for (i = 0; i < timeit; i++)
4024  {
4025  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4026  if (re != NULL) free(re);
4027  }
4028  total_compile_time += (time_taken = clock() - start_time);
4029  fprintf(outfile, "Compile time %.4f milliseconds\n",
4030  (((double)time_taken * 1000.0) / (double)timeit) /
4031  (double)CLOCKS_PER_SEC);
4032  }
4033 
4034  PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4035 
4036  /* Compilation failed; go back for another re, skipping to blank line
4037  if non-interactive. */
4038 
4039  if (re == NULL)
4040  {
4041  fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4042  SKIP_DATA:
4043  if (infile != stdin)
4044  {
4045  for (;;)
4046  {
4047  if (extend_inputline(infile, buffer, NULL) == NULL)
4048  {
4049  done = 1;
4050  goto CONTINUE;
4051  }
4052  len = (int)strlen((char *)buffer);
4053  while (len > 0 && isspace(buffer[len-1])) len--;
4054  if (len == 0) break;
4055  }
4056  fprintf(outfile, "\n");
4057  }
4058  goto CONTINUE;
4059  }
4060 
4061  /* Compilation succeeded. It is now possible to set the UTF-8 option from
4062  within the regex; check for this so that we know how to process the data
4063  lines. */
4064 
4065  if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4066  goto SKIP_DATA;
4067  if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4068 
4069  /* Extract the size for possible writing before possibly flipping it,
4070  and remember the store that was got. */
4071 
4072  true_size = REAL_PCRE_SIZE(re);
4073 
4074  /* Output code size information if requested */
4075 
4076  if (log_store)
4077  {
4078  int name_count, name_entry_size, real_pcre_size;
4079 
4080  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4081  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4082  real_pcre_size = 0;
4083 #ifdef SUPPORT_PCRE8
4084  if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4085  real_pcre_size = sizeof(real_pcre);
4086 #endif
4087 #ifdef SUPPORT_PCRE16
4088  if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4089  real_pcre_size = sizeof(real_pcre16);
4090 #endif
4091 #ifdef SUPPORT_PCRE32
4092  if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4093  real_pcre_size = sizeof(real_pcre32);
4094 #endif
4095  new_info(re, NULL, PCRE_INFO_SIZE, &size);
4096  fprintf(outfile, "Memory allocation (code space): %d\n",
4097  (int)(size - real_pcre_size - name_count * name_entry_size));
4098  }
4099 
4100  /* If -s or /S was present, study the regex to generate additional info to
4101  help with the matching, unless the pattern has the SS option, which
4102  suppresses the effect of /S (used for a few test patterns where studying is
4103  never sensible). */
4104 
4105  if (do_study || (force_study >= 0 && !no_force_study))
4106  {
4107  if (timeit > 0)
4108  {
4109  register int i;
4110  clock_t time_taken;
4111  clock_t start_time = clock();
4112  for (i = 0; i < timeit; i++)
4113  {
4114  PCRE_STUDY(extra, re, study_options, &error);
4115  }
4116  total_study_time = (time_taken = clock() - start_time);
4117  if (extra != NULL)
4118  {
4119  PCRE_FREE_STUDY(extra);
4120  }
4121  fprintf(outfile, " Study time %.4f milliseconds\n",
4122  (((double)time_taken * 1000.0) / (double)timeit) /
4123  (double)CLOCKS_PER_SEC);
4124  }
4125  PCRE_STUDY(extra, re, study_options, &error);
4126  if (error != NULL)
4127  fprintf(outfile, "Failed to study: %s\n", error);
4128  else if (extra != NULL)
4129  {
4130  true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4131  if (log_store)
4132  {
4133  size_t jitsize;
4134  if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4135  jitsize != 0)
4136  fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4137  }
4138  }
4139  }
4140 
4141  /* If /K was present, we set up for handling MARK data. */
4142 
4143  if (do_mark)
4144  {
4145  if (extra == NULL)
4146  {
4147  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4148  extra->flags = 0;
4149  }
4150  extra->mark = &markptr;
4151  extra->flags |= PCRE_EXTRA_MARK;
4152  }
4153 
4154  /* Extract and display information from the compiled data if required. */
4155 
4156  SHOW_INFO:
4157 
4158  if (do_debug)
4159  {
4160  fprintf(outfile, "------------------------------------------------------------------\n");
4161  PCRE_PRINTINT(re, outfile, debug_lengths);
4162  }
4163 
4164  /* We already have the options in get_options (see above) */
4165 
4166  if (do_showinfo)
4167  {
4168  unsigned long int all_options;
4169  pcre_uint32 first_char, need_char;
4170  pcre_uint32 match_limit, recursion_limit;
4171  int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4172  hascrorlf, maxlookbehind, match_empty;
4173  int nameentrysize, namecount;
4174  const pcre_uint8 *nametable;
4175 
4176  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4177  new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4178  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4179  new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4180  new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4181  new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4182  new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4183  new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4184  new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4185  new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4186  new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4187  new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4188  new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4189  new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4190  != 0)
4191  goto SKIP_DATA;
4192 
4193  fprintf(outfile, "Capturing subpattern count = %d\n", count);
4194 
4195  if (backrefmax > 0)
4196  fprintf(outfile, "Max back reference = %d\n", backrefmax);
4197 
4198  if (maxlookbehind > 0)
4199  fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4200 
4201  if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4202  fprintf(outfile, "Match limit = %u\n", match_limit);
4203 
4204  if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4205  fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4206 
4207  if (namecount > 0)
4208  {
4209  fprintf(outfile, "Named capturing subpatterns:\n");
4210  while (namecount-- > 0)
4211  {
4212  int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4213  int length = (int)STRLEN(nametable + imm2_size);
4214  fprintf(outfile, " ");
4215  PCHARSV(nametable, imm2_size, length, outfile);
4216  while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4217 #ifdef SUPPORT_PCRE32
4218  if (pcre_mode == PCRE32_MODE)
4219  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4220 #endif
4221 #ifdef SUPPORT_PCRE16
4222  if (pcre_mode == PCRE16_MODE)
4223  fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4224 #endif
4225 #ifdef SUPPORT_PCRE8
4226  if (pcre_mode == PCRE8_MODE)
4227  fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4228 #endif
4229  nametable += nameentrysize * CHAR_SIZE;
4230  }
4231  }
4232 
4233  if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4234  if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4235  if (match_empty) fprintf(outfile, "May match empty string\n");
4236 
4237  all_options = REAL_PCRE_OPTIONS(re);
4238  if (do_flip) all_options = swap_uint32(all_options);
4239 
4240  if (get_options == 0) fprintf(outfile, "No options\n");
4241  else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4242  ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4243  ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4244  ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4245  ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4246  ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4247  ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4248  ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4249  ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4250  ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4251  ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4252  ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4253  ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4254  ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4255  ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4256  ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4257  ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4258  ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4259  ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4260  ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4261 
4262  if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4263 
4264  switch (get_options & PCRE_NEWLINE_BITS)
4265  {
4266  case PCRE_NEWLINE_CR:
4267  fprintf(outfile, "Forced newline sequence: CR\n");
4268  break;
4269 
4270  case PCRE_NEWLINE_LF:
4271  fprintf(outfile, "Forced newline sequence: LF\n");
4272  break;
4273 
4274  case PCRE_NEWLINE_CRLF:
4275  fprintf(outfile, "Forced newline sequence: CRLF\n");
4276  break;
4277 
4278  case PCRE_NEWLINE_ANYCRLF:
4279  fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4280  break;
4281 
4282  case PCRE_NEWLINE_ANY:
4283  fprintf(outfile, "Forced newline sequence: ANY\n");
4284  break;
4285 
4286  default:
4287  break;
4288  }
4289 
4290  if (first_char_set == 2)
4291  {
4292  fprintf(outfile, "First char at start or follows newline\n");
4293  }
4294  else if (first_char_set == 1)
4295  {
4296  const char *caseless =
4297  ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4298  "" : " (caseless)";
4299 
4300  if (PRINTOK(first_char))
4301  fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4302  else
4303  {
4304  fprintf(outfile, "First char = ");
4305  pchar(first_char, outfile);
4306  fprintf(outfile, "%s\n", caseless);
4307  }
4308  }
4309  else
4310  {
4311  fprintf(outfile, "No first char\n");
4312  }
4313 
4314  if (need_char_set == 0)
4315  {
4316  fprintf(outfile, "No need char\n");
4317  }
4318  else
4319  {
4320  const char *caseless =
4321  ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4322  "" : " (caseless)";
4323 
4324  if (PRINTOK(need_char))
4325  fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4326  else
4327  {
4328  fprintf(outfile, "Need char = ");
4329  pchar(need_char, outfile);
4330  fprintf(outfile, "%s\n", caseless);
4331  }
4332  }
4333 
4334  /* Don't output study size; at present it is in any case a fixed
4335  value, but it varies, depending on the computer architecture, and
4336  so messes up the test suite. (And with the /F option, it might be
4337  flipped.) If study was forced by an external -s, don't show this
4338  information unless -i or -d was also present. This means that, except
4339  when auto-callouts are involved, the output from runs with and without
4340  -s should be identical. */
4341 
4342  if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4343  {
4344  if (extra == NULL)
4345  fprintf(outfile, "Study returned NULL\n");
4346  else
4347  {
4348  pcre_uint8 *start_bits = NULL;
4349  int minlength;
4350 
4351  if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4352  fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4353 
4354  if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4355  {
4356  if (start_bits == NULL)
4357  fprintf(outfile, "No starting char list\n");
4358  else
4359  {
4360  int i;
4361  int c = 24;
4362  fprintf(outfile, "Starting chars: ");
4363  for (i = 0; i < 256; i++)
4364  {
4365  if ((start_bits[i/8] & (1<<(i&7))) != 0)
4366  {
4367  if (c > 75)
4368  {
4369  fprintf(outfile, "\n ");
4370  c = 2;
4371  }
4372  if (PRINTOK(i) && i != ' ')
4373  {
4374  fprintf(outfile, "%c ", i);
4375  c += 2;
4376  }
4377  else
4378  {
4379  fprintf(outfile, "\\x%02x ", i);
4380  c += 5;
4381  }
4382  }
4383  }
4384  fprintf(outfile, "\n");
4385  }
4386  }
4387  }
4388 
4389  /* Show this only if the JIT was set by /S, not by -s. */
4390 
4391  if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4392  (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4393  {
4394  int jit;
4395  if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4396  {
4397  if (jit)
4398  fprintf(outfile, "JIT study was successful\n");
4399  else
4400 #ifdef SUPPORT_JIT
4401  fprintf(outfile, "JIT study was not successful\n");
4402 #else
4403  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4404 #endif
4405  }
4406  }
4407  }
4408  }
4409 
4410  /* If the '>' option was present, we write out the regex to a file, and
4411  that is all. The first 8 bytes of the file are the regex length and then
4412  the study length, in big-endian order. */
4413 
4414  if (to_file != NULL)
4415  {
4416  FILE *f = fopen((char *)to_file, "wb");
4417  if (f == NULL)
4418  {
4419  fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4420  }
4421  else
4422  {
4423  pcre_uint8 sbuf[8];
4424 
4425  if (do_flip) regexflip(re, extra);
4426  sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4427  sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4428  sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4429  sbuf[3] = (pcre_uint8)((true_size) & 255);
4430  sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4431  sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4432  sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4433  sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4434 
4435  if (fwrite(sbuf, 1, 8, f) < 8 ||
4436  fwrite(re, 1, true_size, f) < true_size)
4437  {
4438  fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4439  }
4440  else
4441  {
4442  fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4443 
4444  /* If there is study data, write it. */
4445 
4446  if (extra != NULL)
4447  {
4448  if (fwrite(extra->study_data, 1, true_study_size, f) <
4449  true_study_size)
4450  {
4451  fprintf(outfile, "Write error on %s: %s\n", to_file,
4452  strerror(errno));
4453  }
4454  else fprintf(outfile, "Study data written to %s\n", to_file);
4455  }
4456  }
4457  fclose(f);
4458  }
4459 
4460  new_free(re);
4461  if (extra != NULL)
4462  {
4463  PCRE_FREE_STUDY(extra);
4464  }
4465  if (locale_set)
4466  {
4467  new_free((void *)tables);
4468  setlocale(LC_CTYPE, "C");
4469  locale_set = 0;
4470  }
4471  continue; /* With next regex */
4472  }
4473  } /* End of non-POSIX compile */
4474 
4475  /* Read data lines and test them */
4476 
4477  for (;;)
4478  {
4479 #ifdef SUPPORT_PCRE8
4480  pcre_uint8 *q8;
4481 #endif
4482 #ifdef SUPPORT_PCRE16
4483  pcre_uint16 *q16;
4484 #endif
4485 #ifdef SUPPORT_PCRE32
4486  pcre_uint32 *q32;
4487 #endif
4488  pcre_uint8 *bptr;
4489  int *use_offsets = offsets;
4490  int use_size_offsets = size_offsets;
4491  int callout_data = 0;
4492  int callout_data_set = 0;
4493  int count;
4494  pcre_uint32 c;
4495  int copystrings = 0;
4496  int find_match_limit = default_find_match_limit;
4497  int getstrings = 0;
4498  int getlist = 0;
4499  int gmatched = 0;
4500  int start_offset = 0;
4501  int start_offset_sign = 1;
4502  int g_notempty = 0;
4503  int use_dfa = 0;
4504 
4505  *copynames = 0;
4506  *getnames = 0;
4507 
4508 #ifdef SUPPORT_PCRE32
4509  cn32ptr = copynames;
4510  gn32ptr = getnames;
4511 #endif
4512 #ifdef SUPPORT_PCRE16
4513  cn16ptr = copynames16;
4514  gn16ptr = getnames16;
4515 #endif
4516 #ifdef SUPPORT_PCRE8
4517  cn8ptr = copynames8;
4518  gn8ptr = getnames8;
4519 #endif
4520 
4521  SET_PCRE_CALLOUT(callout);
4522  first_callout = 1;
4524  callout_extra = 0;
4525  callout_count = 0;
4526  callout_fail_count = 999999;
4527  callout_fail_id = -1;
4528  show_malloc = 0;
4529  options = 0;
4530 
4531  if (extra != NULL) extra->flags &=
4533 
4534  len = 0;
4535  for (;;)
4536  {
4537  if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4538  {
4539  if (len > 0) /* Reached EOF without hitting a newline */
4540  {
4541  fprintf(outfile, "\n");
4542  break;
4543  }
4544  done = 1;
4545  goto CONTINUE;
4546  }
4547  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4548  len = (int)strlen((char *)buffer);
4549  if (buffer[len-1] == '\n') break;
4550  }
4551 
4552  while (len > 0 && isspace(buffer[len-1])) len--;
4553  buffer[len] = 0;
4554  if (len == 0) break;
4555 
4556  p = buffer;
4557  while (isspace(*p)) p++;
4558 
4559 #ifndef NOUTF
4560  /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4561  invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4562 
4563  if (use_utf)
4564  {
4565  pcre_uint8 *q;
4566  pcre_uint32 cc;
4567  int n = 1;
4568 
4569  for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4570  if (n <= 0)
4571  {
4572  fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4573  goto NEXT_DATA;
4574  }
4575  }
4576 #endif
4577 
4578 #ifdef SUPPORT_VALGRIND
4579  /* Mark the dbuffer as addressable but undefined again. */
4580 
4581  if (dbuffer != NULL)
4582  {
4583  VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4584  }
4585 #endif
4586 
4587  /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4588  the number of pcre_uchar units that will be needed. */
4589 
4590  while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4591  {
4592  dbuffer_size *= 2;
4593  dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4594  if (dbuffer == NULL)
4595  {
4596  fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4597  exit(1);
4598  }
4599  }
4600 
4601 #ifdef SUPPORT_PCRE8
4602  q8 = (pcre_uint8 *) dbuffer;
4603 #endif
4604 #ifdef SUPPORT_PCRE16
4605  q16 = (pcre_uint16 *) dbuffer;
4606 #endif
4607 #ifdef SUPPORT_PCRE32
4608  q32 = (pcre_uint32 *) dbuffer;
4609 #endif
4610 
4611  while ((c = *p++) != 0)
4612  {
4613  int i = 0;
4614  int n = 0;
4615 
4616  /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4617  In non-UTF mode, allow the value of the byte to fall through to later,
4618  where values greater than 127 are turned into UTF-8 when running in
4619  16-bit or 32-bit mode. */
4620 
4621  if (c != '\\')
4622  {
4623 #ifndef NOUTF
4624  if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4625 #endif
4626  }
4627 
4628  /* Handle backslash escapes */
4629 
4630  else switch ((c = *p++))
4631  {
4632  case 'a': c = CHAR_BEL; break;
4633  case 'b': c = '\b'; break;
4634  case 'e': c = CHAR_ESC; break;
4635  case 'f': c = '\f'; break;
4636  case 'n': c = '\n'; break;
4637  case 'r': c = '\r'; break;
4638  case 't': c = '\t'; break;
4639  case 'v': c = '\v'; break;
4640 
4641  case '0': case '1': case '2': case '3':
4642  case '4': case '5': case '6': case '7':
4643  c -= '0';
4644  while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4645  c = c * 8 + *p++ - '0';
4646  break;
4647 
4648  case 'o':
4649  if (*p == '{')
4650  {
4651  pcre_uint8 *pt = p;
4652  c = 0;
4653  for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4654  {
4655  if (++i == 12)
4656  fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4657  "using only the first twelve.\n");
4658  else c = c * 8 + *pt - '0';
4659  }
4660  if (*pt == '}') p = pt + 1;
4661  else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4662  }
4663  break;
4664 
4665  case 'x':
4666  if (*p == '{')
4667  {
4668  pcre_uint8 *pt = p;
4669  c = 0;
4670 
4671  /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4672  when isxdigit() is a macro that refers to its argument more than
4673  once. This is banned by the C Standard, but apparently happens in at
4674  least one MacOS environment. */
4675 
4676  for (pt++; isxdigit(*pt); pt++)
4677  {
4678  if (++i == 9)
4679  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4680  "using only the first eight.\n");
4681  else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4682  }
4683  if (*pt == '}')
4684  {
4685  p = pt + 1;
4686  break;
4687  }
4688  /* Not correct form for \x{...}; fall through */
4689  }
4690 
4691  /* \x without {} always defines just one byte in 8-bit mode. This
4692  allows UTF-8 characters to be constructed byte by byte, and also allows
4693  invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4694  Otherwise, pass it down to later code so that it can be turned into
4695  UTF-8 when running in 16/32-bit mode. */
4696 
4697  c = 0;
4698  while (i++ < 2 && isxdigit(*p))
4699  {
4700  c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4701  p++;
4702  }
4703 #if !defined NOUTF && defined SUPPORT_PCRE8
4704  if (use_utf && (pcre_mode == PCRE8_MODE))
4705  {
4706  *q8++ = c;
4707  continue;
4708  }
4709 #endif
4710  break;
4711 
4712  case 0: /* \ followed by EOF allows for an empty line */
4713  p--;
4714  continue;
4715 
4716  case '>':
4717  if (*p == '-')
4718  {
4719  start_offset_sign = -1;
4720  p++;
4721  }
4722  while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4723  start_offset *= start_offset_sign;
4724  continue;
4725 
4726  case 'A': /* Option setting */
4727  options |= PCRE_ANCHORED;
4728  continue;
4729 
4730  case 'B':
4731  options |= PCRE_NOTBOL;
4732  continue;
4733 
4734  case 'C':
4735  if (isdigit(*p)) /* Set copy string */
4736  {
4737  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4738  copystrings |= 1 << n;
4739  }
4740  else if (isalnum(*p))
4741  {
4742  READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4743  }
4744  else if (*p == '+')
4745  {
4746  callout_extra = 1;
4747  p++;
4748  }
4749  else if (*p == '-')
4750  {
4751  SET_PCRE_CALLOUT(NULL);
4752  p++;
4753  }
4754  else if (*p == '!')
4755  {
4756  callout_fail_id = 0;
4757  p++;
4758  while(isdigit(*p))
4759  callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4760  callout_fail_count = 0;
4761  if (*p == '!')
4762  {
4763  p++;
4764  while(isdigit(*p))
4765  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4766  }
4767  }
4768  else if (*p == '*')
4769  {
4770  int sign = 1;
4771  callout_data = 0;
4772  if (*(++p) == '-') { sign = -1; p++; }
4773  while(isdigit(*p))
4774  callout_data = callout_data * 10 + *p++ - '0';
4775  callout_data *= sign;
4776  callout_data_set = 1;
4777  }
4778  continue;
4779 
4780 #if !defined NODFA
4781  case 'D':
4782 #if !defined NOPOSIX
4783  if (posix || do_posix)
4784  printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4785  else
4786 #endif
4787  use_dfa = 1;
4788  continue;
4789 #endif
4790 
4791 #if !defined NODFA
4792  case 'F':
4793  options |= PCRE_DFA_SHORTEST;
4794  continue;
4795 #endif
4796 
4797  case 'G':
4798  if (isdigit(*p))
4799  {
4800  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4801  getstrings |= 1 << n;
4802  }
4803  else if (isalnum(*p))
4804  {
4805  READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4806  }
4807  continue;
4808 
4809  case 'J':
4810  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4811  if (extra != NULL
4812  && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4813  && extra->executable_jit != NULL)
4814  {
4815  if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4816  jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4817  PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4818  }
4819  continue;
4820 
4821  case 'L':
4822  getlist = 1;
4823  continue;
4824 
4825  case 'M':
4826  find_match_limit = 1;
4827  continue;
4828 
4829  case 'N':
4830  if ((options & PCRE_NOTEMPTY) != 0)
4831  options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4832  else
4833  options |= PCRE_NOTEMPTY;
4834  continue;
4835 
4836  case 'O':
4837  while(isdigit(*p))
4838  {
4839  if (n > (INT_MAX-10)/10) /* Hack to stop fuzzers */
4840  {
4841  printf("** \\O argument is too big\n");
4842  yield = 1;
4843  goto EXIT;
4844  }
4845  n = n * 10 + *p++ - '0';
4846  }
4847  if (n > size_offsets_max)
4848  {
4849  size_offsets_max = n;
4850  free(offsets);
4851  use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4852  if (offsets == NULL)
4853  {
4854  printf("** Failed to get %d bytes of memory for offsets vector\n",
4855  (int)(size_offsets_max * sizeof(int)));
4856  yield = 1;
4857  goto EXIT;
4858  }
4859  }
4860  use_size_offsets = n;
4861  if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4862  else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4863  continue;
4864 
4865  case 'P':
4866  options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4868  continue;
4869 
4870  case 'Q':
4871  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4872  if (extra == NULL)
4873  {
4874  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4875  extra->flags = 0;
4876  }
4878  extra->match_limit_recursion = n;
4879  continue;
4880 
4881  case 'q':
4882  while(isdigit(*p)) n = n * 10 + *p++ - '0';
4883  if (extra == NULL)
4884  {
4885  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4886  extra->flags = 0;
4887  }
4888  extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4889  extra->match_limit = n;
4890  continue;
4891 
4892 #if !defined NODFA
4893  case 'R':
4894  options |= PCRE_DFA_RESTART;
4895  continue;
4896 #endif
4897 
4898  case 'S':
4899  show_malloc = 1;
4900  continue;
4901 
4902  case 'Y':
4903  options |= PCRE_NO_START_OPTIMIZE;
4904  continue;
4905 
4906  case 'Z':
4907  options |= PCRE_NOTEOL;
4908  continue;
4909 
4910  case '?':
4911  options |= PCRE_NO_UTF8_CHECK;
4912  continue;
4913 
4914  case '<':
4915  {
4916  int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4917  if (x == 0) goto NEXT_DATA;
4918  options |= x;
4919  while (*p++ != '>');
4920  }
4921  continue;
4922  }
4923 
4924  /* We now have a character value in c that may be greater than 255.
4925  In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4926  than 127 in UTF mode must have come from \x{...} or octal constructs
4927  because values from \x.. get this far only in non-UTF mode. */
4928 
4929 #ifdef SUPPORT_PCRE8
4930  if (pcre_mode == PCRE8_MODE)
4931  {
4932 #ifndef NOUTF
4933  if (use_utf)
4934  {
4935  if (c > 0x7fffffff)
4936  {
4937  fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4938  "and so cannot be converted to UTF-8\n", c);
4939  goto NEXT_DATA;
4940  }
4941  q8 += ord2utf8(c, q8);
4942  }
4943  else
4944 #endif
4945  {
4946  if (c > 0xffu)
4947  {
4948  fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4949  "and UTF-8 mode is not enabled.\n", c);
4950  fprintf(outfile, "** Truncation will probably give the wrong "
4951  "result.\n");
4952  }
4953  *q8++ = c;
4954  }
4955  }
4956 #endif
4957 #ifdef SUPPORT_PCRE16
4958  if (pcre_mode == PCRE16_MODE)
4959  {
4960 #ifndef NOUTF
4961  if (use_utf)
4962  {
4963  if (c > 0x10ffffu)
4964  {
4965  fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4966  "0x10ffff and so cannot be converted to UTF-16\n", c);
4967  goto NEXT_DATA;
4968  }
4969  else if (c >= 0x10000u)
4970  {
4971  c-= 0x10000u;
4972  *q16++ = 0xD800 | (c >> 10);
4973  *q16++ = 0xDC00 | (c & 0x3ff);
4974  }
4975  else
4976  *q16++ = c;
4977  }
4978  else
4979 #endif
4980  {
4981  if (c > 0xffffu)
4982  {
4983  fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4984  "and UTF-16 mode is not enabled.\n", c);
4985  fprintf(outfile, "** Truncation will probably give the wrong "
4986  "result.\n");
4987  }
4988 
4989  *q16++ = c;
4990  }
4991  }
4992 #endif
4993 #ifdef SUPPORT_PCRE32
4994  if (pcre_mode == PCRE32_MODE)
4995  {
4996  *q32++ = c;
4997  }
4998 #endif
4999 
5000  }
5001 
5002  /* Reached end of subject string */
5003 
5004 #ifdef SUPPORT_PCRE8
5005  if (pcre_mode == PCRE8_MODE)
5006  {
5007  *q8 = 0;
5008  len = (int)(q8 - (pcre_uint8 *)dbuffer);
5009  }
5010 #endif
5011 #ifdef SUPPORT_PCRE16
5012  if (pcre_mode == PCRE16_MODE)
5013  {
5014  *q16 = 0;
5015  len = (int)(q16 - (pcre_uint16 *)dbuffer);
5016  }
5017 #endif
5018 #ifdef SUPPORT_PCRE32
5019  if (pcre_mode == PCRE32_MODE)
5020  {
5021  *q32 = 0;
5022  len = (int)(q32 - (pcre_uint32 *)dbuffer);
5023  }
5024 #endif
5025 
5026  /* If we're compiling with explicit valgrind support, mark the data from after
5027  its end to the end of the buffer as unaddressable, so that a read over the end
5028  of the buffer will be seen by valgrind, even if it doesn't cause a crash.
5029  If we're not building with valgrind support, at least move the data to the end
5030  of the buffer so that it might at least cause a crash.
5031  If we are using the POSIX interface, we must include the terminating zero. */
5032 
5033  bptr = dbuffer;
5034 
5035 #if !defined NOPOSIX
5036  if (posix || do_posix)
5037  {
5038 #ifdef SUPPORT_VALGRIND
5039  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5040 #else
5041  memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5042  bptr += dbuffer_size - len - 1;
5043 #endif
5044  }
5045  else
5046 #endif
5047  {
5048 #ifdef SUPPORT_VALGRIND
5049  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5050 #else
5051  bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5052 #endif
5053  }
5054 
5055  if ((all_use_dfa || use_dfa) && find_match_limit)
5056  {
5057  printf("** Match limit not relevant for DFA matching: ignored\n");
5058  find_match_limit = 0;
5059  }
5060 
5061  /* Handle matching via the POSIX interface, which does not
5062  support timing or playing with the match limit or callout data. */
5063 
5064 #if !defined NOPOSIX
5065  if (posix || do_posix)
5066  {
5067  int rc;
5068  int eflags = 0;
5069  regmatch_t *pmatch = NULL;
5070  if (use_size_offsets > 0)
5071  pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5072  if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5073  if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5074  if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5075 
5076  rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5077 
5078  if (rc != 0)
5079  {
5080  (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5081  fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5082  }
5083  else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5084  {
5085  fprintf(outfile, "Matched with REG_NOSUB\n");
5086  }
5087  else
5088  {
5089  size_t i;
5090  for (i = 0; i < (size_t)use_size_offsets; i++)
5091  {
5092  if (pmatch[i].rm_so >= 0)
5093  {
5094  fprintf(outfile, "%2d: ", (int)i);
5095  PCHARSV(dbuffer, pmatch[i].rm_so,
5096  pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5097  fprintf(outfile, "\n");
5098  if (do_showcaprest || (i == 0 && do_showrest))
5099  {
5100  fprintf(outfile, "%2d+ ", (int)i);
5101  PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5102  outfile);
5103  fprintf(outfile, "\n");
5104  }
5105  }
5106  }
5107  }
5108  free(pmatch);
5109  goto NEXT_DATA;
5110  }
5111 
5112 #endif /* !defined NOPOSIX */
5113 
5114  /* Handle matching via the native interface - repeats for /g and /G */
5115 
5116  /* Ensure that there is a JIT callback if we want to verify that JIT was
5117  actually used. If jit_stack == NULL, no stack has yet been assigned. */
5118 
5119  if (verify_jit && jit_stack == NULL && extra != NULL)
5120  { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5121 
5122  for (;; gmatched++) /* Loop for /g or /G */
5123  {
5124  markptr = NULL;
5125  jit_was_used = FALSE;
5126 
5127  if (timeitm > 0)
5128  {
5129  register int i;
5130  clock_t time_taken;
5131  clock_t start_time = clock();
5132 
5133 #if !defined NODFA
5134  if (all_use_dfa || use_dfa)
5135  {
5136  if ((options & PCRE_DFA_RESTART) != 0)
5137  {
5138  fprintf(outfile, "Timing DFA restarts is not supported\n");
5139  break;
5140  }
5141  if (dfa_workspace == NULL)
5142  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5143  for (i = 0; i < timeitm; i++)
5144  {
5145  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5146  (options | g_notempty), use_offsets, use_size_offsets,
5147  dfa_workspace, DFA_WS_DIMENSION);
5148  }
5149  }
5150  else
5151 #endif
5152 
5153  for (i = 0; i < timeitm; i++)
5154  {
5155  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5156  (options | g_notempty), use_offsets, use_size_offsets);
5157  }
5158  total_match_time += (time_taken = clock() - start_time);
5159  fprintf(outfile, "Execute time %.4f milliseconds\n",
5160  (((double)time_taken * 1000.0) / (double)timeitm) /
5161  (double)CLOCKS_PER_SEC);
5162  }
5163 
5164  /* If find_match_limit is set, we want to do repeated matches with
5165  varying limits in order to find the minimum value for the match limit and
5166  for the recursion limit. The match limits are relevant only to the normal
5167  running of pcre_exec(), so disable the JIT optimization. This makes it
5168  possible to run the same set of tests with and without JIT externally
5169  requested. */
5170 
5171  if (find_match_limit)
5172  {
5173  if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5174  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5175  extra->flags = 0;
5176 
5177  (void)check_match_limit(re, extra, bptr, len, start_offset,
5178  options|g_notempty, use_offsets, use_size_offsets,
5180  PCRE_ERROR_MATCHLIMIT, "match()");
5181 
5182  count = check_match_limit(re, extra, bptr, len, start_offset,
5183  options|g_notempty, use_offsets, use_size_offsets,
5185  PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5186  }
5187 
5188  /* If callout_data is set, use the interface with additional data */
5189 
5190  else if (callout_data_set)
5191  {
5192  if (extra == NULL)
5193  {
5194  extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5195  extra->flags = 0;
5196  }
5197  extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5198  extra->callout_data = &callout_data;
5199  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5200  options | g_notempty, use_offsets, use_size_offsets);
5201  extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5202  }
5203 
5204  /* The normal case is just to do the match once, with the default
5205  value of match_limit. */
5206 
5207 #if !defined NODFA
5208  else if (all_use_dfa || use_dfa)
5209  {
5210  if (dfa_workspace == NULL)
5211  dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5212  if (dfa_matched++ == 0)
5213  dfa_workspace[0] = -1; /* To catch bad restart */
5214  PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5215  (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5217  if (count == 0)
5218  {
5219  fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5220  count = use_size_offsets/2;
5221  }
5222  }
5223 #endif
5224 
5225  else
5226  {
5227  PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5228  options | g_notempty, use_offsets, use_size_offsets);
5229  if (count == 0)
5230  {
5231  fprintf(outfile, "Matched, but too many substrings\n");
5232  /* 2 is a special case; match can be returned */
5233  count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5234  }
5235  }
5236 
5237  /* Matched */
5238 
5239  if (count >= 0)
5240  {
5241  int i, maxcount;
5242  void *cnptr, *gnptr;
5243 
5244 #if !defined NODFA
5245  if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5246 #endif
5247  /* 2 is a special case; match can be returned */
5248  maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5249 
5250  /* This is a check against a lunatic return value. */
5251 
5252  if (count > maxcount)
5253  {
5254  fprintf(outfile,
5255  "** PCRE error: returned count %d is too big for offset size %d\n",
5256  count, use_size_offsets);
5257  count = use_size_offsets/3;
5258  if (do_g || do_G)
5259  {
5260  fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5261  do_g = do_G = FALSE; /* Break g/G loop */
5262  }
5263  }
5264 
5265  /* do_allcaps requests showing of all captures in the pattern, to check
5266  unset ones at the end. */
5267 
5268  if (do_allcaps)
5269  {
5270  if (all_use_dfa || use_dfa)
5271  {
5272  fprintf(outfile, "** Show all captures ignored after DFA matching\n");
5273  }
5274  else
5275  {
5276  if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5277  goto SKIP_DATA;
5278  count++; /* Allow for full match */
5279  if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5280  }
5281  }
5282 
5283  /* Output the captured substrings. Note that, for the matched string,
5284  the use of \K in an assertion can make the start later than the end. */
5285 
5286  for (i = 0; i < count * 2; i += 2)
5287  {
5288  if (use_offsets[i] < 0)
5289  {
5290  if (use_offsets[i] != -1)
5291  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5292  use_offsets[i], i);
5293  if (use_offsets[i+1] != -1)
5294  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5295  use_offsets[i+1], i+1);
5296  fprintf(outfile, "%2d: <unset>\n", i/2);
5297  }
5298  else
5299  {
5300  int start = use_offsets[i];
5301  int end = use_offsets[i+1];
5302 
5303  if (start > end)
5304  {
5305  start = use_offsets[i+1];
5306  end = use_offsets[i];
5307  fprintf(outfile, "Start of matched string is beyond its end - "
5308  "displaying from end to start.\n");
5309  }
5310 
5311  fprintf(outfile, "%2d: ", i/2);
5312  PCHARSV(bptr, start, end - start, outfile);
5313  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5314  fprintf(outfile, "\n");
5315 
5316  /* Note: don't use the start/end variables here because we want to
5317  show the text from what is reported as the end. */
5318 
5319  if (do_showcaprest || (i == 0 && do_showrest))
5320  {
5321  fprintf(outfile, "%2d+ ", i/2);
5322  PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5323  outfile);
5324  fprintf(outfile, "\n");
5325  }
5326  }
5327  }
5328 
5329  if (markptr != NULL)
5330  {
5331  fprintf(outfile, "MK: ");
5332  PCHARSV(markptr, 0, -1, outfile);
5333  fprintf(outfile, "\n");
5334  }
5335 
5336  for (i = 0; i < 32; i++)
5337  {
5338  if ((copystrings & (1 << i)) != 0)
5339  {
5340  int rc;
5341  char copybuffer[256];
5342  PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5343  copybuffer, sizeof(copybuffer));
5344  if (rc < 0)
5345  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5346  else
5347  {
5348  fprintf(outfile, "%2dC ", i);
5349  PCHARSV(copybuffer, 0, rc, outfile);
5350  fprintf(outfile, " (%d)\n", rc);
5351  }
5352  }
5353  }
5354 
5355  cnptr = copynames;
5356  for (;;)
5357  {
5358  int rc;
5359  char copybuffer[256];
5360 
5361 #ifdef SUPPORT_PCRE32
5362  if (pcre_mode == PCRE32_MODE)
5363  {
5364  if (*(pcre_uint32 *)cnptr == 0) break;
5365  }
5366 #endif
5367 #ifdef SUPPORT_PCRE16
5368  if (pcre_mode == PCRE16_MODE)
5369  {
5370  if (*(pcre_uint16 *)cnptr == 0) break;
5371  }
5372 #endif
5373 #ifdef SUPPORT_PCRE8
5374  if (pcre_mode == PCRE8_MODE)
5375  {
5376  if (*(pcre_uint8 *)cnptr == 0) break;
5377  }
5378 #endif
5379 
5380  PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5381  cnptr, copybuffer, sizeof(copybuffer));
5382 
5383  if (rc < 0)
5384  {
5385  fprintf(outfile, "copy substring ");
5386  PCHARSV(cnptr, 0, -1, outfile);
5387  fprintf(outfile, " failed %d\n", rc);
5388  }
5389  else
5390  {
5391  fprintf(outfile, " C ");
5392  PCHARSV(copybuffer, 0, rc, outfile);
5393  fprintf(outfile, " (%d) ", rc);
5394  PCHARSV(cnptr, 0, -1, outfile);
5395  putc('\n', outfile);
5396  }
5397 
5398  cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5399  }
5400 
5401  for (i = 0; i < 32; i++)
5402  {
5403  if ((getstrings & (1 << i)) != 0)
5404  {
5405  int rc;
5406  const char *substring;
5407  PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5408  if (rc < 0)
5409  fprintf(outfile, "get substring %d failed %d\n", i, rc);
5410  else
5411  {
5412  fprintf(outfile, "%2dG ", i);
5413  PCHARSV(substring, 0, rc, outfile);
5414  fprintf(outfile, " (%d)\n", rc);
5415  PCRE_FREE_SUBSTRING(substring);
5416  }
5417  }
5418  }
5419 
5420  gnptr = getnames;
5421  for (;;)
5422  {
5423  int rc;
5424  const char *substring;
5425 
5426 #ifdef SUPPORT_PCRE32
5427  if (pcre_mode == PCRE32_MODE)
5428  {
5429  if (*(pcre_uint32 *)gnptr == 0) break;
5430  }
5431 #endif
5432 #ifdef SUPPORT_PCRE16
5433  if (pcre_mode == PCRE16_MODE)
5434  {
5435  if (*(pcre_uint16 *)gnptr == 0) break;
5436  }
5437 #endif
5438 #ifdef SUPPORT_PCRE8
5439  if (pcre_mode == PCRE8_MODE)
5440  {
5441  if (*(pcre_uint8 *)gnptr == 0) break;
5442  }
5443 #endif
5444 
5445  PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5446  gnptr, &substring);
5447  if (rc < 0)
5448  {
5449  fprintf(outfile, "get substring ");
5450  PCHARSV(gnptr, 0, -1, outfile);
5451  fprintf(outfile, " failed %d\n", rc);
5452  }
5453  else
5454  {
5455  fprintf(outfile, " G ");
5456  PCHARSV(substring, 0, rc, outfile);
5457  fprintf(outfile, " (%d) ", rc);
5458  PCHARSV(gnptr, 0, -1, outfile);
5459  PCRE_FREE_SUBSTRING(substring);
5460  putc('\n', outfile);
5461  }
5462 
5463  gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5464  }
5465 
5466  if (getlist)
5467  {
5468  int rc;
5469  const char **stringlist;
5470  PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5471  if (rc < 0)
5472  fprintf(outfile, "get substring list failed %d\n", rc);
5473  else
5474  {
5475  for (i = 0; i < count; i++)
5476  {
5477  fprintf(outfile, "%2dL ", i);
5478  PCHARSV(stringlist[i], 0, -1, outfile);
5479  putc('\n', outfile);
5480  }
5481  if (stringlist[i] != NULL)
5482  fprintf(outfile, "string list not terminated by NULL\n");
5483  PCRE_FREE_SUBSTRING_LIST(stringlist);
5484  }
5485  }
5486  }
5487 
5488  /* There was a partial match. If the bumpalong point is not the same as
5489  the first inspected character, show the offset explicitly. */
5490 
5491  else if (count == PCRE_ERROR_PARTIAL)
5492  {
5493  fprintf(outfile, "Partial match");
5494  if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5495  fprintf(outfile, " at offset %d", use_offsets[2]);
5496  if (markptr != NULL)
5497  {
5498  fprintf(outfile, ", mark=");
5499  PCHARSV(markptr, 0, -1, outfile);
5500  }
5501  if (use_size_offsets > 1)
5502  {
5503  fprintf(outfile, ": ");
5504  PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5505  outfile);
5506  }
5507  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5508  fprintf(outfile, "\n");
5509  break; /* Out of the /g loop */
5510  }
5511 
5512  /* Failed to match. If this is a /g or /G loop and we previously set
5513  g_notempty after a null match, this is not necessarily the end. We want
5514  to advance the start offset, and continue. We won't be at the end of the
5515  string - that was checked before setting g_notempty.
5516 
5517  Complication arises in the case when the newline convention is "any",
5518  "crlf", or "anycrlf". If the previous match was at the end of a line
5519  terminated by CRLF, an advance of one character just passes the \r,
5520  whereas we should prefer the longer newline sequence, as does the code in
5521  pcre_exec(). Fudge the offset value to achieve this. We check for a
5522  newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5523  find the default.
5524 
5525  Otherwise, in the case of UTF-8 matching, the advance must be one
5526  character, not one byte. */
5527 
5528  else
5529  {
5530  if (g_notempty != 0)
5531  {
5532  int onechar = 1;
5533  unsigned int obits = REAL_PCRE_OPTIONS(re);
5534  use_offsets[0] = start_offset;
5535  if ((obits & PCRE_NEWLINE_BITS) == 0)
5536  {
5537  int d;
5538  (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5539  /* Note that these values are always the ASCII ones, even in
5540  EBCDIC environments. CR = 13, NL = 10. */
5541  obits = (d == 13)? PCRE_NEWLINE_CR :
5542  (d == 10)? PCRE_NEWLINE_LF :
5543  (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5544  (d == -2)? PCRE_NEWLINE_ANYCRLF :
5545  (d == -1)? PCRE_NEWLINE_ANY : 0;
5546  }
5547  if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5548  (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5550  &&
5551  start_offset < len - 1 && (
5552 #ifdef SUPPORT_PCRE8
5553  (pcre_mode == PCRE8_MODE &&
5554  bptr[start_offset] == '\r' &&
5555  bptr[start_offset + 1] == '\n') ||
5556 #endif
5557 #ifdef SUPPORT_PCRE16
5558  (pcre_mode == PCRE16_MODE &&
5559  ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5560  ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5561 #endif
5562 #ifdef SUPPORT_PCRE32
5563  (pcre_mode == PCRE32_MODE &&
5564  ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5565  ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5566 #endif
5567  0))
5568  onechar++;
5569  else if (use_utf)
5570  {
5571  while (start_offset + onechar < len)
5572  {
5573  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5574  onechar++;
5575  }
5576  }
5577  use_offsets[1] = start_offset + onechar;
5578  }
5579  else
5580  {
5581  switch(count)
5582  {
5583  case PCRE_ERROR_NOMATCH:
5584  if (gmatched == 0)
5585  {
5586  if (markptr == NULL)
5587  {
5588  fprintf(outfile, "No match");
5589  }
5590  else
5591  {
5592  fprintf(outfile, "No match, mark = ");
5593  PCHARSV(markptr, 0, -1, outfile);
5594  }
5595  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5596  putc('\n', outfile);
5597  }
5598  break;
5599 
5600  case PCRE_ERROR_BADUTF8:
5601  case PCRE_ERROR_SHORTUTF8:
5602  fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5603  (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5604  8 * CHAR_SIZE);
5605  if (use_size_offsets >= 2)
5606  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5607  use_offsets[1]);
5608  fprintf(outfile, "\n");
5609  break;
5610 
5612  fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5613  8 * CHAR_SIZE);
5614  break;
5615 
5616  default:
5617  if (count < 0 &&
5618  (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5619  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5620  else
5621  fprintf(outfile, "Error %d (Unexpected value)\n", count);
5622  break;
5623  }
5624 
5625  break; /* Out of the /g loop */
5626  }
5627  }
5628 
5629  /* If not /g or /G we are done */
5630 
5631  if (!do_g && !do_G) break;
5632 
5633  if (use_offsets == NULL)
5634  {
5635  fprintf(outfile, "Cannot do global matching without an ovector\n");
5636  break;
5637  }
5638 
5639  if (use_size_offsets < 2)
5640  {
5641  fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
5642  break;
5643  }
5644 
5645  /* If we have matched an empty string, first check to see if we are at
5646  the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5647  Perl's /g options does. This turns out to be rather cunning. First we set
5648  PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5649  same point. If this fails (picked up above) we advance to the next
5650  character. */
5651 
5652  g_notempty = 0;
5653 
5654  if (use_offsets[0] == use_offsets[1])
5655  {
5656  if (use_offsets[0] == len) break;
5657  g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5658  }
5659 
5660  /* For /g, update the start offset, leaving the rest alone. There is a
5661  tricky case when \K is used in a positive lookbehind assertion. This can
5662  cause the end of the match to be less than or equal to the start offset.
5663  In this case we restart at one past the start offset. This may return the
5664  same match if the original start offset was bumped along during the
5665  match, but eventually the new start offset will hit the actual start
5666  offset. (In PCRE2 the true start offset is available, and this can be
5667  done better. It is not worth doing more than making sure we do not loop
5668  at this stage in the life of PCRE1.) */
5669 
5670  if (do_g)
5671  {
5672  if (g_notempty == 0 && use_offsets[1] <= start_offset)
5673  {
5674  if (start_offset >= len) break; /* End of subject */
5675  start_offset++;
5676  if (use_utf)
5677  {
5678  while (start_offset < len)
5679  {
5680  if ((bptr[start_offset] & 0xc0) != 0x80) break;
5681  start_offset++;
5682  }
5683  }
5684  }
5685  else start_offset = use_offsets[1];
5686  }
5687 
5688  /* For /G, update the pointer and length */
5689 
5690  else
5691  {
5692  bptr += use_offsets[1] * CHAR_SIZE;
5693  len -= use_offsets[1];
5694  }
5695  } /* End of loop for /g and /G */
5696 
5697  NEXT_DATA: continue;
5698  } /* End of loop for data lines */
5699 
5700  CONTINUE:
5701 
5702 #if !defined NOPOSIX
5703  if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
5704 #endif
5705 
5706  if (re != NULL) new_free(re);
5707  if (extra != NULL)
5708  {
5709  PCRE_FREE_STUDY(extra);
5710  }
5711  if (locale_set)
5712  {
5713  new_free((void *)tables);
5714  setlocale(LC_CTYPE, "C");
5715  locale_set = 0;
5716  }
5717  if (jit_stack != NULL)
5718  {
5719  PCRE_JIT_STACK_FREE(jit_stack);
5720  jit_stack = NULL;
5721  }
5722  }
5723 
5724 if (infile == stdin) fprintf(outfile, "\n");
5725 
5726 if (showtotaltimes)
5727  {
5728  fprintf(outfile, "--------------------------------------\n");
5729  if (timeit > 0)
5730  {
5731  fprintf(outfile, "Total compile time %.4f milliseconds\n",
5732  (((double)total_compile_time * 1000.0) / (double)timeit) /
5733  (double)CLOCKS_PER_SEC);
5734  fprintf(outfile, "Total study time %.4f milliseconds\n",
5735  (((double)total_study_time * 1000.0) / (double)timeit) /
5736  (double)CLOCKS_PER_SEC);
5737  }
5738  fprintf(outfile, "Total execute time %.4f milliseconds\n",
5739  (((double)total_match_time * 1000.0) / (double)timeitm) /
5740  (double)CLOCKS_PER_SEC);
5741  }
5742 
5743 EXIT:
5744 
5745 if (infile != NULL && infile != stdin) fclose(infile);
5746 if (outfile != NULL && outfile != stdout) fclose(outfile);
5747 
5748 free(buffer);
5749 free(dbuffer);
5750 free(pbuffer);
5751 free(offsets);
5752 
5753 #ifdef SUPPORT_PCRE16
5754 if (buffer16 != NULL) free(buffer16);
5755 #endif
5756 #ifdef SUPPORT_PCRE32
5757 if (buffer32 != NULL) free(buffer32);
5758 #endif
5759 
5760 #if !defined NODFA
5761 if (dfa_workspace != NULL)
5762  free(dfa_workspace);
5763 #endif
5764 
5765 #if defined(__VMS)
5766  yield = SS$_NORMAL; /* Return values via DCL symbols */
5767 #endif
5768 
5769 return yield;
5770 }
5771 
5772 /* End of pcretest.c */
5773 
static time_t start_time
Definition: timeout.c:14
static const char * str(char *buf, int n)
Definition: stats.c:84
int BOOL
Definition: sybdb.h:150
#define option
char data[12]
Definition: iconv.c:80
void debug()
#define NULL
Definition: ncbistd.hpp:225
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
exit(2)
int i
yy_size_t n
int len
static int readline(MDB_val *out, MDB_val *buf)
Definition: mdb_load.c:177
static int version
Definition: mdb_load.c:29
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int strncmp(const char *str1, const char *str2, size_t count)
Definition: odbc_utils.hpp:133
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
EIPRangeType t
Definition: ncbi_localip.c:101
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int isalnum(Uchar c)
Definition: ncbictype.hpp:62
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int isxdigit(Uchar c)
Definition: ncbictype.hpp:71
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define PCRE_CONFIG_LINK_SIZE
Definition: pcre.h:296
#define PCRE_INFO_NAMETABLE
Definition: pcre.h:273
#define PCRE_NEVER_UTF
Definition: pcre.h:156
#define PCRE_SPTR32
Definition: pcre.h:367
#define PCRE_INFO_MAXLOOKBEHIND
Definition: pcre.h:282
#define PCRE_CONFIG_UNICODE_PROPERTIES
Definition: pcre.h:300
#define PCRE_INFO_NAMEENTRYSIZE
Definition: pcre.h:271
#define PCRE_CONFIG_JIT
Definition: pcre.h:303
#define PCRE_ERROR_BADUTF8_OFFSET
Definition: pcre.h:194
#define PCRE_NO_UTF8_CHECK
Definition: pcre.h:148
#define PCRE_INFO_FIRSTCHARACTERFLAGS
Definition: pcre.h:284
#define PCRE_INFO_OPTIONS
Definition: pcre.h:263
#define PCRE_STUDY_EXTRA_NEEDED
Definition: pcre.h:315
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
Definition: pcre.h:314
#define PCRE_CONFIG_JITTARGET
Definition: pcre.h:305
int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int, void *)
#define PCRE_FIRSTLINE
Definition: pcre.h:163
#define PCRE_INFO_SIZE
Definition: pcre.h:264
#define PCRE_ERROR_PARTIAL
Definition: pcre.h:196
#define PCRE_CONFIG_UTF16
Definition: pcre.h:304
#define PCRE_NOTBOL
Definition: pcre.h:140
#define PCRE_UTF8
Definition: pcre.h:144
#define PCRE_EXTRA_CALLOUT_DATA
Definition: pcre.h:322
#define PCRE_ERROR_BADMODE
Definition: pcre.h:213
#define PCRE_NEWLINE_ANY
Definition: pcre.h:168
#define PCRE_UTF32
Definition: pcre.h:146
#define PCRE_INFO_CAPTURECOUNT
Definition: pcre.h:265
int pcre16_get_stringnumber(const pcre16 *, const unsigned short *)
int pcre32_config(int, void *)
#define PCRE_EXTRA_STUDY_DATA
Definition: pcre.h:320
void *(* pcre32_malloc)(size_t)
#define PCRE_EXTENDED
Definition: pcre.h:136
#define PCRE_UTF16
Definition: pcre.h:145
void *(* pcre_malloc)(size_t)
Definition: pcre_globals.c:78
void(* pcre32_free)(void *)
void *(* pcre16_malloc)(size_t)
#define PCRE_SPTR16
Definition: pcre.h:356
void(* pcre16_free)(void *)
#define PCRE_CASELESS
Definition: pcre.h:133
#define PCRE_INFO_JITSIZE
Definition: pcre.h:281
#define PCRE_AUTO_CALLOUT
Definition: pcre.h:151
int pcre32_get_stringnumber(const pcre32 *, const unsigned int *)
#define PCRE_INFO_BACKREFMAX
Definition: pcre.h:266
#define PCRE_ERROR_SHORTUTF8
Definition: pcre.h:209
#define PCRE_EXTRA_MATCH_LIMIT
Definition: pcre.h:321
void *(* pcre_stack_malloc)(size_t)
Definition: pcre_globals.c:80
#define PCRE_CONFIG_MATCH_LIMIT
Definition: pcre.h:298
#define PCRE_DFA_SHORTEST
Definition: pcre.h:157
#define PCRE_INFO_HASCRORLF
Definition: pcre.h:278
int pcre_get_stringnumber(const pcre *, const char *)
Definition: pcre_get.c:70
struct real_pcre_jit_stack pcre_jit_stack
Definition: pcre.h:340
void(* pcre32_stack_free)(void *)
#define PCRE_NOTEOL
Definition: pcre.h:141
#define PCRE_MULTILINE
Definition: pcre.h:134
void(* pcre_free)(void *)
Definition: pcre_globals.c:79
#define PCRE_INFO_NAMECOUNT
Definition: pcre.h:272
#define PCRE_INFO_FIRSTTABLE
Definition: pcre.h:269
#define PCRE_NEWLINE_CRLF
Definition: pcre.h:167
#define PCRE_INFO_REQUIREDCHAR
Definition: pcre.h:285
void *(* pcre32_stack_malloc)(size_t)
#define PCRE_INFO_FIRSTCHARACTER
Definition: pcre.h:283
void(* pcre16_stack_free)(void *)
const char * pcre_version(void)
Definition: pcre_version.c:84
#define PCRE_NO_START_OPTIMISE
Definition: pcre.h:174
#define PCRE_NO_AUTO_CAPTURE
Definition: pcre.h:147
#define PCRE_ERROR_RECURSIONLIMIT
Definition: pcre.h:205
#define PCRE_PARTIAL_SOFT
Definition: pcre.h:152
#define PCRE_INFO_REQUIREDCHARFLAGS
Definition: pcre.h:286
#define PCRE_CONFIG_PARENS_LIMIT
Definition: pcre.h:307
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION
Definition: pcre.h:301
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
Definition: pcre.h:297
#define PCRE_INFO_MINLENGTH
Definition: pcre.h:279
#define PCRE_INFO_MATCHLIMIT
Definition: pcre.h:287
#define PCRE_INFO_JIT
Definition: pcre.h:280
#define PCRE_NOTEMPTY
Definition: pcre.h:143
const char * pcre32_version(void)
int pcre_config(int, void *)
Definition: pcre_config.c:70
#define PCRE_NEWLINE_LF
Definition: pcre.h:166
#define PCRE_CONFIG_UTF32
Definition: pcre.h:306
#define PCRE_DOTALL
Definition: pcre.h:135
#define PCRE_ANCHORED
Definition: pcre.h:137
#define PCRE_UCP
Definition: pcre.h:177
#define PCRE_INFO_MATCH_EMPTY
Definition: pcre.h:289
#define PCRE_EXTRA_MARK
Definition: pcre.h:325
#define PCRE_DFA_RESTART
Definition: pcre.h:161
#define PCRE_NO_START_OPTIMIZE
Definition: pcre.h:173
#define PCRE_EXTRA_EXECUTABLE_JIT
Definition: pcre.h:326
#define PCRE_NEWLINE_CR
Definition: pcre.h:165
#define PCRE_ERROR_UNSET
Definition: pcre.h:218
#define PCRE_INFO_JCHANGED
Definition: pcre.h:277
#define PCRE_ERROR_MATCHLIMIT
Definition: pcre.h:189
#define PCRE_DUPNAMES
Definition: pcre.h:164
#define PCRE_BSR_UNICODE
Definition: pcre.h:171
#define PCRE_INFO_OKPARTIAL
Definition: pcre.h:276
#define PCRE_BSR_ANYCRLF
Definition: pcre.h:170
#define PCRE_CONFIG_NEWLINE
Definition: pcre.h:295
#define PCRE_CONFIG_STACKRECURSE
Definition: pcre.h:299
int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int, void *)
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
Definition: pcre.h:313
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION
Definition: pcre.h:324
#define PCRE_JAVASCRIPT_COMPAT
Definition: pcre.h:172
#define PCRE_CONFIG_UTF8
Definition: pcre.h:294
void(* pcre_stack_free)(void *)
Definition: pcre_globals.c:81
#define PCRE_ERROR_NOMATCH
Definition: pcre.h:181
#define PCRE_DOLLAR_ENDONLY
Definition: pcre.h:138
#define PCRE_PARTIAL_HARD
Definition: pcre.h:175
#define PCRE_STUDY_JIT_COMPILE
Definition: pcre.h:312
const char * pcre16_version(void)
#define PCRE_EXTRA
Definition: pcre.h:139
void *(* pcre16_stack_malloc)(size_t)
int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *)
Definition: pcre_fullinfo.c:70
#define PCRE_CONFIG_BSR
Definition: pcre.h:302
int pcre16_config(int, void *)
#define PCRE_NEWLINE_ANYCRLF
Definition: pcre.h:169
#define PCRE_ERROR_BADUTF8
Definition: pcre.h:191
#define PCRE_INFO_RECURSIONLIMIT
Definition: pcre.h:288
#define PCRE_NOTEMPTY_ATSTART
Definition: pcre.h:176
#define PCRE_NO_AUTO_POSSESS
Definition: pcre.h:160
#define PCRE_UNGREEDY
Definition: pcre.h:142
#define PCRE_MODE8
@ OP_STARI
@ OP_END
@ OP_NOTMINSTARI
@ OP_CHAR
@ OP_NOTPOSUPTOI
@ OP_NOTPLUS
@ OP_CLASS
@ OP_MINSTARI
@ OP_NOTSTARI
@ OP_NOT
@ OP_NOTMINPLUSI
@ OP_POSSTAR
@ OP_NOTUPTO
@ OP_EXACTI
@ OP_NOTPLUSI
@ OP_NOTQUERYI
@ OP_EXACT
@ OP_PLUS
@ OP_NOTPOSPLUSI
@ OP_POSUPTO
@ OP_MINUPTOI
@ OP_NOTPOSUPTO
@ OP_NCLASS
@ OP_MINPLUS
@ OP_MINQUERY
@ OP_UPTOI
@ OP_UPTO
@ OP_QUERY
@ OP_POSQUERYI
@ OP_NOTPOSSTARI
@ OP_NOTPOSSTAR
@ OP_PLUSI
@ OP_NOTMINPLUS
@ OP_QUERYI
@ OP_POSPLUSI
@ OP_CHARI
@ OP_NOTMINQUERYI
@ OP_NOTMINSTAR
@ OP_NOTSTAR
@ OP_MINUPTO
@ OP_NOTPOSQUERYI
@ OP_POSQUERY
@ OP_MINSTAR
@ OP_STAR
@ OP_NOTMINUPTO
@ OP_NOTMINQUERY
@ OP_POSPLUS
@ OP_NOTPOSQUERY
@ OP_XCLASS
@ OP_POSSTARI
@ OP_MINQUERYI
@ OP_MINPLUSI
@ OP_NOTMINUPTOI
@ OP_NOTI
@ OP_NOTQUERY
@ OP_POSUPTOI
@ OP_NOTUPTOI
@ OP_NOTEXACTI
@ OP_NOTEXACT
@ OP_NOTPOSPLUS
#define PCRE_NEWLINE_BITS
#define REAL_PCRE_MAGIC(re)
#define MAGIC_NUMBER
#define REVERSED_MAGIC_NUMBER
unsigned char pcre_uint8
struct real_pcre8_or_16 real_pcre
#define PCRE_FCH_CASELESS
#define REAL_PCRE_OPTIONS(re)
#define CHAR_CR
#define PCRE_MODE16
#define PCRE_RCH_CASELESS
#define PCRE_MODE_MASK
#define CHAR_LF
#define CHAR_BEL
#define CHAR_ESC
#define OP_LENGTHS
#define REAL_PCRE_SIZE(re)
#define memmove(a, b, c)
#define PCRE_MODE32
#define XCL_MAP
#define REAL_PCRE_FLAGS(re)
static const unsigned char * tables(int mode)
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
const int utf8_table3[]
Definition: pcregrep.c:413
static BOOL quiet
Definition: pcregrep.c:196
static unsigned long int match_limit
Definition: pcregrep.c:182
static int study_options
Definition: pcregrep.c:179
#define LOOPREPEAT
Definition: pcretest.c:1029
static int jit_study_bits[]
Definition: pcretest.c:1115