NCBI C++ ToolKit
pcre_exec.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Copyright (c) 1997-2018 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15  * Redistributions of source code must retain the above copyright notice,
16  this list of conditions and the following disclaimer.
17 
18  * Redistributions in binary form must reproduce the above copyright
19  notice, this list of conditions and the following disclaimer in the
20  documentation and/or other materials provided with the distribution.
21 
22  * Neither the name of the University of Cambridge nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43 
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47 
48 #define NLBLOCK md /* Block containing newline information */
49 #define PSSTART start_subject /* Field containing processed string start */
50 #define PSEND end_subject /* Field containing processed string end */
51 
52 #include "pcre_internal.h"
53 
54 /* Undefine some potentially clashing cpp symbols */
55 
56 #undef min
57 #undef max
58 
59 /* The md->capture_last field uses the lower 16 bits for the last captured
60 substring (which can never be greater than 65535) and a bit in the top half
61 to mean "capture vector overflowed". This odd way of doing things was
62 implemented when it was realized that preserving and restoring the overflow bit
63 whenever the last capture number was saved/restored made for a neater
64 interface, and doing it this way saved on (a) another variable, which would
65 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66 separate set of save/restore instructions. The following defines are used in
67 implementing this. */
68 
69 #define CAPLMASK 0x0000ffff /* The bits used for last_capture */
70 #define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
71 #define OVFLBIT 0x00010000 /* The bit that is set for overflow */
72 
73 /* Values for setting in md->match_function_type to indicate two special types
74 of call to match(). We do it this way to save on using another stack variable,
75 as stack usage is to be discouraged. */
76 
77 #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
78 #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
79 
80 /* Non-error returns from the match() function. Error returns are externally
81 defined PCRE_ERROR_xxx codes, which are all negative. */
82 
83 #define MATCH_MATCH 1
84 #define MATCH_NOMATCH 0
85 
86 /* Special internal returns from the match() function. Make them sufficiently
87 negative to avoid the external error codes. */
88 
89 #define MATCH_ACCEPT (-999)
90 #define MATCH_KETRPOS (-998)
91 #define MATCH_ONCE (-997)
92 /* The next 5 must be kept together and in sequence so that a test that checks
93 for any one of them can use a range. */
94 #define MATCH_COMMIT (-996)
95 #define MATCH_PRUNE (-995)
96 #define MATCH_SKIP (-994)
97 #define MATCH_SKIP_ARG (-993)
98 #define MATCH_THEN (-992)
99 #define MATCH_BACKTRACK_MAX MATCH_THEN
100 #define MATCH_BACKTRACK_MIN MATCH_COMMIT
101 
102 /* Maximum number of ints of offset to save on the stack for recursive calls.
103 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
104 because the offset vector is always a multiple of 3 long. */
105 
106 #define REC_STACK_SAVE_MAX 30
107 
108 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
109 
110 static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
111 static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
112 
113 #ifdef PCRE_DEBUG
114 /*************************************************
115 * Debugging function to print chars *
116 *************************************************/
117 
118 /* Print a sequence of chars in printable format, stopping at the end of the
119 subject if the requested.
120 
121 Arguments:
122  p points to characters
123  length number to print
124  is_subject TRUE if printing from within md->start_subject
125  md pointer to matching data block, if is_subject is TRUE
126 
127 Returns: nothing
128 */
129 
130 static void
131 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132 {
133 pcre_uint32 c;
134 BOOL utf = md->utf;
135 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136 while (length-- > 0)
137  if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138 }
139 #endif
140 
141 
142 
143 /*************************************************
144 * Match a back-reference *
145 *************************************************/
146 
147 /* Normally, if a back reference hasn't been set, the length that is passed is
148 negative, so the match always fails. However, in JavaScript compatibility mode,
149 the length passed is zero. Note that in caseless UTF-8 mode, the number of
150 subject bytes matched may be different to the number of reference bytes.
151 
152 Arguments:
153  offset index into the offset vector
154  eptr pointer into the subject
155  length length of reference to be matched (number of bytes)
156  md points to match data block
157  caseless TRUE if caseless
158 
159 Returns: >= 0 the number of subject bytes matched
160  -1 no match
161  -2 partial match; always given if at end subject
162 */
163 
164 static int
165 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166  BOOL caseless)
167 {
168 PCRE_PUCHAR eptr_start = eptr;
169 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170 #if defined SUPPORT_UTF && defined SUPPORT_UCP
171 BOOL utf = md->utf;
172 #endif
173 
174 #ifdef PCRE_DEBUG
175 if (eptr >= md->end_subject)
176  printf("matching subject <null>");
177 else
178  {
179  printf("matching subject ");
180  pchars(eptr, length, TRUE, md);
181  }
182 printf(" against backref ");
183 pchars(p, length, FALSE, md);
184 printf("\n");
185 #endif
186 
187 /* Always fail if reference not set (and not JavaScript compatible - in that
188 case the length is passed as zero). */
189 
190 if (length < 0) return -1;
191 
192 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
193 properly if Unicode properties are supported. Otherwise, we can check only
194 ASCII characters. */
195 
196 if (caseless)
197  {
198 #if defined SUPPORT_UTF && defined SUPPORT_UCP
199  if (utf)
200  {
201  /* Match characters up to the end of the reference. NOTE: the number of
202  data units matched may differ, because in UTF-8 there are some characters
203  whose upper and lower case versions code have different numbers of bytes.
204  For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205  (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206  sequence of two of the latter. It is important, therefore, to check the
207  length along the reference, not along the subject (earlier code did this
208  wrong). */
209 
210  PCRE_PUCHAR endptr = p + length;
211  while (p < endptr)
212  {
213  pcre_uint32 c, d;
214  const ucd_record *ur;
215  if (eptr >= md->end_subject) return -2; /* Partial match */
216  GETCHARINC(c, eptr);
217  GETCHARINC(d, p);
218  ur = GET_UCD(d);
219  if (c != d && c != d + ur->other_case)
220  {
221  const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222  for (;;)
223  {
224  if (c < *pp) return -1;
225  if (c == *pp++) break;
226  }
227  }
228  }
229  }
230  else
231 #endif
232 
233  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234  is no UCP support. */
235  {
236  while (length-- > 0)
237  {
238  pcre_uint32 cc, cp;
239  if (eptr >= md->end_subject) return -2; /* Partial match */
240  cc = UCHAR21TEST(eptr);
241  cp = UCHAR21TEST(p);
242  if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243  p++;
244  eptr++;
245  }
246  }
247  }
248 
249 /* In the caseful case, we can just compare the bytes, whether or not we
250 are in UTF-8 mode. */
251 
252 else
253  {
254  while (length-- > 0)
255  {
256  if (eptr >= md->end_subject) return -2; /* Partial match */
257  if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258  }
259  }
260 
261 return (int)(eptr - eptr_start);
262 }
263 
264 
265 
266 /***************************************************************************
267 ****************************************************************************
268  RECURSION IN THE match() FUNCTION
269 
270 The match() function is highly recursive, though not every recursive call
271 increases the recursive depth. Nevertheless, some regular expressions can cause
272 it to recurse to a great depth. I was writing for Unix, so I just let it call
273 itself recursively. This uses the stack for saving everything that has to be
274 saved for a recursive call. On Unix, the stack can be large, and this works
275 fine.
276 
277 It turns out that on some non-Unix-like systems there are problems with
278 programs that use a lot of stack. (This despite the fact that every last chip
279 has oodles of memory these days, and techniques for extending the stack have
280 been known for decades.) So....
281 
282 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283 calls by keeping local variables that need to be preserved in blocks of memory
284 obtained from malloc() instead of on the stack. Macros are used to
285 achieve this so that the actual code doesn't look very different to what it
286 always used to.
287 
288 The original heap-recursive code used longjmp(). However, it seems that this
289 can be very slow on some operating systems. Following a suggestion from Stan
290 Switzer, the use of longjmp() has been abolished, at the cost of having to
291 provide a unique number for each call to RMATCH. There is no way of generating
292 a sequence of numbers at compile time in C. I have given them names, to make
293 them stand out more clearly.
294 
295 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297 tests. Furthermore, not using longjmp() means that local dynamic variables
298 don't have indeterminate values; this has meant that the frame size can be
299 reduced because the result can be "passed back" by straight setting of the
300 variable instead of being passed in the frame.
301 ****************************************************************************
302 ***************************************************************************/
303 
304 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
305 below must be updated in sync. */
306 
307 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
314 
315 /* These versions of the macros use the stack, as normal. There are debugging
316 versions and production versions. Note that the "rw" argument of RMATCH isn't
317 actually used in this definition. */
318 
319 #ifndef NO_RECURSE
320 #define REGISTER register
321 
322 #ifdef PCRE_DEBUG
323 #define RMATCH(ra,rb,rc,rd,re,rw) \
324  { \
325  printf("match() called in line %d\n", __LINE__); \
326  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
327  printf("to line %d\n", __LINE__); \
328  }
329 #define RRETURN(ra) \
330  { \
331  printf("match() returned %d from line %d\n", ra, __LINE__); \
332  return ra; \
333  }
334 #else
335 #define RMATCH(ra,rb,rc,rd,re,rw) \
336  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
337 #define RRETURN(ra) return ra
338 #endif
339 
340 #else
341 
342 
343 /* These versions of the macros manage a private stack on the heap. Note that
344 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
345 argument of match(), which never changes. */
346 
347 #define REGISTER
348 
349 #define RMATCH(ra,rb,rc,rd,re,rw)\
350  {\
351  heapframe *newframe = frame->Xnextframe;\
352  if (newframe == NULL)\
353  {\
354  newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
355  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
356  newframe->Xnextframe = NULL;\
357  frame->Xnextframe = newframe;\
358  }\
359  frame->Xwhere = rw;\
360  newframe->Xeptr = ra;\
361  newframe->Xecode = rb;\
362  newframe->Xmstart = mstart;\
363  newframe->Xoffset_top = rc;\
364  newframe->Xeptrb = re;\
365  newframe->Xrdepth = frame->Xrdepth + 1;\
366  newframe->Xprevframe = frame;\
367  frame = newframe;\
368  DPRINTF(("restarting from line %d\n", __LINE__));\
369  goto HEAP_RECURSE;\
370  L_##rw:\
371  DPRINTF(("jumped back to line %d\n", __LINE__));\
372  }
373 
374 #define RRETURN(ra)\
375  {\
376  heapframe *oldframe = frame;\
377  frame = oldframe->Xprevframe;\
378  if (frame != NULL)\
379  {\
380  rrc = ra;\
381  goto HEAP_RETURN;\
382  }\
383  return ra;\
384  }
385 
386 
387 /* Structure for remembering the local variables in a private frame */
388 
389 typedef struct heapframe {
390  struct heapframe *Xprevframe;
391  struct heapframe *Xnextframe;
392 
393  /* Function arguments that may change */
394 
395  PCRE_PUCHAR Xeptr;
396  const pcre_uchar *Xecode;
397  PCRE_PUCHAR Xmstart;
398  int Xoffset_top;
399  eptrblock *Xeptrb;
400  unsigned int Xrdepth;
401 
402  /* Function local variables */
403 
404  PCRE_PUCHAR Xcallpat;
405 #ifdef SUPPORT_UTF
406  PCRE_PUCHAR Xcharptr;
407 #endif
408  PCRE_PUCHAR Xdata;
409  PCRE_PUCHAR Xnext;
410  PCRE_PUCHAR Xpp;
411  PCRE_PUCHAR Xprev;
412  PCRE_PUCHAR Xsaved_eptr;
413 
414  recursion_info Xnew_recursive;
415 
416  BOOL Xcur_is_word;
417  BOOL Xcondition;
418  BOOL Xprev_is_word;
419 
420 #ifdef SUPPORT_UCP
421  int Xprop_type;
422  unsigned int Xprop_value;
423  int Xprop_fail_result;
424  int Xoclength;
425  pcre_uchar Xocchars[6];
426 #endif
427 
428  int Xcodelink;
429  int Xctype;
430  unsigned int Xfc;
431  int Xfi;
432  int Xlength;
433  int Xmax;
434  int Xmin;
435  unsigned int Xnumber;
436  int Xoffset;
437  unsigned int Xop;
438  pcre_int32 Xsave_capture_last;
439  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
440  int Xstacksave[REC_STACK_SAVE_MAX];
441 
442  eptrblock Xnewptrb;
443 
444  /* Where to jump back to */
445 
446  int Xwhere;
447 
448 } heapframe;
449 
450 #endif
451 
452 
453 /***************************************************************************
454 ***************************************************************************/
455 
456 
457 
458 /*************************************************
459 * Match from current position *
460 *************************************************/
461 
462 /* This function is called recursively in many circumstances. Whenever it
463 returns a negative (error) response, the outer incarnation must also return the
464 same response. */
465 
466 /* These macros pack up tests that are used for partial matching, and which
467 appear several times in the code. We set the "hit end" flag if the pointer is
468 at the end of the subject and also past the start of the subject (i.e.
469 something has been matched). For hard partial matching, we then return
470 immediately. The second one is used when we already know we are past the end of
471 the subject. */
472 
473 #define CHECK_PARTIAL()\
474  if (md->partial != 0 && eptr >= md->end_subject && \
475  eptr > md->start_used_ptr) \
476  { \
477  md->hitend = TRUE; \
478  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
479  }
480 
481 #define SCHECK_PARTIAL()\
482  if (md->partial != 0 && eptr > md->start_used_ptr) \
483  { \
484  md->hitend = TRUE; \
485  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
486  }
487 
488 
489 /* Performance note: It might be tempting to extract commonly used fields from
490 the md structure (e.g. utf, end_subject) into individual variables to improve
491 performance. Tests using gcc on a SPARC disproved this; in the first case, it
492 made performance worse.
493 
494 Arguments:
495  eptr pointer to current character in subject
496  ecode pointer to current position in compiled code
497  mstart pointer to the current match start position (can be modified
498  by encountering \K)
499  offset_top current top pointer
500  md pointer to "static" info for the match
501  eptrb pointer to chain of blocks containing eptr at start of
502  brackets - for testing for empty matches
503  rdepth the recursion depth
504 
505 Returns: MATCH_MATCH if matched ) these values are >= 0
506  MATCH_NOMATCH if failed to match )
507  a negative MATCH_xxx value for PRUNE, SKIP, etc
508  a negative PCRE_ERROR_xxx value if aborted by an error condition
509  (e.g. stopped by repeated call or recursion limit)
510 */
511 
512 static int
514  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
515  unsigned int rdepth)
516 {
517 /* These variables do not need to be preserved over recursion in this function,
518 so they can be ordinary variables in all cases. Mark some of them with
519 "register" because they are used a lot in loops. */
520 
521 register int rrc; /* Returns from recursive calls */
522 register int i; /* Used for loops not involving calls to RMATCH() */
523 register pcre_uint32 c; /* Character values not kept over RMATCH() calls */
524 register BOOL utf; /* Local copy of UTF flag for speed */
525 
526 BOOL minimize, possessive; /* Quantifier options */
527 BOOL caseless;
528 int condcode;
529 
530 /* When recursion is not being used, all "local" variables that have to be
531 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
532 frame on the stack here; subsequent instantiations are obtained from the heap
533 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
534 the top-level on the stack rather than malloc-ing them all gives a performance
535 boost in many cases where there is not much "recursion". */
536 
537 #ifdef NO_RECURSE
538 heapframe *frame = (heapframe *)md->match_frames_base;
539 
540 /* Copy in the original argument variables */
541 
542 frame->Xeptr = eptr;
543 frame->Xecode = ecode;
544 frame->Xmstart = mstart;
545 frame->Xoffset_top = offset_top;
546 frame->Xeptrb = eptrb;
547 frame->Xrdepth = rdepth;
548 
549 /* This is where control jumps back to to effect "recursion" */
550 
551 HEAP_RECURSE:
552 
553 /* Macros make the argument variables come from the current frame */
554 
555 #define eptr frame->Xeptr
556 #define ecode frame->Xecode
557 #define mstart frame->Xmstart
558 #define offset_top frame->Xoffset_top
559 #define eptrb frame->Xeptrb
560 #define rdepth frame->Xrdepth
561 
562 /* Ditto for the local variables */
563 
564 #ifdef SUPPORT_UTF
565 #define charptr frame->Xcharptr
566 #endif
567 #define callpat frame->Xcallpat
568 #define codelink frame->Xcodelink
569 #define data frame->Xdata
570 #define next frame->Xnext
571 #define pp frame->Xpp
572 #define prev frame->Xprev
573 #define saved_eptr frame->Xsaved_eptr
574 
575 #define new_recursive frame->Xnew_recursive
576 
577 #define cur_is_word frame->Xcur_is_word
578 #define condition frame->Xcondition
579 #define prev_is_word frame->Xprev_is_word
580 
581 #ifdef SUPPORT_UCP
582 #define prop_type frame->Xprop_type
583 #define prop_value frame->Xprop_value
584 #define prop_fail_result frame->Xprop_fail_result
585 #define oclength frame->Xoclength
586 #define occhars frame->Xocchars
587 #endif
588 
589 #define ctype frame->Xctype
590 #define fc frame->Xfc
591 #define fi frame->Xfi
592 #define length frame->Xlength
593 #define max frame->Xmax
594 #define min frame->Xmin
595 #define number frame->Xnumber
596 #define offset frame->Xoffset
597 #define op frame->Xop
598 #define save_capture_last frame->Xsave_capture_last
599 #define save_offset1 frame->Xsave_offset1
600 #define save_offset2 frame->Xsave_offset2
601 #define save_offset3 frame->Xsave_offset3
602 #define stacksave frame->Xstacksave
603 
604 #define newptrb frame->Xnewptrb
605 
606 /* When recursion is being used, local variables are allocated on the stack and
607 get preserved during recursion in the normal way. In this environment, fi and
608 i, and fc and c, can be the same variables. */
609 
610 #else /* NO_RECURSE not defined */
611 #define fi i
612 #define fc c
613 
614 /* Many of the following variables are used only in small blocks of the code.
615 My normal style of coding would have declared them within each of those blocks.
616 However, in order to accommodate the version of this code that uses an external
617 "stack" implemented on the heap, it is easier to declare them all here, so the
618 declarations can be cut out in a block. The only declarations within blocks
619 below are for variables that do not have to be preserved over a recursive call
620 to RMATCH(). */
621 
622 #ifdef SUPPORT_UTF
623 const pcre_uchar *charptr;
624 #endif
625 const pcre_uchar *callpat;
626 const pcre_uchar *data;
627 const pcre_uchar *next;
628 PCRE_PUCHAR pp;
629 const pcre_uchar *prev;
630 PCRE_PUCHAR saved_eptr;
631 
632 recursion_info new_recursive;
633 
634 BOOL cur_is_word;
635 BOOL condition;
636 BOOL prev_is_word;
637 
638 #ifdef SUPPORT_UCP
639 int prop_type;
640 unsigned int prop_value;
641 int prop_fail_result;
642 int oclength;
643 pcre_uchar occhars[6];
644 #endif
645 
646 int codelink;
647 int ctype;
648 int length;
649 int max;
650 int min;
651 unsigned int number;
652 int offset;
653 unsigned int op;
654 pcre_int32 save_capture_last;
655 int save_offset1, save_offset2, save_offset3;
656 int stacksave[REC_STACK_SAVE_MAX];
657 
658 eptrblock newptrb;
659 
660 /* There is a special fudge for calling match() in a way that causes it to
661 measure the size of its basic stack frame when the stack is being used for
662 recursion. The second argument (ecode) being NULL triggers this behaviour. It
663 cannot normally ever be NULL. The return is the negated value of the frame
664 size. */
665 
666 if (ecode == NULL)
667  {
668  if (rdepth == 0)
669  return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
670  else
671  {
672  int len = (int)((char *)&rdepth - (char *)eptr);
673  return (len > 0)? -len : len;
674  }
675  }
676 #endif /* NO_RECURSE */
677 
678 /* To save space on the stack and in the heap frame, I have doubled up on some
679 of the local variables that are used only in localised parts of the code, but
680 still need to be preserved over recursive calls of match(). These macros define
681 the alternative names that are used. */
682 
683 #define allow_zero cur_is_word
684 #define cbegroup condition
685 #define code_offset codelink
686 #define condassert condition
687 #define matched_once prev_is_word
688 #define foc number
689 #define save_mark data
690 
691 /* These statements are here to stop the compiler complaining about uninitialized
692 variables. */
693 
694 #ifdef SUPPORT_UCP
695 prop_value = 0;
696 prop_fail_result = 0;
697 #endif
698 
699 
700 /* This label is used for tail recursion, which is used in a few cases even
701 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
702 used. Thanks to Ian Taylor for noticing this possibility and sending the
703 original patch. */
704 
705 TAIL_RECURSE:
706 
707 /* OK, now we can get on with the real code of the function. Recursive calls
708 are specified by the macro RMATCH and RRETURN is used to return. When
709 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
710 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
711 defined). However, RMATCH isn't like a function call because it's quite a
712 complicated macro. It has to be used in one particular way. This shouldn't,
713 however, impact performance when true recursion is being used. */
714 
715 #ifdef SUPPORT_UTF
716 utf = md->utf; /* Local copy of the flag */
717 #else
718 utf = FALSE;
719 #endif
720 
721 /* First check that we haven't called match() too many times, or that we
722 haven't exceeded the recursive call limit. */
723 
724 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
725 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
726 
727 /* At the start of a group with an unlimited repeat that may match an empty
728 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
729 done this way to save having to use another function argument, which would take
730 up space on the stack. See also MATCH_CONDASSERT below.
731 
732 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
733 such remembered pointers, to be checked when we hit the closing ket, in order
734 to break infinite loops that match no characters. When match() is called in
735 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
736 NOT be used with tail recursion, because the memory block that is used is on
737 the stack, so a new one may be required for each match(). */
738 
739 if (md->match_function_type == MATCH_CBEGROUP)
740  {
741  newptrb.epb_saved_eptr = eptr;
742  newptrb.epb_prev = eptrb;
743  eptrb = &newptrb;
744  md->match_function_type = 0;
745  }
746 
747 /* Now start processing the opcodes. */
748 
749 for (;;)
750  {
751  minimize = possessive = FALSE;
752  op = *ecode;
753 
754  switch(op)
755  {
756  case OP_MARK:
757  md->nomatch_mark = ecode + 2;
758  md->mark = NULL; /* In case previously set by assertion */
759  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
760  eptrb, RM55);
761  if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
762  md->mark == NULL) md->mark = ecode + 2;
763 
764  /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
765  argument, and we must check whether that argument matches this MARK's
766  argument. It is passed back in md->start_match_ptr (an overloading of that
767  variable). If it does match, we reset that variable to the current subject
768  position and return MATCH_SKIP. Otherwise, pass back the return code
769  unaltered. */
770 
771  else if (rrc == MATCH_SKIP_ARG &&
772  STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
773  {
774  md->start_match_ptr = eptr;
776  }
777  RRETURN(rrc);
778 
779  case OP_FAIL:
781 
782  case OP_COMMIT:
783  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784  eptrb, RM52);
785  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
787 
788  case OP_PRUNE:
789  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790  eptrb, RM51);
791  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
793 
794  case OP_PRUNE_ARG:
795  md->nomatch_mark = ecode + 2;
796  md->mark = NULL; /* In case previously set by assertion */
797  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
798  eptrb, RM56);
799  if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
800  md->mark == NULL) md->mark = ecode + 2;
801  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803 
804  case OP_SKIP:
805  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
806  eptrb, RM53);
807  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808  md->start_match_ptr = eptr; /* Pass back current position */
810 
811  /* Note that, for Perl compatibility, SKIP with an argument does NOT set
812  nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
813  not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
814  that failed and any that precede it (either they also failed, or were not
815  triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
816  SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
817  set to the count of the one that failed. */
818 
819  case OP_SKIP_ARG:
820  md->skip_arg_count++;
821  if (md->skip_arg_count <= md->ignore_skip_arg)
822  {
823  ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
824  break;
825  }
826  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
827  eptrb, RM57);
828  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829 
830  /* Pass back the current skip name by overloading md->start_match_ptr and
831  returning the special MATCH_SKIP_ARG return code. This will either be
832  caught by a matching MARK, or get to the top, where it causes a rematch
833  with md->ignore_skip_arg set to the value of md->skip_arg_count. */
834 
835  md->start_match_ptr = ecode + 2;
837 
838  /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
839  the branch in which it occurs can be determined. Overload the start of
840  match pointer to do this. */
841 
842  case OP_THEN:
843  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
844  eptrb, RM54);
845  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
846  md->start_match_ptr = ecode;
848 
849  case OP_THEN_ARG:
850  md->nomatch_mark = ecode + 2;
851  md->mark = NULL; /* In case previously set by assertion */
852  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
853  md, eptrb, RM58);
854  if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
855  md->mark == NULL) md->mark = ecode + 2;
856  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
857  md->start_match_ptr = ecode;
859 
860  /* Handle an atomic group that does not contain any capturing parentheses.
861  This can be handled like an assertion. Prior to 8.13, all atomic groups
862  were handled this way. In 8.13, the code was changed as below for ONCE, so
863  that backups pass through the group and thereby reset captured values.
864  However, this uses a lot more stack, so in 8.20, atomic groups that do not
865  contain any captures generate OP_ONCE_NC, which can be handled in the old,
866  less stack intensive way.
867 
868  Check the alternative branches in turn - the matching won't pass the KET
869  for this kind of subpattern. If any one branch matches, we carry on as at
870  the end of a normal bracket, leaving the subject pointer, but resetting
871  the start-of-match value in case it was changed by \K. */
872 
873  case OP_ONCE_NC:
874  prev = ecode;
875  saved_eptr = eptr;
876  save_mark = md->mark;
877  do
878  {
879  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
880  if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
881  {
882  mstart = md->start_match_ptr;
883  break;
884  }
885  if (rrc == MATCH_THEN)
886  {
887  next = ecode + GET(ecode,1);
888  if (md->start_match_ptr < next &&
889  (*ecode == OP_ALT || *next == OP_ALT))
890  rrc = MATCH_NOMATCH;
891  }
892 
893  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
894  ecode += GET(ecode,1);
895  md->mark = save_mark;
896  }
897  while (*ecode == OP_ALT);
898 
899  /* If we hit the end of the group (which could be repeated), fail */
900 
901  if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
902 
903  /* Continue as from after the group, updating the offsets high water
904  mark, since extracts may have been taken. */
905 
906  do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
907 
908  offset_top = md->end_offset_top;
909  eptr = md->end_match_ptr;
910 
911  /* For a non-repeating ket, just continue at this level. This also
912  happens for a repeating ket if no characters were matched in the group.
913  This is the forcible breaking of infinite loops as implemented in Perl
914  5.005. */
915 
916  if (*ecode == OP_KET || eptr == saved_eptr)
917  {
918  ecode += 1+LINK_SIZE;
919  break;
920  }
921 
922  /* The repeating kets try the rest of the pattern or restart from the
923  preceding bracket, in the appropriate order. The second "call" of match()
924  uses tail recursion, to avoid using another stack frame. */
925 
926  if (*ecode == OP_KETRMIN)
927  {
928  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
929  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
930  ecode = prev;
931  goto TAIL_RECURSE;
932  }
933  else /* OP_KETRMAX */
934  {
935  RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
936  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
937  ecode += 1 + LINK_SIZE;
938  goto TAIL_RECURSE;
939  }
940  /* Control never gets here */
941 
942  /* Handle a capturing bracket, other than those that are possessive with an
943  unlimited repeat. If there is space in the offset vector, save the current
944  subject position in the working slot at the top of the vector. We mustn't
945  change the current values of the data slot, because they may be set from a
946  previous iteration of this group, and be referred to by a reference inside
947  the group. A failure to match might occur after the group has succeeded,
948  if something later on doesn't match. For this reason, we need to restore
949  the working value and also the values of the final offsets, in case they
950  were set by a previous iteration of the same bracket.
951 
952  If there isn't enough space in the offset vector, treat this as if it were
953  a non-capturing bracket. Don't worry about setting the flag for the error
954  case here; that is handled in the code for KET. */
955 
956  case OP_CBRA:
957  case OP_SCBRA:
958  number = GET2(ecode, 1+LINK_SIZE);
959  offset = number << 1;
960 
961 #ifdef PCRE_DEBUG
962  printf("start bracket %d\n", number);
963  printf("subject=");
964  pchars(eptr, 16, TRUE, md);
965  printf("\n");
966 #endif
967 
968  if (offset < md->offset_max)
969  {
970  save_offset1 = md->offset_vector[offset];
971  save_offset2 = md->offset_vector[offset+1];
972  save_offset3 = md->offset_vector[md->offset_end - number];
973  save_capture_last = md->capture_last;
974  save_mark = md->mark;
975 
976  DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
977  md->offset_vector[md->offset_end - number] =
978  (int)(eptr - md->start_subject);
979 
980  for (;;)
981  {
982  if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
983  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
984  eptrb, RM1);
985  if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
986 
987  /* If we backed up to a THEN, check whether it is within the current
988  branch by comparing the address of the THEN that is passed back with
989  the end of the branch. If it is within the current branch, and the
990  branch is one of two or more alternatives (it either starts or ends
991  with OP_ALT), we have reached the limit of THEN's action, so convert
992  the return code to NOMATCH, which will cause normal backtracking to
993  happen from now on. Otherwise, THEN is passed back to an outer
994  alternative. This implements Perl's treatment of parenthesized groups,
995  where a group not containing | does not affect the current alternative,
996  that is, (X) is NOT the same as (X|(*F)). */
997 
998  if (rrc == MATCH_THEN)
999  {
1000  next = ecode + GET(ecode,1);
1001  if (md->start_match_ptr < next &&
1002  (*ecode == OP_ALT || *next == OP_ALT))
1003  rrc = MATCH_NOMATCH;
1004  }
1005 
1006  /* Anything other than NOMATCH is passed back. */
1007 
1008  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1009  md->capture_last = save_capture_last;
1010  ecode += GET(ecode, 1);
1011  md->mark = save_mark;
1012  if (*ecode != OP_ALT) break;
1013  }
1014 
1015  DPRINTF(("bracket %d failed\n", number));
1016  md->offset_vector[offset] = save_offset1;
1017  md->offset_vector[offset+1] = save_offset2;
1018  md->offset_vector[md->offset_end - number] = save_offset3;
1019 
1020  /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1021 
1022  RRETURN(rrc);
1023  }
1024 
1025  /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1026  as a non-capturing bracket. */
1027 
1028  /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029  /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1030 
1031  DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1032 
1033  /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034  /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035 
1036  /* Non-capturing or atomic group, except for possessive with unlimited
1037  repeat and ONCE group with no captures. Loop for all the alternatives.
1038 
1039  When we get to the final alternative within the brackets, we used to return
1040  the result of a recursive call to match() whatever happened so it was
1041  possible to reduce stack usage by turning this into a tail recursion,
1042  except in the case of a possibly empty group. However, now that there is
1043  the possibility of (*THEN) occurring in the final alternative, this
1044  optimization is no longer always possible.
1045 
1046  We can optimize if we know there are no (*THEN)s in the pattern; at present
1047  this is the best that can be done.
1048 
1049  MATCH_ONCE is returned when the end of an atomic group is successfully
1050  reached, but subsequent matching fails. It passes back up the tree (causing
1051  captured values to be reset) until the original atomic group level is
1052  reached. This is tested by comparing md->once_target with the start of the
1053  group. At this point, the return is converted into MATCH_NOMATCH so that
1054  previous backup points can be taken. */
1055 
1056  case OP_ONCE:
1057  case OP_BRA:
1058  case OP_SBRA:
1059  DPRINTF(("start non-capturing bracket\n"));
1060 
1061  for (;;)
1062  {
1063  if (op >= OP_SBRA || op == OP_ONCE)
1064  md->match_function_type = MATCH_CBEGROUP;
1065 
1066  /* If this is not a possibly empty group, and there are no (*THEN)s in
1067  the pattern, and this is the final alternative, optimize as described
1068  above. */
1069 
1070  else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1071  {
1072  ecode += PRIV(OP_lengths)[*ecode];
1073  goto TAIL_RECURSE;
1074  }
1075 
1076  /* In all other cases, we have to make another call to match(). */
1077 
1078  save_mark = md->mark;
1079  save_capture_last = md->capture_last;
1080  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1081  RM2);
1082 
1083  /* See comment in the code for capturing groups above about handling
1084  THEN. */
1085 
1086  if (rrc == MATCH_THEN)
1087  {
1088  next = ecode + GET(ecode,1);
1089  if (md->start_match_ptr < next &&
1090  (*ecode == OP_ALT || *next == OP_ALT))
1091  rrc = MATCH_NOMATCH;
1092  }
1093 
1094  if (rrc != MATCH_NOMATCH)
1095  {
1096  if (rrc == MATCH_ONCE)
1097  {
1098  const pcre_uchar *scode = ecode;
1099  if (*scode != OP_ONCE) /* If not at start, find it */
1100  {
1101  while (*scode == OP_ALT) scode += GET(scode, 1);
1102  scode -= GET(scode, 1);
1103  }
1104  if (md->once_target == scode) rrc = MATCH_NOMATCH;
1105  }
1106  RRETURN(rrc);
1107  }
1108  ecode += GET(ecode, 1);
1109  md->mark = save_mark;
1110  if (*ecode != OP_ALT) break;
1111  md->capture_last = save_capture_last;
1112  }
1113 
1115 
1116  /* Handle possessive capturing brackets with an unlimited repeat. We come
1117  here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1118  handled similarly to the normal case above. However, the matching is
1119  different. The end of these brackets will always be OP_KETRPOS, which
1120  returns MATCH_KETRPOS without going further in the pattern. By this means
1121  we can handle the group by iteration rather than recursion, thereby
1122  reducing the amount of stack needed. */
1123 
1124  case OP_CBRAPOS:
1125  case OP_SCBRAPOS:
1126  allow_zero = FALSE;
1127 
1128  POSSESSIVE_CAPTURE:
1129  number = GET2(ecode, 1+LINK_SIZE);
1130  offset = number << 1;
1131 
1132 #ifdef PCRE_DEBUG
1133  printf("start possessive bracket %d\n", number);
1134  printf("subject=");
1135  pchars(eptr, 16, TRUE, md);
1136  printf("\n");
1137 #endif
1138 
1139  if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1140 
1141  matched_once = FALSE;
1142  code_offset = (int)(ecode - md->start_code);
1143 
1144  save_offset1 = md->offset_vector[offset];
1145  save_offset2 = md->offset_vector[offset+1];
1146  save_offset3 = md->offset_vector[md->offset_end - number];
1147  save_capture_last = md->capture_last;
1148 
1149  DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1150 
1151  /* Each time round the loop, save the current subject position for use
1152  when the group matches. For MATCH_MATCH, the group has matched, so we
1153  restart it with a new subject starting position, remembering that we had
1154  at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1155  usual. If we haven't matched any alternatives in any iteration, check to
1156  see if a previous iteration matched. If so, the group has matched;
1157  continue from afterwards. Otherwise it has failed; restore the previous
1158  capture values before returning NOMATCH. */
1159 
1160  for (;;)
1161  {
1162  md->offset_vector[md->offset_end - number] =
1163  (int)(eptr - md->start_subject);
1164  if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1165  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1166  eptrb, RM63);
1167  if (rrc == MATCH_KETRPOS)
1168  {
1169  offset_top = md->end_offset_top;
1170  ecode = md->start_code + code_offset;
1171  save_capture_last = md->capture_last;
1172  matched_once = TRUE;
1173  mstart = md->start_match_ptr; /* In case \K changed it */
1174  if (eptr == md->end_match_ptr) /* Matched an empty string */
1175  {
1176  do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1177  break;
1178  }
1179  eptr = md->end_match_ptr;
1180  continue;
1181  }
1182 
1183  /* See comment in the code for capturing groups above about handling
1184  THEN. */
1185 
1186  if (rrc == MATCH_THEN)
1187  {
1188  next = ecode + GET(ecode,1);
1189  if (md->start_match_ptr < next &&
1190  (*ecode == OP_ALT || *next == OP_ALT))
1191  rrc = MATCH_NOMATCH;
1192  }
1193 
1194  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195  md->capture_last = save_capture_last;
1196  ecode += GET(ecode, 1);
1197  if (*ecode != OP_ALT) break;
1198  }
1199 
1200  if (!matched_once)
1201  {
1202  md->offset_vector[offset] = save_offset1;
1203  md->offset_vector[offset+1] = save_offset2;
1204  md->offset_vector[md->offset_end - number] = save_offset3;
1205  }
1206 
1207  if (allow_zero || matched_once)
1208  {
1209  ecode += 1 + LINK_SIZE;
1210  break;
1211  }
1212 
1214 
1215  /* Non-capturing possessive bracket with unlimited repeat. We come here
1216  from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1217  without the capturing complication. It is written out separately for speed
1218  and cleanliness. */
1219 
1220  case OP_BRAPOS:
1221  case OP_SBRAPOS:
1222  allow_zero = FALSE;
1223 
1224  POSSESSIVE_NON_CAPTURE:
1225  matched_once = FALSE;
1226  code_offset = (int)(ecode - md->start_code);
1227  save_capture_last = md->capture_last;
1228 
1229  for (;;)
1230  {
1231  if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1232  RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1233  eptrb, RM48);
1234  if (rrc == MATCH_KETRPOS)
1235  {
1236  offset_top = md->end_offset_top;
1237  ecode = md->start_code + code_offset;
1238  matched_once = TRUE;
1239  mstart = md->start_match_ptr; /* In case \K reset it */
1240  if (eptr == md->end_match_ptr) /* Matched an empty string */
1241  {
1242  do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1243  break;
1244  }
1245  eptr = md->end_match_ptr;
1246  continue;
1247  }
1248 
1249  /* See comment in the code for capturing groups above about handling
1250  THEN. */
1251 
1252  if (rrc == MATCH_THEN)
1253  {
1254  next = ecode + GET(ecode,1);
1255  if (md->start_match_ptr < next &&
1256  (*ecode == OP_ALT || *next == OP_ALT))
1257  rrc = MATCH_NOMATCH;
1258  }
1259 
1260  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1261  ecode += GET(ecode, 1);
1262  if (*ecode != OP_ALT) break;
1263  md->capture_last = save_capture_last;
1264  }
1265 
1266  if (matched_once || allow_zero)
1267  {
1268  ecode += 1 + LINK_SIZE;
1269  break;
1270  }
1272 
1273  /* Control never reaches here. */
1274 
1275  /* Conditional group: compilation checked that there are no more than two
1276  branches. If the condition is false, skipping the first branch takes us
1277  past the end of the item if there is only one branch, but that's exactly
1278  what we want. */
1279 
1280  case OP_COND:
1281  case OP_SCOND:
1282 
1283  /* The variable codelink will be added to ecode when the condition is
1284  false, to get to the second branch. Setting it to the offset to the ALT
1285  or KET, then incrementing ecode achieves this effect. We now have ecode
1286  pointing to the condition or callout. */
1287 
1288  codelink = GET(ecode, 1); /* Offset to the second branch */
1289  ecode += 1 + LINK_SIZE; /* From this opcode */
1290 
1291  /* Because of the way auto-callout works during compile, a callout item is
1292  inserted between OP_COND and an assertion condition. */
1293 
1294  if (*ecode == OP_CALLOUT)
1295  {
1296  if (PUBL(callout) != NULL)
1297  {
1298  PUBL(callout_block) cb;
1299  cb.version = 2; /* Version 2 of the callout block */
1300  cb.callout_number = ecode[1];
1301  cb.offset_vector = md->offset_vector;
1302 #if defined COMPILE_PCRE8
1303  cb.subject = (PCRE_SPTR)md->start_subject;
1304 #elif defined COMPILE_PCRE16
1305  cb.subject = (PCRE_SPTR16)md->start_subject;
1306 #elif defined COMPILE_PCRE32
1307  cb.subject = (PCRE_SPTR32)md->start_subject;
1308 #endif
1309  cb.subject_length = (int)(md->end_subject - md->start_subject);
1310  cb.start_match = (int)(mstart - md->start_subject);
1311  cb.current_position = (int)(eptr - md->start_subject);
1312  cb.pattern_position = GET(ecode, 2);
1313  cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1314  cb.capture_top = offset_top/2;
1315  cb.capture_last = md->capture_last & CAPLMASK;
1316  /* Internal change requires this for API compatibility. */
1317  if (cb.capture_last == 0) cb.capture_last = -1;
1318  cb.callout_data = md->callout_data;
1319  cb.mark = md->nomatch_mark;
1320  if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1321  if (rrc < 0) RRETURN(rrc);
1322  }
1323 
1324  /* Advance ecode past the callout, so it now points to the condition. We
1325  must adjust codelink so that the value of ecode+codelink is unchanged. */
1326 
1327  ecode += PRIV(OP_lengths)[OP_CALLOUT];
1328  codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1329  }
1330 
1331  /* Test the various possible conditions */
1332 
1333  condition = FALSE;
1334  switch(condcode = *ecode)
1335  {
1336  case OP_RREF: /* Numbered group recursion test */
1337  if (md->recursive != NULL) /* Not recursing => FALSE */
1338  {
1339  unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
1340  condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1341  }
1342  break;
1343 
1344  case OP_DNRREF: /* Duplicate named group recursion test */
1345  if (md->recursive != NULL)
1346  {
1347  int count = GET2(ecode, 1 + IMM2_SIZE);
1348  pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1349  while (count-- > 0)
1350  {
1351  unsigned int recno = GET2(slot, 0);
1352  condition = recno == md->recursive->group_num;
1353  if (condition) break;
1354  slot += md->name_entry_size;
1355  }
1356  }
1357  break;
1358 
1359  case OP_CREF: /* Numbered group used test */
1360  offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1361  condition = offset < offset_top && md->offset_vector[offset] >= 0;
1362  break;
1363 
1364  case OP_DNCREF: /* Duplicate named group used test */
1365  {
1366  int count = GET2(ecode, 1 + IMM2_SIZE);
1367  pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1368  while (count-- > 0)
1369  {
1370  offset = GET2(slot, 0) << 1;
1371  condition = offset < offset_top && md->offset_vector[offset] >= 0;
1372  if (condition) break;
1373  slot += md->name_entry_size;
1374  }
1375  }
1376  break;
1377 
1378  case OP_DEF: /* DEFINE - always false */
1379  case OP_FAIL: /* From optimized (?!) condition */
1380  break;
1381 
1382  /* The condition is an assertion. Call match() to evaluate it - setting
1383  md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1384  of an assertion. */
1385 
1386  default:
1387  md->match_function_type = MATCH_CONDASSERT;
1388  RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1389  if (rrc == MATCH_MATCH)
1390  {
1391  if (md->end_offset_top > offset_top)
1392  offset_top = md->end_offset_top; /* Captures may have happened */
1393  condition = TRUE;
1394 
1395  /* Advance ecode past the assertion to the start of the first branch,
1396  but adjust it so that the general choosing code below works. If the
1397  assertion has a quantifier that allows zero repeats we must skip over
1398  the BRAZERO. This is a lunatic thing to do, but somebody did! */
1399 
1400  if (*ecode == OP_BRAZERO) ecode++;
1401  ecode += GET(ecode, 1);
1402  while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1403  ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1404  }
1405 
1406  /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1407  assertion; it is therefore treated as NOMATCH. Any other return is an
1408  error. */
1409 
1410  else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1411  {
1412  RRETURN(rrc); /* Need braces because of following else */
1413  }
1414  break;
1415  }
1416 
1417  /* Choose branch according to the condition */
1418 
1419  ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1420 
1421  /* We are now at the branch that is to be obeyed. As there is only one, we
1422  can use tail recursion to avoid using another stack frame, except when
1423  there is unlimited repeat of a possibly empty group. In the latter case, a
1424  recursive call to match() is always required, unless the second alternative
1425  doesn't exist, in which case we can just plough on. Note that, for
1426  compatibility with Perl, the | in a conditional group is NOT treated as
1427  creating two alternatives. If a THEN is encountered in the branch, it
1428  propagates out to the enclosing alternative (unless nested in a deeper set
1429  of alternatives, of course). */
1430 
1431  if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1432  {
1433  if (op != OP_SCOND)
1434  {
1435  goto TAIL_RECURSE;
1436  }
1437 
1438  md->match_function_type = MATCH_CBEGROUP;
1439  RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1440  RRETURN(rrc);
1441  }
1442 
1443  /* Condition false & no alternative; continue after the group. */
1444 
1445  else
1446  {
1447  }
1448  break;
1449 
1450 
1451  /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1452  to close any currently open capturing brackets. */
1453 
1454  case OP_CLOSE:
1455  number = GET2(ecode, 1); /* Must be less than 65536 */
1456  offset = number << 1;
1457 
1458 #ifdef PCRE_DEBUG
1459  printf("end bracket %d at *ACCEPT", number);
1460  printf("\n");
1461 #endif
1462 
1463  md->capture_last = (md->capture_last & OVFLMASK) | number;
1464  if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1465  {
1466  md->offset_vector[offset] =
1467  md->offset_vector[md->offset_end - number];
1468  md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1469 
1470  /* If this group is at or above the current highwater mark, ensure that
1471  any groups between the current high water mark and this group are marked
1472  unset and then update the high water mark. */
1473 
1474  if (offset >= offset_top)
1475  {
1476  register int *iptr = md->offset_vector + offset_top;
1477  register int *iend = md->offset_vector + offset;
1478  while (iptr < iend) *iptr++ = -1;
1479  offset_top = offset + 2;
1480  }
1481  }
1482  ecode += 1 + IMM2_SIZE;
1483  break;
1484 
1485 
1486  /* End of the pattern, either real or forced. */
1487 
1488  case OP_END:
1489  case OP_ACCEPT:
1490  case OP_ASSERT_ACCEPT:
1491 
1492  /* If we have matched an empty string, fail if not in an assertion and not
1493  in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1494  is set and we have matched at the start of the subject. In both cases,
1495  backtracking will then try other alternatives, if any. */
1496 
1497  if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1498  md->recursive == NULL &&
1499  (md->notempty ||
1500  (md->notempty_atstart &&
1501  mstart == md->start_subject + md->start_offset)))
1503 
1504  /* Otherwise, we have a match. */
1505 
1506  md->end_match_ptr = eptr; /* Record where we ended */
1507  md->end_offset_top = offset_top; /* and how many extracts were taken */
1508  md->start_match_ptr = mstart; /* and the start (\K can modify) */
1509 
1510  /* For some reason, the macros don't work properly if an expression is
1511  given as the argument to RRETURN when the heap is in use. */
1512 
1513  rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1514  RRETURN(rrc);
1515 
1516  /* Assertion brackets. Check the alternative branches in turn - the
1517  matching won't pass the KET for an assertion. If any one branch matches,
1518  the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1519  start of each branch to move the current point backwards, so the code at
1520  this level is identical to the lookahead case. When the assertion is part
1521  of a condition, we want to return immediately afterwards. The caller of
1522  this incarnation of the match() function will have set MATCH_CONDASSERT in
1523  md->match_function_type, and one of these opcodes will be the first opcode
1524  that is processed. We use a local variable that is preserved over calls to
1525  match() to remember this case. */
1526 
1527  case OP_ASSERT:
1528  case OP_ASSERTBACK:
1529  save_mark = md->mark;
1530  if (md->match_function_type == MATCH_CONDASSERT)
1531  {
1532  condassert = TRUE;
1533  md->match_function_type = 0;
1534  }
1535  else condassert = FALSE;
1536 
1537  /* Loop for each branch */
1538 
1539  do
1540  {
1541  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1542 
1543  /* A match means that the assertion is true; break out of the loop
1544  that matches its alternatives. */
1545 
1546  if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1547  {
1548  mstart = md->start_match_ptr; /* In case \K reset it */
1549  break;
1550  }
1551 
1552  /* If not matched, restore the previous mark setting. */
1553 
1554  md->mark = save_mark;
1555 
1556  /* See comment in the code for capturing groups above about handling
1557  THEN. */
1558 
1559  if (rrc == MATCH_THEN)
1560  {
1561  next = ecode + GET(ecode,1);
1562  if (md->start_match_ptr < next &&
1563  (*ecode == OP_ALT || *next == OP_ALT))
1564  rrc = MATCH_NOMATCH;
1565  }
1566 
1567  /* Anything other than NOMATCH causes the entire assertion to fail,
1568  passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1569  uncaptured THEN, which means they take their normal effect. This
1570  consistent approach does not always have exactly the same effect as in
1571  Perl. */
1572 
1573  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1574  ecode += GET(ecode, 1);
1575  }
1576  while (*ecode == OP_ALT); /* Continue for next alternative */
1577 
1578  /* If we have tried all the alternative branches, the assertion has
1579  failed. If not, we broke out after a match. */
1580 
1581  if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1582 
1583  /* If checking an assertion for a condition, return MATCH_MATCH. */
1584 
1586 
1587  /* Continue from after a successful assertion, updating the offsets high
1588  water mark, since extracts may have been taken during the assertion. */
1589 
1590  do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1591  ecode += 1 + LINK_SIZE;
1592  offset_top = md->end_offset_top;
1593  continue;
1594 
1595  /* Negative assertion: all branches must fail to match for the assertion to
1596  succeed. */
1597 
1598  case OP_ASSERT_NOT:
1599  case OP_ASSERTBACK_NOT:
1600  save_mark = md->mark;
1601  if (md->match_function_type == MATCH_CONDASSERT)
1602  {
1603  condassert = TRUE;
1604  md->match_function_type = 0;
1605  }
1606  else condassert = FALSE;
1607 
1608  /* Loop for each alternative branch. */
1609 
1610  do
1611  {
1612  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1613  md->mark = save_mark; /* Always restore the mark setting */
1614 
1615  switch(rrc)
1616  {
1617  case MATCH_MATCH: /* A successful match means */
1618  case MATCH_ACCEPT: /* the assertion has failed. */
1620 
1621  case MATCH_NOMATCH: /* Carry on with next branch */
1622  break;
1623 
1624  /* See comment in the code for capturing groups above about handling
1625  THEN. */
1626 
1627  case MATCH_THEN:
1628  next = ecode + GET(ecode,1);
1629  if (md->start_match_ptr < next &&
1630  (*ecode == OP_ALT || *next == OP_ALT))
1631  {
1632  rrc = MATCH_NOMATCH;
1633  break;
1634  }
1635  /* Otherwise fall through. */
1636 
1637  /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1638  assertion to fail to match, without considering any more alternatives.
1639  Failing to match means the assertion is true. This is a consistent
1640  approach, but does not always have the same effect as in Perl. */
1641 
1642  case MATCH_COMMIT:
1643  case MATCH_SKIP:
1644  case MATCH_SKIP_ARG:
1645  case MATCH_PRUNE:
1646  do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1647  goto NEG_ASSERT_TRUE; /* Break out of alternation loop */
1648 
1649  /* Anything else is an error */
1650 
1651  default:
1652  RRETURN(rrc);
1653  }
1654 
1655  /* Continue with next branch */
1656 
1657  ecode += GET(ecode,1);
1658  }
1659  while (*ecode == OP_ALT);
1660 
1661  /* All branches in the assertion failed to match. */
1662 
1663  NEG_ASSERT_TRUE:
1664  if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1665  ecode += 1 + LINK_SIZE; /* Continue with current branch */
1666  continue;
1667 
1668  /* Move the subject pointer back. This occurs only at the start of
1669  each branch of a lookbehind assertion. If we are too close to the start to
1670  move back, this match function fails. When working with UTF-8 we move
1671  back a number of characters, not bytes. */
1672 
1673  case OP_REVERSE:
1674 #ifdef SUPPORT_UTF
1675  if (utf)
1676  {
1677  i = GET(ecode, 1);
1678  while (i-- > 0)
1679  {
1680  eptr--;
1681  if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1682  BACKCHAR(eptr);
1683  }
1684  }
1685  else
1686 #endif
1687 
1688  /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1689 
1690  {
1691  eptr -= GET(ecode, 1);
1692  if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1693  }
1694 
1695  /* Save the earliest consulted character, then skip to next op code */
1696 
1697  if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1698  ecode += 1 + LINK_SIZE;
1699  break;
1700 
1701  /* The callout item calls an external function, if one is provided, passing
1702  details of the match so far. This is mainly for debugging, though the
1703  function is able to force a failure. */
1704 
1705  case OP_CALLOUT:
1706  if (PUBL(callout) != NULL)
1707  {
1708  PUBL(callout_block) cb;
1709  cb.version = 2; /* Version 2 of the callout block */
1710  cb.callout_number = ecode[1];
1711  cb.offset_vector = md->offset_vector;
1712 #if defined COMPILE_PCRE8
1713  cb.subject = (PCRE_SPTR)md->start_subject;
1714 #elif defined COMPILE_PCRE16
1715  cb.subject = (PCRE_SPTR16)md->start_subject;
1716 #elif defined COMPILE_PCRE32
1717  cb.subject = (PCRE_SPTR32)md->start_subject;
1718 #endif
1719  cb.subject_length = (int)(md->end_subject - md->start_subject);
1720  cb.start_match = (int)(mstart - md->start_subject);
1721  cb.current_position = (int)(eptr - md->start_subject);
1722  cb.pattern_position = GET(ecode, 2);
1723  cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1724  cb.capture_top = offset_top/2;
1725  cb.capture_last = md->capture_last & CAPLMASK;
1726  /* Internal change requires this for API compatibility. */
1727  if (cb.capture_last == 0) cb.capture_last = -1;
1728  cb.callout_data = md->callout_data;
1729  cb.mark = md->nomatch_mark;
1730  if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1731  if (rrc < 0) RRETURN(rrc);
1732  }
1733  ecode += 2 + 2*LINK_SIZE;
1734  break;
1735 
1736  /* Recursion either matches the current regex, or some subexpression. The
1737  offset data is the offset to the starting bracket from the start of the
1738  whole pattern. (This is so that it works from duplicated subpatterns.)
1739 
1740  The state of the capturing groups is preserved over recursion, and
1741  re-instated afterwards. We don't know how many are started and not yet
1742  finished (offset_top records the completed total) so we just have to save
1743  all the potential data. There may be up to 65535 such values, which is too
1744  large to put on the stack, but using malloc for small numbers seems
1745  expensive. As a compromise, the stack is used when there are no more than
1746  REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1747 
1748  There are also other values that have to be saved. We use a chained
1749  sequence of blocks that actually live on the stack. Thanks to Robin Houston
1750  for the original version of this logic. It has, however, been hacked around
1751  a lot, so he is not to blame for the current way it works. */
1752 
1753  case OP_RECURSE:
1754  {
1755  recursion_info *ri;
1756  unsigned int recno;
1757 
1758  callpat = md->start_code + GET(ecode, 1);
1759  recno = (callpat == md->start_code)? 0 :
1760  GET2(callpat, 1 + LINK_SIZE);
1761 
1762  /* Check for repeating a recursion without advancing the subject pointer.
1763  This should catch convoluted mutual recursions. (Some simple cases are
1764  caught at compile time.) */
1765 
1766  for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1767  if (recno == ri->group_num && eptr == ri->subject_position)
1769 
1770  /* Add to "recursing stack" */
1771 
1772  new_recursive.group_num = recno;
1773  new_recursive.saved_capture_last = md->capture_last;
1774  new_recursive.subject_position = eptr;
1775  new_recursive.prevrec = md->recursive;
1776  md->recursive = &new_recursive;
1777 
1778  /* Where to continue from afterwards */
1779 
1780  ecode += 1 + LINK_SIZE;
1781 
1782  /* Now save the offset data */
1783 
1784  new_recursive.saved_max = md->offset_end;
1785  if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1786  new_recursive.offset_save = stacksave;
1787  else
1788  {
1789  new_recursive.offset_save =
1790  (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1791  if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1792  }
1793  memcpy(new_recursive.offset_save, md->offset_vector,
1794  new_recursive.saved_max * sizeof(int));
1795 
1796  /* OK, now we can do the recursion. After processing each alternative,
1797  restore the offset data and the last captured value. If there were nested
1798  recursions, md->recursive might be changed, so reset it before looping.
1799  */
1800 
1801  DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1802  cbegroup = (*callpat >= OP_SBRA);
1803  do
1804  {
1805  if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1806  RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1807  md, eptrb, RM6);
1808  memcpy(md->offset_vector, new_recursive.offset_save,
1809  new_recursive.saved_max * sizeof(int));
1810  md->capture_last = new_recursive.saved_capture_last;
1811  md->recursive = new_recursive.prevrec;
1812  if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1813  {
1814  DPRINTF(("Recursion matched\n"));
1815  if (new_recursive.offset_save != stacksave)
1816  (PUBL(free))(new_recursive.offset_save);
1817 
1818  /* Set where we got to in the subject, and reset the start in case
1819  it was changed by \K. This *is* propagated back out of a recursion,
1820  for Perl compatibility. */
1821 
1822  eptr = md->end_match_ptr;
1823  mstart = md->start_match_ptr;
1824  goto RECURSION_MATCHED; /* Exit loop; end processing */
1825  }
1826 
1827  /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1828  recursion; they cause a NOMATCH for the entire recursion. These codes
1829  are defined in a range that can be tested for. */
1830 
1831  if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1832  {
1833  if (new_recursive.offset_save != stacksave)
1834  (PUBL(free))(new_recursive.offset_save);
1836  }
1837 
1838  /* Any return code other than NOMATCH is an error. */
1839 
1840  if (rrc != MATCH_NOMATCH)
1841  {
1842  DPRINTF(("Recursion gave error %d\n", rrc));
1843  if (new_recursive.offset_save != stacksave)
1844  (PUBL(free))(new_recursive.offset_save);
1845  RRETURN(rrc);
1846  }
1847 
1848  md->recursive = &new_recursive;
1849  callpat += GET(callpat, 1);
1850  }
1851  while (*callpat == OP_ALT);
1852 
1853  DPRINTF(("Recursion didn't match\n"));
1854  md->recursive = new_recursive.prevrec;
1855  if (new_recursive.offset_save != stacksave)
1856  (PUBL(free))(new_recursive.offset_save);
1858  }
1859 
1860  RECURSION_MATCHED:
1861  break;
1862 
1863  /* An alternation is the end of a branch; scan along to find the end of the
1864  bracketed group and go to there. */
1865 
1866  case OP_ALT:
1867  do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1868  break;
1869 
1870  /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1871  indicating that it may occur zero times. It may repeat infinitely, or not
1872  at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1873  with fixed upper repeat limits are compiled as a number of copies, with the
1874  optional ones preceded by BRAZERO or BRAMINZERO. */
1875 
1876  case OP_BRAZERO:
1877  next = ecode + 1;
1878  RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1879  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1880  do next += GET(next, 1); while (*next == OP_ALT);
1881  ecode = next + 1 + LINK_SIZE;
1882  break;
1883 
1884  case OP_BRAMINZERO:
1885  next = ecode + 1;
1886  do next += GET(next, 1); while (*next == OP_ALT);
1887  RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1888  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1889  ecode++;
1890  break;
1891 
1892  case OP_SKIPZERO:
1893  next = ecode+1;
1894  do next += GET(next,1); while (*next == OP_ALT);
1895  ecode = next + 1 + LINK_SIZE;
1896  break;
1897 
1898  /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1899  here; just jump to the group, with allow_zero set TRUE. */
1900 
1901  case OP_BRAPOSZERO:
1902  op = *(++ecode);
1903  allow_zero = TRUE;
1904  if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1905  goto POSSESSIVE_NON_CAPTURE;
1906 
1907  /* End of a group, repeated or non-repeating. */
1908 
1909  case OP_KET:
1910  case OP_KETRMIN:
1911  case OP_KETRMAX:
1912  case OP_KETRPOS:
1913  prev = ecode - GET(ecode, 1);
1914 
1915  /* If this was a group that remembered the subject start, in order to break
1916  infinite repeats of empty string matches, retrieve the subject start from
1917  the chain. Otherwise, set it NULL. */
1918 
1919  if (*prev >= OP_SBRA || *prev == OP_ONCE)
1920  {
1921  saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1922  eptrb = eptrb->epb_prev; /* Backup to previous group */
1923  }
1924  else saved_eptr = NULL;
1925 
1926  /* If we are at the end of an assertion group or a non-capturing atomic
1927  group, stop matching and return MATCH_MATCH, but record the current high
1928  water mark for use by positive assertions. We also need to record the match
1929  start in case it was changed by \K. */
1930 
1931  if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1932  *prev == OP_ONCE_NC)
1933  {
1934  md->end_match_ptr = eptr; /* For ONCE_NC */
1935  md->end_offset_top = offset_top;
1936  md->start_match_ptr = mstart;
1937  RRETURN(MATCH_MATCH); /* Sets md->mark */
1938  }
1939 
1940  /* For capturing groups we have to check the group number back at the start
1941  and if necessary complete handling an extraction by setting the offsets and
1942  bumping the high water mark. Whole-pattern recursion is coded as a recurse
1943  into group 0, so it won't be picked up here. Instead, we catch it when the
1944  OP_END is reached. Other recursion is handled here. We just have to record
1945  the current subject position and start match pointer and give a MATCH
1946  return. */
1947 
1948  if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1949  *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1950  {
1951  number = GET2(prev, 1+LINK_SIZE);
1952  offset = number << 1;
1953 
1954 #ifdef PCRE_DEBUG
1955  printf("end bracket %d", number);
1956  printf("\n");
1957 #endif
1958 
1959  /* Handle a recursively called group. */
1960 
1961  if (md->recursive != NULL && md->recursive->group_num == number)
1962  {
1963  md->end_match_ptr = eptr;
1964  md->start_match_ptr = mstart;
1966  }
1967 
1968  /* Deal with capturing */
1969 
1970  md->capture_last = (md->capture_last & OVFLMASK) | number;
1971  if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1972  {
1973  /* If offset is greater than offset_top, it means that we are
1974  "skipping" a capturing group, and that group's offsets must be marked
1975  unset. In earlier versions of PCRE, all the offsets were unset at the
1976  start of matching, but this doesn't work because atomic groups and
1977  assertions can cause a value to be set that should later be unset.
1978  Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1979  part of the atomic group, but this is not on the final matching path,
1980  so must be unset when 2 is set. (If there is no group 2, there is no
1981  problem, because offset_top will then be 2, indicating no capture.) */
1982 
1983  if (offset > offset_top)
1984  {
1985  register int *iptr = md->offset_vector + offset_top;
1986  register int *iend = md->offset_vector + offset;
1987  while (iptr < iend) *iptr++ = -1;
1988  }
1989 
1990  /* Now make the extraction */
1991 
1992  md->offset_vector[offset] =
1993  md->offset_vector[md->offset_end - number];
1994  md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1995  if (offset_top <= offset) offset_top = offset + 2;
1996  }
1997  }
1998 
1999  /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2000  and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2001  at a time from the outer level, thus saving stack. This must precede the
2002  empty string test - in this case that test is done at the outer level. */
2003 
2004  if (*ecode == OP_KETRPOS)
2005  {
2006  md->start_match_ptr = mstart; /* In case \K reset it */
2007  md->end_match_ptr = eptr;
2008  md->end_offset_top = offset_top;
2010  }
2011 
2012  /* For an ordinary non-repeating ket, just continue at this level. This
2013  also happens for a repeating ket if no characters were matched in the
2014  group. This is the forcible breaking of infinite loops as implemented in
2015  Perl 5.005. For a non-repeating atomic group that includes captures,
2016  establish a backup point by processing the rest of the pattern at a lower
2017  level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2018  original OP_ONCE level, thereby bypassing intermediate backup points, but
2019  resetting any captures that happened along the way. */
2020 
2021  if (*ecode == OP_KET || eptr == saved_eptr)
2022  {
2023  if (*prev == OP_ONCE)
2024  {
2025  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2026  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2027  md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2029  }
2030  ecode += 1 + LINK_SIZE; /* Carry on at this level */
2031  break;
2032  }
2033 
2034  /* The normal repeating kets try the rest of the pattern or restart from
2035  the preceding bracket, in the appropriate order. In the second case, we can
2036  use tail recursion to avoid using another stack frame, unless we have an
2037  atomic group or an unlimited repeat of a group that can match an empty
2038  string. */
2039 
2040  if (*ecode == OP_KETRMIN)
2041  {
2042  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2043  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2044  if (*prev == OP_ONCE)
2045  {
2046  RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2047  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048  md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2050  }
2051  if (*prev >= OP_SBRA) /* Could match an empty string */
2052  {
2053  RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2054  RRETURN(rrc);
2055  }
2056  ecode = prev;
2057  goto TAIL_RECURSE;
2058  }
2059  else /* OP_KETRMAX */
2060  {
2061  RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2062  if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2063  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2064  if (*prev == OP_ONCE)
2065  {
2066  RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2067  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2068  md->once_target = prev;
2070  }
2071  ecode += 1 + LINK_SIZE;
2072  goto TAIL_RECURSE;
2073  }
2074  /* Control never gets here */
2075 
2076  /* Not multiline mode: start of subject assertion, unless notbol. */
2077 
2078  case OP_CIRC:
2079  if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2080 
2081  /* Start of subject assertion */
2082 
2083  case OP_SOD:
2084  if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2085  ecode++;
2086  break;
2087 
2088  /* Multiline mode: start of subject unless notbol, or after any newline. */
2089 
2090  case OP_CIRCM:
2091  if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2092  if (eptr != md->start_subject &&
2093  (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2095  ecode++;
2096  break;
2097 
2098  /* Start of match assertion */
2099 
2100  case OP_SOM:
2101  if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2102  ecode++;
2103  break;
2104 
2105  /* Reset the start of match point */
2106 
2107  case OP_SET_SOM:
2108  mstart = eptr;
2109  ecode++;
2110  break;
2111 
2112  /* Multiline mode: assert before any newline, or before end of subject
2113  unless noteol is set. */
2114 
2115  case OP_DOLLM:
2116  if (eptr < md->end_subject)
2117  {
2118  if (!IS_NEWLINE(eptr))
2119  {
2120  if (md->partial != 0 &&
2121  eptr + 1 >= md->end_subject &&
2122  NLBLOCK->nltype == NLTYPE_FIXED &&
2123  NLBLOCK->nllen == 2 &&
2124  UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2125  {
2126  md->hitend = TRUE;
2127  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2128  }
2130  }
2131  }
2132  else
2133  {
2134  if (md->noteol) RRETURN(MATCH_NOMATCH);
2135  SCHECK_PARTIAL();
2136  }
2137  ecode++;
2138  break;
2139 
2140  /* Not multiline mode: assert before a terminating newline or before end of
2141  subject unless noteol is set. */
2142 
2143  case OP_DOLL:
2144  if (md->noteol) RRETURN(MATCH_NOMATCH);
2145  if (!md->endonly) goto ASSERT_NL_OR_EOS;
2146 
2147  /* ... else fall through for endonly */
2148 
2149  /* End of subject assertion (\z) */
2150 
2151  case OP_EOD:
2152  if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2153  SCHECK_PARTIAL();
2154  ecode++;
2155  break;
2156 
2157  /* End of subject or ending \n assertion (\Z) */
2158 
2159  case OP_EODN:
2160  ASSERT_NL_OR_EOS:
2161  if (eptr < md->end_subject &&
2162  (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2163  {
2164  if (md->partial != 0 &&
2165  eptr + 1 >= md->end_subject &&
2166  NLBLOCK->nltype == NLTYPE_FIXED &&
2167  NLBLOCK->nllen == 2 &&
2168  UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2169  {
2170  md->hitend = TRUE;
2171  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2172  }
2174  }
2175 
2176  /* Either at end of string or \n before end. */
2177 
2178  SCHECK_PARTIAL();
2179  ecode++;
2180  break;
2181 
2182  /* Word boundary assertions */
2183 
2184  case OP_NOT_WORD_BOUNDARY:
2185  case OP_WORD_BOUNDARY:
2186  {
2187 
2188  /* Find out if the previous and current characters are "word" characters.
2189  It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2190  be "non-word" characters. Remember the earliest consulted character for
2191  partial matching. */
2192 
2193 #ifdef SUPPORT_UTF
2194  if (utf)
2195  {
2196  /* Get status of previous character */
2197 
2198  if (eptr == md->start_subject) prev_is_word = FALSE; else
2199  {
2200  PCRE_PUCHAR lastptr = eptr - 1;
2201  BACKCHAR(lastptr);
2202  if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2203  GETCHAR(c, lastptr);
2204 #ifdef SUPPORT_UCP
2205  if (md->use_ucp)
2206  {
2207  if (c == '_') prev_is_word = TRUE; else
2208  {
2209  int cat = UCD_CATEGORY(c);
2210  prev_is_word = (cat == ucp_L || cat == ucp_N);
2211  }
2212  }
2213  else
2214 #endif
2215  prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2216  }
2217 
2218  /* Get status of next character */
2219 
2220  if (eptr >= md->end_subject)
2221  {
2222  SCHECK_PARTIAL();
2223  cur_is_word = FALSE;
2224  }
2225  else
2226  {
2227  GETCHAR(c, eptr);
2228 #ifdef SUPPORT_UCP
2229  if (md->use_ucp)
2230  {
2231  if (c == '_') cur_is_word = TRUE; else
2232  {
2233  int cat = UCD_CATEGORY(c);
2234  cur_is_word = (cat == ucp_L || cat == ucp_N);
2235  }
2236  }
2237  else
2238 #endif
2239  cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2240  }
2241  }
2242  else
2243 #endif
2244 
2245  /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2246  consistency with the behaviour of \w we do use it in this case. */
2247 
2248  {
2249  /* Get status of previous character */
2250 
2251  if (eptr == md->start_subject) prev_is_word = FALSE; else
2252  {
2253  if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2254 #ifdef SUPPORT_UCP
2255  if (md->use_ucp)
2256  {
2257  c = eptr[-1];
2258  if (c == '_') prev_is_word = TRUE; else
2259  {
2260  int cat = UCD_CATEGORY(c);
2261  prev_is_word = (cat == ucp_L || cat == ucp_N);
2262  }
2263  }
2264  else
2265 #endif
2266  prev_is_word = MAX_255(eptr[-1])
2267  && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2268  }
2269 
2270  /* Get status of next character */
2271 
2272  if (eptr >= md->end_subject)
2273  {
2274  SCHECK_PARTIAL();
2275  cur_is_word = FALSE;
2276  }
2277  else
2278 #ifdef SUPPORT_UCP
2279  if (md->use_ucp)
2280  {
2281  c = *eptr;
2282  if (c == '_') cur_is_word = TRUE; else
2283  {
2284  int cat = UCD_CATEGORY(c);
2285  cur_is_word = (cat == ucp_L || cat == ucp_N);
2286  }
2287  }
2288  else
2289 #endif
2290  cur_is_word = MAX_255(*eptr)
2291  && ((md->ctypes[*eptr] & ctype_word) != 0);
2292  }
2293 
2294  /* Now see if the situation is what we want */
2295 
2296  if ((*ecode++ == OP_WORD_BOUNDARY)?
2297  cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2299  }
2300  break;
2301 
2302  /* Match any single character type except newline; have to take care with
2303  CRLF newlines and partial matching. */
2304 
2305  case OP_ANY:
2306  if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2307  if (md->partial != 0 &&
2308  eptr == md->end_subject - 1 &&
2309  NLBLOCK->nltype == NLTYPE_FIXED &&
2310  NLBLOCK->nllen == 2 &&
2311  UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2312  {
2313  md->hitend = TRUE;
2314  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2315  }
2316 
2317  /* Fall through */
2318 
2319  /* Match any single character whatsoever. */
2320 
2321  case OP_ALLANY:
2322  if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2323  { /* not be updated before SCHECK_PARTIAL. */
2324  SCHECK_PARTIAL();
2326  }
2327  eptr++;
2328 #ifdef SUPPORT_UTF
2329  if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2330 #endif
2331  ecode++;
2332  break;
2333 
2334  /* Match a single byte, even in UTF-8 mode. This opcode really does match
2335  any byte, even newline, independent of the setting of PCRE_DOTALL. */
2336 
2337  case OP_ANYBYTE:
2338  if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2339  { /* not be updated before SCHECK_PARTIAL. */
2340  SCHECK_PARTIAL();
2342  }
2343  eptr++;
2344  ecode++;
2345  break;
2346 
2347  case OP_NOT_DIGIT:
2348  if (eptr >= md->end_subject)
2349  {
2350  SCHECK_PARTIAL();
2352  }
2353  GETCHARINCTEST(c, eptr);
2354  if (
2355 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2356  c < 256 &&
2357 #endif
2358  (md->ctypes[c] & ctype_digit) != 0
2359  )
2361  ecode++;
2362  break;
2363 
2364  case OP_DIGIT:
2365  if (eptr >= md->end_subject)
2366  {
2367  SCHECK_PARTIAL();
2369  }
2370  GETCHARINCTEST(c, eptr);
2371  if (
2372 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2373  c > 255 ||
2374 #endif
2375  (md->ctypes[c] & ctype_digit) == 0
2376  )
2378  ecode++;
2379  break;
2380 
2381  case OP_NOT_WHITESPACE:
2382  if (eptr >= md->end_subject)
2383  {
2384  SCHECK_PARTIAL();
2386  }
2387  GETCHARINCTEST(c, eptr);
2388  if (
2389 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2390  c < 256 &&
2391 #endif
2392  (md->ctypes[c] & ctype_space) != 0
2393  )
2395  ecode++;
2396  break;
2397 
2398  case OP_WHITESPACE:
2399  if (eptr >= md->end_subject)
2400  {
2401  SCHECK_PARTIAL();
2403  }
2404  GETCHARINCTEST(c, eptr);
2405  if (
2406 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2407  c > 255 ||
2408 #endif
2409  (md->ctypes[c] & ctype_space) == 0
2410  )
2412  ecode++;
2413  break;
2414 
2415  case OP_NOT_WORDCHAR:
2416  if (eptr >= md->end_subject)
2417  {
2418  SCHECK_PARTIAL();
2420  }
2421  GETCHARINCTEST(c, eptr);
2422  if (
2423 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2424  c < 256 &&
2425 #endif
2426  (md->ctypes[c] & ctype_word) != 0
2427  )
2429  ecode++;
2430  break;
2431 
2432  case OP_WORDCHAR:
2433  if (eptr >= md->end_subject)
2434  {
2435  SCHECK_PARTIAL();
2437  }
2438  GETCHARINCTEST(c, eptr);
2439  if (
2440 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2441  c > 255 ||
2442 #endif
2443  (md->ctypes[c] & ctype_word) == 0
2444  )
2446  ecode++;
2447  break;
2448 
2449  case OP_ANYNL:
2450  if (eptr >= md->end_subject)
2451  {
2452  SCHECK_PARTIAL();
2454  }
2455  GETCHARINCTEST(c, eptr);
2456  switch(c)
2457  {
2458  default: RRETURN(MATCH_NOMATCH);
2459 
2460  case CHAR_CR:
2461  if (eptr >= md->end_subject)
2462  {
2463  SCHECK_PARTIAL();
2464  }
2465  else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2466  break;
2467 
2468  case CHAR_LF:
2469  break;
2470 
2471  case CHAR_VT:
2472  case CHAR_FF:
2473  case CHAR_NEL:
2474 #ifndef EBCDIC
2475  case 0x2028:
2476  case 0x2029:
2477 #endif /* Not EBCDIC */
2478  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2479  break;
2480  }
2481  ecode++;
2482  break;
2483 
2484  case OP_NOT_HSPACE:
2485  if (eptr >= md->end_subject)
2486  {
2487  SCHECK_PARTIAL();
2489  }
2490  GETCHARINCTEST(c, eptr);
2491  switch(c)
2492  {
2493  HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
2494  default: break;
2495  }
2496  ecode++;
2497  break;
2498 
2499  case OP_HSPACE:
2500  if (eptr >= md->end_subject)
2501  {
2502  SCHECK_PARTIAL();
2504  }
2505  GETCHARINCTEST(c, eptr);
2506  switch(c)
2507  {
2508  HSPACE_CASES: break; /* Byte and multibyte cases */
2509  default: RRETURN(MATCH_NOMATCH);
2510  }
2511  ecode++;
2512  break;
2513 
2514  case OP_NOT_VSPACE:
2515  if (eptr >= md->end_subject)
2516  {
2517  SCHECK_PARTIAL();
2519  }
2520  GETCHARINCTEST(c, eptr);
2521  switch(c)
2522  {
2524  default: break;
2525  }
2526  ecode++;
2527  break;
2528 
2529  case OP_VSPACE:
2530  if (eptr >= md->end_subject)
2531  {
2532  SCHECK_PARTIAL();
2534  }
2535  GETCHARINCTEST(c, eptr);
2536  switch(c)
2537  {
2538  VSPACE_CASES: break;
2539  default: RRETURN(MATCH_NOMATCH);
2540  }
2541  ecode++;
2542  break;
2543 
2544 #ifdef SUPPORT_UCP
2545  /* Check the next character by Unicode property. We will get here only
2546  if the support is in the binary; otherwise a compile-time error occurs. */
2547 
2548  case OP_PROP:
2549  case OP_NOTPROP:
2550  if (eptr >= md->end_subject)
2551  {
2552  SCHECK_PARTIAL();
2554  }
2555  GETCHARINCTEST(c, eptr);
2556  {
2557  const pcre_uint32 *cp;
2558  const ucd_record *prop = GET_UCD(c);
2559 
2560  switch(ecode[1])
2561  {
2562  case PT_ANY:
2563  if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2564  break;
2565 
2566  case PT_LAMP:
2567  if ((prop->chartype == ucp_Lu ||
2568  prop->chartype == ucp_Ll ||
2569  prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2571  break;
2572 
2573  case PT_GC:
2574  if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2576  break;
2577 
2578  case PT_PC:
2579  if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2581  break;
2582 
2583  case PT_SC:
2584  if ((ecode[2] != prop->script) == (op == OP_PROP))
2586  break;
2587 
2588  /* These are specials */
2589 
2590  case PT_ALNUM:
2591  if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592  PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2594  break;
2595 
2596  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2597  which means that Perl space and POSIX space are now identical. PCRE
2598  was changed at release 8.34. */
2599 
2600  case PT_SPACE: /* Perl space */
2601  case PT_PXSPACE: /* POSIX space */
2602  switch(c)
2603  {
2604  HSPACE_CASES:
2605  VSPACE_CASES:
2606  if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2607  break;
2608 
2609  default:
2610  if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2611  (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2612  break;
2613  }
2614  break;
2615 
2616  case PT_WORD:
2617  if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2618  PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2619  c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2621  break;
2622 
2623  case PT_CLIST:
2624  cp = PRIV(ucd_caseless_sets) + ecode[2];
2625  for (;;)
2626  {
2627  if (c < *cp)
2628  { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2629  if (c == *cp++)
2630  { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2631  }
2632  break;
2633 
2634  case PT_UCNC:
2635  if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2636  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2637  c >= 0xe000) == (op == OP_NOTPROP))
2639  break;
2640 
2641  /* This should never occur */
2642 
2643  default:
2645  }
2646 
2647  ecode += 3;
2648  }
2649  break;
2650 
2651  /* Match an extended Unicode sequence. We will get here only if the support
2652  is in the binary; otherwise a compile-time error occurs. */
2653 
2654  case OP_EXTUNI:
2655  if (eptr >= md->end_subject)
2656  {
2657  SCHECK_PARTIAL();
2659  }
2660  else
2661  {
2662  int lgb, rgb;
2663  GETCHARINCTEST(c, eptr);
2664  lgb = UCD_GRAPHBREAK(c);
2665  while (eptr < md->end_subject)
2666  {
2667  int len = 1;
2668  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2669  rgb = UCD_GRAPHBREAK(c);
2670  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2671  lgb = rgb;
2672  eptr += len;
2673  }
2674  }
2675  CHECK_PARTIAL();
2676  ecode++;
2677  break;
2678 #endif /* SUPPORT_UCP */
2679 
2680 
2681  /* Match a back reference, possibly repeatedly. Look past the end of the
2682  item to see if there is repeat information following. The code is similar
2683  to that for character classes, but repeated for efficiency. Then obey
2684  similar code to character type repeats - written out again for speed.
2685  However, if the referenced string is the empty string, always treat
2686  it as matched, any number of times (otherwise there could be infinite
2687  loops). If the reference is unset, there are two possibilities:
2688 
2689  (a) In the default, Perl-compatible state, set the length negative;
2690  this ensures that every attempt at a match fails. We can't just fail
2691  here, because of the possibility of quantifiers with zero minima.
2692 
2693  (b) If the JavaScript compatibility flag is set, set the length to zero
2694  so that the back reference matches an empty string.
2695 
2696  Otherwise, set the length to the length of what was matched by the
2697  referenced subpattern.
2698 
2699  The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2700  or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2701  and OP_DNREFI are used. In this case we must scan the list of groups to
2702  which the name refers, and use the first one that is set. */
2703 
2704  case OP_DNREF:
2705  case OP_DNREFI:
2706  caseless = op == OP_DNREFI;
2707  {
2708  int count = GET2(ecode, 1+IMM2_SIZE);
2709  pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2710  ecode += 1 + 2*IMM2_SIZE;
2711 
2712  /* Setting the default length first and initializing 'offset' avoids
2713  compiler warnings in the REF_REPEAT code. */
2714 
2715  length = (md->jscript_compat)? 0 : -1;
2716  offset = 0;
2717 
2718  while (count-- > 0)
2719  {
2720  offset = GET2(slot, 0) << 1;
2721  if (offset < offset_top && md->offset_vector[offset] >= 0)
2722  {
2723  length = md->offset_vector[offset+1] - md->offset_vector[offset];
2724  break;
2725  }
2726  slot += md->name_entry_size;
2727  }
2728  }
2729  goto REF_REPEAT;
2730 
2731  case OP_REF:
2732  case OP_REFI:
2733  caseless = op == OP_REFI;
2734  offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2735  ecode += 1 + IMM2_SIZE;
2736  if (offset >= offset_top || md->offset_vector[offset] < 0)
2737  length = (md->jscript_compat)? 0 : -1;
2738  else
2739  length = md->offset_vector[offset+1] - md->offset_vector[offset];
2740 
2741  /* Set up for repetition, or handle the non-repeated case */
2742 
2743  REF_REPEAT:
2744  switch (*ecode)
2745  {
2746  case OP_CRSTAR:
2747  case OP_CRMINSTAR:
2748  case OP_CRPLUS:
2749  case OP_CRMINPLUS:
2750  case OP_CRQUERY:
2751  case OP_CRMINQUERY:
2752  c = *ecode++ - OP_CRSTAR;
2753  minimize = (c & 1) != 0;
2754  min = rep_min[c]; /* Pick up values from tables; */
2755  max = rep_max[c]; /* zero for max => infinity */
2756  if (max == 0) max = INT_MAX;
2757  break;
2758 
2759  case OP_CRRANGE:
2760  case OP_CRMINRANGE:
2761  minimize = (*ecode == OP_CRMINRANGE);
2762  min = GET2(ecode, 1);
2763  max = GET2(ecode, 1 + IMM2_SIZE);
2764  if (max == 0) max = INT_MAX;
2765  ecode += 1 + 2 * IMM2_SIZE;
2766  break;
2767 
2768  default: /* No repeat follows */
2769  if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2770  {
2771  if (length == -2) eptr = md->end_subject; /* Partial match */
2772  CHECK_PARTIAL();
2774  }
2775  eptr += length;
2776  continue; /* With the main loop */
2777  }
2778 
2779  /* Handle repeated back references. If the length of the reference is
2780  zero, just continue with the main loop. If the length is negative, it
2781  means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2782  zero, we can continue at the same level without recursion. For any other
2783  minimum, carrying on will result in NOMATCH. */
2784 
2785  if (length == 0) continue;
2786  if (length < 0 && min == 0) continue;
2787 
2788  /* First, ensure the minimum number of matches are present. We get back
2789  the length of the reference string explicitly rather than passing the
2790  address of eptr, so that eptr can be a register variable. */
2791 
2792  for (i = 1; i <= min; i++)
2793  {
2794  int slength;
2795  if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2796  {
2797  if (slength == -2) eptr = md->end_subject; /* Partial match */
2798  CHECK_PARTIAL();
2800  }
2801  eptr += slength;
2802  }
2803 
2804  /* If min = max, continue at the same level without recursion.
2805  They are not both allowed to be zero. */
2806 
2807  if (min == max) continue;
2808 
2809  /* If minimizing, keep trying and advancing the pointer */
2810 
2811  if (minimize)
2812  {
2813  for (fi = min;; fi++)
2814  {
2815  int slength;
2816  RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2817  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2818  if (fi >= max) RRETURN(MATCH_NOMATCH);
2819  if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2820  {
2821  if (slength == -2) eptr = md->end_subject; /* Partial match */
2822  CHECK_PARTIAL();
2824  }
2825  eptr += slength;
2826  }
2827  /* Control never gets here */
2828  }
2829 
2830  /* If maximizing, find the longest string and work backwards */
2831 
2832  else
2833  {
2834  pp = eptr;
2835  for (i = min; i < max; i++)
2836  {
2837  int slength;
2838  if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2839  {
2840  /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2841  the soft partial matching case. */
2842 
2843  if (slength == -2 && md->partial != 0 &&
2844  md->end_subject > md->start_used_ptr)
2845  {
2846  md->hitend = TRUE;
2847  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2848  }
2849  break;
2850  }
2851  eptr += slength;
2852  }
2853 
2854  while (eptr >= pp)
2855  {
2856  RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2857  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2858  eptr -= length;
2859  }
2861  }
2862  /* Control never gets here */
2863 
2864  /* Match a bit-mapped character class, possibly repeatedly. This op code is
2865  used when all the characters in the class have values in the range 0-255,
2866  and either the matching is caseful, or the characters are in the range
2867  0-127 when UTF-8 processing is enabled. The only difference between
2868  OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2869  encountered.
2870 
2871  First, look past the end of the item to see if there is repeat information
2872  following. Then obey similar code to character type repeats - written out
2873  again for speed. */
2874 
2875  case OP_NCLASS:
2876  case OP_CLASS:
2877  {
2878  /* The data variable is saved across frames, so the byte map needs to
2879  be stored there. */
2880 #define BYTE_MAP ((pcre_uint8 *)data)
2881  data = ecode + 1; /* Save for matching */
2882  ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2883 
2884  switch (*ecode)
2885  {
2886  case OP_CRSTAR:
2887  case OP_CRMINSTAR:
2888  case OP_CRPLUS:
2889  case OP_CRMINPLUS:
2890  case OP_CRQUERY:
2891  case OP_CRMINQUERY:
2892  case OP_CRPOSSTAR:
2893  case OP_CRPOSPLUS:
2894  case OP_CRPOSQUERY:
2895  c = *ecode++ - OP_CRSTAR;
2896  if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2897  else possessive = TRUE;
2898  min = rep_min[c]; /* Pick up values from tables; */
2899  max = rep_max[c]; /* zero for max => infinity */
2900  if (max == 0) max = INT_MAX;
2901  break;
2902 
2903  case OP_CRRANGE:
2904  case OP_CRMINRANGE:
2905  case OP_CRPOSRANGE:
2906  minimize = (*ecode == OP_CRMINRANGE);
2907  possessive = (*ecode == OP_CRPOSRANGE);
2908  min = GET2(ecode, 1);
2909  max = GET2(ecode, 1 + IMM2_SIZE);
2910  if (max == 0) max = INT_MAX;
2911  ecode += 1 + 2 * IMM2_SIZE;
2912  break;
2913 
2914  default: /* No repeat follows */
2915  min = max = 1;
2916  break;
2917  }
2918 
2919  /* First, ensure the minimum number of matches are present. */
2920 
2921 #ifdef SUPPORT_UTF
2922  if (utf)
2923  {
2924  for (i = 1; i <= min; i++)
2925  {
2926  if (eptr >= md->end_subject)
2927  {
2928  SCHECK_PARTIAL();
2930  }
2931  GETCHARINC(c, eptr);
2932  if (c > 255)
2933  {
2934  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2935  }
2936  else
2937  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2938  }
2939  }
2940  else
2941 #endif
2942  /* Not UTF mode */
2943  {
2944  for (i = 1; i <= min; i++)
2945  {
2946  if (eptr >= md->end_subject)
2947  {
2948  SCHECK_PARTIAL();
2950  }
2951  c = *eptr++;
2952 #ifndef COMPILE_PCRE8
2953  if (c > 255)
2954  {
2955  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956  }
2957  else
2958 #endif
2959  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960  }
2961  }
2962 
2963  /* If max == min we can continue with the main loop without the
2964  need to recurse. */
2965 
2966  if (min == max) continue;
2967 
2968  /* If minimizing, keep testing the rest of the expression and advancing
2969  the pointer while it matches the class. */
2970 
2971  if (minimize)
2972  {
2973 #ifdef SUPPORT_UTF
2974  if (utf)
2975  {
2976  for (fi = min;; fi++)
2977  {
2978  RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2979  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980  if (fi >= max) RRETURN(MATCH_NOMATCH);
2981  if (eptr >= md->end_subject)
2982  {
2983  SCHECK_PARTIAL();
2985  }
2986  GETCHARINC(c, eptr);
2987  if (c > 255)
2988  {
2989  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2990  }
2991  else
2992  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2993  }
2994  }
2995  else
2996 #endif
2997  /* Not UTF mode */
2998  {
2999  for (fi = min;; fi++)
3000  {
3001  RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3002  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003  if (fi >= max) RRETURN(MATCH_NOMATCH);
3004  if (eptr >= md->end_subject)
3005  {
3006  SCHECK_PARTIAL();
3008  }
3009  c = *eptr++;
3010 #ifndef COMPILE_PCRE8
3011  if (c > 255)
3012  {
3013  if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3014  }
3015  else
3016 #endif
3017  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3018  }
3019  }
3020  /* Control never gets here */
3021  }
3022 
3023  /* If maximizing, find the longest possible run, then work backwards. */
3024 
3025  else
3026  {
3027  pp = eptr;
3028 
3029 #ifdef SUPPORT_UTF
3030  if (utf)
3031  {
3032  for (i = min; i < max; i++)
3033  {
3034  int len = 1;
3035  if (eptr >= md->end_subject)
3036  {
3037  SCHECK_PARTIAL();
3038  break;
3039  }
3040  GETCHARLEN(c, eptr, len);
3041  if (c > 255)
3042  {
3043  if (op == OP_CLASS) break;
3044  }
3045  else
3046  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3047  eptr += len;
3048  }
3049 
3050  if (possessive) continue; /* No backtracking */
3051 
3052  for (;;)
3053  {
3054  RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3055  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3056  if (eptr-- <= pp) break; /* Stop if tried at original pos */
3057  BACKCHAR(eptr);
3058  }
3059  }
3060  else
3061 #endif
3062  /* Not UTF mode */
3063  {
3064  for (i = min; i < max; i++)
3065  {
3066  if (eptr >= md->end_subject)
3067  {
3068  SCHECK_PARTIAL();
3069  break;
3070  }
3071  c = *eptr;
3072 #ifndef COMPILE_PCRE8
3073  if (c > 255)
3074  {
3075  if (op == OP_CLASS) break;
3076  }
3077  else
3078 #endif
3079  if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3080  eptr++;
3081  }
3082 
3083  if (possessive) continue; /* No backtracking */
3084 
3085  while (eptr >= pp)
3086  {
3087  RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3088  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3089  eptr--;
3090  }
3091  }
3092 
3094  }
3095 #undef BYTE_MAP
3096  }
3097  /* Control never gets here */
3098 
3099 
3100  /* Match an extended character class. In the 8-bit library, this opcode is
3101  encountered only when UTF-8 mode is supported. In the 16-bit and
3102  32-bit libraries, codepoints greater than 255 may be encountered even when
3103  UTF is not supported. */
3104 
3105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3106  case OP_XCLASS:
3107  {
3108  data = ecode + 1 + LINK_SIZE; /* Save for matching */
3109  ecode += GET(ecode, 1); /* Advance past the item */
3110 
3111  switch (*ecode)
3112  {
3113  case OP_CRSTAR:
3114  case OP_CRMINSTAR:
3115  case OP_CRPLUS:
3116  case OP_CRMINPLUS:
3117  case OP_CRQUERY:
3118  case OP_CRMINQUERY:
3119  case OP_CRPOSSTAR:
3120  case OP_CRPOSPLUS:
3121  case OP_CRPOSQUERY:
3122  c = *ecode++ - OP_CRSTAR;
3123  if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3124  else possessive = TRUE;
3125  min = rep_min[c]; /* Pick up values from tables; */
3126  max = rep_max[c]; /* zero for max => infinity */
3127  if (max == 0) max = INT_MAX;
3128  break;
3129 
3130  case OP_CRRANGE:
3131  case OP_CRMINRANGE:
3132  case OP_CRPOSRANGE:
3133  minimize = (*ecode == OP_CRMINRANGE);
3134  possessive = (*ecode == OP_CRPOSRANGE);
3135  min = GET2(ecode, 1);
3136  max = GET2(ecode, 1 + IMM2_SIZE);
3137  if (max == 0) max = INT_MAX;
3138  ecode += 1 + 2 * IMM2_SIZE;
3139  break;
3140 
3141  default: /* No repeat follows */
3142  min = max = 1;
3143  break;
3144  }
3145 
3146  /* First, ensure the minimum number of matches are present. */
3147 
3148  for (i = 1; i <= min; i++)
3149  {
3150  if (eptr >= md->end_subject)
3151  {
3152  SCHECK_PARTIAL();
3154  }
3155  GETCHARINCTEST(c, eptr);
3156  if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3157  }
3158 
3159  /* If max == min we can continue with the main loop without the
3160  need to recurse. */
3161 
3162  if (min == max) continue;
3163 
3164  /* If minimizing, keep testing the rest of the expression and advancing
3165  the pointer while it matches the class. */
3166 
3167  if (minimize)
3168  {
3169  for (fi = min;; fi++)
3170  {
3171  RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3172  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3173  if (fi >= max) RRETURN(MATCH_NOMATCH);
3174  if (eptr >= md->end_subject)
3175  {
3176  SCHECK_PARTIAL();
3178  }
3179  GETCHARINCTEST(c, eptr);
3180  if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3181  }
3182  /* Control never gets here */
3183  }
3184 
3185  /* If maximizing, find the longest possible run, then work backwards. */
3186 
3187  else
3188  {
3189  pp = eptr;
3190  for (i = min; i < max; i++)
3191  {
3192  int len = 1;
3193  if (eptr >= md->end_subject)
3194  {
3195  SCHECK_PARTIAL();
3196  break;
3197  }
3198 #ifdef SUPPORT_UTF
3199  GETCHARLENTEST(c, eptr, len);
3200 #else
3201  c = *eptr;
3202 #endif
3203  if (!PRIV(xclass)(c, data, utf)) break;
3204  eptr += len;
3205  }
3206 
3207  if (possessive) continue; /* No backtracking */
3208 
3209  for(;;)
3210  {
3211  RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3212  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213  if (eptr-- <= pp) break; /* Stop if tried at original pos */
3214 #ifdef SUPPORT_UTF
3215  if (utf) BACKCHAR(eptr);
3216 #endif
3217  }
3219  }
3220 
3221  /* Control never gets here */
3222  }
3223 #endif /* End of XCLASS */
3224 
3225  /* Match a single character, casefully */
3226 
3227  case OP_CHAR:
3228 #ifdef SUPPORT_UTF
3229  if (utf)
3230  {
3231  length = 1;
3232  ecode++;
3233  GETCHARLEN(fc, ecode, length);
3234  if (length > md->end_subject - eptr)
3235  {
3236  CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
3238  }
3239  while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3240  }
3241  else
3242 #endif
3243  /* Not UTF mode */
3244  {
3245  if (md->end_subject - eptr < 1)
3246  {
3247  SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
3249  }
3250  if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3251  ecode += 2;
3252  }
3253  break;
3254 
3255  /* Match a single character, caselessly. If we are at the end of the
3256  subject, give up immediately. */
3257 
3258  case OP_CHARI:
3259  if (eptr >= md->end_subject)
3260  {
3261  SCHECK_PARTIAL();
3263  }
3264 
3265 #ifdef SUPPORT_UTF
3266  if (utf)
3267  {
3268  length = 1;
3269  ecode++;
3270  GETCHARLEN(fc, ecode, length);
3271 
3272  /* If the pattern character's value is < 128, we have only one byte, and
3273  we know that its other case must also be one byte long, so we can use the
3274  fast lookup table. We know that there is at least one byte left in the
3275  subject. */
3276 
3277  if (fc < 128)
3278  {
3279  pcre_uint32 cc = UCHAR21(eptr);
3280  if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3281  ecode++;
3282  eptr++;
3283  }
3284 
3285  /* Otherwise we must pick up the subject character. Note that we cannot
3286  use the value of "length" to check for sufficient bytes left, because the
3287  other case of the character may have more or fewer bytes. */
3288 
3289  else
3290  {
3291  pcre_uint32 dc;
3292  GETCHARINC(dc, eptr);
3293  ecode += length;
3294 
3295  /* If we have Unicode property support, we can use it to test the other
3296  case of the character, if there is one. */
3297 
3298  if (fc != dc)
3299  {
3300 #ifdef SUPPORT_UCP
3301  if (dc != UCD_OTHERCASE(fc))
3302 #endif
3304  }
3305  }
3306  }
3307  else
3308 #endif /* SUPPORT_UTF */
3309 
3310  /* Not UTF mode */
3311  {
3312  if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3313  != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3314  eptr++;
3315  ecode += 2;
3316  }
3317  break;
3318 
3319  /* Match a single character repeatedly. */
3320 
3321  case OP_EXACT:
3322  case OP_EXACTI:
3323  min = max = GET2(ecode, 1);
3324  ecode += 1 + IMM2_SIZE;
3325  goto REPEATCHAR;
3326 
3327  case OP_POSUPTO:
3328  case OP_POSUPTOI:
3329  possessive = TRUE;
3330  /* Fall through */
3331 
3332  case OP_UPTO:
3333  case OP_UPTOI:
3334  case OP_MINUPTO:
3335  case OP_MINUPTOI:
3336  min = 0;
3337  max = GET2(ecode, 1);
3338  minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3339  ecode += 1 + IMM2_SIZE;
3340  goto REPEATCHAR;
3341 
3342  case OP_POSSTAR:
3343  case OP_POSSTARI:
3344  possessive = TRUE;
3345  min = 0;
3346  max = INT_MAX;
3347  ecode++;
3348  goto REPEATCHAR;
3349 
3350  case OP_POSPLUS:
3351  case OP_POSPLUSI:
3352  possessive = TRUE;
3353  min = 1;
3354  max = INT_MAX;
3355  ecode++;
3356  goto REPEATCHAR;
3357 
3358  case OP_POSQUERY:
3359  case OP_POSQUERYI:
3360  possessive = TRUE;
3361  min = 0;
3362  max = 1;
3363  ecode++;
3364  goto REPEATCHAR;
3365 
3366  case OP_STAR:
3367  case OP_STARI:
3368  case OP_MINSTAR:
3369  case OP_MINSTARI:
3370  case OP_PLUS:
3371  case OP_PLUSI:
3372  case OP_MINPLUS:
3373  case OP_MINPLUSI:
3374  case OP_QUERY:
3375  case OP_QUERYI:
3376  case OP_MINQUERY:
3377  case OP_MINQUERYI:
3378  c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3379  minimize = (c & 1) != 0;
3380  min = rep_min[c]; /* Pick up values from tables; */
3381  max = rep_max[c]; /* zero for max => infinity */
3382  if (max == 0) max = INT_MAX;
3383 
3384  /* Common code for all repeated single-character matches. We first check
3385  for the minimum number of characters. If the minimum equals the maximum, we
3386  are done. Otherwise, if minimizing, check the rest of the pattern for a
3387  match; if there isn't one, advance up to the maximum, one character at a
3388  time.
3389 
3390  If maximizing, advance up to the maximum number of matching characters,
3391  until eptr is past the end of the maximum run. If possessive, we are
3392  then done (no backing up). Otherwise, match at this position; anything
3393  other than no match is immediately returned. For nomatch, back up one
3394  character, unless we are matching \R and the last thing matched was
3395  \r\n, in which case, back up two bytes. When we reach the first optional
3396  character position, we can save stack by doing a tail recurse.
3397 
3398  The various UTF/non-UTF and caseful/caseless cases are handled separately,
3399  for speed. */
3400 
3401  REPEATCHAR:
3402 #ifdef SUPPORT_UTF
3403  if (utf)
3404  {
3405  length = 1;
3406  charptr = ecode;
3407  GETCHARLEN(fc, ecode, length);
3408  ecode += length;
3409 
3410  /* Handle multibyte character matching specially here. There is
3411  support for caseless matching if UCP support is present. */
3412 
3413  if (length > 1)
3414  {
3415 #ifdef SUPPORT_UCP
3416  pcre_uint32 othercase;
3417  if (op >= OP_STARI && /* Caseless */
3418  (othercase = UCD_OTHERCASE(fc)) != fc)
3419  oclength = PRIV(ord2utf)(othercase, occhars);
3420  else oclength = 0;
3421 #endif /* SUPPORT_UCP */
3422 
3423  for (i = 1; i <= min; i++)
3424  {
3425  if (eptr <= md->end_subject - length &&
3426  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3427 #ifdef SUPPORT_UCP
3428  else if (oclength > 0 &&
3429  eptr <= md->end_subject - oclength &&
3430  memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3431 #endif /* SUPPORT_UCP */
3432  else
3433  {
3434  CHECK_PARTIAL();
3436  }
3437  }
3438 
3439  if (min == max) continue;
3440 
3441  if (minimize)
3442  {
3443  for (fi = min;; fi++)
3444  {
3445  RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3446  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447  if (fi >= max) RRETURN(MATCH_NOMATCH);
3448  if (eptr <= md->end_subject - length &&
3449  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3450 #ifdef SUPPORT_UCP
3451  else if (oclength > 0 &&
3452  eptr <= md->end_subject - oclength &&
3453  memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3454 #endif /* SUPPORT_UCP */
3455  else
3456  {
3457  CHECK_PARTIAL();
3459  }
3460  }
3461  /* Control never gets here */
3462  }
3463 
3464  else /* Maximize */
3465  {
3466  pp = eptr;
3467  for (i = min; i < max; i++)
3468  {
3469  if (eptr <= md->end_subject - length &&
3470  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3471 #ifdef SUPPORT_UCP
3472  else if (oclength > 0 &&
3473  eptr <= md->end_subject - oclength &&
3474  memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3475 #endif /* SUPPORT_UCP */
3476  else
3477  {
3478  CHECK_PARTIAL();
3479  break;
3480  }
3481  }
3482 
3483  if (possessive) continue; /* No backtracking */
3484  for(;;)
3485  {
3486  if (eptr <= pp) goto TAIL_RECURSE;
3487  RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3488  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3489 #ifdef SUPPORT_UCP
3490  eptr--;
3491  BACKCHAR(eptr);
3492 #else /* without SUPPORT_UCP */
3493  eptr -= length;
3494 #endif /* SUPPORT_UCP */
3495  }
3496  }
3497  /* Control never gets here */
3498  }
3499 
3500  /* If the length of a UTF-8 character is 1, we fall through here, and
3501  obey the code as for non-UTF-8 characters below, though in this case the
3502  value of fc will always be < 128. */
3503  }
3504  else
3505 #endif /* SUPPORT_UTF */
3506  /* When not in UTF-8 mode, load a single-byte character. */
3507  fc = *ecode++;
3508 
3509  /* The value of fc at this point is always one character, though we may
3510  or may not be in UTF mode. The code is duplicated for the caseless and
3511  caseful cases, for speed, since matching characters is likely to be quite
3512  common. First, ensure the minimum number of matches are present. If min =
3513  max, continue at the same level without recursing. Otherwise, if
3514  minimizing, keep trying the rest of the expression and advancing one
3515  matching character if failing, up to the maximum. Alternatively, if
3516  maximizing, find the maximum number of characters and work backwards. */
3517 
3518  DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3519  max, (char *)eptr));
3520 
3521  if (op >= OP_STARI) /* Caseless */
3522  {
3523 #ifdef COMPILE_PCRE8
3524  /* fc must be < 128 if UTF is enabled. */
3525  foc = md->fcc[fc];
3526 #else
3527 #ifdef SUPPORT_UTF
3528 #ifdef SUPPORT_UCP
3529  if (utf && fc > 127)
3530  foc = UCD_OTHERCASE(fc);
3531 #else
3532  if (utf && fc > 127)
3533  foc = fc;
3534 #endif /* SUPPORT_UCP */
3535  else
3536 #endif /* SUPPORT_UTF */
3537  foc = TABLE_GET(fc, md->fcc, fc);
3538 #endif /* COMPILE_PCRE8 */
3539 
3540  for (i = 1; i <= min; i++)
3541  {
3542  pcre_uint32 cc; /* Faster than pcre_uchar */
3543  if (eptr >= md->end_subject)
3544  {
3545  SCHECK_PARTIAL();
3547  }
3548  cc = UCHAR21TEST(eptr);
3549  if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3550  eptr++;
3551  }
3552  if (min == max) continue;
3553  if (minimize)
3554  {
3555  for (fi = min;; fi++)
3556  {
3557  pcre_uint32 cc; /* Faster than pcre_uchar */
3558  RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3559  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3560  if (fi >= max) RRETURN(MATCH_NOMATCH);
3561  if (eptr >= md->end_subject)
3562  {
3563  SCHECK_PARTIAL();
3565  }
3566  cc = UCHAR21TEST(eptr);
3567  if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3568  eptr++;
3569  }
3570  /* Control never gets here */
3571  }
3572  else /* Maximize */
3573  {
3574  pp = eptr;
3575  for (i = min; i < max; i++)
3576  {
3577  pcre_uint32 cc; /* Faster than pcre_uchar */
3578  if (eptr >= md->end_subject)
3579  {
3580  SCHECK_PARTIAL();
3581  break;
3582  }
3583  cc = UCHAR21TEST(eptr);
3584  if (fc != cc && foc != cc) break;
3585  eptr++;
3586  }
3587  if (possessive) continue; /* No backtracking */
3588  for (;;)
3589  {
3590  if (eptr == pp) goto TAIL_RECURSE;
3591  RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3592  eptr--;
3593  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3594  }
3595  /* Control never gets here */
3596  }
3597  }
3598 
3599  /* Caseful comparisons (includes all multi-byte characters) */
3600 
3601  else
3602  {
3603  for (i = 1; i <= min; i++)
3604  {
3605  if (eptr >= md->end_subject)
3606  {
3607  SCHECK_PARTIAL();
3609  }
3610  if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3611  }
3612 
3613  if (min == max) continue;
3614 
3615  if (minimize)
3616  {
3617  for (fi = min;; fi++)
3618  {
3619  RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3620  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3621  if (fi >= max) RRETURN(MATCH_NOMATCH);
3622  if (eptr >= md->end_subject)
3623  {
3624  SCHECK_PARTIAL();
3626  }
3627  if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3628  }
3629  /* Control never gets here */
3630  }
3631  else /* Maximize */
3632  {
3633  pp = eptr;
3634  for (i = min; i < max; i++)
3635  {
3636  if (eptr >= md->end_subject)
3637  {
3638  SCHECK_PARTIAL();
3639  break;
3640  }
3641  if (fc != UCHAR21TEST(eptr)) break;
3642  eptr++;
3643  }
3644  if (possessive) continue; /* No backtracking */
3645  for (;;)
3646  {
3647  if (eptr == pp) goto TAIL_RECURSE;
3648  RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3649  eptr--;
3650  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3651  }
3652  /* Control never gets here */
3653  }
3654  }
3655  /* Control never gets here */
3656 
3657  /* Match a negated single one-byte character. The character we are
3658  checking can be multibyte. */
3659 
3660  case OP_NOT:
3661  case OP_NOTI:
3662  if (eptr >= md->end_subject)
3663  {
3664  SCHECK_PARTIAL();
3666  }
3667 #ifdef SUPPORT_UTF
3668  if (utf)
3669  {
3670  register pcre_uint32 ch, och;
3671 
3672  ecode++;
3673  GETCHARINC(ch, ecode);
3674  GETCHARINC(c, eptr);
3675 
3676  if (op == OP_NOT)
3677  {
3678  if (ch == c) RRETURN(MATCH_NOMATCH);
3679  }
3680  else
3681  {
3682 #ifdef SUPPORT_UCP
3683  if (ch > 127)
3684  och = UCD_OTHERCASE(ch);
3685 #else
3686  if (ch > 127)
3687  och = ch;
3688 #endif /* SUPPORT_UCP */
3689  else
3690  och = TABLE_GET(ch, md->fcc, ch);
3691  if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3692  }
3693  }
3694  else
3695 #endif
3696  {
3697  register pcre_uint32 ch = ecode[1];
3698  c = *eptr++;
3699  if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3701  ecode += 2;
3702  }
3703  break;
3704 
3705  /* Match a negated single one-byte character repeatedly. This is almost a
3706  repeat of the code for a repeated single character, but I haven't found a
3707  nice way of commoning these up that doesn't require a test of the
3708  positive/negative option for each character match. Maybe that wouldn't add
3709  very much to the time taken, but character matching *is* what this is all
3710  about... */
3711 
3712  case OP_NOTEXACT:
3713  case OP_NOTEXACTI:
3714  min = max = GET2(ecode, 1);
3715  ecode += 1 + IMM2_SIZE;
3716  goto REPEATNOTCHAR;
3717 
3718  case OP_NOTUPTO:
3719  case OP_NOTUPTOI:
3720  case OP_NOTMINUPTO:
3721  case OP_NOTMINUPTOI:
3722  min = 0;
3723  max = GET2(ecode, 1);
3724  minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3725  ecode += 1 + IMM2_SIZE;
3726  goto REPEATNOTCHAR;
3727 
3728  case OP_NOTPOSSTAR:
3729  case OP_NOTPOSSTARI:
3730  possessive = TRUE;
3731  min = 0;
3732  max = INT_MAX;
3733  ecode++;
3734  goto REPEATNOTCHAR;
3735 
3736  case OP_NOTPOSPLUS:
3737  case OP_NOTPOSPLUSI:
3738  possessive = TRUE;
3739  min = 1;
3740  max = INT_MAX;
3741  ecode++;
3742  goto REPEATNOTCHAR;
3743 
3744  case OP_NOTPOSQUERY:
3745  case OP_NOTPOSQUERYI:
3746  possessive = TRUE;
3747  min = 0;
3748  max = 1;
3749  ecode++;
3750  goto REPEATNOTCHAR;
3751 
3752  case OP_NOTPOSUPTO:
3753  case OP_NOTPOSUPTOI:
3754  possessive = TRUE;
3755  min = 0;
3756  max = GET2(ecode, 1);
3757  ecode += 1 + IMM2_SIZE;
3758  goto REPEATNOTCHAR;
3759 
3760  case OP_NOTSTAR:
3761  case OP_NOTSTARI:
3762  case OP_NOTMINSTAR:
3763  case OP_NOTMINSTARI:
3764  case OP_NOTPLUS:
3765  case OP_NOTPLUSI:
3766  case OP_NOTMINPLUS:
3767  case OP_NOTMINPLUSI:
3768  case OP_NOTQUERY:
3769  case OP_NOTQUERYI:
3770  case OP_NOTMINQUERY:
3771  case OP_NOTMINQUERYI:
3772  c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3773  minimize = (c & 1) != 0;
3774  min = rep_min[c]; /* Pick up values from tables; */
3775  max = rep_max[c]; /* zero for max => infinity */
3776  if (max == 0) max = INT_MAX;
3777 
3778  /* Common code for all repeated single-byte matches. */
3779 
3780  REPEATNOTCHAR:
3781  GETCHARINCTEST(fc, ecode);
3782 
3783  /* The code is duplicated for the caseless and caseful cases, for speed,
3784  since matching characters is likely to be quite common. First, ensure the
3785  minimum number of matches are present. If min = max, continue at the same
3786  level without recursing. Otherwise, if minimizing, keep trying the rest of
3787  the expression and advancing one matching character if failing, up to the
3788  maximum. Alternatively, if maximizing, find the maximum number of
3789  characters and work backwards. */
3790 
3791  DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3792  max, (char *)eptr));
3793 
3794  if (op >= OP_NOTSTARI) /* Caseless */
3795  {
3796 #ifdef SUPPORT_UTF
3797 #ifdef SUPPORT_UCP
3798  if (utf && fc > 127)
3799  foc = UCD_OTHERCASE(fc);
3800 #else
3801  if (utf && fc > 127)
3802  foc = fc;
3803 #endif /* SUPPORT_UCP */
3804  else
3805 #endif /* SUPPORT_UTF */
3806  foc = TABLE_GET(fc, md->fcc, fc);
3807 
3808 #ifdef SUPPORT_UTF
3809  if (utf)
3810  {
3811  register pcre_uint32 d;
3812  for (i = 1; i <= min; i++)
3813  {
3814  if (eptr >= md->end_subject)
3815  {
3816  SCHECK_PARTIAL();
3818  }
3819  GETCHARINC(d, eptr);
3820  if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3821  }
3822  }
3823  else
3824 #endif /* SUPPORT_UTF */
3825  /* Not UTF mode */
3826  {
3827  for (i = 1; i <= min; i++)
3828  {
3829  if (eptr >= md->end_subject)
3830  {
3831  SCHECK_PARTIAL();
3833  }
3834  if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3835  eptr++;
3836  }
3837  }
3838 
3839  if (min == max) continue;
3840 
3841  if (minimize)
3842  {
3843 #ifdef SUPPORT_UTF
3844  if (utf)
3845  {
3846  register pcre_uint32 d;
3847  for (fi = min;; fi++)
3848  {
3849  RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3850  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3851  if (fi >= max) RRETURN(MATCH_NOMATCH);
3852  if (eptr >= md->end_subject)
3853  {
3854  SCHECK_PARTIAL();
3856  }
3857  GETCHARINC(d, eptr);
3858  if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3859  }
3860  }
3861  else
3862 #endif /*SUPPORT_UTF */
3863  /* Not UTF mode */
3864  {
3865  for (fi = min;; fi++)
3866  {
3867  RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3868  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3869  if (fi >= max) RRETURN(MATCH_NOMATCH);
3870  if (eptr >= md->end_subject)
3871  {
3872  SCHECK_PARTIAL();
3874  }
3875  if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3876  eptr++;
3877  }
3878  }
3879  /* Control never gets here */
3880  }
3881 
3882  /* Maximize case */
3883 
3884  else
3885  {
3886  pp = eptr;
3887 
3888 #ifdef SUPPORT_UTF
3889  if (utf)
3890  {
3891  register pcre_uint32 d;
3892  for (i = min; i < max; i++)
3893  {
3894  int len = 1;
3895  if (eptr >= md->end_subject)
3896  {
3897  SCHECK_PARTIAL();
3898  break;
3899  }
3900  GETCHARLEN(d, eptr, len);
3901  if (fc == d || (unsigned int)foc == d) break;
3902  eptr += len;
3903  }
3904  if (possessive) continue; /* No backtracking */
3905  for(;;)
3906  {
3907  if (eptr <= pp) goto TAIL_RECURSE;
3908  RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3909  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910  eptr--;
3911  BACKCHAR(eptr);
3912  }
3913  }
3914  else
3915 #endif /* SUPPORT_UTF */
3916  /* Not UTF mode */
3917  {
3918  for (i = min; i < max; i++)
3919  {
3920  if (eptr >= md->end_subject)
3921  {
3922  SCHECK_PARTIAL();
3923  break;
3924  }
3925  if (fc == *eptr || foc == *eptr) break;
3926  eptr++;
3927  }
3928  if (possessive) continue; /* No backtracking */
3929  for (;;)
3930  {
3931  if (eptr == pp) goto TAIL_RECURSE;
3932  RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3933  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3934  eptr--;
3935  }
3936  }
3937  /* Control never gets here */
3938  }
3939  }
3940 
3941  /* Caseful comparisons */
3942 
3943  else
3944  {
3945 #ifdef SUPPORT_UTF
3946  if (utf)
3947  {
3948  register pcre_uint32 d;
3949  for (i = 1; i <= min; i++)
3950  {
3951  if (eptr >= md->end_subject)
3952  {
3953  SCHECK_PARTIAL();
3955  }
3956  GETCHARINC(d, eptr);
3957  if (fc == d) RRETURN(MATCH_NOMATCH);
3958  }
3959  }
3960  else
3961 #endif
3962  /* Not UTF mode */
3963  {
3964  for (i = 1; i <= min; i++)
3965  {
3966  if (eptr >= md->end_subject)
3967  {
3968  SCHECK_PARTIAL();
3970  }
3971  if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3972  }
3973  }
3974 
3975  if (min == max) continue;
3976 
3977  if (minimize)
3978  {
3979 #ifdef SUPPORT_UTF
3980  if (utf)
3981  {
3982  register pcre_uint32 d;
3983  for (fi = min;; fi++)
3984  {
3985  RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3986  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3987  if (fi >= max) RRETURN(MATCH_NOMATCH);
3988  if (eptr >= md->end_subject)
3989  {
3990  SCHECK_PARTIAL();
3992  }
3993  GETCHARINC(d, eptr);
3994  if (fc == d) RRETURN(MATCH_NOMATCH);
3995  }
3996  }
3997  else
3998 #endif
3999  /* Not UTF mode */
4000  {
4001  for (fi = min;; fi++)
4002  {
4003  RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4004  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4005  if (fi >= max) RRETURN(MATCH_NOMATCH);
4006  if (eptr >= md->end_subject)
4007  {
4008  SCHECK_PARTIAL();
4010  }
4011  if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4012  }
4013  }
4014  /* Control never gets here */
4015  }
4016 
4017  /* Maximize case */
4018 
4019  else
4020  {
4021  pp = eptr;
4022 
4023 #ifdef SUPPORT_UTF
4024  if (utf)
4025  {
4026  register pcre_uint32 d;
4027  for (i = min; i < max; i++)
4028  {
4029  int len = 1;
4030  if (eptr >= md->end_subject)
4031  {
4032  SCHECK_PARTIAL();
4033  break;
4034  }
4035  GETCHARLEN(d, eptr, len);
4036  if (fc == d) break;
4037  eptr += len;
4038  }
4039  if (possessive) continue; /* No backtracking */
4040  for(;;)
4041  {
4042  if (eptr <= pp) goto TAIL_RECURSE;
4043  RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4044  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4045  eptr--;
4046  BACKCHAR(eptr);
4047  }
4048  }
4049  else
4050 #endif
4051  /* Not UTF mode */
4052  {
4053  for (i = min; i < max; i++)
4054  {
4055  if (eptr >= md->end_subject)
4056  {
4057  SCHECK_PARTIAL();
4058  break;
4059  }
4060  if (fc == *eptr) break;
4061  eptr++;
4062  }
4063  if (possessive) continue; /* No backtracking */
4064  for (;;)
4065  {
4066  if (eptr == pp) goto TAIL_RECURSE;
4067  RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4068  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4069  eptr--;
4070  }
4071  }
4072  /* Control never gets here */
4073  }
4074  }
4075  /* Control never gets here */
4076 
4077  /* Match a single character type repeatedly; several different opcodes
4078  share code. This is very similar to the code for single characters, but we
4079  repeat it in the interests of efficiency. */
4080 
4081  case OP_TYPEEXACT:
4082  min = max = GET2(ecode, 1);
4083  minimize = TRUE;
4084  ecode += 1 + IMM2_SIZE;
4085  goto REPEATTYPE;
4086 
4087  case OP_TYPEUPTO:
4088  case OP_TYPEMINUPTO:
4089  min = 0;
4090  max = GET2(ecode, 1);
4091  minimize = *ecode == OP_TYPEMINUPTO;
4092  ecode += 1 + IMM2_SIZE;
4093  goto REPEATTYPE;
4094 
4095  case OP_TYPEPOSSTAR:
4096  possessive = TRUE;
4097  min = 0;
4098  max = INT_MAX;
4099  ecode++;
4100  goto REPEATTYPE;
4101 
4102  case OP_TYPEPOSPLUS:
4103  possessive = TRUE;
4104  min = 1;
4105  max = INT_MAX;
4106  ecode++;
4107  goto REPEATTYPE;
4108 
4109  case OP_TYPEPOSQUERY:
4110  possessive = TRUE;
4111  min = 0;
4112  max = 1;
4113  ecode++;
4114  goto REPEATTYPE;
4115 
4116  case OP_TYPEPOSUPTO:
4117  possessive = TRUE;
4118  min = 0;
4119  max = GET2(ecode, 1);
4120  ecode += 1 + IMM2_SIZE;
4121  goto REPEATTYPE;
4122 
4123  case OP_TYPESTAR:
4124  case OP_TYPEMINSTAR:
4125  case OP_TYPEPLUS:
4126  case OP_TYPEMINPLUS:
4127  case OP_TYPEQUERY:
4128  case OP_TYPEMINQUERY:
4129  c = *ecode++ - OP_TYPESTAR;
4130  minimize = (c & 1) != 0;
4131  min = rep_min[c]; /* Pick up values from tables; */
4132  max = rep_max[c]; /* zero for max => infinity */
4133  if (max == 0) max = INT_MAX;
4134 
4135  /* Common code for all repeated single character type matches. Note that
4136  in UTF-8 mode, '.' matches a character of any length, but for the other
4137  character types, the valid characters are all one-byte long. */
4138 
4139  REPEATTYPE:
4140  ctype = *ecode++; /* Code for the character type */
4141 
4142 #ifdef SUPPORT_UCP
4143  if (ctype == OP_PROP || ctype == OP_NOTPROP)
4144  {
4145  prop_fail_result = ctype == OP_NOTPROP;
4146  prop_type = *ecode++;
4147  prop_value = *ecode++;
4148  }
4149  else prop_type = -1;
4150 #endif
4151 
4152  /* First, ensure the minimum number of matches are present. Use inline
4153  code for maximizing the speed, and do the type test once at the start
4154  (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4155  is tidier. Also separate the UCP code, which can be the same for both UTF-8
4156  and single-bytes. */
4157 
4158  if (min > 0)
4159  {
4160 #ifdef SUPPORT_UCP
4161  if (prop_type >= 0)
4162  {
4163  switch(prop_type)
4164  {
4165  case PT_ANY:
4166  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4167  for (i = 1; i <= min; i++)
4168  {
4169  if (eptr >= md->end_subject)
4170  {
4171  SCHECK_PARTIAL();
4173  }
4174  GETCHARINCTEST(c, eptr);
4175  }
4176  break;
4177 
4178  case PT_LAMP:
4179  for (i = 1; i <= min; i++)
4180  {
4181  int chartype;
4182  if (eptr >= md->end_subject)
4183  {
4184  SCHECK_PARTIAL();
4186  }
4187  GETCHARINCTEST(c, eptr);
4188  chartype = UCD_CHARTYPE(c);
4189  if ((chartype == ucp_Lu ||
4190  chartype == ucp_Ll ||
4191  chartype == ucp_Lt) == prop_fail_result)
4193  }
4194  break;
4195 
4196  case PT_GC:
4197  for (i = 1; i <= min; i++)
4198  {
4199  if (eptr >= md->end_subject)
4200  {
4201  SCHECK_PARTIAL();
4203  }
4204  GETCHARINCTEST(c, eptr);
4205  if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4207  }
4208  break;
4209 
4210  case PT_PC:
4211  for (i = 1; i <= min; i++)
4212  {
4213  if (eptr >= md->end_subject)
4214  {
4215  SCHECK_PARTIAL();
4217  }
4218  GETCHARINCTEST(c, eptr);
4219  if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4221  }
4222  break;
4223 
4224  case PT_SC:
4225  for (i = 1; i <= min; i++)
4226  {
4227  if (eptr >= md->end_subject)
4228  {
4229  SCHECK_PARTIAL();
4231  }
4232  GETCHARINCTEST(c, eptr);
4233  if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4235  }
4236  break;
4237 
4238  case PT_ALNUM:
4239  for (i = 1; i <= min; i++)
4240  {
4241  int category;
4242  if (eptr >= md->end_subject)
4243  {
4244  SCHECK_PARTIAL();
4246  }
4247  GETCHARINCTEST(c, eptr);
4248  category = UCD_CATEGORY(c);
4249  if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4251  }
4252  break;
4253 
4254  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4255  which means that Perl space and POSIX space are now identical. PCRE
4256  was changed at release 8.34. */
4257 
4258  case PT_SPACE: /* Perl space */
4259  case PT_PXSPACE: /* POSIX space */
4260  for (i = 1; i <= min; i++)
4261  {
4262  if (eptr >= md->end_subject)
4263  {
4264  SCHECK_PARTIAL();
4266  }
4267  GETCHARINCTEST(c, eptr);
4268  switch(c)
4269  {
4270  HSPACE_CASES:
4271  VSPACE_CASES:
4272  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4273  break;
4274 
4275  default:
4276  if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4278  break;
4279  }
4280  }
4281  break;
4282 
4283  case PT_WORD:
4284  for (i = 1; i <= min; i++)
4285  {
4286  int category;
4287  if (eptr >= md->end_subject)
4288  {
4289  SCHECK_PARTIAL();
4291  }
4292  GETCHARINCTEST(c, eptr);
4293  category = UCD_CATEGORY(c);
4294  if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4295  == prop_fail_result)
4297  }
4298  break;
4299 
4300  case PT_CLIST:
4301  for (i = 1; i <= min; i++)
4302  {
4303  const pcre_uint32 *cp;
4304  if (eptr >= md->end_subject)
4305  {
4306  SCHECK_PARTIAL();
4308  }
4309  GETCHARINCTEST(c, eptr);
4310  cp = PRIV(ucd_caseless_sets) + prop_value;
4311  for (;;)
4312  {
4313  if (c < *cp)
4314  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4315  if (c == *cp++)
4316  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4317  }
4318  }
4319  break;
4320 
4321  case PT_UCNC:
4322  for (i = 1; i <= min; i++)
4323  {
4324  if (eptr >= md->end_subject)
4325  {
4326  SCHECK_PARTIAL();
4328  }
4329  GETCHARINCTEST(c, eptr);
4330  if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4331  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4332  c >= 0xe000) == prop_fail_result)
4334  }
4335  break;
4336 
4337  /* This should not occur */
4338 
4339  default:
4341  }
4342  }
4343 
4344  /* Match extended Unicode sequences. We will get here only if the
4345  support is in the binary; otherwise a compile-time error occurs. */
4346 
4347  else if (ctype == OP_EXTUNI)
4348  {
4349  for (i = 1; i <= min; i++)
4350  {
4351  if (eptr >= md->end_subject)
4352  {
4353  SCHECK_PARTIAL();
4355  }
4356  else
4357  {
4358  int lgb, rgb;
4359  GETCHARINCTEST(c, eptr);
4360  lgb = UCD_GRAPHBREAK(c);
4361  while (eptr < md->end_subject)
4362  {
4363  int len = 1;
4364  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4365  rgb = UCD_GRAPHBREAK(c);
4366  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4367  lgb = rgb;
4368  eptr += len;
4369  }
4370  }
4371  CHECK_PARTIAL();
4372  }
4373  }
4374 
4375  else
4376 #endif /* SUPPORT_UCP */
4377 
4378 /* Handle all other cases when the coding is UTF-8 */
4379 
4380 #ifdef SUPPORT_UTF
4381  if (utf) switch(ctype)
4382  {
4383  case OP_ANY:
4384  for (i = 1; i <= min; i++)
4385  {
4386  if (eptr >= md->end_subject)
4387  {
4388  SCHECK_PARTIAL();
4390  }
4391  if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4392  if (md->partial != 0 &&
4393  eptr + 1 >= md->end_subject &&
4394  NLBLOCK->nltype == NLTYPE_FIXED &&
4395  NLBLOCK->nllen == 2 &&
4396  UCHAR21(eptr) == NLBLOCK->nl[0])
4397  {
4398  md->hitend = TRUE;
4399  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4400  }
4401  eptr++;
4402  ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4403  }
4404  break;
4405 
4406  case OP_ALLANY:
4407  for (i = 1; i <= min; i++)
4408  {
4409  if (eptr >= md->end_subject)
4410  {
4411  SCHECK_PARTIAL();
4413  }
4414  eptr++;
4415  ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4416  }
4417  break;
4418 
4419  case OP_ANYBYTE:
4420  if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4421  eptr += min;
4422  break;
4423 
4424  case OP_ANYNL:
4425  for (i = 1; i <= min; i++)
4426  {
4427  if (eptr >= md->end_subject)
4428  {
4429  SCHECK_PARTIAL();
4431  }
4432  GETCHARINC(c, eptr);
4433  switch(c)
4434  {
4435  default: RRETURN(MATCH_NOMATCH);
4436 
4437  case CHAR_CR:
4438  if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4439  break;
4440 
4441  case CHAR_LF:
4442  break;
4443 
4444  case CHAR_VT:
4445  case CHAR_FF:
4446  case CHAR_NEL:
4447 #ifndef EBCDIC
4448  case 0x2028:
4449  case 0x2029:
4450 #endif /* Not EBCDIC */
4451  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4452  break;
4453  }
4454  }
4455  break;
4456 
4457  case OP_NOT_HSPACE:
4458  for (i = 1; i <= min; i++)
4459  {
4460  if (eptr >= md->end_subject)
4461  {
4462  SCHECK_PARTIAL();
4464  }
4465  GETCHARINC(c, eptr);
4466  switch(c)
4467  {
4468  HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
4469  default: break;
4470  }
4471  }
4472  break;
4473 
4474  case OP_HSPACE:
4475  for (i = 1; i <= min; i++)
4476  {
4477  if (eptr >= md->end_subject)
4478  {
4479  SCHECK_PARTIAL();
4481  }
4482  GETCHARINC(c, eptr);
4483  switch(c)
4484  {
4485  HSPACE_CASES: break; /* Byte and multibyte cases */
4486  default: RRETURN(MATCH_NOMATCH);
4487  }
4488  }
4489  break;
4490 
4491  case OP_NOT_VSPACE:
4492  for (i = 1; i <= min; i++)
4493  {
4494  if (eptr >= md->end_subject)
4495  {
4496  SCHECK_PARTIAL();
4498  }
4499  GETCHARINC(c, eptr);
4500  switch(c)
4501  {
4503  default: break;
4504  }
4505  }
4506  break;
4507 
4508  case OP_VSPACE:
4509  for (i = 1; i <= min; i++)
4510  {
4511  if (eptr >= md->end_subject)
4512  {
4513  SCHECK_PARTIAL();
4515  }
4516  GETCHARINC(c, eptr);
4517  switch(c)
4518  {
4519  VSPACE_CASES: break;
4520  default: RRETURN(MATCH_NOMATCH);
4521  }
4522  }
4523  break;
4524 
4525  case OP_NOT_DIGIT:
4526  for (i = 1; i <= min; i++)
4527  {
4528  if (eptr >= md->end_subject)
4529  {
4530  SCHECK_PARTIAL();
4532  }
4533  GETCHARINC(c, eptr);
4534  if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4536  }
4537  break;
4538 
4539  case OP_DIGIT:
4540  for (i = 1; i <= min; i++)
4541  {
4542  pcre_uint32 cc;
4543  if (eptr >= md->end_subject)
4544  {
4545  SCHECK_PARTIAL();
4547  }
4548  cc = UCHAR21(eptr);
4549  if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4551  eptr++;
4552  /* No need to skip more bytes - we know it's a 1-byte character */
4553  }
4554  break;
4555 
4556  case OP_NOT_WHITESPACE:
4557  for (i = 1; i <= min; i++)
4558  {
4559  pcre_uint32 cc;
4560  if (eptr >= md->end_subject)
4561  {
4562  SCHECK_PARTIAL();
4564  }
4565  cc = UCHAR21(eptr);
4566  if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4568  eptr++;
4569  ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4570  }
4571  break;
4572 
4573  case OP_WHITESPACE:
4574  for (i = 1; i <= min; i++)
4575  {
4576  pcre_uint32 cc;
4577  if (eptr >= md->end_subject)
4578  {
4579  SCHECK_PARTIAL();
4581  }
4582  cc = UCHAR21(eptr);
4583  if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4585  eptr++;
4586  /* No need to skip more bytes - we know it's a 1-byte character */
4587  }
4588  break;
4589 
4590  case OP_NOT_WORDCHAR:
4591  for (i = 1; i <= min; i++)
4592  {
4593  pcre_uint32 cc;
4594  if (eptr >= md->end_subject)
4595  {
4596  SCHECK_PARTIAL();
4598  }
4599  cc = UCHAR21(eptr);
4600  if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4602  eptr++;
4603  ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4604  }
4605  break;
4606 
4607  case OP_WORDCHAR:
4608  for (i = 1; i <= min; i++)
4609  {
4610  pcre_uint32 cc;
4611  if (eptr >= md->end_subject)
4612  {
4613  SCHECK_PARTIAL();
4615  }
4616  cc = UCHAR21(eptr);
4617  if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4619  eptr++;
4620  /* No need to skip more bytes - we know it's a 1-byte character */
4621  }
4622  break;
4623 
4624  default:
4626  } /* End switch(ctype) */
4627 
4628  else
4629 #endif /* SUPPORT_UTF */
4630 
4631  /* Code for the non-UTF-8 case for minimum matching of operators other
4632  than OP_PROP and OP_NOTPROP. */
4633 
4634  switch(ctype)
4635  {
4636  case OP_ANY:
4637  for (i = 1; i <= min; i++)
4638  {
4639  if (eptr >= md->end_subject)
4640  {
4641  SCHECK_PARTIAL();
4643  }
4644  if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4645  if (md->partial != 0 &&
4646  eptr + 1 >= md->end_subject &&
4647  NLBLOCK->nltype == NLTYPE_FIXED &&
4648  NLBLOCK->nllen == 2 &&
4649  *eptr == NLBLOCK->nl[0])
4650  {
4651  md->hitend = TRUE;
4652  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4653  }
4654  eptr++;
4655  }
4656  break;
4657 
4658  case OP_ALLANY:
4659  if (eptr > md->end_subject - min)
4660  {
4661  SCHECK_PARTIAL();
4663  }
4664  eptr += min;
4665  break;
4666 
4667  case OP_ANYBYTE:
4668  if (eptr > md->end_subject - min)
4669  {
4670  SCHECK_PARTIAL();
4672  }
4673  eptr += min;
4674  break;
4675 
4676  case OP_ANYNL:
4677  for (i = 1; i <= min; i++)
4678  {
4679  if (eptr >= md->end_subject)
4680  {
4681  SCHECK_PARTIAL();
4683  }
4684  switch(*eptr++)
4685  {
4686  default: RRETURN(MATCH_NOMATCH);
4687 
4688  case CHAR_CR:
4689  if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4690  break;
4691 
4692  case CHAR_LF:
4693  break;
4694 
4695  case CHAR_VT:
4696  case CHAR_FF:
4697  case CHAR_NEL:
4698 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4699  case 0x2028:
4700  case 0x2029:
4701 #endif
4702  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4703  break;
4704  }
4705  }
4706  break;
4707 
4708  case OP_NOT_HSPACE:
4709  for (i = 1; i <= min; i++)
4710  {
4711  if (eptr >= md->end_subject)
4712  {
4713  SCHECK_PARTIAL();
4715  }
4716  switch(*eptr++)
4717  {
4718  default: break;
4720 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4722 #endif
4724  }
4725  }
4726  break;
4727 
4728  case OP_HSPACE:
4729  for (i = 1; i <= min; i++)
4730  {
4731  if (eptr >= md->end_subject)
4732  {
4733  SCHECK_PARTIAL();
4735  }
4736  switch(*eptr++)
4737  {
4738  default: RRETURN(MATCH_NOMATCH);
4740 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4742 #endif
4743  break;
4744  }
4745  }
4746  break;
4747 
4748  case OP_NOT_VSPACE:
4749  for (i = 1; i <= min; i++)
4750  {
4751  if (eptr >= md->end_subject)
4752  {
4753  SCHECK_PARTIAL();
4755  }
4756  switch(*eptr++)
4757  {
4759 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4761 #endif
4763  default: break;
4764  }
4765  }
4766  break;
4767 
4768  case OP_VSPACE:
4769  for (i = 1; i <= min; i++)
4770  {
4771  if (eptr >= md->end_subject)
4772  {
4773  SCHECK_PARTIAL();
4775  }
4776  switch(*eptr++)
4777  {
4778  default: RRETURN(MATCH_NOMATCH);
4780 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4782 #endif
4783  break;
4784  }
4785  }
4786  break;
4787 
4788  case OP_NOT_DIGIT:
4789  for (i = 1; i <= min; i++)
4790  {
4791  if (eptr >= md->end_subject)
4792  {
4793  SCHECK_PARTIAL();
4795  }
4796  if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4798  eptr++;
4799  }
4800  break;
4801 
4802  case OP_DIGIT:
4803  for (i = 1; i <= min; i++)
4804  {
4805  if (eptr >= md->end_subject)
4806  {
4807  SCHECK_PARTIAL();
4809  }
4810  if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4812  eptr++;
4813  }
4814  break;
4815 
4816  case OP_NOT_WHITESPACE:
4817  for (i = 1; i <= min; i++)
4818  {
4819  if (eptr >= md->end_subject)
4820  {
4821  SCHECK_PARTIAL();
4823  }
4824  if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4826  eptr++;
4827  }
4828  break;
4829 
4830  case OP_WHITESPACE:
4831  for (i = 1; i <= min; i++)
4832  {
4833  if (eptr >= md->end_subject)
4834  {
4835  SCHECK_PARTIAL();
4837  }
4838  if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4840  eptr++;
4841  }
4842  break;
4843 
4844  case OP_NOT_WORDCHAR:
4845  for (i = 1; i <= min; i++)
4846  {
4847  if (eptr >= md->end_subject)
4848  {
4849  SCHECK_PARTIAL();
4851  }
4852  if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4854  eptr++;
4855  }
4856  break;
4857 
4858  case OP_WORDCHAR:
4859  for (i = 1; i <= min; i++)
4860  {
4861  if (eptr >= md->end_subject)
4862  {
4863  SCHECK_PARTIAL();
4865  }
4866  if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4868  eptr++;
4869  }
4870  break;
4871 
4872  default:
4874  }
4875  }
4876 
4877  /* If min = max, continue at the same level without recursing */
4878 
4879  if (min == max) continue;
4880 
4881  /* If minimizing, we have to test the rest of the pattern before each
4882  subsequent match. Again, separate the UTF-8 case for speed, and also
4883  separate the UCP cases. */
4884 
4885  if (minimize)
4886  {
4887 #ifdef SUPPORT_UCP
4888  if (prop_type >= 0)
4889  {
4890  switch(prop_type)
4891  {
4892  case PT_ANY:
4893  for (fi = min;; fi++)
4894  {
4895  RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4896  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897  if (fi >= max) RRETURN(MATCH_NOMATCH);
4898  if (eptr >= md->end_subject)
4899  {
4900  SCHECK_PARTIAL();
4902  }
4903  GETCHARINCTEST(c, eptr);
4904  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4905  }
4906  /* Control never gets here */
4907 
4908  case PT_LAMP:
4909  for (fi = min;; fi++)
4910  {
4911  int chartype;
4912  RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4913  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914  if (fi >= max) RRETURN(MATCH_NOMATCH);
4915  if (eptr >= md->end_subject)
4916  {
4917  SCHECK_PARTIAL();
4919  }
4920  GETCHARINCTEST(c, eptr);
4921  chartype = UCD_CHARTYPE(c);
4922  if ((chartype == ucp_Lu ||
4923  chartype == ucp_Ll ||
4924  chartype == ucp_Lt) == prop_fail_result)
4926  }
4927  /* Control never gets here */
4928 
4929  case PT_GC:
4930  for (fi = min;; fi++)
4931  {
4932  RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4933  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934  if (fi >= max) RRETURN(MATCH_NOMATCH);
4935  if (eptr >= md->end_subject)
4936  {
4937  SCHECK_PARTIAL();
4939  }
4940  GETCHARINCTEST(c, eptr);
4941  if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4943  }
4944  /* Control never gets here */
4945 
4946  case PT_PC:
4947  for (fi = min;; fi++)
4948  {
4949  RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4950  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4951  if (fi >= max) RRETURN(MATCH_NOMATCH);
4952  if (eptr >= md->end_subject)
4953  {
4954  SCHECK_PARTIAL();
4956  }
4957  GETCHARINCTEST(c, eptr);
4958  if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4960  }
4961  /* Control never gets here */
4962 
4963  case PT_SC:
4964  for (fi = min;; fi++)
4965  {
4966  RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4967  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4968  if (fi >= max) RRETURN(MATCH_NOMATCH);
4969  if (eptr >= md->end_subject)
4970  {
4971  SCHECK_PARTIAL();
4973  }
4974  GETCHARINCTEST(c, eptr);
4975  if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4977  }
4978  /* Control never gets here */
4979 
4980  case PT_ALNUM:
4981  for (fi = min;; fi++)
4982  {
4983  int category;
4984  RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4985  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4986  if (fi >= max) RRETURN(MATCH_NOMATCH);
4987  if (eptr >= md->end_subject)
4988  {
4989  SCHECK_PARTIAL();
4991  }
4992  GETCHARINCTEST(c, eptr);
4993  category = UCD_CATEGORY(c);
4994  if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4996  }
4997  /* Control never gets here */
4998 
4999  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5000  which means that Perl space and POSIX space are now identical. PCRE
5001  was changed at release 8.34. */
5002 
5003  case PT_SPACE: /* Perl space */
5004  case PT_PXSPACE: /* POSIX space */
5005  for (fi = min;; fi++)
5006  {
5007  RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5008  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5009  if (fi >= max) RRETURN(MATCH_NOMATCH);
5010  if (eptr >= md->end_subject)
5011  {
5012  SCHECK_PARTIAL();
5014  }
5015  GETCHARINCTEST(c, eptr);
5016  switch(c)
5017  {
5018  HSPACE_CASES:
5019  VSPACE_CASES:
5020  if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5021  break;
5022 
5023  default:
5024  if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5026  break;
5027  }
5028  }
5029  /* Control never gets here */
5030 
5031  case PT_WORD:
5032  for (fi = min;; fi++)
5033  {
5034  int category;
5035  RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5036  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5037  if (fi >= max) RRETURN(MATCH_NOMATCH);
5038  if (eptr >= md->end_subject)
5039  {
5040  SCHECK_PARTIAL();
5042  }
5043  GETCHARINCTEST(c, eptr);
5044  category = UCD_CATEGORY(c);
5045  if ((category == ucp_L ||
5046  category == ucp_N ||
5047  c == CHAR_UNDERSCORE)
5048  == prop_fail_result)
5050  }
5051  /* Control never gets here */
5052 
5053  case PT_CLIST:
5054  for (fi = min;; fi++)
5055  {
5056  const pcre_uint32 *cp;
5057  RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5058  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5059  if (fi >= max) RRETURN(MATCH_NOMATCH);
5060  if (eptr >= md->end_subject)
5061  {
5062  SCHECK_PARTIAL();
5064  }
5065  GETCHARINCTEST(c, eptr);
5066  cp = PRIV(ucd_caseless_sets) + prop_value;
5067  for (;;)
5068  {
5069  if (c < *cp)
5070  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5071  if (c == *cp++)
5072  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5073  }
5074  }
5075  /* Control never gets here */
5076 
5077  case PT_UCNC:
5078  for (fi = min;; fi++)
5079  {
5080  RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5081  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5082  if (fi >= max) RRETURN(MATCH_NOMATCH);
5083  if (eptr >= md->end_subject)
5084  {
5085  SCHECK_PARTIAL();
5087  }
5088  GETCHARINCTEST(c, eptr);
5089  if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5090  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5091  c >= 0xe000) == prop_fail_result)
5093  }
5094  /* Control never gets here */
5095 
5096  /* This should never occur */
5097  default:
5099  }
5100  }
5101 
5102  /* Match extended Unicode sequences. We will get here only if the
5103  support is in the binary; otherwise a compile-time error occurs. */
5104 
5105  else if (ctype == OP_EXTUNI)
5106  {
5107  for (fi = min;; fi++)
5108  {
5109  RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5110  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5111  if (fi >= max) RRETURN(MATCH_NOMATCH);
5112  if (eptr >= md->end_subject)
5113  {
5114  SCHECK_PARTIAL();
5116  }
5117  else
5118  {
5119  int lgb, rgb;
5120  GETCHARINCTEST(c, eptr);
5121  lgb = UCD_GRAPHBREAK(c);
5122  while (eptr < md->end_subject)
5123  {
5124  int len = 1;
5125  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5126  rgb = UCD_GRAPHBREAK(c);
5127  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5128  lgb = rgb;
5129  eptr += len;
5130  }
5131  }
5132  CHECK_PARTIAL();
5133  }
5134  }
5135  else
5136 #endif /* SUPPORT_UCP */
5137 
5138 #ifdef SUPPORT_UTF
5139  if (utf)
5140  {
5141  for (fi = min;; fi++)
5142  {
5143  RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5144  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5145  if (fi >= max) RRETURN(MATCH_NOMATCH);
5146  if (eptr >= md->end_subject)
5147  {
5148  SCHECK_PARTIAL();
5150  }
5151  if (ctype == OP_ANY && IS_NEWLINE(eptr))
5153  GETCHARINC(c, eptr);
5154  switch(ctype)
5155  {
5156  case OP_ANY: /* This is the non-NL case */
5157  if (md->partial != 0 && /* Take care with CRLF partial */
5158  eptr >= md->end_subject &&
5159  NLBLOCK->nltype == NLTYPE_FIXED &&
5160  NLBLOCK->nllen == 2 &&
5161  c == NLBLOCK->nl[0])
5162  {
5163  md->hitend = TRUE;
5164  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5165  }
5166  break;
5167 
5168  case OP_ALLANY:
5169  case OP_ANYBYTE:
5170  break;
5171 
5172  case OP_ANYNL:
5173  switch(c)
5174  {
5175  default: RRETURN(MATCH_NOMATCH);
5176  case CHAR_CR:
5177  if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5178  break;
5179 
5180  case CHAR_LF:
5181  break;
5182 
5183  case CHAR_VT:
5184  case CHAR_FF:
5185  case CHAR_NEL:
5186 #ifndef EBCDIC
5187  case 0x2028:
5188  case 0x2029:
5189 #endif /* Not EBCDIC */
5190  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5191  break;
5192  }
5193  break;
5194 
5195  case OP_NOT_HSPACE:
5196  switch(c)
5197  {
5199  default: break;
5200  }
5201  break;
5202 
5203  case OP_HSPACE:
5204  switch(c)
5205  {
5206  HSPACE_CASES: break;
5207  default: RRETURN(MATCH_NOMATCH);
5208  }
5209  break;
5210 
5211  case OP_NOT_VSPACE:
5212  switch(c)
5213  {
5215  default: break;
5216  }
5217  break;
5218 
5219  case OP_VSPACE:
5220  switch(c)
5221  {
5222  VSPACE_CASES: break;
5223  default: RRETURN(MATCH_NOMATCH);
5224  }
5225  break;
5226 
5227  case OP_NOT_DIGIT:
5228  if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5230  break;
5231 
5232  case OP_DIGIT:
5233  if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5235  break;
5236 
5237  case OP_NOT_WHITESPACE:
5238  if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5240  break;
5241 
5242  case OP_WHITESPACE:
5243  if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5245  break;
5246 
5247  case OP_NOT_WORDCHAR:
5248  if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5250  break;
5251 
5252  case OP_WORDCHAR:
5253  if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5255  break;
5256 
5257  default:
5259  }
5260  }
5261  }
5262  else
5263 #endif
5264  /* Not UTF mode */
5265  {
5266  for (fi = min;; fi++)
5267  {
5268  RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5269  if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5270  if (fi >= max) RRETURN(MATCH_NOMATCH);
5271  if (eptr >= md->end_subject)
5272  {
5273  SCHECK_PARTIAL();
5275  }
5276  if (ctype == OP_ANY && IS_NEWLINE(eptr))
5278  c = *eptr++;
5279  switch(ctype)
5280  {
5281  case OP_ANY: /* This is the non-NL case */
5282  if (md->partial != 0 && /* Take care with CRLF partial */
5283  eptr >= md->end_subject &&
5284  NLBLOCK->nltype == NLTYPE_FIXED &&
5285  NLBLOCK->nllen == 2 &&
5286  c == NLBLOCK->nl[0])
5287  {
5288  md->hitend = TRUE;
5289  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5290  }
5291  break;
5292 
5293  case OP_ALLANY:
5294  case OP_ANYBYTE:
5295  break;
5296 
5297  case OP_ANYNL:
5298  switch(c)
5299  {
5300  default: RRETURN(MATCH_NOMATCH);
5301  case CHAR_CR:
5302  if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5303  break;
5304 
5305  case CHAR_LF:
5306  break;
5307 
5308  case CHAR_VT:
5309  case CHAR_FF:
5310  case CHAR_NEL:
5311 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5312  case 0x2028:
5313  case 0x2029:
5314 #endif
5315  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5316  break;
5317  }
5318  break;
5319 
5320  case OP_NOT_HSPACE:
5321  switch(c)
5322  {
5323  default: break;
5325 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5327 #endif
5329  }
5330  break;
5331 
5332  case OP_HSPACE:
5333  switch(c)
5334  {
5335  default: RRETURN(MATCH_NOMATCH);
5337 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5339 #endif
5340  break;
5341  }
5342  break;
5343 
5344  case OP_NOT_VSPACE:
5345  switch(c)
5346  {
5347  default: break;
5349 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5351 #endif
5353  }
5354  break;
5355 
5356  case OP_VSPACE:
5357  switch(c)
5358  {
5359  default: RRETURN(MATCH_NOMATCH);
5361 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5363 #endif
5364  break;
5365  }
5366  break;
5367 
5368  case OP_NOT_DIGIT:
5369  if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5370  break;
5371 
5372  case OP_DIGIT:
5373  if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5374  break;
5375 
5376  case OP_NOT_WHITESPACE:
5377  if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5378  break;
5379 
5380  case OP_WHITESPACE:
5381  if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5382  break;
5383 
5384  case OP_NOT_WORDCHAR:
5385  if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5386  break;
5387 
5388  case OP_WORDCHAR:
5389  if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5390  break;
5391 
5392  default:
5394  }
5395  }
5396  }
5397  /* Control never gets here */
5398  }
5399 
5400  /* If maximizing, it is worth using inline code for speed, doing the type
5401  test once at the start (i.e. keep it out of the loop). Again, keep the
5402  UTF-8 and UCP stuff separate. */
5403 
5404  else
5405  {
5406  pp = eptr; /* Remember where we started */
5407 
5408 #ifdef SUPPORT_UCP
5409  if (prop_type >= 0)
5410  {
5411  switch(prop_type)
5412  {
5413  case PT_ANY:
5414  for (i = min; i < max; i++)
5415  {
5416  int len = 1;
5417  if (eptr >= md->end_subject)
5418  {
5419  SCHECK_PARTIAL();
5420  break;
5421  }
5422  GETCHARLENTEST(c, eptr, len);
5423  if (prop_fail_result) break;
5424  eptr+= len;
5425  }
5426  break;
5427 
5428  case PT_LAMP:
5429  for (i = min; i < max; i++)
5430  {
5431  int chartype;
5432  int len = 1;
5433  if (eptr >= md->end_subject)
5434  {
5435  SCHECK_PARTIAL();
5436  break;
5437  }
5438  GETCHARLENTEST(c, eptr, len);
5439  chartype = UCD_CHARTYPE(c);
5440  if ((chartype == ucp_Lu ||
5441  chartype == ucp_Ll ||
5442  chartype == ucp_Lt) == prop_fail_result)
5443  break;
5444  eptr+= len;
5445  }
5446  break;
5447 
5448  case PT_GC:
5449  for (i = min; i < max; i++)
5450  {
5451  int len = 1;
5452  if (eptr >= md->end_subject)
5453  {
5454  SCHECK_PARTIAL();
5455  break;
5456  }
5457  GETCHARLENTEST(c, eptr, len);
5458  if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5459  eptr+= len;
5460  }
5461  break;
5462 
5463  case PT_PC:
5464  for (i = min; i < max; i++)
5465  {
5466  int len = 1;
5467  if (eptr >= md->end_subject)
5468  {
5469  SCHECK_PARTIAL();
5470  break;
5471  }
5472  GETCHARLENTEST(c, eptr, len);
5473  if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5474  eptr+= len;
5475  }
5476  break;
5477 
5478  case PT_SC:
5479  for (i = min; i < max; i++)
5480  {
5481  int len = 1;
5482  if (eptr >= md->end_subject)
5483  {
5484  SCHECK_PARTIAL();
5485  break;
5486  }
5487  GETCHARLENTEST(c, eptr, len);
5488  if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5489  eptr+= len;
5490  }
5491  break;
5492 
5493  case PT_ALNUM:
5494  for (i = min; i < max; i++)
5495  {
5496  int category;
5497  int len = 1;
5498  if (eptr >= md->end_subject)
5499  {
5500  SCHECK_PARTIAL();
5501  break;
5502  }
5503  GETCHARLENTEST(c, eptr, len);
5504  category = UCD_CATEGORY(c);
5505  if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5506  break;
5507  eptr+= len;
5508  }
5509  break;
5510 
5511  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5512  which means that Perl space and POSIX space are now identical. PCRE
5513  was changed at release 8.34. */
5514 
5515  case PT_SPACE: /* Perl space */
5516  case PT_PXSPACE: /* POSIX space */
5517  for (i = min; i < max; i++)
5518  {
5519  int len = 1;
5520  if (eptr >= md->end_subject)
5521  {
5522  SCHECK_PARTIAL();
5523  break;
5524  }
5525  GETCHARLENTEST(c, eptr, len);