NCBI C++ ToolKit
pcre_dfa_exec.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language (but see
7 below for why this module is different).
8 
9  Written by Philip Hazel
10  Copyright (c) 1997-2017 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16  * Redistributions of source code must retain the above copyright notice,
17  this list of conditions and the following disclaimer.
18 
19  * Redistributions in binary form must reproduce the above copyright
20  notice, this list of conditions and the following disclaimer in the
21  documentation and/or other materials provided with the distribution.
22 
23  * Neither the name of the University of Cambridge nor the names of its
24  contributors may be used to endorse or promote products derived from
25  this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains the external function pcre_dfa_exec(), which is an
42 alternative matching function that uses a sort of DFA algorithm (not a true
43 FSM). This is NOT Perl-compatible, but it has advantages in certain
44 applications. */
45 
46 
47 /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48 the performance of his patterns greatly. I could not use it as it stood, as it
49 was not thread safe, and made assumptions about pattern sizes. Also, it caused
50 test 7 to loop, and test 9 to crash with a segfault.
51 
52 The issue is the check for duplicate states, which is done by a simple linear
53 search up the state list. (Grep for "duplicate" below to find the code.) For
54 many patterns, there will never be many states active at one time, so a simple
55 linear search is fine. In patterns that have many active states, it might be a
56 bottleneck. The suggested code used an indexing scheme to remember which states
57 had previously been used for each character, and avoided the linear search when
58 it knew there was no chance of a duplicate. This was implemented when adding
59 states to the state lists.
60 
61 I wrote some thread-safe, not-limited code to try something similar at the time
62 of checking for duplicates (instead of when adding states), using index vectors
63 on the stack. It did give a 13% improvement with one specially constructed
64 pattern for certain subject strings, but on other strings and on many of the
65 simpler patterns in the test suite it did worse. The major problem, I think,
66 was the extra time to initialize the index. This had to be done for each call
67 of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68 only once - I suspect this was the cause of the problems with the tests.)
69 
70 Overall, I concluded that the gains in some cases did not outweigh the losses
71 in others, so I abandoned this code. */
72 
73 
74 
75 #ifdef HAVE_CONFIG_H
76 #include "config.h"
77 #endif
78 
79 #define NLBLOCK md /* Block containing newline information */
80 #define PSSTART start_subject /* Field containing processed string start */
81 #define PSEND end_subject /* Field containing processed string end */
82 
83 #include "pcre_internal.h"
84 
85 
86 /* For use to indent debugging output */
87 
/* Run of blanks printed through "%.*s" with a precision of rlevel*2-2, so the
string must be at least as long as the deepest indent wanted. A one-character
string caps the indent at a single column. NOTE(review): the original width was
lost; 20 blanks covers recursion levels up to 11 - confirm against upstream. */

#define SP "                    "
89 
90 
91 /*************************************************
92 * Code parameters and static tables *
93 *************************************************/
94 
95 /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
96 into others, under special conditions. A gap of 20 between the blocks should be
97 enough. The resulting opcodes don't have to be less than 256 because they are
98 never stored, so we push them well clear of the normal opcodes. */
99 
/* Offsets added to a type-repeat opcode to form a private, never-stored opcode
for the corresponding special character type. Blocks are spaced 20 apart. */

#define OP_PROP_EXTRA    300   /* added when the type argument is \P or \p */
#define OP_EXTUNI_EXTRA  320   /* added when the type argument is \X */
#define OP_ANYNL_EXTRA   340   /* added when the type argument is \R */
#define OP_HSPACE_EXTRA  360   /* added when the type argument is \H or \h */
#define OP_VSPACE_EXTRA  380   /* added when the type argument is \V or \v */
105 
106 
107 /* This table identifies those opcodes that are followed immediately by a
108 character that is to be tested in some way. This makes it possible to
109 centralize the loading of these characters. In the case of Type * etc, the
110 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111 small value. Non-zero values in the table are the offsets from the opcode where
112 the character is to be found. ***NOTE*** If the start of this table is
113 modified, the three tables that follow must also be modified. */
114 
115 static const pcre_uint8 coptable[] = {
116  0, /* End */
117  0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
118  0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
119  0, 0, 0, /* Any, AllAny, Anybyte */
120  0, 0, /* \P, \p */
121  0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
122  0, /* \X */
123  0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
124  1, /* Char */
125  1, /* Chari */
126  1, /* not */
127  1, /* noti */
128  /* Positive single-char repeats */
129  1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
130  1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
131  1+IMM2_SIZE, /* exact */
132  1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
133  1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
134  1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
135  1+IMM2_SIZE, /* exact I */
136  1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
137  /* Negative single-char repeats - only for chars < 256 */
138  1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
139  1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
140  1+IMM2_SIZE, /* NOT exact */
141  1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
142  1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
143  1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
144  1+IMM2_SIZE, /* NOT exact I */
145  1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
146  /* Positive type repeats */
147  1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
148  1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
149  1+IMM2_SIZE, /* Type exact */
150  1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
151  /* Character class & ref repeats */
152  0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
153  0, 0, /* CRRANGE, CRMINRANGE */
154  0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
155  0, /* CLASS */
156  0, /* NCLASS */
157  0, /* XCLASS - variable length */
158  0, /* REF */
159  0, /* REFI */
160  0, /* DNREF */
161  0, /* DNREFI */
162  0, /* RECURSE */
163  0, /* CALLOUT */
164  0, /* Alt */
165  0, /* Ket */
166  0, /* KetRmax */
167  0, /* KetRmin */
168  0, /* KetRpos */
169  0, /* Reverse */
170  0, /* Assert */
171  0, /* Assert not */
172  0, /* Assert behind */
173  0, /* Assert behind not */
174  0, 0, /* ONCE, ONCE_NC */
175  0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
176  0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
177  0, 0, /* CREF, DNCREF */
178  0, 0, /* RREF, DNRREF */
179  0, /* DEF */
180  0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
181  0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
182  0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
183  0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
184  0, 0 /* CLOSE, SKIPZERO */
185 };
186 
187 /* This table identifies those opcodes that inspect a character. It is used to
188 remember the fact that a character could have been inspected when the end of
189 the subject is reached. ***NOTE*** If the start of this table is modified, the
190 two tables that follow must also be modified. */
191 
192 static const pcre_uint8 poptable[] = {
193  0, /* End */
194  0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
195  1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
196  1, 1, 1, /* Any, AllAny, Anybyte */
197  1, 1, /* \P, \p */
198  1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
199  1, /* \X */
200  0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
201  1, /* Char */
202  1, /* Chari */
203  1, /* not */
204  1, /* noti */
205  /* Positive single-char repeats */
206  1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
207  1, 1, 1, /* upto, minupto, exact */
208  1, 1, 1, 1, /* *+, ++, ?+, upto+ */
209  1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
210  1, 1, 1, /* upto I, minupto I, exact I */
211  1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */
212  /* Negative single-char repeats - only for chars < 256 */
213  1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
214  1, 1, 1, /* NOT upto, minupto, exact */
215  1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
216  1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
217  1, 1, 1, /* NOT upto I, minupto I, exact I */
218  1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */
219  /* Positive type repeats */
220  1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
221  1, 1, 1, /* Type upto, minupto, exact */
222  1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
223  /* Character class & ref repeats */
224  1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
225  1, 1, /* CRRANGE, CRMINRANGE */
226  1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
227  1, /* CLASS */
228  1, /* NCLASS */
229  1, /* XCLASS - variable length */
230  0, /* REF */
231  0, /* REFI */
232  0, /* DNREF */
233  0, /* DNREFI */
234  0, /* RECURSE */
235  0, /* CALLOUT */
236  0, /* Alt */
237  0, /* Ket */
238  0, /* KetRmax */
239  0, /* KetRmin */
240  0, /* KetRpos */
241  0, /* Reverse */
242  0, /* Assert */
243  0, /* Assert not */
244  0, /* Assert behind */
245  0, /* Assert behind not */
246  0, 0, /* ONCE, ONCE_NC */
247  0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
248  0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
249  0, 0, /* CREF, DNCREF */
250  0, 0, /* RREF, DNRREF */
251  0, /* DEF */
252  0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
253  0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
254  0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
255  0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
256  0, 0 /* CLOSE, SKIPZERO */
257 };
258 
259 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
260 and \w */
261 
262 static const pcre_uint8 toptable1[] = {
263  0, 0, 0, 0, 0, 0,
267  0, 0 /* OP_ANY, OP_ALLANY */
268 };
269 
270 static const pcre_uint8 toptable2[] = {
271  0, 0, 0, 0, 0, 0,
272  ctype_digit, 0,
273  ctype_space, 0,
274  ctype_word, 0,
275  1, 1 /* OP_ANY, OP_ALLANY */
276 };
277 
278 
279 /* Structure for holding data about a particular state, which is in effect the
280 current data for an active path through the match tree. It must consist
281 entirely of ints because the working vector we are passed, and which we put
282 these structures in, is a vector of ints. */
283 
/* One entry in a state list. Every member is an int because the blocks are
laid out directly in the caller's int workspace vector. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode */
  int count;                      /* Count for repeats */
  int data;                       /* Some use extra data */
} stateblock;

/* Number of ints occupied by one stateblock. The expansion is parenthesized
as a whole so that it behaves as a single operand wherever it is used. */

#define INTS_PER_STATEBLOCK  ((int)(sizeof(stateblock)/sizeof(int)))
291 
292 
293 #ifdef PCRE_DEBUG
294 /*************************************************
295 * Print character string *
296 *************************************************/
297 
298 /* Character string printing function for debugging.
299 
300 Arguments:
301  p points to string
302  length number of bytes
303  f where to print
304 
305 Returns: nothing
306 */
307 
308 static void
309 pchars(const pcre_uchar *p, int length, FILE *f)
310 {
311 pcre_uint32 c;
312 while (length-- > 0)
313  {
314  if (isprint(c = *(p++)))
315  fprintf(f, "%c", c);
316  else
317  fprintf(f, "\\x{%02x}", c);
318  }
319 }
320 #endif
321 
322 
323 
324 /*************************************************
325 * Execute a Regular Expression - DFA engine *
326 *************************************************/
327 
328 /* This internal function applies a compiled pattern to a subject string,
329 starting at a given point, using a DFA engine. This function is called from the
330 external one, possibly multiple times if the pattern is not anchored. The
331 function calls itself recursively for some kinds of subpattern.
332 
333 Arguments:
334  md the match_data block with fixed information
335  this_start_code the opening bracket of this subexpression's code
336  current_subject where we currently are in the subject string
337  start_offset start offset in the subject string
338  offsets vector to contain the matching string offsets
339  offsetcount size of same
340  workspace vector of workspace
341  wscount size of same
342  rlevel function call recursion level
343 
344 Returns: > 0 => number of match offset pairs placed in offsets
345  = 0 => offsets overflowed; longest matches are present
346  -1 => failed to match
347  < -1 => some kind of unexpected problem
348 
349 The following macros are used for adding states to the two state vectors (one
350 for the current character, one for the following character). */
351 
/* State-list append macros. Each one expects the enclosing function to have
in scope: the counter (active_count or new_count), the write pointer
(next_active_state or next_new_state), the per-list capacity wscount, and
rlevel for the debug trace. The counter is incremented on every use; if the
list is already full the enclosing function returns PCRE_ERROR_DFA_WSSIZE.
The do { } while (0) wrapper makes each use a single statement that needs a
trailing semicolon. Note that x, y and z are also passed to DPRINTF, so they
should be free of side effects. */

/* Append (offset x, count y) to the active list; data is left untouched. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count = (y); \
      next_active_state++; \
      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
      } \
    else return PCRE_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* Append (offset x, count y, data z) to the active list. */

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count = (y); \
      next_active_state->data = (z); \
      next_active_state++; \
      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
      } \
    else return PCRE_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* Append (offset x, count y) to the new list; data is left untouched. */

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count = (y); \
      next_new_state++; \
      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
      } \
    else return PCRE_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* Append (offset x, count y, data z) to the new list. */

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count = (y); \
      next_new_state->data = (z); \
      next_new_state++; \
      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
        (x), (y), (z), __LINE__)); \
      } \
    else return PCRE_ERROR_DFA_WSSIZE; \
    } \
  while (0)
394 
395 /* And now, here is the code */
396 
397 static int
400  const pcre_uchar *this_start_code,
401  const pcre_uchar *current_subject,
402  int start_offset,
403  int *offsets,
404  int offsetcount,
405  int *workspace,
406  int wscount,
407  int rlevel)
408 {
409 stateblock *active_states, *new_states, *temp_states;
410 stateblock *next_active_state, *next_new_state;
411 
412 const pcre_uint8 *ctypes, *lcc, *fcc;
413 const pcre_uchar *ptr;
414 const pcre_uchar *end_code, *first_op;
415 
416 dfa_recursion_info new_recursive;
417 
418 int active_count, new_count, match_count;
419 
420 /* Some fields in the md block are frequently referenced, so we load them into
421 independent variables in the hope that this will perform better. */
422 
423 const pcre_uchar *start_subject = md->start_subject;
424 const pcre_uchar *end_subject = md->end_subject;
425 const pcre_uchar *start_code = md->start_code;
426 
427 #ifdef SUPPORT_UTF
428 BOOL utf = (md->poptions & PCRE_UTF8) != 0;
429 #else
430 BOOL utf = FALSE;
431 #endif
432 
433 BOOL reset_could_continue = FALSE;
434 
435 rlevel++;
436 offsetcount &= (-2);
437 
438 wscount -= 2;
439 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
440  (2 * INTS_PER_STATEBLOCK);
441 
442 DPRINTF(("\n%.*s---------------------\n"
443  "%.*sCall to internal_dfa_exec f=%d\n",
444  rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
445 
446 ctypes = md->tables + ctypes_offset;
447 lcc = md->tables + lcc_offset;
448 fcc = md->tables + fcc_offset;
449 
450 match_count = PCRE_ERROR_NOMATCH; /* A negative number */
451 
452 active_states = (stateblock *)(workspace + 2);
453 next_new_state = new_states = active_states + wscount;
454 new_count = 0;
455 
456 first_op = this_start_code + 1 + LINK_SIZE +
457  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
458  *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
459  ? IMM2_SIZE:0);
460 
461 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
462 the alternative states onto the list, and find out where the end is. This
463 makes it possible to use this function recursively, when we want to stop at a
464 matching internal ket rather than at the end.
465 
466 If the first opcode in the first alternative is OP_REVERSE, we are dealing with
467 a backward assertion. In that case, we have to find out the maximum amount to
468 move back, and set up each alternative appropriately. */
469 
470 if (*first_op == OP_REVERSE)
471  {
472  int max_back = 0;
473  int gone_back;
474 
475  end_code = this_start_code;
476  do
477  {
478  int back = GET(end_code, 2+LINK_SIZE);
479  if (back > max_back) max_back = back;
480  end_code += GET(end_code, 1);
481  }
482  while (*end_code == OP_ALT);
483 
484  /* If we can't go back the amount required for the longest lookbehind
485  pattern, go back as far as we can; some alternatives may still be viable. */
486 
487 #ifdef SUPPORT_UTF
488  /* In character mode we have to step back character by character */
489 
490  if (utf)
491  {
492  for (gone_back = 0; gone_back < max_back; gone_back++)
493  {
494  if (current_subject <= start_subject) break;
495  current_subject--;
496  ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
497  }
498  }
499  else
500 #endif
501 
502  /* In byte-mode we can do this quickly. */
503 
504  {
505  gone_back = (current_subject - max_back < start_subject)?
506  (int)(current_subject - start_subject) : max_back;
507  current_subject -= gone_back;
508  }
509 
510  /* Save the earliest consulted character */
511 
512  if (current_subject < md->start_used_ptr)
513  md->start_used_ptr = current_subject;
514 
515  /* Now we can process the individual branches. */
516 
517  end_code = this_start_code;
518  do
519  {
520  int back = GET(end_code, 2+LINK_SIZE);
521  if (back <= gone_back)
522  {
523  int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
524  ADD_NEW_DATA(-bstate, 0, gone_back - back);
525  }
526  end_code += GET(end_code, 1);
527  }
528  while (*end_code == OP_ALT);
529  }
530 
531 /* This is the code for a "normal" subpattern (not a backward assertion). The
532 start of a whole pattern is always one of these. If we are at the top level,
533 we may be asked to restart matching from the same point that we reached for a
534 previous partial match. We still have to scan through the top-level branches to
535 find the end state. */
536 
537 else
538  {
539  end_code = this_start_code;
540 
541  /* Restarting */
542 
543  if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
544  {
545  do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
546  new_count = workspace[1];
547  if (!workspace[0])
548  memcpy(new_states, active_states, new_count * sizeof(stateblock));
549  }
550 
551  /* Not restarting */
552 
553  else
554  {
555  int length = 1 + LINK_SIZE +
556  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
557  *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
558  ? IMM2_SIZE:0);
559  do
560  {
561  ADD_NEW((int)(end_code - start_code + length), 0);
562  end_code += GET(end_code, 1);
563  length = 1 + LINK_SIZE;
564  }
565  while (*end_code == OP_ALT);
566  }
567  }
568 
569 workspace[0] = 0; /* Bit indicating which vector is current */
570 
571 DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
572 
573 /* Loop for scanning the subject */
574 
575 ptr = current_subject;
576 for (;;)
577  {
578  int i, j;
579  int clen, dlen;
580  pcre_uint32 c, d;
581  int forced_fail = 0;
582  BOOL partial_newline = FALSE;
583  BOOL could_continue = reset_could_continue;
584  reset_could_continue = FALSE;
585 
586  /* Make the new state list into the active state list and empty the
587  new state list. */
588 
589  temp_states = active_states;
590  active_states = new_states;
591  new_states = temp_states;
592  active_count = new_count;
593  new_count = 0;
594 
595  workspace[0] ^= 1; /* Remember for the restarting feature */
596  workspace[1] = active_count;
597 
598 #ifdef PCRE_DEBUG
599  printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
600  pchars(ptr, STRLEN_UC(ptr), stdout);
601  printf("\"\n");
602 
603  printf("%.*sActive states: ", rlevel*2-2, SP);
604  for (i = 0; i < active_count; i++)
605  printf("%d/%d ", active_states[i].offset, active_states[i].count);
606  printf("\n");
607 #endif
608 
609  /* Set the pointers for adding new states */
610 
611  next_active_state = active_states + active_count;
612  next_new_state = new_states;
613 
614  /* Load the current character from the subject outside the loop, as many
615  different states may want to look at it, and we assume that at least one
616  will. */
617 
618  if (ptr < end_subject)
619  {
620  clen = 1; /* Number of data items in the character */
621 #ifdef SUPPORT_UTF
622  GETCHARLENTEST(c, ptr, clen);
623 #else
624  c = *ptr;
625 #endif /* SUPPORT_UTF */
626  }
627  else
628  {
629  clen = 0; /* This indicates the end of the subject */
630  c = NOTACHAR; /* This value should never actually be used */
631  }
632 
633  /* Scan up the active states and act on each one. The result of an action
634  may be to add more states to the currently active list (e.g. on hitting a
635  parenthesis) or it may be to put states on the new list, for considering
636  when we move the character pointer on. */
637 
638  for (i = 0; i < active_count; i++)
639  {
640  stateblock *current_state = active_states + i;
641  BOOL caseless = FALSE;
642  const pcre_uchar *code;
643  int state_offset = current_state->offset;
644  int codevalue, rrc;
645  int count;
646 
647 #ifdef PCRE_DEBUG
648  printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
649  if (clen == 0) printf("EOL\n");
650  else if (c > 32 && c < 127) printf("'%c'\n", c);
651  else printf("0x%02x\n", c);
652 #endif
653 
654  /* A negative offset is a special case meaning "hold off going to this
655  (negated) state until the number of characters in the data field have
656  been skipped". If the could_continue flag was passed over from a previous
657  state, arrange for it to be passed on. */
658 
659  if (state_offset < 0)
660  {
661  if (current_state->data > 0)
662  {
663  DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
664  ADD_NEW_DATA(state_offset, current_state->count,
665  current_state->data - 1);
666  if (could_continue) reset_could_continue = TRUE;
667  continue;
668  }
669  else
670  {
671  current_state->offset = state_offset = -state_offset;
672  }
673  }
674 
675  /* Check for a duplicate state with the same count, and skip if found.
676  See the note at the head of this module about the possibility of improving
677  performance here. */
678 
679  for (j = 0; j < i; j++)
680  {
681  if (active_states[j].offset == state_offset &&
682  active_states[j].count == current_state->count)
683  {
684  DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
685  goto NEXT_ACTIVE_STATE;
686  }
687  }
688 
689  /* The state offset is the offset to the opcode */
690 
691  code = start_code + state_offset;
692  codevalue = *code;
693 
694  /* If this opcode inspects a character, but we are at the end of the
695  subject, remember the fact for use when testing for a partial match. */
696 
697  if (clen == 0 && poptable[codevalue] != 0)
698  could_continue = TRUE;
699 
700  /* If this opcode is followed by an inline character, load it. It is
701  tempting to test for the presence of a subject character here, but that
702  is wrong, because sometimes zero repetitions of the subject are
703  permitted.
704 
705  We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
706  argument that is not a data character - but is always one byte long because
707  the values are small. We have to take special action to deal with \P, \p,
708  \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
709  these ones to new opcodes. */
710 
711  if (coptable[codevalue] > 0)
712  {
713  dlen = 1;
714 #ifdef SUPPORT_UTF
715  if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
716 #endif /* SUPPORT_UTF */
717  d = code[coptable[codevalue]];
718  if (codevalue >= OP_TYPESTAR)
719  {
720  switch(d)
721  {
722  case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
723  case OP_NOTPROP:
724  case OP_PROP: codevalue += OP_PROP_EXTRA; break;
725  case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
726  case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
727  case OP_NOT_HSPACE:
728  case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
729  case OP_NOT_VSPACE:
730  case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
731  default: break;
732  }
733  }
734  }
735  else
736  {
737  dlen = 0; /* Not strictly necessary, but compilers moan */
738  d = NOTACHAR; /* if these variables are not set. */
739  }
740 
741 
742  /* Now process the individual opcodes */
743 
744  switch (codevalue)
745  {
746 /* ========================================================================== */
747  /* These cases are never obeyed. This is a fudge that causes a compile-
748  time error if the vectors coptable or poptable, which are indexed by
749  opcode, are not the correct length. It seems to be the only way to do
750  such a check at compile time, as the sizeof() operator does not work
751  in the C preprocessor. */
752 
753  case OP_TABLE_LENGTH:
754  case OP_TABLE_LENGTH +
755  ((sizeof(coptable) == OP_TABLE_LENGTH) &&
756  (sizeof(poptable) == OP_TABLE_LENGTH)):
757  break;
758 
759 /* ========================================================================== */
760  /* Reached a closing bracket. If not at the end of the pattern, carry
761  on with the next opcode. For repeating opcodes, also add the repeat
762  state. Note that KETRPOS will always be encountered at the end of the
763  subpattern, because the possessive subpattern repeats are always handled
764  using recursive calls. Thus, it never adds any new states.
765 
766  At the end of the (sub)pattern, unless we have an empty string and
767  PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
768  start of the subject, save the match data, shifting up all previous
769  matches so we always have the longest first. */
770 
771  case OP_KET:
772  case OP_KETRMIN:
773  case OP_KETRMAX:
774  case OP_KETRPOS:
775  if (code != end_code)
776  {
777  ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
778  if (codevalue != OP_KET)
779  {
780  ADD_ACTIVE(state_offset - GET(code, 1), 0);
781  }
782  }
783  else
784  {
785  if (ptr > current_subject ||
786  ((md->moptions & PCRE_NOTEMPTY) == 0 &&
787  ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
788  current_subject > start_subject + md->start_offset)))
789  {
790  if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
791  else if (match_count > 0 && ++match_count * 2 > offsetcount)
792  match_count = 0;
793  count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
794  if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
795  if (offsetcount >= 2)
796  {
797  offsets[0] = (int)(current_subject - start_subject);
798  offsets[1] = (int)(ptr - start_subject);
799  DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
800  offsets[1] - offsets[0], (char *)current_subject));
801  }
802  if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
803  {
804  DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
805  "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
806  match_count, rlevel*2-2, SP));
807  return match_count;
808  }
809  }
810  }
811  break;
812 
813 /* ========================================================================== */
814  /* These opcodes add to the current list of states without looking
815  at the current character. */
816 
817  /*-----------------------------------------------------------------*/
818  case OP_ALT:
819  do { code += GET(code, 1); } while (*code == OP_ALT);
820  ADD_ACTIVE((int)(code - start_code), 0);
821  break;
822 
823  /*-----------------------------------------------------------------*/
824  case OP_BRA:
825  case OP_SBRA:
826  do
827  {
828  ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
829  code += GET(code, 1);
830  }
831  while (*code == OP_ALT);
832  break;
833 
834  /*-----------------------------------------------------------------*/
835  case OP_CBRA:
836  case OP_SCBRA:
837  ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
838  code += GET(code, 1);
839  while (*code == OP_ALT)
840  {
841  ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
842  code += GET(code, 1);
843  }
844  break;
845 
846  /*-----------------------------------------------------------------*/
847  case OP_BRAZERO:
848  case OP_BRAMINZERO:
849  ADD_ACTIVE(state_offset + 1, 0);
850  code += 1 + GET(code, 2);
851  while (*code == OP_ALT) code += GET(code, 1);
852  ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
853  break;
854 
855  /*-----------------------------------------------------------------*/
856  case OP_SKIPZERO:
857  code += 1 + GET(code, 2);
858  while (*code == OP_ALT) code += GET(code, 1);
859  ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
860  break;
861 
862  /*-----------------------------------------------------------------*/
863  case OP_CIRC:
864  if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
865  { ADD_ACTIVE(state_offset + 1, 0); }
866  break;
867 
868  /*-----------------------------------------------------------------*/
869  case OP_CIRCM:
870  if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
871  (ptr != end_subject && WAS_NEWLINE(ptr)))
872  { ADD_ACTIVE(state_offset + 1, 0); }
873  break;
874 
875  /*-----------------------------------------------------------------*/
876  case OP_EOD:
877  if (ptr >= end_subject)
878  {
879  if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
880  could_continue = TRUE;
881  else { ADD_ACTIVE(state_offset + 1, 0); }
882  }
883  break;
884 
885  /*-----------------------------------------------------------------*/
886  case OP_SOD:
887  if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
888  break;
889 
890  /*-----------------------------------------------------------------*/
891  case OP_SOM:
892  if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
893  break;
894 
895 
896 /* ========================================================================== */
897  /* These opcodes inspect the next subject character, and sometimes
898  the previous one as well, but do not have an argument. The variable
899  clen contains the length of the current character and is zero if we are
900  at the end of the subject. */
901 
902  /*-----------------------------------------------------------------*/
903  case OP_ANY:
904  if (clen > 0 && !IS_NEWLINE(ptr))
905  {
906  if (ptr + 1 >= md->end_subject &&
907  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
908  NLBLOCK->nltype == NLTYPE_FIXED &&
909  NLBLOCK->nllen == 2 &&
910  c == NLBLOCK->nl[0])
911  {
912  could_continue = partial_newline = TRUE;
913  }
914  else
915  {
916  ADD_NEW(state_offset + 1, 0);
917  }
918  }
919  break;
920 
921  /*-----------------------------------------------------------------*/
922  case OP_ALLANY:
923  if (clen > 0)
924  { ADD_NEW(state_offset + 1, 0); }
925  break;
926 
927  /*-----------------------------------------------------------------*/
928  case OP_EODN:
929  if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
930  could_continue = TRUE;
931  else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
932  { ADD_ACTIVE(state_offset + 1, 0); }
933  break;
934 
935  /*-----------------------------------------------------------------*/
936  case OP_DOLL:
937  if ((md->moptions & PCRE_NOTEOL) == 0)
938  {
939  if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
940  could_continue = TRUE;
941  else if (clen == 0 ||
942  ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
943  (ptr == end_subject - md->nllen)
944  ))
945  { ADD_ACTIVE(state_offset + 1, 0); }
946  else if (ptr + 1 >= md->end_subject &&
947  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
948  NLBLOCK->nltype == NLTYPE_FIXED &&
949  NLBLOCK->nllen == 2 &&
950  c == NLBLOCK->nl[0])
951  {
952  if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
953  {
954  reset_could_continue = TRUE;
955  ADD_NEW_DATA(-(state_offset + 1), 0, 1);
956  }
957  else could_continue = partial_newline = TRUE;
958  }
959  }
960  break;
961 
962  /*-----------------------------------------------------------------*/
963  case OP_DOLLM:
964  if ((md->moptions & PCRE_NOTEOL) == 0)
965  {
966  if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
967  could_continue = TRUE;
968  else if (clen == 0 ||
969  ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
970  { ADD_ACTIVE(state_offset + 1, 0); }
971  else if (ptr + 1 >= md->end_subject &&
972  (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
973  NLBLOCK->nltype == NLTYPE_FIXED &&
974  NLBLOCK->nllen == 2 &&
975  c == NLBLOCK->nl[0])
976  {
977  if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
978  {
979  reset_could_continue = TRUE;
980  ADD_NEW_DATA(-(state_offset + 1), 0, 1);
981  }
982  else could_continue = partial_newline = TRUE;
983  }
984  }
985  else if (IS_NEWLINE(ptr))
986  { ADD_ACTIVE(state_offset + 1, 0); }
987  break;
988 
989  /*-----------------------------------------------------------------*/
990 
991  case OP_DIGIT:
992  case OP_WHITESPACE:
993  case OP_WORDCHAR:
994  if (clen > 0 && c < 256 &&
995  ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
996  { ADD_NEW(state_offset + 1, 0); }
997  break;
998 
999  /*-----------------------------------------------------------------*/
1000  case OP_NOT_DIGIT:
1001  case OP_NOT_WHITESPACE:
1002  case OP_NOT_WORDCHAR:
1003  if (clen > 0 && (c >= 256 ||
1004  ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
1005  { ADD_NEW(state_offset + 1, 0); }
1006  break;
1007 
1008  /*-----------------------------------------------------------------*/
1009  case OP_WORD_BOUNDARY:
1010  case OP_NOT_WORD_BOUNDARY:
1011  {
1012  int left_word, right_word;
1013 
1014  if (ptr > start_subject)
1015  {
1016  const pcre_uchar *temp = ptr - 1;
1017  if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1018 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1019  if (utf) { BACKCHAR(temp); }
1020 #endif
1021  GETCHARTEST(d, temp);
1022 #ifdef SUPPORT_UCP
1023  if ((md->poptions & PCRE_UCP) != 0)
1024  {
1025  if (d == '_') left_word = TRUE; else
1026  {
1027  int cat = UCD_CATEGORY(d);
1028  left_word = (cat == ucp_L || cat == ucp_N);
1029  }
1030  }
1031  else
1032 #endif
1033  left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1034  }
1035  else left_word = FALSE;
1036 
1037  if (clen > 0)
1038  {
1039 #ifdef SUPPORT_UCP
1040  if ((md->poptions & PCRE_UCP) != 0)
1041  {
1042  if (c == '_') right_word = TRUE; else
1043  {
1044  int cat = UCD_CATEGORY(c);
1045  right_word = (cat == ucp_L || cat == ucp_N);
1046  }
1047  }
1048  else
1049 #endif
1050  right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1051  }
1052  else right_word = FALSE;
1053 
1054  if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1055  { ADD_ACTIVE(state_offset + 1, 0); }
1056  }
1057  break;
1058 
1059 
1060  /*-----------------------------------------------------------------*/
1061  /* Check the next character by Unicode property. We will get here only
1062  if the support is in the binary; otherwise a compile-time error occurs.
1063  */
1064 
1065 #ifdef SUPPORT_UCP
1066  case OP_PROP:
1067  case OP_NOTPROP:
1068  if (clen > 0)
1069  {
1070  BOOL OK;
1071  const pcre_uint32 *cp;
1072  const ucd_record * prop = GET_UCD(c);
1073  switch(code[1])
1074  {
1075  case PT_ANY:
1076  OK = TRUE;
1077  break;
1078 
1079  case PT_LAMP:
1080  OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1081  prop->chartype == ucp_Lt;
1082  break;
1083 
1084  case PT_GC:
1085  OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1086  break;
1087 
1088  case PT_PC:
1089  OK = prop->chartype == code[2];
1090  break;
1091 
1092  case PT_SC:
1093  OK = prop->script == code[2];
1094  break;
1095 
1096  /* These are specials for combination cases. */
1097 
1098  case PT_ALNUM:
1099  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1100  PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1101  break;
1102 
1103  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1104  which means that Perl space and POSIX space are now identical. PCRE
1105  was changed at release 8.34. */
1106 
1107  case PT_SPACE: /* Perl space */
1108  case PT_PXSPACE: /* POSIX space */
1109  switch(c)
1110  {
1111  HSPACE_CASES:
1112  VSPACE_CASES:
1113  OK = TRUE;
1114  break;
1115 
1116  default:
1117  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1118  break;
1119  }
1120  break;
1121 
1122  case PT_WORD:
1123  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1124  PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1125  c == CHAR_UNDERSCORE;
1126  break;
1127 
1128  case PT_CLIST:
1129  cp = PRIV(ucd_caseless_sets) + code[2];
1130  for (;;)
1131  {
1132  if (c < *cp) { OK = FALSE; break; }
1133  if (c == *cp++) { OK = TRUE; break; }
1134  }
1135  break;
1136 
1137  case PT_UCNC:
1138  OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1139  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1140  c >= 0xe000;
1141  break;
1142 
1143  /* Should never occur, but keep compilers from grumbling. */
1144 
1145  default:
1146  OK = codevalue != OP_PROP;
1147  break;
1148  }
1149 
1150  if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1151  }
1152  break;
1153 #endif
1154 
1155 
1156 
1157 /* ========================================================================== */
1158  /* These opcodes likewise inspect the subject character, but have an
1159  argument that is not a data character. It is one of these opcodes:
1160  OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1161  OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1162 
1163  case OP_TYPEPLUS:
1164  case OP_TYPEMINPLUS:
1165  case OP_TYPEPOSPLUS:
1166  count = current_state->count; /* Already matched */
1167  if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1168  if (clen > 0)
1169  {
1170  if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1171  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1172  NLBLOCK->nltype == NLTYPE_FIXED &&
1173  NLBLOCK->nllen == 2 &&
1174  c == NLBLOCK->nl[0])
1175  {
1176  could_continue = partial_newline = TRUE;
1177  }
1178  else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1179  (c < 256 &&
1180  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1181  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1182  {
1183  if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1184  {
1185  active_count--; /* Remove non-match possibility */
1186  next_active_state--;
1187  }
1188  count++;
1189  ADD_NEW(state_offset, count);
1190  }
1191  }
1192  break;
1193 
1194  /*-----------------------------------------------------------------*/
1195  case OP_TYPEQUERY:
1196  case OP_TYPEMINQUERY:
1197  case OP_TYPEPOSQUERY:
1198  ADD_ACTIVE(state_offset + 2, 0);
1199  if (clen > 0)
1200  {
1201  if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1202  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1203  NLBLOCK->nltype == NLTYPE_FIXED &&
1204  NLBLOCK->nllen == 2 &&
1205  c == NLBLOCK->nl[0])
1206  {
1207  could_continue = partial_newline = TRUE;
1208  }
1209  else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1210  (c < 256 &&
1211  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1212  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1213  {
1214  if (codevalue == OP_TYPEPOSQUERY)
1215  {
1216  active_count--; /* Remove non-match possibility */
1217  next_active_state--;
1218  }
1219  ADD_NEW(state_offset + 2, 0);
1220  }
1221  }
1222  break;
1223 
1224  /*-----------------------------------------------------------------*/
1225  case OP_TYPESTAR:
1226  case OP_TYPEMINSTAR:
1227  case OP_TYPEPOSSTAR:
1228  ADD_ACTIVE(state_offset + 2, 0);
1229  if (clen > 0)
1230  {
1231  if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1232  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1233  NLBLOCK->nltype == NLTYPE_FIXED &&
1234  NLBLOCK->nllen == 2 &&
1235  c == NLBLOCK->nl[0])
1236  {
1237  could_continue = partial_newline = TRUE;
1238  }
1239  else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1240  (c < 256 &&
1241  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1242  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1243  {
1244  if (codevalue == OP_TYPEPOSSTAR)
1245  {
1246  active_count--; /* Remove non-match possibility */
1247  next_active_state--;
1248  }
1249  ADD_NEW(state_offset, 0);
1250  }
1251  }
1252  break;
1253 
1254  /*-----------------------------------------------------------------*/
1255  case OP_TYPEEXACT:
1256  count = current_state->count; /* Number already matched */
1257  if (clen > 0)
1258  {
1259  if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1260  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1261  NLBLOCK->nltype == NLTYPE_FIXED &&
1262  NLBLOCK->nllen == 2 &&
1263  c == NLBLOCK->nl[0])
1264  {
1265  could_continue = partial_newline = TRUE;
1266  }
1267  else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1268  (c < 256 &&
1269  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1270  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1271  {
1272  if (++count >= (int)GET2(code, 1))
1273  { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1274  else
1275  { ADD_NEW(state_offset, count); }
1276  }
1277  }
1278  break;
1279 
1280  /*-----------------------------------------------------------------*/
1281  case OP_TYPEUPTO:
1282  case OP_TYPEMINUPTO:
1283  case OP_TYPEPOSUPTO:
1284  ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1285  count = current_state->count; /* Number already matched */
1286  if (clen > 0)
1287  {
1288  if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1289  (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1290  NLBLOCK->nltype == NLTYPE_FIXED &&
1291  NLBLOCK->nllen == 2 &&
1292  c == NLBLOCK->nl[0])
1293  {
1294  could_continue = partial_newline = TRUE;
1295  }
1296  else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1297  (c < 256 &&
1298  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1299  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1300  {
1301  if (codevalue == OP_TYPEPOSUPTO)
1302  {
1303  active_count--; /* Remove non-match possibility */
1304  next_active_state--;
1305  }
1306  if (++count >= (int)GET2(code, 1))
1307  { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1308  else
1309  { ADD_NEW(state_offset, count); }
1310  }
1311  }
1312  break;
1313 
1314 /* ========================================================================== */
1315  /* These are virtual opcodes that are used when something like OP_TYPEPLUS
1316  has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, or a horizontal/vertical
1317  space type such as OP_HSPACE or OP_VSPACE as its argument. Handling them
1318  here keeps the code above fast for the other cases. The argument is in d. */
1319 
1320 #ifdef SUPPORT_UCP
1321  case OP_PROP_EXTRA + OP_TYPEPLUS:
1324  count = current_state->count; /* Already matched */
1325  if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1326  if (clen > 0)
1327  {
1328  BOOL OK;
1329  const pcre_uint32 *cp;
1330  const ucd_record * prop = GET_UCD(c);
1331  switch(code[2])
1332  {
1333  case PT_ANY:
1334  OK = TRUE;
1335  break;
1336 
1337  case PT_LAMP:
1338  OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1339  prop->chartype == ucp_Lt;
1340  break;
1341 
1342  case PT_GC:
1343  OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1344  break;
1345 
1346  case PT_PC:
1347  OK = prop->chartype == code[3];
1348  break;
1349 
1350  case PT_SC:
1351  OK = prop->script == code[3];
1352  break;
1353 
1354  /* These are specials for combination cases. */
1355 
1356  case PT_ALNUM:
1357  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1358  PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1359  break;
1360 
1361  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1362  which means that Perl space and POSIX space are now identical. PCRE
1363  was changed at release 8.34. */
1364 
1365  case PT_SPACE: /* Perl space */
1366  case PT_PXSPACE: /* POSIX space */
1367  switch(c)
1368  {
1369  HSPACE_CASES:
1370  VSPACE_CASES:
1371  OK = TRUE;
1372  break;
1373 
1374  default:
1375  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1376  break;
1377  }
1378  break;
1379 
1380  case PT_WORD:
1381  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1382  PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1383  c == CHAR_UNDERSCORE;
1384  break;
1385 
1386  case PT_CLIST:
1387  cp = PRIV(ucd_caseless_sets) + code[3];
1388  for (;;)
1389  {
1390  if (c < *cp) { OK = FALSE; break; }
1391  if (c == *cp++) { OK = TRUE; break; }
1392  }
1393  break;
1394 
1395  case PT_UCNC:
1396  OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1397  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1398  c >= 0xe000;
1399  break;
1400 
1401  /* Should never occur, but keep compilers from grumbling. */
1402 
1403  default:
1404  OK = codevalue != OP_PROP;
1405  break;
1406  }
1407 
1408  if (OK == (d == OP_PROP))
1409  {
1410  if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1411  {
1412  active_count--; /* Remove non-match possibility */
1413  next_active_state--;
1414  }
1415  count++;
1416  ADD_NEW(state_offset, count);
1417  }
1418  }
1419  break;
1420 
1421  /*-----------------------------------------------------------------*/
1425  count = current_state->count; /* Already matched */
1426  if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1427  if (clen > 0)
1428  {
1429  int lgb, rgb;
1430  const pcre_uchar *nptr = ptr + clen;
1431  int ncount = 0;
1432  if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1433  {
1434  active_count--; /* Remove non-match possibility */
1435  next_active_state--;
1436  }
1437  lgb = UCD_GRAPHBREAK(c);
1438  while (nptr < end_subject)
1439  {
1440  dlen = 1;
1441  if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1442  rgb = UCD_GRAPHBREAK(d);
1443  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1444  ncount++;
1445  lgb = rgb;
1446  nptr += dlen;
1447  }
1448  count++;
1449  ADD_NEW_DATA(-state_offset, count, ncount);
1450  }
1451  break;
1452 #endif
1453 
1454  /*-----------------------------------------------------------------*/
1455  case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1458  count = current_state->count; /* Already matched */
1459  if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1460  if (clen > 0)
1461  {
1462  int ncount = 0;
1463  switch (c)
1464  {
1465  case CHAR_VT:
1466  case CHAR_FF:
1467  case CHAR_NEL:
1468 #ifndef EBCDIC
1469  case 0x2028:
1470  case 0x2029:
1471 #endif /* Not EBCDIC */
1472  if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1473  goto ANYNL01;
1474 
1475  case CHAR_CR:
1476  if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1477  /* Fall through */
1478 
1479  ANYNL01:
1480  case CHAR_LF:
1481  if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1482  {
1483  active_count--; /* Remove non-match possibility */
1484  next_active_state--;
1485  }
1486  count++;
1487  ADD_NEW_DATA(-state_offset, count, ncount);
1488  break;
1489 
1490  default:
1491  break;
1492  }
1493  }
1494  break;
1495 
1496  /*-----------------------------------------------------------------*/
1500  count = current_state->count; /* Already matched */
1501  if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1502  if (clen > 0)
1503  {
1504  BOOL OK;
1505  switch (c)
1506  {
1507  VSPACE_CASES:
1508  OK = TRUE;
1509  break;
1510 
1511  default:
1512  OK = FALSE;
1513  break;
1514  }
1515 
1516  if (OK == (d == OP_VSPACE))
1517  {
1518  if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1519  {
1520  active_count--; /* Remove non-match possibility */
1521  next_active_state--;
1522  }
1523  count++;
1524  ADD_NEW_DATA(-state_offset, count, 0);
1525  }
1526  }
1527  break;
1528 
1529  /*-----------------------------------------------------------------*/
1533  count = current_state->count; /* Already matched */
1534  if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1535  if (clen > 0)
1536  {
1537  BOOL OK;
1538  switch (c)
1539  {
1540  HSPACE_CASES:
1541  OK = TRUE;
1542  break;
1543 
1544  default:
1545  OK = FALSE;
1546  break;
1547  }
1548 
1549  if (OK == (d == OP_HSPACE))
1550  {
1551  if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1552  {
1553  active_count--; /* Remove non-match possibility */
1554  next_active_state--;
1555  }
1556  count++;
1557  ADD_NEW_DATA(-state_offset, count, 0);
1558  }
1559  }
1560  break;
1561 
1562  /*-----------------------------------------------------------------*/
1563 #ifdef SUPPORT_UCP
1564  case OP_PROP_EXTRA + OP_TYPEQUERY:
1567  count = 4;
1568  goto QS1;
1569 
1570  case OP_PROP_EXTRA + OP_TYPESTAR:
1573  count = 0;
1574 
1575  QS1:
1576 
1577  ADD_ACTIVE(state_offset + 4, 0);
1578  if (clen > 0)
1579  {
1580  BOOL OK;
1581  const pcre_uint32 *cp;
1582  const ucd_record * prop = GET_UCD(c);
1583  switch(code[2])
1584  {
1585  case PT_ANY:
1586  OK = TRUE;
1587  break;
1588 
1589  case PT_LAMP:
1590  OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1591  prop->chartype == ucp_Lt;
1592  break;
1593 
1594  case PT_GC:
1595  OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1596  break;
1597 
1598  case PT_PC:
1599  OK = prop->chartype == code[3];
1600  break;
1601 
1602  case PT_SC:
1603  OK = prop->script == code[3];
1604  break;
1605 
1606  /* These are specials for combination cases. */
1607 
1608  case PT_ALNUM:
1609  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1610  PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1611  break;
1612 
1613  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1614  which means that Perl space and POSIX space are now identical. PCRE
1615  was changed at release 8.34. */
1616 
1617  case PT_SPACE: /* Perl space */
1618  case PT_PXSPACE: /* POSIX space */
1619  switch(c)
1620  {
1621  HSPACE_CASES:
1622  VSPACE_CASES:
1623  OK = TRUE;
1624  break;
1625 
1626  default:
1627  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1628  break;
1629  }
1630  break;
1631 
1632  case PT_WORD:
1633  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1634  PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1635  c == CHAR_UNDERSCORE;
1636  break;
1637 
1638  case PT_CLIST:
1639  cp = PRIV(ucd_caseless_sets) + code[3];
1640  for (;;)
1641  {
1642  if (c < *cp) { OK = FALSE; break; }
1643  if (c == *cp++) { OK = TRUE; break; }
1644  }
1645  break;
1646 
1647  case PT_UCNC:
1648  OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1649  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1650  c >= 0xe000;
1651  break;
1652 
1653  /* Should never occur, but keep compilers from grumbling. */
1654 
1655  default:
1656  OK = codevalue != OP_PROP;
1657  break;
1658  }
1659 
1660  if (OK == (d == OP_PROP))
1661  {
1662  if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1663  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1664  {
1665  active_count--; /* Remove non-match possibility */
1666  next_active_state--;
1667  }
1668  ADD_NEW(state_offset + count, 0);
1669  }
1670  }
1671  break;
1672 
1673  /*-----------------------------------------------------------------*/
1677  count = 2;
1678  goto QS2;
1679 
1683  count = 0;
1684 
1685  QS2:
1686 
1687  ADD_ACTIVE(state_offset + 2, 0);
1688  if (clen > 0)
1689  {
1690  int lgb, rgb;
1691  const pcre_uchar *nptr = ptr + clen;
1692  int ncount = 0;
1693  if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1694  codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1695  {
1696  active_count--; /* Remove non-match possibility */
1697  next_active_state--;
1698  }
1699  lgb = UCD_GRAPHBREAK(c);
1700  while (nptr < end_subject)
1701  {
1702  dlen = 1;
1703  if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1704  rgb = UCD_GRAPHBREAK(d);
1705  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1706  ncount++;
1707  lgb = rgb;
1708  nptr += dlen;
1709  }
1710  ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1711  }
1712  break;
1713 #endif
1714 
1715  /*-----------------------------------------------------------------*/
1719  count = 2;
1720  goto QS3;
1721 
1722  case OP_ANYNL_EXTRA + OP_TYPESTAR:
1725  count = 0;
1726 
1727  QS3:
1728  ADD_ACTIVE(state_offset + 2, 0);
1729  if (clen > 0)
1730  {
1731  int ncount = 0;
1732  switch (c)
1733  {
1734  case CHAR_VT:
1735  case CHAR_FF:
1736  case CHAR_NEL:
1737 #ifndef EBCDIC
1738  case 0x2028:
1739  case 0x2029:
1740 #endif /* Not EBCDIC */
1741  if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1742  goto ANYNL02;
1743 
1744  case CHAR_CR:
1745  if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
1746  /* Fall through */
1747 
1748  ANYNL02:
1749  case CHAR_LF:
1750  if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1751  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1752  {
1753  active_count--; /* Remove non-match possibility */
1754  next_active_state--;
1755  }
1756  ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1757  break;
1758 
1759  default:
1760  break;
1761  }
1762  }
1763  break;
1764 
1765  /*-----------------------------------------------------------------*/
1769  count = 2;
1770  goto QS4;
1771 
1775  count = 0;
1776 
1777  QS4:
1778  ADD_ACTIVE(state_offset + 2, 0);
1779  if (clen > 0)
1780  {
1781  BOOL OK;
1782  switch (c)
1783  {
1784  VSPACE_CASES:
1785  OK = TRUE;
1786  break;
1787 
1788  default:
1789  OK = FALSE;
1790  break;
1791  }
1792  if (OK == (d == OP_VSPACE))
1793  {
1794  if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1795  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1796  {
1797  active_count--; /* Remove non-match possibility */
1798  next_active_state--;
1799  }
1800  ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1801  }
1802  }
1803  break;
1804 
1805  /*-----------------------------------------------------------------*/
1809  count = 2;
1810  goto QS5;
1811 
1815  count = 0;
1816 
1817  QS5:
1818  ADD_ACTIVE(state_offset + 2, 0);
1819  if (clen > 0)
1820  {
1821  BOOL OK;
1822  switch (c)
1823  {
1824  HSPACE_CASES:
1825  OK = TRUE;
1826  break;
1827 
1828  default:
1829  OK = FALSE;
1830  break;
1831  }
1832 
1833  if (OK == (d == OP_HSPACE))
1834  {
1835  if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1836  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1837  {
1838  active_count--; /* Remove non-match possibility */
1839  next_active_state--;
1840  }
1841  ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1842  }
1843  }
1844  break;
1845 
1846  /*-----------------------------------------------------------------*/
1847 #ifdef SUPPORT_UCP
1848  case OP_PROP_EXTRA + OP_TYPEEXACT:
1849  case OP_PROP_EXTRA + OP_TYPEUPTO:
1852  if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1853  { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1854  count = current_state->count; /* Number already matched */
1855  if (clen > 0)
1856  {
1857  BOOL OK;
1858  const pcre_uint32 *cp;
1859  const ucd_record * prop = GET_UCD(c);
1860  switch(code[1 + IMM2_SIZE + 1])
1861  {
1862  case PT_ANY:
1863  OK = TRUE;
1864  break;
1865 
1866  case PT_LAMP:
1867  OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1868  prop->chartype == ucp_Lt;
1869  break;
1870 
1871  case PT_GC:
1872  OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1873  break;
1874 
1875  case PT_PC:
1876  OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1877  break;
1878 
1879  case PT_SC:
1880  OK = prop->script == code[1 + IMM2_SIZE + 2];
1881  break;
1882 
1883  /* These are specials for combination cases. */
1884 
1885  case PT_ALNUM:
1886  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1887  PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1888  break;
1889 
1890  /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1891  which means that Perl space and POSIX space are now identical. PCRE
1892  was changed at release 8.34. */
1893 
1894  case PT_SPACE: /* Perl space */
1895  case PT_PXSPACE: /* POSIX space */
1896  switch(c)
1897  {
1898  HSPACE_CASES:
1899  VSPACE_CASES:
1900  OK = TRUE;
1901  break;
1902 
1903  default:
1904  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1905  break;
1906  }
1907  break;
1908 
1909  case PT_WORD:
1910  OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1911  PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1912  c == CHAR_UNDERSCORE;
1913  break;
1914 
1915  case PT_CLIST:
1916  cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1917  for (;;)
1918  {
1919  if (c < *cp) { OK = FALSE; break; }
1920  if (c == *cp++) { OK = TRUE; break; }
1921  }
1922  break;
1923 
1924  case PT_UCNC:
1925  OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1926  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1927  c >= 0xe000;
1928  break;
1929 
1930  /* Should never occur, but keep compilers from grumbling. */
1931 
1932  default:
1933  OK = codevalue != OP_PROP;
1934  break;
1935  }
1936 
1937  if (OK == (d == OP_PROP))
1938  {
1939  if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1940  {
1941  active_count--; /* Remove non-match possibility */
1942  next_active_state--;
1943  }
1944  if (++count >= (int)GET2(code, 1))
1945  { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1946  else
1947  { ADD_NEW(state_offset, count); }
1948  }
1949  }
1950  break;
1951 
1952  /*-----------------------------------------------------------------*/
1957  if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1958  { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1959  count = current_state->count; /* Number already matched */
1960  if (clen > 0)
1961  {
1962  int lgb, rgb;
1963  const pcre_uchar *nptr = ptr + clen;
1964  int ncount = 0;
1965  if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1966  {
1967  active_count--; /* Remove non-match possibility */
1968  next_active_state--;
1969  }
1970  lgb = UCD_GRAPHBREAK(c);
1971  while (nptr < end_subject)
1972  {
1973  dlen = 1;
1974  if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1975  rgb = UCD_GRAPHBREAK(d);
1976  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1977  ncount++;
1978  lgb = rgb;
1979  nptr += dlen;
1980  }
1981  if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1982  reset_could_continue = TRUE;
1983  if (++count >= (int)GET2(code, 1))
1984  { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1985  else
1986  { ADD_NEW_DATA(-state_offset, count, ncount); }
1987  }
1988  break;
1989 #endif
1990 
1991  /*-----------------------------------------------------------------*/
1993  case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1996  if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1997  { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1998  count = current_state->count; /* Number already matched */
1999  if (clen > 0)
2000  {
2001  int ncount = 0;
2002  switch (c)
2003  {
2004  case CHAR_VT:
2005  case CHAR_FF:
2006  case CHAR_NEL:
2007 #ifndef EBCDIC
2008  case 0x2028:
2009  case 0x2029:
2010 #endif /* Not EBCDIC */
2011  if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2012  goto ANYNL03;
2013 
2014  case CHAR_CR:
2015  if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
2016  /* Fall through */
2017 
2018  ANYNL03:
2019  case CHAR_LF:
2020  if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2021  {
2022  active_count--; /* Remove non-match possibility */
2023  next_active_state--;
2024  }
2025  if (++count >= (int)GET2(code, 1))
2026  { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2027  else
2028  { ADD_NEW_DATA(-state_offset, count, ncount); }
2029  break;
2030 
2031  default:
2032  break;
2033  }
2034  }
2035  break;
2036 
2037  /*-----------------------------------------------------------------*/
2042  if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2043  { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2044  count = current_state->count; /* Number already matched */
2045  if (clen > 0)
2046  {
2047  BOOL OK;
2048  switch (c)
2049  {
2050  VSPACE_CASES:
2051  OK = TRUE;
2052  break;
2053 
2054  default:
2055  OK = FALSE;
2056  }
2057 
2058  if (OK == (d == OP_VSPACE))
2059  {
2060  if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2061  {
2062  active_count--; /* Remove non-match possibility */
2063  next_active_state--;
2064  }
2065  if (++count >= (int)GET2(code, 1))
2066  { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2067  else
2068  { ADD_NEW_DATA(-state_offset, count, 0); }
2069  }
2070  }
2071  break;
2072 
2073  /*-----------------------------------------------------------------*/
2078  if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2079  { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2080  count = current_state->count; /* Number already matched */
2081  if (clen > 0)
2082  {
2083  BOOL OK;
2084  switch (c)
2085  {
2086  HSPACE_CASES:
2087  OK = TRUE;
2088  break;
2089 
2090  default:
2091  OK = FALSE;
2092  break;
2093  }
2094 
2095  if (OK == (d == OP_HSPACE))
2096  {
2097  if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2098  {
2099  active_count--; /* Remove non-match possibility */
2100  next_active_state--;
2101  }
2102  if (++count >= (int)GET2(code, 1))
2103  { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2104  else
2105  { ADD_NEW_DATA(-state_offset, count, 0); }
2106  }
2107  }
2108  break;
2109 
2110 /* ========================================================================== */
2111  /* These opcodes are followed by a character that is usually compared
2112  to the current subject character; it is loaded into d. We still get
2113  here even if there is no subject character, because in some cases zero
2114  repetitions are permitted. */
2115 
2116  /*-----------------------------------------------------------------*/
2117  case OP_CHAR:
2118  if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
2119  break;
2120 
2121  /*-----------------------------------------------------------------*/
2122  case OP_CHARI:
2123  if (clen == 0) break;
2124 
2125 #ifdef SUPPORT_UTF
2126  if (utf)
2127  {
2128  if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2129  {
2130  unsigned int othercase;
2131  if (c < 128)
2132  othercase = fcc[c];
2133  else
2134  /* If we have Unicode property support, we can use it to test the
2135  other case of the character. */
2136 #ifdef SUPPORT_UCP
2137  othercase = UCD_OTHERCASE(c);
2138 #else
2139  othercase = NOTACHAR;
2140 #endif
2141 
2142  if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2143  }
2144  }
2145  else
2146 #endif /* SUPPORT_UTF */
2147  /* Not UTF mode */
2148  {
2149  if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2150  { ADD_NEW(state_offset + 2, 0); }
2151  }
2152  break;
2153 
2154 
2155 #ifdef SUPPORT_UCP
2156  /*-----------------------------------------------------------------*/
2157  /* This is a tricky one because it can match more than one character.
2158  Find out how many characters to skip, and then set up a negative state
2159  to wait for them to pass before continuing. */
2160 
2161  case OP_EXTUNI:
2162  if (clen > 0)
2163  {
2164  int lgb, rgb;
2165  const pcre_uchar *nptr = ptr + clen;
2166  int ncount = 0;
2167  lgb = UCD_GRAPHBREAK(c);
2168  while (nptr < end_subject)
2169  {
2170  dlen = 1;
2171  if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2172  rgb = UCD_GRAPHBREAK(d);
2173  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2174  ncount++;
2175  lgb = rgb;
2176  nptr += dlen;
2177  }
2178  if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2179  reset_could_continue = TRUE;
2180  ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2181  }
2182  break;
2183 #endif
2184 
2185  /*-----------------------------------------------------------------*/
2186  /* This is tricky, like EXTUNI, because it too can match more than one
2187  character (when CR is followed by LF). In this case, set up a negative
2188  state to wait for one character to pass before continuing. */
2189 
2190  case OP_ANYNL:
2191  if (clen > 0) switch(c)
2192  {
2193  case CHAR_VT:
2194  case CHAR_FF:
2195  case CHAR_NEL:
2196 #ifndef EBCDIC
2197  case 0x2028:
2198  case 0x2029:
2199 #endif /* Not EBCDIC */
2200  if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2201 
2202  case CHAR_LF:
2203  ADD_NEW(state_offset + 1, 0);
2204  break;
2205 
2206  case CHAR_CR:
2207  if (ptr + 1 >= end_subject)
2208  {
2209  ADD_NEW(state_offset + 1, 0);
2210  if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2211  reset_could_continue = TRUE;
2212  }
2213  else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
2214  {
2215  ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2216  }
2217  else
2218  {
2219  ADD_NEW(state_offset + 1, 0);
2220  }
2221  break;
2222  }
2223  break;
2224 
2225  /*-----------------------------------------------------------------*/
2226  case OP_NOT_VSPACE:
2227  if (clen > 0) switch(c)
2228  {
2229  VSPACE_CASES:
2230  break;
2231 
2232  default:
2233  ADD_NEW(state_offset + 1, 0);
2234  break;
2235  }
2236  break;
2237 
2238  /*-----------------------------------------------------------------*/
2239  case OP_VSPACE:
2240  if (clen > 0) switch(c)
2241  {
2242  VSPACE_CASES:
2243  ADD_NEW(state_offset + 1, 0);
2244  break;
2245 
2246  default:
2247  break;
2248  }
2249  break;
2250 
2251  /*-----------------------------------------------------------------*/
2252  case OP_NOT_HSPACE:
2253  if (clen > 0) switch(c)
2254  {
2255  HSPACE_CASES:
2256  break;
2257 
2258  default:
2259  ADD_NEW(state_offset + 1, 0);
2260  break;
2261  }
2262  break;
2263 
2264  /*-----------------------------------------------------------------*/
2265  case OP_HSPACE:
2266  if (clen > 0) switch(c)
2267  {
2268  HSPACE_CASES:
2269  ADD_NEW(state_offset + 1, 0);
2270  break;
2271 
2272  default:
2273  break;
2274  }
2275  break;
2276 
2277  /*-----------------------------------------------------------------*/
2278  /* Match a negated single character casefully. */
2279 
2280  case OP_NOT:
2281  if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2282  break;
2283 
2284  /*-----------------------------------------------------------------*/
2285  /* Match a negated single character caselessly. */
2286 
2287  case OP_NOTI:
2288  if (clen > 0)
2289  {
2290  pcre_uint32 otherd;
2291 #ifdef SUPPORT_UTF
2292  if (utf && d >= 128)
2293  {
2294 #ifdef SUPPORT_UCP
2295  otherd = UCD_OTHERCASE(d);
2296 #else
2297  otherd = d;
2298 #endif /* SUPPORT_UCP */
2299  }
2300  else
2301 #endif /* SUPPORT_UTF */
2302  otherd = TABLE_GET(d, fcc, d);
2303  if (c != d && c != otherd)
2304  { ADD_NEW(state_offset + dlen + 1, 0); }
2305  }
2306  break;
2307 
2308  /*-----------------------------------------------------------------*/
2309  case OP_PLUSI:
2310  case OP_MINPLUSI:
2311  case OP_POSPLUSI:
2312  case OP_NOTPLUSI:
2313  case OP_NOTMINPLUSI:
2314  case OP_NOTPOSPLUSI:
2315  caseless = TRUE;
2316  codevalue -= OP_STARI - OP_STAR;
2317 
2318  /* Fall through */
2319  case OP_PLUS:
2320  case OP_MINPLUS:
2321  case OP_POSPLUS:
2322  case OP_NOTPLUS:
2323  case OP_NOTMINPLUS:
2324  case OP_NOTPOSPLUS:
2325  count = current_state->count; /* Already matched */
2326  if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2327  if (clen > 0)
2328  {
2329  pcre_uint32 otherd = NOTACHAR;
2330  if (caseless)
2331  {
2332 #ifdef SUPPORT_UTF
2333  if (utf && d >= 128)
2334  {
2335 #ifdef SUPPORT_UCP
2336  otherd = UCD_OTHERCASE(d);
2337 #endif /* SUPPORT_UCP */
2338  }
2339  else
2340 #endif /* SUPPORT_UTF */
2341  otherd = TABLE_GET(d, fcc, d);
2342  }
2343  if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2344  {
2345  if (count > 0 &&
2346  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2347  {
2348  active_count--; /* Remove non-match possibility */
2349  next_active_state--;
2350  }
2351  count++;
2352  ADD_NEW(state_offset, count);
2353  }
2354  }
2355  break;
2356 
2357  /*-----------------------------------------------------------------*/
2358  case OP_QUERYI:
2359  case OP_MINQUERYI:
2360  case OP_POSQUERYI:
2361  case OP_NOTQUERYI:
2362  case OP_NOTMINQUERYI:
2363  case OP_NOTPOSQUERYI:
2364  caseless = TRUE;
2365  codevalue -= OP_STARI - OP_STAR;
2366  /* Fall through */
2367  case OP_QUERY:
2368  case OP_MINQUERY:
2369  case OP_POSQUERY:
2370  case OP_NOTQUERY:
2371  case OP_NOTMINQUERY:
2372  case OP_NOTPOSQUERY:
2373  ADD_ACTIVE(state_offset + dlen + 1, 0);
2374  if (clen > 0)
2375  {
2376  pcre_uint32 otherd = NOTACHAR;
2377  if (caseless)
2378  {
2379 #ifdef SUPPORT_UTF
2380  if (utf && d >= 128)
2381  {
2382 #ifdef SUPPORT_UCP
2383  otherd = UCD_OTHERCASE(d);
2384 #endif /* SUPPORT_UCP */
2385  }
2386  else
2387 #endif /* SUPPORT_UTF */
2388  otherd = TABLE_GET(d, fcc, d);
2389  }
2390  if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2391  {
2392  if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2393  {
2394  active_count--; /* Remove non-match possibility */
2395  next_active_state--;
2396  }
2397  ADD_NEW(state_offset + dlen + 1, 0);
2398  }
2399  }
2400  break;
2401 
2402  /*-----------------------------------------------------------------*/
2403  case OP_STARI:
2404  case OP_MINSTARI:
2405  case OP_POSSTARI:
2406  case OP_NOTSTARI:
2407  case OP_NOTMINSTARI:
2408  case OP_NOTPOSSTARI:
2409  caseless = TRUE;
2410  codevalue -= OP_STARI - OP_STAR;
2411  /* Fall through */
2412  case OP_STAR:
2413  case OP_MINSTAR:
2414  case OP_POSSTAR:
2415  case OP_NOTSTAR:
2416  case OP_NOTMINSTAR:
2417  case OP_NOTPOSSTAR:
2418  ADD_ACTIVE(state_offset + dlen + 1, 0);
2419  if (clen > 0)
2420  {
2421  pcre_uint32 otherd = NOTACHAR;
2422  if (caseless)
2423  {
2424 #ifdef SUPPORT_UTF
2425  if (utf && d >= 128)
2426  {
2427 #ifdef SUPPORT_UCP
2428  otherd = UCD_OTHERCASE(d);
2429 #endif /* SUPPORT_UCP */
2430  }
2431  else
2432 #endif /* SUPPORT_UTF */
2433  otherd = TABLE_GET(d, fcc, d);
2434  }
2435  if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2436  {
2437  if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2438  {
2439  active_count--; /* Remove non-match possibility */
2440  next_active_state--;
2441  }
2442  ADD_NEW(state_offset, 0);
2443  }
2444  }
2445  break;
2446 
2447  /*-----------------------------------------------------------------*/
2448  case OP_EXACTI:
2449  case OP_NOTEXACTI:
2450  caseless = TRUE;
2451  codevalue -= OP_STARI - OP_STAR;
2452  /* Fall through */
2453  case OP_EXACT:
2454  case OP_NOTEXACT:
2455  count = current_state->count; /* Number already matched */
2456  if (clen > 0)
2457  {
2458  pcre_uint32 otherd = NOTACHAR;
2459  if (caseless)
2460  {
2461 #ifdef SUPPORT_UTF
2462  if (utf && d >= 128)
2463  {
2464 #ifdef SUPPORT_UCP
2465  otherd = UCD_OTHERCASE(d);
2466 #endif /* SUPPORT_UCP */
2467  }
2468  else
2469 #endif /* SUPPORT_UTF */
2470  otherd = TABLE_GET(d, fcc, d);
2471  }
2472  if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2473  {
2474  if (++count >= (int)GET2(code, 1))
2475  { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2476  else
2477  { ADD_NEW(state_offset, count); }
2478  }
2479  }
2480  break;
2481 
2482  /*-----------------------------------------------------------------*/
2483  case OP_UPTOI:
2484  case OP_MINUPTOI:
2485  case OP_POSUPTOI:
2486  case OP_NOTUPTOI:
2487  case OP_NOTMINUPTOI:
2488  case OP_NOTPOSUPTOI:
2489  caseless = TRUE;
2490  codevalue -= OP_STARI - OP_STAR;
2491  /* Fall through */
2492  case OP_UPTO:
2493  case OP_MINUPTO:
2494  case OP_POSUPTO:
2495  case OP_NOTUPTO:
2496  case OP_NOTMINUPTO:
2497  case OP_NOTPOSUPTO:
2498  ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2499  count = current_state->count; /* Number already matched */
2500  if (clen > 0)
2501  {
2502  pcre_uint32 otherd = NOTACHAR;
2503  if (caseless)
2504  {
2505 #ifdef SUPPORT_UTF
2506  if (utf && d >= 128)
2507  {
2508 #ifdef SUPPORT_UCP
2509  otherd = UCD_OTHERCASE(d);
2510 #endif /* SUPPORT_UCP */
2511  }
2512  else
2513 #endif /* SUPPORT_UTF */
2514  otherd = TABLE_GET(d, fcc, d);
2515  }
2516  if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2517  {
2518  if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2519  {
2520  active_count--; /* Remove non-match possibility */
2521  next_active_state--;
2522  }
2523  if (++count >= (int)GET2(code, 1))
2524  { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2525  else
2526  { ADD_NEW(state_offset, count); }
2527  }
2528  }
2529  break;
2530 
2531 
2532 /* ========================================================================== */
2533  /* These are the class-handling opcodes */
2534 
2535  case OP_CLASS:
2536  case OP_NCLASS:
2537  case OP_XCLASS:
2538  {
2539  BOOL isinclass = FALSE;
2540  int next_state_offset;
2541  const pcre_uchar *ecode;
2542 
2543  /* For a simple class, there is always just a 32-byte table, and we
2544  can set isinclass from it. */
2545 
2546  if (codevalue != OP_XCLASS)
2547  {
2548  ecode = code + 1 + (32 / sizeof(pcre_uchar));
2549  if (clen > 0)
2550  {
2551  isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2552  ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2553  }
2554  }
2555 
2556  /* An extended class may have a table or a list of single characters,
2557  ranges, or both, and it may be positive or negative. There's a
2558  function that sorts all this out. */
2559 
2560  else
2561  {
2562  ecode = code + GET(code, 1);
2563  if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2564  }
2565 
2566  /* At this point, isinclass is set for all kinds of class, and ecode
2567  points to the byte after the end of the class. If there is a
2568  quantifier, this is where it will be. */
2569 
2570  next_state_offset = (int)(ecode - start_code);
2571 
2572  switch (*ecode)
2573  {
2574  case OP_CRSTAR:
2575  case OP_CRMINSTAR:
2576  case OP_CRPOSSTAR:
2577  ADD_ACTIVE(next_state_offset + 1, 0);
2578  if (isinclass)
2579  {
2580  if (*ecode == OP_CRPOSSTAR)
2581  {
2582  active_count--; /* Remove non-match possibility */
2583  next_active_state--;
2584  }
2585  ADD_NEW(state_offset, 0);
2586  }
2587  break;
2588 
2589  case OP_CRPLUS:
2590  case OP_CRMINPLUS:
2591  case OP_CRPOSPLUS:
2592  count = current_state->count; /* Already matched */
2593  if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2594  if (isinclass)
2595  {
2596  if (count > 0 && *ecode == OP_CRPOSPLUS)
2597  {
2598  active_count--; /* Remove non-match possibility */
2599  next_active_state--;
2600  }
2601  count++;
2602  ADD_NEW(state_offset, count);
2603  }
2604  break;
2605 
2606  case OP_CRQUERY:
2607  case OP_CRMINQUERY:
2608  case OP_CRPOSQUERY:
2609  ADD_ACTIVE(next_state_offset + 1, 0);
2610  if (isinclass)
2611  {
2612  if (*ecode == OP_CRPOSQUERY)
2613  {
2614  active_count--; /* Remove non-match possibility */
2615  next_active_state--;
2616  }
2617  ADD_NEW(next_state_offset + 1, 0);
2618  }
2619  break;
2620 
2621  case OP_CRRANGE:
2622  case OP_CRMINRANGE:
2623  case OP_CRPOSRANGE:
2624  count = current_state->count; /* Already matched */
2625  if (count >= (int)GET2(ecode, 1))
2626  { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2627  if (isinclass)
2628  {
2629  int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2630  if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
2631  {
2632  active_count--; /* Remove non-match possibility */
2633  next_active_state--;
2634  }
2635  if (++count >= max && max != 0) /* Max 0 => no limit */
2636  { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2637  else
2638  { ADD_NEW(state_offset, count); }
2639  }
2640  break;
2641 
2642  default:
2643  if (isinclass) { ADD_NEW(next_state_offset, 0); }
2644  break;
2645  }
2646  }
2647  break;
2648 
2649 /* ========================================================================== */
2650  /* These are the opcodes for fancy brackets of various kinds. We have
2651  to use recursion in order to handle them. The "always failing" assertion
2652  (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2653  though the other "backtracking verbs" are not supported. */
2654 
2655  case OP_FAIL:
2656  forced_fail++; /* Count FAILs for multiple states */
2657  break;
2658 
2659  case OP_ASSERT:
2660  case OP_ASSERT_NOT:
2661  case OP_ASSERTBACK:
2662  case OP_ASSERTBACK_NOT:
2663  {
2664  int rc;
2665  int local_offsets[2];
2666  int local_workspace[1000];
2667  const pcre_uchar *endasscode = code + GET(code, 1);
2668 
2669  while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2670 
2671  rc = internal_dfa_exec(
2672  md, /* static match data */
2673  code, /* this subexpression's code */
2674  ptr, /* where we currently are */
2675  (int)(ptr - start_subject), /* start offset */
2676  local_offsets, /* offset vector */
2677  sizeof(local_offsets)/sizeof(int), /* size of same */
2678  local_workspace, /* workspace vector */
2679  sizeof(local_workspace)/sizeof(int), /* size of same */
2680  rlevel); /* function recursion level */
2681 
2682  if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2683  if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2684  { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2685  }
2686  break;
2687 
2688  /*-----------------------------------------------------------------*/
2689  case OP_COND:
2690  case OP_SCOND:
2691  {
2692  int local_offsets[1000];
2693  int local_workspace[1000];
2694  int codelink = GET(code, 1);
2695  int condcode;
2696 
2697  /* Because of the way auto-callout works during compile, a callout item
2698  is inserted between OP_COND and an assertion condition. This does not
2699  happen for the other conditions. */
2700 
2701  if (code[LINK_SIZE+1] == OP_CALLOUT)
2702  {
2703  rrc = 0;
2704  if (PUBL(callout) != NULL)
2705  {
2706  PUBL(callout_block) cb;
2707  cb.version = 1; /* Version 1 of the callout block */
2708  cb.callout_number = code[LINK_SIZE+2];
2709  cb.offset_vector = offsets;
2710 #if defined COMPILE_PCRE8
2711  cb.subject = (PCRE_SPTR)start_subject;
2712 #elif defined COMPILE_PCRE16
2713  cb.subject = (PCRE_SPTR16)start_subject;
2714 #elif defined COMPILE_PCRE32
2715  cb.subject = (PCRE_SPTR32)start_subject;
2716 #endif
2717  cb.subject_length = (int)(end_subject - start_subject);
2718  cb.start_match = (int)(current_subject - start_subject);
2719  cb.current_position = (int)(ptr - start_subject);
2720  cb.pattern_position = GET(code, LINK_SIZE + 3);
2721  cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2722  cb.capture_top = 1;
2723  cb.capture_last = -1;
2724  cb.callout_data = md->callout_data;
2725  cb.mark = NULL; /* No (*MARK) support */
2726  if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
2727  }
2728  if (rrc > 0) break; /* Fail this thread */
2729  code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
2730  }
2731 
2732  condcode = code[LINK_SIZE+1];
2733 
2734  /* Back reference conditions and duplicate named recursion conditions
2735  are not supported */
2736 
2737  if (condcode == OP_CREF || condcode == OP_DNCREF ||
2738  condcode == OP_DNRREF)
2739  return PCRE_ERROR_DFA_UCOND;
2740 
2741  /* The DEFINE condition is always false, and the assertion (?!) is
2742  converted to OP_FAIL. */
2743 
2744  if (condcode == OP_DEF || condcode == OP_FAIL)
2745  { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2746 
2747  /* The only supported version of OP_RREF is for the value RREF_ANY,
2748  which means "test if in any recursion". We can't test for specifically
2749  recursed groups. */
2750 
2751  else if (condcode == OP_RREF)
2752  {
2753  int value = GET2(code, LINK_SIZE + 2);
2754  if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2755  if (md->recursive != NULL)
2756  { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2757  else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2758  }
2759 
2760  /* Otherwise, the condition is an assertion */
2761 
2762  else
2763  {
2764  int rc;
2765  const pcre_uchar *asscode = code + LINK_SIZE + 1;
2766  const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2767 
2768  while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2769 
2770  rc = internal_dfa_exec(
2771  md, /* fixed match data */
2772  asscode, /* this subexpression's code */
2773  ptr, /* where we currently are */
2774  (int)(ptr - start_subject), /* start offset */
2775  local_offsets, /* offset vector */
2776  sizeof(local_offsets)/sizeof(int), /* size of same */
2777  local_workspace, /* workspace vector */
2778  sizeof(local_workspace)/sizeof(int), /* size of same */
2779  rlevel); /* function recursion level */
2780 
2781  if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2782  if ((rc >= 0) ==
2783  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2784  { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2785  else
2786  { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2787  }
2788  }
2789  break;
2790 
2791  /*-----------------------------------------------------------------*/
2792  case OP_RECURSE:
2793  {
2794  dfa_recursion_info *ri;
2795  int local_offsets[1000];
2796  int local_workspace[1000];
2797  const pcre_uchar *callpat = start_code + GET(code, 1);
2798  int recno = (callpat == md->start_code)? 0 :
2799  GET2(callpat, 1 + LINK_SIZE);
2800  int rc;
2801 
2802  DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2803 
2804  /* Check for repeating a recursion without advancing the subject
2805  pointer. This should catch convoluted mutual recursions. (Some simple
2806  cases are caught at compile time.) */
2807 
2808  for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2809  if (recno == ri->group_num && ptr == ri->subject_position)
2810  return PCRE_ERROR_RECURSELOOP;
2811 
2812  /* Remember this recursion and where we started it so as to
2813  catch infinite loops. */
2814 
2815  new_recursive.group_num = recno;
2816  new_recursive.subject_position = ptr;
2817  new_recursive.prevrec = md->recursive;
2818  md->recursive = &new_recursive;
2819 
2820  rc = internal_dfa_exec(
2821  md, /* fixed match data */
2822  callpat, /* this subexpression's code */
2823  ptr, /* where we currently are */
2824  (int)(ptr - start_subject), /* start offset */
2825  local_offsets, /* offset vector */
2826  sizeof(local_offsets)/sizeof(int), /* size of same */
2827  local_workspace, /* workspace vector */
2828  sizeof(local_workspace)/sizeof(int), /* size of same */
2829  rlevel); /* function recursion level */
2830 
2831  md->recursive = new_recursive.prevrec; /* Done this recursion */
2832 
2833  DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2834  rc));
2835 
2836  /* Ran out of internal offsets */
2837 
2838  if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2839 
2840  /* For each successful matched substring, set up the next state with a
2841  count of characters to skip before trying it. Note that the count is in
2842  characters, not bytes. */
2843 
2844  if (rc > 0)
2845  {
2846  for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2847  {
2848  int charcount = local_offsets[rc+1] - local_offsets[rc];
2849 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2850  if (utf)
2851  {
2852  const pcre_uchar *p = start_subject + local_offsets[rc];
2853  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2854  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2855  }
2856 #endif
2857  if (charcount > 0)
2858  {
2859  ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2860  }
2861  else
2862  {
2863  ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2864  }
2865  }
2866  }
2867  else if (rc != PCRE_ERROR_NOMATCH) return rc;
2868  }
2869  break;
2870 
2871  /*-----------------------------------------------------------------*/
2872  case OP_BRAPOS:
2873  case OP_SBRAPOS:
2874  case OP_CBRAPOS:
2875  case OP_SCBRAPOS:
2876  case OP_BRAPOSZERO:
2877  {
2878  int charcount, matched_count;
2879  const pcre_uchar *local_ptr = ptr;
2880  BOOL allow_zero;
2881 
2882  if (codevalue == OP_BRAPOSZERO)
2883  {
2884  allow_zero = TRUE;
2885  codevalue = *(++code); /* Codevalue will be one of above BRAs */
2886  }
2887  else allow_zero = FALSE;
2888 
2889  /* Loop to match the subpattern as many times as possible as if it were
2890  a complete pattern. */
2891 
2892  for (matched_count = 0;; matched_count++)
2893  {
2894  int local_offsets[2];
2895  int local_workspace[1000];
2896 
2897  int rc = internal_dfa_exec(
2898  md, /* fixed match data */
2899  code, /* this subexpression's code */
2900  local_ptr, /* where we currently are */
2901  (int)(ptr - start_subject), /* start offset */
2902  local_offsets, /* offset vector */
2903  sizeof(local_offsets)/sizeof(int), /* size of same */
2904  local_workspace, /* workspace vector */
2905  sizeof(local_workspace)/sizeof(int), /* size of same */
2906  rlevel); /* function recursion level */
2907 
2908  /* Failed to match */
2909 
2910  if (rc < 0)
2911  {
2912  if (rc != PCRE_ERROR_NOMATCH) return rc;
2913  break;
2914  }
2915 
2916  /* Matched: break the loop if zero characters matched. */
2917 
2918  charcount = local_offsets[1] - local_offsets[0];
2919  if (charcount == 0) break;
2920  local_ptr += charcount; /* Advance temporary position ptr */
2921  }
2922 
2923  /* At this point we have matched the subpattern matched_count
2924  times, and local_ptr is pointing to the character after the end of the
2925  last match. */
2926 
2927  if (matched_count > 0 || allow_zero)
2928  {
2929  const pcre_uchar *end_subpattern = code;
2930  int next_state_offset;
2931 
2932  do { end_subpattern += GET(end_subpattern, 1); }
2933  while (*end_subpattern == OP_ALT);
2934  next_state_offset =
2935  (int)(end_subpattern - start_code + LINK_SIZE + 1);
2936 
2937  /* Optimization: if there are no more active states, and there
2938  are no new states yet set up, then skip over the subject string
2939  right here, to save looping. Otherwise, set up the new state to swing
2940  into action when the end of the matched substring is reached. */
2941 
2942  if (i + 1 >= active_count && new_count == 0)
2943  {
2944  ptr = local_ptr;
2945  clen = 0;
2946  ADD_NEW(next_state_offset, 0);
2947  }
2948  else
2949  {
2950  const pcre_uchar *p = ptr;
2951  const pcre_uchar *pp = local_ptr;
2952  charcount = (int)(pp - p);
2953 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2954  if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2955 #endif
2956  ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2957  }
2958  }
2959  }
2960  break;
2961 
2962  /*-----------------------------------------------------------------*/
2963  case OP_ONCE:
2964  case OP_ONCE_NC:
2965  {
2966  int local_offsets[2];
2967  int local_workspace[1000];
2968 
2969  int rc = internal_dfa_exec(
2970  md, /* fixed match data */
2971  code, /* this subexpression's code */
2972  ptr, /* where we currently are */
2973  (int)(ptr - start_subject), /* start offset */
2974  local_offsets, /* offset vector */
2975  sizeof(local_offsets)/sizeof(int), /* size of same */
2976  local_workspace, /* workspace vector */
2977  sizeof(local_workspace)/sizeof(int), /* size of same */
2978  rlevel); /* function recursion level */
2979 
2980  if (rc >= 0)
2981  {
2982  const pcre_uchar *end_subpattern = code;
2983  int charcount = local_offsets[1] - local_offsets[0];
2984  int next_state_offset, repeat_state_offset;
2985 
2986  do { end_subpattern += GET(end_subpattern, 1); }
2987  while (*end_subpattern == OP_ALT);
2988  next_state_offset =
2989  (int)(end_subpattern - start_code + LINK_SIZE + 1);
2990 
2991  /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2992  arrange for the repeat state also to be added to the relevant list.
2993  Calculate the offset, or set -1 for no repeat. */
2994 
2995  repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2996  *end_subpattern == OP_KETRMIN)?
2997  (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2998 
2999  /* If we have matched an empty string, add the next state at the
3000  current character pointer. This is important so that the duplicate
3001  checking kicks in, which is what breaks infinite loops that match an
3002  empty string. */
3003 
3004  if (charcount == 0)
3005  {
3006  ADD_ACTIVE(next_state_offset, 0);
3007  }
3008 
3009  /* Optimization: if there are no more active states, and there
3010  are no new states yet set up, then skip over the subject string
3011  right here, to save looping. Otherwise, set up the new state to swing
3012  into action when the end of the matched substring is reached. */
3013 
3014  else if (i + 1 >= active_count && new_count == 0)
3015  {
3016  ptr += charcount;
3017  clen = 0;
3018  ADD_NEW(next_state_offset, 0);
3019 
3020  /* If we are adding a repeat state at the new character position,
3021  we must fudge things so that it is the only current state.
3022  Otherwise, it might be a duplicate of one we processed before, and
3023  that would cause it to be skipped. */
3024 
3025  if (repeat_state_offset >= 0)
3026  {
3027  next_active_state = active_states;
3028  active_count = 0;
3029  i = -1;
3030  ADD_ACTIVE(repeat_state_offset, 0);
3031  }
3032  }
3033  else
3034  {
3035 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3036  if (utf)
3037  {
3038  const pcre_uchar *p = start_subject + local_offsets[0];
3039  const pcre_uchar *pp = start_subject + local_offsets[1];
3040  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3041  }
3042 #endif
3043  ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3044  if (repeat_state_offset >= 0)
3045  { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
3046  }
3047  }
3048  else if (rc != PCRE_ERROR_NOMATCH) return rc;
3049  }
3050  break;
3051 
3052 
3053 /* ========================================================================== */
3054  /* Handle callouts */
3055 
3056  case OP_CALLOUT:
3057  rrc = 0;
3058  if (PUBL(callout) != NULL)
3059  {
3060  PUBL(callout_block) cb;
3061  cb.version = 1; /* Version 1 of the callout block */
3062  cb.callout_number = code[1];
3063  cb.offset_vector = offsets;
3064 #if defined COMPILE_PCRE8
3065  cb.subject = (PCRE_SPTR)start_subject;
3066 #elif defined COMPILE_PCRE16
3067  cb.subject = (PCRE_SPTR16)start_subject;
3068 #elif defined COMPILE_PCRE32
3069  cb.subject = (PCRE_SPTR32)start_subject;
3070 #endif
3071  cb.subject_length = (int)(end_subject - start_subject);
3072  cb.start_match = (int)(current_subject - start_subject);
3073  cb.current_position = (int)(ptr - start_subject);
3074  cb.pattern_position = GET(code, 2);
3075  cb.next_item_length = GET(code, 2 + LINK_SIZE);
3076  cb.capture_top = 1;
3077  cb.capture_last = -1;
3078  cb.callout_data = md->callout_data;
3079  cb.mark = NULL; /* No (*MARK) support */
3080  if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
3081  }
3082  if (rrc == 0)
3083  { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3084  break;
3085 
3086 
3087 /* ========================================================================== */
3088  default: /* Unsupported opcode */
3089  return PCRE_ERROR_DFA_UITEM;
3090  }
3091 
3092  NEXT_ACTIVE_STATE: continue;
3093 
3094  } /* End of loop scanning active states */
3095 
3096  /* We have finished the processing at the current subject character. If no
3097  new states have been set for the next character, we have found all the
3098  matches that we are going to find. If we are at the top level and partial
3099  matching has been requested, check for appropriate conditions.
3100 
3101  The "forced_fail" variable counts the number of (*F) encountered for the
3102  character. If it is equal to the original active_count (saved in
3103  workspace[1]) it means that (*F) was found on every active state. In this
3104  case we don't want to give a partial match.
3105 
3106  The "could_continue" variable is true if a state could have continued but
3107  for the fact that the end of the subject was reached. */
3108 
3109  if (new_count <= 0)
3110  {
3111  if (rlevel == 1 && /* Top level, and */
3112  could_continue && /* Some could go on, and */
3113  forced_fail != workspace[1] && /* Not all forced fail & */
3114  ( /* either... */
3115  (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
3116  || /* or... */
3117  ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
3118  match_count < 0) /* no matches */
3119  ) && /* And... */
3120  (
3121  partial_newline || /* Either partial NL */
3122  ( /* or ... */
3123  ptr >= end_subject && /* End of subject and */
3124  ptr > md->start_used_ptr) /* Inspected non-empty string */
3125  )
3126  )
3127  match_count = PCRE_ERROR_PARTIAL;
3128  DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3129  "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3130  rlevel*2-2, SP));
3131  break; /* In effect, "return", but see the comment below */
3132  }
3133 
3134  /* One or more states are active for the next character. */
3135 
3136  ptr += clen; /* Advance to next subject character */
3137  } /* Loop to move along the subject string */
3138 
3139 /* Control gets here from "break" a few lines above. We do it this way because
3140 if we use "return" above, we have compiler trouble. Some compilers warn if
3141 there's nothing here because they think the function doesn't return a value. On
3142 the other hand, if we put a dummy statement here, some more clever compilers
3143 complain that it can't be reached. Sigh. */
3144 
3145 return match_count;
3146 }
3147 
3148 
3149 
3150 
3151 /*************************************************
3152 * Execute a Regular Expression - DFA engine *
3153 *************************************************/
3154 
3155 /* This external function applies a compiled re to a subject string using a DFA
3156 engine. This function calls the internal function multiple times if the pattern
3157 is not anchored.
3158 
3159 Arguments:
3160  argument_re points to the compiled expression
3161  extra_data points to extra data or is NULL
3162  subject points to the subject string
3163  length length of subject string (may contain binary zeros)
3164  start_offset where to start in the subject string
3165  options option bits
3166  offsets vector of match offsets
3167  offsetcount size of same
3168  workspace workspace vector
3169  wscount size of same
3170 
3171 Returns: > 0 => number of match offset pairs placed in offsets
3172  = 0 => offsets overflowed; longest matches are present
3173  -1 => failed to match
3174  < -1 => some kind of unexpected problem
3175 */
3176 
3177 #if defined COMPILE_PCRE8
3179 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3180  const char *subject, int length, int start_offset, int options, int *offsets,
3181  int offsetcount, int *workspace, int wscount)
3182 #elif defined COMPILE_PCRE16
3184 pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3185  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3186  int offsetcount, int *workspace, int wscount)
3187 #elif defined COMPILE_PCRE32
3189 pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3190  PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3191  int offsetcount, int *workspace, int wscount)
3192 #endif
3193 {
3194 REAL_PCRE *re = (REAL_PCRE *)argument_re;
3195 dfa_match_data match_block;
3196 dfa_match_data *md = &match_block;
3197 BOOL utf, anchored, startline, firstline;
3198 const pcre_uchar *current_subject, *end_subject;
3199 const pcre_study_data *study = NULL;
3200 
3201 const pcre_uchar *req_char_ptr;
3202 const pcre_uint8 *start_bits = NULL;
3203 BOOL has_first_char = FALSE;
3204 BOOL has_req_char = FALSE;
3205 pcre_uchar first_char = 0;
3206 pcre_uchar first_char2 = 0;
3207 pcre_uchar req_char = 0;
3208 pcre_uchar req_char2 = 0;
3209 int newline;
3210 
3211 /* Plausibility checks */
3212 
3213 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3214 if (re == NULL || subject == NULL || workspace == NULL ||
3215  (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3216 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3217 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3218 if (length < 0) return PCRE_ERROR_BADLENGTH;
3219 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3220 
3221 /* Check that the first field in the block is the magic number. If it is not,
3222 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3223 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3224 means that the pattern is likely compiled with different endianness. */
3225 
3226 if (re->magic_number != MAGIC_NUMBER)
3227  return re->magic_number == REVERSED_MAGIC_NUMBER?
3229 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3230 
3231 /* If restarting after a partial match, do some sanity checks on the contents
3232 of the workspace. */
3233 
3234 if ((options & PCRE_DFA_RESTART) != 0)
3235  {
3236  if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3237  workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3239  }
3240 
3241 /* Set up study, callout, and table data */
3242 
3243 md->tables = re->tables;
3244 md->callout_data = NULL;
3245 
3246 if (extra_data != NULL)
3247  {
3248  unsigned long int flags = extra_data->flags;
3249  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3250  study = (const pcre_study_data *)extra_data->study_data;
3253  return PCRE_ERROR_DFA_UMLIMIT;
3254  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3255  md->callout_data = extra_data->callout_data;
3256  if ((flags & PCRE_EXTRA_TABLES) != 0)
3257  md->tables = extra_data->tables;
3258  }
3259 
3260 /* Set some local values */
3261 
3262 current_subject = (const pcre_uchar *)subject + start_offset;
3263 end_subject = (const pcre_uchar *)subject + length;
3264 req_char_ptr = current_subject - 1;
3265 
3266 #ifdef SUPPORT_UTF
3267 /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3268 utf = (re->options & PCRE_UTF8) != 0;
3269 #else
3270 utf = FALSE;
3271 #endif
3272 
3273 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
3274  (re->options & PCRE_ANCHORED) != 0;
3275 
3276 /* The remaining fixed data for passing around. */
3277 
3278 md->start_code = (const pcre_uchar *)argument_re +
3279  re->name_table_offset + re->name_count * re->name_entry_size;
3280 md->start_subject = (const pcre_uchar *)subject;
3281 md->end_subject = end_subject;
3282 md->start_offset = start_offset;
3283 md->moptions = options;
3284 md->poptions = re->options;
3285 
3286 /* If the BSR option is not set at match time, copy what was set
3287 at compile time. */
3288 
3289 if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
3290  {
3291  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
3292  md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
3293 #ifdef BSR_ANYCRLF
3294  else md->moptions |= PCRE_BSR_ANYCRLF;
3295 #endif
3296  }
3297 
3298 /* Handle different types of newline. The three bits give eight cases. If
3299 nothing is set at run time, whatever was used at compile time applies. */
3300 
3301 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3303  {
3304  case 0: newline = NEWLINE; break; /* Compile-time default */
3305  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3306  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3307  case PCRE_NEWLINE_CR+
3308  PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
3309  case PCRE_NEWLINE_ANY: newline = -1; break;
3310  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3311  default: return PCRE_ERROR_BADNEWLINE;
3312  }
3313 
3314 if (newline == -2)
3315  {
3316  md->nltype = NLTYPE_ANYCRLF;
3317  }
3318 else if (newline < 0)
3319  {
3320  md->nltype = NLTYPE_ANY;
3321  }
3322 else
3323  {
3324  md->nltype = NLTYPE_FIXED;
3325  if (newline > 255)
3326  {
3327  md->nllen = 2;
3328  md->nl[0] = (newline >> 8) & 255;
3329  md->nl[1] = newline & 255;
3330  }
3331  else
3332  {
3333  md->nllen = 1;
3334  md->nl[0] = newline;
3335  }
3336  }
3337 
3338 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3339 back the character offset. */
3340 
3341 #ifdef SUPPORT_UTF
3342 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3343  {
3344  int erroroffset;
3345  int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3346  if (errorcode != 0)
3347  {
3348  if (offsetcount >= 2)
3349  {
3350  offsets[0] = erroroffset;
3351  offsets[1] = errorcode;
3352  }
3353 #if defined COMPILE_PCRE8
3354  return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3356 #elif defined COMPILE_PCRE16
3357  return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3359 #elif defined COMPILE_PCRE32
3360  return PCRE_ERROR_BADUTF32;
3361 #endif
3362  }
3363 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3364  if (start_offset > 0 && start_offset < length &&
3365  NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3367 #endif
3368  }
3369 #endif
3370 
3371 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3372 is a feature that makes it possible to save compiled regex and re-use them
3373 in other programs later. */
3374 
3375 if (md->tables == NULL) md->tables = PRIV(default_tables);
3376 
3377 /* The "must be at the start of a line" flags are used in a loop when finding
3378 where to start. */
3379 
3380 startline = (re->flags & PCRE_STARTLINE) != 0;
3381 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3382 
3383 /* Set up the first character to match, if available. The first_byte value is
3384 never set for an anchored regular expression, but the anchoring may be forced
3385 at run time, so we have to test for anchoring. The first char may be unset for
3386 an unanchored pattern, of course. If there's no first char and the pattern was
3387 studied, there may be a bitmap of possible first characters. */
3388 
3389 if (!anchored)
3390  {
3391  if ((re->flags & PCRE_FIRSTSET) != 0)
3392  {
3393  has_first_char = TRUE;
3394  first_char = first_char2 = (pcre_uchar)(re->first_char);
3395  if ((re->flags & PCRE_FCH_CASELESS) != 0)
3396  {
3397  first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3398 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3399  if (utf && first_char > 127)
3400  first_char2 = UCD_OTHERCASE(first_char);
3401 #endif
3402  }
3403  }
3404  else
3405  {
3406  if (!startline && study != NULL &&
3407  (study->flags & PCRE_STUDY_MAPPED) != 0)
3408  start_bits = study->start_bits;
3409  }
3410  }
3411 
3412 /* For anchored or unanchored matches, there may be a "last known required
3413 character" set. */
3414 
3415 if ((re->flags & PCRE_REQCHSET) != 0)
3416  {
3417  has_req_char = TRUE;
3418  req_char = req_char2 = (pcre_uchar)(re->req_char);
3419  if ((re->flags & PCRE_RCH_CASELESS) != 0)
3420  {
3421  req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3422 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3423  if (utf && req_char > 127)
3424  req_char2 = UCD_OTHERCASE(req_char);
3425 #endif
3426  }
3427  }
3428 
3429 /* Call the main matching function, looping for a non-anchored regex after a
3430 failed match. If not restarting, perform certain optimizations at the start of
3431 a match. */
3432 
3433 for (;;)
3434  {
3435  int rc;
3436 
3437  if ((options & PCRE_DFA_RESTART) == 0)
3438  {
3439  const pcre_uchar *save_end_subject = end_subject;
3440 
3441  /* If firstline is TRUE, the start of the match is constrained to the first
3442  line of a multiline string. Implement this by temporarily adjusting
3443  end_subject so that we stop scanning at a newline. If the match fails at
3444  the newline, later code breaks this loop. */
3445 
3446  if (firstline)
3447  {
3448  PCRE_PUCHAR t = current_subject;
3449 #ifdef SUPPORT_UTF
3450  if (utf)
3451  {
3452  while (t < md->end_subject && !IS_NEWLINE(t))
3453  {
3454  t++;
3455  ACROSSCHAR(t < end_subject, *t, t++);
3456  }
3457  }
3458  else
3459 #endif
3460  while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3461  end_subject = t;
3462  }
3463 
3464  /* There are some optimizations that avoid running the match if a known
3465  starting point is not found. However, there is an option that disables
3466  these, for testing and for ensuring that all callouts do actually occur.
3467  The option can be set in the regex by (*NO_START_OPT) or passed in
3468  match-time options. */
3469 
3470  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3471  {
3472  /* Advance to a known first pcre_uchar (i.e. data item) */
3473 
3474  if (has_first_char)
3475  {
3476  if (first_char != first_char2)
3477  {
3478  pcre_uchar csc;
3479  while (current_subject < end_subject &&
3480  (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
3481  current_subject++;
3482  }
3483  else
3484  while (current_subject < end_subject &&
3485  UCHAR21TEST(current_subject) != first_char)
3486  current_subject++;
3487  }
3488 
3489  /* Or to just after a linebreak for a multiline match if possible */
3490 
3491  else if (startline)
3492  {
3493  if (current_subject > md->start_subject + start_offset)
3494  {
3495 #ifdef SUPPORT_UTF
3496  if (utf)
3497  {
3498  while (current_subject < end_subject &&
3499  !WAS_NEWLINE(current_subject))
3500  {
3501  current_subject++;
3502  ACROSSCHAR(current_subject < end_subject, *current_subject,
3503  current_subject++);
3504  }
3505  }
3506  else
3507 #endif
3508  while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3509  current_subject++;
3510 
3511  /* If we have just passed a CR and the newline option is ANY or
3512  ANYCRLF, and we are now at a LF, advance the match position by one
3513  more character. */
3514 
3515  if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3516  (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3517  current_subject < end_subject &&
3518  UCHAR21TEST(current_subject) == CHAR_NL)
3519  current_subject++;
3520  }
3521  }
3522 
3523  /* Advance to a non-unique first pcre_uchar after study */
3524 
3525  else if (start_bits != NULL)
3526  {
3527  while (current_subject < end_subject)
3528  {
3529  register pcre_uint32 c = UCHAR21TEST(current_subject);
3530 #ifndef COMPILE_PCRE8
3531  if (c > 255) c = 255;
3532 #endif
3533  if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
3534  current_subject++;
3535  }
3536  }
3537  }
3538 
3539  /* Restore fudged end_subject */
3540 
3541  end_subject = save_end_subject;
3542 
3543  /* The following two optimizations are disabled for partial matching or if
3544  disabling is explicitly requested (and of course, by the test above, this
3545  code is not obeyed when restarting after a partial match). */
3546 
3547  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3548  (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3549  {
3550  /* If the pattern was studied, a minimum subject length may be set. This
3551  is a lower bound; no actual string of that length may actually match the
3552  pattern. Although the value is, strictly, in characters, we treat it as
3553  in pcre_uchar units to avoid spending too much time in this optimization.
3554  */
3555 
3556  if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3557  (pcre_uint32)(end_subject - current_subject) < study->minlength)
3558  return PCRE_ERROR_NOMATCH;
3559 
3560  /* If req_char is set, we know that that pcre_uchar must appear in the
3561  subject for the match to succeed. If the first pcre_uchar is set,
3562  req_char must be later in the subject; otherwise the test starts at the
3563  match point. This optimization can save a huge amount of work in patterns
3564  with nested unlimited repeats that aren't going to match. Writing
3565  separate code for cased/caseless versions makes it go faster, as does
3566  using an autoincrement and backing off on a match.
3567 
3568  HOWEVER: when the subject string is very, very long, searching to its end
3569  can take a long time, and give bad performance on quite ordinary
3570  patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3571  string... so we don't do this when the string is sufficiently long. */
3572 
3573  if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3574  {
3575  register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3576 
3577  /* We don't need to repeat the search if we haven't yet reached the
3578  place we found it at last time. */
3579 
3580  if (p > req_char_ptr)
3581  {
3582  if (req_char != req_char2)
3583  {
3584  while (p < end_subject)
3585  {
3586  register pcre_uint32 pp = UCHAR21INCTEST(p);
3587  if (pp == req_char || pp == req_char2) { p--; break; }
3588  }
3589  }
3590  else
3591  {
3592  while (p < end_subject)
3593  {
3594  if (UCHAR21INCTEST(p) == req_char) { p--; break; }
3595  }
3596  }
3597 
3598  /* If we can't find the required pcre_uchar, break the matching loop,
3599  which will cause a return or PCRE_ERROR_NOMATCH. */
3600 
3601  if (p >= end_subject) break;
3602 
3603  /* If we have found the required pcre_uchar, save the point where we
3604  found it, so that we don't search again next time round the loop if
3605  the start hasn't passed this point yet. */
3606 
3607  req_char_ptr = p;
3608  }
3609  }
3610  }
3611  } /* End of optimizations that are done when not restarting */
3612 
3613  /* OK, now we can do the business */
3614 
3615  md->start_used_ptr = current_subject;
3616  md->recursive = NULL;
3617 
3618  rc = internal_dfa_exec(
3619  md, /* fixed match data */
3620  md->start_code, /* this subexpression's code */
3621  current_subject, /* where we currently are */
3622  start_offset, /* start offset in subject */
3623  offsets, /* offset vector */
3624  offsetcount, /* size of same */
3625  workspace, /* workspace vector */
3626  wscount, /* size of same */
3627  0); /* function recurse level */
3628 
3629  /* Anything other than "no match" means we are done, always; otherwise, carry
3630  on only if not anchored. */
3631 
3632  if (rc != PCRE_ERROR_NOMATCH || anchored)
3633  {
3634  if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3635  {
3636  offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3637  offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3638  if (offsetcount > 2)
3639  offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3640  }
3641  return rc;
3642  }
3643 
3644  /* Advance to the next subject character unless we are at the end of a line
3645  and firstline is set. */
3646 
3647  if (firstline && IS_NEWLINE(current_subject)) break;
3648  current_subject++;
3649 #ifdef SUPPORT_UTF
3650  if (utf)
3651  {
3652  ACROSSCHAR(current_subject < end_subject, *current_subject,
3653  current_subject++);
3654  }
3655 #endif
3656  if (current_subject > end_subject) break;
3657 
3658  /* If we have just passed a CR and we are now at a LF, and the pattern does
3659  not contain any explicit matches for \r or \n, and the newline option is CRLF
3660  or ANY or ANYCRLF, advance the match position by one more character. */
3661 
3662  if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
3663  current_subject < end_subject &&
3664  UCHAR21TEST(current_subject) == CHAR_NL &&
3665  (re->flags & PCRE_HASCRORLF) == 0 &&
3666  (md->nltype == NLTYPE_ANY ||
3667  md->nltype == NLTYPE_ANYCRLF ||
3668  md->nllen == 2))
3669  current_subject++;
3670 
3671  } /* "Bumpalong" loop */
3672 
3673 return PCRE_ERROR_NOMATCH;
3674 }
3675 
3676 /* End of pcre_dfa_exec.c */
#define md
Definition: compat-1.3.h:2001
char value[7]
Definition: config.c:431
#define NEWLINE
static uch flags
#define NULL
Definition: ncbistd.hpp:225
#define DPRINTF(args)
Definition: mdb.c:493
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
if(yy_accept[yy_current_state])
EIPRangeType t
Definition: ncbi_localip.c:101
int isprint(Uchar c)
Definition: ncbictype.hpp:67
T max(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define PCRE_SPTR32
Definition: pcre.h:367
#define PCRE_ERROR_BADUTF8_OFFSET
Definition: pcre.h:194
#define PCRE_NO_UTF8_CHECK
Definition: pcre.h:148
#define PCRE_FIRSTLINE
Definition: pcre.h:163
#define PCRE_ERROR_PARTIAL
Definition: pcre.h:196
#define PCRE_NOTBOL
Definition: pcre.h:140
#define PCRE_UTF8
Definition: pcre.h:144
#define PCRE_EXTRA_CALLOUT_DATA
Definition: pcre.h:322
#define PCRE_ERROR_DFA_UMLIMIT
Definition: pcre.h:202
#define PCRE_ERROR_BADMODE
Definition: pcre.h:213
#define PCRE_NEWLINE_ANY
Definition: pcre.h:168
#define PCRE_ERROR_BADNEWLINE
Definition: pcre.h:207
#define PCRE_ERROR_DFA_BADRESTART
Definition: pcre.h:215
#define PCRE_EXTRA_STUDY_DATA
Definition: pcre.h:320
#define PCRE_ERROR_BADOPTION
Definition: pcre.h:183
#define PCRE_SPTR16
Definition: pcre.h:356
#define PCRE_ERROR_SHORTUTF8
Definition: pcre.h:209
#define PCRE_EXTRA_MATCH_LIMIT
Definition: pcre.h:321
#define PCRE_EXTRA_TABLES
Definition: pcre.h:323
#define PCRE_DFA_SHORTEST
Definition: pcre.h:157
#define PCRE_NOTEOL
Definition: pcre.h:141
#define PCRE_ERROR_BADMAGIC
Definition: pcre.h:184
#define PCRE_ERROR_BADENDIANNESS
Definition: pcre.h:214
#define PCRE_ERROR_DFA_WSSIZE
Definition: pcre.h:203
#define PCRE_ERROR_DFA_UITEM
Definition: pcre.h:200
#define PCRE_PARTIAL_SOFT
Definition: pcre.h:152
#define PCRE_NOTEMPTY
Definition: pcre.h:143
#define PCRE_NEWLINE_LF
Definition: pcre.h:166
#define PCRE_UTF16_ERR1
Definition: pcre.h:249
#define PCRE_ANCHORED
Definition: pcre.h:137
#define PCRE_ERROR_NULL
Definition: pcre.h:182
#define PCRE_UCP
Definition: pcre.h:177
#define PCRE_ERROR_BADCOUNT
Definition: pcre.h:199
#define PCRE_DFA_RESTART
Definition: pcre.h:161
#define PCRE_ERROR_BADUTF32
Definition: pcre.h:193
#define PCRE_NO_START_OPTIMIZE
Definition: pcre.h:173
#define PCRE_ERROR_BADUTF16
Definition: pcre.h:192
#define PCRE_NEWLINE_CR
Definition: pcre.h:165
#define PCRE_SPTR
Definition: pcre.h:375
#define PCRE_UTF8_ERR5
Definition: pcre.h:227
#define PCRE_ERROR_RECURSELOOP
Definition: pcre.h:211
#define PCRE_ERROR_DFA_UCOND
Definition: pcre.h:201
#define PCRE_BSR_UNICODE
Definition: pcre.h:171
#define PCRE_BSR_ANYCRLF
Definition: pcre.h:170
int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *, const unsigned int *, int, int, int, int *, int, int *, int)
#define PCRE_ERROR_BADLENGTH
Definition: pcre.h:217
#define PCRE_ERROR_DFA_RECURSE
Definition: pcre.h:204
#define PCRE_ERROR_BADOFFSET
Definition: pcre.h:208
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION
Definition: pcre.h:324
#define PCRE_ERROR_NOMATCH
Definition: pcre.h:181
#define PCRE_DOLLAR_ENDONLY
Definition: pcre.h:138
#define PCRE_PARTIAL_HARD
Definition: pcre.h:175
#define PCRE_NEWLINE_ANYCRLF
Definition: pcre.h:169
#define PCRE_ERROR_BADUTF8
Definition: pcre.h:191
#define PCRE_ERROR_SHORTUTF16
Definition: pcre.h:210
#define PCRE_NOTEMPTY_ATSTART
Definition: pcre.h:176
int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, const unsigned short *, int, int, int, int *, int, int *, int)
static const pcre_uint8 coptable[]
int pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, const char *subject, int length, int start_offset, int options, int *offsets, int offsetcount, int *workspace, int wscount)
static int internal_dfa_exec(dfa_match_data *md, const pcre_uchar *this_start_code, const pcre_uchar *current_subject, int start_offset, int *offsets, int offsetcount, int *workspace, int wscount, int rlevel)
#define OP_VSPACE_EXTRA
struct stateblock stateblock
#define ADD_ACTIVE(x, y)
#define OP_PROP_EXTRA
#define OP_HSPACE_EXTRA
#define INTS_PER_STATEBLOCK
#define NLBLOCK
Definition: pcre_dfa_exec.c:79
#define OP_ANYNL_EXTRA
#define ADD_NEW(x, y)
#define OP_EXTUNI_EXTRA
static const pcre_uint8 toptable1[]
static const pcre_uint8 toptable2[]
static const pcre_uint8 poptable[]
#define SP
Definition: pcre_dfa_exec.c:88
#define ADD_NEW_DATA(x, y, z)
#define allow_zero
#define PCRE_MODE
#define PT_PC
#define PCRE_PUCHAR
#define CHAR_DOLLAR_SIGN
#define NLTYPE_ANYCRLF
#define REAL_PCRE
#define VSPACE_CASES
#define REQ_BYTE_MAX
#define PT_ANY
#define PT_GC
#define PUBL(name)
#define ctype_digit
#define PCRE_NEWLINE_BITS
#define PCRE_EXP_DEFN
#define ctype_word
#define PT_CLIST
#define CHAR_GRAVE_ACCENT
#define NLTYPE_FIXED
#define PCRE_STUDY_MAPPED
#define MAGIC_NUMBER
#define REVERSED_MAGIC_NUMBER
#define ctype_space
#define NOTACHAR
#define PCRE_REQCHSET
unsigned char pcre_uint8
#define CHAR_COMMERCIAL_AT
#define UCHAR21TEST(eptr)
#define PCRE_HASCRORLF
#define PCRE_CALL_CONVENTION
#define RREF_ANY
#define PT_UCNC
#define PT_WORD
#define CHAR_NL
@ OP_STARI
@ OP_NOTMINSTARI
@ OP_ANYNL
@ OP_SKIPZERO
@ OP_CHAR
@ OP_CRMINQUERY
@ OP_NOTPOSUPTOI
@ OP_SBRA
@ OP_ONCE
@ OP_NOTPROP
@ OP_NOTPLUS
@ OP_TYPEMINPLUS
@ OP_TYPEQUERY
@ OP_SBRAPOS
@ OP_SCOND
@ OP_ASSERTBACK
@ OP_CIRCM
@ OP_CLASS
@ OP_TYPEPLUS
@ OP_HSPACE
@ OP_NOT_WORDCHAR
@ OP_MINSTARI
@ OP_CRMINPLUS
@ OP_CRRANGE
@ OP_DOLL
@ OP_ONCE_NC
@ OP_BRAPOS
@ OP_ASSERT_NOT
@ OP_NOTSTARI
@ OP_NOT
@ OP_DNCREF
@ OP_ASSERT
@ OP_NOTMINPLUSI
@ OP_TYPEPOSSTAR
@ OP_TYPEMINUPTO
@ OP_TYPEPOSPLUS
@ OP_POSSTAR
@ OP_NOTUPTO
@ OP_TYPESTAR
@ OP_BRAMINZERO
@ OP_EXACTI
@ OP_NOTPLUSI
@ OP_NOTQUERYI
@ OP_CRQUERY
@ OP_ASSERTBACK_NOT
@ OP_RREF
@ OP_DNRREF
@ OP_DIGIT
@ OP_KETRPOS
@ OP_EXACT
@ OP_TYPEEXACT
@ OP_PLUS
@ OP_WHITESPACE
@ OP_CRMINSTAR
@ OP_NOTPOSPLUSI
@ OP_NOT_WORD_BOUNDARY
@ OP_KET
@ OP_NOT_DIGIT
@ OP_CALLOUT
@ OP_CRMINRANGE
@ OP_RECURSE
@ OP_BRA
@ OP_CREF
@ OP_TABLE_LENGTH
@ OP_POSUPTO
@ OP_MINUPTOI
@ OP_NOTPOSUPTO
@ OP_REVERSE
@ OP_NCLASS
@ OP_KETRMIN
@ OP_COND
@ OP_MINPLUS
@ OP_TYPEPOSUPTO
@ OP_WORDCHAR
@ OP_MINQUERY
@ OP_EODN
@ OP_UPTOI
@ OP_CRPOSRANGE
@ OP_ALT
@ OP_UPTO
@ OP_QUERY
@ OP_POSQUERYI
@ OP_NOTPOSSTARI
@ OP_PROP
@ OP_NOTPOSSTAR
@ OP_PLUSI
@ OP_KETRMAX
@ OP_NOTMINPLUS
@ OP_CBRAPOS
@ OP_BRAZERO
@ OP_QUERYI
@ OP_POSPLUSI
@ OP_ANYBYTE
@ OP_SCBRAPOS
@ OP_CHARI
@ OP_NOTMINQUERYI
@ OP_TYPEMINQUERY
@ OP_NOT_WHITESPACE
@ OP_NOTMINSTAR
@ OP_NOTSTAR
@ OP_SCBRA
@ OP_CRPOSSTAR
@ OP_MINUPTO
@ OP_NOTPOSQUERYI
@ OP_NOT_VSPACE
@ OP_CRSTAR
@ OP_VSPACE
@ OP_POSQUERY
@ OP_MINSTAR
@ OP_STAR
@ OP_ALLANY
@ OP_DEF
@ OP_DOLLM
@ OP_CRPOSQUERY
@ OP_TYPEMINSTAR
@ OP_NOTMINUPTO
@ OP_NOTMINQUERY
@ OP_CRPLUS
@ OP_TYPEPOSQUERY
@ OP_POSPLUS
@ OP_SOD
@ OP_NOTPOSQUERY
@ OP_TYPEUPTO
@ OP_SOM
@ OP_ANY
@ OP_XCLASS
@ OP_POSSTARI
@ OP_NOT_HSPACE
@ OP_FAIL
@ OP_MINQUERYI
@ OP_MINPLUSI
@ OP_NOTMINUPTOI
@ OP_NOTI
@ OP_NOTQUERY
@ OP_POSUPTOI
@ OP_CBRA
@ OP_BRAPOSZERO
@ OP_NOTUPTOI
@ OP_CRPOSPLUS
@ OP_EXTUNI
@ OP_EOD
@ OP_NOTEXACTI
@ OP_NOTEXACT
@ OP_WORD_BOUNDARY
@ OP_NOTPOSPLUS
@ OP_CIRC
#define PCRE_FCH_CASELESS
#define PT_SC
#define CHAR_CR
#define PCRE_STUDY_MINLEN
#define PT_SPACE
unsigned char pcre_uchar
#define CHAR_NEL
#define IS_NEWLINE(p)
#define PCRE_FIRSTSET
#define PT_ALNUM
#define TABLE_GET(c, table, default)
#define PT_PXSPACE
#define PCRE_RCH_CASELESS
#define UCHAR21INCTEST(eptr)
#define CHAR_LF
#define WAS_NEWLINE(p)
#define PT_LAMP
#define CHAR_UNDERSCORE
#define STRLEN_UC(str)
#define fcc_offset
#define CHAR_FF
#define NLTYPE_ANY
#define memmove(a, b, c)
#define IMM2_SIZE
#define GETCHARLEN(c, eptr, len)
#define lcc_offset
#define PRIV(name)
#define ctypes_offset
#define PUBLIC_DFA_EXEC_OPTIONS
#define CHAR_VT
#define HSPACE_CASES
#define GETCHARTEST(c, eptr)
#define GET2(a, n)
#define PCRE_STARTLINE
static char * newline
Definition: pcregrep.c:151
static int callout(pcre_callout_block *cb)
Definition: pcretest.c:2253
int offset
Definition: replacements.h:160
#define LINK_SIZE
Definition: config.h:188
static string subject
Definition: inftrees.h:24
const pcre_uchar * subject_position
struct dfa_recursion_info * prevrec
unsigned long int flags
Definition: pcre.h:409
void * callout_data
Definition: pcre.h:412
const unsigned char * tables
Definition: pcre.h:413
void * study_data
Definition: pcre.h:410
pcre_uint32 minlength
pcre_uint32 flags
pcre_uint8 start_bits[32]
pcre_uint16 name_table_offset
pcre_uint8 chartype
pcre_uint8 script
int BOOL
Definition: sybdb.h:150
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
@ ucp_Lu
Definition: ucp.h:43
@ ucp_Lt
Definition: ucp.h:42
@ ucp_Ll
Definition: ucp.h:39
@ ucp_Z
Definition: ucp.h:28
@ ucp_N
Definition: ucp.h:25
@ ucp_L
Definition: ucp.h:23
Modified on Wed Nov 29 02:18:41 2023 by modify_doxy.py rev. 669887