NCBI C++ ToolKit
pcre_jit_compile.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Copyright (c) 1997-2013 University of Cambridge
10 
11  The machine code generator part (this module) was written by Zoltan Herczeg
12  Copyright (c) 2010-2013
13 
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17 
18  * Redistributions of source code must retain the above copyright notice,
19  this list of conditions and the following disclaimer.
20 
21  * Redistributions in binary form must reproduce the above copyright
22  notice, this list of conditions and the following disclaimer in the
23  documentation and/or other materials provided with the distribution.
24 
25  * Neither the name of the University of Cambridge nor the names of its
26  contributors may be used to endorse or promote products derived from
27  this software without specific prior written permission.
28 
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42 
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46 
47 #include "pcre_internal.h"
48 
49 #if defined SUPPORT_JIT
50 
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54 
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61 
62 #include "sljit/sljitLir.c"
63 
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67 
68 /* Defines for debugging purposes. */
69 
70 /* 1 - Use unoptimized capturing brackets.
71  2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73 
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76 
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80 
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84 
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89 
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92 
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98 
99  'ab' - 'a' and 'b' regexps are concatenated
100  'a+' - 'a' is the sub-expression of the '+' operator
101 
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107 
108  Greedy star operator (*) :
109  Matching path: match happens.
110  Backtrack path: match failed.
111  Non-greedy star operator (*?) :
112  Matching path: no need to perform a match.
113  Backtrack path: match is required.
114 
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118 
119  A(B|C)D
120 
121 The generated code will be the following:
122 
123  A matching path
124  '(' matching path (pushing arguments to the stack)
125  B matching path
126  ')' matching path (pushing arguments to the stack)
127  D matching path
128  return with successful match
129 
130  D backtrack path
131  ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132  B backtrack path
133  C expected path
134  jump to D matching path
135  C backtrack path
136  A backtrack path
137 
138  Notice that the order of the backtrack code paths is the opposite of the fast
139  code paths. In this way the topmost value on the stack always belongs
140  to the current backtrack code path. The backtrack path must check
141  whether there is a next alternative. If so, it needs to jump back to
142  the matching path eventually. Otherwise it needs to clear out its own stack
143  frame and continue the execution on the backtrack code paths.
144 */
145 
146 /*
147 Saved stack frames:
148 
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153 
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156 
157 Thus we can restore the private data to a particular point in the stack.
158 */
159 
/* Argument block: the jit_function type declared later in this file takes
a pointer to this structure as its only parameter. Pointer members are
grouped first, followed by the integer members and four one-byte flags. */
160 typedef struct jit_arguments {
161  /* Pointers first. */
162  struct sljit_stack *stack;
163  const pcre_uchar *str;
164  const pcre_uchar *begin;
165  const pcre_uchar *end;
166  int *offsets;
167  pcre_uchar *mark_ptr;
168  void *callout_data;
169  /* Everything else after. */
170  sljit_u32 limit_match;
171  int real_offset_count;
172  int offset_count;
173  sljit_u8 notbol;
174  sljit_u8 noteol;
175  sljit_u8 notempty;
176  sljit_u8 notempty_atstart;
177 } jit_arguments;
178 
/* Record holding three parallel arrays of JIT_NUMBER_OF_COMPILE_MODES
entries each (function pointers, read-only data heads and sizes), a
callback with its userdata pointer, and two 32-bit values. */
179 typedef struct executable_functions {
180  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
181  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
182  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183  PUBL(jit_callback) callback;
184  void *userdata;
185  sljit_u32 top_bracket;
186  sljit_u32 limit_match;
187 } executable_functions;
188 
/* Singly linked list node holding one sljit jump. */
189 typedef struct jump_list {
190  struct sljit_jump *jump;
191  struct jump_list *next;
192 } jump_list;
193 
/* Singly linked list node pairing a start jump with a quit label. */
194 typedef struct stub_list {
195  struct sljit_jump *start;
196  struct sljit_label *quit;
197  struct stub_list *next;
198 } stub_list;
199 
/* Singly linked list node pairing a label with a pointer to an sljit_uw slot.
NOTE(review): presumably the slot receives the address of the label once the
code has been generated - confirm against the users of label_addrs. */
200 typedef struct label_addr_list {
201  struct sljit_label *label;
202  sljit_uw *update_addr;
203  struct label_addr_list *next;
204 } label_addr_list;
205 
/* Two named negative constants (-1 and -2).
NOTE(review): presumably used in place of a frame size when no frame or no
stack is needed - confirm against get_framesize and its callers. */
206 enum frame_types {
207  no_frame = -1,
208  no_stack = -2
209 };
210 
/* Tags with the values 0 and 1.
NOTE(review): presumably they identify the entry kind in the control verb
chain (see control_head_ptr in compiler_common) - confirm. */
211 enum control_types {
212  type_mark = 0,
213  type_then_trap = 1
214 };
215 
216 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
217 
218 /* The following structure is the key data type for the recursive
219 code generator. It is allocated by compile_matchingpath, and contains
220 the arguments for compile_backtrackingpath. Must be the first member
221 of its descendants. */
/* Base record of every *_backtrack structure in this file: each of them
embeds it as its first member, named "common". */
222 typedef struct backtrack_common {
223  /* Concatenation stack. */
224  struct backtrack_common *prev;
225  jump_list *nextbacktracks;
226  /* Internal stack (for component operators). */
227  struct backtrack_common *top;
228  jump_list *topbacktracks;
229  /* Opcode pointer. */
230  pcre_uchar *cc;
231 } backtrack_common;
232 
/* Backtrack record that extends backtrack_common (first member) with a
jump list, a frame size, a private data offset and a label. */
233 typedef struct assert_backtrack {
234  backtrack_common common;
235  jump_list *condfailed;
236  /* Less than 0 if a frame is not needed. */
237  int framesize;
238  /* Points to our private memory word on the stack. */
239  int private_data_ptr;
240  /* For iterators. */
241  struct sljit_label *matchingpath;
242 } assert_backtrack;
243 
/* Backtrack record that extends backtrack_common (first member) with three
labels, a union whose active member depends on the opcode (see the member
comments), and a private data offset. */
244 typedef struct bracket_backtrack {
245  backtrack_common common;
246  /* Where to continue if an alternative is successfully matched. */
247  struct sljit_label *alternative_matchingpath;
248  /* For rmin and rmax iterators. */
249  struct sljit_label *recursive_matchingpath;
250  /* For greedy ? operator. */
251  struct sljit_label *zero_matchingpath;
252  /* Contains the branches of a failed condition. */
253  union {
254  /* Both for OP_COND, OP_SCOND. */
255  jump_list *condfailed;
256  assert_backtrack *assert;
257  /* For OP_ONCE. Less than 0 if not needed. */
258  int framesize;
259  } u;
260  /* Points to our private memory word on the stack. */
261  int private_data_ptr;
262 } bracket_backtrack;
263 
/* Backtrack record that extends backtrack_common (first member) with a
private data offset and two sizes. */
264 typedef struct bracketpos_backtrack {
265  backtrack_common common;
266  /* Points to our private memory word on the stack. */
267  int private_data_ptr;
268  /* Reverting stack is needed. */
269  int framesize;
270  /* Allocated stack size. */
271  int stacksize;
272 } bracketpos_backtrack;
273 
/* Backtrack record that extends backtrack_common (first member) with a
single label. */
274 typedef struct braminzero_backtrack {
275  backtrack_common common;
276  struct sljit_label *matchingpath;
277 } braminzero_backtrack;
278 
/* Backtrack record that extends backtrack_common (first member) with the
label of the next iteration and a union holding either a jump list or the
"charpos" triple (other-case bit, character, enabled flag). */
279 typedef struct char_iterator_backtrack {
280  backtrack_common common;
281  /* Next iteration. */
282  struct sljit_label *matchingpath;
283  union {
284  jump_list *backtracks;
285  struct {
286  unsigned int othercasebit;
287  pcre_uchar chr;
288  BOOL enabled;
289  } charpos;
290  } u;
291 } char_iterator_backtrack;
292 
/* Backtrack record that extends backtrack_common (first member) with the
label of the next iteration. */
293 typedef struct ref_iterator_backtrack {
294  backtrack_common common;
295  /* Next iteration. */
296  struct sljit_label *matchingpath;
297 } ref_iterator_backtrack;
298 
/* Singly linked list node describing one recursion target: its entry label,
the jumps collected before that label exists, and its starting opcode. */
299 typedef struct recurse_entry {
300  struct recurse_entry *next;
301  /* Contains the function entry. */
302  struct sljit_label *entry;
303  /* Collects the calls until the function is not created. */
304  jump_list *calls;
305  /* Points to the starting opcode. */
306  sljit_sw start;
307 } recurse_entry;
308 
/* Backtrack record that extends backtrack_common (first member) with one
flag. */
309 typedef struct recurse_backtrack {
310  backtrack_common common;
311  BOOL inlined_pattern;
312 } recurse_backtrack;
313 
314 #define OP_THEN_TRAP OP_TABLE_LENGTH
315 
/* Backtrack record that extends backtrack_common (first member) with the
state kept for one alternative that a THEN opcode may target. */
316 typedef struct then_trap_backtrack {
317  backtrack_common common;
318  /* If then_trap is not NULL, this structure contains the real
319  then_trap for the backtracking path. */
320  struct then_trap_backtrack *then_trap;
321  /* Points to the starting opcode. */
322  sljit_sw start;
323  /* Exit point for the then opcodes of this alternative. */
324  jump_list *quit;
325  /* Frame size of the current alternative. */
326  int framesize;
327 } then_trap_backtrack;
328 
329 #define MAX_RANGE_SIZE 4
330 
/* State shared by the code generator functions of this file, which receive
it through their "common" parameter. The macros OVECTOR_START, OVECTOR_PRIV
and PRIVATE_DATA read its ovector_start, cbra_ptr, private_data_ptrs and
start members, and DEFINE_COMPILER reads its compiler member. */
331 typedef struct compiler_common {
332  /* The sljit generic compiler. */
333  struct sljit_compiler *compiler;
334  /* First byte code. */
335  pcre_uchar *start;
336  /* Maps private data offset to each opcode. */
337  sljit_s32 *private_data_ptrs;
338  /* Chain list of read-only data ptrs. */
339  void *read_only_data_head;
340  /* Tells whether the capturing bracket is optimized. */
341  sljit_u8 *optimized_cbracket;
342  /* Tells whether the starting offset is a target of then. */
343  sljit_u8 *then_offsets;
344  /* Current position where a THEN must jump. */
345  then_trap_backtrack *then_trap;
346  /* Starting offset of private data for capturing brackets. */
347  sljit_s32 cbra_ptr;
348  /* Output vector starting point. Must be divisible by 2. */
349  sljit_s32 ovector_start;
350  /* Points to the starting character of the current match. */
351  sljit_s32 start_ptr;
352  /* Last known position of the requested byte. */
353  sljit_s32 req_char_ptr;
354  /* Head of the last recursion. */
355  sljit_s32 recursive_head_ptr;
356  /* First inspected character for partial matching.
357  (Needed for avoiding zero length partial matches.) */
358  sljit_s32 start_used_ptr;
359  /* Starting pointer for partial soft matches. */
360  sljit_s32 hit_start;
361  /* Pointer of the match end position. */
362  sljit_s32 match_end_ptr;
363  /* Points to the marked string. */
364  sljit_s32 mark_ptr;
365  /* Recursive control verb management chain. */
366  sljit_s32 control_head_ptr;
367  /* Points to the last matched capture block index. */
368  sljit_s32 capture_last_ptr;
369  /* Fast forward skipping byte code pointer. */
370  pcre_uchar *fast_forward_bc_ptr;
371  /* Locals used by fast fail optimization. */
372  sljit_s32 fast_fail_start_ptr;
373  sljit_s32 fast_fail_end_ptr;
374 
375  /* Flipped and lower case tables. */
376  const sljit_u8 *fcc;
377  sljit_sw lcc;
378  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
379  int mode;
380  /* TRUE, when minlength is greater than 0. */
/* NOTE(review): the member name suggests the opposite condition (the
pattern may match an empty string), and check_opcode_types sets it to TRUE
when it meets OP_SET_SOM - confirm which reading is right. */
381  BOOL might_be_empty;
382  /* \K is found in the pattern. */
383  BOOL has_set_som;
384  /* (*SKIP:arg) is found in the pattern. */
385  BOOL has_skip_arg;
386  /* (*THEN) is found in the pattern. */
387  BOOL has_then;
388  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
389  BOOL has_skip_in_assert_back;
390  /* Currently in recurse or negative assert. */
391  BOOL local_exit;
392  /* Currently in a positive assert. */
393  BOOL positive_assert;
394  /* Newline control. */
395  int nltype;
396  sljit_u32 nlmax;
397  sljit_u32 nlmin;
398  int newline;
399  int bsr_nltype;
400  sljit_u32 bsr_nlmax;
401  sljit_u32 bsr_nlmin;
402  /* Dollar endonly. */
403  int endonly;
404  /* Tables. */
405  sljit_sw ctypes;
406  /* Named capturing brackets. */
407  pcre_uchar *name_table;
408  sljit_sw name_count;
409  sljit_sw name_entry_size;
410 
411  /* Labels and jump lists. */
412  struct sljit_label *partialmatchlabel;
413  struct sljit_label *quit_label;
414  struct sljit_label *forced_quit_label;
415  struct sljit_label *accept_label;
416  struct sljit_label *ff_newline_shortcut;
417  stub_list *stubs;
418  label_addr_list *label_addrs;
419  recurse_entry *entries;
420  recurse_entry *currententry;
421  jump_list *partialmatch;
422  jump_list *quit;
423  jump_list *positive_assert_quit;
424  jump_list *forced_quit;
425  jump_list *accept;
426  jump_list *calllimit;
427  jump_list *stackalloc;
428  jump_list *revertframes;
429  jump_list *wordboundary;
430  jump_list *anynewline;
431  jump_list *hspace;
432  jump_list *vspace;
433  jump_list *casefulcmp;
434  jump_list *caselesscmp;
435  jump_list *reset_match;
436  BOOL jscript_compat;
437 #ifdef SUPPORT_UTF
438  BOOL utf;
439 #ifdef SUPPORT_UCP
440  BOOL use_ucp;
441  jump_list *getucd;
442 #endif
443 #ifdef COMPILE_PCRE8
444  jump_list *utfreadchar;
445  jump_list *utfreadchar16;
446  jump_list *utfreadtype8;
447 #endif
448 #endif /* SUPPORT_UTF */
449 } compiler_common;
450 
451 /* For byte_sequence_compare. */
452 
/* Comparison state: a length and a source register, plus, only when
SLJIT_UNALIGNED is enabled, a character index and two unions ("c" and "oc")
that overlay the same storage as an int, a 16-bit value, or an array of
characters whose element type follows the compiled character width. */
453 typedef struct compare_context {
454  int length;
455  int sourcereg;
456 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
457  int ucharptr;
458  union {
459  sljit_s32 asint;
460  sljit_u16 asushort;
461 #if defined COMPILE_PCRE8
462  sljit_u8 asbyte;
463  sljit_u8 asuchars[4];
464 #elif defined COMPILE_PCRE16
465  sljit_u16 asuchars[2];
466 #elif defined COMPILE_PCRE32
467  sljit_u32 asuchars[1];
468 #endif
469  } c;
470  union {
471  sljit_s32 asint;
472  sljit_u16 asushort;
473 #if defined COMPILE_PCRE8
474  sljit_u8 asbyte;
475  sljit_u8 asuchars[4];
476 #elif defined COMPILE_PCRE16
477  sljit_u16 asuchars[2];
478 #elif defined COMPILE_PCRE32
479  sljit_u32 asuchars[1];
480 #endif
481  } oc;
482 #endif
483 } compare_context;
484 
485 /* Undefine sljit macros. */
486 #undef CMP
487 
488 /* Used for accessing the elements of the stack. */
489 #define STACK(i) ((i) * (int)sizeof(sljit_sw))
490 
491 #ifdef SLJIT_PREF_SHIFT_REG
492 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
493 /* Nothing. */
494 #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
495 #define SHIFT_REG_IS_R3
496 #else
497 #error "Unsupported shift register"
498 #endif
499 #endif
500 
501 #define TMP1 SLJIT_R0
502 #ifdef SHIFT_REG_IS_R3
503 #define TMP2 SLJIT_R3
504 #define TMP3 SLJIT_R2
505 #else
506 #define TMP2 SLJIT_R2
507 #define TMP3 SLJIT_R3
508 #endif
509 #define STR_PTR SLJIT_S0
510 #define STR_END SLJIT_S1
511 #define STACK_TOP SLJIT_R1
512 #define STACK_LIMIT SLJIT_S2
513 #define COUNT_MATCH SLJIT_S3
514 #define ARGUMENTS SLJIT_S4
515 #define RETURN_ADDR SLJIT_R4
516 
517 /* Local space layout. */
518 /* These two locals can be used by the current opcode. */
519 #define LOCALS0 (0 * sizeof(sljit_sw))
520 #define LOCALS1 (1 * sizeof(sljit_sw))
521 /* Two local variables for possessive quantifiers (char1 cannot use them). */
522 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
523 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
524 /* Max limit of recursions. */
525 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
526 /* The output vector is stored on the stack, and contains pointers
527 to characters. The vector data is divided into two groups: the first
528 group contains the start / end character pointers, and the second is
529 the start pointers when the end of the capturing group has not yet been reached. */
530 #define OVECTOR_START (common->ovector_start)
531 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
532 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
533 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
534 
535 #if defined COMPILE_PCRE8
536 #define MOV_UCHAR SLJIT_MOV_U8
537 #elif defined COMPILE_PCRE16
538 #define MOV_UCHAR SLJIT_MOV_U16
539 #elif defined COMPILE_PCRE32
540 #define MOV_UCHAR SLJIT_MOV_U32
541 #else
542 #error Unsupported compiling mode
543 #endif
544 
545 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable named "compiler", initialised
from common->compiler. Every other macro below except SET_LABEL expands to
an sljit call that passes that local, so it must be in scope wherever they
are used. JUMPTO, JUMPHERE and CMPTO also bind the emitted jump to a label
in the same expression. */
546 #define DEFINE_COMPILER \
547  struct sljit_compiler *compiler = common->compiler
548 #define OP1(op, dst, dstw, src, srcw) \
549  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
550 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
551  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
552 #define LABEL() \
553  sljit_emit_label(compiler)
554 #define JUMP(type) \
555  sljit_emit_jump(compiler, (type))
556 #define JUMPTO(type, label) \
557  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
558 #define JUMPHERE(jump) \
559  sljit_set_label((jump), sljit_emit_label(compiler))
560 #define SET_LABEL(jump, label) \
561  sljit_set_label((jump), (label))
562 #define CMP(type, src1, src1w, src2, src2w) \
563  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
564 #define CMPTO(type, src1, src1w, src2, src2w, label) \
565  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
566 #define OP_FLAGS(op, dst, dstw, type) \
567  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
568 #define GET_LOCAL_BASE(dst, dstw, offset) \
569  sljit_get_local_base(compiler, (dst), (dstw), (offset))
570 
571 #define READ_CHAR_MAX 0x7fffffff
572 
573 #define INVALID_UTF_CHAR 888
574 
575 static pcre_uchar *bracketend(pcre_uchar *cc)
576 {
577 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
578 do cc += GET(cc, 1); while (*cc == OP_ALT);
579 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
580 cc += 1 + LINK_SIZE;
581 return cc;
582 }
583 
584 static int no_alternatives(pcre_uchar *cc)
585 {
586 int count = 0;
587 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
588 do
589  {
590  cc += GET(cc, 1);
591  count++;
592  }
593 while (*cc == OP_ALT);
594 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
595 return count;
596 }
597 
598 /* Functions which might need modification for all new supported opcodes:
599  next_opcode
600  check_opcode_types
601  set_private_data_ptrs
602  get_framesize
603  init_frame
604  get_private_data_copy_length
605  copy_private_data
606  compile_matchingpath
607  compile_backtrackingpath
608 */
609 
610 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
611 {
612 SLJIT_UNUSED_ARG(common);
613 switch(*cc)
614  {
615  case OP_SOD:
616  case OP_SOM:
617  case OP_SET_SOM:
619  case OP_WORD_BOUNDARY:
620  case OP_NOT_DIGIT:
621  case OP_DIGIT:
622  case OP_NOT_WHITESPACE:
623  case OP_WHITESPACE:
624  case OP_NOT_WORDCHAR:
625  case OP_WORDCHAR:
626  case OP_ANY:
627  case OP_ALLANY:
628  case OP_NOTPROP:
629  case OP_PROP:
630  case OP_ANYNL:
631  case OP_NOT_HSPACE:
632  case OP_HSPACE:
633  case OP_NOT_VSPACE:
634  case OP_VSPACE:
635  case OP_EXTUNI:
636  case OP_EODN:
637  case OP_EOD:
638  case OP_CIRC:
639  case OP_CIRCM:
640  case OP_DOLL:
641  case OP_DOLLM:
642  case OP_CRSTAR:
643  case OP_CRMINSTAR:
644  case OP_CRPLUS:
645  case OP_CRMINPLUS:
646  case OP_CRQUERY:
647  case OP_CRMINQUERY:
648  case OP_CRRANGE:
649  case OP_CRMINRANGE:
650  case OP_CRPOSSTAR:
651  case OP_CRPOSPLUS:
652  case OP_CRPOSQUERY:
653  case OP_CRPOSRANGE:
654  case OP_CLASS:
655  case OP_NCLASS:
656  case OP_REF:
657  case OP_REFI:
658  case OP_DNREF:
659  case OP_DNREFI:
660  case OP_RECURSE:
661  case OP_CALLOUT:
662  case OP_ALT:
663  case OP_KET:
664  case OP_KETRMAX:
665  case OP_KETRMIN:
666  case OP_KETRPOS:
667  case OP_REVERSE:
668  case OP_ASSERT:
669  case OP_ASSERT_NOT:
670  case OP_ASSERTBACK:
671  case OP_ASSERTBACK_NOT:
672  case OP_ONCE:
673  case OP_ONCE_NC:
674  case OP_BRA:
675  case OP_BRAPOS:
676  case OP_CBRA:
677  case OP_CBRAPOS:
678  case OP_COND:
679  case OP_SBRA:
680  case OP_SBRAPOS:
681  case OP_SCBRA:
682  case OP_SCBRAPOS:
683  case OP_SCOND:
684  case OP_CREF:
685  case OP_DNCREF:
686  case OP_RREF:
687  case OP_DNRREF:
688  case OP_DEF:
689  case OP_BRAZERO:
690  case OP_BRAMINZERO:
691  case OP_BRAPOSZERO:
692  case OP_PRUNE:
693  case OP_SKIP:
694  case OP_THEN:
695  case OP_COMMIT:
696  case OP_FAIL:
697  case OP_ACCEPT:
698  case OP_ASSERT_ACCEPT:
699  case OP_CLOSE:
700  case OP_SKIPZERO:
701  return cc + PRIV(OP_lengths)[*cc];
702 
703  case OP_CHAR:
704  case OP_CHARI:
705  case OP_NOT:
706  case OP_NOTI:
707  case OP_STAR:
708  case OP_MINSTAR:
709  case OP_PLUS:
710  case OP_MINPLUS:
711  case OP_QUERY:
712  case OP_MINQUERY:
713  case OP_UPTO:
714  case OP_MINUPTO:
715  case OP_EXACT:
716  case OP_POSSTAR:
717  case OP_POSPLUS:
718  case OP_POSQUERY:
719  case OP_POSUPTO:
720  case OP_STARI:
721  case OP_MINSTARI:
722  case OP_PLUSI:
723  case OP_MINPLUSI:
724  case OP_QUERYI:
725  case OP_MINQUERYI:
726  case OP_UPTOI:
727  case OP_MINUPTOI:
728  case OP_EXACTI:
729  case OP_POSSTARI:
730  case OP_POSPLUSI:
731  case OP_POSQUERYI:
732  case OP_POSUPTOI:
733  case OP_NOTSTAR:
734  case OP_NOTMINSTAR:
735  case OP_NOTPLUS:
736  case OP_NOTMINPLUS:
737  case OP_NOTQUERY:
738  case OP_NOTMINQUERY:
739  case OP_NOTUPTO:
740  case OP_NOTMINUPTO:
741  case OP_NOTEXACT:
742  case OP_NOTPOSSTAR:
743  case OP_NOTPOSPLUS:
744  case OP_NOTPOSQUERY:
745  case OP_NOTPOSUPTO:
746  case OP_NOTSTARI:
747  case OP_NOTMINSTARI:
748  case OP_NOTPLUSI:
749  case OP_NOTMINPLUSI:
750  case OP_NOTQUERYI:
751  case OP_NOTMINQUERYI:
752  case OP_NOTUPTOI:
753  case OP_NOTMINUPTOI:
754  case OP_NOTEXACTI:
755  case OP_NOTPOSSTARI:
756  case OP_NOTPOSPLUSI:
757  case OP_NOTPOSQUERYI:
758  case OP_NOTPOSUPTOI:
759  cc += PRIV(OP_lengths)[*cc];
760 #ifdef SUPPORT_UTF
761  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
762 #endif
763  return cc;
764 
765  /* Special cases. */
766  case OP_TYPESTAR:
767  case OP_TYPEMINSTAR:
768  case OP_TYPEPLUS:
769  case OP_TYPEMINPLUS:
770  case OP_TYPEQUERY:
771  case OP_TYPEMINQUERY:
772  case OP_TYPEUPTO:
773  case OP_TYPEMINUPTO:
774  case OP_TYPEEXACT:
775  case OP_TYPEPOSSTAR:
776  case OP_TYPEPOSPLUS:
777  case OP_TYPEPOSQUERY:
778  case OP_TYPEPOSUPTO:
779  return cc + PRIV(OP_lengths)[*cc] - 1;
780 
781  case OP_ANYBYTE:
782 #ifdef SUPPORT_UTF
783  if (common->utf) return NULL;
784 #endif
785  return cc + 1;
786 
787 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
788  case OP_XCLASS:
789  return cc + GET(cc, 1);
790 #endif
791 
792  case OP_MARK:
793  case OP_PRUNE_ARG:
794  case OP_SKIP_ARG:
795  case OP_THEN_ARG:
796  return cc + 1 + 2 + cc[1];
797 
798  default:
799  /* All opcodes are supported now! */
800  SLJIT_UNREACHABLE();
801  return NULL;
802  }
803 }
804 
805 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
806 {
807 int count;
808 pcre_uchar *slot;
809 pcre_uchar *assert_back_end = cc - 1;
810 
811 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
812 while (cc < ccend)
813  {
814  switch(*cc)
815  {
816  case OP_SET_SOM:
817  common->has_set_som = TRUE;
818  common->might_be_empty = TRUE;
819  cc += 1;
820  break;
821 
822  case OP_REF:
823  case OP_REFI:
824  common->optimized_cbracket[GET2(cc, 1)] = 0;
825  cc += 1 + IMM2_SIZE;
826  break;
827 
828  case OP_CBRAPOS:
829  case OP_SCBRAPOS:
830  common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
831  cc += 1 + LINK_SIZE + IMM2_SIZE;
832  break;
833 
834  case OP_COND:
835  case OP_SCOND:
836  /* Only AUTO_CALLOUT can insert this opcode. We do
837  not intend to support this case. */
838  if (cc[1 + LINK_SIZE] == OP_CALLOUT)
839  return FALSE;
840  cc += 1 + LINK_SIZE;
841  break;
842 
843  case OP_CREF:
844  common->optimized_cbracket[GET2(cc, 1)] = 0;
845  cc += 1 + IMM2_SIZE;
846  break;
847 
848  case OP_DNREF:
849  case OP_DNREFI:
850  case OP_DNCREF:
851  count = GET2(cc, 1 + IMM2_SIZE);
852  slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
853  while (count-- > 0)
854  {
855  common->optimized_cbracket[GET2(slot, 0)] = 0;
856  slot += common->name_entry_size;
857  }
858  cc += 1 + 2 * IMM2_SIZE;
859  break;
860 
861  case OP_RECURSE:
862  /* Set its value only once. */
863  if (common->recursive_head_ptr == 0)
864  {
865  common->recursive_head_ptr = common->ovector_start;
866  common->ovector_start += sizeof(sljit_sw);
867  }
868  cc += 1 + LINK_SIZE;
869  break;
870 
871  case OP_CALLOUT:
872  if (common->capture_last_ptr == 0)
873  {
874  common->capture_last_ptr = common->ovector_start;
875  common->ovector_start += sizeof(sljit_sw);
876  }
877  cc += 2 + 2 * LINK_SIZE;
878  break;
879 
880  case OP_ASSERTBACK:
881  slot = bracketend(cc);
882  if (slot > assert_back_end)
883  assert_back_end = slot;
884  cc += 1 + LINK_SIZE;
885  break;
886 
887  case OP_THEN_ARG:
888  common->has_then = TRUE;
889  common->control_head_ptr = 1;
890  /* Fall through. */
891 
892  case OP_PRUNE_ARG:
893  case OP_MARK:
894  if (common->mark_ptr == 0)
895  {
896  common->mark_ptr = common->ovector_start;
897  common->ovector_start += sizeof(sljit_sw);
898  }
899  cc += 1 + 2 + cc[1];
900  break;
901 
902  case OP_THEN:
903  common->has_then = TRUE;
904  common->control_head_ptr = 1;
905  cc += 1;
906  break;
907 
908  case OP_SKIP:
909  if (cc < assert_back_end)
910  common->has_skip_in_assert_back = TRUE;
911  cc += 1;
912  break;
913 
914  case OP_SKIP_ARG:
915  common->control_head_ptr = 1;
916  common->has_skip_arg = TRUE;
917  if (cc < assert_back_end)
918  common->has_skip_in_assert_back = TRUE;
919  cc += 1 + 2 + cc[1];
920  break;
921 
922  default:
923  cc = next_opcode(common, cc);
924  if (cc == NULL)
925  return FALSE;
926  break;
927  }
928  }
929 return TRUE;
930 }
931 
932 static BOOL is_accelerated_repeat(pcre_uchar *cc)
933 {
934 switch(*cc)
935  {
936  case OP_TYPESTAR:
937  case OP_TYPEMINSTAR:
938  case OP_TYPEPLUS:
939  case OP_TYPEMINPLUS:
940  case OP_TYPEPOSSTAR:
941  case OP_TYPEPOSPLUS:
942  return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
943 
944  case OP_STAR:
945  case OP_MINSTAR:
946  case OP_PLUS:
947  case OP_MINPLUS:
948  case OP_POSSTAR:
949  case OP_POSPLUS:
950 
951  case OP_STARI:
952  case OP_MINSTARI:
953  case OP_PLUSI:
954  case OP_MINPLUSI:
955  case OP_POSSTARI:
956  case OP_POSPLUSI:
957 
958  case OP_NOTSTAR:
959  case OP_NOTMINSTAR:
960  case OP_NOTPLUS:
961  case OP_NOTMINPLUS:
962  case OP_NOTPOSSTAR:
963  case OP_NOTPOSPLUS:
964 
965  case OP_NOTSTARI:
966  case OP_NOTMINSTARI:
967  case OP_NOTPLUSI:
968  case OP_NOTMINPLUSI:
969  case OP_NOTPOSSTARI:
970  case OP_NOTPOSPLUSI:
971  return TRUE;
972 
973  case OP_CLASS:
974  case OP_NCLASS:
975 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
976  case OP_XCLASS:
977  cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
978 #else
979  cc += (1 + (32 / sizeof(pcre_uchar)));
980 #endif
981 
982  switch(*cc)
983  {
984  case OP_CRSTAR:
985  case OP_CRMINSTAR:
986  case OP_CRPLUS:
987  case OP_CRMINPLUS:
988  case OP_CRPOSSTAR:
989  case OP_CRPOSPLUS:
990  return TRUE;
991  }
992  break;
993  }
994 return FALSE;
995 }
996 
997 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
998 {
999 pcre_uchar *cc = common->start;
1000 pcre_uchar *end;
1001 
1002 /* Skip not repeated brackets. */
1003 while (TRUE)
1004  {
1005  switch(*cc)
1006  {
1007  case OP_SOD:
1008  case OP_SOM:
1009  case OP_SET_SOM:
1010  case OP_NOT_WORD_BOUNDARY:
1011  case OP_WORD_BOUNDARY:
1012  case OP_EODN:
1013  case OP_EOD:
1014  case OP_CIRC:
1015  case OP_CIRCM:
1016  case OP_DOLL:
1017  case OP_DOLLM:
1018  /* Zero width assertions. */
1019  cc++;
1020  continue;
1021  }
1022 
1023  if (*cc != OP_BRA && *cc != OP_CBRA)
1024  break;
1025 
1026  end = cc + GET(cc, 1);
1027  if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1028  return FALSE;
1029  if (*cc == OP_CBRA)
1030  {
1031  if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1032  return FALSE;
1033  cc += IMM2_SIZE;
1034  }
1035  cc += 1 + LINK_SIZE;
1036  }
1037 
1038 if (is_accelerated_repeat(cc))
1039  {
1040  common->fast_forward_bc_ptr = cc;
1041  common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1042  *private_data_start += sizeof(sljit_sw);
1043  return TRUE;
1044  }
1045 return FALSE;
1046 }
1047 
1048 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1049 {
1050  pcre_uchar *next_alt;
1051 
1052  SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1053 
1054  if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1055  return;
1056 
1057  next_alt = bracketend(cc) - (1 + LINK_SIZE);
1058  if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1059  return;
1060 
1061  do
1062  {
1063  next_alt = cc + GET(cc, 1);
1064 
1065  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1066 
1067  while (TRUE)
1068  {
1069  switch(*cc)
1070  {
1071  case OP_SOD:
1072  case OP_SOM:
1073  case OP_SET_SOM:
1074  case OP_NOT_WORD_BOUNDARY:
1075  case OP_WORD_BOUNDARY:
1076  case OP_EODN:
1077  case OP_EOD:
1078  case OP_CIRC:
1079  case OP_CIRCM:
1080  case OP_DOLL:
1081  case OP_DOLLM:
1082  /* Zero width assertions. */
1083  cc++;
1084  continue;
1085  }
1086  break;
1087  }
1088 
1089  if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1090  detect_fast_fail(common, cc, private_data_start, depth - 1);
1091 
1092  if (is_accelerated_repeat(cc))
1093  {
1094  common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1095 
1096  if (common->fast_fail_start_ptr == 0)
1097  common->fast_fail_start_ptr = *private_data_start;
1098 
1099  *private_data_start += sizeof(sljit_sw);
1100  common->fast_fail_end_ptr = *private_data_start;
1101 
1102  if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1103  return;
1104  }
1105 
1106  cc = next_alt;
1107  }
1108  while (*cc == OP_ALT);
1109 }
1110 
1111 static int get_class_iterator_size(pcre_uchar *cc)
1112 {
1113 sljit_u32 min;
1114 sljit_u32 max;
1115 switch(*cc)
1116  {
1117  case OP_CRSTAR:
1118  case OP_CRPLUS:
1119  return 2;
1120 
1121  case OP_CRMINSTAR:
1122  case OP_CRMINPLUS:
1123  case OP_CRQUERY:
1124  case OP_CRMINQUERY:
1125  return 1;
1126 
1127  case OP_CRRANGE:
1128  case OP_CRMINRANGE:
1129  min = GET2(cc, 1);
1130  max = GET2(cc, 1 + IMM2_SIZE);
1131  if (max == 0)
1132  return (*cc == OP_CRRANGE) ? 2 : 1;
1133  max -= min;
1134  if (max > 2)
1135  max = 2;
1136  return max;
1137 
1138  default:
1139  return 0;
1140  }
1141 }
1142 
1143 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1144 {
1145 pcre_uchar *end = bracketend(begin);
1146 pcre_uchar *next;
1147 pcre_uchar *next_end;
1148 pcre_uchar *max_end;
1150 sljit_sw length = end - begin;
1151 int min, max, i;
1152 
1153 /* Detect fixed iterations first. */
1154 if (end[-(1 + LINK_SIZE)] != OP_KET)
1155  return FALSE;
1156 
1157 /* Already detected repeat. */
1158 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1159  return TRUE;
1160 
1161 next = end;
1162 min = 1;
1163 while (1)
1164  {
1165  if (*next != *begin)
1166  break;
1167  next_end = bracketend(next);
1168  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1169  break;
1170  next = next_end;
1171  min++;
1172  }
1173 
1174 if (min == 2)
1175  return FALSE;
1176 
1177 max = 0;
1178 max_end = next;
1179 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1180  {
1181  type = *next;
1182  while (1)
1183  {
1184  if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1185  break;
1186  next_end = bracketend(next + 2 + LINK_SIZE);
1187  if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1188  break;
1189  next = next_end;
1190  max++;
1191  }
1192 
1193  if (next[0] == type && next[1] == *begin && max >= 1)
1194  {
1195  next_end = bracketend(next + 1);
1196  if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1197  {
1198  for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1199  if (*next_end != OP_KET)
1200  break;
1201 
1202  if (i == max)
1203  {
1204  common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1205  common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1206  /* +2 the original and the last. */
1207  common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1208  if (min == 1)
1209  return TRUE;
1210  min--;
1211  max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1212  }
1213  }
1214  }
1215  }
1216 
1217 if (min >= 3)
1218  {
1219  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1220  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1221  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1222  return TRUE;
1223  }
1224 
1225 return FALSE;
1226 }
1227 
/* Case label groups shared by set_private_data_ptrs(),
get_private_data_copy_length() and copy_private_data(). Each macro expands
to a run of "case" labels, so it must be followed by the statements of
that case. The numeric suffix is the number of private data words those
functions account for the opcode. */

/* Single character iterators: one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators: two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators: two private data words; the opcode is
stepped over with an additional IMM2_SIZE units. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators: one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators: two private data words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators: two private data words; the opcode is
stepped over with an additional IMM2_SIZE units. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1279 
1280 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1281 {
1282 pcre_uchar *cc = common->start;
1283 pcre_uchar *alternative;
1284 pcre_uchar *end = NULL;
1285 int private_data_ptr = *private_data_start;
1286 int space, size, bracketlen;
1287 BOOL repeat_check = TRUE;
1288 
1289 while (cc < ccend)
1290  {
1291  space = 0;
1292  size = 0;
1293  bracketlen = 0;
1294  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1295  break;
1296 
1297  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1298  {
1299  if (detect_repeat(common, cc))
1300  {
1301  /* These brackets are converted to repeats, so no global
1302  based single character repeat is allowed. */
1303  if (cc >= end)
1304  end = bracketend(cc);
1305  }
1306  }
1307  repeat_check = TRUE;
1308 
1309  switch(*cc)
1310  {
1311  case OP_KET:
1312  if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1313  {
1314  common->private_data_ptrs[cc - common->start] = private_data_ptr;
1315  private_data_ptr += sizeof(sljit_sw);
1316  cc += common->private_data_ptrs[cc + 1 - common->start];
1317  }
1318  cc += 1 + LINK_SIZE;
1319  break;
1320 
1321  case OP_ASSERT:
1322  case OP_ASSERT_NOT:
1323  case OP_ASSERTBACK:
1324  case OP_ASSERTBACK_NOT:
1325  case OP_ONCE:
1326  case OP_ONCE_NC:
1327  case OP_BRAPOS:
1328  case OP_SBRA:
1329  case OP_SBRAPOS:
1330  case OP_SCOND:
1331  common->private_data_ptrs[cc - common->start] = private_data_ptr;
1332  private_data_ptr += sizeof(sljit_sw);
1333  bracketlen = 1 + LINK_SIZE;
1334  break;
1335 
1336  case OP_CBRAPOS:
1337  case OP_SCBRAPOS:
1338  common->private_data_ptrs[cc - common->start] = private_data_ptr;
1339  private_data_ptr += sizeof(sljit_sw);
1340  bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1341  break;
1342 
1343  case OP_COND:
1344  /* Might be a hidden SCOND. */
1345  alternative = cc + GET(cc, 1);
1346  if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1347  {
1348  common->private_data_ptrs[cc - common->start] = private_data_ptr;
1349  private_data_ptr += sizeof(sljit_sw);
1350  }
1351  bracketlen = 1 + LINK_SIZE;
1352  break;
1353 
1354  case OP_BRA:
1355  bracketlen = 1 + LINK_SIZE;
1356  break;
1357 
1358  case OP_CBRA:
1359  case OP_SCBRA:
1360  bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1361  break;
1362 
1363  case OP_BRAZERO:
1364  case OP_BRAMINZERO:
1365  case OP_BRAPOSZERO:
1366  repeat_check = FALSE;
1367  size = 1;
1368  break;
1369 
1370  CASE_ITERATOR_PRIVATE_DATA_1
1371  space = 1;
1372  size = -2;
1373  break;
1374 
1375  CASE_ITERATOR_PRIVATE_DATA_2A
1376  space = 2;
1377  size = -2;
1378  break;
1379 
1380  CASE_ITERATOR_PRIVATE_DATA_2B
1381  space = 2;
1382  size = -(2 + IMM2_SIZE);
1383  break;
1384 
1385  CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1386  space = 1;
1387  size = 1;
1388  break;
1389 
1390  CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1391  if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1392  space = 2;
1393  size = 1;
1394  break;
1395 
1396  case OP_TYPEUPTO:
1397  if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1398  space = 2;
1399  size = 1 + IMM2_SIZE;
1400  break;
1401 
1402  case OP_TYPEMINUPTO:
1403  space = 2;
1404  size = 1 + IMM2_SIZE;
1405  break;
1406 
1407  case OP_CLASS:
1408  case OP_NCLASS:
1409  space = get_class_iterator_size(cc + size);
1410  size = 1 + 32 / sizeof(pcre_uchar);
1411  break;
1412 
1413 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1414  case OP_XCLASS:
1415  space = get_class_iterator_size(cc + size);
1416  size = GET(cc, 1);
1417  break;
1418 #endif
1419 
1420  default:
1421  cc = next_opcode(common, cc);
1422  SLJIT_ASSERT(cc != NULL);
1423  break;
1424  }
1425 
1426  /* Character iterators, which are not inside a repeated bracket,
1427  gets a private slot instead of allocating it on the stack. */
1428  if (space > 0 && cc >= end)
1429  {
1430  common->private_data_ptrs[cc - common->start] = private_data_ptr;
1431  private_data_ptr += sizeof(sljit_sw) * space;
1432  }
1433 
1434  if (size != 0)
1435  {
1436  if (size < 0)
1437  {
1438  cc += -size;
1439 #ifdef SUPPORT_UTF
1440  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1441 #endif
1442  }
1443  else
1444  cc += size;
1445  }
1446 
1447  if (bracketlen > 0)
1448  {
1449  if (cc >= end)
1450  {
1451  end = bracketend(cc);
1452  if (end[-1 - LINK_SIZE] == OP_KET)
1453  end = NULL;
1454  }
1455  cc += bracketlen;
1456  }
1457  }
1458 *private_data_start = private_data_ptr;
1459 }
1460 
1461 /* Returns with a frame_types (always < 0) if no need for frame. */
1462 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1463 {
1464 int length = 0;
1465 int possessive = 0;
1466 BOOL stack_restore = FALSE;
1467 BOOL setsom_found = recursive;
1468 BOOL setmark_found = recursive;
1469 /* The last capture is a local variable even for recursions. */
1470 BOOL capture_last_found = FALSE;
1471 
1472 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1473 SLJIT_ASSERT(common->control_head_ptr != 0);
1474 *needs_control_head = TRUE;
1475 #else
1476 *needs_control_head = FALSE;
1477 #endif
1478 
1479 if (ccend == NULL)
1480  {
1481  ccend = bracketend(cc) - (1 + LINK_SIZE);
1482  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1483  {
1484  possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1485  /* This is correct regardless of common->capture_last_ptr. */
1486  capture_last_found = TRUE;
1487  }
1488  cc = next_opcode(common, cc);
1489  }
1490 
1491 SLJIT_ASSERT(cc != NULL);
1492 while (cc < ccend)
1493  switch(*cc)
1494  {
1495  case OP_SET_SOM:
1496  SLJIT_ASSERT(common->has_set_som);
1497  stack_restore = TRUE;
1498  if (!setsom_found)
1499  {
1500  length += 2;
1501  setsom_found = TRUE;
1502  }
1503  cc += 1;
1504  break;
1505 
1506  case OP_MARK:
1507  case OP_PRUNE_ARG:
1508  case OP_THEN_ARG:
1509  SLJIT_ASSERT(common->mark_ptr != 0);
1510  stack_restore = TRUE;
1511  if (!setmark_found)
1512  {
1513  length += 2;
1514  setmark_found = TRUE;
1515  }
1516  if (common->control_head_ptr != 0)
1517  *needs_control_head = TRUE;
1518  cc += 1 + 2 + cc[1];
1519  break;
1520 
1521  case OP_RECURSE:
1522  stack_restore = TRUE;
1523  if (common->has_set_som && !setsom_found)
1524  {
1525  length += 2;
1526  setsom_found = TRUE;
1527  }
1528  if (common->mark_ptr != 0 && !setmark_found)
1529  {
1530  length += 2;
1531  setmark_found = TRUE;
1532  }
1533  if (common->capture_last_ptr != 0 && !capture_last_found)
1534  {
1535  length += 2;
1536  capture_last_found = TRUE;
1537  }
1538  cc += 1 + LINK_SIZE;
1539  break;
1540 
1541  case OP_CBRA:
1542  case OP_CBRAPOS:
1543  case OP_SCBRA:
1544  case OP_SCBRAPOS:
1545  stack_restore = TRUE;
1546  if (common->capture_last_ptr != 0 && !capture_last_found)
1547  {
1548  length += 2;
1549  capture_last_found = TRUE;
1550  }
1551  length += 3;
1552  cc += 1 + LINK_SIZE + IMM2_SIZE;
1553  break;
1554 
1555  case OP_THEN:
1556  stack_restore = TRUE;
1557  if (common->control_head_ptr != 0)
1558  *needs_control_head = TRUE;
1559  cc ++;
1560  break;
1561 
1562  default:
1563  stack_restore = TRUE;
1564  /* Fall through. */
1565 
1566  case OP_NOT_WORD_BOUNDARY:
1567  case OP_WORD_BOUNDARY:
1568  case OP_NOT_DIGIT:
1569  case OP_DIGIT:
1570  case OP_NOT_WHITESPACE:
1571  case OP_WHITESPACE:
1572  case OP_NOT_WORDCHAR:
1573  case OP_WORDCHAR:
1574  case OP_ANY:
1575  case OP_ALLANY:
1576  case OP_ANYBYTE:
1577  case OP_NOTPROP:
1578  case OP_PROP:
1579  case OP_ANYNL:
1580  case OP_NOT_HSPACE:
1581  case OP_HSPACE:
1582  case OP_NOT_VSPACE:
1583  case OP_VSPACE:
1584  case OP_EXTUNI:
1585  case OP_EODN:
1586  case OP_EOD:
1587  case OP_CIRC:
1588  case OP_CIRCM:
1589  case OP_DOLL:
1590  case OP_DOLLM:
1591  case OP_CHAR:
1592  case OP_CHARI:
1593  case OP_NOT:
1594  case OP_NOTI:
1595 
1596  case OP_EXACT:
1597  case OP_POSSTAR:
1598  case OP_POSPLUS:
1599  case OP_POSQUERY:
1600  case OP_POSUPTO:
1601 
1602  case OP_EXACTI:
1603  case OP_POSSTARI:
1604  case OP_POSPLUSI:
1605  case OP_POSQUERYI:
1606  case OP_POSUPTOI:
1607 
1608  case OP_NOTEXACT:
1609  case OP_NOTPOSSTAR:
1610  case OP_NOTPOSPLUS:
1611  case OP_NOTPOSQUERY:
1612  case OP_NOTPOSUPTO:
1613 
1614  case OP_NOTEXACTI:
1615  case OP_NOTPOSSTARI:
1616  case OP_NOTPOSPLUSI:
1617  case OP_NOTPOSQUERYI:
1618  case OP_NOTPOSUPTOI:
1619 
1620  case OP_TYPEEXACT:
1621  case OP_TYPEPOSSTAR:
1622  case OP_TYPEPOSPLUS:
1623  case OP_TYPEPOSQUERY:
1624  case OP_TYPEPOSUPTO:
1625 
1626  case OP_CLASS:
1627  case OP_NCLASS:
1628  case OP_XCLASS:
1629  case OP_CALLOUT:
1630 
1631  cc = next_opcode(common, cc);
1632  SLJIT_ASSERT(cc != NULL);
1633  break;
1634  }
1635 
1636 /* Possessive quantifiers can use a special case. */
1637 if (SLJIT_UNLIKELY(possessive == length))
1638  return stack_restore ? no_frame : no_stack;
1639 
1640 if (length > 0)
1641  return length + 1;
1642 return stack_restore ? no_frame : no_stack;
1643 }
1644 
1645 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1646 {
1647 DEFINE_COMPILER;
1648 BOOL setsom_found = recursive;
1649 BOOL setmark_found = recursive;
1650 /* The last capture is a local variable even for recursions. */
1651 BOOL capture_last_found = FALSE;
1652 int offset;
1653 
1654 /* >= 1 + shortest item size (2) */
1655 SLJIT_UNUSED_ARG(stacktop);
1656 SLJIT_ASSERT(stackpos >= stacktop + 2);
1657 
1658 stackpos = STACK(stackpos);
1659 if (ccend == NULL)
1660  {
1661  ccend = bracketend(cc) - (1 + LINK_SIZE);
1662  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1663  cc = next_opcode(common, cc);
1664  }
1665 
1666 SLJIT_ASSERT(cc != NULL);
1667 while (cc < ccend)
1668  switch(*cc)
1669  {
1670  case OP_SET_SOM:
1671  SLJIT_ASSERT(common->has_set_som);
1672  if (!setsom_found)
1673  {
1674  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1675  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1676  stackpos -= (int)sizeof(sljit_sw);
1677  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1678  stackpos -= (int)sizeof(sljit_sw);
1679  setsom_found = TRUE;
1680  }
1681  cc += 1;
1682  break;
1683 
1684  case OP_MARK:
1685  case OP_PRUNE_ARG:
1686  case OP_THEN_ARG:
1687  SLJIT_ASSERT(common->mark_ptr != 0);
1688  if (!setmark_found)
1689  {
1690  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1691  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1692  stackpos -= (int)sizeof(sljit_sw);
1693  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694  stackpos -= (int)sizeof(sljit_sw);
1695  setmark_found = TRUE;
1696  }
1697  cc += 1 + 2 + cc[1];
1698  break;
1699 
1700  case OP_RECURSE:
1701  if (common->has_set_som && !setsom_found)
1702  {
1703  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1704  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1705  stackpos -= (int)sizeof(sljit_sw);
1706  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1707  stackpos -= (int)sizeof(sljit_sw);
1708  setsom_found = TRUE;
1709  }
1710  if (common->mark_ptr != 0 && !setmark_found)
1711  {
1712  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1713  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1714  stackpos -= (int)sizeof(sljit_sw);
1715  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1716  stackpos -= (int)sizeof(sljit_sw);
1717  setmark_found = TRUE;
1718  }
1719  if (common->capture_last_ptr != 0 && !capture_last_found)
1720  {
1721  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1722  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1723  stackpos -= (int)sizeof(sljit_sw);
1724  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1725  stackpos -= (int)sizeof(sljit_sw);
1726  capture_last_found = TRUE;
1727  }
1728  cc += 1 + LINK_SIZE;
1729  break;
1730 
1731  case OP_CBRA:
1732  case OP_CBRAPOS:
1733  case OP_SCBRA:
1734  case OP_SCBRAPOS:
1735  if (common->capture_last_ptr != 0 && !capture_last_found)
1736  {
1737  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1738  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1739  stackpos -= (int)sizeof(sljit_sw);
1740  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1741  stackpos -= (int)sizeof(sljit_sw);
1742  capture_last_found = TRUE;
1743  }
1744  offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1745  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1746  stackpos -= (int)sizeof(sljit_sw);
1747  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1748  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1749  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1750  stackpos -= (int)sizeof(sljit_sw);
1751  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1752  stackpos -= (int)sizeof(sljit_sw);
1753 
1754  cc += 1 + LINK_SIZE + IMM2_SIZE;
1755  break;
1756 
1757  default:
1758  cc = next_opcode(common, cc);
1759  SLJIT_ASSERT(cc != NULL);
1760  break;
1761  }
1762 
1763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1764 SLJIT_ASSERT(stackpos == STACK(stacktop));
1765 }
1766 
1767 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1768 {
1769 int private_data_length = needs_control_head ? 3 : 2;
1770 int size;
1771 pcre_uchar *alternative;
1772 /* Calculate the sum of the private machine words. */
1773 while (cc < ccend)
1774  {
1775  size = 0;
1776  switch(*cc)
1777  {
1778  case OP_KET:
1779  if (PRIVATE_DATA(cc) != 0)
1780  {
1781  private_data_length++;
1782  SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1783  cc += PRIVATE_DATA(cc + 1);
1784  }
1785  cc += 1 + LINK_SIZE;
1786  break;
1787 
1788  case OP_ASSERT:
1789  case OP_ASSERT_NOT:
1790  case OP_ASSERTBACK:
1791  case OP_ASSERTBACK_NOT:
1792  case OP_ONCE:
1793  case OP_ONCE_NC:
1794  case OP_BRAPOS:
1795  case OP_SBRA:
1796  case OP_SBRAPOS:
1797  case OP_SCOND:
1798  private_data_length++;
1799  SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1800  cc += 1 + LINK_SIZE;
1801  break;
1802 
1803  case OP_CBRA:
1804  case OP_SCBRA:
1805  if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1806  private_data_length++;
1807  cc += 1 + LINK_SIZE + IMM2_SIZE;
1808  break;
1809 
1810  case OP_CBRAPOS:
1811  case OP_SCBRAPOS:
1812  private_data_length += 2;
1813  cc += 1 + LINK_SIZE + IMM2_SIZE;
1814  break;
1815 
1816  case OP_COND:
1817  /* Might be a hidden SCOND. */
1818  alternative = cc + GET(cc, 1);
1819  if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1820  private_data_length++;
1821  cc += 1 + LINK_SIZE;
1822  break;
1823 
1824  CASE_ITERATOR_PRIVATE_DATA_1
1825  if (PRIVATE_DATA(cc))
1826  private_data_length++;
1827  cc += 2;
1828 #ifdef SUPPORT_UTF
1829  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1830 #endif
1831  break;
1832 
1833  CASE_ITERATOR_PRIVATE_DATA_2A
1834  if (PRIVATE_DATA(cc))
1835  private_data_length += 2;
1836  cc += 2;
1837 #ifdef SUPPORT_UTF
1838  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1839 #endif
1840  break;
1841 
1842  CASE_ITERATOR_PRIVATE_DATA_2B
1843  if (PRIVATE_DATA(cc))
1844  private_data_length += 2;
1845  cc += 2 + IMM2_SIZE;
1846 #ifdef SUPPORT_UTF
1847  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1848 #endif
1849  break;
1850 
1851  CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1852  if (PRIVATE_DATA(cc))
1853  private_data_length++;
1854  cc += 1;
1855  break;
1856 
1857  CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1858  if (PRIVATE_DATA(cc))
1859  private_data_length += 2;
1860  cc += 1;
1861  break;
1862 
1863  CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1864  if (PRIVATE_DATA(cc))
1865  private_data_length += 2;
1866  cc += 1 + IMM2_SIZE;
1867  break;
1868 
1869  case OP_CLASS:
1870  case OP_NCLASS:
1871 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1872  case OP_XCLASS:
1873  size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1874 #else
1875  size = 1 + 32 / (int)sizeof(pcre_uchar);
1876 #endif
1877  if (PRIVATE_DATA(cc))
1878  private_data_length += get_class_iterator_size(cc + size);
1879  cc += size;
1880  break;
1881 
1882  default:
1883  cc = next_opcode(common, cc);
1884  SLJIT_ASSERT(cc != NULL);
1885  break;
1886  }
1887  }
1888 SLJIT_ASSERT(cc == ccend);
1889 return private_data_length;
1890 }
1891 
/* Emits code that copies the private data words of the opcodes in
[cc, ccend) between the locals (SLJIT_MEM1(SLJIT_SP)) and the stack
(SLJIT_MEM1(STACK_TOP)).

save == TRUE:  locals -> stack. After the opcodes have been processed,
               common->recursive_head_ptr (and common->control_head_ptr
               when needs_control_head is set) are stored as well.
save == FALSE: stack -> locals. The one or two head words are excluded
               from the restored range.

stackptr and stacktop are stack slot indexes (converted with STACK()).
The opcode walk mirrors get_private_data_copy_length(). Values travel
through TMP1 and TMP2, which are used alternately. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets of the words belonging to the current opcode. */
int srcw[2];
int count, size;
/* tmp1next: TMP1 is the register to use next (otherwise TMP2).
tmpXempty: the register currently holds no pending value. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  loop,
  end
} status;

status = loop;
stackptr = STACK(stackptr);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The head word(s) are not restored by this function. */
  stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload up to two values from the stack. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

SLJIT_ASSERT(common->recursive_head_ptr != 0);

do
  {
  /* Number of valid entries in srcw for this step. */
  count = 0;
  if (cc >= ccend)
    {
    /* All opcodes processed: when restoring there is nothing left to do,
    when saving the head word(s) are appended as the final step. */
    if (!save)
      break;

    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[0] = common->control_head_ptr;
      srcw[1] = common->recursive_head_ptr;
      }
    status = end;
    }
  else switch(*cc)
    {
    case OP_KET:
    if (PRIVATE_DATA(cc) != 0)
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      /* Skip the distance stored right after the KET. */
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    count = 1;
    srcw[0] = PRIVATE_DATA(cc);
    SLJIT_ASSERT(srcw[0] != 0);
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
      count = 1;
      srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    count = 2;
    srcw[0] = PRIVATE_DATA(cc);
    srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
    SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      {
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = srcw[0] + sizeof(sljit_sw);
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      {
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = srcw[0] + sizeof(sljit_sw);
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
    size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
    /* The repeat opcode that follows the class decides the word count. */
    if (PRIVATE_DATA(cc))
      switch(get_class_iterator_size(cc + size))
        {
        case 1:
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        break;

        case 2:
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
    cc += size;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Transfer the collected words, highest srcw index first. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Saving: write out the value still pending in the chosen
      register, then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Restoring: store the preloaded value into the local, then
      refill the register from the stack if anything is left. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Write out the values still pending in the registers; the register
  that would be used next holds the older value and goes first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
2202 
2203 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2204 {
2205 pcre_uchar *end = bracketend(cc);
2206 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2207 
2208 /* Assert captures then. */
2209 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2210  current_offset = NULL;
2211 /* Conditional block does not. */
2212 if (*cc == OP_COND || *cc == OP_SCOND)
2213  has_alternatives = FALSE;
2214 
2215 cc = next_opcode(common, cc);
2216 if (has_alternatives)
2217  current_offset = common->then_offsets + (cc - common->start);
2218 
2219 while (cc < end)
2220  {
2221  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2222  cc = set_then_offsets(common, cc, current_offset);
2223  else
2224  {
2225  if (*cc == OP_ALT && has_alternatives)
2226  current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2227  if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2228  *current_offset = 1;
2229  cc = next_opcode(common, cc);
2230  }
2231  }
2232 
2233 return end;
2234 }
2235 
/* The case label helper macros are not used past this point. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2242 
2243 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2244 {
2245 return (value & (value - 1)) == 0;
2246 }
2247 
2248 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2249 {
2250 while (list)
2251  {
2252  /* sljit_set_label is clever enough to do nothing
2253  if either the jump or the label is NULL. */
2254  SET_LABEL(list->jump, label);
2255  list = list->next;
2256  }
2257 }
2258 
2259 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2260 {
2261 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2262 if (list_item)
2263  {
2264  list_item->next = *list;
2265  list_item->jump = jump;
2266  *list = list_item;
2267  }
2268 }
2269 
2270 static void add_stub(compiler_common *common, struct sljit_jump *start)
2271 {
2272 DEFINE_COMPILER;
2273 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2274 
2275 if (list_item)
2276  {
2277  list_item->start = start;
2278  list_item->quit = LABEL();
2279  list_item->next = common->stubs;
2280  common->stubs = list_item;
2281  }
2282 }
2283 
2284 static void flush_stubs(compiler_common *common)
2285 {
2286 DEFINE_COMPILER;
2287 stub_list *list_item = common->stubs;
2288 
2289 while (list_item)
2290  {
2291  JUMPHERE(list_item->start);
2292  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2293  JUMPTO(SLJIT_JUMP, list_item->quit);
2294  list_item = list_item->next;
2295  }
2296 common->stubs = NULL;
2297 }
2298 
2299 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2300 {
2301 DEFINE_COMPILER;
2302 label_addr_list *label_addr;
2303 
2304 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2305 if (label_addr == NULL)
2306  return;
2307 label_addr->label = LABEL();
2308 label_addr->update_addr = update_addr;
2309 label_addr->next = common->label_addrs;
2310 common->label_addrs = label_addr;
2311 }
2312 
2313 static SLJIT_INLINE void count_match(compiler_common *common)
2314 {
2315 DEFINE_COMPILER;
2316 
2317 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2318 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2319 }
2320 
/* Emits code that reserves "size" machine words (sljit_sw) by lowering
STACK_TOP, followed by a check that enters a stub when STACK_TOP has dropped
below STACK_LIMIT. The stub body is produced later by flush_stubs, which
makes a fast call queued on common->stackalloc and then resumes after the
check. size must be positive. */
2321 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2322 {
2323 /* May destroy all locals and registers except TMP2. */
2324 DEFINE_COMPILER;
2325 
2326 SLJIT_ASSERT(size > 0);
2327 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2328 #ifdef DESTROY_REGISTERS
/* Debugging aid: overwrite TMP1, TMP3, RETURN_ADDR and both LOCALS slots with
a recognisable constant, so that code wrongly relying on them surviving an
allocation fails visibly. */
2329 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2330 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2331 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2334 #endif
/* The compare jump is the stub entry; add_stub also records the label to
return to. */
2335 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2336 }
2337 
2338 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2339 {
2340 DEFINE_COMPILER;
2341 
2342 SLJIT_ASSERT(size > 0);
2343 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2344 }
2345 
2346 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2347 {
2348 DEFINE_COMPILER;
2349 sljit_uw *result;
2350 
2351 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2352  return NULL;
2353 
2354 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2355 if (SLJIT_UNLIKELY(result == NULL))
2356  {
2357  sljit_set_compiler_memory_error(compiler);
2358  return NULL;
2359  }
2360 
2361 *(void**)result = common->read_only_data_head;
2362 common->read_only_data_head = (void *)result;
2363 return result + 1;
2364 }
2365 
2366 static void free_read_only_data(void *current, void *allocator_data)
2367 {
2368 void *next;
2369 
2370 SLJIT_UNUSED_ARG(allocator_data);
2371 
2372 while (current != NULL)
2373  {
2374  next = *(void**)current;
2375  SLJIT_FREE(current, allocator_data);
2376  current = next;
2377  }
2378 }
2379 
/* Emits code that fills OVECTOR(1) .. OVECTOR(length - 1) with the value
"subject begin minus one character" (jit_arguments.begin - IN_UCHARS(1)).
Fewer than 8 entries are written with unrolled stores, otherwise a store
loop is emitted. length must be greater than 1. Uses SLJIT_R0, SLJIT_R1 and
SLJIT_R2. */
2380 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2381 {
2382 DEFINE_COMPILER;
2383 struct sljit_label *loop;
2384 int i;
2385 
2386 /* At this point we can freely use all temporary registers. */
2387 SLJIT_ASSERT(length > 1);
2388 /* TMP1 returns with begin - 1. */
/* NOTE(review): the value is placed in SLJIT_R0; presumably TMP1 is an alias
of SLJIT_R0 - confirm against the register defines. */
2389 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2390 if (length < 8)
2391  {
2392  for (i = 1; i < length; i++)
2393  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2394  }
2395 else
2396  {
/* The call carrying SLJIT_MEM_SUPP appears to only ask whether a store with
pre-update addressing is available; the real store is emitted inside the
loop without that flag. */
2397  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2398  {
/* Pre-update form: the base starts one word lower (at OVECTOR_START) than in
the fallback below, and length - 1 stores are performed. */
2399  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2400  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2401  loop = LABEL();
2402  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2403  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2404  JUMPTO(SLJIT_NOT_ZERO, loop);
2405  }
2406  else
2407  {
/* Fallback: plain store followed by an explicit pointer increment, starting
at the second word of the ovector area. */
2408  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2409  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2410  loop = LABEL();
2411  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2412  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2413  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2414  JUMPTO(SLJIT_NOT_ZERO, loop);
2415  }
2416  }
2417 }
2418 
2419 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2420 {
2421 DEFINE_COMPILER;
2422 sljit_s32 i;
2423 
2424 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2425 
2426 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2428  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2429 }
2430 
/* Emits code that prepares the locals for a new match attempt:
 - OVECTOR(2) .. OVECTOR(length - 1) are refilled with the value kept in
   OVECTOR(1);
 - the mark and control head slots are cleared when they are in use;
 - STACK_TOP is reloaded from the "end" field of the sljit_stack referenced
   by jit_arguments.stack;
 - TMP1 is loaded from the common->start_ptr slot.
length must be greater than 1. TMP2 is used as a scratch pointer in the loop
variants, and STACK_TOP doubles as the loop counter before it is reloaded. */
2431 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2432 {
2433 DEFINE_COMPILER;
2434 struct sljit_label *loop;
2435 int i;
2436 
2437 SLJIT_ASSERT(length > 1);
2438 /* OVECTOR(1) contains the "string begin - 1" constant. */
2439 if (length > 2)
2440  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2441 if (length < 8)
2442  {
2443  for (i = 2; i < length; i++)
2444  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2445  }
2446 else
2447  {
/* The call carrying SLJIT_MEM_SUPP appears to only ask whether a store with
pre-update addressing is available; the real store is emitted in the loop. */
2448  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2449  {
/* Pre-update form: the base starts one word lower than in the fallback. */
2450  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2451  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2452  loop = LABEL();
2453  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2454  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2455  JUMPTO(SLJIT_NOT_ZERO, loop);
2456  }
2457  else
2458  {
/* Fallback: plain store plus explicit pointer increment, starting at the
third word of the ovector area. */
2459  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2460  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2461  loop = LABEL();
2462  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2463  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2464  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2465  JUMPTO(SLJIT_NOT_ZERO, loop);
2466  }
2467  }
2468 
/* STACK_TOP is rebuilt in three dependent loads (arguments -> stack -> end);
the unrelated stores and the TMP1 load are interleaved between them. */
2469 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2470 if (common->mark_ptr != 0)
2471  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2472 if (common->control_head_ptr != 0)
2473  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2474 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2476 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2477 }
2478 
2479 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2480 {
2481 while (current != NULL)
2482  {
2483  switch (current[1])
2484  {
2485  case type_then_trap:
2486  break;
2487 
2488  case type_mark:
2489  if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2490  return current[3];
2491  break;
2492 
2493  default:
2494  SLJIT_UNREACHABLE();
2495  break;
2496  }
2497  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2498  current = (sljit_sw*)current[0];
2499  }
2500 return 0;
2501 }
2502 
/* Emits the code that publishes a successful match to the caller:
 - OVECTOR(1) receives STR_PTR (its previous content is kept in SLJIT_S2);
 - the mark slot, when used, is copied to jit_arguments.mark_ptr;
 - jit_arguments.offset_count words of the internal ovector are converted
   from pointers to character offsets relative to jit_arguments.begin and
   stored as 32 bit ints into jit_arguments.offsets;
 - SLJIT_RETURN_REG receives the return value: 1 when topbracket <= 1,
   otherwise a count derived by scanning the ovector downwards from the
   top bracket until an entry differs from the value saved in SLJIT_S2. */
2503 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2504 {
2505 DEFINE_COMPILER;
2506 struct sljit_label *loop;
2507 struct sljit_jump *early_quit;
2508 BOOL has_pre;
2509 
2510 /* At this point we can freely use all registers. */
2511 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2513 
/* R0 = arguments, R1 = number of offsets to copy, R2 = destination minus
one int (it is advanced before each store), then R0 = subject begin. */
2514 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2515 if (common->mark_ptr != 0)
2516  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2517 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2518 if (common->mark_ptr != 0)
2519  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2520 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2521 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2522 
/* The call carrying SLJIT_MEM_SUPP appears to only ask whether a load with
pre-update addressing is available; if it is, the source pointer starts one
word before the ovector. */
2523 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
2524 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
2525 
2526 /* Unlikely, but possible */
2527 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2528 loop = LABEL();
2529 
2530 if (has_pre)
2531  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
2532 else
2533  {
2534  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
2535  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2536  }
2537 
/* Advance the destination and turn the pointer into an offset from begin. */
2538 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
2539 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
2540 /* Copy the integer value to the output buffer */
2541 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Byte distance to code unit count. */
2542 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2543 #endif
2544 
2545 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
2546 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2547 JUMPTO(SLJIT_NOT_ZERO, loop);
2548 JUMPHERE(early_quit);
2549 
2550 /* Calculate the return value, which is the maximum ovector value. */
2551 if (topbracket > 1)
2552  {
2553  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
2554  {
/* Pre-update form: start one pair above the top bracket and step down two
words per iteration. */
2555  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2556  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2557 
2558  /* OVECTOR(0) is never equal to SLJIT_S2. */
2559  loop = LABEL();
2560  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2561  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2562  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2563  }
2564  else
2565  {
/* Fallback: plain load plus explicit pointer decrement, starting at the
top bracket pair itself. */
2566  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
2567  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2568 
2569  /* OVECTOR(0) is never equal to SLJIT_S2. */
2570  loop = LABEL();
2571  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
2572  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
2573  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2574  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2575  }
2576  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2577  }
2578 else
2579  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2580 }
2581 
/* Emits the code that reports a partial match and jumps to "quit".
SLJIT_RETURN_REG is set to PCRE_ERROR_PARTIAL. Depending on
jit_arguments.real_offset_count the following ints are written to
jit_arguments.offsets (all as character offsets from jit_arguments.begin):
 - fewer than 2 slots: nothing is stored;
 - offsets[2] (only with 3 or more slots): taken from the start_ptr slot in
   hard partial mode, otherwise from the word after the hit_start slot;
 - offsets[1]: STR_END;
 - offsets[0]: taken from the start_used_ptr slot in hard partial mode,
   otherwise from the hit_start slot.
SLJIT_S0 and SLJIT_S1 (STR_END) are overwritten. */
2582 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2583 {
2584 DEFINE_COMPILER;
2585 struct sljit_jump *jump;
2586 
2587 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2588 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2589  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2590 
2591 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2592 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2593 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* No room for even one offset pair: return just the error code. */
2594 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2595 
2596 /* Store match begin and end. */
2597 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2598 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2599 
/* The third slot is written only when the caller provided it. */
2600 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2601 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2602 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2603 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2604 #endif
2605 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2606 JUMPHERE(jump);
2607 
/* offsets[1] = STR_END - begin. The result overwrites SLJIT_S1, which is
STR_END itself (see the compile-time assert above). */
2608 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2609 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2610 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2611 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2612 #endif
2613 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2614 
/* offsets[0] = value loaded into SLJIT_R2 above, minus begin. */
2615 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2616 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2617 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2618 #endif
2619 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2620 
2621 JUMPTO(SLJIT_JUMP, quit);
2622 }
2623 
2624 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2625 {
2626 /* May destroy TMP1. */
2627 DEFINE_COMPILER;
2628 struct sljit_jump *jump;
2629 
2630 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2631  {
2632  /* The value of -1 must be kept for start_used_ptr! */
2633  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2634  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2635  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2636  jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2637  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2638  JUMPHERE(jump);
2639  }
2640 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2641  {
2642  jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2643  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2644  JUMPHERE(jump);
2645  }
2646 }
2647 
/* Compile-time helper: tells whether the character that starts at cc has a
different other case form.
 - In UTF mode the character is decoded with GETCHAR. Values above 127 are
   answered from UCD_OTHERCASE when SUPPORT_UCP is defined, and are treated
   as caseless otherwise.
 - All remaining values are looked up in the common->fcc table, which is
   only consulted for values accepted by MAX_255 (or, in the non 8 bit UTF
   path, for values up to 127). */
2648 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2649 {
2650 /* Detects if the character has an othercase. */
2651 unsigned int c;
2652 
2653 #ifdef SUPPORT_UTF
2654 if (common->utf)
2655  {
2656  GETCHAR(c, cc);
2657  if (c > 127)
2658  {
2659 #ifdef SUPPORT_UCP
2660  return c != UCD_OTHERCASE(c);
2661 #else
2662  return FALSE;
2663 #endif
2664  }
2665 #ifndef COMPILE_PCRE8
/* Here c <= 127, so the table can be indexed without a range check. In the
8 bit build control falls through to the final return instead. */
2666  return common->fcc[c] != c;
2667 #endif
2668  }
2669 else
2670 #endif
/* This assignment is the body of the "else" above when SUPPORT_UTF is
defined, and an unconditional statement otherwise. */
2671  c = *cc;
2672 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2673 }
2674 
2675 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2676 {
2677 /* Returns with the othercase. */
2678 #ifdef SUPPORT_UTF
2679 if (common->utf && c > 127)
2680  {
2681 #ifdef SUPPORT_UCP
2682  return UCD_OTHERCASE(c);
2683 #else
2684  return c;
2685 #endif
2686  }
2687 #endif
2688 return TABLE_GET(c, common->fcc, c);
2689 }
2690 
/* Compile-time helper for caseless matching of a single character whose two
case forms differ (asserted). When the two forms differ in exactly one bit,
the result is (index << 8) | mask, where "mask" is the differing bit moved
into the low 8 bits and "index" identifies where that bit lives:
 - 8 bit build: index of the UTF-8 byte holding the bit (0 outside UTF mode
   and for characters up to 127);
 - 16/32 bit build: 0 or 1 selects the low or high byte of the value; for
   UTF characters above 65535 the values 2 and 3 are used when the bit lies
   in the low 10 bits.
NOTE(review): how callers map the index onto a memory offset is not visible
here - confirm at the call sites.
Returns 0 when the forms differ in more than one bit. */
2691 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2692 {
2693 /* Detects if the character and its othercase has only 1 bit difference. */
2694 unsigned int c, oc, bit;
2695 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2696 int n;
2697 #endif
2698 
2699 #ifdef SUPPORT_UTF
2700 if (common->utf)
2701  {
2702  GETCHAR(c, cc);
2703  if (c <= 127)
2704  oc = common->fcc[c];
2705  else
2706  {
2707 #ifdef SUPPORT_UCP
2708  oc = UCD_OTHERCASE(c);
2709 #else
2710  oc = c;
2711 #endif
2712  }
2713  }
2714 else
2715  {
2716  c = *cc;
2717  oc = TABLE_GET(c, common->fcc, c);
2718  }
2719 #else
2720 c = *cc;
2721 oc = TABLE_GET(c, common->fcc, c);
2722 #endif
2723 
2724 SLJIT_ASSERT(c != oc);
2725 
2726 bit = c ^ oc;
2727 /* Optimized for English alphabet. */
2728 if (c <= 127 && bit == 0x20)
2729  return (0 << 8) | 0x20;
2730 
2731 /* Since c != oc, they must have at least 1 bit difference. */
/* is_powerof2 would also accept 0, which the assertion above rules out. */
2732 if (!is_powerof2(bit))
2733  return 0;
2734 
2735 #if defined COMPILE_PCRE8
2736 
2737 #ifdef SUPPORT_UTF
2738 if (common->utf && c > 127)
2739  {
/* n starts at the index of the last byte of the sequence. Every byte after
the first carries 6 payload bits, so the bit is moved down 6 positions per
byte stepped back until it falls into the current byte. */
2740  n = GET_EXTRALEN(*cc);
2741  while ((bit & 0x3f) == 0)
2742  {
2743  n--;
2744  bit >>= 6;
2745  }
2746  return (n << 8) | bit;
2747  }
2748 #endif /* SUPPORT_UTF */
2749 return (0 << 8) | bit;
2750 
2751 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2752 
2753 #ifdef SUPPORT_UTF
2754 if (common->utf && c > 65535)
2755  {
/* A bit at position 10 or above is moved down by 10 and handled by the
common return below; a bit in the low 10 bits is reported with index 2
or 3. */
2756  if (bit >= (1 << 10))
2757  bit >>= 10;
2758  else
2759  return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2760  }
2761 #endif /* SUPPORT_UTF */
2762 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2763 
2764 #endif /* COMPILE_PCRE[8|16|32] */
2765 }
2766 
2767 static void check_partial(compiler_common *common, BOOL force)
2768 {
2769 /* Checks whether a partial matching is occurred. Does not modify registers. */
2770 DEFINE_COMPILER;
2771 struct sljit_jump *jump = NULL;
2772 
2773 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2774 
2775 if (common->mode == JIT_COMPILE)
2776  return;
2777 
2778 if (!force)
2779  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2780 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2781  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2782 
2783 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2784  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2785 else
2786  {
2787  if (common->partialmatchlabel != NULL)
2788  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2789  else
2790  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2791  }
2792 
2793 if (jump != NULL)
2794  JUMPHERE(jump);
2795 }
2796 
2797 static void check_str_end(compiler_common *common, jump_list **end_reached)
2798 {
2799 /* Does not affect registers. Usually used in a tight spot. */
2800 DEFINE_COMPILER;
2801 struct sljit_jump *jump;
2802 
2803 if (common->mode == JIT_COMPILE)
2804  {
2805  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2806  return;
2807  }
2808 
2809 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2810 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2811  {
2812  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2813  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2814  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2815  }
2816 else
2817  {
2818  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2819  if (common->partialmatchlabel != NULL)
2820  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2821  else
2822  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2823  }
2824 JUMPHERE(jump);
2825 }
2826 
2827 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2828 {
2829 DEFINE_COMPILER;
2830 struct sljit_jump *jump;
2831 
2832 if (common->mode == JIT_COMPILE)
2833  {
2834  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2835  return;
2836  }
2837 
2838 /* Partial matching mode. */
2839 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2840 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2841 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2842  {
2843  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2844  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2845  }
2846 else
2847  {
2848  if (common->partialmatchlabel != NULL)
2849  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2850  else
2851  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2852  }
2853 JUMPHERE(jump);
2854 }
2855 
2856 static void peek_char(compiler_common *common, sljit_u32 max)
2857 {
2858 /* Reads the character into TMP1, keeps STR_PTR.
2859 Does not check STR_END. TMP2 Destroyed. */
2860 DEFINE_COMPILER;
2861 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2862 struct sljit_jump *jump;
2863 #endif
2864 
2865 SLJIT_UNUSED_ARG(max);
2866 
2867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2868 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2869 if (common->utf)
2870  {
2871  if (max < 128) return;
2872 
2873  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2874  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2875  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2876  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2877  JUMPHERE(jump);
2878  }
2879 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2880 
2881 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2882 if (common->utf)
2883  {
2884  if (max < 0xd800) return;
2885 
2886  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2887  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2888  /* TMP2 contains the high surrogate. */
2889  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2891  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2892  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2893  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894  JUMPHERE(jump);
2895  }
2896 #endif
2897 }
2898 
2899 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2900 
2901 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2902 {
2903 /* Tells whether the character codes below 128 are enough
2904 to determine a match. */
2905 const sljit_u8 value = nclass ? 0xff : 0;
2906 const sljit_u8 *end = bitset + 32;
2907 
2908 bitset += 16;
2909 do
2910  {
2911  if (*bitset++ != value)
2912  return FALSE;
2913  }
2914 while (bitset < end);
2915 return TRUE;
2916 }
2917 
2918 static void read_char7_type(compiler_common *common, BOOL full_read)
2919 {
2920 /* Reads the precise character type of a character into TMP1, if the character
2921 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2922 full_read argument tells whether characters above max are accepted or not. */
2923 DEFINE_COMPILER;
2924 struct sljit_jump *jump;
2925 
2926 SLJIT_ASSERT(common->utf);
2927 
2928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2929 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2930 
2931 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2932 
2933 if (full_read)
2934  {
2935  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2936  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2937  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2938  JUMPHERE(jump);
2939  }
2940 }
2941 
2942 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2943 
/* Emits code that reads one character at STR_PTR into TMP1 and advances
STR_PTR. The caller promises to care only about values in [min, max]; this
lets the generated decoder be specialised:
 - min / max select which UTF sequence lengths have to be decoded precisely;
   anything else may come back as some value outside the range.
 - update_str_ptr tells whether STR_PTR must end up after the whole
   character even when its value was not decoded precisely.
TMP2 is used as scratch; RETURN_ADDR is also overwritten in the UTF-8 paths
that keep the sequence length around. */
2944 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2945 {
2946 /* Reads the precise value of a character into TMP1, if the character is
2947 between min and max (c >= min && c <= max). Otherwise it returns with a value
2948 outside the range. Does not check STR_END. */
2949 DEFINE_COMPILER;
2950 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2951 struct sljit_jump *jump;
2952 #endif
2953 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2954 struct sljit_jump *jump2;
2955 #endif
2956 
2957 SLJIT_UNUSED_ARG(update_str_ptr);
2958 SLJIT_UNUSED_ARG(min);
2959 SLJIT_UNUSED_ARG(max);
2960 SLJIT_ASSERT(min <= max);
2961 
/* Load the first code unit and step over it; everything below works with
STR_PTR pointing at the second unit. */
2962 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2964 
2965 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2966 if (common->utf)
2967  {
2968  if (max < 128 && !update_str_ptr) return;
2969 
/* First bytes below 0xc0 need no further decoding. */
2970  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2971  if (min >= 0x10000)
2972  {
/* Only four byte sequences can be in range: decode them inline. jump2 is
taken when the first byte is not in 0xf0..0xf7. When update_str_ptr is set,
RETURN_ADDR holds the PRIV(utf8_table4) entry of the first byte (presumably
the number of additional bytes) and is added to STR_PTR at the end. */
2973  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2974  if (update_str_ptr)
2975  OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2976  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2977  jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2978  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2979  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2980  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2981  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2982  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2983  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2984  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2985  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2986  if (!update_str_ptr)
2987  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2988  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2989  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2990  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2991  JUMPHERE(jump2);
2992  if (update_str_ptr)
2993  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2994  }
2995  else if (min >= 0x800 && max <= 0xffff)
2996  {
/* Only three byte sequences can be in range: decode them inline. jump2 is
taken when the first byte is not in 0xe0..0xef. */
2997  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2998  if (update_str_ptr)
2999  OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3000  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3001  jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3002  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3003  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3004  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3005  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3006  if (!update_str_ptr)
3007  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3008  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011  JUMPHERE(jump2);
3012  if (update_str_ptr)
3013  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3014  }
3015  else if (max >= 0x800)
/* General case: call a shared decoder routine; the utfreadchar16 list is
used when nothing above 0xffff is of interest. */
3016  add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3017  else if (max < 128)
3018  {
/* Reached only with update_str_ptr set (see the early return above): the
value is not needed, STR_PTR is just moved by the table entry. */
3019  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3020  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3021  }
3022  else
3023  {
/* 128 <= max < 0x800: decode as a two byte sequence. */
3024  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3025  if (!update_str_ptr)
3026  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3027  else
3028  OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3029  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3030  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3031  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3032  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3033  if (update_str_ptr)
3034  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3035  }
3036  JUMPHERE(jump);
3037  }
3038 #endif
3039 
3040 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3041 if (common->utf)
3042  {
3043  if (max >= 0x10000)
3044  {
/* Characters above the BMP are of interest: combine a surrogate pair.
((high - 0xd800) + 0x40) << 10 places the high ten bits and adds the 0x10000
bias in one step. STR_PTR already points at the low surrogate. */
3045  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3046  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3047  /* TMP2 contains the high surrogate. */
3048  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3049  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
3050  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3051  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3052  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
3053  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3054  JUMPHERE(jump);
3055  return;
3056  }
3057 
3058  if (max < 0xd800 && !update_str_ptr) return;
3059 
3060  /* Skip low surrogate if necessary. */
3061  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3062  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3063  if (update_str_ptr)
3064  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* For a surrogate pair return 0x10000, which is above max on this path, so
a lone high surrogate value cannot be mistaken for a character in range. */
3065  if (max >= 0xd800)
3066  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3067  JUMPHERE(jump);
3068  }
3069 #endif
3070 }
3071 
/* Emits code that reads one full character into TMP1 and moves STR_PTR past
it: read_char_range with the widest range (0 .. READ_CHAR_MAX) and
update_str_ptr set. */
3072 static SLJIT_INLINE void read_char(compiler_common *common)
3073 {
3074 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3075 }
3076 
/* Emits code that loads the common->ctypes entry of the current character
into TMP1 and advances STR_PTR. Characters above 255 yield 0, because the
table has only 256 entries. TMP2 is destroyed.

update_str_ptr matters only in UTF mode: it tells whether STR_PTR has to
end up after the complete character. In the UTF-8 path it selects between a
call to the shared utfreadtype8 routine (set) and an inline decoder that
handles the sequence as a two byte one (clear). */
3077 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3078 {
3079 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3080 DEFINE_COMPILER;
3081 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3082 struct sljit_jump *jump;
3083 #endif
3084 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3085 struct sljit_jump *jump2;
3086 #endif
3087 
3088 SLJIT_UNUSED_ARG(update_str_ptr);
3089 
3090 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3091 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3092 
3093 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3094 if (common->utf)
3095  {
3096  /* This can be an extra read in some situations, but hopefully
3097  it is needed in most cases. */
3098  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* First bytes below 0xc0: the type loaded above is the answer. */
3099  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3100  if (!update_str_ptr)
3101  {
/* Combine the first byte with the following one as a two byte sequence; the
result is looked up only when it does not exceed 255, otherwise TMP1 stays
0. STR_PTR is advanced by exactly one more byte. */
3102  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3103  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3104  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3105  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3106  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3107  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3108  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3109  jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3110  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3111  JUMPHERE(jump2);
3112  }
3113  else
3114  add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3115  JUMPHERE(jump);
3116  return;
3117  }
3118 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3119 
3120 #if !defined COMPILE_PCRE8
3121 /* The ctypes array contains only 256 values. */
3122 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3123 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3124 #endif
3125 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3126 #if !defined COMPILE_PCRE8
3127 JUMPHERE(jump);
3128 #endif
3129 
3130 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3131 if (common->utf && update_str_ptr)
3132  {
3133  /* Skip low surrogate if necessary. */
/* TMP2 still holds the first code unit; a value in 0xd800..0xdbff means a
second unit follows. */
3134  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3135  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3136  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3137  JUMPHERE(jump);
3138  }
3139 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3140 }
3141 
/* Emits code that moves STR_PTR back by one character.
 - UTF-8: steps back one byte at a time while the byte stepped over has the
   form 10xxxxxx (a continuation byte).
 - UTF-16: steps back one code unit, and one more when the unit stepped over
   is in 0xdc00..0xdfff (a low surrogate).
 - Otherwise: steps back exactly one code unit.
TMP1 is overwritten in the UTF paths. */
3142 static void skip_char_back(compiler_common *common)
3143 {
3144 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3145 DEFINE_COMPILER;
3146 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3147 #if defined COMPILE_PCRE8
3148 struct sljit_label *label;
3149 
3150 if (common->utf)
3151  {
3152  label = LABEL();
3153  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3154  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* (byte & 0xc0) == 0x80 identifies a continuation byte: keep going back. */
3155  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3156  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3157  return;
3158  }
3159 #elif defined COMPILE_PCRE16
3160 if (common->utf)
3161  {
3162  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3163  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3164  /* Skip low surrogate if necessary. */
/* Branch-free: the equality flag becomes 0 or 1 in TMP1, is doubled to the
size of one 16 bit unit in bytes, and is subtracted from STR_PTR. */
3165  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3166  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3167  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3168  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3169  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3170  return;
3171  }
3172 #endif /* COMPILE_PCRE[8|16] */
3173 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3174 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3175 }
3176 
3177 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3178 {
3179 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3180 DEFINE_COMPILER;
3181 struct sljit_jump *jump;
3182 
3183 if (nltype == NLTYPE_ANY)
3184  {
3185  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3186  sljit_set_current_flags(compiler, SLJIT_SET_Z);
3187  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3188  }
3189 else if (nltype == NLTYPE_ANYCRLF)
3190  {
3191  if (jumpifmatch)
3192  {
3193  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3194  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3195  }
3196  else
3197  {
3198  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3199  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3200  JUMPHERE(jump);
3201  }
3202  }
3203 else
3204  {
3205  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3206  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3207  }
3208 }
3209 
3210 #ifdef SUPPORT_UTF
3211 
3212 #if defined COMPILE_PCRE8
3213 static void do_utfreadchar(compiler_common *common)
3214 {
3215 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3216 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3217 DEFINE_COMPILER;
3218 struct sljit_jump *jump;
3219 
3220 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3222 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3223 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3224 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3226 
3227 /* Searching for the first zero. */
3228 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3229 jump = JUMP(SLJIT_NOT_ZERO);
3230 /* Two byte sequence. */
3231 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3232 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3233 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3234 
3235 JUMPHERE(jump);
3236 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3237 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3238 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3239 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3240 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3241 
3242 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3243 jump = JUMP(SLJIT_NOT_ZERO);
3244 /* Three byte sequence. */
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3246 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3247 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3248 
3249 /* Four byte sequence. */
3250 JUMPHERE(jump);
3251 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3252 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3253 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3254 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3255 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3256 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3257 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3258 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3259 }
3260 
3261 static void do_utfreadchar16(compiler_common *common)
3262 {
3263 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3264 of the character (>= 0xc0). Return value in TMP1. */
3265 DEFINE_COMPILER;
3266 struct sljit_jump *jump;
3267 
3268 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3269 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3270 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3271 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3272 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3273 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3274 
3275 /* Searching for the first zero. */
3276 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3277 jump = JUMP(SLJIT_NOT_ZERO);
3278 /* Two byte sequence. */
3279 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3280 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3281 
3282 JUMPHERE(jump);
3283 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3284 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
3285 /* This code runs only in 8 bit mode. No need to shift the value. */
3286 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3288 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3289 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3290 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3291 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3292 /* Three byte sequence. */
3293 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3295 }
3296 
3297 static void do_utfreadtype8(compiler_common *common)
3298 {
3299 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3300 of the character (>= 0xc0). Return value in TMP1. */
3301 DEFINE_COMPILER;
3302 struct sljit_jump *jump;
3303 struct sljit_jump *compare;
3304 
3305 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3306 
3307 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3308 jump = JUMP(SLJIT_NOT_ZERO);
3309 /* Two byte sequence. */
3310 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3312 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3313 /* The upper 5 bits are known at this point. */
3314 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3315 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3316 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3317 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3318 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3319 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3320 
3321 JUMPHERE(compare);
3322 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3323 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3324 
3325 /* We only have types for characters less than 256. */
3326 JUMPHERE(jump);
3327 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3330 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3331 }
3332 
3333 #endif /* COMPILE_PCRE8 */
3334 
3335 #endif /* SUPPORT_UTF */
3336 
3337 #ifdef SUPPORT_UCP
3338 
3339 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3340 #define UCD_BLOCK_MASK 127
3341 #define UCD_BLOCK_SHIFT 7
3342 
3343 static void do_getucd(compiler_common *common)
3344 {
3345 /* Search the UCD record for the character comes in TMP1.
3346 Returns chartype in TMP1 and UCD offset in TMP2. */
3347 DEFINE_COMPILER;
3348 #ifdef COMPILE_PCRE32
3349 struct sljit_jump *jump;
3350 #endif
3351 
3352 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3353 /* dummy_ucd_record */
3354 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3355 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3356 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3357 #endif
3358 
3359 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3360 
3361 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3362 
3363 #ifdef COMPILE_PCRE32
3364 if (!common->utf)
3365  {
3366  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3367  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3368  JUMPHERE(jump);
3369  }
3370 #endif
3371 
3372 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3373 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3374 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3375 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3376 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3378 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3380 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3381 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3382 }
3383 #endif
3384 
3385 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3386 {
3387 DEFINE_COMPILER;
3388 struct sljit_label *mainloop;
3389 struct sljit_label *newlinelabel = NULL;
3390 struct sljit_jump *start;
3391 struct sljit_jump *end = NULL;
3392 struct sljit_jump *end2 = NULL;
3393 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3394 struct sljit_jump *singlechar;
3395 #endif
3396 jump_list *newline = NULL;
3397 BOOL newlinecheck = FALSE;
3398 BOOL readuchar = FALSE;
3399 
3400 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3401  (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3402  newlinecheck = TRUE;
3403 
3404 if (common->match_end_ptr != 0)
3405  {
3406  /* Search for the end of the first line. */
3407  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3408 
3409  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3410  {
3411  mainloop = LABEL();
3412  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3413  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3414  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3415  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3416  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3417  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3418  JUMPHERE(end);
3419  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3420  }
3421  else
3422  {
3423  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3424  mainloop = LABEL();
3425  /* Continual stores does not cause data dependency. */
3426  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3427  read_char_range(common, common->nlmin, common->nlmax, TRUE);
3428  check_newlinechar(common, common->nltype, &newline, TRUE);
3429  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3430  JUMPHERE(end);
3431  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3432  set_jumps(newline, LABEL());
3433  }
3434 
3435  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3436  }
3437 
3438 start = JUMP(SLJIT_JUMP);
3439 
3440 if (newlinecheck)
3441  {
3442  newlinelabel = LABEL();
3443  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3444  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3446  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3447  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3448 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3449  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3450 #endif
3451  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3452  end2 = JUMP(SLJIT_JUMP);
3453  }
3454 
3455 mainloop = LABEL();
3456 
3457 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3458 #ifdef SUPPORT_UTF
3459 if (common->utf) readuchar = TRUE;
3460 #endif
3461 if (newlinecheck) readuchar = TRUE;
3462 
3463 if (readuchar)
3464  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3465 
3466 if (newlinecheck)
3467  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3468 
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3471 #if defined COMPILE_PCRE8
3472 if (common->utf)
3473  {
3474  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3475  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3476  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3477  JUMPHERE(singlechar);
3478  }
3479 #elif defined COMPILE_PCRE16
3480 if (common->utf)
3481  {
3482  singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3483  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3484  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3485  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3486  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3487  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3488  JUMPHERE(singlechar);
3489  }
3490 #endif /* COMPILE_PCRE[8|16] */
3491 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3492 JUMPHERE(start);
3493 
3494 if (newlinecheck)
3495  {
3496  JUMPHERE(end);
3497  JUMPHERE(end2);
3498  }
3499 
3500 return mainloop;
3501 }
3502 
3503 #define MAX_N_CHARS 16
3504 #define MAX_DIFF_CHARS 6
3505 
3506 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3507 {
3508 pcre_uchar i, len;
3509 
3510 len = chars[0];
3511 if (len == 255)
3512  return;
3513 
3514 if (len == 0)
3515  {
3516  chars[0] = 1;
3517  chars[1] = chr;
3518  return;
3519  }
3520 
3521 for (i = len; i > 0; i--)
3522  if (chars[i] == chr)
3523  return;
3524 
3525 if (len >= MAX_DIFF_CHARS - 1)
3526  {
3527  chars[0] = 255;
3528  return;
3529  }
3530 
3531 len++;
3532 chars[len] = chr;
3533 chars[0] = len;
3534 }
3535 
3536 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3537 {
3538 /* Recursive function, which scans prefix literals. */
3539 BOOL last, any, class, caseless;
3540 int len, repeat, len_save, consumed = 0;
3541 sljit_u32 chr; /* Any unicode character. */
3542 sljit_u8 *bytes, *bytes_end, byte;
3543 pcre_uchar *alternative, *cc_save, *oc;
3544 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3545 pcre_uchar othercase[8];
3546 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3547 pcre_uchar othercase[2];
3548 #else
3549 pcre_uchar othercase[1];
3550 #endif
3551 
3552 repeat = 1;
3553 while (TRUE)
3554  {
3555  if (*rec_count == 0)
3556  return 0;
3557  (*rec_count)--;
3558 
3559  last = TRUE;
3560  any = FALSE;
3561  class = FALSE;
3562  caseless = FALSE;
3563 
3564  switch (*cc)
3565  {
3566  case OP_CHARI:
3567  caseless = TRUE;
3568  case OP_CHAR:
3569  last = FALSE;
3570  cc++;
3571  break;
3572 
3573  case OP_SOD:
3574  case OP_SOM:
3575  case OP_SET_SOM:
3576  case OP_NOT_WORD_BOUNDARY:
3577  case OP_WORD_BOUNDARY:
3578  case OP_EODN:
3579  case OP_EOD:
3580  case OP_CIRC:
3581  case OP_CIRCM:
3582  case OP_DOLL:
3583  case OP_DOLLM:
3584  /* Zero width assertions. */
3585  cc++;
3586  continue;
3587 
3588  case OP_ASSERT:
3589  case OP_ASSERT_NOT:
3590  case OP_ASSERTBACK:
3591  case OP_ASSERTBACK_NOT:
3592  cc = bracketend(cc);
3593  continue;
3594 
3595  case OP_PLUSI:
3596  case OP_MINPLUSI:
3597  case OP_POSPLUSI:
3598  caseless = TRUE;
3599  case OP_PLUS:
3600  case OP_MINPLUS:
3601  case OP_POSPLUS:
3602  cc++;
3603  break;
3604 
3605  case OP_EXACTI:
3606  caseless = TRUE;
3607  case OP_EXACT:
3608  repeat = GET2(cc, 1);
3609  last = FALSE;
3610  cc += 1 + IMM2_SIZE;
3611  break;
3612 
3613  case OP_QUERYI:
3614  case OP_MINQUERYI:
3615  case OP_POSQUERYI:
3616  caseless = TRUE;
3617  case OP_QUERY:
3618  case OP_MINQUERY:
3619  case OP_POSQUERY:
3620  len = 1;
3621  cc++;
3622 #ifdef SUPPORT_UTF
3623  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3624 #endif
3625  max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3626  if (max_chars == 0)
3627  return consumed;
3628  last = FALSE;
3629  break;
3630 
3631  case OP_KET:
3632  cc += 1 + LINK_SIZE;
3633  continue;
3634 
3635  case OP_ALT:
3636  cc += GET(cc, 1);
3637  continue;
3638 
3639  case OP_ONCE:
3640  case OP_ONCE_NC:
3641  case OP_BRA:
3642  case OP_BRAPOS:
3643  case OP_CBRA:
3644  case OP_CBRAPOS:
3645  alternative = cc + GET(cc, 1);
3646  while (*alternative == OP_ALT)
3647  {
3648  max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3649  if (max_chars == 0)
3650  return consumed;
3651  alternative += GET(alternative, 1);
3652  }
3653 
3654  if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3655  cc += IMM2_SIZE;
3656  cc += 1 + LINK_SIZE;
3657  continue;
3658 
3659  case OP_CLASS:
3660 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3661  if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3662  return consumed;
3663 #endif
3664  class = TRUE;
3665  break;
3666 
3667  case OP_NCLASS:
3668 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3669  if (common->utf) return consumed;
3670 #endif
3671  class = TRUE;
3672  break;
3673 
3674 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3675  case OP_XCLASS:
3676 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3677  if (common->utf) return consumed;
3678 #endif
3679  any = TRUE;
3680  cc += GET(cc, 1);
3681  break;
3682 #endif
3683 
3684  case OP_DIGIT:
3685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3686  if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3687  return consumed;
3688 #endif
3689  any = TRUE;
3690  cc++;
3691  break;
3692 
3693  case OP_WHITESPACE:
3694 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3695  if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3696  return consumed;
3697 #endif
3698  any = TRUE;
3699  cc++;
3700  break;
3701 
3702  case OP_WORDCHAR:
3703 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3704  if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3705  return consumed;
3706 #endif
3707  any = TRUE;
3708  cc++;
3709  break;
3710 
3711  case OP_NOT:
3712  case OP_NOTI:
3713  cc++;
3714  /* Fall through. */
3715  case OP_NOT_DIGIT:
3716  case OP_NOT_WHITESPACE:
3717  case OP_NOT_WORDCHAR:
3718  case OP_ANY:
3719  case OP_ALLANY:
3720 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3721  if (common->utf) return consumed;
3722 #endif
3723  any = TRUE;
3724  cc++;
3725  break;
3726 
3727 #ifdef SUPPORT_UTF
3728  case OP_NOTPROP:
3729  case OP_PROP:
3730 #ifndef COMPILE_PCRE32
3731  if (common->utf) return consumed;
3732 #endif
3733  any = TRUE;
3734  cc += 1 + 2;
3735  break;
3736 #endif
3737 
3738  case OP_TYPEEXACT:
3739  repeat = GET2(cc, 1);
3740  cc += 1 + IMM2_SIZE;
3741  continue;
3742 
3743  case OP_NOTEXACT:
3744  case OP_NOTEXACTI:
3745 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3746  if (common->utf) return consumed;
3747 #endif
3748  any = TRUE;
3749  repeat = GET2(cc, 1);
3750  cc += 1 + IMM2_SIZE + 1;
3751  break;
3752 
3753  default:
3754  return consumed;
3755  }
3756 
3757  if (any)
3758  {
3759  do
3760  {
3761  chars[0] = 255;
3762 
3763  consumed++;
3764  if (--max_chars == 0)
3765  return consumed;
3766  chars += MAX_DIFF_CHARS;
3767  }
3768  while (--repeat > 0);
3769 
3770  repeat = 1;
3771  continue;
3772  }
3773 
3774  if (class)
3775  {
3776  bytes = (sljit_u8*) (cc + 1);
3777  cc += 1 + 32 / sizeof(pcre_uchar);
3778 
3779  switch (*cc)
3780  {
3781  case OP_CRSTAR:
3782  case OP_CRMINSTAR:
3783  case OP_CRPOSSTAR:
3784  case OP_CRQUERY:
3785  case OP_CRMINQUERY:
3786  case OP_CRPOSQUERY:
3787  max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3788  if (max_chars == 0)
3789  return consumed;
3790  break;
3791 
3792  default:
3793  case OP_CRPLUS:
3794  case OP_CRMINPLUS:
3795  case OP_CRPOSPLUS:
3796  break;
3797 
3798  case OP_CRRANGE:
3799  case OP_CRMINRANGE:
3800  case OP_CRPOSRANGE:
3801  repeat = GET2(cc, 1);
3802  if (repeat <= 0)
3803  return consumed;
3804  break;
3805  }
3806 
3807  do
3808  {
3809  if (bytes[31] & 0x80)
3810  chars[0] = 255;
3811  else if (chars[0] != 255)
3812  {
3813  bytes_end = bytes + 32;
3814  chr = 0;
3815  do
3816  {
3817  byte = *bytes++;
3818  SLJIT_ASSERT((chr & 0x7) == 0);
3819  if (byte == 0)
3820  chr += 8;
3821  else
3822  {
3823  do
3824  {
3825  if ((byte & 0x1) != 0)
3826  add_prefix_char(chr, chars);
3827  byte >>= 1;
3828  chr++;
3829  }
3830  while (byte != 0);
3831  chr = (chr + 7) & ~7;
3832  }
3833  }
3834  while (chars[0] != 255 && bytes < bytes_end);
3835  bytes = bytes_end - 32;
3836  }
3837 
3838  consumed++;
3839  if (--max_chars == 0)
3840  return consumed;
3841  chars += MAX_DIFF_CHARS;
3842  }
3843  while (--repeat > 0);
3844 
3845  switch (*cc)
3846  {
3847  case OP_CRSTAR:
3848  case OP_CRMINSTAR:
3849  case OP_CRPOSSTAR:
3850  return consumed;
3851 
3852  case OP_CRQUERY:
3853  case OP_CRMINQUERY:
3854  case OP_CRPOSQUERY:
3855  cc++;
3856  break;
3857 
3858  case OP_CRRANGE:
3859  case OP_CRMINRANGE:
3860  case OP_CRPOSRANGE:
3861  if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3862  return consumed;
3863  cc += 1 + 2 * IMM2_SIZE;
3864  break;
3865  }
3866 
3867  repeat = 1;
3868  continue;
3869  }
3870 
3871  len = 1;
3872 #ifdef SUPPORT_UTF
3873  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3874 #endif
3875 
3876  if (caseless && char_has_othercase(common, cc))
3877  {
3878 #ifdef SUPPORT_UTF
3879  if (common->utf)
3880  {
3881  GETCHAR(chr, cc);
3882  if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3883  return consumed;
3884  }
3885  else
3886 #endif
3887  {
3888  chr = *cc;
3889  othercase[0] = TABLE_GET(chr, common->fcc, chr);
3890  }
3891  }
3892  else
3893  {
3894  caseless = FALSE;
3895  othercase[0] = 0; /* Stops compiler warning - PH */
3896  }
3897 
3898  len_save = len;
3899  cc_save = cc;
3900  while (TRUE)
3901  {
3902  oc = othercase;
3903  do
3904  {
3905  chr = *cc;
3906  add_prefix_char(*cc, chars);
3907 
3908  if (caseless)
3909  add_prefix_char(*oc, chars);
3910 
3911  len--;
3912  consumed++;
3913  if (--max_chars == 0)
3914  return consumed;
3915  chars += MAX_DIFF_CHARS;
3916  cc++;
3917  oc++;
3918  }
3919  while (len > 0);
3920 
3921  if (--repeat == 0)
3922  break;
3923 
3924  len = len_save;
3925  cc = cc_save;
3926  }
3927 
3928  repeat = 1;
3929  if (last)
3930  return consumed;
3931  }
3932 }
3933 
3934 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3935 
3936 static sljit_s32 character_to_int32(pcre_uchar chr)
3937 {
3938 sljit_s32 value = (sljit_s32)chr;
3939 #if defined COMPILE_PCRE8
3940 #define SSE2_COMPARE_TYPE_INDEX 0
3941 return (value << 24) | (value << 16) | (value << 8) | value;
3942 #elif defined COMPILE_PCRE16
3943 #define SSE2_COMPARE_TYPE_INDEX 1
3944 return (value << 16) | value;
3945 #elif defined COMPILE_PCRE32
3946 #define SSE2_COMPARE_TYPE_INDEX 2
3947 return value;
3948 #else
3949 #error "Unsupported unit width"
3950 #endif
3951 }
3952 
3953 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3954 {
3955 DEFINE_COMPILER;
3956 struct sljit_label *start;
3957 struct sljit_jump *quit[3];
3958 struct sljit_jump *nomatch;
3959 sljit_u8 instruction[8];
3960 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3961 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3962 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3963 BOOL load_twice = FALSE;
3964 pcre_uchar bit;
3965 
3966 bit = char1 ^ char2;
3967 if (!is_powerof2(bit))
3968  bit = 0;
3969 
3970 if ((char1 != char2) && bit == 0)
3971  load_twice = TRUE;
3972 
3973 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3974 
3975 /* First part (unaligned start) */
3976 
3977 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3978 
3979 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3980 
3981 /* MOVD xmm, r/m32 */
3982 instruction[0] = 0x66;
3983 instruction[1] = 0x0f;
3984 instruction[2] = 0x6e;
3985 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3986 sljit_emit_op_custom(compiler, instruction, 4);
3987 
3988 if (char1 != char2)
3989  {
3990  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3991 
3992  /* MOVD xmm, r/m32 */
3993  instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3994  sljit_emit_op_custom(compiler, instruction, 4);
3995  }
3996 
3997 /* PSHUFD xmm1, xmm2/m128, imm8 */
3998 instruction[2] = 0x70;
3999 instruction[3] = 0xc0 | (2 << 3) | 2;
4000 instruction[4] = 0;
4001 sljit_emit_op_custom(compiler, instruction, 5);
4002 
4003 if (char1 != char2)
4004  {
4005  /* PSHUFD xmm1, xmm2/m128, imm8 */
4006  instruction[3] = 0xc0 | (3 << 3) | 3;
4007  instruction[4] = 0;
4008  sljit_emit_op_custom(compiler, instruction, 5);
4009  }
4010 
4011 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
4012 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
4013 
4014 /* MOVDQA xmm1, xmm2/m128 */
4015 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4016 
4017 if (str_ptr_ind < 8)
4018  {
4019  instruction[2] = 0x6f;
4020  instruction[3] = (0 << 3) | str_ptr_ind;
4021  sljit_emit_op_custom(compiler, instruction, 4);
4022 
4023  if (load_twice)
4024  {
4025  instruction[3] = (1 << 3) | str_ptr_ind;
4026  sljit_emit_op_custom(compiler, instruction, 4);
4027  }
4028  }
4029 else
4030  {
4031  instruction[1] = 0x41;
4032  instruction[2] = 0x0f;
4033  instruction[3] = 0x6f;
4034  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4035  sljit_emit_op_custom(compiler, instruction, 5);
4036 
4037  if (load_twice)
4038  {
4039  instruction[4] = (1 << 3) | str_ptr_ind;
4040  sljit_emit_op_custom(compiler, instruction, 5);
4041  }
4042  instruction[1] = 0x0f;
4043  }
4044 
4045 #else
4046 
4047 instruction[2] = 0x6f;
4048 instruction[3] = (0 << 3) | str_ptr_ind;
4049 sljit_emit_op_custom(compiler, instruction, 4);
4050 
4051 if (load_twice)
4052  {
4053  instruction[3] = (1 << 3) | str_ptr_ind;
4054  sljit_emit_op_custom(compiler, instruction, 4);
4055  }
4056 
4057 #endif
4058 
4059 if (bit != 0)
4060  {
4061  /* POR xmm1, xmm2/m128 */
4062  instruction[2] = 0xeb;
4063  instruction[3] = 0xc0 | (0 << 3) | 3;
4064  sljit_emit_op_custom(compiler, instruction, 4);
4065  }
4066 
4067 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4068 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4069 instruction[3] = 0xc0 | (0 << 3) | 2;
4070 sljit_emit_op_custom(compiler, instruction, 4);
4071 
4072 if (load_twice)
4073  {
4074  instruction[3] = 0xc0 | (1 << 3) | 3;
4075  sljit_emit_op_custom(compiler, instruction, 4);
4076  }
4077 
4078 /* PMOVMSKB reg, xmm */
4079 instruction[2] = 0xd7;
4080 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4081 sljit_emit_op_custom(compiler, instruction, 4);
4082 
4083 if (load_twice)
4084  {
4085  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4086  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4087  sljit_emit_op_custom(compiler, instruction, 4);
4088 
4089  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4090  OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4091  }
4092 
4093 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4094 
4095 /* BSF r32, r/m32 */
4096 instruction[0] = 0x0f;
4097 instruction[1] = 0xbc;
4098 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4099 sljit_emit_op_custom(compiler, instruction, 3);
4100 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4101 
4102 nomatch = JUMP(SLJIT_ZERO);
4103 
4104 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4105 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4106 quit[1] = JUMP(SLJIT_JUMP);
4107 
4108 JUMPHERE(nomatch);
4109 
4110 start = LABEL();
4111 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4112 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4113 
4114 /* Second part (aligned) */
4115 
4116 instruction[0] = 0x66;
4117 instruction[1] = 0x0f;
4118 
4119 /* MOVDQA xmm1, xmm2/m128 */
4120 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4121 
4122 if (str_ptr_ind < 8)
4123  {
4124  instruction[2] = 0x6f;
4125  instruction[3] = (0 << 3) | str_ptr_ind;
4126  sljit_emit_op_custom(compiler, instruction, 4);
4127 
4128  if (load_twice)
4129  {
4130  instruction[3] = (1 << 3) | str_ptr_ind;
4131  sljit_emit_op_custom(compiler, instruction, 4);
4132  }
4133  }
4134 else
4135  {
4136  instruction[1] = 0x41;
4137  instruction[2] = 0x0f;
4138  instruction[3] = 0x6f;
4139  instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4140  sljit_emit_op_custom(compiler, instruction, 5);
4141 
4142  if (load_twice)
4143  {
4144  instruction[4] = (1 << 3) | str_ptr_ind;
4145  sljit_emit_op_custom(compiler, instruction, 5);
4146  }
4147  instruction[1] = 0x0f;
4148  }
4149 
4150 #else
4151 
4152 instruction[2] = 0x6f;
4153 instruction[3] = (0 << 3) | str_ptr_ind;
4154 sljit_emit_op_custom(compiler, instruction, 4);
4155 
4156 if (load_twice)
4157  {
4158  instruction[3] = (1 << 3) | str_ptr_ind;
4159  sljit_emit_op_custom(compiler, instruction, 4);
4160  }
4161 
4162 #endif
4163 
4164 if (bit != 0)
4165  {
4166  /* POR xmm1, xmm2/m128 */
4167  instruction[2] = 0xeb;
4168  instruction[3] = 0xc0 | (0 << 3) | 3;
4169  sljit_emit_op_custom(compiler, instruction, 4);
4170  }
4171 
4172 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4173 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4174 instruction[3] = 0xc0 | (0 << 3) | 2;
4175 sljit_emit_op_custom(compiler, instruction, 4);
4176 
4177 if (load_twice)
4178  {
4179  instruction[3] = 0xc0 | (1 << 3) | 3;
4180  sljit_emit_op_custom(compiler, instruction, 4);
4181  }
4182 
4183 /* PMOVMSKB reg, xmm */
4184 instruction[2] = 0xd7;
4185 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4186 sljit_emit_op_custom(compiler, instruction, 4);
4187 
4188 if (load_twice)
4189  {
4190  instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4191  sljit_emit_op_custom(compiler, instruction, 4);
4192 
4193  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4194  }
4195 
4196 /* BSF r32, r/m32 */
4197 instruction[0] = 0x0f;
4198 instruction[1] = 0xbc;
4199 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4200 sljit_emit_op_custom(compiler, instruction, 3);
4201 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4202 
4203 JUMPTO(SLJIT_ZERO, start);
4204 
4205 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4206 
4207 start = LABEL();
4208 SET_LABEL(quit[0], start);
4209 SET_LABEL(quit[1], start);
4210 SET_LABEL(quit[2], start);
4211 }
4212 
4213 #undef SSE2_COMPARE_TYPE_INDEX
4214 
4215 #endif
4216 
4217 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4218 {
4219 DEFINE_COMPILER;
4220 struct sljit_label *start;
4221 struct sljit_jump *quit;
4222 struct sljit_jump *found;
4224 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4225 struct sljit_label *utf_start = NULL;
4226 struct sljit_jump *utf_quit = NULL;
4227 #endif
4228 BOOL has_match_end = (common->match_end_ptr != 0);
4229 
4230 if (offset > 0)
4231  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4232 
4233 if (has_match_end)
4234  {
4235  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4236 
4237  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4238  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4239  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4240  }
4241 
4242 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4243 if (common->utf && offset > 0)
4244  utf_start = LABEL();
4245 #endif
4246 
4247 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4248 
4249 /* SSE2 accelerated first character search. */
4250 
4251 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
4252  {
4253  fast_forward_first_char2_sse2(common, char1, char2);
4254 
4255  SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4256  if (common->mode == JIT_COMPILE)
4257  {
4258  /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4259  SLJIT_ASSERT(common->forced_quit_label == NULL);
4260  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4261  add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4262 
4263 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4264  if (common->utf && offset > 0)
4265  {
4266  SLJIT_ASSERT(common->mode == JIT_COMPILE);
4267 
4268  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4269  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4270 #if defined COMPILE_PCRE8
4271  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4272  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4273 #elif defined COMPILE_PCRE16
4274  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4275  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4276 #else
4277 #error "Unknown code width"
4278 #endif
4279  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4280  }
4281 #endif
4282 
4283  if (offset > 0)
4284  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4285  }
4286  else
4287  {
4288  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4289  if (has_match_end)
4290  {
4291  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4292  sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
4293  }
4294  else
4295  sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
4296  }
4297 
4298  if (has_match_end)
4299  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4300  return;
4301  }
4302 
4303 #endif
4304 
4305 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4306 
4307 start = LABEL();
4308 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4309 
4310 if (char1 == char2)
4311  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4312 else
4313  {
4314  mask = char1 ^ char2;
4315  if (is_powerof2(mask))
4316  {
4317  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4318  found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4319  }
4320  else
4321  {
4322  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4323  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
4324  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4325  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
4326  found = JUMP(SLJIT_NOT_ZERO);
4327  }
4328  }
4329 
4330 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4331 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4332 
4333 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4334 if (common->utf && offset > 0)
4335  utf_quit = JUMP(SLJIT_JUMP);
4336 #endif
4337 
4338 JUMPHERE(found);
4339 
4340 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4341 if (common->utf && offset > 0)
4342  {
4343  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4344  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4345 #if defined COMPILE_PCRE8
4346  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4347  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4348 #elif defined COMPILE_PCRE16
4349  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4350  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4351 #else
4352 #error "Unknown code width"
4353 #endif
4354  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4355  JUMPHERE(utf_quit);
4356  }
4357 #endif
4358 
4359 JUMPHERE(quit);
4360 
4361 if (has_match_end)
4362  {
4363  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4364  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4365  if (offset > 0)
4366  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4367  JUMPHERE(quit);
4368  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4369  }
4370 
4371 if (offset > 0)
4372  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4373 }
4374 
4375 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4376 {
4377 DEFINE_COMPILER;
4378 struct sljit_label *start;
4379 struct sljit_jump *quit;
4380 struct sljit_jump *match;
4381 /* bytes[0] represent the number of characters between 0
4382 and MAX_N_BYTES - 1, 255 represents any character. */
4383 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4384 sljit_s32 offset;
4386 pcre_uchar *char_set, *char_set_end;
4387 int i, max, from;
4388 int range_right = -1, range_len;
4389 sljit_u8 *update_table = NULL;
4390 BOOL in_range;
4391 sljit_u32 rec_count;
4392 
4393 for (i = 0; i < MAX_N_CHARS; i++)
4394  chars[i * MAX_DIFF_CHARS] = 0;
4395 
4396 rec_count = 10000;
4397 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4398 
4399 if (max < 1)
4400  return FALSE;
4401 
4402 in_range = FALSE;
4403 /* Prevent compiler "uninitialized" warning */
4404 from = 0;
4405 range_len = 4 /* minimum length */ - 1;
4406 for (i = 0; i <= max; i++)
4407  {
4408  if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4409  {
4410  range_len = i - from;
4411  range_right = i - 1;
4412  }
4413 
4414  if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4415  {
4416  SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4417  if (!in_range)
4418  {
4419  in_range = TRUE;
4420  from = i;
4421  }
4422  }
4423  else
4424  in_range = FALSE;
4425  }
4426 
4427 if (range_right >= 0)
4428  {
4429  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4430  if (update_table == NULL)
4431  return TRUE;
4432  memset(update_table, IN_UCHARS(range_len), 256);
4433 
4434  for (i = 0; i < range_len; i++)
4435  {
4436  char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4437  SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4438  char_set_end = char_set + char_set[0];
4439  char_set++;
4440  while (char_set <= char_set_end)
4441  {
4442  if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4443  update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4444  char_set++;
4445  }
4446  }
4447  }
4448 
4449 offset = -1;
4450 /* Scan forward. */
4451 for (i = 0; i < max; i++)
4452  {
4453  if (offset == -1)
4454  {
4455  if (chars[i * MAX_DIFF_CHARS] <= 2)
4456  offset = i;
4457  }
4458  else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4459  {
4460  if (chars[i * MAX_DIFF_CHARS] == 1)
4461  offset = i;
4462  else
4463  {
4464  mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4465  if (!is_powerof2(mask))
4466  {
4467  mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4468  if (is_powerof2(mask))
4469  offset = i;
4470  }
4471  }
4472  }
4473  }
4474 
4475 if (range_right < 0)
4476  {
4477  if (offset < 0)
4478  return FALSE;
4479  SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4480  /* Works regardless the value is 1 or 2. */
4481  mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4482  fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4483  return TRUE;
4484  }
4485 
4486 if (range_right == offset)
4487  offset = -1;
4488 
4489 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4490 
4491 max -= 1;
4492 SLJIT_ASSERT(max > 0);
4493 if (common->match_end_ptr != 0)
4494  {
4495  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4496  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4497  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4498  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4499  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4500  JUMPHERE(quit);
4501  }
4502 else
4503  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4504 
4505 SLJIT_ASSERT(range_right >= 0);
4506 
4507 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4508 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4509 #endif
4510 
4511 start = LABEL();
4512 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4513 
4514 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4515 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4516 #else
4517 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4518 #endif
4519 
4520 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4521 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4522 #else
4523 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4524 #endif
4525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4526 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4527 
4528 if (offset >= 0)
4529  {
4530  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4531  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4532 
4533  if (chars[offset * MAX_DIFF_CHARS] == 1)
4534  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4535  else
4536  {
4537  mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4538  if (is_powerof2(mask))
4539  {
4540  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4541  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4542  }
4543  else
4544  {
4545  match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4546  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4547  JUMPHERE(match);
4548  }
4549  }
4550  }
4551 
4552 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4553 if (common->utf && offset != 0)
4554  {
4555  if (offset < 0)
4556  {
4557  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4558  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4559  }
4560  else
4561  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4562 #if defined COMPILE_PCRE8
4563  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4564  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4565 #elif defined COMPILE_PCRE16
4566  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4567  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4568 #else
4569 #error "Unknown code width"
4570 #endif
4571  if (offset < 0)
4572  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4573  }
4574 #endif
4575 
4576 if (offset >= 0)
4577  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4578 
4579 JUMPHERE(quit);
4580 
4581 if (common->match_end_ptr != 0)
4582  {
4583  if (range_right >= 0)
4584  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4585  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4586  if (range_right >= 0)
4587  {
4588  quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4589  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4590  JUMPHERE(quit);
4591  }
4592  }
4593 else
4594  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4595 return TRUE;
4596 }
4597 
4598 #undef MAX_N_CHARS
4599 #undef MAX_DIFF_CHARS
4600 
4601 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4602 {
4603 pcre_uchar oc;
4604 
4605 oc = first_char;
4606 if (caseless)
4607  {
4608  oc = TABLE_GET(first_char, common->fcc, first_char);
4609 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4610  if (first_char > 127 && common->utf)
4611  oc = UCD_OTHERCASE(first_char);
4612 #endif
4613  }
4614 
4615 fast_forward_first_char2(common, first_char, oc, 0);
4616 }
4617 
/* Emits code that moves STR_PTR forward until it is positioned right after
a newline, or until the end of the scanned area is reached. Nothing is
skipped when STR_PTR is not after the "str" field of the jit_arguments
structure. When common->match_end_ptr is set, the value stored there is used
as STR_END during the scan; the original STR_END is kept in TMP3 and
restored afterwards. */
4618 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4619 {
4620 DEFINE_COMPILER;
4621 struct sljit_label *loop;
4622 struct sljit_jump *lastchar;
4623 struct sljit_jump *firstchar;
4624 struct sljit_jump *quit;
4625 struct sljit_jump *foundcr = NULL;
4626 struct sljit_jump *notfoundnl;
4627 jump_list *newline = NULL;
4628 
4629 if (common->match_end_ptr != 0)
4630  {
4631  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4632  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4633  }
4634 
/* Fixed newline made of two characters: (newline >> 8) followed by
(newline & 0xff). */
4635 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4636  {
4637  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4638  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4639  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4640  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4641  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4642 
 /* Step back one character when STR_PTR is at least two characters after
 "begin"; the loop below starts by stepping forward again. */
4643  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4644  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4645  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4646 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4647  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4648 #endif
4649  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4650 
 /* Advance until the two characters before STR_PTR form the newline. */
4651  loop = LABEL();
4652  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4653  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4654  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4655  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4656  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4657  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4658 
4659  JUMPHERE(quit);
4660  JUMPHERE(firstchar);
4661  JUMPHERE(lastchar);
4662 
4663  if (common->match_end_ptr != 0)
4664  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4665  return;
4666  }
4667 
/* All other newline types. */
4668 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4669 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4670 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4671 skip_char_back(common);
4672 
/* The loop label is also published in common->ff_newline_shortcut. */
4673 loop = LABEL();
4674 common->ff_newline_shortcut = loop;
4675 
4676 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4677 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4678 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4679  foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* Jumps collected by check_newlinechar() are routed back to the loop
(presumably they are taken when the character is not a newline - confirm
against check_newlinechar). */
4680 check_newlinechar(common, common->nltype, &newline, FALSE);
4681 set_jumps(newline, loop);
4682 
4683 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4684  {
4685  quit = JUMP(SLJIT_JUMP);
4686  JUMPHERE(foundcr);
 /* After a CR: if the next character exists and is NL, step over it too. */
4687  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4688  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4689  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4690  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4691 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4692  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4693 #endif
4694  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4695  JUMPHERE(notfoundnl);
4696  JUMPHERE(quit);
4697  }
4698 JUMPHERE(lastchar);
4699 JUMPHERE(firstchar);
4700 
4701 if (common->match_end_ptr != 0)
4702  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4703 }
4704 
4705 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4706 
/* Emits a loop that advances STR_PTR until the character at STR_PTR is one
whose bit is set in the start_bits bitmap (bit (c & 7) of byte (c >> 3)), or
until STR_END is reached. When common->match_end_ptr is set, the value stored
there is used as STR_END during the scan; the original STR_END is kept in
RETURN_ADDR and restored afterwards. */
4707 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4708 {
4709 DEFINE_COMPILER;
4710 struct sljit_label *start;
4711 struct sljit_jump *quit;
4712 struct sljit_jump *found = NULL;
4713 jump_list *matches = NULL;
4714 #ifndef COMPILE_PCRE8
4715 struct sljit_jump *jump;
4716 #endif
4717 
4718 if (common->match_end_ptr != 0)
4719  {
4720  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4721  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4722  }
4723 
4724 start = LABEL();
4725 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4726 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* Keep a copy of the code unit: TMP1 is overwritten by the tests below and
is needed again to compute the character length. */
4727 #ifdef SUPPORT_UTF
4728 if (common->utf)
4729  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4730 #endif
4731 
/* Try the compare based test first (with invert set, so the jumps collected
in "matches" are taken for characters in the set). If the bitmap cannot be
expressed that way, fall back to a bitmap lookup. */
4732 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4733  {
4734 #ifndef COMPILE_PCRE8
 /* Values above 255 are tested as 255. */
4735  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4736  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4737  JUMPHERE(jump);
4738 #endif
4739  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4740  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4741  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4742  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4743  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4744  found = JUMP(SLJIT_NOT_ZERO);
4745  }
4746 
/* Not a starting character: step over it and try the next one. */
4747 #ifdef SUPPORT_UTF
4748 if (common->utf)
4749  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4750 #endif
4751 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4752 #ifdef SUPPORT_UTF
4753 #if defined COMPILE_PCRE8
4754 if (common->utf)
4755  {
 /* For lead bytes (>= 0xc0) add the utf8_table4 entry for the byte
 (presumably the number of additional bytes of the character). */
4756  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4757  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4758  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4759  }
4760 #elif defined COMPILE_PCRE16
4761 if (common->utf)
4762  {
 /* For a unit in 0xd800..0xdbff (high surrogate) add two more bytes. */
4763  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4764  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4765  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4766  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4767  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4768  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4769  }
4770 #endif /* COMPILE_PCRE[8|16] */
4771 #endif /* SUPPORT_UTF */
4772 JUMPTO(SLJIT_JUMP, start);
/* Exit point for a found character and for the end of the subject. */
4773 if (found != NULL)
4774  JUMPHERE(found);
4775 if (matches != NULL)
4776  set_jumps(matches, LABEL());
4777 JUMPHERE(quit);
4778 
4779 if (common->match_end_ptr != 0)
4780  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4781 }
4782 
4783 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4784 {
4785 DEFINE_COMPILER;
4786 struct sljit_label *loop;
4787 struct sljit_jump *toolong;
4788 struct sljit_jump *alreadyfound;
4789 struct sljit_jump *found;
4790 struct sljit_jump *foundoc = NULL;
4791 struct sljit_jump *notfound;
4792 sljit_u32 oc, bit;
4793 
4794 SLJIT_ASSERT(common->req_char_ptr != 0);
4795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4796 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4797 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4798 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4799 
4800 if (has_firstchar)
4801  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4802 else
4803  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4804 
4805 loop = LABEL();
4806 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4807 
4808 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4809 oc = req_char;
4810 if (caseless)
4811  {
4812  oc = TABLE_GET(req_char, common->fcc, req_char);
4813 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4814  if (req_char > 127 && common->utf)
4815  oc = UCD_OTHERCASE(req_char);
4816 #endif
4817  }
4818 if (req_char == oc)
4819  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4820 else
4821  {
4822  bit = req_char ^ oc;
4823  if (is_powerof2(bit))
4824  {
4825  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4826  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4827  }
4828  else
4829  {
4830  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4831  foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4832  }
4833  }
4834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4835 JUMPTO(SLJIT_JUMP, loop);
4836 
4837 JUMPHERE(found);
4838 if (foundoc)
4839  JUMPHERE(foundoc);
4840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4841 JUMPHERE(alreadyfound);
4842 JUMPHERE(toolong);
4843 return notfound;
4844 }
4845 
4846 static void do_revertframes(compiler_common *common)
4847 {
4848 DEFINE_COMPILER;
4849 struct sljit_jump *jump;
4850 struct sljit_label *mainloop;
4851 
4852 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4853 OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4854 GET_LOCAL_BASE(TMP1, 0, 0);
4855 
4856 /* Drop frames until we reach STACK_TOP. */
4857 mainloop = LABEL();
4858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
4859 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4860 
4861 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4862 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4863 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4864 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4865 JUMPTO(SLJIT_JUMP, mainloop);
4866 
4867 JUMPHERE(jump);
4868 jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4869 /* End of reverting values. */
4870 OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4871 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4872 
4873 JUMPHERE(jump);
4874 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4875 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4876 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4877 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4878 JUMPTO(SLJIT_JUMP, mainloop);
4879 }
4880 
/* Emits the fast-call subroutine that tests for a word boundary at STR_PTR.
The return address is kept in LOCALS0. The "word" property (0 or 1) of the
character before STR_PTR is stored in LOCALS1 (0 when STR_PTR is at "begin"),
the property of the character at STR_PTR is computed in TMP2 (0 when the
end-of-subject check fails). The two are XOR-ed at the end, so the zero flag
is set on return when the properties are equal. */
4881 static void check_wordboundary(compiler_common *common)
4882 {
4883 DEFINE_COMPILER;
4884 struct sljit_jump *skipread;
4885 jump_list *skipread_list = NULL;
4886 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4887 struct sljit_jump *jump;
4888 #endif
4889 
/* The table based tests below shift the ctype byte right by 4 and rely on
this value. */
4890 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4891 
4892 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4893 /* Get type of the previous char, and put it to LOCALS1. */
4894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4895 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
4897 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4898 skip_char_back(common);
4899 check_start_used_ptr(common);
4900 read_char(common);
4901 
4902 /* Testing char type. */
4903 #ifdef SUPPORT_UCP
4904 if (common->use_ucp)
4905  {
 /* Underscore counts as a word character. Otherwise the value left in TMP1
 by the getucd helper is accepted when it lies in ucp_Ll..ucp_Lu or in
 ucp_Nd..ucp_No. */
4906  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4907  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4908  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4909  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4910  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4911  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4912  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4913  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4914  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4915  JUMPHERE(jump);
4916  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4917  }
4918 else
4919 #endif
4920  {
 /* Table based test: characters above 255 skip the lookup and keep the
 zero already stored in LOCALS1. */
4921 #ifndef COMPILE_PCRE8
4922  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4923 #elif defined SUPPORT_UTF
4924  /* Here LOCALS1 has already been zeroed. */
4925  jump = NULL;
4926  if (common->utf)
4927  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4928 #endif /* COMPILE_PCRE8 */
4929  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4930  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4931  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4932  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4933 #ifndef COMPILE_PCRE8
4934  JUMPHERE(jump);
4935 #elif defined SUPPORT_UTF
4936  if (jump != NULL)
4937  JUMPHERE(jump);
4938 #endif /* COMPILE_PCRE8 */
4939  }
4940 JUMPHERE(skipread);
4941 
/* Now the character at STR_PTR; the result is left in TMP2. */
4942 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4943 check_str_end(common, &skipread_list);
4944 peek_char(common, READ_CHAR_MAX);
4945 
4946 /* Testing char type. This is a code duplication. */
4947 #ifdef SUPPORT_UCP
4948 if (common->use_ucp)
4949  {
4950  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4951  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4952  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4953  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4954  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4955  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4956  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4957  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4958  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4959  JUMPHERE(jump);
4960  }
4961 else
4962 #endif
4963  {
4964 #ifndef COMPILE_PCRE8
4965  /* TMP2 may be destroyed by peek_char. */
4966  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4967  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4968 #elif defined SUPPORT_UTF
4969  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4970  jump = NULL;
4971  if (common->utf)
4972  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4973 #endif
4974  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4975  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4976  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4977 #ifndef COMPILE_PCRE8
4978  JUMPHERE(jump);
4979 #elif defined SUPPORT_UTF
4980  if (jump != NULL)
4981  JUMPHERE(jump);
4982 #endif /* COMPILE_PCRE8 */
4983  }
4984 set_jumps(skipread_list, LABEL());
4985 
/* Zero flag set: both sides have the same property. */
4986 OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4987 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4988 }
4989 
/* Tries to test the character in TMP1 against a 256 bit class bitmap with a
few compare instructions instead of a bitmap lookup.

Arguments:
  common      the JIT compiler state
  bits        the class bitmap (bit (c & 7) of byte (c >> 3) for character c)
  nclass      selects whether a boundary at 256 is appended after the scan
              (see below); presumably tells how characters above 255 are
              treated - confirm against the callers
  invert      inverts the sense of the test
  backtracks  list receiving the emitted jumps; they are taken for characters
              that are rejected (for characters in the bitmap when invert
              is set)

Returns TRUE when the code was emitted, FALSE when the bitmap has more than
four boundaries; nothing is emitted in that case. */
4990 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4991 {
4992 /* May destroy TMP1. */
4993 DEFINE_COMPILER;
4994 int ranges[MAX_RANGE_SIZE];
4995 sljit_u8 bit, cbit, all;
4996 int i, byte, length = 0;
4997 
4998 bit = bits[0] & 0x1;
4999 /* All bits will be zero or one (since bit is zero or one). */
5000 all = -bit;
5001 
/* Record in ranges[] every character value at which the bit value changes.
Whole bytes equal to the current value are skipped eight bits at a time. */
5002 for (i = 0; i < 256; )
5003  {
5004  byte = i >> 3;
5005  if ((i & 0x7) == 0 && bits[byte] == all)
5006  i += 8;
5007  else
5008  {
5009  cbit = (bits[byte] >> (i & 0x7)) & 0x1;
5010  if (cbit != bit)
5011  {
5012  if (length >= MAX_RANGE_SIZE)
5013  return FALSE;
5014  ranges[length] = i;
5015  length++;
5016  bit = cbit;
5017  all = -cbit;
5018  }
5019  i++;
5020  }
5021  }
5022 
/* Add a final boundary at 256 when the last bit value differs from nclass. */
5023 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
5024  {
5025  if (length >= MAX_RANGE_SIZE)
5026  return FALSE;
5027  ranges[length] = 256;
5028  length++;
5029  }
5030 
5031 if (length < 0 || length > 4)
5032  return FALSE;
5033 
/* From here on "bit" tells whether the values below ranges[0] are accepted. */
5034 bit = bits[0] & 0x1;
5035 if (invert) bit ^= 0x1;
5036 
5037 /* No character is accepted. */
5038 if (length == 0 && bit == 0)
5039  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5040 
5041 switch(length)
5042  {
5043  case 0:
5044  /* When bit != 0, all characters are accepted. */
5045  return TRUE;
5046 
5047  case 1:
 /* One boundary: a single unsigned comparison. */
5048  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5049  return TRUE;
5050 
5051  case 2:
 /* One interval [ranges[0], ranges[1]): subtract the start and compare with
 the length, or compare for equality when it holds a single value. */
5052  if (ranges[0] + 1 != ranges[1])
5053  {
5054  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5055  add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5056  }
5057  else
5058  add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5059  return TRUE;
5060 
5061  case 3:
 /* Three boundaries: one open-ended comparison plus one interval test. */
5062  if (bit != 0)
5063  {
5064  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5065  if (ranges[0] + 1 != ranges[1])
5066  {
5067  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5068  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5069  }
5070  else
5071  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5072  return TRUE;
5073  }
5074 
5075  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5076  if (ranges[1] + 1 != ranges[2])
5077  {
5078  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5079  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5080  }
5081  else
5082  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5083  return TRUE;
5084 
5085  case 4:
 /* Two intervals of equal length whose start values differ in exactly one
 bit: set that bit in TMP1 and test the second interval only. */
5086  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5087  && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5088  && (ranges[1] & (ranges[2] - ranges[0])) == 0
5089  && is_powerof2(ranges[2] - ranges[0]))
5090  {
5091  SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5092  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5093  if (ranges[2] + 1 != ranges[3])
5094  {
5095  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5096  add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5097  }
5098  else
5099  add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5100  return TRUE;
5101  }
5102 
5103  if (bit != 0)
5104  {
 /* i remembers how much has already been subtracted from TMP1. */
5105  i = 0;
5106  if (ranges[0] + 1 != ranges[1])
5107  {
5108  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5109  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5110  i = ranges[0];
5111  }
5112  else
5113  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5114 
5115  if (ranges[2] + 1 != ranges[3])
5116  {
5117  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5118  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5119  }
5120  else
5121  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5122  return TRUE;
5123  }
5124 
 /* Test the outer bounds first, then the gap [ranges[1], ranges[2]). */
5125  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5126  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5127  if (ranges[1] + 1 != ranges[2])
5128  {
5129  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5130  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5131  }
5132  else
5133  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5134  return TRUE;
5135 
5136  default:
5137  SLJIT_UNREACHABLE();
5138  return FALSE;
5139  }
5140 }
5141 
/* Emits the fast-call subroutine (return address in RETURN_ADDR) that tests
TMP1 against the newline characters 0x0a-0x0d and 0x85, plus 0x2028 and
0x2029 in the wide / UTF builds. The result is accumulated in TMP2 and the
last operation sets the zero flag from it, so the zero flag is clear on
return when a newline was found. */
5142 static void check_anynewline(compiler_common *common)
5143 {
5144 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
TMP1 is modified as well. */
5145 DEFINE_COMPILER;
5146 
5147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5148 
/* 0x0a..0x0d as one unsigned range test on TMP1 - 0x0a. */
5149 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5152 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5153 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5154 #ifdef COMPILE_PCRE8
5155 if (common->utf)
5156  {
5157 #endif
 /* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so a single
 comparison covers both characters. */
5158  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5159  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5160  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5161 #ifdef COMPILE_PCRE8
5162  }
5163 #endif
5164 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the zero flag from TMP2. */
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5166 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5167 }
5168 
/* Emits the fast-call subroutine (return address in RETURN_ADDR) that tests
TMP1 against the horizontal space characters 0x09, 0x20 and 0xa0, plus
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 in the wide / UTF
builds. The result is accumulated in TMP2 and the last operation sets the
zero flag from it, so the zero flag is clear on return when the character
is a horizontal space. */
5169 static void check_hspace(compiler_common *common)
5170 {
5171 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
TMP1 is modified in the wide / UTF path. */
5172 DEFINE_COMPILER;
5173 
5174 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5175 
/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows. */
5176 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5177 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5178 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5179 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5180 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5181 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5182 #ifdef COMPILE_PCRE8
5183 if (common->utf)
5184  {
5185 #endif
5186  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5187  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5188  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5189  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5190  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
 /* TMP1 is rebased to 0x2000; the remaining constants are relative to it. */
5191  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5192  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5193  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5194  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5195  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5196  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5197  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5198  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5199 #ifdef COMPILE_PCRE8
5200  }
5201 #endif
5202 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the zero flag from TMP2. */
5203 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5204 
5205 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5206 }
5207 
5208 static void check_vspace(compiler_common *common)
5209 {
5210 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The code is emitted as a fast-call helper: the return address is kept in
RETURN_ADDR between sljit_emit_fast_enter and sljit_emit_fast_return.
Each comparison result is merged into TMP2; the last OP_FLAGS is emitted with
SLJIT_SET_Z, so it also updates the zero flag from the merged result.
Always tested: 0x0a-0x0d and 0x85. In UTF / 16 bit / 32 bit builds (in the
8 bit library only when common->utf is set) 0x2028 and 0x2029 are tested as
well. TMP1 is modified: it is reduced by 0x0a, and on the wide character
path its lowest bit is set. */
5211 DEFINE_COMPILER;
5212 
5213 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5214 
/* Rebase TMP1 to 0x0a so that 0x0a-0x0d is a single unsigned range test. */
5215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5216 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5217 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5218 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5219 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5220 #ifdef COMPILE_PCRE8
5221 if (common->utf)
5222  {
5223 #endif
5224  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
 /* 0x2028 and 0x2029 differ only in the lowest bit (also after the 0x0a
 rebase), so setting that bit lets one compare cover both values. */
5225  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5226  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5227 #ifdef COMPILE_PCRE8
5228  }
5229 #endif
5230 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare and set the zero flag from TMP2. */
5231 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5232 
5233 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5234 }
5235 
5236 static void do_casefulcmp(compiler_common *common)
5237 {
/* Emits the fast-call helper which compares two character sequences code unit
by code unit (case sensitive). As used below: TMP1 points to the first
sequence, STR_PTR is moved back by TMP2 to the start of the second sequence,
and TMP2 is the remaining length, reduced by IN_UCHARS(1) in every iteration.
The loop is left on the first mismatch or when TMP2 reaches zero. The return
address is stored in LOCALS0 and reloaded into TMP1 for the fast return.
NOTE(review): how the callers consume the outcome (TMP2 / flags) is not
visible in this function - confirm at the call sites. */
5238 DEFINE_COMPILER;
5239 struct sljit_jump *jump;
5240 struct sljit_label *label;
5241 int char1_reg;
5242 int char2_reg;
5243 
/* When TMP3 has no register index, STR_END and STACK_TOP are borrowed to hold
the two characters; their values are saved and restored below. */
5244 if (sljit_get_register_index(TMP3) < 0)
5245  {
5246  char1_reg = STR_END;
5247  char2_reg = STACK_TOP;
5248  }
5249 else
5250  {
5251  char1_reg = TMP3;
5252  char2_reg = RETURN_ADDR;
5253  }
5254 
5255 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5256 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5257 
5258 if (char1_reg == STR_END)
5259  {
 /* Save the borrowed registers. */
5260  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
5261  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
5262  }
5263 
/* Three loop variants: post-update addressing, pre-update addressing (both
only when the SLJIT_MEM_SUPP probe succeeds), or plain loads followed by
explicit pointer increments. */
5264 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5265  {
5266  label = LABEL();
5267  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5268  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5269  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5270  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5271  JUMPTO(SLJIT_NOT_ZERO, label);
5272 
5273  JUMPHERE(jump);
5274  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5275  }
5276 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5277  {
 /* Both pointers are moved back by one character before the loop and
 STR_PTR is moved forward by one character again after it. */
5278  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5279  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5280 
5281  label = LABEL();
5282  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5283  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5284  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5285  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5286  JUMPTO(SLJIT_NOT_ZERO, label);
5287 
5288  JUMPHERE(jump);
5289  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5290  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5291  }
5292 else
5293  {
5294  label = LABEL();
5295  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5296  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5297  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5298  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5299  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5300  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5301  JUMPTO(SLJIT_NOT_ZERO, label);
5302 
5303  JUMPHERE(jump);
5304  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5305  }
5306 
5307 if (char1_reg == STR_END)
5308  {
 /* Restore the borrowed registers. */
5309  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
5310  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
5311  }
5312 
/* TMP1 holds the return address reloaded from LOCALS0. */
5313 sljit_emit_fast_return(compiler, TMP1, 0);
5314 }
5315 
5316 static void do_caselesscmp(compiler_common *common)
5317 {
/* Emits the fast-call helper which compares two character sequences code unit
by code unit after mapping each code unit through the common->lcc table
(in non 8 bit builds values above 255 are compared unmapped). As used below:
TMP1 points to the first sequence, STR_PTR is moved back by TMP2 to the start
of the second sequence, and TMP2 is the remaining length, reduced by
IN_UCHARS(1) in every iteration. The loop is left on the first mismatch or
when TMP2 reaches zero. The return address is stored in LOCALS0 and reloaded
into TMP1 for the fast return; STR_END is preserved through LOCALS1.
NOTE(review): how the callers consume the outcome (TMP2 / flags) is not
visible in this function - confirm at the call sites. */
5318 DEFINE_COMPILER;
5319 struct sljit_jump *jump;
5320 struct sljit_label *label;
5321 int char1_reg = STR_END;
5322 int char2_reg;
5323 int lcc_table;
5324 int opt_type = 0;
5325 
/* When TMP3 has no register index, STACK_TOP and STACK_LIMIT are borrowed;
their values are saved and restored below. */
5326 if (sljit_get_register_index(TMP3) < 0)
5327  {
5328  char2_reg = STACK_TOP;
5329  lcc_table = STACK_LIMIT;
5330  }
5331 else
5332  {
5333  char2_reg = RETURN_ADDR;
5334  lcc_table = TMP3;
5335  }
5336 
/* Probe the supported addressing form: 1 = post-update, 2 = pre-update,
0 = neither (plain loads and explicit pointer increments). */
5337 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5338  opt_type = 1;
5339 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5340  opt_type = 2;
5341 
5342 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5343 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5344 
/* char1_reg is STR_END: keep its original value in LOCALS1. */
5345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
5346 
5347 if (char2_reg == STACK_TOP)
5348  {
 /* Save the borrowed registers. */
5349  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
5350  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
5351  }
5352 
5353 OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
5354 
5355 if (opt_type == 1)
5356  {
5357  label = LABEL();
5358  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5359  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5360  }
5361 else if (opt_type == 2)
5362  {
 /* Both pointers are moved back by one character before the loop and
 STR_PTR is moved forward by one character again after it. */
5363  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5364  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5365 
5366  label = LABEL();
5367  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5368  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5369  }
5370 else
5371  {
 /* STR_PTR is advanced later, after the table lookups (see opt_type == 0
 below). */
5372  label = LABEL();
5373  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5374  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5375  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5376  }
5377 
/* Map both characters through the lcc table. In 16 / 32 bit builds the lookup
is skipped for values above 255. */
5378 #ifndef COMPILE_PCRE8
5379 jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
5380 #endif
5381 OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
5382 #ifndef COMPILE_PCRE8
5383 JUMPHERE(jump);
5384 jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
5385 #endif
5386 OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
5387 #ifndef COMPILE_PCRE8
5388 JUMPHERE(jump);
5389 #endif
5390 
5391 if (opt_type == 0)
5392  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5393 
5394 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5395 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5396 JUMPTO(SLJIT_NOT_ZERO, label);
5397 
5398 JUMPHERE(jump);
5399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5400 
5401 if (opt_type == 2)
5402  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5403 
5404 if (char2_reg == STACK_TOP)
5405  {
 /* Restore the borrowed registers. */
5406  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
5407  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
5408  }
5409 
/* Restore STR_END and return through the address reloaded into TMP1. */
5410 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5411 sljit_emit_fast_return(compiler, TMP1, 0);
5412 }
5413 
5414 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5415 
5416 static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
5417 {
5418 /* This function would be inefficient to implement at the JIT level, so it
is a plain C helper. It compares the characters in [src1, end1) with the
characters starting at src2, ignoring case according to the Unicode data.
Returns:
 src2 advanced past the compared characters when every character matches;
 NULL on the first character which does not match;
 (pcre_uchar*)1 when src2 reaches end2 before src1 reaches end1. */
5419 sljit_u32 c1, c2;
5420 const ucd_record *ur;
5421 const sljit_u32 *pp;
5422 
5423 while (src1 < end1)
5424  {
5425  if (src2 >= end2)
5426  return (pcre_uchar*)1;
5427  GETCHARINC(c1, src1);
5428  GETCHARINC(c2, src2);
5429  ur = GET_UCD(c2);
 /* Accept the same character or the single "other case" of c2; otherwise
 c1 must be a member of the caseless set of c2. */
5430  if (c1 != c2 && c1 != c2 + ur->other_case)
5431  {
5432  pp = PRIV(ucd_caseless_sets) + ur->caseset;
 /* The scan stops with a mismatch as soon as an entry greater than c1 is
 seen. NOTE(review): this presumably relies on the sets being stored in
 ascending order with a terminator larger than any character - confirm
 against the table definition. */
5433  for (;;)
5434  {
5435  if (c1 < *pp) return NULL;
5436  if (c1 == *pp++) break;
5437  }
5438  }
5439  }
5440 return src2;
5441 }
5442 
5443 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5444 
5445 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5446  compare_context *context, jump_list **backtracks)
5447 {
/* Emits the code which compares the subject against the single character
starting at cc. With SUPPORT_UTF, when common->utf is set and the first code
unit has extra length, all code units of the character are processed.
Subject data is read at STR_PTR - context->length; every mismatch jumps to
the backtracks list. 'context' carries state across consecutive calls: the
remaining length, which of TMP1 / TMP2 receives the next load (sourcereg),
and, on the unaligned path, the code units collected so far for a combined
compare. Returns cc advanced past the character. */
5448 DEFINE_COMPILER;
5449 unsigned int othercasebit = 0;
5450 pcre_uchar *othercasechar = NULL;
5451 #ifdef SUPPORT_UTF
5452 int utflength;
5453 #endif
5454 
/* For a caseless compare of a character which has another case, find the bit
which differs and the code unit which contains it. The compare is then done
by OR-ing that bit into the subject data. */
5455 if (caseless && char_has_othercase(common, cc))
5456  {
5457  othercasebit = char_get_othercase_bit(common, cc);
5458  SLJIT_ASSERT(othercasebit);
5459  /* Extracting bit difference info. */
5460 #if defined COMPILE_PCRE8
5461  othercasechar = cc + (othercasebit >> 8);
5462  othercasebit &= 0xff;
5463 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5464  /* Note that this code only handles characters in the BMP. If there
5465  ever are characters outside the BMP whose othercase differs in only one
5466  bit from itself (there currently are none), this code will need to be
5467  revised for COMPILE_PCRE32. */
5468  othercasechar = cc + (othercasebit >> 9);
5469  if ((othercasebit & 0x100) != 0)
5470  othercasebit = (othercasebit & 0xff) << 8;
5471  else
5472  othercasebit &= 0xff;
5473 #endif /* COMPILE_PCRE[8|16|32] */
5474  }
5475 
/* First call for this sequence (sourcereg == -1): load the first chunk into
TMP1 and make TMP2 the target of the next load. */
5476 if (context->sourcereg == -1)
5477  {
5478 #if defined COMPILE_PCRE8
5479 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5480  if (context->length >= 4)
5481  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5482  else if (context->length >= 2)
5483  OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5484  else
5485 #endif
5486  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5487 #elif defined COMPILE_PCRE16
5488 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5489  if (context->length >= 4)
5490  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5491  else
5492 #endif
5493  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5494 #elif defined COMPILE_PCRE32
5495  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5496 #endif /* COMPILE_PCRE[8|16|32] */
5497  context->sourcereg = TMP2;
5498  }
5499 
5500 #ifdef SUPPORT_UTF
5501 utflength = 1;
5502 if (common->utf && HAS_EXTRALEN(*cc))
5503  utflength += GET_EXTRALEN(*cc);
5504 
5505 do
5506  {
5507 #endif
5508 
5509  context->length -= IN_UCHARS(1);
5510 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5511 
5512  /* Unaligned read is supported. */
 /* Collect the expected code unit (c) and its case bit (oc); a compare is
 emitted only when a 4 / 2 / 1 byte chunk is complete. */
5513  if (othercasebit != 0 && othercasechar == cc)
5514  {
5515  context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5516  context->oc.asuchars[context->ucharptr] = othercasebit;
5517  }
5518  else
5519  {
5520  context->c.asuchars[context->ucharptr] = *cc;
5521  context->oc.asuchars[context->ucharptr] = 0;
5522  }
5523  context->ucharptr++;
5524 
5525 #if defined COMPILE_PCRE8
5526  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5527 #else
5528  if (context->ucharptr >= 2 || context->length == 0)
5529 #endif
5530  {
 /* Start loading the next chunk into the current sourcereg, then switch
 to the other register, which holds the chunk to be compared now. */
5531  if (context->length >= 4)
5532  OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5533  else if (context->length >= 2)
5534  OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5535 #if defined COMPILE_PCRE8
5536  else if (context->length >= 1)
5537  OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5538 #endif /* COMPILE_PCRE8 */
5539  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5540 
5541  switch(context->ucharptr)
5542  {
5543  case 4 / sizeof(pcre_uchar):
5544  if (context->oc.asint != 0)
5545  OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5546  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5547  break;
5548 
5549  case 2 / sizeof(pcre_uchar):
5550  if (context->oc.asushort != 0)
5551  OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5552  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5553  break;
5554 
5555 #ifdef COMPILE_PCRE8
5556  case 1:
5557  if (context->oc.asbyte != 0)
5558  OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5559  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5560  break;
5561 #endif
5562 
5563  default:
5564  SLJIT_UNREACHABLE();
5565  break;
5566  }
5567  context->ucharptr = 0;
5568  }
5569 
5570 #else
5571 
5572  /* Unaligned read is unsupported or in 32 bit mode. */
 /* One code unit per compare: start loading the next unit (if any), switch
 registers, then compare the unit loaded earlier. */
5573  if (context->length >= 1)
5574  OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5575 
5576  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5577 
5578  if (othercasebit != 0 && othercasechar == cc)
5579  {
5580  OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5581  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5582  }
5583  else
5584  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5585 
5586 #endif
5587 
5588  cc++;
5589 #ifdef SUPPORT_UTF
5590  utflength--;
5591  }
5592 while (utflength > 0);
5593 #endif
5594 
5595 return cc;
5596 }
5597 
5598 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5599 
/* Helper macros for compile_xclass_matchingpath. They rely on local names of
the function which uses them: typereg / typeoffset and TMP1 / charoffset.
The generated code keeps the register biased by the current offset (the
register holds "value - offset"); each macro emits one ADD or SUB which moves
the bias to the requested value, and then records the new offset. Nothing is
emitted when the offset is unchanged.
Note: each macro expands to an if statement followed by an assignment, so it
must not be used as the unbraced body of an if / else / loop. */
5600 #define SET_TYPE_OFFSET(value) \
5601  if ((value) != typeoffset) \
5602  { \
5603  if ((value) < typeoffset) \
5604  OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
5605  else \
5606  OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
5607  } \
5608  typeoffset = (value);
5609 
5610 #define SET_CHAR_OFFSET(value) \
5611  if ((value) != charoffset) \
5612  { \
5613  if ((value) < charoffset) \
5614  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
5615  else \
5616  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
5617  } \
5618  charoffset = (value);
5619 
5620 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5621 
5622 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5623 {
/* Emits the matching code for the extended character class whose data starts
at cc. cc[0] holds the XCL_NOT / XCL_MAP / XCL_HASPROP flags; when XCL_MAP is
set a 32 byte bitmap follows; then comes a list of XCL_SINGLE, XCL_RANGE and
(with SUPPORT_UCP) XCL_PROP / XCL_NOTPROP items terminated by XCL_END.
The function works in three steps:
 1. scan the items to find the smallest / largest character of interest and
 to learn which data is needed (character type, script, character value);
 2. read the character, test the bitmap and load the Unicode property data;
 3. emit one test per item.
'list' receives the jumps taken when an item matches: for a normal class this
is the local 'found' list, which is bound to the end of the generated code;
for a negated class (XCL_NOT) it is 'backtracks'. 'compares' counts the items
which still have to be emitted; the test of the last item is inverted for a
normal class so that a failure jumps to 'backtracks'. */
5624 DEFINE_COMPILER;
5625 jump_list *found = NULL;
5626 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5627 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5628 struct sljit_jump *jump = NULL;
5629 pcre_uchar *ccbegin;
5630 int compares, invertcmp, numberofcmps;
5631 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5632 BOOL utf = common->utf;
5633 #endif
5634 
5635 #ifdef SUPPORT_UCP
5636 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5637 BOOL charsaved = FALSE;
5638 int typereg = TMP1;
5639 const sljit_u32 *other_cases;
5640 sljit_uw typeoffset;
5641 #endif
5642 
5643 /* Scanning the necessary info. */
5644 cc++;
5645 ccbegin = cc;
5646 compares = 0;
5647 if (cc[-1] & XCL_MAP)
5648  {
5649  min = 0;
5650  cc += 32 / sizeof(pcre_uchar);
5651  }
5652 
5653 while (*cc != XCL_END)
5654  {
5655  compares++;
5656  if (*cc == XCL_SINGLE)
5657  {
5658  cc ++;
5659  GETCHARINCTEST(c, cc);
5660  if (c > max) max = c;
5661  if (c < min) min = c;
5662 #ifdef SUPPORT_UCP
5663  needschar = TRUE;
5664 #endif
5665  }
5666  else if (*cc == XCL_RANGE)
5667  {
5668  cc ++;
5669  GETCHARINCTEST(c, cc);
5670  if (c < min) min = c;
5671  GETCHARINCTEST(c, cc);
5672  if (c > max) max = c;
5673 #ifdef SUPPORT_UCP
5674  needschar = TRUE;
5675 #endif
5676  }
5677 #ifdef SUPPORT_UCP
5678  else
5679  {
5680  SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5681  cc++;
 /* A caseless set only widens the range by its members; any other
 property can match anywhere, so the full range must be read. */
5682  if (*cc == PT_CLIST)
5683  {
5684  other_cases = PRIV(ucd_caseless_sets) + cc[1];
5685  while (*other_cases != NOTACHAR)
5686  {
5687  if (*other_cases > max) max = *other_cases;
5688  if (*other_cases < min) min = *other_cases;
5689  other_cases++;
5690  }
5691  }
5692  else
5693  {
5694  max = READ_CHAR_MAX;
5695  min = 0;
5696  }
5697 
5698  switch(*cc)
5699  {
5700  case PT_ANY:
5701  /* Any either accepts everything or ignored. */
5702  if (cc[-1] == XCL_PROP)
5703  {
5704  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5705  if (list == backtracks)
5706  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5707  return;
5708  }
5709  break;
5710 
5711  case PT_LAMP:
5712  case PT_GC:
5713  case PT_PC:
5714  case PT_ALNUM:
5715  needstype = TRUE;
5716  break;
5717 
5718  case PT_SC:
5719  needsscript = TRUE;
5720  break;
5721 
5722  case PT_SPACE:
5723  case PT_PXSPACE:
5724  case PT_WORD:
5725  case PT_PXGRAPH:
5726  case PT_PXPRINT:
5727  case PT_PXPUNCT:
5728  needstype = TRUE;
5729  needschar = TRUE;
5730  break;
5731 
5732  case PT_CLIST:
5733  case PT_UCNC:
5734  needschar = TRUE;
5735  break;
5736 
5737  default:
5738  SLJIT_UNREACHABLE();
5739  break;
5740  }
5741  cc += 2;
5742  }
5743 #endif
5744  }
5745 SLJIT_ASSERT(compares > 0);
5746 
5747 /* We are not necessary in utf mode even in 8 bit mode. */
5748 cc = ccbegin;
5749 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5750 
/* Without properties the bitmap decides for characters up to 255, and for a
class without a bitmap everything outside [min, max] is rejected (or accepted
for a negated class) at once. */
5751 if ((cc[-1] & XCL_HASPROP) == 0)
5752  {
5753  if ((cc[-1] & XCL_MAP) != 0)
5754  {
5755  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5756  if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5757  {
 /* Generic bitmap test: byte index is c >> 3, bit is 1 << (c & 7). */
5758  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763  add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5764  }
5765 
5766  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5767  JUMPHERE(jump);
5768 
5769  cc += 32 / sizeof(pcre_uchar);
5770  }
5771  else
5772  {
5773  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5774  add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5775  }
5776  }
5777 else if ((cc[-1] & XCL_MAP) != 0)
5778  {
 /* With properties a bitmap miss is not final: the character is kept in
 RETURN_ADDR and restored into TMP1 for the item tests below. */
5779  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5780 #ifdef SUPPORT_UCP
5781  charsaved = TRUE;
5782 #endif
5783  if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5784  {
5785 #ifdef COMPILE_PCRE8
5786  jump = NULL;
5787  if (common->utf)
5788 #endif
5789  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5790 
5791  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5792  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5793  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5794  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5795  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5796  add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5797 
5798 #ifdef COMPILE_PCRE8
5799  if (common->utf)
5800 #endif
5801  JUMPHERE(jump);
5802  }
5803 
5804  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5805  cc += 32 / sizeof(pcre_uchar);
5806  }
5807 
5808 #ifdef SUPPORT_UCP
5809 if (needstype || needsscript)
5810  {
5811  if (needschar && !charsaved)
5812  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5813 
5814 #ifdef COMPILE_PCRE32
5815  if (!common->utf)
5816  {
 /* Values above 0x10ffff are replaced by INVALID_UTF_CHAR before the
 table lookup. */
5817  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5818  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5819  JUMPHERE(jump);
5820  }
5821 #endif
5822 
 /* Two stage table lookup: after this sequence TMP2 holds the 16 bit
 ucd_stage2 entry of the character, used below to address ucd_records. */
5823  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5824  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5825  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5826  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5827  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5828  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5829  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5830 
5831  /* Before anything else, we deal with scripts. */
5832  if (needsscript)
5833  {
5834  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5835  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5836 
5837  ccbegin = cc;
5838 
 /* Emit every PT_SC test now, while TMP1 holds the script; the main loop
 below skips these items. */
5839  while (*cc != XCL_END)
5840  {
5841  if (*cc == XCL_SINGLE)
5842  {
5843  cc ++;
5844  GETCHARINCTEST(c, cc);
5845  }
5846  else if (*cc == XCL_RANGE)
5847  {
5848  cc ++;
5849  GETCHARINCTEST(c, cc);
5850  GETCHARINCTEST(c, cc);
5851  }
5852  else
5853  {
5854  SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5855  cc++;
5856  if (*cc == PT_SC)
5857  {
5858  compares--;
5859  invertcmp = (compares == 0 && list != backtracks);
5860  if (cc[-1] == XCL_NOTPROP)
5861  invertcmp ^= 0x1;
5862  jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5863  add_jump(compiler, compares > 0 ? list : backtracks, jump);
5864  }
5865  cc += 2;
5866  }
5867  }
5868 
5869  cc = ccbegin;
5870  }
5871 
5872  if (needschar)
5873  {
5874  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5875  }
5876 
5877  if (needstype)
5878  {
 /* The character type goes to TMP1 when the character value is not
 needed any more, otherwise to RETURN_ADDR (typereg). */
5879  if (!needschar)
5880  {
5881  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5882  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5883  }
5884  else
5885  {
5886  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5887  OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5888  typereg = RETURN_ADDR;
5889  }
5890  }
5891  }
5892 #endif
5893 
5894 /* Generating code. */
/* From here on TMP1 holds "character - charoffset" and typereg holds
"type - typeoffset"; SET_CHAR_OFFSET / SET_TYPE_OFFSET move these biases.
numberofcmps counts the consecutive XCL_SINGLE / XCL_RANGE tests whose
results are being collected in TMP2 before a single conditional jump. */
5895 charoffset = 0;
5896 numberofcmps = 0;
5897 #ifdef SUPPORT_UCP
5898 typeoffset = 0;
5899 #endif
5900 
5901 while (*cc != XCL_END)
5902  {
5903  compares--;
5904  invertcmp = (compares == 0 && list != backtracks);
5905  jump = NULL;
5906 
5907  if (*cc == XCL_SINGLE)
5908  {
5909  cc ++;
5910  GETCHARINCTEST(c, cc);
5911 
5912  if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5913  {
5914  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5915  OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5916  numberofcmps++;
5917  }
5918  else if (numberofcmps > 0)
5919  {
5920  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5921  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5922  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5923  numberofcmps = 0;
5924  }
5925  else
5926  {
5927  jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5928  numberofcmps = 0;
5929  }
5930  }
5931  else if (*cc == XCL_RANGE)
5932  {
5933  cc ++;
5934  GETCHARINCTEST(c, cc);
 /* Bias TMP1 by the start of the range so that one unsigned "less or
 equal" test against (end - start) covers the whole range. */
5935  SET_CHAR_OFFSET(c);
5936  GETCHARINCTEST(c, cc);
5937 
5938  if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5939  {
5940  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5941  OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5942  numberofcmps++;
5943  }
5944  else if (numberofcmps > 0)
5945  {
5946  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5947  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5948  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5949  numberofcmps = 0;
5950  }
5951  else
5952  {
5953  jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5954  numberofcmps = 0;
5955  }
5956  }
5957 #ifdef SUPPORT_UCP
5958  else
5959  {
5960  SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5961  if (*cc == XCL_NOTPROP)
5962  invertcmp ^= 0x1;
5963  cc++;
5964  switch(*cc)
5965  {
5966  case PT_ANY:
5967  if (!invertcmp)
5968  jump = JUMP(SLJIT_JUMP);
5969  break;
5970 
5971  case PT_LAMP:
5972  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5973  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5974  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5975  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5976  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5977  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5978  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5979  break;
5980 
5981  case PT_GC:
5982  c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5983  SET_TYPE_OFFSET(c);
5984  jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5985  break;
5986 
5987  case PT_PC:
5988  jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5989  break;
5990 
5991  case PT_SC:
 /* Script tests were already emitted (and counted) before this loop;
 undo the decrement done at the top of the loop. */
5992  compares++;
5993  /* Do nothing. */
5994  break;
5995 
5996  case PT_SPACE:
5997  case PT_PXSPACE:
5998  SET_CHAR_OFFSET(9);
5999  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6000  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6001 
6002  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6003  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6004 
6005  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6006  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6007 
6008  SET_TYPE_OFFSET(ucp_Zl);
6009  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6010  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6011  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6012  break;
6013 
6014  case PT_WORD:
6015  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6016  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6017  /* Fall through. */
6018 
6019  case PT_ALNUM:
6020  SET_TYPE_OFFSET(ucp_Ll);
6021  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
 /* For PT_WORD the underscore result is already in TMP2, so OR into it. */
6022  OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6023  SET_TYPE_OFFSET(ucp_Nd);
6024  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6025  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6026  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6027  break;
6028 
6029  case PT_CLIST:
6030  other_cases = PRIV(ucd_caseless_sets) + cc[1];
6031 
6032  /* At least three characters are required.
6033  Otherwise this case would be handled by the normal code path. */
6034  SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6035  SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6036 
6037  /* Optimizing character pairs, if their difference is power of 2. */
 /* When two members differ in a single bit, OR-ing that bit into the
 character and comparing with the larger member tests both at once.
 TMP1 is biased by charoffset, so the bias is added back first. */
6038  if (is_powerof2(other_cases[1] ^ other_cases[0]))
6039  {
6040  if (charoffset == 0)
6041  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6042  else
6043  {
6044  OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6045  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6046  }
6047  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6048  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6049  other_cases += 2;
6050  }
6051  else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6052  {
6053  if (charoffset == 0)
6054  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6055  else
6056  {
6057  OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
 /* The mask must be the bit which differs between other_cases[1] and
 other_cases[2], exactly as in the charoffset == 0 path above: the
 result is compared with other_cases[2] below. */
6058  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6059  }
6060  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6061  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6062 
6063  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6064  OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6065 
6066  other_cases += 3;
6067  }
6068  else
6069  {
6070  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6071  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6072  }
6073 
 /* Remaining members: one compare each; only the last OP_FLAGS updates
 the zero flag which the jump below tests. */
6074  while (*other_cases != NOTACHAR)
6075  {
6076  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6077  OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6078  }
6079  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6080  break;
6081 
6082  case PT_UCNC:
6083  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6084  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6085  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6086  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6087  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6088  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6089 
6090  SET_CHAR_OFFSET(0xa0);
6091  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6092  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6093  SET_CHAR_OFFSET(0);
6094  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6095  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6096  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6097  break;
6098 
6099  case PT_PXGRAPH:
6100  /* C and Z groups are the farthest two groups. */
6101  SET_TYPE_OFFSET(ucp_Ll);
6102  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6103  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6104 
6105  jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6106 
6107  /* In case of ucp_Cf, we overwrite the result. */
6108  SET_CHAR_OFFSET(0x2066);
6109  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6110  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6111 
6112  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6113  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6114 
6115  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6116  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6117 
6118  JUMPHERE(jump);
 /* TMP2 is non-zero for the excluded characters, hence the test for
 zero here. */
6119  jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6120  break;
6121 
6122  case PT_PXPRINT:
6123  /* C and Z groups are the farthest two groups. */
6124  SET_TYPE_OFFSET(ucp_Ll);
6125  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6126  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6127 
6128  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6129  OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6130 
6131  jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6132 
6133  /* In case of ucp_Cf, we overwrite the result. */
6134  SET_CHAR_OFFSET(0x2066);
6135  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6136  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6137 
6138  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6139  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6140 
6141  JUMPHERE(jump);
6142  jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6143  break;
6144 
6145  case PT_PXPUNCT:
6146  SET_TYPE_OFFSET(ucp_Sc);
6147  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6148  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6149 
6150  SET_CHAR_OFFSET(0);
6151  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6152  OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6153 
6154  SET_TYPE_OFFSET(ucp_Pc);
6155  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6156  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6157  jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6158  break;
6159 
6160  default:
6161  SLJIT_UNREACHABLE();
6162  break;
6163  }
6164  cc += 2;
6165  }
6166 #endif
6167 
 /* All items but the last jump to 'list' on a match; the last one jumps to
 'backtracks' (with the condition inverted for a normal class). */
6168  if (jump != NULL)
6169  add_jump(compiler, compares > 0 ? list : backtracks, jump);
6170  }
6171 
6172 if (found != NULL)
6173  set_jumps(found, LABEL());
6174 }
6175 
6176 #undef SET_TYPE_OFFSET
6177 #undef SET_CHAR_OFFSET
6178 
6179 #endif
6180 
/* Emits the matching-path code for one "simple assertion" opcode.

   The opcode is selected by `type`; the cases handled are OP_SOD, OP_SOM,
   OP_NOT_WORD_BOUNDARY / OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL,
   OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE. Any other value falls out of
   the switch into SLJIT_UNREACHABLE().

   Arguments:
     common      compiler state (newline settings, mode, utf flag, shared
                 helper jump lists such as wordboundary / anynewline)
     type        the opcode to generate code for
     cc          pointer into the compiled pattern; only read by OP_REVERSE,
                 which fetches its length with GET(cc, 0)
     backtracks  jump list that receives every "assertion failed" jump

   Returns: cc unchanged for every opcode except OP_REVERSE, which returns
   cc + LINK_SIZE.

   Only OP_REVERSE emits code that modifies STR_PTR permanently; the other
   cases either leave STR_PTR untouched or save/restore it explicitly.
   NOTE(review): helpers such as read_char_range(), peek_char(),
   skip_char_back(), check_newlinechar() and check_partial() are defined
   elsewhere; their register side effects are not visible here. */
6181 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6182 {
6183 DEFINE_COMPILER;
6184 int length;
     /* Scratch slots for forward jumps; each case uses them independently. */
6185 struct sljit_jump *jump[4];
6186 #ifdef SUPPORT_UTF
6187 struct sljit_label *label;
6188 #endif /* SUPPORT_UTF */
6189 
6190 switch(type)
6191  {
6192  case OP_SOD:
       /* Backtrack unless STR_PTR equals arguments->begin. */
6193  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6194  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6195  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6196  return cc;
6197 
6198  case OP_SOM:
       /* Same shape as OP_SOD, but compares against arguments->str. */
6199  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6200  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6201  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6202  return cc;
6203 
6204  case OP_NOT_WORD_BOUNDARY:
6205  case OP_WORD_BOUNDARY:
       /* Fast-call the shared routine collected in common->wordboundary, then
          branch on the zero flag: OP_NOT_WORD_BOUNDARY backtracks on a
          non-zero result, OP_WORD_BOUNDARY backtracks on a zero result.
          NOTE(review): presumably sljit_set_current_flags() tells sljit that
          the called routine left the zero flag valid - confirm in sljit. */
6206  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6207  sljit_set_current_flags(compiler, SLJIT_SET_Z);
6208  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6209  return cc;
6210 
6211  case OP_EODN:
6212  /* Requires rather complex checks. */
       /* jump[0]: already at (or past) STR_END, so every newline test below
          is skipped. */
6213  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6214  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6215  {
         /* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = first
            unit at STR_PTR. */
6216  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6217  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6218  if (common->mode == JIT_COMPILE)
           /* Exactly two units must remain before STR_END. */
6219  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6220  else
6221  {
           /* Other (partial matching) modes: jump[1] is taken when exactly
              two units remain. Otherwise TMP2 becomes
              (TMP2 < STR_END) | (TMP1 != high byte of newline); a non-zero
              value backtracks immediately. A zero value means only one unit
              remains and it matches the first newline unit:
              check_partial(common, TRUE) is emitted and the path then
              backtracks unconditionally.
              NOTE(review): the jump below uses SLJIT_NOT_EQUAL on the zero
              flag; presumably an alias of NOT_ZERO - confirm in sljit. */
6222  jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6223  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6224  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6225  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6226  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6227  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6228  check_partial(common, TRUE);
6229  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6230  JUMPHERE(jump[1]);
6231  }
         /* Both units must equal the high and low bytes of common->newline. */
6232  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6233  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6234  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6235  }
6236  else if (common->nltype == NLTYPE_FIXED)
6237  {
         /* Fixed one-unit newline: exactly one unit must remain and it must
            equal common->newline. */
6238  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6239  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6240  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6241  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6242  }
6243  else
6244  {
         /* Non-fixed newline types. A leading CR is handled first:
            jump[1] - the unit at STR_PTR is not CR;
            jump[2] - CR with STR_PTR + 2 units beyond STR_END (accepted);
            jump[3] - CR followed by NL ending exactly at STR_END (accepted).
            CR with more than two units remaining, or CR followed by a
            non-NL unit, backtracks. */
6245  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6246  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6247  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6248  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6249  jump[2] = JUMP(SLJIT_GREATER);
6250  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6251  /* Equal. */
6252  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6253  jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6254  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6255 
6256  JUMPHERE(jump[1]);
6257  if (common->nltype == NLTYPE_ANYCRLF)
6258  {
           /* Not CR: backtrack if more than one unit remains, or if the
              unit (still in TMP1) is not NL. */
6259  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6260  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6261  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6262  }
6263  else
6264  {
           /* Save STR_PTR in the LOCALS1 stack slot, read one character,
              backtrack unless STR_PTR is then exactly STR_END, fast-call the
              shared common->anynewline routine and backtrack on a zero
              result. STR_PTR is restored on success.
              NOTE(review): assumes read_char_range() advances STR_PTR past
              the character it reads - confirm against its definition. */
6265  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6266  read_char_range(common, common->nlmin, common->nlmax, TRUE);
6267  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6268  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6269  sljit_set_current_flags(compiler, SLJIT_SET_Z);
6270  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6271  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6272  }
6273  JUMPHERE(jump[2]);
6274  JUMPHERE(jump[3]);
6275  }
       /* The "already at end" path and every accepted newline path meet here. */
6276  JUMPHERE(jump[0]);
6277  check_partial(common, FALSE);
6278  return cc;
6279 
6280  case OP_EOD:
       /* Backtrack while any input remains before STR_END. */
6281  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6282  check_partial(common, FALSE);
6283  return cc;
6284 
6285  case OP_DOLL:
       /* Backtrack immediately when arguments->noteol is non-zero. */
6286  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6287  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6288  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6289 
6290  if (!common->endonly)
         /* Reuse the OP_EODN code generation; the returned pointer is
            deliberately ignored because cc is returned unchanged below. */
6291  compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6292  else
6293  {
         /* endonly: same test as the OP_EOD case. */
6294  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6295  check_partial(common, FALSE);
6296  }
6297  return cc;
6298 
6299  case OP_DOLLM:
       /* jump[1]: input remains, go to the newline test. Otherwise we are at
          STR_END: backtrack if noteol is set, emit check_partial(), and skip
          the newline test through jump[0]. */
6300  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6301  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6302  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6303  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6304  check_partial(common, FALSE);
6305  jump[0] = JUMP(SLJIT_JUMP);
6306  JUMPHERE(jump[1]);
6307 
6308  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6309  {
         /* Fixed two-unit newline: TMP2 = STR_PTR + 2 units, TMP1 = first
            unit at STR_PTR. */
6310  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6311  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6312  if (common->mode == JIT_COMPILE)
           /* Fewer than two units remain: cannot be the newline. */
6313  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6314  else
6315  {
           /* Other modes: jump[1] is taken when two units are available.
              Otherwise the single remaining unit must equal the first
              newline unit; in that case check_partial(common, TRUE) is
              emitted and the path backtracks. */
6316  jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6317  /* STR_PTR = STR_END - IN_UCHARS(1) */
6318  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6319  check_partial(common, TRUE);
6320  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6321  JUMPHERE(jump[1]);
6322  }
6323 
         /* Both units must equal the high and low bytes of common->newline. */
6324  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6325  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6326  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6327  }
6328  else
6329  {
         /* Delegates to the shared helpers.
            NOTE(review): presumably peek_char() reads without consuming and
            check_newlinechar() backtracks on a non-newline - confirm. */
6330  peek_char(common, common->nlmax);
6331  check_newlinechar(common, common->nltype, backtracks, FALSE);
6332  }
6333  JUMPHERE(jump[0]);
6334  return cc;
6335 
6336  case OP_CIRC:
       /* Backtrack if STR_PTR is beyond arguments->begin, or if
          arguments->notbol is non-zero. TMP2 keeps the ARGUMENTS pointer
          until the second load overwrites it. */
6337  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6338  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6339  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6340  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6341  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6342  return cc;
6343 
6344  case OP_CIRCM:
       /* jump[1]: STR_PTR is beyond arguments->begin, go to the "preceded by
          a newline" test. Otherwise we are at begin: backtrack if notbol is
          set, else accept through jump[0]. TMP1 holds begin throughout. */
6345  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6346  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6347  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6348  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6349  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6350  jump[0] = JUMP(SLJIT_JUMP);
6351  JUMPHERE(jump[1]);
6352 
       /* Backtrack when STR_PTR is at or past STR_END. */
6353  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6354  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6355  {
         /* Fixed two-unit newline: backtrack if fewer than two units lie
            between begin and STR_PTR, then compare the two preceding units
            with the high and low bytes of common->newline. */
6356  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6357  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6358  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6359  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6360  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6361  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6362  }
6363  else
6364  {
         /* Step back one character, read it again and test it.
            NOTE(review): presumably the step back and the re-read cancel
            out so STR_PTR ends where it started - confirm in the helpers. */
6365  skip_char_back(common);
6366  read_char_range(common, common->nlmin, common->nlmax, TRUE);
6367  check_newlinechar(common, common->nltype, backtracks, FALSE);
6368  }
6369  JUMPHERE(jump[0]);
6370  return cc;
6371 
6372  case OP_REVERSE:
       /* Moves STR_PTR backwards by the length stored at cc. A zero length
          emits no code at all. */
6373  length = GET(cc, 0);
6374  if (length == 0)
6375  return cc + LINK_SIZE;
6376  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6377 #ifdef SUPPORT_UTF
6378  if (common->utf)
6379  {
         /* UTF: emit a run-time loop that calls skip_char_back() `length`
            times with TMP2 as the countdown, backtracking if STR_PTR is at
            or before arguments->begin (held in TMP3) before any step. */
6380  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6381  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6382  label = LABEL();
6383  add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6384  skip_char_back(common);
6385  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6386  JUMPTO(SLJIT_NOT_ZERO, label);
6387  }
6388  else
6389 #endif
6390  {
         /* Non-UTF: subtract IN_UCHARS(length) in one step and backtrack if
            the result is before arguments->begin. */
6391  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6392  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6393  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6394  }
6395  check_start_used_ptr(common);
6396  return cc + LINK_SIZE;
6397  }
     /* Every handled case returns from inside the switch. */
6398 SLJIT_UNREACHABLE();
6399 return cc;
6400 }
6401 
6402 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6403 {
6404 DEFINE_COMPILER;
6405 int length;
6406 unsigned int c, oc, bit;
6407 compare_context context;
6408 struct sljit_jump *jump[3];
6409 jump_list *end_list;
6410 #ifdef SUPPORT_UTF
6411 struct sljit_label *label;
6412 #ifdef SUPPORT_UCP
6413 pcre_uchar propdata[5];
6414 #endif
6415 #endif /* SUPPORT_UTF */
6416 
6417 switch(type)
6418  {
6419  case OP_NOT_DIGIT:
6420  case OP_DIGIT:
6421  /* Digits are usually 0-9, so it is worth to optimize them. */
6422  if (check_str_ptr)
6423  detect_partial_match(common, backtracks);
6424 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6425  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6426  read_char7_type(common, type == OP_NOT_DIGIT);
6427  else
6428 #endif
6429  read_char8_type(common, type == OP_NOT_DIGIT);
6430  /* Flip the starting bit in the negative case. */
6431  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6432  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6433  return cc;
6434 
6435  case OP_NOT_WHITESPACE:
6436  case OP_WHITESPACE:
6437  if (check_str_ptr)
6438  detect_partial_match(common, backtracks);
6439 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6440  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6441  read_char7_type(common, type == OP_NOT_WHITESPACE);
6442  else
6443 #endif
6444  read_char8_type(common, type == OP_NOT_WHITESPACE);
6445  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6446  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6447  return cc;
6448 
6449  case OP_NOT_WORDCHAR:
6450  case OP_WORDCHAR:
6451  if (check_str_ptr)
6452  detect_partial_match(common, backtracks);
6453 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6454  if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6455  read_char7_type(common, type == OP_NOT_WORDCHAR);
6456  else
6457 #endif
6458  read_char8_type(common, type == OP_NOT_WORDCHAR);
6459  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6460  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6461  return cc;
6462 
6463  case OP_ANY:
6464  if (check_str_ptr)
6465  detect_partial_match(common, backtracks);
6466  read_char_range(common, common->nlmin, common->nlmax, TRUE);
6467  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6468  {
6469  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6470  end_list = NULL;
6471  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6472  add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6473  else
6474  check_str_end(common, &end_list);
6475 
6476  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6477  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6478  set_jumps(end_list, LABEL());
6479  JUMPHERE(jump[0]);
6480  }
6481  else
6482  check_newlinechar(common, common->nltype, backtracks, TRUE);
6483  return cc;
6484 
6485  case OP_ALLANY:
6486  if (check_str_ptr)
6487  detect_partial_match(common, backtracks);
6488 #ifdef SUPPORT_UTF
6489  if (common->utf)
6490  {
6491  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6492  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6493 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6494 #if defined COMPILE_PCRE8