NCBI C++ ToolKit
pcre2_jit_test.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Original API code Copyright (c) 1997-2012 University of Cambridge
10  New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16  * Redistributions of source code must retain the above copyright notice,
17  this list of conditions and the following disclaimer.
18 
19  * Redistributions in binary form must reproduce the above copyright
20  notice, this list of conditions and the following disclaimer in the
21  documentation and/or other materials provided with the distribution.
22 
23  * Neither the name of the University of Cambridge nor the names of its
24  contributors may be used to endorse or promote products derived from
25  this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53  \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55  \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
56  \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57  \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58  \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60  \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61  \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63  \xc3\xa9 = 0xe9 = 233 (e')
64  \xc3\x89 = 0xc9 = 201 (E')
65  \xc3\xa1 = 0xe1 = 225 (a')
66  \xc3\x81 = 0xc1 = 193 (A')
67  \x53 = 0x53 = S
68  \x73 = 0x73 = s
69  \xc5\xbf = 0x17f = 383 (long S)
70  \xc8\xba = 0x23a = 570
71  \xe2\xb1\xa5 = 0x2c65 = 11365
72  \xe1\xbd\xb8 = 0x1f78 = 8056
73  \xe1\xbf\xb8 = 0x1ff8 = 8184
74  \xf0\x90\x90\x80 = 0x10400 = 66560
75  \xf0\x90\x90\xa8 = 0x10428 = 66600
76  \xc7\x84 = 0x1c4 = 452
77  \xc7\x85 = 0x1c5 = 453
78  \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80  ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81  ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82  ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85  \xcc\x8d = 0x30d = 781
86  Special:
87  \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88  \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89  \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90  \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91  \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92  \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Forward declarations of the test suites. Each takes no arguments and
   returns an int status; regression_tests() is called from main() below.
   Being static, they must be defined in this translation unit (the bodies
   are not in this part of the file). */
95 static int regression_tests(void);
96 static int invalid_utf8_regression_tests(void);
97 static int invalid_utf16_regression_tests(void);
98 static int invalid_utf32_regression_tests(void);
99 
/*
 * Entry point of the JIT regression test program.
 *
 * Queries the PCRE2 library for JIT support through the pcre2_config
 * function of the first enabled code unit width (8, then 16, then 32).
 * Without that query "jit" would stay 0 and the program would always
 * refuse to run.
 *
 * Returns 1 when JIT support is not available. Otherwise returns the
 * bitwise OR of the status values of all four regression suites, so the
 * result is zero only when every suite returned zero.
 */
int main(void)
{
	int jit = 0;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Bitwise OR (not ||) so that every suite runs even if an earlier one
	   reported a failure. */
	return regression_tests()
		| invalid_utf8_regression_tests()
		| invalid_utf16_regression_tests()
		| invalid_utf32_regression_tests();
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
/* Refuse to build unless at least one code unit width is enabled. */
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Short names for the compile option combinations that appear in the first
   column of the test table below. */
126 #define MU (PCRE2_MULTILINE | PCRE2_UTF)
127 #define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
128 #define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
129 #define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
130 #define M (PCRE2_MULTILINE)
131 #define MP (PCRE2_MULTILINE | PCRE2_UCP)
132 #define U (PCRE2_UTF)
133 #define CM (PCRE2_CASELESS | PCRE2_MULTILINE)
134 
/* BSR(x) moves a value into the bits above the low 16, so it can be OR-ed
   with a newline constant in the same table column (see the entries using
   PCRE2_NEWLINE_CRLF | BSR(...)). A abbreviates PCRE2_NEWLINE_ANYCRLF. */
135 #define BSR(x) ((x) << 16)
136 #define A PCRE2_NEWLINE_ANYCRLF
137 
/* Unpack a combined value: the low 16 bits are the newline part and the
   remaining upper bits are the part stored with BSR(). */
138 #define GET_NEWLINE(x) ((x) & 0xffff)
139 #define GET_BSR(x) ((x) >> 16)
140 
/* OFFSET_MASK selects the low 16 bits; the F_* values are flag bits above
   that range. In the test table they are OR-ed with a small number in the
   fourth column (e.g. "1 | F_NOMATCH"), presumably the start offset.
   NOTE(review): F_NO16 and F_NO32 have the same value (0x020000), so the two
   flags cannot be told apart - confirm this is intended before relying on
   them separately. */
141 #define OFFSET_MASK 0x00ffff
142 #define F_NO8 0x010000
143 #define F_NO16 0x020000
144 #define F_NO32 0x020000
145 #define F_NOMATCH 0x040000
146 #define F_DIFF 0x080000
147 #define F_FORCECONV 0x100000
148 #define F_PROPERTY 0x200000
149 
152  int newline;
155  const char *pattern;
156  const char *input;
157 };
158 
160  /* Constant strings. */
161  { MU, A, 0, 0, "AbC", "AbAbC" },
162  { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163  { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164  { M, A, 0, 0, "[^a]", "aAbB" },
165  { CM, A, 0, 0, "[^m]", "mMnN" },
166  { M, A, 0, 0, "a[^b][^#]", "abacd" },
167  { CM, A, 0, 0, "A[^B][^E]", "abacd" },
168  { CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169  { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170  { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171  { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172  { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173  { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174  { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175  { MU, A, 0, 0, "[axd]", "sAXd" },
176  { CMU, A, 0, 0, "[axd]", "sAXd" },
177  { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178  { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179  { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180  { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181  { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182  { MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183  { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184  { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185  { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186  { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187  { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189  { M, A, 0, 0, "\\Ca", "cda" },
190  { CM, A, 0, 0, "\\Ca", "CDA" },
191  { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192  { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194  { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195  { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196  { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197  { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198  { M, A, 0, 0, "[3-57-9]", "5" },
199  { PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200  "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201  { 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
202  { 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
203 
204  /* Assertions. */
205  { MU, A, 0, 0, "\\b[^A]", "A_B#" },
206  { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
207  { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
208  { MP, A, 0, 0, "\\B", "_\xa1" },
209  { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
210  { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
211  { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
212  { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
213  { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
214  { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
215  { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
216  { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
217  { M, A, 0, 1 | F_NOMATCH, "^", "\n" },
218  { 0, 0, 0, 0, "^ab", "ab" },
219  { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
220  { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
221  { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
222  { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
223  { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
224  { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
225  { 0, 0, 0, 0, "ab$", "ab" },
226  { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
227  { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
228  { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
229  { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
230  { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
231  { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
232  { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
233  { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
234  { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
235  { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
236  { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
237  { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
238  { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
239  { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
240  { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
241  { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
242  { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
243  { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
244  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
245  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
246  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
247  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
248  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
249  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
250  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
251  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
252  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
253  { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
254  { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
255  { M, A, 0, 0, "\\Aa", "aaa" },
256  { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
257  { M, A, 0, 1, "\\Ga", "aaa" },
258  { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
259  { M, A, 0, 0, "a\\z", "aaa" },
260  { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
261 
262  /* Brackets and alternatives. */
263  { MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
264  { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
265  { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
266  { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
267  { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
268  { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
269  { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270  { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
271  { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
272  { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
273  { U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
274  { U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
275  { CM, A, 0, 0, "ab|cd", "CD" },
276  { CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
277  { CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
278  { 0, A, 0, 0, "(a|)b*+a", "a" },
279  { 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
280 
281  /* Greedy and non-greedy ? operators. */
282  { MU, A, 0, 0, "(?:a)?a", "laab" },
283  { CMU, A, 0, 0, "(A)?A", "llaab" },
284  { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
285  { MU, A, 0, 0, "(a)?a", "manm" },
286  { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
287  { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
288  { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
289 
290  /* Greedy and non-greedy + operators */
291  { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
292  { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
293  { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
294  { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
295  { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
296  { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
297  { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
298  { MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
299 
300  /* Greedy and non-greedy * operators */
301  { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
302  { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
303  { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
304  { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
305  { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
306  { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
307  { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
308  { M, A, 0, 0, "((?:a|)*){0}a", "a" },
309 
310  /* Combining ? + * operators */
311  { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
312  { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
313  { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
314  { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
315  { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
316 
317  /* Single character iterators. */
318  { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
319  { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
320  { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
321  { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
322  { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
323  { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
324  { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
325  { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
326  { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
327  { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
328  { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
329  { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
330  { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
331  { CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
332  { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
333  { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
334  { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
335  { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
336  { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
337  { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
338  { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
339  { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
340  { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
341  { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
342  { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
343  { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
344  { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
345  { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
346  { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
347  { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
348  { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
349  { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
350  { MU, A, 0, 0, "\\d+123", "987654321,01234" },
351  { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
352  { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
353  { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
354  { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
355  { MU, A, 0, 0, ".[ab]*.", "xx" },
356  { MU, A, 0, 0, ".[ab]*a", "xxa" },
357  { MU, A, 0, 0, ".[ab]?.", "xx" },
358  { MU, A, 0, 0, "_[ab]+_*a", "_aa" },
359  { MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
360  { MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
361  { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
362  { 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
363 
364  /* Bracket repeats with limit. */
365  { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
366  { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
367  { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
368  { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
369  { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
370  { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
371  { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
372  { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
373  { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
374 
375  /* Basic character sets. */
376  { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
377  { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
378  { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
379  { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
380  { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
381  { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
382  { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
383  { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
384  { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
385  { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
386  { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
387  { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
388  { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
389  { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
390  { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
391  { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
392  { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
393  { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
394  { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
395  { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
396  { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
397  { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
398  { CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
399  { M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
400 
401  /* Unicode properties. */
402  { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
403  { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
404  { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
405  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
406  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
407  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
408  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
409  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
410  { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
411  { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
412  { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
413  { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
414  { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
415  { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
416  { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
417  { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
418  { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
419  { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
420  { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
421  { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
422  { MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
423  { MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
424  { MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
425  { CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
426  { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
427  { MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
428  { MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
429 
430  /* Possible empty brackets. */
431  { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
432  { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
433  { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
434  { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
435  { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
436  { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
437  { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
438  { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
439  { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
440  { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
441 
442  /* Start offset. */
443  { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
444  { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
445  { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
446  { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
447 
448  /* Newline. */
449  { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
450  { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
451  { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
452  { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
453  { MU, A, 0, 1, "^", "\r\n" },
454  { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
455  { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
456 
457  /* Any character except newline or any newline. */
458  { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
459  { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
460  { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
461  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
462  { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
463  { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
464  { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
465  { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
466  { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
467  { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
468  { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
469  { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
470  { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
471  { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
472  { MU, A, 0, 0, "\\R+", "ab\r\n\r" },
473  { MU, A, 0, 0, "\\R*", "ab\r\n\r" },
474  { MU, A, 0, 0, "\\R*", "\r\n\r" },
475  { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
476  { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
477  { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
478  { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
479  { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
480  { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
481  { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
482  { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
483  { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
484 
485  /* Atomic groups (no fallback from "next" direction). */
486  { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
487  { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
488  { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
489  "bababcdedefgheijijklmlmnop" },
490  { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
491  { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
492  { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
493  { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
494  { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
495  { MU, A, 0, 0, "(?>x|)*$", "aaa" },
496  { MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
497  { MU, A, 0, 0, "(?>x|())*$", "aaa" },
498  { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
499  { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
500  { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
501  { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
502  { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
503  { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
504  { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
505  { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
506  { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
507  { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
508  { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
509  { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
510  { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
511  { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
512  { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
513  { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
514  { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
515  { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
516  { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
517  { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
518  { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
519  { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
520  { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
521  { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
522  { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
523  { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
524  { MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
525 
526  /* Possessive quantifiers. */
527  { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
528  { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
529  { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
530  { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
531  { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
532  { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
533  { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
534  { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
535  { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
536  { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
537  { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
538  { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
539  { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
540  { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
541  { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
542  { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
543  { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
544  { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
545  { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
546  { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
547  { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
548  { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
549  { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
550  { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
551  { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
552  { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
553  { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
554  { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
555  { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
556  { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
557  { MU, A, 0, 0, "(A)*+$", "ABC" },
558  { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
559  { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
560  { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
561  { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
562  { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
563 
564  /* Back references. */
565  { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
566  { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
567  { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
568  { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
569  { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
570  { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
571  { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
572  { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
573  { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
574  { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
575  { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
576  { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
577  { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
578  { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
579  { MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
580  { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
581  { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
582  { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
583  { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
584  { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
585  { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
586  { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
587  { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
588  { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
589  { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
590  { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
591  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
592  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
593  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
594  { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
595  { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
596  { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
597  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
598  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
599  { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
600  { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
601  { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
602  { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
603  { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
604  { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
605  { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
606 
607  /* Assertions. */
608  { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
609  { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
610  { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
611  { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
612  { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
613  { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
614  { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
615  { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
616  { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
617  { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
618  { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
619  { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
620  { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
621  { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
622  { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
623  { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
624  { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
625  { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
626  { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
627  { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
628  { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
629  { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
630  { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
631  { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
632  { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
633  { MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
634  { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
635  { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
636  { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
637  { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
638  { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
639  { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
640  { MU, A, 0, 0, "a(?=)b", "ab" },
641  { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
642  { MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
643 
644  /* Not empty, ACCEPT, FAIL */
645  { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
646  { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
647  { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
648  { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
649  { MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
650  { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
651  { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
652  { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
653  { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
654  { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
655  { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
656  { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
657  { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
658  { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
659  { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
660  { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
661  { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
662  { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
663  { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
664  { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
665  { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
666 
667  /* Conditional blocks. */
668  { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
669  { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
670  { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
671  { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
672  { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
673  { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
674  { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
675  { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
676  { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
677  { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
678  { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
679  { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
680  { MU, A, 0, 0, "(?(?=a)ab)", "a" },
681  { MU, A, 0, 0, "(?(?<!b)c)", "b" },
682  { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
683  { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
684  { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
685  { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
686  { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
687  { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
688  { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
689  { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
690  { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
691  { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
692  { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
693  { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
694  { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
695  { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
696  { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
697  { MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
698  { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
699  { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
700  { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
701  { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
702  { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
703  { MU, A, 0, 0, "(?(?!)a|b)", "ab" },
704  { MU, A, 0, 0, "(?(?!)a)", "ab" },
705  { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
706 
707  /* Set start of match. */
708  { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
709  { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
710  { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
711  { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
712  { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
713 
714  /* First line. */
715  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
716  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
717  { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
718  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
719  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
720  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
721  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
722  { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
723  { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
724  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
725  { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
726  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
727  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
728  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
729  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
730  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
731  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
732  { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
733  { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
734  { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
735  { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
736  { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
737 
738  /* Recurse. */
739  { MU, A, 0, 0, "(a)(?1)", "aa" },
740  { MU, A, 0, 0, "((a))(?1)", "aa" },
741  { MU, A, 0, 0, "(b|a)(?1)", "aa" },
742  { MU, A, 0, 0, "(b|(a))(?1)", "aa" },
743  { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
744  { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
745  { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
746  { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
747  { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
748  { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
749  { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
750  { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
751  { MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
752  { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
753  { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
754  { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
755  { MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
756  { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
757  { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
758  { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
759  { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
760  { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
761  { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
762  { MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
763  { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
764  { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
765  { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
766  { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
767  { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
768  { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
769  { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
770  { MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
771 
772  /* 16 bit specific tests. */
773  { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
774  { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
775  { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
776  { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
777  { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
778  { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
779  { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
780  { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
781  { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
782  { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
783  { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
784  { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
785  { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
786  { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
787  { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
788  { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
789  { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
790  { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
791  { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
792  { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
793  { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
794  { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
795  { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
796  { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
797  { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
798  { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
799  { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
800  { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
801  { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
802  { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
803  { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
804 
805  /* Partial matching. */
806  { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
807  { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
808  { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
809  { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
810  { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
811  { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
812  { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
813  { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
814 
815  /* (*MARK) verb. */
816  { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
817  { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
818  { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
819  { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
820  { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
821  { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
822  { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
823  { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
824  { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
825  { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
826  { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
827  { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
828  { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
829  { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
830  { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
831 
832  /* (*COMMIT) verb. */
833  { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
834  { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
835  { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
836  { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
837  { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
838  { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
839 
840  /* (*PRUNE) verb. */
841  { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
842  { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
843  { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
844  { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
845  { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
846  { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
847  { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
848  { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
849  { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
850  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
851  { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
852  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
853  { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
854  { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
855  { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
856  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
857  { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
858  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
859  { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
860  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
861  { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
862  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
863  { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
864  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
865  { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
866  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
867  { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
868  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
869  { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
870  { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
871 
872  /* (*SKIP) verb. */
873  { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
874  { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
875  { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
876  { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
877 
878  /* (*THEN) verb. */
879  { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
880  { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
881  { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
882  { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
883  { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
884  { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
885  { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
886  { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
887  { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
888  { MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
889  { MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
890  { MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
891 
892  /* Recurse and control verbs. */
893  { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
894  { MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
895  { MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
896  { MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
897  { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
898  { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
899  { MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
900  { MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
901 
902 #ifdef SUPPORT_UNICODE
903  /* Script runs and iterations. */
904  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
905  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
906  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
907  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
908  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
909  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
910  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
911  { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
912 #endif /* SUPPORT_UNICODE */
913 
914  /* Deep recursion. */
915  { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
916  { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
917  { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
918 
919  /* Deep recursion: Stack limit reached. */
920  { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
921  { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
922  { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
923  { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
924  { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
925 
926  { 0, 0, 0, 0, NULL, NULL }
927 };
928 
929 #ifdef SUPPORT_PCRE2_8
930 static pcre2_jit_stack_8* callback8(void *arg)
931 {
932  return (pcre2_jit_stack_8 *)arg;
933 }
934 #endif
935 
936 #ifdef SUPPORT_PCRE2_16
937 static pcre2_jit_stack_16* callback16(void *arg)
938 {
939  return (pcre2_jit_stack_16 *)arg;
940 }
941 #endif
942 
943 #ifdef SUPPORT_PCRE2_32
944 static pcre2_jit_stack_32* callback32(void *arg)
945 {
946  return (pcre2_jit_stack_32 *)arg;
947 }
948 #endif
949 
950 #ifdef SUPPORT_PCRE2_8
951 static pcre2_jit_stack_8 *stack8;
952 
953 static pcre2_jit_stack_8 *getstack8(void)
954 {
955  if (!stack8)
956  stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
957  return stack8;
958 }
959 
960 static void setstack8(pcre2_match_context_8 *mcontext)
961 {
962  if (!mcontext) {
963  if (stack8)
964  pcre2_jit_stack_free_8(stack8);
965  stack8 = NULL;
966  return;
967  }
968 
969  pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
970 }
971 #endif /* SUPPORT_PCRE2_8 */
972 
973 #ifdef SUPPORT_PCRE2_16
974 static pcre2_jit_stack_16 *stack16;
975 
976 static pcre2_jit_stack_16 *getstack16(void)
977 {
978  if (!stack16)
979  stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
980  return stack16;
981 }
982 
983 static void setstack16(pcre2_match_context_16 *mcontext)
984 {
985  if (!mcontext) {
986  if (stack16)
987  pcre2_jit_stack_free_16(stack16);
988  stack16 = NULL;
989  return;
990  }
991 
992  pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
993 }
994 #endif /* SUPPORT_PCRE2_16 */
995 
996 #ifdef SUPPORT_PCRE2_32
997 static pcre2_jit_stack_32 *stack32;
998 
999 static pcre2_jit_stack_32 *getstack32(void)
1000 {
1001  if (!stack32)
1002  stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
1003  return stack32;
1004 }
1005 
1006 static void setstack32(pcre2_match_context_32 *mcontext)
1007 {
1008  if (!mcontext) {
1009  if (stack32)
1010  pcre2_jit_stack_free_32(stack32);
1011  stack32 = NULL;
1012  return;
1013  }
1014 
1015  pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
1016 }
1017 #endif /* SUPPORT_PCRE2_32 */
1018 
1019 #ifdef SUPPORT_PCRE2_16
1020 
1021 static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
1022 {
1023  PCRE2_SPTR8 iptr = input;
1024  PCRE2_UCHAR16 *optr = output;
1025  unsigned int c;
1026 
1027  if (max_length == 0)
1028  return 0;
1029 
1030  while (*iptr && max_length > 1) {
1031  c = 0;
1032  if (offsetmap)
1033  *offsetmap++ = (int)(iptr - (unsigned char*)input);
1034 
1035  if (*iptr < 0xc0)
1036  c = *iptr++;
1037  else if (!(*iptr & 0x20)) {
1038  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1039  iptr += 2;
1040  } else if (!(*iptr & 0x10)) {
1041  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1042  iptr += 3;
1043  } else if (!(*iptr & 0x08)) {
1044  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1045  iptr += 4;
1046  }
1047 
1048  if (c < 65536) {
1049  *optr++ = c;
1050  max_length--;
1051  } else if (max_length <= 2) {
1052  *optr = '\0';
1053  return (int)(optr - output);
1054  } else {
1055  c -= 0x10000;
1056  *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1057  *optr++ = 0xdc00 | (c & 0x3ff);
1058  max_length -= 2;
1059  if (offsetmap)
1060  offsetmap++;
1061  }
1062  }
1063  if (offsetmap)
1064  *offsetmap = (int)(iptr - (unsigned char*)input);
1065  *optr = '\0';
1066  return (int)(optr - output);
1067 }
1068 
1069 static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
1070 {
1071  PCRE2_SPTR8 iptr = input;
1072  PCRE2_UCHAR16 *optr = output;
1073 
1074  if (max_length == 0)
1075  return 0;
1076 
1077  while (*iptr && max_length > 1) {
1078  *optr++ = *iptr++;
1079  max_length--;
1080  }
1081  *optr = '\0';
1082  return (int)(optr - output);
1083 }
1084 
/* Element count of the two 16-bit scratch arrays below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer receiving the 16-bit form of a test pattern (filled by
   convert_utf8_to_utf16() / copy_char8_to_char16() in regression_tests()). */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* NOTE(review): presumably the offsetmap handed to convert_utf8_to_utf16()
   when subject strings are converted - confirm against its uses. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1088 
1089 #endif /* SUPPORT_PCRE2_16 */
1090 
1091 #ifdef SUPPORT_PCRE2_32
1092 
1093 static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
1094 {
1095  PCRE2_SPTR8 iptr = input;
1096  PCRE2_UCHAR32 *optr = output;
1097  unsigned int c;
1098 
1099  if (max_length == 0)
1100  return 0;
1101 
1102  while (*iptr && max_length > 1) {
1103  c = 0;
1104  if (offsetmap)
1105  *offsetmap++ = (int)(iptr - (unsigned char*)input);
1106 
1107  if (*iptr < 0xc0)
1108  c = *iptr++;
1109  else if (!(*iptr & 0x20)) {
1110  c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1111  iptr += 2;
1112  } else if (!(*iptr & 0x10)) {
1113  c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1114  iptr += 3;
1115  } else if (!(*iptr & 0x08)) {
1116  c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1117  iptr += 4;
1118  }
1119 
1120  *optr++ = c;
1121  max_length--;
1122  }
1123  if (offsetmap)
1124  *offsetmap = (int)(iptr - (unsigned char*)input);
1125  *optr = 0;
1126  return (int)(optr - output);
1127 }
1128 
1129 static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
1130 {
1131  PCRE2_SPTR8 iptr = input;
1132  PCRE2_UCHAR32 *optr = output;
1133 
1134  if (max_length == 0)
1135  return 0;
1136 
1137  while (*iptr && max_length > 1) {
1138  *optr++ = *iptr++;
1139  max_length--;
1140  }
1141  *optr = '\0';
1142  return (int)(optr - output);
1143 }
1144 
/* Element count of the two 32-bit scratch arrays below. */
#define REGTEST_MAX_LENGTH32 4096
/* NOTE(review): presumably the scratch buffer receiving the 32-bit form of
   test patterns and subjects, mirroring regtest_buf16 - confirm against its
   uses. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* NOTE(review): presumably the offsetmap handed to convert_utf8_to_utf32()
   when subject strings are converted - confirm against its uses. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1148 
1149 #endif /* SUPPORT_PCRE2_32 */
1150 
/* Reports whether a zero-terminated string is pure 7-bit ASCII.
   Returns 1 when every byte before the terminator is at most 127 (an empty
   string qualifies), and 0 as soon as a larger byte is seen. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
1161 
/* NOTE(review): presumably the number of ovector pairs requested when the
   match data blocks are created - its uses are outside this excerpt, confirm. */
#define OVECTOR_SIZE 15
1163 
1164 static int regression_tests(void)
1165 {
1166  struct regression_test_case *current = regression_test_cases;
1167  int error;
1168  PCRE2_SIZE err_offs;
1169  int is_successful;
1170  int is_ascii;
1171  int total = 0;
1172  int successful = 0;
1173  int successful_row = 0;
1174  int counter = 0;
1175  int jit_compile_mode;
1176  int utf = 0;
1177  uint32_t disabled_options = 0;
1178  int i;
1179 #ifdef SUPPORT_PCRE2_8
1180  pcre2_code_8 *re8;
1181  pcre2_compile_context_8 *ccontext8;
1182  pcre2_match_data_8 *mdata8_1;
1183  pcre2_match_data_8 *mdata8_2;
1184  pcre2_match_context_8 *mcontext8;
1185  PCRE2_SIZE *ovector8_1 = NULL;
1186  PCRE2_SIZE *ovector8_2 = NULL;
1187  int return_value8[2];
1188 #endif
1189 #ifdef SUPPORT_PCRE2_16
1190  pcre2_code_16 *re16;
1191  pcre2_compile_context_16 *ccontext16;
1192  pcre2_match_data_16 *mdata16_1;
1193  pcre2_match_data_16 *mdata16_2;
1194  pcre2_match_context_16 *mcontext16;
1195  PCRE2_SIZE *ovector16_1 = NULL;
1196  PCRE2_SIZE *ovector16_2 = NULL;
1197  int return_value16[2];
1198  int length16;
1199 #endif
1200 #ifdef SUPPORT_PCRE2_32
1201  pcre2_code_32 *re32;
1202  pcre2_compile_context_32 *ccontext32;
1203  pcre2_match_data_32 *mdata32_1;
1204  pcre2_match_data_32 *mdata32_2;
1205  pcre2_match_context_32 *mcontext32;
1206  PCRE2_SIZE *ovector32_1 = NULL;
1207  PCRE2_SIZE *ovector32_2 = NULL;
1208  int return_value32[2];
1209  int length32;
1210 #endif
1211 
1212 #if defined SUPPORT_PCRE2_8
1213  PCRE2_UCHAR8 cpu_info[128];
1214 #elif defined SUPPORT_PCRE2_16
1215  PCRE2_UCHAR16 cpu_info[128];
1216 #elif defined SUPPORT_PCRE2_32
1217  PCRE2_UCHAR32 cpu_info[128];
1218 #endif
1219 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1220  int return_value;
1221 #endif
1222 
1223  /* This test compares the behaviour of interpreter and JIT. Although disabling
1224  utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1225  still considered successful from pcre2_jit_test point of view. */
1226 
1227 #if defined SUPPORT_PCRE2_8
1229 #elif defined SUPPORT_PCRE2_16
1231 #elif defined SUPPORT_PCRE2_32
1233 #endif
1234 
1235  printf("Running JIT regression tests\n");
1236  printf(" target CPU of SLJIT compiler: ");
1237  for (i = 0; cpu_info[i]; i++)
1238  printf("%c", (char)(cpu_info[i]));
1239  printf("\n");
1240 
1241 #if defined SUPPORT_PCRE2_8
1243 #elif defined SUPPORT_PCRE2_16
1245 #elif defined SUPPORT_PCRE2_32
1247 #endif
1248 
1249  if (!utf)
1250  disabled_options |= PCRE2_UTF;
1251 #ifdef SUPPORT_PCRE2_8
1252  printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
1253 #endif
1254 #ifdef SUPPORT_PCRE2_16
1255  printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1256 #endif
1257 #ifdef SUPPORT_PCRE2_32
1258  printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1259 #endif
1260 
1261  while (current->pattern) {
1262  /* printf("\nPattern: %s :\n", current->pattern); */
1263  total++;
1264  is_ascii = 0;
1265  if (!(current->start_offset & F_PROPERTY))
1266  is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1267 
1268  if (current->match_options & PCRE2_PARTIAL_SOFT)
1269  jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1270  else if (current->match_options & PCRE2_PARTIAL_HARD)
1271  jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1272  else
1273  jit_compile_mode = PCRE2_JIT_COMPLETE;
1274  error = 0;
1275 #ifdef SUPPORT_PCRE2_8
1276  re8 = NULL;
1277  ccontext8 = pcre2_compile_context_create_8(NULL);
1278  if (ccontext8) {
1279  if (GET_NEWLINE(current->newline))
1280  pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1281  if (GET_BSR(current->newline))
1282  pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1283 
1284  if (!(current->start_offset & F_NO8)) {
1286  current->compile_options & ~disabled_options,
1287  &error, &err_offs, ccontext8);
1288 
1289  if (!re8 && (utf || is_ascii))
1290  printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1291  }
1292  pcre2_compile_context_free_8(ccontext8);
1293  }
1294  else
1295  printf("\n8 bit: Cannot allocate compile context\n");
1296 #endif
1297 #ifdef SUPPORT_PCRE2_16
1298  if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1299  convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1300  else
1301  copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1302 
1303  re16 = NULL;
1304  ccontext16 = pcre2_compile_context_create_16(NULL);
1305  if (ccontext16) {
1306  if (GET_NEWLINE(current->newline))
1307  pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1308  if (GET_BSR(current->newline))
1309  pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1310 
1311  if (!(current->start_offset & F_NO16)) {
1312  re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1313  current->compile_options & ~disabled_options,
1314  &error, &err_offs, ccontext16);
1315 
1316  if (!re16 && (utf || is_ascii))
1317  printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1318  }
1319  pcre2_compile_context_free_16(ccontext16);
1320  }
1321  else
1322  printf("\n16 bit: Cannot allocate compile context\n");
1323 #endif
1324 #ifdef SUPPORT_PCRE2_32
1325  if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1326  convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1327  else
1328  copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1329 
1330  re32 = NULL;
1331  ccontext32 = pcre2_compile_context_create_32(NULL);
1332  if (ccontext32) {
1333  if (GET_NEWLINE(current->newline))
1334  pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1335  if (GET_BSR(current->newline))
1336  pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1337 
1338  if (!(current->start_offset & F_NO32)) {
1339  re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1340  current->compile_options & ~disabled_options,
1341  &error, &err_offs, ccontext32);
1342 
1343  if (!re32 && (utf || is_ascii))
1344  printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1345  }
1346  pcre2_compile_context_free_32(ccontext32);
1347  }
1348  else
1349  printf("\n32 bit: Cannot allocate compile context\n");
1350 #endif
1351 
1352  counter++;
1353  if ((counter & 0x3) != 0) {
1354 #ifdef SUPPORT_PCRE2_8
1355  setstack8(NULL);
1356 #endif
1357 #ifdef SUPPORT_PCRE2_16
1358  setstack16(NULL);
1359 #endif
1360 #ifdef SUPPORT_PCRE2_32
1361  setstack32(NULL);
1362 #endif
1363  }
1364 
1365 #ifdef SUPPORT_PCRE2_8
1366  return_value8[0] = -1000;
1367  return_value8[1] = -1000;
1370  mcontext8 = pcre2_match_context_create_8(NULL);
1371  if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1372  printf("\n8 bit: Cannot allocate match data\n");
1373  pcre2_match_data_free_8(mdata8_1);
1374  pcre2_match_data_free_8(mdata8_2);
1375  pcre2_match_context_free_8(mcontext8);
1376  pcre2_code_free_8(re8);
1377  re8 = NULL;
1378  } else {
1379  ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1380  ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1381  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1382  ovector8_1[i] = (PCRE2_SIZE)(-2);
1383  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1384  ovector8_2[i] = (PCRE2_SIZE)(-2);
1385  pcre2_set_match_limit_8(mcontext8, 10000000);
1386  }
1387  if (re8) {
1388  return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1389  current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1390 
1391  if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1392  printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1393  } else if ((counter & 0x1) != 0) {
1394  setstack8(mcontext8);
1395  return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1396  current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1397  } else {
1398  pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1399  return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1400  current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1401  }
1402  }
1403 #endif
1404 
1405 #ifdef SUPPORT_PCRE2_16
1406  return_value16[0] = -1000;
1407  return_value16[1] = -1000;
1410  mcontext16 = pcre2_match_context_create_16(NULL);
1411  if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1412  printf("\n16 bit: Cannot allocate match data\n");
1413  pcre2_match_data_free_16(mdata16_1);
1414  pcre2_match_data_free_16(mdata16_2);
1415  pcre2_match_context_free_16(mcontext16);
1416  pcre2_code_free_16(re16);
1417  re16 = NULL;
1418  } else {
1419  ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1420  ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1421  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1422  ovector16_1[i] = (PCRE2_SIZE)(-2);
1423  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1424  ovector16_2[i] = (PCRE2_SIZE)(-2);
1425  pcre2_set_match_limit_16(mcontext16, 10000000);
1426  }
1427  if (re16) {
1428  if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1429  length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1430  else
1431  length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1432 
1433  return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1434  current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1435 
1436  if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1437  printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1438  } else if ((counter & 0x1) != 0) {
1439  setstack16(mcontext16);
1440  return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1441  current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1442  } else {
1443  pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1444  return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1445  current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1446  }
1447  }
1448 #endif
1449 
1450 #ifdef SUPPORT_PCRE2_32
1451  return_value32[0] = -1000;
1452  return_value32[1] = -1000;
1455  mcontext32 = pcre2_match_context_create_32(NULL);
1456  if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1457  printf("\n32 bit: Cannot allocate match data\n");
1458  pcre2_match_data_free_32(mdata32_1);
1459  pcre2_match_data_free_32(mdata32_2);
1460  pcre2_match_context_free_32(mcontext32);
1461  pcre2_code_free_32(re32);
1462  re32 = NULL;
1463  } else {
1464  ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1465  ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1466  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1467  ovector32_1[i] = (PCRE2_SIZE)(-2);
1468  for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1469  ovector32_2[i] = (PCRE2_SIZE)(-2);
1470  pcre2_set_match_limit_32(mcontext32, 10000000);
1471  }
1472  if (re32) {
1473  if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1474  length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1475  else
1476  length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1477 
1478  return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1479  current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1480 
1481  if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1482  printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1483  } else if ((counter & 0x1) != 0) {
1484  setstack32(mcontext32);
1485  return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1486  current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1487  } else {
1488  pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1489  return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1490  current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1491  }
1492  }
1493 #endif
1494 
1495  /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1496  return_value8[0], return_value16[0], return_value32[0],
1497  (int)ovector8_1[0], (int)ovector8_1[1],
1498  (int)ovector16_1[0], (int)ovector16_1[1],
1499  (int)ovector32_1[0], (int)ovector32_1[1],
1500  (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1501 
1502  /* If F_DIFF is set, just run the test, but do not compare the results.
1503  Segfaults can still be captured. */
1504 
1505  is_successful = 1;
1506  if (!(current->start_offset & F_DIFF)) {
1507 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1508  if (!(current->start_offset & F_FORCECONV)) {
1509 
1510  /* All results must be the same. */
1511 #ifdef SUPPORT_PCRE2_8
1512  if ((return_value = return_value8[0]) != return_value8[1]) {
1513  printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1514  return_value8[0], return_value8[1], total, current->pattern, current->input);
1515  is_successful = 0;
1516  } else
1517 #endif
1518 #ifdef SUPPORT_PCRE2_16
1519  if ((return_value = return_value16[0]) != return_value16[1]) {
1520  printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1521  return_value16[0], return_value16[1], total, current->pattern, current->input);
1522  is_successful = 0;
1523  } else
1524 #endif
1525 #ifdef SUPPORT_PCRE2_32
1526  if ((return_value = return_value32[0]) != return_value32[1]) {
1527  printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1528  return_value32[0], return_value32[1], total, current->pattern, current->input);
1529  is_successful = 0;
1530  } else
1531 #endif
1532 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1533  if (return_value8[0] != return_value16[0]) {
1534  printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1535  return_value8[0], return_value16[0],
1536  total, current->pattern, current->input);
1537  is_successful = 0;
1538  } else
1539 #endif
1540 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1541  if (return_value8[0] != return_value32[0]) {
1542  printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1543  return_value8[0], return_value32[0],
1544  total, current->pattern, current->input);
1545  is_successful = 0;
1546  } else
1547 #endif
1548 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1549  if (return_value16[0] != return_value32[0]) {
1550  printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1551  return_value16[0], return_value32[0],
1552  total, current->pattern, current->input);
1553  is_successful = 0;
1554  } else
1555 #endif
1558  return_value = 2;
1559  } else {
1560  return_value *= 2;
1561  }
1562 #ifdef SUPPORT_PCRE2_8
1563  return_value8[0] = return_value;
1564 #endif
1565 #ifdef SUPPORT_PCRE2_16
1566  return_value16[0] = return_value;
1567 #endif
1568 #ifdef SUPPORT_PCRE2_32
1569  return_value32[0] = return_value;
1570 #endif
1571  /* Transform back the results. */
1572  if (current->compile_options & PCRE2_UTF) {
1573 #ifdef SUPPORT_PCRE2_16
1574  for (i = 0; i < return_value; ++i) {
1575  if (ovector16_1[i] != PCRE2_UNSET)
1576  ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1577  if (ovector16_2[i] != PCRE2_UNSET)
1578  ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1579  }
1580 #endif
1581 #ifdef SUPPORT_PCRE2_32
1582  for (i = 0; i < return_value; ++i) {
1583  if (ovector32_1[i] != PCRE2_UNSET)
1584  ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1585  if (ovector32_2[i] != PCRE2_UNSET)
1586  ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1587  }
1588 #endif
1589  }
1590 
1591  for (i = 0; i < return_value; ++i) {
1592 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1593  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1594  printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1595  i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1596  total, current->pattern, current->input);
1597  is_successful = 0;
1598  }
1599 #endif
1600 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1601  if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1602  printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1603  i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1604  total, current->pattern, current->input);
1605  is_successful = 0;
1606  }
1607 #endif
1608 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1609  if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1610  printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1611  i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1612  total, current->pattern, current->input);
1613  is_successful = 0;
1614  }
1615 #endif
1616  }
1617  }
1618  } else
1619 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1620  {
1621 #ifdef SUPPORT_PCRE2_8
1622  if (return_value8[0] != return_value8[1]) {
1623  printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1624  return_value8[0], return_value8[1], total, current->pattern, current->input);
1625  is_successful = 0;
1626  } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1627  if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1628  return_value8[0] = 2;
1629  else
1630  return_value8[0] *= 2;
1631 
1632  for (i = 0; i < return_value8[0]; ++i)
1633  if (ovector8_1[i] != ovector8_2[i]) {
1634  printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1635  i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1636  is_successful = 0;
1637  }
1638  }
1639 #endif
1640 
1641 #ifdef SUPPORT_PCRE2_16
1642  if (return_value16[0] != return_value16[1]) {
1643  printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1644  return_value16[0], return_value16[1], total, current->pattern, current->input);
1645  is_successful = 0;
1646  } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1647  if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1648  return_value16[0] = 2;
1649  else
1650  return_value16[0] *= 2;
1651 
1652  for (i = 0; i < return_value16[0]; ++i)
1653  if (ovector16_1[i] != ovector16_2[i]) {
1654  printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1655  i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1656  is_successful = 0;
1657  }
1658  }
1659 #endif
1660 
1661 #ifdef SUPPORT_PCRE2_32
1662  if (return_value32[0] != return_value32[1]) {
1663  printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1664  return_value32[0], return_value32[1], total, current->pattern, current->input);
1665  is_successful = 0;
1666  } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1667  if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1668  return_value32[0] = 2;
1669  else
1670  return_value32[0] *= 2;
1671 
1672  for (i = 0; i < return_value32[0]; ++i)
1673  if (ovector32_1[i] != ovector32_2[i]) {
1674  printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1675  i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1676  is_successful = 0;
1677  }
1678  }
1679 #endif
1680  }
1681  }
1682 
1683  if (is_successful) {
1684 #ifdef SUPPORT_PCRE2_8
1685  if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1686  if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1687  printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1688  total, current->pattern, current->input);
1689  is_successful = 0;
1690  }
1691 
1692  if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1693  printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1694  total, current->pattern, current->input);
1695  is_successful = 0;
1696  }
1697  }
1698 #endif
1699 #ifdef SUPPORT_PCRE2_16
1700  if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1701  if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1702  printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1703  total, current->pattern, current->input);
1704  is_successful = 0;
1705  }
1706 
1707  if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1708  printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1709  total, current->pattern, current->input);
1710  is_successful = 0;
1711  }
1712  }
1713 #endif
1714 #ifdef SUPPORT_PCRE2_32
1715  if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1716  if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1717  printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1718  total, current->pattern, current->input);
1719  is_successful = 0;
1720  }
1721 
1722  if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1723  printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1724  total, current->pattern, current->input);
1725  is_successful = 0;
1726  }
1727  }
1728 #endif
1729  }
1730 
1731  if (is_successful) {
1732 #ifdef SUPPORT_PCRE2_8
1733  if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1734  printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1735  total, current->pattern, current->input);
1736  is_successful = 0;
1737  }
1738 #endif
1739 #ifdef SUPPORT_PCRE2_16
1740  if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1741  printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1742  total, current->pattern, current->input);
1743  is_successful = 0;
1744  }
1745 #endif
1746 #ifdef SUPPORT_PCRE2_32
1747  if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1748  printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1749  total, current->pattern, current->input);
1750  is_successful = 0;
1751  }
1752 #endif
1753  }
1754 
1755 #ifdef SUPPORT_PCRE2_8
1756  pcre2_code_free_8(re8);
1757  pcre2_match_data_free_8(mdata8_1);
1758  pcre2_match_data_free_8(mdata8_2);
1759  pcre2_match_context_free_8(mcontext8);
1760 #endif
1761 #ifdef SUPPORT_PCRE2_16
1762  pcre2_code_free_16(re16);
1763  pcre2_match_data_free_16(mdata16_1);
1764  pcre2_match_data_free_16(mdata16_2);
1765  pcre2_match_context_free_16(mcontext16);
1766 #endif
1767 #ifdef SUPPORT_PCRE2_32
1768  pcre2_code_free_32(re32);
1769  pcre2_match_data_free_32(mdata32_1);
1770  pcre2_match_data_free_32(mdata32_2);
1771  pcre2_match_context_free_32(mcontext32);
1772 #endif
1773 
1774  if (is_successful) {
1775  successful++;
1776  successful_row++;
1777  printf(".");
1778  if (successful_row >= 60) {
1779  successful_row = 0;
1780  printf("\n");
1781  }
1782  } else
1783  successful_row = 0;
1784 
1785  fflush(stdout);
1786  current++;
1787  }
1788 #ifdef SUPPORT_PCRE2_8
1789  setstack8(NULL);
1790 #endif
1791 #ifdef SUPPORT_PCRE2_16
1792  setstack16(NULL);
1793 #endif
1794 #ifdef SUPPORT_PCRE2_32
1795  setstack32(NULL);
1796 #endif
1797 
1798  if (total == successful) {
1799  printf("\nAll JIT regression tests are successfully passed.\n");
1800  return 0;
1801  } else {
1802  printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1803  return 1;
1804  }
1805 }
1806 
1807 #if defined SUPPORT_UNICODE
1808 
1809 static int check_invalid_utf_result(int pattern_index, const char *type, int result,
1810  int match_start, int match_end, PCRE2_SIZE *ovector)
1811 {
1812  if (match_start < 0) {
1813  if (result != -1) {
1814  printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
1815  return 1;
1816  }
1817  return 0;
1818  }
1819 
1820  if (result <= 0) {
1821  printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
1822  return 1;
1823  }
1824 
1825  if (ovector[0] != (PCRE2_SIZE)match_start) {
1826  printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
1827  pattern_index, type, (int)ovector[0], match_start);
1828  return 1;
1829  }
1830 
1831  if (ovector[1] != (PCRE2_SIZE)match_end) {
1832  printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
1833  pattern_index, type, (int)ovector[1], match_end);
1834  return 1;
1835  }
1836 
1837  return 0;
1838 }
1839 
1840 #endif /* SUPPORT_UNICODE */
1841 
1842 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1843 
/* Shorthand option sets for the invalid UTF-8 test table that follows.
   UDA appears in the compile_options column, CI and CPI in the
   jit_compile_options column:
     UDA - PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED combined
     CI  - PCRE2_JIT_COMPLETE combined with PCRE2_JIT_INVALID_UTF
     CPI - CI plus PCRE2_JIT_PARTIAL_SOFT
   All three are #undef'd again right after the table. */
1844 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
1845 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
1846 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1847 
/* One row of the invalid UTF-8 regression table. The table initialises these
   fields positionally, so their order must not change. */
1848 struct invalid_utf8_regression_test_case {
  /* Options passed to pcre2_compile_8() when a pattern of this row is compiled. */
1849  uint32_t compile_options;
  /* The table stores CI, CPI or PCRE2_JIT_COMPLETE here; presumably the
     options for the JIT compile step - the consumer is outside this view,
     TODO confirm. */
1850  int jit_compile_options;
  /* NOTE(review): presumably the offset in the input at which matching
     starts - confirm against run_invalid_utf8_test(). */
1851  int start_offset;
  /* NOTE(review): skip_left / skip_right look like amounts trimmed from the
     two ends of the input before matching - usage is not visible here,
     confirm. */
1852  int skip_left;
1853  int skip_right;
  /* Expected match offsets; the table uses -1 / -1 for rows where,
     presumably, no match is expected (see the negative match_start handling
     in check_invalid_utf_result()) - forwarding is outside this view. */
1854  int match_start;
1855  int match_end;
  /* Up to two patterns per row. run_invalid_utf8_test() selects one by index
     and returns early when the selected slot is NULL. */
1856  const char *pattern[2];
  /* Subject string for the row; NULL only in the final table row. */
1857  const char *input;
1858 };
1859 
/* Object used only for its address: the test table stores
   &invalid_utf8_newline_cr in a pattern[1] slot. No initialiser is given, so
   as a file-scope object it is zero-initialised.
   NOTE(review): presumably the test runner compares pattern pointers against
   this address to switch to a special mode (the name suggests a CR newline
   setting) - confirm in run_invalid_utf8_test(). */
1860 static const char invalid_utf8_newline_cr;
1861 
/* Table of invalid UTF-8 regression cases.

   Column order follows struct invalid_utf8_regression_test_case:
     compile_options, jit_compile_options, start_offset, skip_left,
     skip_right, match_start, match_end, { pattern[0], pattern[1] }, input

   Rows with match_start and match_end both -1 presumably expect the match to
   fail (check_invalid_utf_result() requires a result of -1 for a negative
   match_start) - the code that forwards these fields is outside this view. */
1862 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
  /* Pattern "." (anchored, dotall) against a single leading sequence. */
1863  { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1864  { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1865  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1866  { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1867  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1868  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1869  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1870  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1871  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1872  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1873  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1874  { UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1875  { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1876  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1877  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1878  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1879  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1880  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1881  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1882  { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1883  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1884  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1885  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1886  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1887  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1888  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1889  { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1890  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1891  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1892  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1893  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1894  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1895  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1896  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1897  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1898  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1899  { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1900  { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1901 
  /* Pattern "\\B" (second pattern "\\b" on some rows), start_offset 4. */
1902  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1903  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1904  { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1905  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1906  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1907  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1908  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1909  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1910  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1911  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1912  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1913  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1914  { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1915  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1916  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1917  { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1918  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1919  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1920  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1921  { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1922  { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1923  { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1924  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1925  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1926  { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1927 
  /* Same patterns, start_offset 3. */
1928  { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1929  { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1930  { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1931  { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1932  { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1933  { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1934  { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1935  { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1936 
  /* Same patterns, start_offset 2. */
1937  { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1938  { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1939  { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1940  { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1941  { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1942  { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1943  { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1944 
  /* Same patterns, start_offset 1. */
1945  { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1946  { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1947  { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1948  { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1949 
  /* Caseless back-reference pattern "(.)\\1". */
1950  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1951  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1952  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1953  { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1954  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1955  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1956  { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1957  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1958  { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1959 
  /* Pattern "\\X". */
1960  { UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1961  { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1962  { UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1963  { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1964  { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1965  { UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1966  { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1967  { UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1968  { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1969 
  /* Negated character class "[^#]". */
1970  { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1971  { UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1972  { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1973  { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1974 
  /* Multiline "^\\W" with start_offset 1 (not anchored). */
1975  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1976  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1977  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1978  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1979  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1980  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1981  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1982  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1983  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1984  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1985  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1986 
  /* PCRE2_FIRSTLINE with the literal pattern "#". */
1987  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1988  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1989  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1990  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1991  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1992  { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1993 
  /* Literal pattern containing a two-byte sequence, with and without
     PCRE2_NO_START_OPTIMIZE. */
1994  { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1995  { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1996  { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1997  { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1998 
  /* Character type escapes, inside and outside a class. */
1999  { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
2000  { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
2001  { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
2002  { PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
2003 
2004  /* These two are not invalid UTF tests, but this infrastructure fits better for them. */
2005  { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
2006  { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
2007 
  /* The only row whose pattern[1] slot holds the address of
     invalid_utf8_newline_cr instead of a pattern string. */
2008  { PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
2009 
  /* Final row with NULL patterns and NULL input; presumably the end-of-table
     sentinel (the loop that walks the table is outside this view). */
2010  { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2011 };
2012 
/* Drop the option shorthands used by the 8-bit test table; the 16-bit
   section that follows defines its own UDA / CI / CPI again. */
2013 #undef UDA
2014 #undef CI
2015 #undef CPI
2016 
2017 static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
2018  int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
2019 {
2020  pcre2_code_8 *code;
2021  int result, errorcode;
2022  PCRE2_SIZE length, erroroffset;
2023  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
2024 
2025  if (current->pattern[i] == NULL)
2026  return 1;
2027 
2028  code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
2029  current->compile_options, &errorcode, &erroroffset, ccontext);
2030 
2031  if (!code) {
2032  printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2033  return 0;
2034  }
2035 
2036  if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
2037  printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2039  return 0;
2040  }
2041 
2042  length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
2043 
2044  if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2045  result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2046  length, current->start_offset - current->skip_left, 0, mdata, NULL);
2047 
2048  if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2050  return 0;
2051  }
2052  }
2053 
2054  if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2055  result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2056  length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2057 
2058  if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2060  return 0;
2061  }
2062  }
2063 
2065  return 1;
2066 }
2067 
2068 static int invalid_utf8_regression_tests(void)
2069 {
2070  const struct invalid_utf8_regression_test_case *current;
2071  pcre2_compile_context_8 *ccontext;
2072  pcre2_match_data_8 *mdata;
2073  int total = 0, successful = 0;
2074  int result;
2075 
2076  printf("\nRunning invalid-utf8 JIT regression tests\n");
2077 
2080  mdata = pcre2_match_data_create_8(4, NULL);
2081 
2082  for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2083  /* printf("\nPattern: %s :\n", current->pattern); */
2084  total++;
2085 
2086  result = 1;
2087  if (current->pattern[1] != &invalid_utf8_newline_cr)
2088  {
2089  if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2090  result = 0;
2091  if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2092  result = 0;
2093  } else {
2095  if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2096  result = 0;
2098  }
2099 
2100  if (result) {
2101  successful++;
2102  }
2103 
2104  printf(".");
2105  if ((total % 60) == 0)
2106  printf("\n");
2107  }
2108 
2109  if ((total % 60) != 0)
2110  printf("\n");
2111 
2112  pcre2_match_data_free_8(mdata);
2113  pcre2_compile_context_free_8(ccontext);
2114 
2115  if (total == successful) {
2116  printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2117  return 0;
2118  } else {
2119  printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2120  return 1;
2121  }
2122 }
2123 
2124 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2125 
/* Stub used when Unicode or the 8-bit library is not built: there is
   nothing to test, so report success (0). */
static int invalid_utf8_regression_tests(void)
{
    return 0;
}
2130 
2131 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2132 
2133 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2134 
/* Option shorthands for the 16-bit test table:
   UDA - compile options: UTF mode, dot matches all, anchored.
   CI  - JIT options: complete matching with invalid-UTF support.
   CPI - as CI, plus soft partial matching. */
2135 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2136 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2137 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2138 
/* One invalid-UTF-16 JIT regression case, consumed by run_invalid_utf16_test(). */
2139 struct invalid_utf16_regression_test_case {
2140  uint32_t compile_options; /* options passed to pcre2_compile_16() */
2141  int jit_compile_options; /* options passed to pcre2_jit_compile_16(); also selects which match modes run */
2142  int start_offset; /* match start offset, relative to the untrimmed input */
2143  int skip_left; /* code units dropped from the front of input */
2144  int skip_right; /* code units dropped from the end of input */
2145  int match_start; /* expected start, handed to check_invalid_utf_result() */
2146  int match_end; /* expected end, handed to check_invalid_utf_result() */
2147  const PCRE2_UCHAR16 *pattern[2]; /* up to two zero-terminated patterns; a NULL slot is skipped */
2148  const PCRE2_UCHAR16 *input; /* zero-terminated subject */
2149 };
2150 
/* Zero-terminated 16-bit patterns (allany16 .. generic16) and subjects
   (test16_1 .. test16_12) referenced by the 16-bit test table. Many subjects
   contain code units in the surrogate range 0xd800-0xdfff, alone or in the
   wrong order, i.e. sequences that are not well-formed UTF-16. */
2151 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2152 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2153 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2154 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2155 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2156 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2157 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2158 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2159 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2160 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2161 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2162 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2163 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2164 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2165 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2166 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2167 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2168 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2169 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2170 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2171 
/* 16-bit invalid-UTF test table. Columns follow the struct field order:
   compile options, JIT options, start_offset, skip_left, skip_right,
   match_start, match_end, { pattern 0, pattern 1 }, input.
   Offsets are in 16-bit code units. A match_start/match_end pair of -1/-1
   presumably means "no match expected" - the comparison itself lives in
   check_invalid_utf_result(); confirm there.
   The final row (NULL pattern 0) terminates the driver loop. */
2172 static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
2173  { UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2174  { UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2175  { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2176  { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2177  { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2178  { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
2179  { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
2180  { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2181  { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
2182  { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2183 
2184  { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2185  { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2186  { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2187  { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2188  { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2189  { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2190  { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2191  { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2192  { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2193  { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2194 
2195  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2196  { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2197  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2198  { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2199 
2200  { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2201  { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2202  { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2203  { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2204  { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2205  { UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2206 
2207  { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2208  { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2209  { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2210 
2211  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
2212  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2213 
2214  { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2215  { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2216  { PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2217  { PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2218 
2219  { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2220 };
2221 
/* Drop the option shorthands used by the 16-bit test table; the 32-bit
   section that follows defines its own UDA / CI / CPI again. */
2222 #undef UDA
2223 #undef CI
2224 #undef CPI
2225 
2226 static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
2227  int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
2228 {
2230  int result, errorcode;
2231  PCRE2_SIZE length, erroroffset;
2232  const PCRE2_UCHAR16 *input;
2233  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
2234 
2235  if (current->pattern[i] == NULL)
2236  return 1;
2237 
2238  code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
2239  current->compile_options, &errorcode, &erroroffset, ccontext);
2240 
2241  if (!code) {
2242  printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2243  return 0;
2244  }
2245 
2246  if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
2247  printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2249  return 0;
2250  }
2251 
2252  input = current->input;
2253  length = 0;
2254 
2255  while (*input++ != 0)
2256  length++;
2257 
2258  length -= current->skip_left + current->skip_right;
2259 
2260  if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2261  result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2262  length, current->start_offset - current->skip_left, 0, mdata, NULL);
2263 
2264  if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2266  return 0;
2267  }
2268  }
2269 
2270  if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2271  result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2272  length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2273 
2274  if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2276  return 0;
2277  }
2278  }
2279 
2281  return 1;
2282 }
2283 
2284 static int invalid_utf16_regression_tests(void)
2285 {
2286  const struct invalid_utf16_regression_test_case *current;
2287  pcre2_compile_context_16 *ccontext;
2288  pcre2_match_data_16 *mdata;
2289  int total = 0, successful = 0;
2290  int result;
2291 
2292  printf("\nRunning invalid-utf16 JIT regression tests\n");
2293 
2296  mdata = pcre2_match_data_create_16(4, NULL);
2297 
2298  for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2299  /* printf("\nPattern: %s :\n", current->pattern); */
2300  total++;
2301 
2302  result = 1;
2303  if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2304  result = 0;
2305  if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2306  result = 0;
2307 
2308  if (result) {
2309  successful++;
2310  }
2311 
2312  printf(".");
2313  if ((total % 60) == 0)
2314  printf("\n");
2315  }
2316 
2317  if ((total % 60) != 0)
2318  printf("\n");
2319 
2320  pcre2_match_data_free_16(mdata);
2322 
2323  if (total == successful) {
2324  printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2325  return 0;
2326  } else {
2327  printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2328  return 1;
2329  }
2330 }
2331 
2332 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2333 
/* Stub used when Unicode or the 16-bit library is not built: there is
   nothing to test, so report success (0). */
static int invalid_utf16_regression_tests(void)
{
    return 0;
}
2338 
2339 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2340 
2341 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2342 
/* Option shorthands for the 32-bit test table:
   UDA - compile options: UTF mode, dot matches all, anchored.
   CI  - JIT options: complete matching with invalid-UTF support.
   CPI - as CI, plus soft partial matching. */
2343 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2344 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2345 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2346 
/* One invalid-UTF-32 JIT regression case, consumed by run_invalid_utf32_test(). */
2347 struct invalid_utf32_regression_test_case {
2348  uint32_t compile_options; /* options passed to pcre2_compile_32() */
2349  int jit_compile_options; /* options passed to pcre2_jit_compile_32(); also selects which match modes run */
2350  int start_offset; /* match start offset, relative to the untrimmed input */
2351  int skip_left; /* code units dropped from the front of input */
2352  int skip_right; /* code units dropped from the end of input */
2353  int match_start; /* expected start, handed to check_invalid_utf_result() */
2354  int match_end; /* expected end, handed to check_invalid_utf_result() */
2355  const PCRE2_UCHAR32 *pattern[2]; /* up to two zero-terminated patterns; a NULL slot is skipped */
2356  const PCRE2_UCHAR32 *input; /* zero-terminated subject */
2357 };
2358 
/* Zero-terminated 32-bit patterns (allany32 .. afternl32) and subjects
   (test32_1 .. test32_6) referenced by the 32-bit test table. The subjects
   mix valid code points with values that are not Unicode scalar values:
   0x110000 (above the 0x10ffff maximum) and the surrogate range
   0xd800-0xdfff. */
2359 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2360 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2361 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2362 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2363 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2364 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2365 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2366 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2367 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2368 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2369 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2370 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2371 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2372 
/* 32-bit invalid-UTF test table. Columns follow the struct field order:
   compile options, JIT options, start_offset, skip_left, skip_right,
   match_start, match_end, { pattern 0, pattern 1 }, input.
   Offsets are in 32-bit code units. A match_start/match_end pair of -1/-1
   presumably means "no match expected" - the comparison itself lives in
   check_invalid_utf_result(); confirm there.
   The final row (NULL pattern 0) terminates the driver loop. */
2373 static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
2374  { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2375  { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
2376  { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
2377  { UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
2378  { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2379  { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2380 
2381  { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2382  { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2383  { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
2384  { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2385  { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2386 
2387  { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
2388  { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
2389 
2390  { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2391  { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2392  { UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
2393  { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2394  { UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2395  { UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
2396 
2397  { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2398  { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
2399  { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2400  { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
2401  { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
2402 
2403  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
2404  { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
2405 
2406  { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2407 };
2408 
/* Drop the option shorthands so they stay local to the 32-bit test table. */
2409 #undef UDA
2410 #undef CI
2411 #undef CPI
2412 
2413 static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
2414  int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
2415 {
2417  int result, errorcode;
2418  PCRE2_SIZE length, erroroffset;
2419  const PCRE2_UCHAR32 *input;
2420  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);
2421 
2422  if (current->pattern[i] == NULL)
2423  return 1;
2424 
2425  code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
2426  current->compile_options, &errorcode, &erroroffset, ccontext);
2427 
2428  if (!code) {
2429  printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2430  return 0;
2431  }
2432 
2433  if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
2434  printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2436  return 0;
2437  }
2438 
2439  input = current->input;
2440  length = 0;
2441 
2442  while (*input++ != 0)
2443  length++;
2444 
2445  length -= current->skip_left + current->skip_right;
2446 
2447  if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2448  result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2449  length, current->start_offset - current->skip_left, 0, mdata, NULL);
2450 
2451  if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2453  return 0;
2454  }
2455  }
2456 
2457  if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2458  result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2459  length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2460 
2461  if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2463  return 0;
2464  }
2465  }
2466 
2468  return 1;
2469 }
2470 
2471 static int invalid_utf32_regression_tests(void)
2472 {
2473  const struct invalid_utf32_regression_test_case *current;
2474  pcre2_compile_context_32 *ccontext;
2475  pcre2_match_data_32 *mdata;
2476  int total = 0, successful = 0;
2477  int result;
2478 
2479  printf("\nRunning invalid-utf32 JIT regression tests\n");
2480 
2483  mdata = pcre2_match_data_create_32(4, NULL);
2484 
2485  for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2486  /* printf("\nPattern: %s :\n", current->pattern); */
2487  total++;
2488 
2489  result = 1;
2490  if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2491  result = 0;
2492  if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2493  result = 0;
2494 
2495  if (result) {
2496  successful++;
2497  }
2498 
2499  printf(".");
2500  if ((total % 60) == 0)
2501  printf("\n");
2502  }
2503 
2504  if ((total % 60) != 0)
2505  printf("\n");
2506 
2507  pcre2_match_data_free_32(mdata);
2509 
2510  if (total == successful) {
2511  printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2512  return 0;
2513  } else {
2514  printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2515  return 1;
2516  }
2517 }
2518 
2519 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2520 
/* Stub used when Unicode or the 32-bit library is not built: there is
   nothing to test, so report success (0). */
static int invalid_utf32_regression_tests(void)
{
    return 0;
}
2525 
2526 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2527 
2528 /* End of pcre2_jit_test.c */
CI –.
Definition: I.hpp:64
static Uint4 match_start
static SQLCHAR output[256]
Definition: print.c:5
static char * return_value
Definition: readconf.c:24
Uint4 uint32_t
#define NULL
Definition: ncbistd.hpp:225
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
static int input()
int i
uint16_t PCRE2_UCHAR16
Definition: pcre2.h:473
PCRE2_SPTR8 pcre2_get_mark_8(pcre2_match_data_8 *)
pcre2_code_8 * pcre2_compile_8(PCRE2_SPTR8, size_t, uint32_t, int *, size_t *, pcre2_compile_context_8 *)
#define PCRE2_DOLLAR_ENDONLY
Definition: pcre2.h:129
void pcre2_jit_stack_assign_32(pcre2_match_context_32 *, pcre2_jit_callback_32, void *)
struct pcre2_real_code_16 pcre2_code_16
Definition: pcre2.h:962
pcre2_code_32 * pcre2_compile_32(PCRE2_SPTR32, size_t, uint32_t, int *, size_t *, pcre2_compile_context_32 *)
pcre2_jit_stack_8 * pcre2_jit_stack_create_8(size_t, size_t, pcre2_general_context_8 *)
#define PCRE2_ZERO_TERMINATED
Definition: pcre2.h:487
void pcre2_compile_context_free_16(pcre2_compile_context_16 *)
struct pcre2_real_match_context_8 pcre2_match_context_8
Definition: pcre2.h:958
#define PCRE2_NOTEMPTY_ATSTART
Definition: pcre2.h:185
const PCRE2_UCHAR8 * PCRE2_SPTR8
Definition: pcre2.h:476
int pcre2_set_newline_32(pcre2_compile_context_32 *, uint32_t)
pcre2_compile_context_16 * pcre2_compile_context_create_16(pcre2_general_context_16 *)
void pcre2_jit_stack_free_32(pcre2_jit_stack_32 *)
#define PCRE2_ENDANCHORED
Definition: pcre2.h:113
#define PCRE2_BSR_ANYCRLF
Definition: pcre2.h:224
int pcre2_config_16(uint32_t, void *)
void pcre2_match_data_free_8(pcre2_match_data_8 *)
int pcre2_set_newline_16(pcre2_compile_context_16 *, uint32_t)
pcre2_match_context_16 * pcre2_match_context_create_16(pcre2_general_context_16 *)
#define PCRE2_UNSET
Definition: pcre2.h:488
struct pcre2_real_match_data_16 pcre2_match_data_16
Definition: pcre2.h:962
#define PCRE2_UCP
Definition: pcre2.h:142
#define PCRE2_AUTO_CALLOUT
Definition: pcre2.h:127
#define PCRE2_CONFIG_UNICODE
Definition: pcre2.h:461
int pcre2_set_bsr_8(pcre2_compile_context_8 *, uint32_t)
#define PCRE2_DOTALL
Definition: pcre2.h:130
pcre2_match_data_32 * pcre2_match_data_create_32(uint32_t, pcre2_general_context_32 *)
int pcre2_jit_match_32(const pcre2_code_32 *, PCRE2_SPTR32, size_t, size_t, uint32_t, pcre2_match_data_32 *, pcre2_match_context_32 *)
uint8_t PCRE2_UCHAR8
Definition: pcre2.h:472
struct pcre2_real_match_context_32 pcre2_match_context_32
Definition: pcre2.h:966
int pcre2_config_8(uint32_t, void *)
#define PCRE2_NEWLINE_ANYCRLF
Definition: pcre2.h:220
#define PCRE2_EXTENDED
Definition: pcre2.h:132
PCRE2_SPTR32 pcre2_get_mark_32(pcre2_match_data_32 *)
void pcre2_jit_stack_assign_16(pcre2_match_context_16 *, pcre2_jit_callback_16, void *)
int pcre2_match_32(const pcre2_code_32 *, PCRE2_SPTR32, size_t, size_t, uint32_t, pcre2_match_data_32 *, pcre2_match_context_32 *)
size_t * pcre2_get_ovector_pointer_16(pcre2_match_data_16 *)
void pcre2_jit_stack_free_8(pcre2_jit_stack_8 *)
#define PCRE2_NEWLINE_CR
Definition: pcre2.h:216
void pcre2_match_context_free_8(pcre2_match_context_8 *)
void pcre2_code_free_8(pcre2_code_8 *)
void pcre2_match_data_free_16(pcre2_match_data_16 *)
void pcre2_code_free_32(pcre2_code_32 *)
#define PCRE2_SIZE
Definition: pcre2.h:485
struct pcre2_real_jit_stack_16 pcre2_jit_stack_16
Definition: pcre2.h:962
#define PCRE2_MATCH_UNSET_BACKREF
Definition: pcre2.h:134
#define PCRE2_PARTIAL_SOFT
Definition: pcre2.h:186
pcre2_compile_context_8 * pcre2_compile_context_create_8(pcre2_general_context_8 *)
void pcre2_code_free_16(pcre2_code_16 *)
size_t * pcre2_get_ovector_pointer_32(pcre2_match_data_32 *)
pcre2_match_data_16 * pcre2_match_data_create_16(uint32_t, pcre2_general_context_16 *)
#define PCRE2_NOTBOL
Definition: pcre2.h:182
#define PCRE2_PARTIAL_HARD
Definition: pcre2.h:187
#define PCRE2_CASELESS
Definition: pcre2.h:128
void pcre2_match_data_free_32(pcre2_match_data_32 *)
uint32_t PCRE2_UCHAR32
Definition: pcre2.h:474
size_t * pcre2_get_ovector_pointer_8(pcre2_match_data_8 *)
struct pcre2_real_code_32 pcre2_code_32
Definition: pcre2.h:966
struct pcre2_real_compile_context_16 pcre2_compile_context_16
Definition: pcre2.h:962
int pcre2_jit_match_16(const pcre2_code_16 *, PCRE2_SPTR16, size_t, size_t, uint32_t, pcre2_match_data_16 *, pcre2_match_context_16 *)
int pcre2_set_match_limit_8(pcre2_match_context_8 *, uint32_t)
int pcre2_jit_match_8(const pcre2_code_8 *, PCRE2_SPTR8, size_t, size_t, uint32_t, pcre2_match_data_8 *, pcre2_match_context_8 *)
int pcre2_set_match_limit_16(pcre2_match_context_16 *, uint32_t)
struct pcre2_real_jit_stack_8 pcre2_jit_stack_8
Definition: pcre2.h:958
void pcre2_match_context_free_16(pcre2_match_context_16 *)
void pcre2_compile_context_free_8(pcre2_compile_context_8 *)
int pcre2_set_bsr_32(pcre2_compile_context_32 *, uint32_t)
int pcre2_set_bsr_16(pcre2_compile_context_16 *, uint32_t)
int pcre2_jit_compile_16(pcre2_code_16 *, uint32_t)
struct pcre2_real_match_data_32 pcre2_match_data_32
Definition: pcre2.h:966
pcre2_jit_stack_32 * pcre2_jit_stack_create_32(size_t, size_t, pcre2_general_context_32 *)
void pcre2_match_context_free_32(pcre2_match_context_32 *)
pcre2_match_context_32 * pcre2_match_context_create_32(pcre2_general_context_32 *)
#define PCRE2_DUPNAMES
Definition: pcre2.h:131
pcre2_jit_stack_16 * pcre2_jit_stack_create_16(size_t, size_t, pcre2_general_context_16 *)
int pcre2_set_newline_8(pcre2_compile_context_8 *, uint32_t)
#define PCRE2_JIT_PARTIAL_HARD
Definition: pcre2.h:173
struct pcre2_real_match_data_8 pcre2_match_data_8
Definition: pcre2.h:958
int pcre2_match_16(const pcre2_code_16 *, PCRE2_SPTR16, size_t, size_t, uint32_t, pcre2_match_data_16 *, pcre2_match_context_16 *)
struct pcre2_real_code_8 pcre2_code_8
Definition: pcre2.h:958
#define PCRE2_JIT_COMPLETE
Definition: pcre2.h:171
#define PCRE2_NEWLINE_CRLF
Definition: pcre2.h:218
pcre2_match_context_8 * pcre2_match_context_create_8(pcre2_general_context_8 *)
#define PCRE2_MULTILINE
Definition: pcre2.h:135
pcre2_match_data_8 * pcre2_match_data_create_8(uint32_t, pcre2_general_context_8 *)
int pcre2_jit_compile_8(pcre2_code_8 *, uint32_t)
void pcre2_jit_stack_assign_8(pcre2_match_context_8 *, pcre2_jit_callback_8, void *)
#define PCRE2_JIT_PARTIAL_SOFT
Definition: pcre2.h:172
#define PCRE2_UTF
Definition: pcre2.h:144
pcre2_code_16 * pcre2_compile_16(PCRE2_SPTR16, size_t, uint32_t, int *, size_t *, pcre2_compile_context_16 *)
PCRE2_SPTR16 pcre2_get_mark_16(pcre2_match_data_16 *)
#define PCRE2_CONFIG_JIT
Definition: pcre2.h:452
struct pcre2_real_jit_stack_32 pcre2_jit_stack_32
Definition: pcre2.h:966
int pcre2_config_32(uint32_t, void *)
#define PCRE2_NO_UTF_CHECK
Definition: pcre2.h:112
void pcre2_compile_context_free_32(pcre2_compile_context_32 *)
#define PCRE2_BSR_UNICODE
Definition: pcre2.h:223
#define PCRE2_NO_START_OPTIMIZE
Definition: pcre2.h:141
void pcre2_jit_stack_free_16(pcre2_jit_stack_16 *)
struct pcre2_real_compile_context_8 pcre2_compile_context_8
Definition: pcre2.h:958
#define PCRE2_ERROR_PARTIAL
Definition: pcre2.h:334
#define PCRE2_FIRSTLINE
Definition: pcre2.h:133
#define PCRE2_NOTEMPTY
Definition: pcre2.h:184
struct pcre2_real_compile_context_32 pcre2_compile_context_32
Definition: pcre2.h:966
int pcre2_set_match_limit_32(pcre2_match_context_32 *, uint32_t)
#define PCRE2_NEWLINE_ANY
Definition: pcre2.h:219
#define PCRE2_NEWLINE_LF
Definition: pcre2.h:217
struct pcre2_real_match_context_16 pcre2_match_context_16
Definition: pcre2.h:962
#define PCRE2_NOTEOL
Definition: pcre2.h:183
int pcre2_jit_compile_32(pcre2_code_32 *, uint32_t)
pcre2_compile_context_32 * pcre2_compile_context_create_32(pcre2_general_context_32 *)
int pcre2_match_8(const pcre2_code_8 *, PCRE2_SPTR8, size_t, size_t, uint32_t, pcre2_match_data_8 *, pcre2_match_context_8 *)
#define PCRE2_CONFIG_JITTARGET
Definition: pcre2.h:453
static int check_ascii(const char *input)
#define MU
#define CMU
static int regression_tests(void)
#define M
#define MP
#define F_DIFF
static int invalid_utf32_regression_tests(void)
static struct regression_test_case regression_test_cases[]
#define F_NO16
int main(void)
#define CM
#define F_NOMATCH
#define F_NO32
#define A
#define F_NO8
#define OFFSET_MASK
#define F_PROPERTY
static int invalid_utf8_regression_tests(void)
#define BSR(x)
#define OVECTOR_SIZE
#define F_FORCECONV
#define GET_BSR(x)
#define MUP
static int invalid_utf16_regression_tests(void)
#define GET_NEWLINE(x)
#define CMUP
#define U
static BOOL utf
Definition: pcre2grep.c:291
Definition: inftrees.h:24
Definition: type.c:6
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:57:42 2024 by modify_doxy.py rev. 669887