/*
 * Code unit width requested from the PCRE2 header.
 *
 * NOTE(review): according to the PCRE2 API documentation, a width of 0 means
 * the header does not map the generic function/type names onto one library,
 * so the program has to use the explicitly suffixed _8/_16/_32 names. The
 * macro must be defined before pcre2.h is included; the include itself is
 * not visible in this part of the file -- confirm the ordering.
 */
#define PCRE2_CODE_UNIT_WIDTH 0
103 #if defined SUPPORT_PCRE2_8
105 #elif defined SUPPORT_PCRE2_16
107 #elif defined SUPPORT_PCRE2_32
111 printf(
"JIT must be enabled to run pcre2_jit_test\n");
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
/*
 * Short aliases for the PCRE2 compile-option combinations that appear as the
 * first initializer of the regression table entries.
 *
 * The alias name spells out which options are OR-ed together:
 *   C = PCRE2_CASELESS, M = PCRE2_MULTILINE, U = PCRE2_UTF, P = PCRE2_UCP.
 *
 * Each alias is fully parenthesized so it can be combined with further
 * options (for example "MU | PCRE2_DUPNAMES") without precedence surprises.
 */
#define M    (PCRE2_MULTILINE)
#define U    (PCRE2_UTF)
#define MP   (PCRE2_MULTILINE | PCRE2_UCP)
#define MU   (PCRE2_MULTILINE | PCRE2_UTF)
#define MUP  (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CM   (PCRE2_CASELESS | PCRE2_MULTILINE)
#define CMU  (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
/*
 * Helpers for a value that carries two settings in one integer:
 *   - the low 16 bits hold the newline setting (read with GET_NEWLINE),
 *   - the bits from 16 upwards hold the \R (BSR) setting, placed there with
 *     BSR(x) and read back with GET_BSR.
 * NOTE(review): presumably this is the second initializer of each table
 * entry; none of the visible entries uses BSR() -- confirm against the
 * struct declaration.
 */
#define BSR(x)         ((x) << 16)
#define GET_NEWLINE(x) ((x) & 0xffff)
#define GET_BSR(x)     ((x) >> 16)

/* One-letter alias used as the second initializer of most table entries. */
#define A              PCRE2_NEWLINE_ANYCRLF

/*
 * Layout of the fourth initializer of a table entry: OFFSET_MASK selects the
 * low 16 bits (small integers such as 1, 2 or 3 appear there in the table),
 * and the F_* flags are single bits above that range which the entries OR in
 * (for example "0 | F_NOMATCH").
 *
 * NOTE(review): F_NO16 and F_NO32 have the same value, so an entry flagged
 * with one cannot be told apart from an entry flagged with the other. The
 * values are kept as they are because the code that tests these bits is not
 * visible here -- confirm whether the overlap is intentional before changing.
 * The meaning of each flag is likewise defined by that consuming code.
 */
#define OFFSET_MASK    0x00ffff
#define F_NO8          0x010000
#define F_NO16         0x020000
#define F_NO32         0x020000
#define F_NOMATCH      0x040000
#define F_DIFF         0x080000
#define F_FORCECONV    0x100000
#define F_PROPERTY     0x200000
161 {
MU,
A, 0, 0,
"AbC",
"AbAbC" },
162 {
MU,
A, 0, 0,
"ACCEPT",
"AACACCACCEACCEPACCEPTACCEPTT" },
163 {
CMU,
A, 0, 0,
"aA#\xc3\xa9\xc3\x81",
"aA#Aa#\xc3\x89\xc3\xa1" },
164 {
M,
A, 0, 0,
"[^a]",
"aAbB" },
165 {
CM,
A, 0, 0,
"[^m]",
"mMnN" },
166 {
M,
A, 0, 0,
"a[^b][^#]",
"abacd" },
167 {
CM,
A, 0, 0,
"A[^B][^E]",
"abacd" },
168 {
CMU,
A, 0, 0,
"[^x][^#]",
"XxBll" },
169 {
MU,
A, 0, 0,
"[^a]",
"aaa\xc3\xa1#Ab" },
170 {
CMU,
A, 0, 0,
"[^A]",
"aA\xe6\x92\xad" },
171 {
MU,
A, 0, 0,
"\\W(\\W)?\\w",
"\r\n+bc" },
172 {
MU,
A, 0, 0,
"\\W(\\W)?\\w",
"\n\r+bc" },
173 {
MU,
A, 0, 0,
"\\W(\\W)?\\w",
"\r\r+bc" },
174 {
MU,
A, 0, 0,
"\\W(\\W)?\\w",
"\n\n+bc" },
175 {
MU,
A, 0, 0,
"[axd]",
"sAXd" },
176 {
CMU,
A, 0, 0,
"[axd]",
"sAXd" },
178 {
MU,
A, 0, 0,
"[a-dA-C]",
"\xe6\x92\xad\xc3\xa9.B" },
179 {
MU,
A, 0, 0,
"[^a-dA-C]",
"\xe6\x92\xad\xc3\xa9" },
180 {
CMU,
A, 0, 0,
"[^\xc3\xa9]",
"\xc3\xa9\xc3\x89." },
181 {
MU,
A, 0, 0,
"[^\xc3\xa9]",
"\xc3\xa9\xc3\x89." },
182 {
MU,
A, 0, 0,
"[^a]",
"\xc2\x80[]" },
183 {
CMU,
A, 0, 0,
"\xf0\x90\x90\xa7",
"\xf0\x90\x91\x8f" },
184 {
CM,
A, 0, 0,
"1a2b3c4",
"1a2B3c51A2B3C4" },
188 #ifndef NEVER_BACKSLASH_C
189 {
M,
A, 0, 0,
"\\Ca",
"cda" },
190 {
CM,
A, 0, 0,
"\\Ca",
"CDA" },
194 {
CMUP,
A, 0, 0,
"\xf0\x90\x90\x80\xf0\x90\x90\xa8",
"\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 {
CMUP,
A, 0, 0,
"\xf0\x90\x90\x80{2}",
"\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 {
CMUP,
A, 0, 0,
"\xf0\x90\x90\xa8{2}",
"\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 {
CMUP,
A, 0, 0,
"\xe1\xbd\xb8\xe1\xbf\xb8",
"\xe1\xbf\xb8\xe1\xbd\xb8" },
198 {
M,
A, 0, 0,
"[3-57-9]",
"5" },
199 {
PCRE2_AUTO_CALLOUT,
A, 0, 0,
"12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 { 0,
A, 0, 0,
"..a.......b",
"bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
202 { 0,
A, 0, 0,
"..a.....b",
"bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
205 {
MU,
A, 0, 0,
"\\b[^A]",
"A_B#" },
207 {
MU,
A, 0, 0,
"\\B[^,]\\b[^s]\\b",
"#X" },
208 {
MP,
A, 0, 0,
"\\B",
"_\xa1" },
210 {
MUP,
A, 0, 0,
"\\b",
"\xe6\x92\xad!" },
211 {
MUP,
A, 0, 0,
"\\B",
"_\xc2\xa1\xc3\xa1\xc2\x85" },
212 {
MUP,
A, 0, 0,
"\\b[^A]\\B[^c]\\b[^_]\\B",
"_\xc3\xa1\xe2\x80\xa8" },
213 {
MUP,
A, 0, 0,
"\\b\\w+\\B",
"\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
215 {
CMUP,
A, 0, 0,
"\\By",
"\xf0\x90\x90\xa8y" },
218 { 0, 0, 0, 0,
"^ab",
"ab" },
219 { 0, 0, 0, 0 |
F_NOMATCH,
"^ab",
"aab" },
221 {
MU,
A, 0, 0,
"^-",
"\xe2\x80\xa8--\xc2\x85-\r\n-" },
225 { 0, 0, 0, 0,
"ab$",
"ab" },
226 { 0, 0, 0, 0 |
F_NOMATCH,
"ab$",
"abab\n\n" },
255 {
M,
A, 0, 0,
"\\Aa",
"aaa" },
257 {
M,
A, 0, 1,
"\\Ga",
"aaa" },
259 {
M,
A, 0, 0,
"a\\z",
"aaa" },
263 {
MU,
A, 0, 0,
"(ab|bb|cd)",
"bacde" },
264 {
MU,
A, 0, 0,
"(?:ab|a)(bc|c)",
"ababc" },
265 {
MU,
A, 0, 0,
"((ab|(cc))|(bb)|(?:cd|efg))",
"abac" },
266 {
CMU,
A, 0, 0,
"((aB|(Cc))|(bB)|(?:cd|EFg))",
"AcCe" },
267 {
MU,
A, 0, 0,
"((ab|(cc))|(bb)|(?:cd|ebg))",
"acebebg" },
268 {
MU,
A, 0, 0,
"(?:(a)|(?:b))(cc|(?:d|e))(a|b)k",
"accabdbbccbk" },
269 {
MU,
A, 0, 0,
"\xc7\x82|\xc6\x82",
"\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 {
MU,
A, 0, 0,
"=\xc7\x82|#\xc6\x82",
"\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
271 {
MU,
A, 0, 0,
"\xc7\x82\xc7\x83|\xc6\x82\xc6\x82",
"\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
272 {
MU,
A, 0, 0,
"\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84",
"\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
273 {
U,
A, 0, 0,
"\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80",
"\xdf\xbf\xc2\x80\xe4\x84\x80" },
274 {
U,
A, 0, 0,
"(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#",
"\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
275 {
CM,
A, 0, 0,
"ab|cd",
"CD" },
276 {
CM,
A, 0, 0,
"a1277|a1377|bX487",
"bx487" },
277 {
CM,
A, 0, 0,
"a1277|a1377|bx487",
"bX487" },
278 { 0,
A, 0, 0,
"(a|)b*+a",
"a" },
279 { 0,
A, 0, 0 |
F_NOMATCH,
"(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa",
"1234567890123456ax" },
282 {
MU,
A, 0, 0,
"(?:a)?a",
"laab" },
283 {
CMU,
A, 0, 0,
"(A)?A",
"llaab" },
284 {
MU,
A, 0, 0,
"(a)?\?a",
"aab" },
285 {
MU,
A, 0, 0,
"(a)?a",
"manm" },
286 {
CMU,
A, 0, 0,
"(a|b)?\?d((?:e)?)",
"ABABdx" },
287 {
MU,
A, 0, 0,
"(a|b)?\?d((?:e)?)",
"abcde" },
288 {
MU,
A, 0, 0,
"((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m",
"abgnbgnnbgdnmm" },
291 {
MU,
A, 0, 0,
"(aa)+aa",
"aaaaaaa" },
292 {
MU,
A, 0, 0,
"(aa)+?aa",
"aaaaaaa" },
293 {
MU,
A, 0, 0,
"(?:aba|ab|a)+l",
"ababamababal" },
294 {
MU,
A, 0, 0,
"(?:aba|ab|a)+?l",
"ababamababal" },
295 {
MU,
A, 0, 0,
"(a(?:bc|cb|b|c)+?|ss)+e",
"accssabccbcacbccbbXaccssabccbcacbccbbe" },
296 {
MU,
A, 0, 0,
"(a(?:bc|cb|b|c)+|ss)+?e",
"accssabccbcacbccbbXaccssabccbcacbccbbe" },
297 {
MU,
A, 0, 0,
"(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+",
"bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
298 {
MU,
A, 0, 0,
"(aa|bb){8,1000}",
"abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
301 {
CMU,
A, 0, 0,
"(?:AA)*AB",
"aaaaaaamaaaaaaab" },
302 {
MU,
A, 0, 0,
"(?:aa)*?ab",
"aaaaaaamaaaaaaab" },
303 {
MU,
A, 0, 0,
"(aa|ab)*ab",
"aaabaaab" },
304 {
CMU,
A, 0, 0,
"(aa|Ab)*?aB",
"aaabaaab" },
305 {
MU,
A, 0, 0,
"(a|b)*(?:a)*(?:b)*m",
"abbbaaababanabbbaaababamm" },
306 {
MU,
A, 0, 0,
"(a|b)*?(?:a)*?(?:b)*?m",
"abbbaaababanabbbaaababamm" },
307 {
M,
A, 0, 0,
"a(a(\\1*)a|(b)b+){0}a",
"aa" },
308 {
M,
A, 0, 0,
"((?:a|)*){0}a",
"a" },
311 {
MU,
A, 0, 0,
"((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n",
"bmbmabmamaaamambmaman" },
312 {
MU,
A, 0, 0,
"(((ab)?cd)*ef)+g",
"abcdcdefcdefefmabcdcdefcdefefgg" },
313 {
MU,
A, 0, 0,
"(((ab)?\?cd)*?ef)+?g",
"abcdcdefcdefefmabcdcdefcdefefgg" },
314 {
MU,
A, 0, 0,
"(?:(ab)?c|(?:ab)+?d)*g",
"ababcdccababddg" },
315 {
MU,
A, 0, 0,
"(?:(?:ab)?\?c|(ab)+d)*?g",
"ababcdccababddg" },
318 {
MU,
A, 0, 0,
"(a+aab)+aaaab",
"aaaabcaaaabaabcaabcaaabaaaab" },
319 {
MU,
A, 0, 0,
"(a*a*aab)+x",
"aaaaabaabaaabmaabx" },
320 {
MU,
A, 0, 0,
"(a*?(b|ab)a*?)+x",
"aaaabcxbbaabaacbaaabaabax" },
321 {
MU,
A, 0, 0,
"(a+(ab|ad)a+)+x",
"aaabaaaadaabaaabaaaadaaax" },
322 {
MU,
A, 0, 0,
"(a?(a)a?)+(aaa)",
"abaaabaaaaaaaa" },
323 {
MU,
A, 0, 0,
"(a?\?(a)a?\?)+(b)",
"aaaacaaacaacacbaaab" },
324 {
MU,
A, 0, 0,
"(a{0,4}(b))+d",
"aaaaaabaabcaaaaabaaaaabd" },
325 {
MU,
A, 0, 0,
"(a{0,4}?[^b])+d+(a{0,4}[^b])d+",
"aaaaadaaaacaadddaaddd" },
326 {
MU,
A, 0, 0,
"(ba{2})+c",
"baabaaabacbaabaac" },
327 {
MU,
A, 0, 0,
"(a*+bc++)+",
"aaabbcaaabcccab" },
328 {
MU,
A, 0, 0,
"(a?+[^b])+",
"babaacacb" },
329 {
MU,
A, 0, 0,
"(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]",
"abaabaaacbaabaaaac" },
330 {
CMU,
A, 0, 0,
"([a-c]+[d-f]+?)+?g",
"aBdacdehAbDaFgA" },
331 {
CMU,
A, 0, 0,
"[c-f]+k",
"DemmFke" },
332 {
MU,
A, 0, 0,
"([DGH]{0,4}M)+",
"GGDGHDGMMHMDHHGHM" },
333 {
MU,
A, 0, 0,
"([a-c]{4,}s)+",
"abasabbasbbaabsbba" },
334 {
CMU,
A, 0, 0,
"[ace]{3,7}",
"AcbDAcEEcEd" },
335 {
CMU,
A, 0, 0,
"[ace]{3,7}?",
"AcbDAcEEcEd" },
336 {
CMU,
A, 0, 0,
"[ace]{3,}",
"AcbDAcEEcEd" },
337 {
CMU,
A, 0, 0,
"[ace]{3,}?",
"AcbDAcEEcEd" },
338 {
MU,
A, 0, 0,
"[ckl]{2,}?g",
"cdkkmlglglkcg" },
339 {
CMU,
A, 0, 0,
"[ace]{5}?",
"AcCebDAcEEcEd" },
340 {
MU,
A, 0, 0,
"([AbC]{3,5}?d)+",
"BACaAbbAEAACCbdCCbdCCAAbb" },
341 {
MU,
A, 0, 0,
"([^ab]{0,}s){2}",
"abaabcdsABamsDDs" },
342 {
MU,
A, 0, 0,
"\\b\\w+\\B",
"x,a_cd" },
343 {
MUP,
A, 0, 0,
"\\b[^\xc2\xa1]+\\B",
"\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
344 {
CMU,
A, 0, 0,
"[^b]+(a*)([^c]?d{3})",
"aaaaddd" },
345 {
CMUP,
A, 0, 0,
"\xe1\xbd\xb8{2}",
"\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
346 {
CMU,
A, 0, 0,
"[^\xf0\x90\x90\x80]{2,4}@",
"\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
347 {
CMU,
A, 0, 0,
"[^\xe1\xbd\xb8][^\xc3\xa9]",
"\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
348 {
MU,
A, 0, 0,
"[^\xe1\xbd\xb8][^\xc3\xa9]",
"\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
349 {
MU,
A, 0, 0,
"[^\xe1\xbd\xb8]{3,}?",
"##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
350 {
MU,
A, 0, 0,
"\\d+123",
"987654321,01234" },
351 {
MU,
A, 0, 0,
"abcd*|\\w+xy",
"aaaaa,abxyz" },
352 {
MU,
A, 0, 0,
"(?:abc|((?:amc|\\b\\w*xy)))",
"aaaaa,abxyz" },
353 {
MU,
A, 0, 0,
"a(?R)|([a-z]++)#",
".abcd.abcd#."},
354 {
MU,
A, 0, 0,
"a(?R)|([a-z]++)#",
".abcd.mbcd#."},
355 {
MU,
A, 0, 0,
".[ab]*.",
"xx" },
356 {
MU,
A, 0, 0,
".[ab]*a",
"xxa" },
357 {
MU,
A, 0, 0,
".[ab]?.",
"xx" },
358 {
MU,
A, 0, 0,
"_[ab]+_*a",
"_aa" },
359 {
MU,
A, 0, 0,
"#(A+)#\\d+",
"#A#A#0" },
360 {
MU,
A, 0, 0,
"(?P<size>\\d+)m|M",
"4M" },
362 { 0,
A, 0, 0,
"<(\\w+)[\\s\\w]+id>",
"<br><div id>" },
365 {
MU,
A, 0, 0,
"(?:(ab){2}){5}M",
"abababababababababababM" },
366 {
MU,
A, 0, 0,
"(?:ab|abab){1,5}M",
"abababababababababababM" },
367 {
MU,
A, 0, 0,
"(?>ab|abab){1,5}M",
"abababababababababababM" },
368 {
MU,
A, 0, 0,
"(?:ab|abab){1,5}?M",
"abababababababababababM" },
369 {
MU,
A, 0, 0,
"(?>ab|abab){1,5}?M",
"abababababababababababM" },
370 {
MU,
A, 0, 0,
"(?:(ab){1,4}?){1,3}?M",
"abababababababababababababM" },
371 {
MU,
A, 0, 0,
"(?:(ab){1,4}){1,3}abababababababababababM",
"ababababababababababababM" },
372 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?:(ab){1,4}){1,3}abababababababababababM",
"abababababababababababM" },
373 {
MU,
A, 0, 0,
"(ab){4,6}?M",
"abababababababM" },
376 {
MU,
A, 0, 0,
"(?:\\s)+(?:\\S)+",
"ab \t\xc3\xa9\xe6\x92\xad " },
377 {
MU,
A, 0, 0,
"(\\w)*(k)(\\W)?\?",
"abcdef abck11" },
378 {
MU,
A, 0, 0,
"\\((\\d)+\\)\\D",
"a() (83 (8)2 (9)ab" },
379 {
MU,
A, 0, 0,
"\\w(\\s|(?:\\d)*,)+\\w\\wb",
"a 5, 4,, bb 5, 4,, aab" },
380 {
MU,
A, 0, 0,
"(\\v+)(\\V+)",
"\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
381 {
MU,
A, 0, 0,
"(\\h+)(\\H+)",
"\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
382 {
MU,
A, 0, 0,
"x[bcef]+",
"xaxdxecbfg" },
383 {
MU,
A, 0, 0,
"x[bcdghij]+",
"xaxexfxdgbjk" },
384 {
MU,
A, 0, 0,
"x[^befg]+",
"xbxexacdhg" },
385 {
MU,
A, 0, 0,
"x[^bcdl]+",
"xlxbxaekmd" },
386 {
MU,
A, 0, 0,
"x[^bcdghi]+",
"xbxdxgxaefji" },
387 {
MU,
A, 0, 0,
"x[B-Fb-f]+",
"xaxAxgxbfBFG" },
388 {
CMU,
A, 0, 0,
"\\x{e9}+",
"#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
389 {
CMU,
A, 0, 0,
"[^\\x{e9}]+",
"\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
390 {
MU,
A, 0, 0,
"[\\x02\\x7e]+",
"\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
391 {
MU,
A, 0, 0,
"[^\\x02\\x7e]+",
"\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
392 {
MU,
A, 0, 0,
"[\\x{81}-\\x{7fe}]+",
"#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
393 {
MU,
A, 0, 0,
"[^\\x{81}-\\x{7fe}]+",
"\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
394 {
MU,
A, 0, 0,
"[\\x{801}-\\x{fffe}]+",
"#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
395 {
MU,
A, 0, 0,
"[^\\x{801}-\\x{fffe}]+",
"\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
396 {
MU,
A, 0, 0,
"[\\x{10001}-\\x{10fffe}]+",
"#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
397 {
MU,
A, 0, 0,
"[^\\x{10001}-\\x{10fffe}]+",
"\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
399 {
M,
A, 0, 0 |
F_NOMATCH,
"[^\\S\\W]{6}",
"abcdefghijk" },
402 {
MUP,
A, 0, 0,
"[1-5\xc3\xa9\\w]",
"\xc3\xa1_" },
403 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[\xc3\x81\\p{Ll}]",
"A_\xc3\x89\xc3\xa1" },
404 {
MUP,
A, 0, 0,
"[\\Wd-h_x-z]+",
"a\xc2\xa1#_yhzdxi" },
411 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[\xc3\xa1-\xc3\xa8\\p{Any}]",
"abc" },
412 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[^\xc3\xa1-\xc3\xa8\\P{Any}]",
"abc" },
413 {
MUP,
A, 0, 0,
"[b-\xc3\xa9\\s]",
"a\xc\xe6\x92\xad" },
414 {
CMUP,
A, 0, 0,
"[\xc2\x85-\xc2\x89\xc3\x89]",
"\xc2\x84\xc3\xa9" },
415 {
MUP,
A, 0, 0,
"[^b-d^&\\s]{3,}",
"db^ !a\xe2\x80\xa8_ae" },
416 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}",
"\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
417 {
MU,
A, 0, 0 |
F_PROPERTY,
"[^\\P{L}\x9!D-F\xa]{2,3}",
"\x9,.DF\xa.CG\xc3\x81" },
418 {
CMUP,
A, 0, 0,
"[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]",
"\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
419 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}",
"\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
420 {
MUP,
A, 0, 0 |
F_PROPERTY,
"[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}",
"\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
423 {
MUP, 0, 0, 0,
"[\\p{Lu}\\P{Latin}]+",
"c\xEA\xA4\xAE,A,b" },
424 {
MUP, 0, 0, 0,
"[\\x{a92e}\\p{Lu}\\P{Latin}]+",
"c\xEA\xA4\xAE,A,b" },
425 {
CMUP, 0, 0, 0,
"[^S]\\B",
"\xe2\x80\x8a" },
426 {
MUP, 0, 0, 0 |
F_NOMATCH,
"[^[:print:]\\x{f6f6}]",
"\xef\x9b\xb6" },
427 {
MUP, 0, 0, 0,
"[[:xdigit:]\\x{6500}]#",
"\xe6\x94\x80#" },
431 {
MU,
A, 0, 0,
"(?:|ab||bc|a)+d",
"abcxabcabd" },
432 {
MU,
A, 0, 0,
"(|ab||bc|a)+d",
"abcxabcabd" },
433 {
MU,
A, 0, 0,
"(?:|ab||bc|a)*d",
"abcxabcabd" },
434 {
MU,
A, 0, 0,
"(|ab||bc|a)*d",
"abcxabcabd" },
435 {
MU,
A, 0, 0,
"(?:|ab||bc|a)+?d",
"abcxabcabd" },
436 {
MU,
A, 0, 0,
"(|ab||bc|a)+?d",
"abcxabcabd" },
437 {
MU,
A, 0, 0,
"(?:|ab||bc|a)*?d",
"abcxabcabd" },
438 {
MU,
A, 0, 0,
"(|ab||bc|a)*?d",
"abcxabcabd" },
439 {
MU,
A, 0, 0,
"(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m",
"abaacaccabacabalabaacaccabacabamm" },
440 {
MU,
A, 0, 0,
"(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m",
"abaacaccabacabalabaacaccabacabamm" },
443 {
MU,
A, 0, 3,
"(\\d|(?:\\w)*\\w)+",
"0ac01Hb" },
446 {
MU,
A, 0, 1,
"(\\w\\W\\w)+",
"ab#d" },
453 {
MU,
A, 0, 1,
"^",
"\r\n" },
472 {
MU,
A, 0, 0,
"\\R+",
"ab\r\n\r" },
473 {
MU,
A, 0, 0,
"\\R*",
"ab\r\n\r" },
474 {
MU,
A, 0, 0,
"\\R*",
"\r\n\r" },
475 {
MU,
A, 0, 0,
"\\R{2,4}",
"\r\nab\r\r" },
476 {
MU,
A, 0, 0,
"\\R{2,4}",
"\r\nab\n\n\n\r\r\r" },
477 {
MU,
A, 0, 0,
"\\R{2,}",
"\r\nab\n\n\n\r\r\r" },
478 {
MU,
A, 0, 0,
"\\R{0,3}",
"\r\n\r\n\r\n\r\n\r\n" },
480 {
MU,
A, 0, 0,
"\\R+\\R\\R",
"\r\r\r" },
481 {
MU,
A, 0, 0,
"\\R*\\R\\R",
"\n\r" },
482 {
MU,
A, 0, 0 |
F_NOMATCH,
"\\R{2,4}\\R\\R",
"\r\r\r" },
483 {
MU,
A, 0, 0,
"\\R{2,4}\\R\\R",
"\r\r\r\r" },
488 {
MU,
A, 0, 0,
"(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
489 "bababcdedefgheijijklmlmnop" },
490 {
MU,
A, 0, 0,
"(?>a(b)+a|(ab)?\?(b))an",
"abban" },
491 {
MU,
A, 0, 0,
"(?>ab+a|(?:ab)?\?b)an",
"abban" },
492 {
MU,
A, 0, 0,
"((?>ab|ad|)*?)(?>|c)*abad",
"abababcababad" },
493 {
MU,
A, 0, 0,
"(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)",
"aabaa#####da" },
494 {
MU,
A, 0, 0,
"((?>a|)+?)b",
"aaacaaab" },
495 {
MU,
A, 0, 0,
"(?>x|)*$",
"aaa" },
496 {
MU,
A, 0, 0,
"(?>(x)|)*$",
"aaa" },
497 {
MU,
A, 0, 0,
"(?>x|())*$",
"aaa" },
498 {
MU,
A, 0, 0,
"((?>[cxy]a|[a-d])*?)b",
"aaa+ aaab" },
499 {
MU,
A, 0, 0,
"((?>[cxy](a)|[a-d])*?)b",
"aaa+ aaab" },
500 {
MU,
A, 0, 0,
"(?>((?>(a+))))bab|(?>((?>(a+))))bb",
"aaaabaaabaabab" },
501 {
MU,
A, 0, 0,
"(?>(?>a+))bab|(?>(?>a+))bb",
"aaaabaaabaabab" },
502 {
MU,
A, 0, 0,
"(?>(a)c|(?>(c)|(a))a)b*?bab",
"aaaabaaabaabab" },
503 {
MU,
A, 0, 0,
"(?>ac|(?>c|a)a)b*?bab",
"aaaabaaabaabab" },
504 {
MU,
A, 0, 0,
"(?>(b)b|(a))*b(?>(c)|d)?x",
"ababcaaabdbx" },
505 {
MU,
A, 0, 0,
"(?>bb|a)*b(?>c|d)?x",
"ababcaaabdbx" },
506 {
MU,
A, 0, 0,
"(?>(bb)|a)*b(?>c|(d))?x",
"ababcaaabdbx" },
507 {
MU,
A, 0, 0,
"(?>(a))*?(?>(a))+?(?>(a))??x",
"aaaaaacccaaaaabax" },
508 {
MU,
A, 0, 0,
"(?>a)*?(?>a)+?(?>a)??x",
"aaaaaacccaaaaabax" },
509 {
MU,
A, 0, 0,
"(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x",
"aaaaaacccaaaaabax" },
510 {
MU,
A, 0, 0,
"(?>a|)*?(?>a|)+?(?>a|)??x",
"aaaaaacccaaaaabax" },
511 {
MU,
A, 0, 0,
"(?>a(?>(a{0,2}))*?b|aac)+b",
"aaaaaaacaaaabaaaaacaaaabaacaaabb" },
512 {
CM,
A, 0, 0,
"(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f",
"aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
513 {
MU,
A, 0, 0,
"(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d",
"aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
514 {
MU,
A, 0, 0,
"(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d",
"aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
516 {
MU,
A, 0, 0 |
F_PROPERTY,
"\\X",
"\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
517 {
MU,
A, 0, 0 |
F_PROPERTY,
"\\X+..",
"\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
521 {
MU,
A, 0, 0 |
F_PROPERTY,
"\\X{2,4}..",
"#\xcc\x8d#\xcc\x8d##" },
522 {
MU,
A, 0, 0,
"(c(ab)?+ab)+",
"cabcababcab" },
523 {
MU,
A, 0, 0,
"(?>(a+)b)+aabab",
"aaaabaaabaabab" },
527 {
MU,
A, 0, 0,
"(?:a|b)++m",
"mababbaaxababbaam" },
528 {
MU,
A, 0, 0,
"(?:a|b)*+m",
"mababbaaxababbaam" },
529 {
MU,
A, 0, 0,
"(?:a|b)*+m",
"ababbaaxababbaam" },
530 {
MU,
A, 0, 0,
"(a|b)++m",
"mababbaaxababbaam" },
531 {
MU,
A, 0, 0,
"(a|b)*+m",
"mababbaaxababbaam" },
532 {
MU,
A, 0, 0,
"(a|b)*+m",
"ababbaaxababbaam" },
533 {
MU,
A, 0, 0,
"(a|b(*ACCEPT))++m",
"maaxab" },
534 {
MU,
A, 0, 0,
"(?:b*)++m",
"bxbbxbbbxm" },
535 {
MU,
A, 0, 0,
"(?:b*)++m",
"bxbbxbbbxbbm" },
536 {
MU,
A, 0, 0,
"(?:b*)*+m",
"bxbbxbbbxm" },
537 {
MU,
A, 0, 0,
"(?:b*)*+m",
"bxbbxbbbxbbm" },
538 {
MU,
A, 0, 0,
"(b*)++m",
"bxbbxbbbxm" },
539 {
MU,
A, 0, 0,
"(b*)++m",
"bxbbxbbbxbbm" },
540 {
MU,
A, 0, 0,
"(b*)*+m",
"bxbbxbbbxm" },
541 {
MU,
A, 0, 0,
"(b*)*+m",
"bxbbxbbbxbbm" },
542 {
MU,
A, 0, 0,
"(?:a|(b))++m",
"mababbaaxababbaam" },
543 {
MU,
A, 0, 0,
"(?:(a)|b)*+m",
"mababbaaxababbaam" },
544 {
MU,
A, 0, 0,
"(?:(a)|(b))*+m",
"ababbaaxababbaam" },
545 {
MU,
A, 0, 0,
"(a|(b))++m",
"mababbaaxababbaam" },
546 {
MU,
A, 0, 0,
"((a)|b)*+m",
"mababbaaxababbaam" },
547 {
MU,
A, 0, 0,
"((a)|(b))*+m",
"ababbaaxababbaam" },
548 {
MU,
A, 0, 0,
"(a|(b)(*ACCEPT))++m",
"maaxab" },
549 {
MU,
A, 0, 0,
"(?:(b*))++m",
"bxbbxbbbxm" },
550 {
MU,
A, 0, 0,
"(?:(b*))++m",
"bxbbxbbbxbbm" },
551 {
MU,
A, 0, 0,
"(?:(b*))*+m",
"bxbbxbbbxm" },
552 {
MU,
A, 0, 0,
"(?:(b*))*+m",
"bxbbxbbbxbbm" },
553 {
MU,
A, 0, 0,
"((b*))++m",
"bxbbxbbbxm" },
554 {
MU,
A, 0, 0,
"((b*))++m",
"bxbbxbbbxbbm" },
555 {
MU,
A, 0, 0,
"((b*))*+m",
"bxbbxbbbxm" },
556 {
MU,
A, 0, 0,
"((b*))*+m",
"bxbbxbbbxbbm" },
557 {
MU,
A, 0, 0,
"(A)*+$",
"ABC" },
558 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)",
"bbaacaaccaaaacxbbbmbn" },
559 {
MU,
A, 0, 0,
"((?:b)++a)+(cd)*+m",
"bbababbacdcdnbbababbacdcdm" },
560 {
MU,
A, 0, 0,
"((?:(b))++a)+((c)d)*+m",
"bbababbacdcdnbbababbacdcdm" },
561 {
MU,
A, 0, 0,
"(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m",
"ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
562 {
MU,
A, 0, 0,
"(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m",
"ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
565 {
MU,
A, 0, 0,
"(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc",
"aaaaaabbbbbbbbc" },
566 {
CMU,
A, 0, 0,
"(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc",
"bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
567 {
CM,
A, 0, 0,
"(a{2,4})\\1",
"AaAaaAaA" },
568 {
MU,
A, 0, 0,
"(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc",
"aaaaaaaabbaabbbbaabbbbc" },
569 {
MU,
A, 0, 0,
"(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc",
"bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
570 {
MU,
A, 0, 0,
"(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc",
"bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
571 {
MU,
A, 0, 0,
"(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc",
"bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
572 {
MU,
A, 0, 0,
"(\\w+)b(\\1+)c",
"GabGaGaDbGaDGaDc" },
573 {
MU,
A, 0, 0,
"(?:(aa)|b)\\1?b",
"bb" },
574 {
CMU,
A, 0, 0,
"(aa|bb)(\\1*?)aa(\\1+?)",
"bBBbaaAAaaAAaa" },
575 {
MU,
A, 0, 0,
"(aa|bb)(\\1*?)(dd|)cc(\\3+?)",
"aaaaaccdd" },
576 {
CMU,
A, 0, 0,
"(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)",
"aAaABBbbAAaAcCaAcCaA" },
577 {
MU,
A, 0, 0,
"(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)",
"aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
578 {
CM,
A, 0, 0,
"(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)",
"aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
579 {
MU,
A, 0, 0,
"(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})",
"aaaaaaaaaaaaaaabaaaaa" },
580 {
MU,
A, 0, 0,
"(a(?:\\1|)a){3}b",
"aaaaaaaaaaab" },
581 {
M,
A, 0, 0,
"(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d",
"bb#b##d" },
587 {
CMUP,
A, 0, 0,
"(\xf0\x90\x90\x80)\\1",
"\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
591 {
MU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa",
"aabbaaaaaa" },
593 {
MU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m",
"aaaaaaaabbbbaabbbbm" },
597 {
MU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m",
"aaaaaabbbbbbaabbbbbbbbbbm" },
598 {
MU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>aa)|(?<A>bb))\\k<A>*?m",
"aaaaaabbbbbbaabbbbbbbbbbm" },
599 {
MU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?",
"aaaabbbbaaaabbbbbbbbbb" },
600 {
CMU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M",
"aaaaaaaabbbbaabbbbm" },
601 {
CMU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M",
"aaaaaaaabbbbaabbbbm" },
602 {
CMU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M",
"aaaaaabbbbbbaabbbbbbbbbbm" },
603 {
CMU |
PCRE2_DUPNAMES,
A, 0, 0,
"(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?",
"aaaabbbbaaaabbbbbbbbbb" },
608 {
MU,
A, 0, 0,
"(?=xx|yy|zz)\\w{4}",
"abczzdefg" },
609 {
MU,
A, 0, 0,
"(?=((\\w+)b){3}|ab)",
"dbbbb ab" },
610 {
MU,
A, 0, 0,
"(?!ab|bc|cd)[a-z]{2}",
"Xabcdef" },
611 {
MU,
A, 0, 0,
"(?<=aaa|aa|a)a",
"aaa" },
612 {
MU,
A, 0, 2,
"(?<=aaa|aa|a)a",
"aaa" },
613 {
M,
A, 0, 0,
"(?<=aaa|aa|a)a",
"aaa" },
614 {
M,
A, 0, 2,
"(?<=aaa|aa|a)a",
"aaa" },
615 {
MU,
A, 0, 0,
"(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)",
"x5656" },
616 {
MU,
A, 0, 0,
"((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}",
"567v09708K12l00M00 567v09708K12l00M00K45K" },
617 {
MU,
A, 0, 0,
"(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d",
"bba bbab nbbkba nbbkba0kl" },
618 {
MU,
A, 0, 0,
"(?>a(?>(b+))a(?=(..)))*?k",
"acabbcabbaabacabaabbakk" },
619 {
MU,
A, 0, 0,
"((?(?=(a))a)+k)",
"bbak" },
620 {
MU,
A, 0, 0,
"((?(?=a)a)+k)",
"bbak" },
624 {
MU,
A, 0, 0,
"(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k",
"aaam bbam baaambaam abbabba baaambaamk" },
625 {
MU,
A, 0, 0,
"(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?",
"bca ssbc mabd ssbc mabc" },
626 {
MU,
A, 0, 0,
"(?:(?=ab)?[^n][^n])+m",
"ababcdabcdcdabnababcdabcdcdabm" },
627 {
MU,
A, 0, 0,
"(?:(?=a(b))?[^n][^n])+m",
"ababcdabcdcdabnababcdabcdcdabm" },
628 {
MU,
A, 0, 0,
"(?:(?=.(.))??\\1.)+m",
"aabbbcbacccanaabbbcbacccam" },
629 {
MU,
A, 0, 0,
"(?:(?=.)??[a-c])+m",
"abacdcbacacdcaccam" },
630 {
MU,
A, 0, 0,
"((?!a)?(?!([^a]))?)+$",
"acbab" },
631 {
MU,
A, 0, 0,
"((?!a)?\?(?!([^a]))?\?)+$",
"acbab" },
632 {
MU,
A, 0, 0,
"a(?=(?C)\\B(?C`x`))b",
"ab" },
633 {
MU,
A, 0, 0,
"a(?!(?C)\\B(?C`x`))bb|ab",
"abb" },
634 {
MU,
A, 0, 0,
"a(?=\\b|(?C)\\B(?C`x`))b",
"ab" },
635 {
MU,
A, 0, 0,
"a(?!\\b|(?C)\\B(?C`x`))bb|ab",
"abb" },
636 {
MU,
A, 0, 0,
"c(?(?=(?C)\\B(?C`x`))ab|a)",
"cab" },
637 {
MU,
A, 0, 0,
"c(?(?!(?C)\\B(?C`x`))ab|a)",
"cab" },
638 {
MU,
A, 0, 0,
"c(?(?=\\b|(?C)\\B(?C`x`))ab|a)",
"cab" },
639 {
MU,
A, 0, 0,
"c(?(?!\\b|(?C)\\B(?C`x`))ab|a)",
"cab" },
640 {
MU,
A, 0, 0,
"a(?=)b",
"ab" },
642 {
MU,
A, 0, 0,
"(?(?<!|(|a)))",
"a" },
649 {
MU,
A, 0, 0,
"a(*ACCEPT)b",
"ab" },
658 {
MU,
A, 0, 0,
"((a(*ACCEPT)b))",
"ab" },
659 {
MU,
A, 0, 0,
"(a(*FAIL)a|a)",
"aaa" },
660 {
MU,
A, 0, 0,
"(?=ab(*ACCEPT)b)a",
"ab" },
661 {
MU,
A, 0, 0,
"(?=(?:x|ab(*ACCEPT)b))",
"ab" },
662 {
MU,
A, 0, 0,
"(?=(a(b(*ACCEPT)b)))a",
"ab" },
668 {
MU,
A, 0, 0,
"(?(?=(a))a|b)+k",
"ababbalbbadabak" },
669 {
MU,
A, 0, 0,
"(?(?!(b))a|b)+k",
"ababbalbbadabak" },
670 {
MU,
A, 0, 0,
"(?(?=a)a|b)+k",
"ababbalbbadabak" },
671 {
MU,
A, 0, 0,
"(?(?!b)a|b)+k",
"ababbalbbadabak" },
672 {
MU,
A, 0, 0,
"(?(?=(a))a*|b*)+k",
"ababbalbbadabak" },
673 {
MU,
A, 0, 0,
"(?(?!(b))a*|b*)+k",
"ababbalbbadabak" },
674 {
MU,
A, 0, 0,
"(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak",
"aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
675 {
MU,
A, 0, 0,
"(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak",
"aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
676 {
MU,
A, 0, 0 |
F_DIFF,
"(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk",
"aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
677 {
MU,
A, 0, 0,
"(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk",
"aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
678 {
MU,
A, 0, 0,
"(?(?=a)a*|b*)+k",
"ababbalbbadabak" },
679 {
MU,
A, 0, 0,
"(?(?!b)a*|b*)+k",
"ababbalbbadabak" },
680 {
MU,
A, 0, 0,
"(?(?=a)ab)",
"a" },
681 {
MU,
A, 0, 0,
"(?(?<!b)c)",
"b" },
682 {
MU,
A, 0, 0,
"(?(DEFINE)a(b))",
"a" },
683 {
MU,
A, 0, 0,
"a(?(DEFINE)(?:b|(?:c?)+)*)",
"a" },
684 {
MU,
A, 0, 0,
"(?(?=.[a-c])[k-l]|[A-D])",
"kdB" },
685 {
MU,
A, 0, 0,
"(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+",
"aabbccddaa" },
686 {
MU,
A, 0, 0,
"(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}",
"aaabaaaba#aaabaaaba#aaabaaaba@" },
687 {
MU,
A, 0, 0,
"((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+",
"mol m10kk m088k _f_a_ mbkkl" },
688 {
MU,
A, 0, 0,
"(c)?\?(?(1)a|b)",
"cdcaa" },
689 {
MU,
A, 0, 0,
"(c)?\?(?(1)a|b)",
"cbb" },
690 {
MU,
A, 0, 0 |
F_DIFF,
"(?(?=(a))(aaaa|a?))+aak",
"aaaaab aaaaak" },
691 {
MU,
A, 0, 0,
"(?(?=a)(aaaa|a?))+aak",
"aaaaab aaaaak" },
692 {
MU,
A, 0, 0,
"(?(?!(b))(aaaa|a?))+aak",
"aaaaab aaaaak" },
693 {
MU,
A, 0, 0,
"(?(?!b)(aaaa|a?))+aak",
"aaaaab aaaaak" },
694 {
MU,
A, 0, 0 |
F_DIFF,
"(?(?=(a))a*)+aak",
"aaaaab aaaaak" },
695 {
MU,
A, 0, 0,
"(?(?=a)a*)+aak",
"aaaaab aaaaak" },
696 {
MU,
A, 0, 0,
"(?(?!(b))a*)+aak",
"aaaaab aaaaak" },
697 {
MU,
A, 0, 0,
"(?(?!b)a*)+aak",
"aaaaab aaaaak" },
698 {
MU,
A, 0, 0,
"(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k",
"abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
699 {
MU,
A, 0, 0,
"(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l",
"bc ddd abccabccl" },
700 {
MU,
A, 0, 0,
"(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd",
"bcabcacdb bdddd" },
701 {
MU,
A, 0, 0,
"(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l",
"ababccddabdbccd abcccl" },
702 {
MU,
A, 0, 0,
"((?:a|aa)(?(1)aaa))x",
"aax" },
703 {
MU,
A, 0, 0,
"(?(?!)a|b)",
"ab" },
704 {
MU,
A, 0, 0,
"(?(?!)a)",
"ab" },
708 {
MU,
A, 0, 0,
"(?:\\Ka)*aaaab",
"aaaaaaaa aaaaaaabb" },
709 {
MU,
A, 0, 0,
"(?>\\Ka\\Ka)*aaaab",
"aaaaaaaa aaaaaaaaaabb" },
710 {
MU,
A, 0, 0,
"a+\\K(?<=\\Gaa)a",
"aaaaaa" },
739 {
MU,
A, 0, 0,
"(a)(?1)",
"aa" },
740 {
MU,
A, 0, 0,
"((a))(?1)",
"aa" },
741 {
MU,
A, 0, 0,
"(b|a)(?1)",
"aa" },
742 {
MU,
A, 0, 0,
"(b|(a))(?1)",
"aa" },
743 {
MU,
A, 0, 0 |
F_NOMATCH,
"((a)(b)(?:a*))(?1)",
"aba" },
744 {
MU,
A, 0, 0,
"((a)(b)(?:a*))(?1)",
"abab" },
745 {
MU,
A, 0, 0,
"((a+)c(?2))b(?1)",
"aacaabaca" },
746 {
MU,
A, 0, 0,
"((?2)b|(a)){2}(?1)",
"aabab" },
747 {
MU,
A, 0, 0,
"(?1)(a)*+(?2)(b(?1))",
"aababa" },
748 {
MU,
A, 0, 0,
"(?1)(((a(*ACCEPT)))b)",
"axaa" },
749 {
MU,
A, 0, 0,
"(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )",
"akaac" },
750 {
MU,
A, 0, 0,
"(a+)b(?1)b\\1",
"abaaabaaaaa" },
751 {
MU,
A, 0, 0,
"(?(DEFINE)(aa|a))(?1)ab",
"aab" },
752 {
MU,
A, 0, 0,
"(?(DEFINE)(a\\Kb))(?1)+ababc",
"abababxabababc" },
753 {
MU,
A, 0, 0,
"(a\\Kb)(?1)+ababc",
"abababxababababc" },
754 {
MU,
A, 0, 0 |
F_NOMATCH,
"(a\\Kb)(?1)+ababc",
"abababxababababxc" },
755 {
MU,
A, 0, 0,
"b|<(?R)*>",
"<<b>" },
756 {
MU,
A, 0, 0,
"(a\\K){0}(?:(?1)b|ac)",
"ac" },
757 {
MU,
A, 0, 0,
"(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m",
"ababababnababababaam" },
758 {
MU,
A, 0, 0,
"(a)((?(R)a|b))(?2)",
"aabbabaa" },
759 {
MU,
A, 0, 0,
"(a)((?(R2)a|b))(?2)",
"aabbabaa" },
760 {
MU,
A, 0, 0,
"(a)((?(R1)a|b))(?2)",
"ababba" },
761 {
MU,
A, 0, 0,
"(?(R0)aa|bb(?R))",
"abba aabb bbaa" },
762 {
MU,
A, 0, 0,
"((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$",
"aaaaaaaaaa aaaa" },
763 {
MU,
A, 0, 0,
"(?P<Name>a(?(R&Name)a|b))(?1)",
"aab abb abaa" },
764 {
MU,
A, 0, 0,
"((?(R)a|(?1)){3})",
"XaaaaaaaaaX" },
765 {
MU,
A, 0, 0,
"((?:(?(R)a|(?1))){3})",
"XaaaaaaaaaX" },
766 {
MU,
A, 0, 0,
"((?(R)a|(?1)){1,3})aaaaaa",
"aaaaaaaaXaaaaaaaaa" },
767 {
MU,
A, 0, 0,
"((?(R)a|(?1)){1,3}?)M",
"aaaM" },
768 {
MU,
A, 0, 0,
"((.)(?:.|\\2(?1))){0}#(?1)#",
"#aabbccdde# #aabbccddee#" },
769 {
MU,
A, 0, 0,
"((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#",
"#aaaab# #aaaaab#" },
770 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?1)$((.|\\2xx){1,2})",
"abc" },
774 {
CM,
A, 0, 0 |
F_FORCECONV,
"\xe1\xbd\xb8",
"\xe1\xbf\xb8\xe1\xbd\xb8" },
776 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xe1\xbd\xb8]",
"\xe1\xbf\xb8\xe1\xbd\xb8" },
780 {
CM,
A, 0, 0 |
F_NO8 |
F_FORCECONV,
"\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}",
"\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
781 {
CM,
A, 0, 0 |
F_NO8 |
F_FORCECONV,
"[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#",
"\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
782 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)",
"\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
783 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xed\xa0\x80-\xed\xb3\xbf]",
"\xed\x9f\xbf\xed\xa0\x83" },
784 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xed\xa0\x80-\xed\xb3\xbf]",
"\xed\xb4\x80\xed\xb3\xb0" },
785 {
CM,
A, 0, 0 |
F_NO8 |
F_FORCECONV,
"[\\x{d800}-\\x{dcff}]",
"\xed\x9f\xbf\xed\xa0\x83" },
786 {
CM,
A, 0, 0 |
F_NO8 |
F_FORCECONV,
"[\\x{d800}-\\x{dcff}]",
"\xed\xb4\x80\xed\xb3\xb0" },
787 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#",
"\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
788 {
CM,
A, 0, 0 |
F_FORCECONV,
"[\xed\xa0\x80][\xed\xb0\x80]{2,}",
"\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
789 {
M,
A, 0, 0 |
F_FORCECONV,
"[^\xed\xb0\x80]{3,}?",
"##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
790 {
M,
A, 0, 0 |
F_NO8 |
F_FORCECONV,
"[^\\x{dc00}]{3,}?",
"##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
791 {
CM,
A, 0, 0 |
F_FORCECONV,
".\\B.",
"\xed\xa0\x80\xed\xb0\x80" },
792 {
CM,
A, 0, 0 |
F_FORCECONV,
"\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80",
"\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
793 {
CM,
A, 0, 0 |
F_FORCECONV,
"\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80",
"\xed\xa0\x80\xed\xa0\x80" },
794 {
CM,
A, 0, 0 |
F_FORCECONV |
F_NOMATCH,
"\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##",
"\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
796 {
CM,
A, 0, 0 |
F_FORCECONV,
"\xed\xa0\x80+#[^#]+\xed\xa0\x80",
"\xed\xa0\x80#a\xed\xa0\x80" },
797 {
CM,
A, 0, 0 |
F_FORCECONV,
"(\xed\xa0\x80+)#\\1",
"\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
802 { 0, 0, 0, 0 |
F_NO8 |
F_FORCECONV,
"\\v+?\\V+?#",
"\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
803 { 0, 0, 0, 0 |
F_NO8 |
F_FORCECONV,
"\\h+?\\H+?#",
"\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
816 {
MU,
A, 0, 0,
"a(*MARK:aa)a",
"ababaa" },
818 {
MU,
A, 0, 0,
"a(*:aa)(b(*:bb)b|bc)",
"abc" },
820 {
MU,
A, 0, 0,
"(?>a(*:aa))b|ac",
"ac" },
821 {
MU,
A, 0, 0,
"(?(DEFINE)(a(*:aa)))(?1)",
"a" },
822 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?(DEFINE)((a)(*:aa)))(?1)b",
"aa" },
823 {
MU,
A, 0, 0,
"(?(DEFINE)(a(*:aa)))a(?1)b|aac",
"aac" },
824 {
MU,
A, 0, 0,
"(a(*:aa)){0}(?:b(?1)b|c)+c",
"babbab cc" },
825 {
MU,
A, 0, 0,
"(a(*:aa)){0}(?:b(?1)b)+",
"babba" },
826 {
MU,
A, 0, 0 |
F_NOMATCH,
"(a(*:aa)){0}(?:b(?1)b)+",
"ba" },
827 {
MU,
A, 0, 0,
"(a\\K(*:aa)){0}(?:b(?1)b|c)+c",
"babbab cc" },
828 {
MU,
A, 0, 0,
"(a\\K(*:aa)){0}(?:b(?1)b)+",
"babba" },
829 {
MU,
A, 0, 0 |
F_NOMATCH,
"(a\\K(*:aa)){0}(?:b(?1)b)+",
"ba" },
834 {
MU,
A, 0, 0,
"aa(*COMMIT)b",
"xaxaab" },
835 {
MU,
A, 0, 0 |
F_NOMATCH,
"a(*COMMIT)(*:msg)b|ac",
"ac" },
837 {
MU,
A, 0, 0 |
F_NOMATCH,
"((a)(*COMMIT)b)++",
"abac" },
838 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?=a(*COMMIT)b)ab|ad",
"ad" },
841 {
MU,
A, 0, 0,
"aa\\K(*PRUNE)b",
"aaab" },
842 {
MU,
A, 0, 0,
"aa(*PRUNE:bb)b|a",
"aa" },
843 {
MU,
A, 0, 0,
"(a)(a)(*PRUNE)b|(a)",
"aa" },
844 {
MU,
A, 0, 0,
"(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)",
"aaaaaaaa" },
847 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?=a(*PRUNE)b)ab|ad",
"ad" },
848 {
MU,
A, 0, 0,
"a(*COMMIT)(*PRUNE)d|bc",
"abc" },
849 {
MU,
A, 0, 0,
"(?=a(*COMMIT)b)a(*PRUNE)c|bc",
"abc" },
850 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc",
"abc" },
851 {
MU,
A, 0, 0,
"(?=(a)(*COMMIT)b)a(*PRUNE)c|bc",
"abc" },
852 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc",
"abc" },
853 {
MU,
A, 0, 0,
"(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc",
"abc" },
854 {
MU,
A, 0, 0 |
F_NOMATCH,
"(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc",
"abc" },
855 {
MU,
A, 0, 0,
"(a(*COMMIT)b)++(*PRUNE)d|c",
"ababc" },
856 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c",
"ababc" },
857 {
MU,
A, 0, 0,
"((a)(*COMMIT)b)++(*PRUNE)d|c",
"ababc" },
858 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c",
"ababc" },
859 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)*abab(*PRUNE)d|ba",
"ababab" },
860 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba",
"ababab" },
861 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)+abab(*PRUNE)d|ba",
"ababab" },
862 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba",
"ababab" },
863 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)?ab(*PRUNE)d|ba",
"aba" },
864 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba",
"aba" },
865 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)*?n(*PRUNE)d|ba",
"abababn" },
866 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba",
"abababn" },
867 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)+?n(*PRUNE)d|ba",
"abababn" },
868 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba",
"abababn" },
869 {
MU,
A, 0, 0,
"(?>a(*COMMIT)b)??n(*PRUNE)d|bn",
"abn" },
870 {
MU,
A, 0, 0 |
F_NOMATCH,
"(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn",
"abn" },
873 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?=a(*SKIP)b)ab|ad",
"ad" },
874 {
MU,
A, 0, 0,
"(\\w+(*SKIP)#)",
"abcd,xyz#," },
875 {
MU,
A, 0, 0,
"\\w+(*SKIP)#|mm",
"abcd,xyz#," },
876 {
MU,
A, 0, 0 |
F_NOMATCH,
"b+(?<=(*SKIP)#c)|b+",
"#bbb" },
879 {
MU,
A, 0, 0,
"((?:a(*THEN)|aab)(*THEN)c|a+)+m",
"aabcaabcaabcaabcnacm" },
880 {
MU,
A, 0, 0 |
F_NOMATCH,
"((?:a(*THEN)|aab)(*THEN)c|a+)+m",
"aabcm" },
881 {
MU,
A, 0, 0,
"((?:a(*THEN)|aab)c|a+)+m",
"aabcaabcnmaabcaabcm" },
882 {
MU,
A, 0, 0,
"((?:a|aab)(*THEN)c|a+)+m",
"aam" },
883 {
MU,
A, 0, 0,
"((?:a(*COMMIT)|aab)(*THEN)c|a+)+m",
"aam" },
884 {
MU,
A, 0, 0,
"(?(?=a(*THEN)b)ab|ad)",
"ad" },
885 {
MU,
A, 0, 0,
"(?(?!a(*THEN)b)ad|add)",
"add" },
886 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?(?=a)a(*THEN)b|ad)",
"ad" },
887 {
MU,
A, 0, 0,
"(?!(?(?=a)ab|b(*THEN)d))bn|bnn",
"bnn" },
888 {
MU,
A, 0, 0,
"(?=(*THEN: ))* ",
" " },
889 {
MU,
A, 0, 0,
"a(*THEN)(?R) |",
"a" },
890 {
MU,
A, 0, 0 |
F_NOMATCH,
"(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)",
"c" },
893 {
MU,
A, 0, 0,
"(a(*ACCEPT)b){0}a(?1)b",
"aacaabb" },
894 {
MU,
A, 0, 0,
"((a)\\2(*ACCEPT)b){0}a(?1)b",
"aaacaaabb" },
895 {
MU,
A, 0, 0,
"((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_",
"_ababababax_ _ababababa_" },
896 {
MU,
A, 0, 0,
"((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_",
"_bcdaAdcb_bcdaAdcb_" },
897 {
MU,
A, 0, 0,
"((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_",
"_ab_" },
898 {
MU,
A, 0, 0,
"((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)",
"_aa_" },
899 {
MU,
A, 0, 0,
"(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_",
"_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
900 {
MU,
A, 0, 0,
"((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_",
"_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
902 #ifdef SUPPORT_UNICODE
904 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)*#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
905 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)+#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
906 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)*?#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
907 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)+?#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
908 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)*+#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
909 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)++#",
"!abcdefghijklmno!abcdefghijklmno!abcdef#" },
910 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)?#",
"!ab!abc!ab!ab#" },
911 {
MU,
A, 0, 0,
"!(*sr:\\w\\w|\\w\\w\\w)??#",
"!ab!abc!ab!ab#" },
915 {
MU,
A, 0, 0,
"((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s",
"aaaaa+ " },
916 {
MU,
A, 0, 0,
"(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s",
"aa+ " },
917 {
MU,
A, 0, 0,
"((a?)+)+b",
"aaaaaaaaaaaa b" },
920 {
M,
A, 0, 0 |
F_NOMATCH,
"a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaa" },
921 {
M,
A, 0, 0 |
F_NOMATCH,
"(?:a+)+b",
"aaaaaaaaaaaaaaaaaaaaaaaa b" },
922 {
M,
A, 0, 0 |
F_NOMATCH,
"(?:a+?)+?b",
"aaaaaaaaaaaaaaaaaaaaaaaa b" },
923 {
M,
A, 0, 0 |
F_NOMATCH,
"(?:a*)*b",
"aaaaaaaaaaaaaaaaaaaaaaaa b" },
924 {
M,
A, 0, 0 |
F_NOMATCH,
"(?:a*?)*?b",
"aaaaaaaaaaaaaaaaaaaaaaaa b" },
929 #ifdef SUPPORT_PCRE2_8
936 #ifdef SUPPORT_PCRE2_16
943 #ifdef SUPPORT_PCRE2_32
950 #ifdef SUPPORT_PCRE2_8
973 #ifdef SUPPORT_PCRE2_16
996 #ifdef SUPPORT_PCRE2_32
1019 #ifdef SUPPORT_PCRE2_16
1027 if (max_length == 0)
1030 while (*iptr && max_length > 1) {
1033 *offsetmap++ = (
int)(iptr - (
unsigned char*)
input);
1037 else if (!(*iptr & 0x20)) {
1038 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1040 }
else if (!(*iptr & 0x10)) {
1041 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1043 }
else if (!(*iptr & 0x08)) {
1044 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1051 }
else if (max_length <= 2) {
1053 return (
int)(optr -
output);
1056 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1057 *optr++ = 0xdc00 | (c & 0x3ff);
1064 *offsetmap = (
int)(iptr - (
unsigned char*)
input);
1066 return (
int)(optr -
output);
1074 if (max_length == 0)
1077 while (*iptr && max_length > 1) {
1082 return (
int)(optr -
output);
/* Capacity handed to the 8-bit -> 16-bit conversion helpers
   (copy_char8_to_char16 / convert_utf8_to_utf16) together with regtest_buf16;
   it is also the element count of regtest_offsetmap16 below. */
1085 #define REGTEST_MAX_LENGTH16 4096
/* Offset map passed to convert_utf8_to_utf16 alongside regtest_buf16. After a
   match, the 16-bit ovector entries are rewritten through it
   (ovector16_x[i] = regtest_offsetmap16[ovector16_x[i]]) before they are
   compared with the results of the other code unit widths.
   NOTE(review): the entries appear to be byte offsets into the original
   8-bit input - confirm against the full converter. */
1087 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1091 #ifdef SUPPORT_PCRE2_32
1099 if (max_length == 0)
1102 while (*iptr && max_length > 1) {
1105 *offsetmap++ = (
int)(iptr - (
unsigned char*)
input);
1109 else if (!(*iptr & 0x20)) {
1110 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1112 }
else if (!(*iptr & 0x10)) {
1113 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1115 }
else if (!(*iptr & 0x08)) {
1116 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1124 *offsetmap = (
int)(iptr - (
unsigned char*)
input);
1126 return (
int)(optr -
output);
1134 if (max_length == 0)
1137 while (*iptr && max_length > 1) {
1142 return (
int)(optr -
output);
/* Capacity handed to the 8-bit -> 32-bit conversion helpers
   (copy_char8_to_char32 / convert_utf8_to_utf32) together with regtest_buf32;
   it is also the element count of regtest_offsetmap32 below. */
1145 #define REGTEST_MAX_LENGTH32 4096
/* Offset map passed to convert_utf8_to_utf32 alongside regtest_buf32. After a
   match, the 32-bit ovector entries are rewritten through it
   (ovector32_x[i] = regtest_offsetmap32[ovector32_x[i]]) before they are
   compared with the results of the other code unit widths.
   NOTE(review): the entries appear to be byte offsets into the original
   8-bit input - confirm against the full converter. */
1147 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1153 const unsigned char *ptr = (
unsigned char *)
input;
1162 #define OVECTOR_SIZE 15
1173 int successful_row = 0;
1175 int jit_compile_mode;
1179 #ifdef SUPPORT_PCRE2_8
1187 int return_value8[2];
1189 #ifdef SUPPORT_PCRE2_16
1197 int return_value16[2];
1200 #ifdef SUPPORT_PCRE2_32
1208 int return_value32[2];
1212 #if defined SUPPORT_PCRE2_8
1214 #elif defined SUPPORT_PCRE2_16
1216 #elif defined SUPPORT_PCRE2_32
1219 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1227 #if defined SUPPORT_PCRE2_8
1229 #elif defined SUPPORT_PCRE2_16
1231 #elif defined SUPPORT_PCRE2_32
1235 printf(
"Running JIT regression tests\n");
1236 printf(
" target CPU of SLJIT compiler: ");
1237 for (
i = 0; cpu_info[
i];
i++)
1238 printf(
"%c", (
char)(cpu_info[
i]));
1241 #if defined SUPPORT_PCRE2_8
1243 #elif defined SUPPORT_PCRE2_16
1245 #elif defined SUPPORT_PCRE2_32
1251 #ifdef SUPPORT_PCRE2_8
1252 printf(
" in 8 bit mode with UTF-8 %s:\n",
utf ?
"enabled" :
"disabled");
1254 #ifdef SUPPORT_PCRE2_16
1255 printf(
" in 16 bit mode with UTF-16 %s:\n",
utf ?
"enabled" :
"disabled");
1257 #ifdef SUPPORT_PCRE2_32
1258 printf(
" in 32 bit mode with UTF-32 %s:\n",
utf ?
"enabled" :
"disabled");
1275 #ifdef SUPPORT_PCRE2_8
1287 &
error, &err_offs, ccontext8);
1289 if (!re8 && (
utf || is_ascii))
1290 printf(
"\n8 bit: Cannot compile pattern \"%s\": %d\n", current->
pattern,
error);
1295 printf(
"\n8 bit: Cannot allocate compile context\n");
1297 #ifdef SUPPORT_PCRE2_16
1301 copy_char8_to_char16((
PCRE2_SPTR8)current->
pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1314 &
error, &err_offs, ccontext16);
1316 if (!re16 && (
utf || is_ascii))
1317 printf(
"\n16 bit: Cannot compile pattern \"%s\": %d\n", current->
pattern,
error);
1322 printf(
"\n16 bit: Cannot allocate compile context\n");
1324 #ifdef SUPPORT_PCRE2_32
1328 copy_char8_to_char32((
PCRE2_SPTR8)current->
pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1341 &
error, &err_offs, ccontext32);
1343 if (!re32 && (
utf || is_ascii))
1344 printf(
"\n32 bit: Cannot compile pattern \"%s\": %d\n", current->
pattern,
error);
1349 printf(
"\n32 bit: Cannot allocate compile context\n");
1353 if ((counter & 0x3) != 0) {
1354 #ifdef SUPPORT_PCRE2_8
1357 #ifdef SUPPORT_PCRE2_16
1360 #ifdef SUPPORT_PCRE2_32
1365 #ifdef SUPPORT_PCRE2_8
1366 return_value8[0] = -1000;
1367 return_value8[1] = -1000;
1371 if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1372 printf(
"\n8 bit: Cannot allocate match data\n");
1392 printf(
"\n8 bit: JIT compiler does not support \"%s\"\n", current->
pattern);
1393 }
else if ((counter & 0x1) != 0) {
1394 setstack8(mcontext8);
1405 #ifdef SUPPORT_PCRE2_16
1406 return_value16[0] = -1000;
1407 return_value16[1] = -1000;
1411 if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1412 printf(
"\n16 bit: Cannot allocate match data\n");
1429 length16 = convert_utf8_to_utf16((
PCRE2_SPTR8)current->
input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1431 length16 = copy_char8_to_char16((
PCRE2_SPTR8)current->
input, regtest_buf16, REGTEST_MAX_LENGTH16);
1433 return_value16[1] =
pcre2_match_16(re16, regtest_buf16, length16,
1437 printf(
"\n16 bit: JIT compiler does not support \"%s\"\n", current->
pattern);
1438 }
else if ((counter & 0x1) != 0) {
1439 setstack16(mcontext16);
1440 return_value16[0] =
pcre2_match_16(re16, regtest_buf16, length16,
1450 #ifdef SUPPORT_PCRE2_32
1451 return_value32[0] = -1000;
1452 return_value32[1] = -1000;
1456 if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1457 printf(
"\n32 bit: Cannot allocate match data\n");
1474 length32 = convert_utf8_to_utf32((
PCRE2_SPTR8)current->
input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1476 length32 = copy_char8_to_char32((
PCRE2_SPTR8)current->
input, regtest_buf32, REGTEST_MAX_LENGTH32);
1478 return_value32[1] =
pcre2_match_32(re32, regtest_buf32, length32,
1482 printf(
"\n32 bit: JIT compiler does not support \"%s\"\n", current->
pattern);
1483 }
else if ((counter & 0x1) != 0) {
1484 setstack32(mcontext32);
1485 return_value32[0] =
pcre2_match_32(re32, regtest_buf32, length32,
1507 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1511 #ifdef SUPPORT_PCRE2_8
1512 if ((
return_value = return_value8[0]) != return_value8[1]) {
1513 printf(
"\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1514 return_value8[0], return_value8[1], total, current->
pattern, current->
input);
1518 #ifdef SUPPORT_PCRE2_16
1519 if ((
return_value = return_value16[0]) != return_value16[1]) {
1520 printf(
"\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1521 return_value16[0], return_value16[1], total, current->
pattern, current->
input);
1525 #ifdef SUPPORT_PCRE2_32
1526 if ((
return_value = return_value32[0]) != return_value32[1]) {
1527 printf(
"\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1528 return_value32[0], return_value32[1], total, current->
pattern, current->
input);
1532 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1533 if (return_value8[0] != return_value16[0]) {
1534 printf(
"\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1535 return_value8[0], return_value16[0],
1540 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1541 if (return_value8[0] != return_value32[0]) {
1542 printf(
"\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1543 return_value8[0], return_value32[0],
1548 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1549 if (return_value16[0] != return_value32[0]) {
1550 printf(
"\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1551 return_value16[0], return_value32[0],
1562 #ifdef SUPPORT_PCRE2_8
1565 #ifdef SUPPORT_PCRE2_16
1568 #ifdef SUPPORT_PCRE2_32
1573 #ifdef SUPPORT_PCRE2_16
1576 ovector16_1[
i] = regtest_offsetmap16[ovector16_1[
i]];
1578 ovector16_2[
i] = regtest_offsetmap16[ovector16_2[
i]];
1581 #ifdef SUPPORT_PCRE2_32
1584 ovector32_1[
i] = regtest_offsetmap32[ovector32_1[
i]];
1586 ovector32_2[
i] = regtest_offsetmap32[ovector32_2[
i]];
1592 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1593 if (ovector8_1[
i] != ovector8_2[
i] || ovector8_1[
i] != ovector16_1[
i] || ovector8_1[
i] != ovector16_2[
i]) {
1594 printf(
"\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1595 i, (
int)ovector8_1[
i], (
int)ovector8_2[
i], (
int)ovector16_1[
i], (
int)ovector16_2[
i],
1600 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1601 if (ovector8_1[
i] != ovector8_2[
i] || ovector8_1[
i] != ovector32_1[
i] || ovector8_1[
i] != ovector32_2[
i]) {
1602 printf(
"\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1603 i, (
int)ovector8_1[
i], (
int)ovector8_2[
i], (
int)ovector32_1[
i], (
int)ovector32_2[
i],
1608 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1609 if (ovector16_1[
i] != ovector16_2[
i] || ovector16_1[
i] != ovector32_1[
i] || ovector16_1[
i] != ovector32_2[
i]) {
1610 printf(
"\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1611 i, (
int)ovector16_1[
i], (
int)ovector16_2[
i], (
int)ovector32_1[
i], (
int)ovector32_2[
i],
1621 #ifdef SUPPORT_PCRE2_8
1622 if (return_value8[0] != return_value8[1]) {
1623 printf(
"\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1624 return_value8[0], return_value8[1], total, current->
pattern, current->
input);
1628 return_value8[0] = 2;
1630 return_value8[0] *= 2;
1632 for (
i = 0;
i < return_value8[0]; ++
i)
1633 if (ovector8_1[
i] != ovector8_2[
i]) {
1634 printf(
"\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1635 i, (
int)ovector8_1[
i], (
int)ovector8_2[
i], total, current->
pattern, current->
input);
1641 #ifdef SUPPORT_PCRE2_16
1642 if (return_value16[0] != return_value16[1]) {
1643 printf(
"\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1644 return_value16[0], return_value16[1], total, current->
pattern, current->
input);
1648 return_value16[0] = 2;
1650 return_value16[0] *= 2;
1652 for (
i = 0;
i < return_value16[0]; ++
i)
1653 if (ovector16_1[
i] != ovector16_2[
i]) {
1654 printf(
"\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1655 i, (
int)ovector16_1[
i], (
int)ovector16_2[
i], total, current->
pattern, current->
input);
1661 #ifdef SUPPORT_PCRE2_32
1662 if (return_value32[0] != return_value32[1]) {
1663 printf(
"\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1664 return_value32[0], return_value32[1], total, current->
pattern, current->
input);
1668 return_value32[0] = 2;
1670 return_value32[0] *= 2;
1672 for (
i = 0;
i < return_value32[0]; ++
i)
1673 if (ovector32_1[
i] != ovector32_2[
i]) {
1674 printf(
"\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1675 i, (
int)ovector32_1[
i], (
int)ovector32_2[
i], total, current->
pattern, current->
input);
1683 if (is_successful) {
1684 #ifdef SUPPORT_PCRE2_8
1687 printf(
"8 bit: Test should match: [%d] '%s' @ '%s'\n",
1693 printf(
"8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1699 #ifdef SUPPORT_PCRE2_16
1702 printf(
"16 bit: Test should match: [%d] '%s' @ '%s'\n",
1708 printf(
"16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1714 #ifdef SUPPORT_PCRE2_32
1717 printf(
"32 bit: Test should match: [%d] '%s' @ '%s'\n",
1723 printf(
"32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1731 if (is_successful) {
1732 #ifdef SUPPORT_PCRE2_8
1734 printf(
"8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1739 #ifdef SUPPORT_PCRE2_16
1741 printf(
"16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1746 #ifdef SUPPORT_PCRE2_32
1748 printf(
"32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1755 #ifdef SUPPORT_PCRE2_8
1761 #ifdef SUPPORT_PCRE2_16
1767 #ifdef SUPPORT_PCRE2_32
1774 if (is_successful) {
1778 if (successful_row >= 60) {
1788 #ifdef SUPPORT_PCRE2_8
1791 #ifdef SUPPORT_PCRE2_16
1794 #ifdef SUPPORT_PCRE2_32
1798 if (total == successful) {
1799 printf(
"\nAll JIT regression tests are successfully passed.\n");
1802 printf(
"\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1807 #if defined SUPPORT_UNICODE
1809 static int check_invalid_utf_result(
int pattern_index,
const char *
type,
int result,
1814 printf(
"Pattern[%d] %s result is not -1.\n", pattern_index,
type);
1821 printf(
"Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index,
type,
result);
1826 printf(
"Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
1832 printf(
"Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
1833 pattern_index,
type, (
int)ovector[1], match_end);
1842 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
/* Shorthand used by the entries of invalid_utf8_regression_test_cases.
   UDA: pattern compile options - UTF mode, dot-all and anchored. It is the
   first field of most table entries (some entries OR in PCRE2_CASELESS). */
1844 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* CI: JIT compile options - complete matching plus the invalid-UTF flag. */
1845 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* CPI: same as CI with soft partial matching requested as well. */
1846 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1848 struct invalid_utf8_regression_test_case {
1850 int jit_compile_options;
1856 const char *pattern[2];
/* Sentinel object: only its address is meaningful. A table entry stores
   &invalid_utf8_newline_cr in pattern[1], and the test driver compares
   current->pattern[1] against this address to choose how the entry is run.
   NOTE(review): the name suggests it selects a CR newline convention - the
   code acting on it should be checked to confirm. */
1860 static const char invalid_utf8_newline_cr;
1862 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1863 { UDA,
CI, 0, 0, 0, 0, 4, {
".",
NULL },
"\xf4\x8f\xbf\xbf" },
1864 { UDA,
CI, 0, 0, 0, 0, 4, {
".",
NULL },
"\xf0\x90\x80\x80" },
1865 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf4\x90\x80\x80" },
1866 { UDA,
CI, 0, 0, 1, -1, -1, {
".",
NULL },
"\xf4\x8f\xbf\xbf" },
1867 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf0\x90\x80\x7f" },
1868 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf0\x90\x80\xc0" },
1869 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf0\x8f\xbf\xbf" },
1870 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xef\xbf\xbf#" },
1871 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xef\xbf\xbf" },
1872 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xe0\xa0\x80#" },
1873 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xe0\xa0\x80" },
1874 { UDA,
CI, 0, 0, 2, -1, -1, {
".",
NULL },
"\xef\xbf\xbf#" },
1875 { UDA,
CI, 0, 0, 1, -1, -1, {
".",
NULL },
"\xef\xbf\xbf" },
1876 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xef\xbf\x7f#" },
1877 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xef\xbf\xc0" },
1878 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xe0\x9f\xbf#" },
1879 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xe0\x9f\xbf" },
1880 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xed\x9f\xbf#" },
1881 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xed\xa0\x80#" },
1882 { UDA,
CI, 0, 0, 0, 0, 3, {
".",
NULL },
"\xee\x80\x80#" },
1883 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xed\xbf\xbf#" },
1884 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xdf\xbf##" },
1885 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xdf\xbf#" },
1886 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xdf\xbf" },
1887 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xc2\x80##" },
1888 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xc2\x80#" },
1889 { UDA,
CI, 0, 0, 0, 0, 2, {
".",
NULL },
"\xc2\x80" },
1890 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xe0\x80##" },
1891 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xdf\xc0##" },
1892 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xe0\x80" },
1893 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xdf\xc0" },
1894 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xc1\xbf##" },
1895 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xc1\xbf" },
1896 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\x80###" },
1897 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\x80" },
1898 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf8###" },
1899 { UDA,
CI, 0, 0, 0, -1, -1, {
".",
NULL },
"\xf8" },
1900 { UDA,
CI, 0, 0, 0, 0, 1, {
".",
NULL },
"\x7f" },
1902 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"\xf4\x8f\xbf\xbf#" },
1903 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1904 { UDA, CPI, 4, 1, 1, -1, -1, {
"\\B",
"\\b" },
"\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1905 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"#\xef\xbf\xbf#" },
1906 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"#\xe0\xa0\x80#" },
1907 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"\xf0\x90\x80\x80#" },
1908 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"\xf3\xbf\xbf\xbf#" },
1909 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1910 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1911 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1912 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1913 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1914 { UDA, CPI, 4, 0, 1, -1, -1, {
"\\B",
"\\b" },
"\xef\x80\x80\x80\xef\x80\x80" },
1915 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\x80\x80\x80\x80\x80\x80\x80\x80" },
1916 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1917 { UDA, CPI, 4, 2, 2, -1, -1, {
"\\B",
"\\b" },
"#\xe0\xa0\x80\xe0\xa0\x80#" },
1918 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"#\xf0\x80\x80\xf0\x80\x80#" },
1919 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"#\xed\xa0\x80\xed\xa0\x80#" },
1920 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"##\xdf\xbf#" },
1921 { UDA, CPI, 4, 2, 0, 2, 2, {
"\\B",
NULL },
"##\xdf\xbf#" },
1922 { UDA, CPI, 4, 0, 0, 4, 4, {
"\\B",
NULL },
"##\xc2\x80#" },
1923 { UDA, CPI, 4, 2, 0, 2, 2, {
"\\B",
NULL },
"##\xc2\x80#" },
1924 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"##\xc1\xbf\xc1\xbf##" },
1925 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"##\xdf\xc0\xdf\xc0##" },
1926 { UDA, CPI, 4, 0, 0, -1, -1, {
"\\B",
"\\b" },
"##\xe0\x80\xe0\x80##" },
1928 { UDA, CPI, 3, 0, 0, 3, 3, {
"\\B",
NULL },
"\xef\xbf\xbf#" },
1929 { UDA, CPI, 3, 0, 0, 3, 3, {
"\\B",
NULL },
"\xe0\xa0\x80#" },
1930 { UDA, CPI, 3, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xe0\x9f\xbf\xe0\x9f\xbf" },
1931 { UDA, CPI, 3, 1, 1, -1, -1, {
"\\B",
"\\b" },
"\xef\xbf\xbf\xef\xbf\xbf" },
1932 { UDA, CPI, 3, 0, 1, -1, -1, {
"\\B",
"\\b" },
"\xdf\x80\x80\xdf\x80" },
1933 { UDA, CPI, 3, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xef\xbf\xff\xef\xbf\xff" },
1934 { UDA, CPI, 3, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xef\xff\xbf\xef\xff\xbf" },
1935 { UDA, CPI, 3, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xed\xbf\xbf\xed\xbf\xbf" },
1937 { UDA, CPI, 2, 0, 0, 2, 2, {
"\\B",
NULL },
"\xdf\xbf#" },
1938 { UDA, CPI, 2, 0, 0, 2, 2, {
"\\B",
NULL },
"\xc2\x80#" },
1939 { UDA, CPI, 2, 1, 1, -1, -1, {
"\\B",
"\\b" },
"\xdf\xbf\xdf\xbf" },
1940 { UDA, CPI, 2, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xc1\xbf\xc1\xbf" },
1941 { UDA, CPI, 2, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xe0\x80\xe0\x80" },
1942 { UDA, CPI, 2, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xdf\xff\xdf\xff" },
1943 { UDA, CPI, 2, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xff\xbf\xff\xbf" },
1945 { UDA, CPI, 1, 0, 0, 1, 1, {
"\\B",
NULL },
"\x7f#" },
1946 { UDA, CPI, 1, 0, 0, 1, 1, {
"\\B",
NULL },
"\x01#" },
1947 { UDA, CPI, 1, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\x80\x80" },
1948 { UDA, CPI, 1, 0, 0, -1, -1, {
"\\B",
"\\b" },
"\xb0\xb0" },
1950 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, {
"(.)\\1",
NULL },
"aA" },
1951 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, {
"(.)\\1",
NULL },
"a\xff" },
1952 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, {
"(.)\\1",
NULL },
"\xc3\xa1\xc3\x81" },
1953 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, {
"(.)\\1",
NULL },
"\xc3\xa1\xc3\x81" },
1954 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, {
"(.)\\1",
NULL },
"\xc2\x80\x80" },
1955 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, {
"(.)\\1",
NULL },
"\xe1\xbd\xb8\xe1\xbf\xb8" },
1956 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, {
"(.)\\1",
NULL },
"\xe1\xbd\xb8\xe1\xbf\xb8" },
1957 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, {
"(.)\\1",
NULL },
"\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1958 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, {
"(.)\\1",
NULL },
"\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1960 { UDA, CPI, 0, 0, 0, 0, 1, {
"\\X",
NULL },
"A" },
1961 { UDA, CPI, 0, 0, 0, -1, -1, {
"\\X",
NULL },
"\xff" },
1962 { UDA, CPI, 0, 0, 0, 0, 2, {
"\\X",
NULL },
"\xc3\xa1" },
1963 { UDA, CPI, 0, 0, 1, -1, -1, {
"\\X",
NULL },
"\xc3\xa1" },
1964 { UDA, CPI, 0, 0, 0, -1, -1, {
"\\X",
NULL },
"\xc3\x7f" },
1965 { UDA, CPI, 0, 0, 0, 0, 3, {
"\\X",
NULL },
"\xe1\xbd\xb8" },
1966 { UDA, CPI, 0, 0, 1, -1, -1, {
"\\X",
NULL },
"\xe1\xbd\xb8" },
1967 { UDA, CPI, 0, 0, 0, 0, 4, {
"\\X",
NULL },
"\xf0\x90\x90\x80" },
1968 { UDA, CPI, 0, 0, 1, -1, -1, {
"\\X",
NULL },
"\xf0\x90\x90\x80" },
1970 { UDA, CPI, 0, 0, 0, -1, -1, {
"[^#]",
NULL },
"#" },
1971 { UDA, CPI, 0, 0, 0, 0, 4, {
"[^#]",
NULL },
"\xf4\x8f\xbf\xbf" },
1972 { UDA, CPI, 0, 0, 0, -1, -1, {
"[^#]",
NULL },
"\xf4\x90\x80\x80" },
1973 { UDA, CPI, 0, 0, 0, -1, -1, {
"[^#]",
NULL },
"\xc1\x80" },
1975 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 2, 3, {
"^\\W",
NULL },
" \x0a#"},
1976 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 14, 15, {
"^\\W",
NULL },
" \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1977 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 3, 4, {
"^\\W",
NULL },
" \xf8\x0a#"},
1978 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 3, 4, {
"^\\W",
NULL },
" \xc3\x0a#"},
1979 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 3, 4, {
"^\\W",
NULL },
" \xf1\x0a#"},
1980 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 4, 5, {
"^\\W",
NULL },
" \xf2\xbf\x0a#"},
1981 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 5, 6, {
"^\\W",
NULL },
" \xf2\xbf\xbf\x0a#"},
1982 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 3, 4, {
"^\\W",
NULL },
" \xef\x0a#"},
1983 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 4, 5, {
"^\\W",
NULL },
" \xef\xbf\x0a#"},
1984 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 5, 6, {
"^\\W",
NULL },
" \x85#\xc2\x85#"},
1985 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 7, 8, {
"^\\W",
NULL },
" \xe2\x80\xf8\xe2\x80\xa8#"},
1987 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, -1, -1, {
"#",
NULL },
"\xe2\x80\xf8\xe2\x80\xa8#"},
1988 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, 3, 4, {
"#",
NULL },
"\xe2\x80\xf8#\xe2\x80\xa8#"},
1989 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, -1, -1, {
"#",
NULL },
"abcd\xc2\x85#"},
1990 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, 1, 2, {
"#",
NULL },
"\x85#\xc2\x85#"},
1991 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, 5, 6, {
"#",
NULL },
"\xef,\x80,\xf8#\x0a"},
1992 {
PCRE2_UTF |
PCRE2_FIRSTLINE,
CI, 0, 0, 0, -1, -1, {
"#",
NULL },
"\xef,\x80,\xf8\x0a#"},
1994 {
PCRE2_UTF |
PCRE2_NO_START_OPTIMIZE,
CI, 0, 0, 0, 4, 8, {
"#\xc7\x85#",
NULL },
"\x80\x80#\xc7#\xc7\x85#" },
1995 {
PCRE2_UTF |
PCRE2_NO_START_OPTIMIZE,
CI, 0, 0, 0, 7, 11, {
"#\xc7\x85#",
NULL },
"\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1996 {
PCRE2_UTF,
CI, 0, 0, 0, 4, 8, {
"#\xc7\x85#",
NULL },
"\x80\x80#\xc7#\xc7\x85#" },
1997 {
PCRE2_UTF,
CI, 0, 0, 0, 7, 11, {
"#\xc7\x85#",
NULL },
"\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1999 {
PCRE2_UTF |
PCRE2_UCP,
CI, 0, 0, 0, -1, -1, {
"[\\s]",
NULL },
"\xed\xa0\x80" },
2000 {
PCRE2_UTF,
CI, 0, 0, 0, 0, 3, {
"[\\D]",
NULL },
"\xe0\xab\xaa@" },
2001 {
PCRE2_UTF,
CI, 0, 0, 0, 0, 3, {
"\\D+",
NULL },
"n\xc3\xb1" },
2002 {
PCRE2_UTF,
CI, 0, 0, 0, 0, 5, {
"\\W+",
NULL },
"@\xf0\x9d\x84\x9e" },
2005 { 0,
PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, {
"\\X{2}",
NULL },
"\r\n\n" },
2006 { 0,
PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, {
"\\R{2}",
NULL },
"\r\n\n" },
2008 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 0, 0, 0, -1, -1, {
"^.a", &invalid_utf8_newline_cr },
"\xc3\xa7#a" },
2017 static int run_invalid_utf8_test(
const struct invalid_utf8_regression_test_case *current,
2025 if (current->pattern[
i] ==
NULL)
2029 current->compile_options, &errorcode, &erroroffset, ccontext);
2032 printf(
"Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (
int)erroroffset);
2037 printf(
"Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2042 length = (
PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
2046 length, current->start_offset - current->skip_left, 0, mdata,
NULL);
2048 if (check_invalid_utf_result(pattern_index,
"match",
result, current->match_start, current->match_end, ovector)) {
2058 if (check_invalid_utf_result(pattern_index,
"partial match",
result, current->match_start, current->match_end, ovector)) {
2070 const struct invalid_utf8_regression_test_case *current;
2073 int total = 0, successful = 0;
2076 printf(
"\nRunning invalid-utf8 JIT regression tests\n");
2082 for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2087 if (current->pattern[1] != &invalid_utf8_newline_cr)
2089 if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2091 if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2095 if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2105 if ((total % 60) == 0)
2109 if ((total % 60) != 0)
2115 if (total == successful) {
2116 printf(
"\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2119 printf(
"\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2133 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
/* Option shorthands used by the invalid-UTF16 test table entries. */
/* UDA: UTF mode, dot matches everything, pattern anchored at the start offset. */
2135 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* CI: JIT "complete match" mode combined with the invalid-UTF JIT option. */
2136 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* CPI: same as CI, with soft partial matching added. */
2137 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2139 struct invalid_utf16_regression_test_case {
2141 int jit_compile_options;
/* Zero-terminated 16-bit spelling of the two-character pattern \B
   (backslash followed by 'B'). */
2152 static PCRE2_UCHAR16 non_word_boundary16[] = {
'\\',
'B', 0 };
/* Zero-terminated 16-bit spelling of the pattern (.)\1 : one captured
   character followed by a back reference to it. The table entries that use it
   add PCRE2_CASELESS to the compile options. */
2154 static PCRE2_UCHAR16 backreference16[] = {
'(',
'.',
')',
'\\',
'1', 0 };
/* Zero-terminated 16-bit spelling of the pattern [^#] (any character
   except '#'). */
2156 static PCRE2_UCHAR16 nothashmark16[] = {
'[',
'^',
'#',
']', 0 };
/* Zero-terminated pattern: '#', the surrogate pair 0xd800 0xdc00 (U+10000),
   then '#'. Used as the pattern for the test16_11 / test16_12 entries. */
2158 static PCRE2_UCHAR16 generic16[] = {
'#', 0xd800, 0xdc00,
'#', 0 };
/* Zero-terminated subject made only of single-unit (non-surrogate) values:
   0xd7ff and 0xe000 are the code units directly below and above the surrogate
   range 0xd800-0xdfff. */
2159 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01,
'#', 0 };
/* Zero-terminated subject: the well-formed surrogate pair 0xd800 0xdc00
   (U+10000, the lowest supplementary code point) twice. */
2160 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
/* Zero-terminated subject: the well-formed surrogate pair 0xdbff 0xdfff
   (U+10FFFF, the highest code point) twice. */
2161 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
/* Zero-terminated subject made of four high surrogates (0xd800-0xdbff range)
   with no low surrogate after any of them, so no unit forms a valid pair. */
2162 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
/* Zero-terminated subject: '#', the surrogate pair 0xd800 0xdc00 (U+10000),
   '#'. Holds the same code units as generic16, but is kept as a separate
   array. */
2163 static PCRE2_UCHAR16 test16_5[] = {
'#', 0xd800, 0xdc00,
'#', 0 };
/* Zero-terminated subject: two well-formed surrogate pairs, 0xd801 0xdc00
   (U+10400) and 0xd801 0xdc28 (U+10428). Used with the caseless back-reference
   pattern; NOTE(review): presumably chosen as an upper/lower case pair --
   confirm against the Unicode case tables. */
2165 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
/* Zero-terminated subject: '#' followed by the surrogate pair 0xd800 0xdc00
   (U+10000). Used with the [^#] pattern. */
2166 static PCRE2_UCHAR16 test16_8[] = {
'#', 0xd800, 0xdc00, 0 };
/* Zero-terminated subject: space, a low surrogate (0xdc00) followed by a high
   surrogate (0xd800) -- i.e. the two halves in the wrong order, so neither is
   part of a valid pair -- then U+2028 (LINE SEPARATOR) and '#'. */
2168 static PCRE2_UCHAR16 test16_10[] = {
' ', 0xdc00, 0xd800, 0x2028,
'#', 0 };
/* Zero-terminated subject mixing stray low surrogates with valid pairs.
   The only '#', 0xd800, 0xdc00, '#' run (the generic16 pattern) starts at
   code-unit index 5. */
2169 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00,
'#', 0xd800, 0xdc00,
'#', 0 };
/* Zero-terminated subject with two near-misses of the generic16 pattern:
   after the first '#' the pair is followed by an extra high surrogate, and
   after the second '#' by two extra low surrogates. The exact
   '#', 0xd800, 0xdc00, '#' run starts at code-unit index 9. */
2170 static PCRE2_UCHAR16 test16_12[] = {
'#', 0xd800, 0xdc00, 0xd800,
'#', 0xd800, 0xdc00, 0xdc00, 0xdc00,
'#', 0xd800, 0xdc00,
'#', 0 };
2172 static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
2173 { UDA,
CI, 0, 0, 0, 0, 1, { allany16,
NULL }, test16_1 },
2174 { UDA,
CI, 1, 0, 0, 1, 2, { allany16,
NULL }, test16_1 },
2175 { UDA,
CI, 2, 0, 0, 2, 3, { allany16,
NULL }, test16_1 },
2176 { UDA,
CI, 3, 0, 0, 3, 4, { allany16,
NULL }, test16_1 },
2177 { UDA,
CI, 0, 0, 0, 0, 2, { allany16,
NULL }, test16_2 },
2178 { UDA,
CI, 0, 0, 3, -1, -1, { allany16,
NULL }, test16_2 },
2179 { UDA,
CI, 1, 0, 0, -1, -1, { allany16,
NULL }, test16_2 },
2180 { UDA,
CI, 0, 0, 0, 0, 2, { allany16,
NULL }, test16_3 },
2181 { UDA,
CI, 0, 0, 3, -1, -1, { allany16,
NULL }, test16_3 },
2182 { UDA,
CI, 1, 0, 0, -1, -1, { allany16,
NULL }, test16_3 },
2184 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16,
NULL }, test16_1 },
2185 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16,
NULL }, test16_1 },
2186 { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16,
NULL }, test16_1 },
2187 { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16,
NULL }, test16_1 },
2188 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16,
NULL }, test16_2 },
2189 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16,
NULL }, test16_3 },
2190 { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2191 { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2192 { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2193 { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2195 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16,
NULL }, test16_6 },
2196 { UDA |
PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16,
NULL }, test16_6 },
2197 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16,
NULL }, test16_7 },
2198 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16,
NULL }, test16_7 },
2200 { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16,
NULL }, test16_6 },
2201 { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16,
NULL }, test16_6 },
2202 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16,
NULL }, test16_6 },
2203 { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16,
NULL }, test16_7 },
2204 { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16,
NULL }, test16_7 },
2205 { UDA, CPI, 1, 0, 0, -1, -1, { grapheme16,
NULL }, test16_7 },
2207 { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16,
NULL }, test16_8 },
2208 { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16,
NULL }, test16_8 },
2209 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16,
NULL }, test16_8 },
2211 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 2, 3, { afternl16,
NULL }, test16_9 },
2212 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 4, 5, { afternl16,
NULL }, test16_10 },
2214 {
PCRE2_UTF |
PCRE2_NO_START_OPTIMIZE,
CI, 0, 0, 0, 5, 9, { generic16,
NULL }, test16_11 },
2215 {
PCRE2_UTF |
PCRE2_NO_START_OPTIMIZE,
CI, 0, 0, 0, 9, 13, { generic16,
NULL }, test16_12 },
2216 {
PCRE2_UTF,
CI, 0, 0, 0, 5, 9, { generic16,
NULL }, test16_11 },
2217 {
PCRE2_UTF,
CI, 0, 0, 0, 9, 13, { generic16,
NULL }, test16_12 },
2226 static int run_invalid_utf16_test(
const struct invalid_utf16_regression_test_case *current,
2235 if (current->pattern[
i] ==
NULL)
2239 current->compile_options, &errorcode, &erroroffset, ccontext);
2242 printf(
"Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (
int)erroroffset);
2247 printf(
"Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2252 input = current->input;
2255 while (*
input++ != 0)
2258 length -= current->skip_left + current->skip_right;
2262 length, current->start_offset - current->skip_left, 0, mdata,
NULL);
2264 if (check_invalid_utf_result(pattern_index,
"match",
result, current->match_start, current->match_end, ovector)) {
2274 if (check_invalid_utf_result(pattern_index,
"partial match",
result, current->match_start, current->match_end, ovector)) {
2286 const struct invalid_utf16_regression_test_case *current;
2289 int total = 0, successful = 0;
2292 printf(
"\nRunning invalid-utf16 JIT regression tests\n");
2298 for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2303 if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2305 if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2313 if ((total % 60) == 0)
2317 if ((total % 60) != 0)
2323 if (total == successful) {
2324 printf(
"\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2327 printf(
"\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2341 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
/* Option shorthands used by the invalid-UTF32 test table entries. */
/* UDA: UTF mode, dot matches everything, pattern anchored at the start offset. */
2343 #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* CI: JIT "complete match" mode combined with the invalid-UTF JIT option. */
2344 #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* CPI: same as CI, with soft partial matching added. */
2345 #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2347 struct invalid_utf32_regression_test_case {
2349 int jit_compile_options;
/* Zero-terminated 32-bit spelling of the two-character pattern \B
   (backslash followed by 'B'). */
2360 static PCRE2_UCHAR32 non_word_boundary32[] = {
'\\',
'B', 0 };
/* Zero-terminated 32-bit spelling of the pattern (.)\1 : one captured
   character followed by a back reference to it. The table entries that use it
   add PCRE2_CASELESS to the compile options. */
2362 static PCRE2_UCHAR32 backreference32[] = {
'(',
'.',
')',
'\\',
'1', 0 };
/* Zero-terminated 32-bit spelling of the pattern [^#] (any character
   except '#'). */
2364 static PCRE2_UCHAR32 nothashmark32[] = {
'[',
'^',
'#',
']', 0 };
/* Zero-terminated subject around the upper code-point limit: 0x10ffff is the
   highest Unicode code point, 0x110000 is one past it (out of range). */
2366 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
/* Zero-terminated subject around the surrogate range: 0xd7ff and 0xe000 are
   the values directly below and above 0xd800-0xdfff, while 0xd800 and 0xdfff
   are the first and last surrogate values (surrogates are not valid UTF-32
   code points). */
2367 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
/* Zero-terminated subject: 'a', 'A' (same letter in both cases) followed by
   the out-of-range value 0x110000. Used with the caseless back-reference
   pattern. */
2368 static PCRE2_UCHAR32 test32_3[] = {
'a',
'A', 0x110000, 0 };
/* Zero-terminated subject: '#', the highest code point 0x10ffff, then the
   out-of-range value 0x110000. Used with the [^#] pattern. */
2369 static PCRE2_UCHAR32 test32_4[] = {
'#', 0x10ffff, 0x110000, 0 };
/* Zero-terminated subject: space, the out-of-range value 0x110000, U+2028
   (LINE SEPARATOR), then '#'. */
2371 static PCRE2_UCHAR32 test32_6[] = {
' ', 0x110000, 0x2028,
'#', 0 };
2373 static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
2374 { UDA,
CI, 0, 0, 0, 0, 1, { allany32,
NULL }, test32_1 },
2375 { UDA,
CI, 2, 0, 0, -1, -1, { allany32,
NULL }, test32_1 },
2376 { UDA,
CI, 0, 0, 0, 0, 1, { allany32,
NULL }, test32_2 },
2377 { UDA,
CI, 1, 0, 0, 1, 2, { allany32,
NULL }, test32_2 },
2378 { UDA,
CI, 2, 0, 0, -1, -1, { allany32,
NULL }, test32_2 },
2379 { UDA,
CI, 3, 0, 0, -1, -1, { allany32,
NULL }, test32_2 },
2381 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32,
NULL }, test32_1 },
2382 { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2383 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32,
NULL }, test32_2 },
2384 { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2385 { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2387 { UDA |
PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32,
NULL }, test32_3 },
2388 { UDA |
PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32,
NULL }, test32_3 },
2390 { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32,
NULL }, test32_1 },
2391 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32,
NULL }, test32_1 },
2392 { UDA, CPI, 1, 0, 0, 1, 2, { grapheme32,
NULL }, test32_2 },
2393 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32,
NULL }, test32_2 },
2394 { UDA, CPI, 3, 0, 0, -1, -1, { grapheme32,
NULL }, test32_2 },
2395 { UDA, CPI, 4, 0, 0, 4, 5, { grapheme32,
NULL }, test32_2 },
2397 { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32,
NULL }, test32_4 },
2398 { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32,
NULL }, test32_4 },
2399 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32,
NULL }, test32_4 },
2400 { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32,
NULL }, test32_2 },
2401 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32,
NULL }, test32_2 },
2403 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 2, 3, { afternl32,
NULL }, test32_5 },
2404 {
PCRE2_UTF |
PCRE2_MULTILINE,
CI, 1, 0, 0, 3, 4, { afternl32,
NULL }, test32_6 },
2413 static int run_invalid_utf32_test(
const struct invalid_utf32_regression_test_case *current,
2422 if (current->pattern[
i] ==
NULL)
2426 current->compile_options, &errorcode, &erroroffset, ccontext);
2429 printf(
"Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (
int)erroroffset);
2434 printf(
"Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2439 input = current->input;
2442 while (*
input++ != 0)
2445 length -= current->skip_left + current->skip_right;
2449 length, current->start_offset - current->skip_left, 0, mdata,
NULL);
2451 if (check_invalid_utf_result(pattern_index,
"match",
result, current->match_start, current->match_end, ovector)) {
2461 if (check_invalid_utf_result(pattern_index,
"partial match",
result, current->match_start, current->match_end, ovector)) {
2473 const struct invalid_utf32_regression_test_case *current;
2476 int total = 0, successful = 0;
2479 printf(
"\nRunning invalid-utf32 JIT regression tests\n");
2485 for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2490 if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2492 if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2500 if ((total % 60) == 0)
2504 if ((total % 60) != 0)
2510 if (total == successful) {
2511 printf(
"\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2514 printf(
"\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
static SQLCHAR output[256]
static char * return_value
unsigned int
A callback function used to compare two keys in a database.
PCRE2_SPTR8 pcre2_get_mark_8(pcre2_match_data_8 *)
pcre2_code_8 * pcre2_compile_8(PCRE2_SPTR8, size_t, uint32_t, int *, size_t *, pcre2_compile_context_8 *)
#define PCRE2_DOLLAR_ENDONLY
void pcre2_jit_stack_assign_32(pcre2_match_context_32 *, pcre2_jit_callback_32, void *)
struct pcre2_real_code_16 pcre2_code_16
pcre2_code_32 * pcre2_compile_32(PCRE2_SPTR32, size_t, uint32_t, int *, size_t *, pcre2_compile_context_32 *)
pcre2_jit_stack_8 * pcre2_jit_stack_create_8(size_t, size_t, pcre2_general_context_8 *)
#define PCRE2_ZERO_TERMINATED
void pcre2_compile_context_free_16(pcre2_compile_context_16 *)
struct pcre2_real_match_context_8 pcre2_match_context_8
#define PCRE2_NOTEMPTY_ATSTART
const PCRE2_UCHAR8 * PCRE2_SPTR8
int pcre2_set_newline_32(pcre2_compile_context_32 *, uint32_t)
pcre2_compile_context_16 * pcre2_compile_context_create_16(pcre2_general_context_16 *)
void pcre2_jit_stack_free_32(pcre2_jit_stack_32 *)
#define PCRE2_ENDANCHORED
#define PCRE2_BSR_ANYCRLF
int pcre2_config_16(uint32_t, void *)
void pcre2_match_data_free_8(pcre2_match_data_8 *)
int pcre2_set_newline_16(pcre2_compile_context_16 *, uint32_t)
pcre2_match_context_16 * pcre2_match_context_create_16(pcre2_general_context_16 *)
struct pcre2_real_match_data_16 pcre2_match_data_16
#define PCRE2_AUTO_CALLOUT
#define PCRE2_CONFIG_UNICODE
int pcre2_set_bsr_8(pcre2_compile_context_8 *, uint32_t)
pcre2_match_data_32 * pcre2_match_data_create_32(uint32_t, pcre2_general_context_32 *)
int pcre2_jit_match_32(const pcre2_code_32 *, PCRE2_SPTR32, size_t, size_t, uint32_t, pcre2_match_data_32 *, pcre2_match_context_32 *)
struct pcre2_real_match_context_32 pcre2_match_context_32
int pcre2_config_8(uint32_t, void *)
#define PCRE2_NEWLINE_ANYCRLF
PCRE2_SPTR32 pcre2_get_mark_32(pcre2_match_data_32 *)
void pcre2_jit_stack_assign_16(pcre2_match_context_16 *, pcre2_jit_callback_16, void *)
int pcre2_match_32(const pcre2_code_32 *, PCRE2_SPTR32, size_t, size_t, uint32_t, pcre2_match_data_32 *, pcre2_match_context_32 *)
size_t * pcre2_get_ovector_pointer_16(pcre2_match_data_16 *)
void pcre2_jit_stack_free_8(pcre2_jit_stack_8 *)
void pcre2_match_context_free_8(pcre2_match_context_8 *)
void pcre2_code_free_8(pcre2_code_8 *)
void pcre2_match_data_free_16(pcre2_match_data_16 *)
void pcre2_code_free_32(pcre2_code_32 *)
struct pcre2_real_jit_stack_16 pcre2_jit_stack_16
#define PCRE2_MATCH_UNSET_BACKREF
#define PCRE2_PARTIAL_SOFT
pcre2_compile_context_8 * pcre2_compile_context_create_8(pcre2_general_context_8 *)
void pcre2_code_free_16(pcre2_code_16 *)
size_t * pcre2_get_ovector_pointer_32(pcre2_match_data_32 *)
pcre2_match_data_16 * pcre2_match_data_create_16(uint32_t, pcre2_general_context_16 *)
#define PCRE2_PARTIAL_HARD
void pcre2_match_data_free_32(pcre2_match_data_32 *)
size_t * pcre2_get_ovector_pointer_8(pcre2_match_data_8 *)
struct pcre2_real_code_32 pcre2_code_32
struct pcre2_real_compile_context_16 pcre2_compile_context_16
int pcre2_jit_match_16(const pcre2_code_16 *, PCRE2_SPTR16, size_t, size_t, uint32_t, pcre2_match_data_16 *, pcre2_match_context_16 *)
int pcre2_set_match_limit_8(pcre2_match_context_8 *, uint32_t)
int pcre2_jit_match_8(const pcre2_code_8 *, PCRE2_SPTR8, size_t, size_t, uint32_t, pcre2_match_data_8 *, pcre2_match_context_8 *)
int pcre2_set_match_limit_16(pcre2_match_context_16 *, uint32_t)
struct pcre2_real_jit_stack_8 pcre2_jit_stack_8
void pcre2_match_context_free_16(pcre2_match_context_16 *)
void pcre2_compile_context_free_8(pcre2_compile_context_8 *)
int pcre2_set_bsr_32(pcre2_compile_context_32 *, uint32_t)
int pcre2_set_bsr_16(pcre2_compile_context_16 *, uint32_t)
int pcre2_jit_compile_16(pcre2_code_16 *, uint32_t)
struct pcre2_real_match_data_32 pcre2_match_data_32
pcre2_jit_stack_32 * pcre2_jit_stack_create_32(size_t, size_t, pcre2_general_context_32 *)
void pcre2_match_context_free_32(pcre2_match_context_32 *)
pcre2_match_context_32 * pcre2_match_context_create_32(pcre2_general_context_32 *)
pcre2_jit_stack_16 * pcre2_jit_stack_create_16(size_t, size_t, pcre2_general_context_16 *)
int pcre2_set_newline_8(pcre2_compile_context_8 *, uint32_t)
#define PCRE2_JIT_PARTIAL_HARD
struct pcre2_real_match_data_8 pcre2_match_data_8
int pcre2_match_16(const pcre2_code_16 *, PCRE2_SPTR16, size_t, size_t, uint32_t, pcre2_match_data_16 *, pcre2_match_context_16 *)
struct pcre2_real_code_8 pcre2_code_8
#define PCRE2_JIT_COMPLETE
#define PCRE2_NEWLINE_CRLF
pcre2_match_context_8 * pcre2_match_context_create_8(pcre2_general_context_8 *)
pcre2_match_data_8 * pcre2_match_data_create_8(uint32_t, pcre2_general_context_8 *)
int pcre2_jit_compile_8(pcre2_code_8 *, uint32_t)
void pcre2_jit_stack_assign_8(pcre2_match_context_8 *, pcre2_jit_callback_8, void *)
#define PCRE2_JIT_PARTIAL_SOFT
pcre2_code_16 * pcre2_compile_16(PCRE2_SPTR16, size_t, uint32_t, int *, size_t *, pcre2_compile_context_16 *)
PCRE2_SPTR16 pcre2_get_mark_16(pcre2_match_data_16 *)
struct pcre2_real_jit_stack_32 pcre2_jit_stack_32
int pcre2_config_32(uint32_t, void *)
#define PCRE2_NO_UTF_CHECK
void pcre2_compile_context_free_32(pcre2_compile_context_32 *)
#define PCRE2_BSR_UNICODE
#define PCRE2_NO_START_OPTIMIZE
void pcre2_jit_stack_free_16(pcre2_jit_stack_16 *)
struct pcre2_real_compile_context_8 pcre2_compile_context_8
#define PCRE2_ERROR_PARTIAL
struct pcre2_real_compile_context_32 pcre2_compile_context_32
int pcre2_set_match_limit_32(pcre2_match_context_32 *, uint32_t)
#define PCRE2_NEWLINE_ANY
struct pcre2_real_match_context_16 pcre2_match_context_16
int pcre2_jit_compile_32(pcre2_code_32 *, uint32_t)
pcre2_compile_context_32 * pcre2_compile_context_create_32(pcre2_general_context_32 *)
int pcre2_match_8(const pcre2_code_8 *, PCRE2_SPTR8, size_t, size_t, uint32_t, pcre2_match_data_8 *, pcre2_match_context_8 *)
#define PCRE2_CONFIG_JITTARGET
static int check_ascii(const char *input)
static int regression_tests(void)
static int invalid_utf32_regression_tests(void)
static struct regression_test_case regression_test_cases[]
static int invalid_utf8_regression_tests(void)
static int invalid_utf16_regression_tests(void)