blob: c7d237a2fdc4a0b0ebdc50657a558461823502d3 [file] [log] [blame]
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
# features that are not compatible with the 8-bit library, or which give
# different output in 16-bit or 32-bit mode. The output for the two widths is
# different, so they have separate output files.
#forbid_utf
#newline_default LF ANY ANYCRLF
/[^\x{c4}]/IB
/\x{100}/I
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
/[\h]/B
>\x09<
/[\h]+/B
>\x09\x20\xa0<
/[\v]/B
/[^\h]/B
/\h+/I
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
/[\h\x{dc00}]+/IB
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
/\H+/I
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
/\v+/I
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/[\v\x{dc00}]+/IB
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/\V+/I
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
/\R+/I,bsr=unicode
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
/^\x{ffff}+/i
\x{ffff}
/^\x{ffff}?/i
\x{ffff}
/^\x{ffff}*/i
\x{ffff}
/^\x{ffff}{3}/i
\x{ffff}\x{ffff}\x{ffff}
/^\x{ffff}{0,3}/i
\x{ffff}
/[^\x00-a]{12,}[^b-\xff]*/B
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
/^[\x{1234}\x{4321}]{2,4}?/
\x{1234}\x{1234}\x{1234}
# Check maximum non-UTF character size for the 16-bit library.
/\x{ffff}/
A\x{ffff}B
/\x{10000}/
/\o{20000}/
# Check maximum character size for the 32-bit library. These will all give
# errors in the 16-bit library.
/\x{110000}/
/\x{7fffffff}/
/\x{80000000}/
/\x{ffffffff}/
/\x{100000000}/
/\o{17777777777}/
/\o{20000000000}/
/\o{37777777777}/
/\o{40000000000}/
/\x{7fffffff}\x{7fffffff}/I
/\x{80000000}\x{80000000}/I
/\x{ffffffff}\x{ffffffff}/I
# Non-UTF characters
/.{2,3}/
\x{400000}\x{400001}\x{400002}\x{400003}
/\x{400000}\x{800000}/IBi
# Check character ranges
/[\H]/IB
/[\V]/IB
/(*THEN:\[A]{65501})/expand
# We can use pcre2test's utf8_input modifier to create wide pattern characters,
# even though this test is run when UTF is not supported.
/a\x{d800}b/utf8_input
aí €b
a\x{d800}b
a\o{154000}b
\= Expect warning unless 32bit
a\N{U+d800}b
/a\x{ffff}b/utf8_input
aï¿¿b
a\x{ffff}b
a\o{177777}b
a\N{U+ffff}b
/abý¿¿¿¿¿z/utf8_input
abý¿¿¿¿¿z
ab\x{7fffffff}z
ab\o{17777777777}z
ab\N{U+7fffffff}z
/abÿý¿¿¿¿¿z/utf8_input
abÿý¿¿¿¿¿z
ab\x{ffffffff}z
/abÿAz/utf8_input
abÿAz
ab\x{80000041}z
\= Expect no match
abAz
aAz
ab\377Az
ab\xff\N{U+0041}z
ab\N{U+ff}\N{U+41}z
/ab\x{80000041}z/
ab\x{80000041}z
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
/abc/substitute_extended,replace=>\777<
abc
/abc/substitute_extended,replace=>\o{012345}<
abc
# Character range merging tests
/[\x{100}-\x{200}\H\x{8000}-\x{9000}]/B
/[\x{100}-\x{200}\V\x{8000}-\x{9000}]/B
/[\x00-\x{6000}\x{3000}-\x{ffff}]#[\x00-\x{6000}\x{3000}-\x{ffff}]{5,7}?/B
/[\x00-\x{6000}\x{3000}-\x{ffffffff}]#[\x00-\x{6000}\x{3000}-\x{ffffffff}]{5,7}?/B
/[\x00-\x2f\x11-\xff]*?!/B
abcd!e
/i/turkish_casing
# Character list tests
/([\x{100}-\x{7fff}\x{9000}\x{9002}\x{9004}\x{9006}\x{9008}\x{10000}-\x{7fffffff}]{3,8}?).#/B
\x{9001}\x{9007}\x{8000}\x{ffff}\x{9002}\x{7fff}\x{10000}\x{7fffffff}\x{500000}\x{9006}#
/([\x{3000}\x{3001}\x{3003}\x{3004}\x{3006}\x{3007}\x{8000}-\x{ffff}\x{100001}\x{100002}\x{100004}\x{100005}\x{100007}\x{100008}\x{10000a}\x{10000b}\x{80000000}-\x{ffffffff}]{5,}).#/B
\x{2fff}\x{3002}\x{7fff}\x{100000}\x{7fffffff}\x{3000}\x{3007}\x{8000}\x{ffff}\x{100001}\x{10000b}\x{80000000}\x{ffffffff}\x{3000}#
/([^\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}]+).#/B
\x{4000}\x{4002}\x{4004}\x{4005}\x{4007}\x{4009}\x{400a}\x{3fff}\x{4001}\x{4003}\x{4006}\x{4008}\x{400b}\x{100}#
\x{f000}\x{f002}\x{f004}\x{f005}\x{f007}\x{f009}\x{f00a}\x{efff}\x{f001}\x{f003}\x{f006}\x{f008}\x{f00b}\x{100}#
\x{100000}\x{100002}\x{100004}\x{100005}\x{100007}\x{100009}\x{10000a}\x{fffff}\x{100001}\x{100003}\x{100006}\x{100008}\x{10000b}\x{100}#
\x{a0000000}\x{a0000002}\x{a0000004}\x{a0000005}\x{a0000007}\x{a0000009}\x{a000000a}\x{9fffffff}\x{a0000001}\x{a0000003}\x{a0000006}\x{a0000008}\x{a000000b}\x{100}#
# --------------
# EXTENDED CHARACTER CLASSES (UTS#18)
# META_BIGVALUE tests
/\x{80000000}/B
\x{80000000}
\= Expect no match
\x{7fffffff}
\x{80000001}
/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B
\x{80000002}
\x{8fffffff}
\= Expect no match
\x{7fffffff}
\x{90000000}
/\x{80000000}/B,alt_extended_class
\x{80000000}
\= Expect no match
\x{7fffffff}
\x{80000001}
/[\x{80000000}-\x{8000000f}\x{8fffffff}]/B,alt_extended_class
\x{80000002}
\x{8fffffff}
\= Expect no match
\x{7fffffff}
\x{90000000}
/[\x{80000000}-\x{8000000f}--\x{80000002}]/B,alt_extended_class
\x{80000001}
\x{80000003}
\= Expect no match
\x{80000002}
/[[\x{80000000}-\x{8000000f}]--[\x{80000002}]]/B,alt_extended_class
\x{80000001}
\x{80000003}
\= Expect no match
\x{80000002}
# --------------
# EXTENDED CHARACTER CLASSES (Perl)
# META_BIGVALUE tests
/(?[[\x{80000000}-\x{8000000f}]+\x{8fffffff}])/B
\x{80000002}
\x{8fffffff}
\= Expect no match
\x{7fffffff}
\x{90000000}
/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B
\x{80000001}
\x{80000003}
\= Expect no match
\x{80000002}
/(?[[\x{80000000}-\x{8000000f}]-\x{80000002}])/B
\x{80000001}
\x{80000003}
\= Expect no match
\x{80000002}
# --------------
# End of testinput11