# tests/onig.test - third_party/github.com/jqlang/jq - Git at Google

 # match builtin
 # With "g", a pattern that can match the empty string yields one empty match
 # (with an empty capture) at every offset of "abc", including the end.
 [match("( )*"; "g")]
 "abc"
 [{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]

 # With "gn" the same pattern yields no matches, since all of them are empty.
 [match("( )*"; "gn")]
 "abc"
 []

 # An empty regex without capture groups matches at offsets 0, 1 and 2 of "ab".
 [match(""; "g")]
 "ab"
 [{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]

 # Case-insensitive "a" does not match any of these accented letters.
 [match("a"; "gi")]
 "āáàä"
 []

 # The regex may be given as a one-element array.
 [match(["(bar)"])]
 "foo bar"
 [{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]

 # offsets account for combining codepoints and multi-byte UTF-8
 [match("bar")]
 "ā bar with a combining codepoint U+0304"
 [{"offset": 3, "length": 3, "string": "bar", "captures":[]}]

 # matches with combining codepoints still count them in their length
 [match("bār")]
 "a bār"
 [{"offset": 2, "length": 4, "string": "bār", "captures":[]}]

 # A lazy match up to the first word boundary has length 2 for the
 # two-codepoint grapheme at the start of the input.
 [match(".+?\\b")]
 "ā two-codepoint grapheme"
 [{"offset": 0, "length": 2, "string": "ā", "captures":[]}]

 # Regex and flags given together as a two-element array. In the second match
 # the optional named group does not participate and is reported with
 # offset -1, length 0 and a null string.
 [match(["foo (?<bar123>bar)? foo", "ig"])]
 "foo bar foo foo  foo"
 [{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo  foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]

 # test builtin
 # With "gn" the empty matches of "( )*" are ignored, so test reports false.
 [test("( )*"; "gn")]
 "abc"
 [false]

 # A pattern containing a non-ASCII character matches the identical input.
 [test("ā")]
 "ā"
 [true]

 # capture builtin: each named group becomes a key of the output object.
 capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
 "xyzzy-14"
 {"a":"xyzzy","n":"14"}


 # jq-coded utilities built on match:
 #
 # The second element in these tests' inputs tests the case where the
 # fromstring matches both the head and tail of the string
 #
 # sub replaces only the first ", " of each input.
 [.[] | sub(", "; ":")]
 ["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
 ["a,b:c, d, e,f",":a,b, c, d, e,f, "]

 # A named capture is available to the replacement expression as .head.
 sub("^(?<head>.)"; "Head=\(.head) Tail=")
 "abcdef"
 "Head=a Tail=bcdef"

 # gsub replaces every ", ", including those at the head and tail.
 [.[] | gsub(", "; ":")]
 ["a,b, c, d, e,f",", a,b, c, d, e,f, "]
 ["a,b:c:d:e,f",":a,b:c:d:e,f:"]

 # Each digit is rewritten using its named capture.
 gsub("(?<d>\\d)"; ":\(.d);")
 "a1b2"
 "a:1;b:2;"

 # Adjacent matches are all replaced.
 gsub("a";"b")
 "aaaaa"
 "bbbbb"

 # Empty input stays empty when the replacement is empty.
 gsub("(.*)"; ""; "x")
 ""
 ""

 # The following cases pin how gsub treats zero-width matches
 # (empty regex, "^", "$" and a lookahead).
 # An empty regex matches once in the empty string.
 gsub(""; "a"; "g")
 ""
 "a"

 # Replacing the start anchor with nothing leaves the input unchanged.
 gsub("^"; ""; "g")
 "a"
 "a"

 # An empty regex matches before and after the single character.
 gsub(""; "a"; "g")
 "a"
 "aaa"

 # "$" matches once, at the end of the input.
 gsub("$"; "a"; "g")
 "a"
 "aa"

 # "^" matches once in the empty string.
 gsub("^"; "a")
 ""
 "a"

 # A zero-width lookahead inserts the replacement before the "u".
 gsub("(?=u)"; "u")
 "qux"
 "quux"

 # Greedy: the anchored pattern consumes the whole input in one match.
 gsub("^.*a"; "b")
 "aaa"
 "b"

 # Lazy: the anchored pattern consumes only the first "a".
 gsub("^.*?a"; "b")
 "aaa"
 "baa"

 # The following is for regression testing and should not be construed as a requirement:
 [gsub("a"; "b", "c")]
 "a"
 ["b","c"]

 # scan emits every ", " from both inputs as one flat stream.
 [.[] | scan(", ")]
 ["a,b, c, d, e,f",", a,b, c, d, e,f, "]
 [", ",", ",", ",", ",", ",", ",", ",", "]

 # sub, gsub and scan compared on ", *" (a comma followed by zero or more spaces).
 [.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
 ["a,b, c, d, e,f",", a,b, c, d, e,f, "]
 [[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]

 # The same comparison on ", +" (a comma followed by at least one space).
 [.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
 ["a,b, c, d, e,f",", a,b, c, d, e,f, "]
 [[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]

 # scan with the "i" flag matches runs of "b" in either case; the empty input yields nothing.
 [.[] | scan("b+"; "i")]
 ["","bBb","abcABBBCabbbc"]
 ["bBb","b","BBB","bbb"]

 # reference to named captures
 # Each match starts with a captured character and runs up to the next "a";
 # only the captured character survives in the replacement.
 gsub("(?<x>.)[^a]*"; "+\(.x)-")
 "Abcabc"
 "+A-+a-"

 # Captures can be piped through filters inside the replacement string;
 # "CD" has no digit and is left untouched.
 gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
 "A1 B2 CD"
 "a1 b2 CD"

 # Only the first character after each word boundary is lowercased.
 gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
 "ABC DEF"
 "aBC dEF"

 # Both parts of the pattern may be empty; the trailing "Z" in the expected
 # output comes from the empty match at the end of the input.
 gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
 "123foo456bar"
 "ZfooZbarZ"

 # utf-8
 # A multi-byte character is captured and re-emitted intact.
 sub("(?<x>.)"; "\(.x)!")
 "’"
 "’!"

 # A replacement expression that emits several outputs makes sub emit one
 # result per output.
 [sub("a"; "b", "c")]
 "a"
 ["b","c"]

 # With sub, each replacement alternative is applied to the first match only.
 [sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
 "aB"
 ["AB","aB","cB"]

 # With gsub, each replacement alternative is applied to every match.
 [gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
 "aB"
 ["AB","ab","cc"]

 # splits and _nwise
 # Splitting on an empty regex yields an empty string before and after the
 # individual characters. (Only the splits case is visible in this section.)
 [splits("")]
 "ab"
 ["","a","b",""]
	# match builtin
	# With "g", a pattern that can match the empty string yields one empty match
	# (with an empty capture) at every offset of "abc", including the end.
	[match("( )*"; "g")]
	"abc"
	[{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]

	# With "gn" the same pattern yields no matches, since all of them are empty.
	[match("( )*"; "gn")]
	"abc"
	[]

	# An empty regex without capture groups matches at offsets 0, 1 and 2 of "ab".
	[match(""; "g")]
	"ab"
	[{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]

	# Case-insensitive "a" does not match any of these accented letters.
	[match("a"; "gi")]
	"āáàä"
	[]

	# The regex may be given as a one-element array.
	[match(["(bar)"])]
	"foo bar"
	[{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]

	# offsets account for combining codepoints and multi-byte UTF-8
	[match("bar")]
	"ā bar with a combining codepoint U+0304"
	[{"offset": 3, "length": 3, "string": "bar", "captures":[]}]

	# matches with combining codepoints still count them in their length
	[match("bār")]
	"a bār"
	[{"offset": 2, "length": 4, "string": "bār", "captures":[]}]

	# A lazy match up to the first word boundary has length 2 for the
	# two-codepoint grapheme at the start of the input.
	[match(".+?\\b")]
	"ā two-codepoint grapheme"
	[{"offset": 0, "length": 2, "string": "ā", "captures":[]}]

	# Regex and flags given together as a two-element array. The input has TWO
	# spaces between the last two "foo"s: the second match needs one space from
	# each side of the optional group, which does not participate and is
	# reported with offset -1, length 0 and a null string.
	[match(["foo (?<bar123>bar)? foo", "ig"])]
	"foo bar foo foo  foo"
	[{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo  foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]

	# test builtin
	# With "gn" the empty matches of "( )*" are ignored, so test reports false.
	[test("( )*"; "gn")]
	"abc"
	[false]

	# A pattern containing a non-ASCII character matches the identical input.
	[test("ā")]
	"ā"
	[true]

	# capture builtin: each named group becomes a key of the output object.
	capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
	"xyzzy-14"
	{"a":"xyzzy","n":"14"}


	# jq-coded utilities built on match:
	#
	# The second element in these tests' inputs tests the case where the
	# fromstring matches both the head and tail of the string
	#
	# sub replaces only the first ", " of each input.
	[.[] | sub(", "; ":")]
	["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
	["a,b:c, d, e,f",":a,b, c, d, e,f, "]

	# A named capture is available to the replacement expression as .head.
	sub("^(?<head>.)"; "Head=\(.head) Tail=")
	"abcdef"
	"Head=a Tail=bcdef"

	# gsub replaces every ", ", including those at the head and tail.
	[.[] | gsub(", "; ":")]
	["a,b, c, d, e,f",", a,b, c, d, e,f, "]
	["a,b:c:d:e,f",":a,b:c:d:e,f:"]

	# Each digit is rewritten using its named capture.
	gsub("(?<d>\\d)"; ":\(.d);")
	"a1b2"
	"a:1;b:2;"

	# Adjacent matches are all replaced.
	gsub("a";"b")
	"aaaaa"
	"bbbbb"

	# Empty input stays empty when the replacement is empty.
	gsub("(.*)"; ""; "x")
	""
	""

	# The following cases pin how gsub treats zero-width matches
	# (empty regex, "^", "$" and a lookahead).
	# An empty regex matches once in the empty string.
	gsub(""; "a"; "g")
	""
	"a"

	# Replacing the start anchor with nothing leaves the input unchanged.
	gsub("^"; ""; "g")
	"a"
	"a"

	# An empty regex matches before and after the single character.
	gsub(""; "a"; "g")
	"a"
	"aaa"

	# "$" matches once, at the end of the input.
	gsub("$"; "a"; "g")
	"a"
	"aa"

	# "^" matches once in the empty string.
	gsub("^"; "a")
	""
	"a"

	# A zero-width lookahead inserts the replacement before the "u".
	gsub("(?=u)"; "u")
	"qux"
	"quux"

	# Greedy: the anchored pattern consumes the whole input in one match.
	gsub("^.*a"; "b")
	"aaa"
	"b"

	# Lazy: the anchored pattern consumes only the first "a".
	gsub("^.*?a"; "b")
	"aaa"
	"baa"

	# The following is for regression testing and should not be construed as a requirement:
	[gsub("a"; "b", "c")]
	"a"
	["b","c"]

	# scan emits every ", " from both inputs as one flat stream.
	[.[] | scan(", ")]
	["a,b, c, d, e,f",", a,b, c, d, e,f, "]
	[", ",", ",", ",", ",", ",", ",", ",", "]

	# sub, gsub and scan compared on ", *" (a comma followed by zero or more spaces).
	[.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
	["a,b, c, d, e,f",", a,b, c, d, e,f, "]
	[[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]

	# The same comparison on ", +" (a comma followed by at least one space).
	[.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
	["a,b, c, d, e,f",", a,b, c, d, e,f, "]
	[[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]

	# scan with the "i" flag matches runs of "b" in either case; the empty input yields nothing.
	[.[] | scan("b+"; "i")]
	["","bBb","abcABBBCabbbc"]
	["bBb","b","BBB","bbb"]

	# reference to named captures
	# Each match starts with a captured character and runs up to the next "a";
	# only the captured character survives in the replacement.
	gsub("(?<x>.)[^a]*"; "+\(.x)-")
	"Abcabc"
	"+A-+a-"

	# Captures can be piped through filters inside the replacement string;
	# "CD" has no digit and is left untouched.
	gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
	"A1 B2 CD"
	"a1 b2 CD"

	# Only the first character after each word boundary is lowercased.
	gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
	"ABC DEF"
	"aBC dEF"

	# Both parts of the pattern may be empty; the trailing "Z" in the expected
	# output comes from the empty match at the end of the input.
	gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
	"123foo456bar"
	"ZfooZbarZ"

	# utf-8
	# A multi-byte character is captured and re-emitted intact.
	sub("(?<x>.)"; "\(.x)!")
	"’"
	"’!"

	# A replacement expression that emits several outputs makes sub emit one
	# result per output.
	[sub("a"; "b", "c")]
	"a"
	["b","c"]

	# With sub, each replacement alternative is applied to the first match only.
	[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
	"aB"
	["AB","aB","cB"]

	# With gsub, each replacement alternative is applied to every match.
	[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
	"aB"
	["AB","ab","cc"]

	# splits and _nwise
	# Splitting on an empty regex yields an empty string before and after the
	# individual characters. (Only the splits case is visible in this section.)
	[splits("")]
	"ab"
	["","a","b",""]