Slightly speed up the contains keyword.
Avoids re-creating the same subschema validator for every element while
validating arrays; it is now created once, before the loop.
In a quick benchmark (added here), run on my local machine (an M2 Mini),
this goes from:
```
baseline: Mean +- std dev: 3.55 us +- 0.04 us
beginning: Mean +- std dev: 3.37 ms +- 0.02 ms
middle: Mean +- std dev: 3.37 ms +- 0.03 ms
end: Mean +- std dev: 3.36 ms +- 0.02 ms
invalid: Mean +- std dev: 3.40 ms +- 0.02 ms
```
to:
```
baseline: Mean +- std dev: 4.27 us +- 0.05 us
beginning: Mean +- std dev: 2.65 ms +- 0.01 ms
middle: Mean +- std dev: 2.66 ms +- 0.02 ms
end: Mean +- std dev: 2.67 ms +- 0.02 ms
invalid: Mean +- std dev: 2.70 ms +- 0.02 ms
```
on the included example (synthetic of course, but not ridiculously so).
(The timing does not seem to depend on how far into the array we get
before finding a match, which is interesting, but looking into that
probably requires a benchmark with a more interesting subschema to
match against.)
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 592c941..b91e882 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,8 @@
+v4.21.1
+=======
+
+* Slightly speed up the ``contains`` keyword by removing some unnecessary validator (re-)creation.
+
v4.21.0
=======
diff --git a/jsonschema/_keywords.py b/jsonschema/_keywords.py
index 69d7580..f30f954 100644
--- a/jsonschema/_keywords.py
+++ b/jsonschema/_keywords.py
@@ -95,8 +95,10 @@
min_contains = schema.get("minContains", 1)
max_contains = schema.get("maxContains", len(instance))
+ contains_validator = validator.evolve(schema=contains)
+
for each in instance:
- if validator.evolve(schema=contains).is_valid(each):
+ if contains_validator.is_valid(each):
matches += 1
if matches > max_contains:
yield ValidationError(
diff --git a/jsonschema/benchmarks/contains.py b/jsonschema/benchmarks/contains.py
new file mode 100644
index 0000000..739cd04
--- /dev/null
+++ b/jsonschema/benchmarks/contains.py
@@ -0,0 +1,28 @@
+"""
+A benchmark for validation of the `contains` keyword.
+"""
+
+from pyperf import Runner
+
+from jsonschema import Draft202012Validator
+
+schema = {
+ "type": "array",
+ "contains": {"const": 37},
+}
+validator = Draft202012Validator(schema)
+
+size = 1000
+beginning = [37] + [0] * (size - 1)
+middle = [0] * (size // 2) + [37] + [0] * (size // 2)
+end = [0] * (size - 1) + [37]
+invalid = [0] * size
+
+
+if __name__ == "__main__":
+ runner = Runner()
+ runner.bench_func("baseline", lambda: validator.is_valid([]))
+ runner.bench_func("beginning", lambda: validator.is_valid(beginning))
+ runner.bench_func("middle", lambda: validator.is_valid(middle))
+ runner.bench_func("end", lambda: validator.is_valid(end))
+ runner.bench_func("invalid", lambda: validator.is_valid(invalid))