Skip to content

Commit dfa4fd3

Browse files
committed
Bench unmarshal
1 parent 9342ce6 commit dfa4fd3

2 files changed

Lines changed: 217 additions & 1 deletion

File tree

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,7 @@ docs-cleanup:
4848
cleanup: dist-cleanup test-cleanup
4949

5050
bench-paths:
51-
@PYTHONHASHSEED=0 python tests/benchmarks/bench_paths.py --paths 500 --templates-ratio 0.7 --lookups 2000 --output bench-paths.json
51+
@PYTHONHASHSEED=0 python tests/benchmarks/bench_paths.py --paths 500 --templates-ratio 0.7 --lookups 2000 --output bench-paths.json
52+
53+
bench-unmarshal:
54+
@PYTHONHASHSEED=0 python tests/benchmarks/bench_unmarshal.py --items 2000 --repeats 7 --warmup 2 --seed 1 --output bench-unmarshal.json
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/usr/bin/env python3
2+
"""Benchmark for SchemaUnmarshaller.unmarshal on a schema that exercises
3+
nested objects, arrays, and composition (oneOf / allOf).
4+
5+
This is the code path that the `feature/validation-context` branch
6+
modifies: validation now builds a `ValidationState` that the unmarshaller
7+
reuses, so we expect changes to show up here.
8+
"""
9+
import argparse
10+
import gc
11+
import json
12+
import random
13+
import statistics
14+
import time
15+
from dataclasses import dataclass
16+
from typing import Any
17+
from typing import Dict
18+
from typing import List
19+
20+
from jsonschema_path import SchemaPath
21+
22+
from openapi_core.unmarshalling.schemas import (
23+
oas30_write_schema_unmarshallers_factory,
24+
)
25+
26+
27+
@dataclass(frozen=True)
class Result:
    """Settings and raw timings of one benchmark run.

    ``as_dict`` turns the instance into a JSON-serialisable summary.
    """

    # Number of values processed in each timed pass.
    items: int
    # Number of timed passes requested.
    repeats: int
    # Number of untimed warmup passes requested.
    warmup: int
    # One elapsed duration (in seconds) per timed pass.
    seconds: List[float]

    def as_dict(self) -> Dict[str, Any]:
        """Return the settings plus summary statistics as a plain dict.

        Returns:
            A dict with the keys ``items``, ``repeats``, ``warmup``,
            ``seconds``, ``median_s``, ``mean_s``, ``stdev_s`` (population
            standard deviation) and ``ops_per_sec_median``.

            When ``seconds`` is empty the three statistics are ``None``
            instead of raising ``statistics.StatisticsError``.
            ``ops_per_sec_median`` is ``None`` whenever the median is
            missing or exactly zero, so the result never raises
            ``ZeroDivisionError`` and never contains a non-JSON value
            such as infinity.
        """
        samples = self.seconds
        if samples:
            # Compute each statistic exactly once.
            median = statistics.median(samples)
            mean = statistics.mean(samples)
            stdev = statistics.pstdev(samples)
        else:
            median = mean = stdev = None

        # A median of 0.0 is possible on a coarse clock; throughput is
        # undefined in that case, as it is when there are no samples.
        ops_per_sec = self.items / median if median else None

        return {
            "items": self.items,
            "repeats": self.repeats,
            "warmup": self.warmup,
            "seconds": self.seconds,
            "median_s": median,
            "mean_s": mean,
            "stdev_s": stdev,
            "ops_per_sec_median": ops_per_sec,
        }
46+
47+
48+
# The benchmark schema combines a nested object, an array of objects and
# both composition keywords (oneOf / allOf). It is meant to resemble a
# realistic API payload, where the validation-context refactor is expected
# to pay off by not re-resolving composed schemas at unmarshal time.
# The composed parts are named separately so the top-level shape is easy
# to scan; each one is used exactly once in SCHEMA below.

# Plain nested object.
_ADDRESS_SCHEMA: Dict[str, Any] = {
    "type": "object",
    "properties": {
        "street": {"type": "string"},
        "city": {"type": "string"},
        "zip": {"type": "string"},
    },
}

# oneOf: the two branches differ in their second required property.
_CONTACT_SCHEMA: Dict[str, Any] = {
    "oneOf": [
        {
            "type": "object",
            "properties": {
                "kind": {"type": "string"},
                "email": {"type": "string"},
            },
            "required": ["kind", "email"],
        },
        {
            "type": "object",
            "properties": {
                "kind": {"type": "string"},
                "phone": {"type": "string"},
            },
            "required": ["kind", "phone"],
        },
    ]
}

# allOf: two object fragments that each contribute one property.
_AUDIT_SCHEMA: Dict[str, Any] = {
    "allOf": [
        {
            "type": "object",
            "properties": {
                "created_by": {"type": "string"},
            },
        },
        {
            "type": "object",
            "properties": {
                "created_at": {"type": "string"},
            },
        },
    ]
}

# Array whose elements are objects.
_LINE_ITEMS_SCHEMA: Dict[str, Any] = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "sku": {"type": "string"},
            "qty": {"type": "integer"},
            "price": {"type": "number"},
        },
    },
}

SCHEMA: Dict[str, Any] = {
    "type": "object",
    "properties": {
        "id": {"type": "integer"},
        "name": {"type": "string"},
        "tags": {"type": "array", "items": {"type": "string"}},
        "address": _ADDRESS_SCHEMA,
        "contact": _CONTACT_SCHEMA,
        "audit": _AUDIT_SCHEMA,
        "items": _LINE_ITEMS_SCHEMA,
    },
}

# Minimal OpenAPI 3.0 document with no paths.
SPEC: Dict[str, Any] = {
    "openapi": "3.0.0",
    "info": {"title": "bench", "version": "0"},
    "paths": {},
}
120+
121+
122+
def build_values(n: int, seed: int) -> List[Dict[str, Any]]:
    """Generate ``n`` reproducible payload dicts for the benchmark.

    Args:
        n: Number of payloads to produce; indices run from 0 to ``n - 1``.
        seed: Seed for the private ``random.Random`` instance, so the same
            ``(n, seed)`` pair always yields the same list.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``tags``,
        ``address``, ``contact``, ``audit`` and ``items``.
    """
    rng = random.Random(seed)

    def make_payload(index: int) -> Dict[str, Any]:
        # Even indices get an e-mail contact and odd ones a phone contact,
        # so both oneOf branches are exercised.
        contact = (
            {"kind": "email", "email": f"u{index}@example.com"}
            if index % 2 == 0
            else {"kind": "phone", "phone": f"+1-555-{index:04d}"}
        )

        # The order of the draws below (tags, street, zip, line items)
        # determines the generated data for a given seed; keep it stable.
        tags = [f"t{rng.randrange(100)}" for _ in range(5)]
        address = {
            "street": f"{rng.randrange(9999)} Main St",
            "city": "Springfield",
            "zip": f"{rng.randrange(99999):05d}",
        }
        line_items = [
            {
                "sku": f"sku-{rng.randrange(10_000)}",
                "qty": rng.randrange(100),
                "price": rng.random() * 100,
            }
            for _ in range(4)
        ]

        return {
            "id": index,
            "name": f"item-{index}",
            "tags": tags,
            "address": address,
            "contact": contact,
            "audit": {
                "created_by": "alice",
                "created_at": "2026-01-01T00:00:00Z",
            },
            "items": line_items,
        }

    return [make_payload(index) for index in range(n)]
157+
158+
159+
def run_once(unmarshaller: Any, values: List[Dict[str, Any]]) -> float:
    """Time one full pass of ``unmarshaller.unmarshal`` over ``values``.

    Args:
        unmarshaller: Any object exposing an ``unmarshal(value)`` method.
        values: Payloads passed to ``unmarshal`` one by one, in order.

    Returns:
        Elapsed seconds for the whole pass, measured with
        ``time.perf_counter``. Results of ``unmarshal`` are discarded.
    """
    started = time.perf_counter()
    for payload in values:
        unmarshaller.unmarshal(payload)
    finished = time.perf_counter()
    return finished - started
164+
165+
166+
def main() -> None:
    """Parse CLI options, run the unmarshal benchmark and report timings.

    The flow is: build an unmarshaller for ``SCHEMA`` via
    ``oas30_write_schema_unmarshallers_factory.create(spec, schema)``,
    generate ``--items`` payloads with ``build_values``, run ``--warmup``
    untimed passes followed by ``--repeats`` timed passes, then print the
    summary as JSON and, if ``--output`` is given, also write it to that
    file.

    Invalid numeric options (``--repeats`` below 1, negative ``--items`` or
    ``--warmup``) are rejected up front through ``ArgumentParser.error``,
    which prints a usage message and exits, instead of failing later while
    computing statistics or producing a meaningless report.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--items",
        type=int,
        default=2000,
        help="number of payloads unmarshalled per pass",
    )
    ap.add_argument(
        "--repeats",
        type=int,
        default=7,
        help="number of timed passes (at least 1)",
    )
    ap.add_argument(
        "--warmup",
        type=int,
        default=2,
        help="number of untimed passes run before timing starts",
    )
    ap.add_argument(
        "--seed",
        type=int,
        default=1,
        help="seed for the generated payloads",
    )
    ap.add_argument(
        "--output",
        type=str,
        default="",
        help="optional path of a JSON file to write the results to",
    )
    ap.add_argument(
        "--no-gc",
        action="store_true",
        help="disable the garbage collector during warmup and timed passes",
    )
    args = ap.parse_args()

    # Fail fast with a usage error: zero repeats leaves no samples to
    # summarise, and negative counts cannot be honoured.
    if args.repeats < 1:
        ap.error("--repeats must be at least 1")
    if args.warmup < 0:
        ap.error("--warmup must not be negative")
    if args.items < 0:
        ap.error("--items must not be negative")

    spec = SchemaPath.from_dict(SPEC)
    schema = SchemaPath.from_dict(SCHEMA)
    unmarshaller = oas30_write_schema_unmarshallers_factory.create(
        spec, schema
    )

    values = build_values(args.items, args.seed)

    # Remember the collector state so it is only re-enabled if this
    # function was the one that turned it off.
    gc_was_enabled = gc.isenabled()
    if args.no_gc:
        gc.disable()
    try:
        # Warmup passes are run for their side effects only; their
        # timings are thrown away.
        for _ in range(args.warmup):
            run_once(unmarshaller, values)

        seconds: List[float] = [
            run_once(unmarshaller, values) for _ in range(args.repeats)
        ]
    finally:
        # Restore the collector even when a pass raises.
        if args.no_gc and gc_was_enabled:
            gc.enable()

    result = Result(
        items=args.items,
        repeats=args.repeats,
        warmup=args.warmup,
        seconds=seconds,
    )

    payload = result.as_dict()
    print(json.dumps(payload, indent=2, sort_keys=True))

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, sort_keys=True)
210+
211+
212+
# Run the benchmark only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)