Skip to content

Commit 9af2322

Browse files
committed
Add data validation pre-commit
1 parent 29ffd35 commit 9af2322

File tree

5 files changed

+63
-15
lines changed

5 files changed

+63
-15
lines changed

.github/workflows/validate-data.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ on:
55
branches: [ main ]
66
paths:
77
- 'scripts/validate_data.py'
8+
- 'eval.schema.json'
89
- 'data/**'
910
pull_request:
1011
paths:
1112
- 'scripts/validate_data.py'
13+
- 'eval.schema.json'
1214
- 'data/**'
1315

1416
jobs:
@@ -31,8 +33,5 @@ jobs:
3133
# Disable cache because there are no requirements lockfiles
3234
enable-cache: false
3335

34-
- name: Download latest schema
35-
run: wget https://raw.githubusercontent.com/evaleval/evalHub/refs/heads/main/schema/eval.schema.json
36-
3736
- name: Validate data
38-
run: uv run scripts/validate_data.py --schema-path eval.schema.json data
37+
run: uv run pre-commit run --all-files

.pre-commit-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
repos:
2+
- repo: local
3+
hooks:
4+
- id: validate-data
5+
name: Validate data
6+
entry: uv run python scripts/validate_data.py --schema-path eval.schema.json
7+
exclude: '^.*\.schema\.json$'
8+
language: system
9+
types_or: [json]

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,29 @@
11
# every_eval_ever
2+
3+
## Data Validation
4+
5+
This repository has a pre-commit hook that validates that JSON data files conform to the JSON schema. The hook requires [uv](https://docs.astral.sh/uv/) for dependency management.
6+
7+
To run the pre-commit on git staged files only:
8+
9+
```sh
10+
uv run pre-commit run
11+
```
12+
13+
To run the pre-commit on all files:
14+
15+
```sh
16+
uv run pre-commit run --all-files
17+
```
18+
19+
To run the pre-commit on specific files:
20+
21+
```sh
22+
uv run pre-commit run --files /path/to/data/a.json /path/to/data/b.json
23+
```
24+
25+
To install the pre-commit hook so that it runs automatically before each `git commit` (optional):
26+
27+
```sh
28+
uv run pre-commit install
29+
```

pyproject.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[project]
2+
name = "every-eval-ever"
3+
version = "0.1.0"
4+
description = "Every Eval Ever"
5+
readme = "README.md"
6+
authors = [
7+
{ name = "EvalEval", email = "[email protected]" }
8+
]
9+
requires-python = ">=3.12"
10+
dependencies = [
11+
"jsonschema>=4.25.1",
12+
]
13+
14+
[dependency-groups]
15+
dev = [
16+
"pre-commit>=4.3.0",
17+
]

scripts/validate_data.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
1-
# /// script
2-
# requires-python = ">=3.12"
3-
# dependencies = [
4-
# "jsonschema>=4.25",
5-
# ]
6-
# ///
7-
81
import argparse
92
import json
103
import os
4+
from typing import List
115

126
from jsonschema.exceptions import ValidationError
137
from jsonschema.protocols import Validator
@@ -27,12 +21,12 @@ def validate_file(file_path: str, validator: Validator) -> None:
2721
validator.validate(instance)
2822

2923

30-
def expand_paths(paths: str) -> str:
24+
def expand_paths(paths: List[str]) -> List[str]:
3125
"""Expand folders to file paths"""
32-
file_paths = []
26+
file_paths: List[str] = []
3327
for path in paths:
3428
if os.path.isfile(path) and path.endswith(".json"):
35-
file_paths.append(file_paths)
29+
file_paths.append(path)
3630
elif os.path.isdir(path):
3731
for root, _, file_names in os.walk(path):
3832
for file_name in file_names:
@@ -53,7 +47,7 @@ def annotate_error(file_path: str, message: str, **kwargs) -> None:
5347
def main() -> None:
5448
parser = argparse.ArgumentParser(
5549
prog="validate_data",
56-
description="Validates that the JSON data conforms to the Pydantic schema",
50+
description="Validates that the JSON data conforms to the JSON schema",
5751
)
5852
parser.add_argument(
5953
"paths", nargs="+", type=str, help="File or folder paths to the JSON data"
@@ -108,6 +102,7 @@ def main() -> None:
108102
print(f"{file_path}")
109103
print(" " + message)
110104
print()
105+
raise
111106
print(f"{num_passed} file(s) passed; {num_failed} file(s) failed")
112107
print()
113108
if num_failed > 0:

0 commit comments

Comments
 (0)