Skip to content

Commit 2065466

Browse files
author
Arturo R Montesinos
committed
Validate AI_CURATOR_RECIPE front-matter and headings; document venv setup
1 parent b51c382 commit 2065466

3 files changed

Lines changed: 150 additions & 3 deletions

File tree

docs/curation/PROMPT_instrument_curator_project.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,36 @@ Copy `docs/curation/CURATION_LOG_template.md` to `docs/curation/CURATION_LOG.md`
4242
- `status = in-progress`
4343

4444
## 4. Validate the Recipe
45-
Add or verify a CI step that validates the new recipe:
45+
46+
Create and activate a local Python virtual environment (per‑project, not global):
4647

4748
```bash
48-
python -m jsonschema -F frontmatter docs/curation/ai_curator_recipe.schema.json AI_CURATOR_RECIPE.md
49+
python3 -m venv .venv
50+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
4951
```
5052

53+
Install the minimal tools needed for recipe validation:
54+
55+
```bash
56+
pip install jsonschema pyyaml
57+
```
58+
59+
Then run the validator script from the repo root:
60+
61+
```bash
62+
python3 ./scripts/validate_recipe.py \
63+
docs/curation/ai_curator_recipe.schema.json \
64+
AI_CURATOR_RECIPE.md
65+
```
66+
67+
This checks:
68+
69+
- YAML front‑matter against ai_curator_recipe.schema.json
70+
- The presence and order of required ## section headings
71+
72+
Add or verify a CI step that runs the same `scripts/validate_recipe.py` command against the recipe.
73+
74+
5175
If the validation fails, fix headings or front-matter until it passes.
5276

5377
## 5. Add Tooling Hooks

docs/curation/ai_curator_recipe.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
],
2626
"additionalProperties": false,
2727
"unevaluatedProperties": false,
28-
"$comment": "Front-matter validation only; headings validated by markdown-linter rule.",
28+
"$comment": "Front-matter validated via JSON Schema; '## ' headings validated by scripts/validate_recipe.py against definitions.required_headings.",
2929
"definitions": {
3030
"required_headings": {
3131
"description": "List of section headings that must appear in order.",

scripts/validate_recipe.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import json
4+
from pathlib import Path
5+
6+
import yaml
7+
from jsonschema import Draft202012Validator
8+
9+
10+
def _split_frontmatter_and_body(md_path: Path) -> tuple[dict, str]:
11+
text = md_path.read_text(encoding="utf-8")
12+
if not text.startswith("---"):
13+
raise ValueError(f"{md_path} does not start with YAML frontmatter ('---').")
14+
15+
first_sep = text.find("\n---", 3)
16+
if first_sep == -1:
17+
raise ValueError(f"{md_path} frontmatter block not properly terminated with '---'.")
18+
19+
frontmatter_block = text[0:first_sep]
20+
body = text[first_sep + len("\n---") :].lstrip("\n")
21+
22+
frontmatter = yaml.safe_load(frontmatter_block.lstrip("-\n")) or {}
23+
return frontmatter, body
24+
25+
26+
def _extract_headings(markdown_body: str) -> list[str]:
27+
"""Return the text of headings that start with '## ' only."""
28+
headings: list[str] = []
29+
for line in markdown_body.splitlines():
30+
# we care only about second-level headings, literally starting with "## "
31+
if line.startswith("## "):
32+
heading_text = line[len("## ") :].strip()
33+
if heading_text:
34+
headings.append(heading_text)
35+
return headings
36+
37+
38+
def _check_heading_order(markdown_body: str, expected_order: list[str]) -> list[str]:
    """Check that the required headings appear in the body, in order.

    Each entry of ``expected_order`` must be a substring of some ``## ``
    heading, and the matching headings must occur in the same relative order
    as the entries. Extra headings between or around them are allowed.

    Args:
        markdown_body: Markdown text (without front-matter).
        expected_order: Heading texts that must be present, in order.

    Returns:
        An empty list when every expected heading was found in order;
        otherwise a single-element list describing the first expected heading
        that could not be matched. Checking stops at the first failure because
        the scan position is exhausted at that point and later results would
        not be meaningful.
    """
    actual = _extract_headings(markdown_body)

    # One shared iterator gives the greedy left-to-right match: any() consumes
    # headings up to and including the match, so the next expected entry can
    # only match a heading that comes later in the document.
    remaining = iter(actual)
    for expected in expected_order:
        if not any(expected in heading for heading in remaining):
            return [
                f"Missing or out-of-order heading containing: {expected!r}.\n"
                f" All '## ' headings: {actual}"
            ]
    return []
74+
75+
76+
def main(argv: list[str] | None = None) -> int:
77+
argv = argv or sys.argv[1:]
78+
if len(argv) != 2:
79+
print("Usage: validate_recipe SCHEMA_JSON_PATH AI_CURATOR_RECIPE.md", file=sys.stderr)
80+
return 1
81+
82+
schema_path = Path(argv[0])
83+
recipe_path = Path(argv[1])
84+
85+
if not schema_path.is_file():
86+
print(f"Schema file not found: {schema_path}", file=sys.stderr)
87+
return 1
88+
if not recipe_path.is_file():
89+
print(f"Markdown file not found: {recipe_path}", file=sys.stderr)
90+
return 1
91+
92+
schema = json.loads(schema_path.read_text(encoding="utf-8"))
93+
frontmatter, body = _split_frontmatter_and_body(recipe_path)
94+
95+
# JSON Schema: front-matter
96+
validator = Draft202012Validator(schema)
97+
schema_errors = sorted(validator.iter_errors(frontmatter), key=lambda e: e.path)
98+
99+
# Heading order from schema definitions.required_headings.enum
100+
required_headings = (
101+
schema.get("definitions", {})
102+
.get("required_headings", {})
103+
.get("enum", [])
104+
)
105+
heading_errors: list[str] = []
106+
if required_headings:
107+
heading_errors = _check_heading_order(body, required_headings)
108+
109+
if not schema_errors and not heading_errors:
110+
print(f"{recipe_path} is valid according to {schema_path} and heading rules")
111+
return 0
112+
113+
print(f"{recipe_path} is INVALID:")
114+
for err in schema_errors:
115+
loc = ".".join(map(str, err.path)) or "<root>"
116+
print(f" - schema:{loc}: {err.message}")
117+
for msg in heading_errors:
118+
print(f" - heading: {msg}")
119+
return 1
120+
121+
122+
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)