From c7fd7bf809286637cb1a8daa3e10e9d38cb60b64 Mon Sep 17 00:00:00 2001 From: Miguel Santos Date: Sat, 20 Jun 2026 21:45:09 +0100 Subject: [PATCH] feat: support multi-sheet workbooks Add optional --pfmea-sheet / --control-plan-sheet to select a worksheet by name in multi-sheet .xlsx files. When omitted, the active sheet is used (unchanged default). A missing sheet raises a clear ParseError listing the available sheet names; header auto-detection still applies to the selected sheet. Threaded through parse_pfmea / parse_control_plan / check_files. No change to scoring, matching, finding types or the JSON schema. Adds 9 tests (explicit selection, missing-sheet errors, default compatibility, header auto-detect on a selected sheet, CLI Markdown + JSON). Closes #4. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 4 + README.md | 13 ++ docs/ARCHITECTURE.md | 5 +- src/quality_docs_validator/cli.py | 10 +- .../modules/pfmea_control_plan.py | 17 +- src/quality_docs_validator/parsers/excel.py | 28 +++- tests/conftest.py | 18 ++ tests/test_multi_sheet.py | 156 ++++++++++++++++++ 8 files changed, 236 insertions(+), 15 deletions(-) create mode 100644 tests/test_multi_sheet.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 700f4b5..332388f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ All notable changes to this project are documented here. The format is based on ## [Unreleased] ### Added +- Multi-sheet workbook support: `--pfmea-sheet` / `--control-plan-sheet` select a worksheet by name + (the active sheet is still used when omitted, so existing behaviour is unchanged). A missing sheet + raises a clear error listing the available sheet names. Header auto-detection still applies to the + selected sheet. ([#4](https://github.com/migmcc/quality-docs-validator/issues/4)) - More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation No`/`Op #`/ `Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/`Key Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`, diff --git a/README.md b/README.md index bcd6c0c..3d8912d 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,19 @@ Markdown remains the default. The JSON contains `metadata` (tool, version, UTC t `verdict`, `score`, a `summary` (counts by severity and by finding type) and the full `findings` list. +### Multi-sheet workbooks + +If a workbook has several sheets, pick the right one by name (otherwise the active sheet is used): + +```bash +qdv pfmea-control-plan \ + --pfmea pfmea.xlsx --pfmea-sheet "PFMEA" \ + --control-plan control-plan.xlsx --control-plan-sheet "Control Plan" \ + --out report.md +``` + +If the named sheet does not exist, the error lists the available sheet names. + ## Architecture See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the module layout and data flow. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 6d7efbd..1a2f88a 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -78,8 +78,9 @@ from YAML (a small rule-interpretation layer) is deferred to a later iteration the hardening pass to avoid a rearchitecture. ## Known limitations (MVP) -- **`.xlsx` only**, single worksheet; the header row is auto-located within the first few rows but - merged/multi-line header cells are not specially handled. +- **`.xlsx` only**; one worksheet is read per file (selectable by name via `--pfmea-sheet` / + `--control-plan-sheet`, otherwise the active sheet). The header row is auto-located within the + first few rows, but merged/multi-line header cells are not specially handled. - **Recommended template + fixed aliases** only — no configurable column mapping (v0.2). - **Weak-method detection is heuristic** (phrase matching) and English-oriented; shipped as warnings. - **Matching is exact on normalised `operation_id`** — no fuzzy/description matching. diff --git a/src/quality_docs_validator/cli.py b/src/quality_docs_validator/cli.py index e6575c8..f2fd346 100644 --- a/src/quality_docs_validator/cli.py +++ b/src/quality_docs_validator/cli.py @@ -57,6 +57,14 @@ def pfmea_control_plan( control_plan: Path = typer.Option( ..., "--control-plan", help="Path to the Control Plan .xlsx file." ), + pfmea_sheet: str = typer.Option( + None, "--pfmea-sheet", help="Worksheet name for the PFMEA (default: the active sheet)." + ), + control_plan_sheet: str = typer.Option( + None, + "--control-plan-sheet", + help="Worksheet name for the Control Plan (default: the active sheet).", + ), out: Path = typer.Option( Path("report.md"), "--out", @@ -71,7 +79,7 @@ def pfmea_control_plan( ) -> None: """Check a PFMEA against a Control Plan for potential inconsistencies.""" try: - result = check_files(pfmea, control_plan) + result = check_files(pfmea, control_plan, pfmea_sheet, control_plan_sheet) except ParseError as exc: console.print(f"[red]Error:[/red] {exc}") raise typer.Exit(code=2) from exc diff --git a/src/quality_docs_validator/modules/pfmea_control_plan.py b/src/quality_docs_validator/modules/pfmea_control_plan.py index dbb7d62..8c1876e 100644 --- a/src/quality_docs_validator/modules/pfmea_control_plan.py +++ b/src/quality_docs_validator/modules/pfmea_control_plan.py @@ -168,10 +168,19 @@ def evaluate(match: MatchResult) -> list[Finding]: return findings -def check_files(pfmea_path: str | Path, control_plan_path: str | Path) -> ValidationResult: - """Full pipeline: parse -> match -> evaluate -> score.""" - pfmea = parse_pfmea(pfmea_path) - control_plan = parse_control_plan(control_plan_path) +def check_files( + pfmea_path: str | Path, + control_plan_path: str | Path, + pfmea_sheet: str | None = None, + control_plan_sheet: str | None = None, +) -> ValidationResult: + """Full pipeline: parse -> match -> evaluate -> score. + + Optional `pfmea_sheet` / `control_plan_sheet` select a worksheet by name in multi-sheet + workbooks; when omitted the active sheet is used (unchanged default behaviour). + """ + pfmea = parse_pfmea(pfmea_path, pfmea_sheet) + control_plan = parse_control_plan(control_plan_path, control_plan_sheet) match = match_rows(pfmea, control_plan) findings = evaluate(match) score, verdict = scoring.summarise(findings) diff --git a/src/quality_docs_validator/parsers/excel.py b/src/quality_docs_validator/parsers/excel.py index 07f8201..0a742e2 100644 --- a/src/quality_docs_validator/parsers/excel.py +++ b/src/quality_docs_validator/parsers/excel.py @@ -125,7 +125,7 @@ def _to_bool(value: object) -> bool: HEADER_SCAN_ROWS = 10 # how many leading rows to scan when locating the header row -def _load_rows(path: Path) -> list[tuple]: +def _load_rows(path: Path, sheet: str | None = None) -> list[tuple]: if not path.exists(): raise ParseError(f"File not found: {path}") if path.suffix.lower() != ".xlsx": @@ -137,14 +137,26 @@ def _load_rows(path: Path) -> list[tuple]: workbook = load_workbook(path, read_only=True, data_only=True) except Exception as exc: # openpyxl raises a variety of types for corrupt/non-xlsx files raise ParseError(f"Could not open '{path.name}' as an .xlsx workbook: {exc}") from exc - sheet = workbook.active + if sheet is None: + worksheet = workbook.active + elif sheet in workbook.sheetnames: + worksheet = workbook[sheet] + else: + available = ", ".join(f"'{name}'" for name in workbook.sheetnames) or "(none)" + workbook.close() + raise ParseError( + f"Worksheet '{sheet}' not found in '{path.name}'. Available sheets: {available}." + ) + + if worksheet is None: workbook.close() raise ParseError(f"Workbook '{path.name}' has no worksheets.") - rows = [r for r in sheet.iter_rows(values_only=True) if any(c is not None for c in r)] + rows = [r for r in worksheet.iter_rows(values_only=True) if any(c is not None for c in r)] workbook.close() if not rows: - raise ParseError(f"Worksheet in '{path.name}' is empty (no rows with content).") + target = f"Worksheet '{sheet}'" if sheet else "Worksheet" + raise ParseError(f"{target} in '{path.name}' is empty (no rows with content).") return rows @@ -165,9 +177,9 @@ def _find_header( ) -def parse_pfmea(path: str | Path) -> list[PFMEARow]: +def parse_pfmea(path: str | Path, sheet: str | None = None) -> list[PFMEARow]: path = Path(path) - rows = _load_rows(path) + rows = _load_rows(path, sheet) header_index, field_map = _find_header(rows, PFMEA_ALIASES, "PFMEA", path) data = rows[header_index + 1 :] @@ -196,9 +208,9 @@ def parse_pfmea(path: str | Path) -> list[PFMEARow]: return out -def parse_control_plan(path: str | Path) -> list[ControlPlanRow]: +def parse_control_plan(path: str | Path, sheet: str | None = None) -> list[ControlPlanRow]: path = Path(path) - rows = _load_rows(path) + rows = _load_rows(path, sheet) header_index, field_map = _find_header(rows, CONTROL_PLAN_ALIASES, "Control Plan", path) data = rows[header_index + 1 :] diff --git a/tests/conftest.py b/tests/conftest.py index c7a995b..455ae64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,11 +20,29 @@ def _make_xlsx(path: Path, headers: list[str], rows: list[list]) -> Path: return path +def _make_multi_sheet_xlsx(path: Path, sheets: dict[str, tuple[list, list]]) -> Path: + """Build a workbook with several named sheets: {name: (headers, rows)}.""" + wb = Workbook() + wb.remove(wb.active) # drop the default empty sheet + for name, (headers, rows) in sheets.items(): + ws = wb.create_sheet(title=name) + ws.append(headers) + for row in rows: + ws.append(row) + wb.save(path) + return path + + @pytest.fixture def make_xlsx(): return _make_xlsx +@pytest.fixture +def make_multi_sheet_xlsx(): + return _make_multi_sheet_xlsx + + @pytest.fixture(scope="session") def example_files() -> tuple[Path, Path]: """The committed synthetic examples; regenerate them if missing.""" diff --git a/tests/test_multi_sheet.py b/tests/test_multi_sheet.py new file mode 100644 index 0000000..da8fb3a --- /dev/null +++ b/tests/test_multi_sheet.py @@ -0,0 +1,156 @@ +"""Tests for multi-sheet workbook support (issue #4).""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from quality_docs_validator.cli import app +from quality_docs_validator.modules.pfmea_control_plan import check_files +from quality_docs_validator.parsers.excel import ( + ParseError, + parse_control_plan, + parse_pfmea, +) + +runner = CliRunner() + +PFMEA_HEADERS = ["Operation ID", "Failure Mode", "Severity", "Special Characteristic"] +CP_HEADERS = ["Operation ID", "Control Method", "Reaction Plan", "Special Characteristic"] + + +def _pfmea_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path: + return make_multi_sheet_xlsx( + tmp_path / "pfmea.xlsx", + { + "Cover": (["Title"], [["Project X PFMEA"]]), + "PFMEA": (PFMEA_HEADERS, [["10", "Crack", 9, "Yes"]]), + "Notes": (["Note"], [["ignore me"]]), + }, + ) + + +def _cp_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path: + return make_multi_sheet_xlsx( + tmp_path / "cp.xlsx", + { + "Cover": (["Title"], [["Project X Control Plan"]]), + "Control Plan": (CP_HEADERS, [["10", "Visual inspection", "", "No"]]), + }, + ) + + +def test_pfmea_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None: + path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path) + rows = parse_pfmea(path, sheet="PFMEA") + assert len(rows) == 1 + assert rows[0].operation_id == "10" + assert rows[0].severity == 9 + assert rows[0].special_characteristic is True + + +def test_control_plan_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None: + path = _cp_multi(make_multi_sheet_xlsx, tmp_path) + rows = parse_control_plan(path, sheet="Control Plan") + assert len(rows) == 1 + assert rows[0].operation_id == "10" + assert rows[0].control_method == "Visual inspection" + + +def test_missing_pfmea_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None: + path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path) + with pytest.raises(ParseError) as exc: + parse_pfmea(path, sheet="DoesNotExist") + msg = str(exc.value) + assert "DoesNotExist" in msg + assert "'PFMEA'" in msg and "'Cover'" in msg # lists available sheets + + +def test_missing_control_plan_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None: + path = _cp_multi(make_multi_sheet_xlsx, tmp_path) + with pytest.raises(ParseError) as exc: + parse_control_plan(path, sheet="Nope") + msg = str(exc.value) + assert "Nope" in msg + assert "'Control Plan'" in msg + + +def test_default_no_sheet_is_backwards_compatible(make_xlsx, tmp_path: Path) -> None: + # Single-sheet workbook with no sheet argument keeps the v0.1/v0.2 behaviour. + pfmea = make_xlsx(tmp_path / "pf.xlsx", PFMEA_HEADERS, [["10", "Crack", 9, "No"]]) + cp = make_xlsx(tmp_path / "cp.xlsx", CP_HEADERS, [["10", "Pressure gauge", "Rework", "No"]]) + result = check_files(pfmea, cp) + assert result.pfmea_rows == 1 + assert result.control_plan_rows == 1 + + +def test_header_autodetection_with_selected_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None: + # The selected sheet has a leading title row before the header -> still auto-detected. + path = make_multi_sheet_xlsx( + tmp_path / "pfmea.xlsx", + { + "Intro": (["x"], [["y"]]), + "PFMEA": ( + ["PFMEA - Project X", None, None, None], + [PFMEA_HEADERS, ["10", "Crack", 9, "No"]], + ), + }, + ) + rows = parse_pfmea(path, sheet="PFMEA") + assert len(rows) == 1 + assert rows[0].operation_id == "10" + + +def test_cli_markdown_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None: + pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path) + cp = _cp_multi(make_multi_sheet_xlsx, tmp_path) + out = tmp_path / "report.md" + result = runner.invoke( + app, + [ + "pfmea-control-plan", + "--pfmea", str(pfmea), "--pfmea-sheet", "PFMEA", + "--control-plan", str(cp), "--control-plan-sheet", "Control Plan", + "--out", str(out), + ], + ) + assert result.exit_code == 0 + assert "# PFMEA" in out.read_text(encoding="utf-8") + + +def test_cli_json_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None: + pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path) + cp = _cp_multi(make_multi_sheet_xlsx, tmp_path) + out = tmp_path / "report.json" + result = runner.invoke( + app, + [ + "pfmea-control-plan", + "--pfmea", str(pfmea), "--pfmea-sheet", "PFMEA", + "--control-plan", str(cp), "--control-plan-sheet", "Control Plan", + "--format", "json", "--out", str(out), + ], + ) + assert result.exit_code == 0 + data = json.loads(out.read_text(encoding="utf-8")) + assert "verdict" in data and "findings" in data + + +def test_cli_clear_error_for_missing_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None: + pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path) + cp = _cp_multi(make_multi_sheet_xlsx, tmp_path) + result = runner.invoke( + app, + [ + "pfmea-control-plan", + "--pfmea", str(pfmea), "--pfmea-sheet", "Wrong", + "--control-plan", str(cp), + "--out", str(tmp_path / "r.md"), + ], + ) + assert result.exit_code == 2 + assert "Wrong" in result.stdout + assert "Available sheets" in result.stdout