From c7fd7bf809286637cb1a8daa3e10e9d38cb60b64 Mon Sep 17 00:00:00 2001
From: Miguel Santos <migmcc@gmail.com>
Date: Sat, 20 Jun 2026 21:45:09 +0100
Subject: [PATCH] feat: support multi-sheet workbooks

Add optional --pfmea-sheet / --control-plan-sheet to select a worksheet by name
in multi-sheet .xlsx files. When omitted, the active sheet is used (unchanged
default). A missing sheet raises a clear ParseError listing the available sheet
names; header auto-detection still applies to the selected sheet.

Threaded through parse_pfmea / parse_control_plan / check_files. No change to
scoring, matching, finding types or the JSON schema. Adds 9 tests (explicit
selection, missing-sheet errors, default compatibility, header auto-detect on a
selected sheet, CLI Markdown + JSON). Closes #4.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                                  |   4 +
 README.md                                     |  13 ++
 docs/ARCHITECTURE.md                          |   5 +-
 src/quality_docs_validator/cli.py             |  10 +-
 .../modules/pfmea_control_plan.py             |  17 +-
 src/quality_docs_validator/parsers/excel.py   |  28 +++-
 tests/conftest.py                             |  18 ++
 tests/test_multi_sheet.py                     | 156 ++++++++++++++++++
 8 files changed, 236 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_multi_sheet.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 700f4b5..332388f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ All notable changes to this project are documented here. The format is based on
 ## [Unreleased]
 
 ### Added
+- Multi-sheet workbook support: `--pfmea-sheet` / `--control-plan-sheet` select a worksheet by name
+  (the active sheet is still used when omitted, so existing behaviour is unchanged). A missing sheet
+  raises a clear error listing the available sheet names. Header auto-detection still applies to the
+  selected sheet. ([#4](https://github.com/migmcc/quality-docs-validator/issues/4))
 - More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation No`/`Op #`/
   `Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/`Key
   Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`,
diff --git a/README.md b/README.md
index bcd6c0c..3d8912d 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,19 @@ Markdown remains the default. The JSON contains `metadata` (tool, version, UTC t
 `verdict`, `score`, a `summary` (counts by severity and by finding type) and the full `findings`
 list.
 
+### Multi-sheet workbooks
+
+If a workbook has several sheets, pick the right one by name (otherwise the active sheet is used):
+
+```bash
+qdv pfmea-control-plan \
+  --pfmea pfmea.xlsx --pfmea-sheet "PFMEA" \
+  --control-plan control-plan.xlsx --control-plan-sheet "Control Plan" \
+  --out report.md
+```
+
+If the named sheet does not exist, the error lists the available sheet names.
+
 ## Architecture
 
 See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the module layout and data flow.
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 6d7efbd..1a2f88a 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -78,8 +78,9 @@ from YAML (a small rule-interpretation layer) is deferred to a later iteration 
 the hardening pass to avoid a rearchitecture.
 
 ## Known limitations (MVP)
-- **`.xlsx` only**, single worksheet; the header row is auto-located within the first few rows but
-  merged/multi-line header cells are not specially handled.
+- **`.xlsx` only**; one worksheet is read per file (selectable by name via `--pfmea-sheet` /
+  `--control-plan-sheet`, otherwise the active sheet). The header row is auto-located within the
+  first few rows, but merged/multi-line header cells are not specially handled.
 - **Recommended template + fixed aliases** only — no configurable column mapping (v0.2).
 - **Weak-method detection is heuristic** (phrase matching) and English-oriented; shipped as warnings.
 - **Matching is exact on normalised `operation_id`** — no fuzzy/description matching.
diff --git a/src/quality_docs_validator/cli.py b/src/quality_docs_validator/cli.py
index e6575c8..f2fd346 100644
--- a/src/quality_docs_validator/cli.py
+++ b/src/quality_docs_validator/cli.py
@@ -57,6 +57,14 @@ def pfmea_control_plan(
     control_plan: Path = typer.Option(
         ..., "--control-plan", help="Path to the Control Plan .xlsx file."
     ),
+    pfmea_sheet: str = typer.Option(
+        None, "--pfmea-sheet", help="Worksheet name for the PFMEA (default: the active sheet)."
+    ),
+    control_plan_sheet: str = typer.Option(
+        None,
+        "--control-plan-sheet",
+        help="Worksheet name for the Control Plan (default: the active sheet).",
+    ),
     out: Path = typer.Option(
         Path("report.md"),
         "--out",
@@ -71,7 +79,7 @@ def pfmea_control_plan(
 ) -> None:
     """Check a PFMEA against a Control Plan for potential inconsistencies."""
     try:
-        result = check_files(pfmea, control_plan)
+        result = check_files(pfmea, control_plan, pfmea_sheet, control_plan_sheet)
     except ParseError as exc:
         console.print(f"[red]Error:[/red] {exc}")
         raise typer.Exit(code=2) from exc
diff --git a/src/quality_docs_validator/modules/pfmea_control_plan.py b/src/quality_docs_validator/modules/pfmea_control_plan.py
index dbb7d62..8c1876e 100644
--- a/src/quality_docs_validator/modules/pfmea_control_plan.py
+++ b/src/quality_docs_validator/modules/pfmea_control_plan.py
@@ -168,10 +168,19 @@ def evaluate(match: MatchResult) -> list[Finding]:
     return findings
 
 
-def check_files(pfmea_path: str | Path, control_plan_path: str | Path) -> ValidationResult:
-    """Full pipeline: parse -> match -> evaluate -> score."""
-    pfmea = parse_pfmea(pfmea_path)
-    control_plan = parse_control_plan(control_plan_path)
+def check_files(
+    pfmea_path: str | Path,
+    control_plan_path: str | Path,
+    pfmea_sheet: str | None = None,
+    control_plan_sheet: str | None = None,
+) -> ValidationResult:
+    """Full pipeline: parse -> match -> evaluate -> score.
+
+    Optional `pfmea_sheet` / `control_plan_sheet` select a worksheet by name in multi-sheet
+    workbooks; when omitted the active sheet is used (unchanged default behaviour).
+    """
+    pfmea = parse_pfmea(pfmea_path, pfmea_sheet)
+    control_plan = parse_control_plan(control_plan_path, control_plan_sheet)
     match = match_rows(pfmea, control_plan)
     findings = evaluate(match)
     score, verdict = scoring.summarise(findings)
diff --git a/src/quality_docs_validator/parsers/excel.py b/src/quality_docs_validator/parsers/excel.py
index 07f8201..0a742e2 100644
--- a/src/quality_docs_validator/parsers/excel.py
+++ b/src/quality_docs_validator/parsers/excel.py
@@ -125,7 +125,7 @@ def _to_bool(value: object) -> bool:
 HEADER_SCAN_ROWS = 10  # how many leading rows to scan when locating the header row
 
 
-def _load_rows(path: Path) -> list[tuple]:
+def _load_rows(path: Path, sheet: str | None = None) -> list[tuple]:
     if not path.exists():
         raise ParseError(f"File not found: {path}")
     if path.suffix.lower() != ".xlsx":
@@ -137,14 +137,26 @@ def _load_rows(path: Path) -> list[tuple]:
         workbook = load_workbook(path, read_only=True, data_only=True)
     except Exception as exc:  # openpyxl raises a variety of types for corrupt/non-xlsx files
         raise ParseError(f"Could not open '{path.name}' as an .xlsx workbook: {exc}") from exc
-    sheet = workbook.active
+
     if sheet is None:
+        worksheet = workbook.active
+    elif sheet in workbook.sheetnames:
+        worksheet = workbook[sheet]
+    else:
+        available = ", ".join(f"'{name}'" for name in workbook.sheetnames) or "(none)"
+        workbook.close()
+        raise ParseError(
+            f"Worksheet '{sheet}' not found in '{path.name}'. Available sheets: {available}."
+        )
+
+    if worksheet is None:
         workbook.close()
         raise ParseError(f"Workbook '{path.name}' has no worksheets.")
-    rows = [r for r in sheet.iter_rows(values_only=True) if any(c is not None for c in r)]
+    rows = [r for r in worksheet.iter_rows(values_only=True) if any(c is not None for c in r)]
     workbook.close()
     if not rows:
-        raise ParseError(f"Worksheet in '{path.name}' is empty (no rows with content).")
+        target = f"Worksheet '{sheet}'" if sheet else "Worksheet"
+        raise ParseError(f"{target} in '{path.name}' is empty (no rows with content).")
     return rows
 
 
@@ -165,9 +177,9 @@ def _find_header(
     )
 
 
-def parse_pfmea(path: str | Path) -> list[PFMEARow]:
+def parse_pfmea(path: str | Path, sheet: str | None = None) -> list[PFMEARow]:
     path = Path(path)
-    rows = _load_rows(path)
+    rows = _load_rows(path, sheet)
     header_index, field_map = _find_header(rows, PFMEA_ALIASES, "PFMEA", path)
     data = rows[header_index + 1 :]
 
@@ -196,9 +208,9 @@ def parse_pfmea(path: str | Path) -> list[PFMEARow]:
     return out
 
 
-def parse_control_plan(path: str | Path) -> list[ControlPlanRow]:
+def parse_control_plan(path: str | Path, sheet: str | None = None) -> list[ControlPlanRow]:
     path = Path(path)
-    rows = _load_rows(path)
+    rows = _load_rows(path, sheet)
     header_index, field_map = _find_header(rows, CONTROL_PLAN_ALIASES, "Control Plan", path)
     data = rows[header_index + 1 :]
 
diff --git a/tests/conftest.py b/tests/conftest.py
index c7a995b..455ae64 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -20,11 +20,29 @@ def _make_xlsx(path: Path, headers: list[str], rows: list[list]) -> Path:
     return path
 
 
+def _make_multi_sheet_xlsx(path: Path, sheets: dict[str, tuple[list, list]]) -> Path:
+    """Build a workbook with several named sheets: {name: (headers, rows)}."""
+    wb = Workbook()
+    wb.remove(wb.active)  # drop the default empty sheet
+    for name, (headers, rows) in sheets.items():
+        ws = wb.create_sheet(title=name)
+        ws.append(headers)
+        for row in rows:
+            ws.append(row)
+    wb.save(path)
+    return path
+
+
 @pytest.fixture
 def make_xlsx():
     return _make_xlsx
 
 
+@pytest.fixture
+def make_multi_sheet_xlsx():
+    return _make_multi_sheet_xlsx
+
+
 @pytest.fixture(scope="session")
 def example_files() -> tuple[Path, Path]:
     """The committed synthetic examples; regenerate them if missing."""
diff --git a/tests/test_multi_sheet.py b/tests/test_multi_sheet.py
new file mode 100644
index 0000000..da8fb3a
--- /dev/null
+++ b/tests/test_multi_sheet.py
@@ -0,0 +1,156 @@
+"""Tests for multi-sheet workbook support (issue #4)."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from typer.testing import CliRunner
+
+from quality_docs_validator.cli import app
+from quality_docs_validator.modules.pfmea_control_plan import check_files
+from quality_docs_validator.parsers.excel import (
+    ParseError,
+    parse_control_plan,
+    parse_pfmea,
+)
+
+runner = CliRunner()
+
+PFMEA_HEADERS = ["Operation ID", "Failure Mode", "Severity", "Special Characteristic"]
+CP_HEADERS = ["Operation ID", "Control Method", "Reaction Plan", "Special Characteristic"]
+
+
+def _pfmea_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path:
+    return make_multi_sheet_xlsx(
+        tmp_path / "pfmea.xlsx",
+        {
+            "Cover": (["Title"], [["Project X PFMEA"]]),
+            "PFMEA": (PFMEA_HEADERS, [["10", "Crack", 9, "Yes"]]),
+            "Notes": (["Note"], [["ignore me"]]),
+        },
+    )
+
+
+def _cp_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path:
+    return make_multi_sheet_xlsx(
+        tmp_path / "cp.xlsx",
+        {
+            "Cover": (["Title"], [["Project X Control Plan"]]),
+            "Control Plan": (CP_HEADERS, [["10", "Visual inspection", "", "No"]]),
+        },
+    )
+
+
+def test_pfmea_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
+    rows = parse_pfmea(path, sheet="PFMEA")
+    assert len(rows) == 1
+    assert rows[0].operation_id == "10"
+    assert rows[0].severity == 9
+    assert rows[0].special_characteristic is True
+
+
+def test_control_plan_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
+    rows = parse_control_plan(path, sheet="Control Plan")
+    assert len(rows) == 1
+    assert rows[0].operation_id == "10"
+    assert rows[0].control_method == "Visual inspection"
+
+
+def test_missing_pfmea_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
+    with pytest.raises(ParseError) as exc:
+        parse_pfmea(path, sheet="DoesNotExist")
+    msg = str(exc.value)
+    assert "DoesNotExist" in msg
+    assert "'PFMEA'" in msg and "'Cover'" in msg  # lists available sheets
+
+
+def test_missing_control_plan_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
+    with pytest.raises(ParseError) as exc:
+        parse_control_plan(path, sheet="Nope")
+    msg = str(exc.value)
+    assert "Nope" in msg
+    assert "'Control Plan'" in msg
+
+
+def test_default_no_sheet_is_backwards_compatible(make_xlsx, tmp_path: Path) -> None:
+    # Single-sheet workbook with no sheet argument keeps the v0.1/v0.2 behaviour.
+    pfmea = make_xlsx(tmp_path / "pf.xlsx", PFMEA_HEADERS, [["10", "Crack", 9, "No"]])
+    cp = make_xlsx(tmp_path / "cp.xlsx", CP_HEADERS, [["10", "Pressure gauge", "Rework", "No"]])
+    result = check_files(pfmea, cp)
+    assert result.pfmea_rows == 1
+    assert result.control_plan_rows == 1
+
+
+def test_header_autodetection_with_selected_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    # The selected sheet has a leading title row before the header -> still auto-detected.
+    path = make_multi_sheet_xlsx(
+        tmp_path / "pfmea.xlsx",
+        {
+            "Intro": (["x"], [["y"]]),
+            "PFMEA": (
+                ["PFMEA - Project X", None, None, None],
+                [PFMEA_HEADERS, ["10", "Crack", 9, "No"]],
+            ),
+        },
+    )
+    rows = parse_pfmea(path, sheet="PFMEA")
+    assert len(rows) == 1
+    assert rows[0].operation_id == "10"
+
+
+def test_cli_markdown_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
+    cp = _cp_multi(make_multi_sheet_xlsx, tmp_path)
+    out = tmp_path / "report.md"
+    result = runner.invoke(
+        app,
+        [
+            "pfmea-control-plan",
+            "--pfmea", str(pfmea), "--pfmea-sheet", "PFMEA",
+            "--control-plan", str(cp), "--control-plan-sheet", "Control Plan",
+            "--out", str(out),
+        ],
+    )
+    assert result.exit_code == 0
+    assert "# PFMEA" in out.read_text(encoding="utf-8")
+
+
+def test_cli_json_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
+    cp = _cp_multi(make_multi_sheet_xlsx, tmp_path)
+    out = tmp_path / "report.json"
+    result = runner.invoke(
+        app,
+        [
+            "pfmea-control-plan",
+            "--pfmea", str(pfmea), "--pfmea-sheet", "PFMEA",
+            "--control-plan", str(cp), "--control-plan-sheet", "Control Plan",
+            "--format", "json", "--out", str(out),
+        ],
+    )
+    assert result.exit_code == 0
+    data = json.loads(out.read_text(encoding="utf-8"))
+    assert "verdict" in data and "findings" in data
+
+
+def test_cli_clear_error_for_missing_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None:
+    pfmea = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
+    cp = _cp_multi(make_multi_sheet_xlsx, tmp_path)
+    result = runner.invoke(
+        app,
+        [
+            "pfmea-control-plan",
+            "--pfmea", str(pfmea), "--pfmea-sheet", "Wrong",
+            "--control-plan", str(cp),
+            "--out", str(tmp_path / "r.md"),
+        ],
+    )
+    assert result.exit_code == 2
+    assert "Wrong" in result.stdout
+    assert "Available sheets" in result.stdout