Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ All notable changes to this project are documented here. The format is based on
## [Unreleased]

### Added
- Multi-sheet workbook support: `--pfmea-sheet` / `--control-plan-sheet` select a worksheet by name
(the active sheet is still used when omitted, so existing behaviour is unchanged). A missing sheet
raises a clear error listing the available sheet names. Header auto-detection still applies to the
selected sheet. ([#4](https://github.com/migmcc/quality-docs-validator/issues/4))
- More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation No`/`Op #`/
`Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/`Key
Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`,
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ Markdown remains the default. The JSON contains `metadata` (tool, version, UTC t
`verdict`, `score`, a `summary` (counts by severity and by finding type) and the full `findings`
list.

### Multi-sheet workbooks

If a workbook has several sheets, select the one to read by name with `--pfmea-sheet` / `--control-plan-sheet` (when an option is omitted, that file's active sheet is used):

```bash
qdv pfmea-control-plan \
--pfmea pfmea.xlsx --pfmea-sheet "PFMEA" \
--control-plan control-plan.xlsx --control-plan-sheet "Control Plan" \
--out report.md
```

If the named sheet does not exist, the error lists the available sheet names.

## Architecture

See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the module layout and data flow.
Expand Down
5 changes: 3 additions & 2 deletions docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ from YAML (a small rule-interpretation layer) is deferred to a later iteration
the hardening pass to avoid a rearchitecture.

## Known limitations (MVP)
- **`.xlsx` only**, single worksheet; the header row is auto-located within the first few rows but
merged/multi-line header cells are not specially handled.
- **`.xlsx` only**; one worksheet is read per file (selectable by name via `--pfmea-sheet` /
`--control-plan-sheet`, otherwise the active sheet). The header row is auto-located within the
first few rows, but merged/multi-line header cells are not specially handled.
- **Recommended template + fixed aliases** only — no configurable column mapping (v0.2).
- **Weak-method detection is heuristic** (phrase matching) and English-oriented; shipped as warnings.
- **Matching is exact on normalised `operation_id`** — no fuzzy/description matching.
Expand Down
10 changes: 9 additions & 1 deletion src/quality_docs_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ def pfmea_control_plan(
control_plan: Path = typer.Option(
..., "--control-plan", help="Path to the Control Plan .xlsx file."
),
pfmea_sheet: str = typer.Option(
None, "--pfmea-sheet", help="Worksheet name for the PFMEA (default: the active sheet)."
),
control_plan_sheet: str = typer.Option(
None,
"--control-plan-sheet",
help="Worksheet name for the Control Plan (default: the active sheet).",
),
out: Path = typer.Option(
Path("report.md"),
"--out",
Expand All @@ -71,7 +79,7 @@ def pfmea_control_plan(
) -> None:
"""Check a PFMEA against a Control Plan for potential inconsistencies."""
try:
result = check_files(pfmea, control_plan)
result = check_files(pfmea, control_plan, pfmea_sheet, control_plan_sheet)
except ParseError as exc:
console.print(f"[red]Error:[/red] {exc}")
raise typer.Exit(code=2) from exc
Expand Down
17 changes: 13 additions & 4 deletions src/quality_docs_validator/modules/pfmea_control_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,19 @@ def evaluate(match: MatchResult) -> list[Finding]:
return findings


def check_files(pfmea_path: str | Path, control_plan_path: str | Path) -> ValidationResult:
"""Full pipeline: parse -> match -> evaluate -> score."""
pfmea = parse_pfmea(pfmea_path)
control_plan = parse_control_plan(control_plan_path)
def check_files(
pfmea_path: str | Path,
control_plan_path: str | Path,
pfmea_sheet: str | None = None,
control_plan_sheet: str | None = None,
) -> ValidationResult:
"""Full pipeline: parse -> match -> evaluate -> score.

Optional `pfmea_sheet` / `control_plan_sheet` select a worksheet by name in multi-sheet
workbooks; when omitted the active sheet is used (unchanged default behaviour).
"""
pfmea = parse_pfmea(pfmea_path, pfmea_sheet)
control_plan = parse_control_plan(control_plan_path, control_plan_sheet)
match = match_rows(pfmea, control_plan)
findings = evaluate(match)
score, verdict = scoring.summarise(findings)
Expand Down
28 changes: 20 additions & 8 deletions src/quality_docs_validator/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _to_bool(value: object) -> bool:
HEADER_SCAN_ROWS = 10 # how many leading rows to scan when locating the header row


def _load_rows(path: Path) -> list[tuple]:
def _load_rows(path: Path, sheet: str | None = None) -> list[tuple]:
if not path.exists():
raise ParseError(f"File not found: {path}")
if path.suffix.lower() != ".xlsx":
Expand All @@ -137,14 +137,26 @@ def _load_rows(path: Path) -> list[tuple]:
workbook = load_workbook(path, read_only=True, data_only=True)
except Exception as exc: # openpyxl raises a variety of types for corrupt/non-xlsx files
raise ParseError(f"Could not open '{path.name}' as an .xlsx workbook: {exc}") from exc
sheet = workbook.active

if sheet is None:
worksheet = workbook.active
elif sheet in workbook.sheetnames:
worksheet = workbook[sheet]
else:
available = ", ".join(f"'{name}'" for name in workbook.sheetnames) or "(none)"
workbook.close()
raise ParseError(
f"Worksheet '{sheet}' not found in '{path.name}'. Available sheets: {available}."
)

if worksheet is None:
workbook.close()
raise ParseError(f"Workbook '{path.name}' has no worksheets.")
rows = [r for r in sheet.iter_rows(values_only=True) if any(c is not None for c in r)]
rows = [r for r in worksheet.iter_rows(values_only=True) if any(c is not None for c in r)]
workbook.close()
if not rows:
raise ParseError(f"Worksheet in '{path.name}' is empty (no rows with content).")
target = f"Worksheet '{sheet}'" if sheet else "Worksheet"
raise ParseError(f"{target} in '{path.name}' is empty (no rows with content).")
return rows


Expand All @@ -165,9 +177,9 @@ def _find_header(
)


def parse_pfmea(path: str | Path) -> list[PFMEARow]:
def parse_pfmea(path: str | Path, sheet: str | None = None) -> list[PFMEARow]:
path = Path(path)
rows = _load_rows(path)
rows = _load_rows(path, sheet)
header_index, field_map = _find_header(rows, PFMEA_ALIASES, "PFMEA", path)
data = rows[header_index + 1 :]

Expand Down Expand Up @@ -196,9 +208,9 @@ def parse_pfmea(path: str | Path) -> list[PFMEARow]:
return out


def parse_control_plan(path: str | Path) -> list[ControlPlanRow]:
def parse_control_plan(path: str | Path, sheet: str | None = None) -> list[ControlPlanRow]:
path = Path(path)
rows = _load_rows(path)
rows = _load_rows(path, sheet)
header_index, field_map = _find_header(rows, CONTROL_PLAN_ALIASES, "Control Plan", path)
data = rows[header_index + 1 :]

Expand Down
18 changes: 18 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,29 @@ def _make_xlsx(path: Path, headers: list[str], rows: list[list]) -> Path:
return path


def _make_multi_sheet_xlsx(path: Path, sheets: dict[str, tuple[list, list]]) -> Path:
    """Write a workbook to ``path`` with one named worksheet per entry in ``sheets``.

    ``sheets`` maps a worksheet title to a ``(headers, rows)`` pair. For each sheet the headers
    are appended as the first row, followed by every data row in order. Returns ``path``.
    """
    workbook = Workbook()
    # Drop the default empty sheet so that only the explicitly named sheets remain.
    workbook.remove(workbook.active)
    for title, content in sheets.items():
        header_row, data_rows = content
        worksheet = workbook.create_sheet(title=title)
        for line in [header_row, *data_rows]:
            worksheet.append(line)
    workbook.save(path)
    return path


@pytest.fixture
def make_xlsx():
    """Fixture that hands the `_make_xlsx` workbook builder to a test."""
    return _make_xlsx


@pytest.fixture
def make_multi_sheet_xlsx():
    """Fixture that hands the `_make_multi_sheet_xlsx` workbook builder to a test."""
    return _make_multi_sheet_xlsx


@pytest.fixture(scope="session")
def example_files() -> tuple[Path, Path]:
"""The committed synthetic examples; regenerate them if missing."""
Expand Down
156 changes: 156 additions & 0 deletions tests/test_multi_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""Tests for multi-sheet workbook support (issue #4)."""

from __future__ import annotations

import json
from pathlib import Path

import pytest
from typer.testing import CliRunner

from quality_docs_validator.cli import app
from quality_docs_validator.modules.pfmea_control_plan import check_files
from quality_docs_validator.parsers.excel import (
ParseError,
parse_control_plan,
parse_pfmea,
)

# Shared CLI test runner used by the `test_cli_*` tests in this module.
runner = CliRunner()

# Header rows used when building the PFMEA / Control Plan worksheets for these tests.
PFMEA_HEADERS = ["Operation ID", "Failure Mode", "Severity", "Special Characteristic"]
CP_HEADERS = ["Operation ID", "Control Method", "Reaction Plan", "Special Characteristic"]


def _pfmea_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path:
    """Create `pfmea.xlsx` under `tmp_path` with 'Cover', 'PFMEA' and 'Notes' sheets.

    Only the 'PFMEA' sheet carries the PFMEA headers plus one data row; the other two sheets
    hold unrelated single-column content.
    """
    target = tmp_path / "pfmea.xlsx"
    sheets = {
        "Cover": (["Title"], [["Project X PFMEA"]]),
        "PFMEA": (PFMEA_HEADERS, [["10", "Crack", 9, "Yes"]]),
        "Notes": (["Note"], [["ignore me"]]),
    }
    return make_multi_sheet_xlsx(target, sheets)


def _cp_multi(make_multi_sheet_xlsx, tmp_path: Path) -> Path:
    """Create `cp.xlsx` under `tmp_path` with a 'Cover' sheet and a 'Control Plan' sheet.

    Only the 'Control Plan' sheet carries the Control Plan headers plus one data row.
    """
    target = tmp_path / "cp.xlsx"
    sheets = {
        "Cover": (["Title"], [["Project X Control Plan"]]),
        "Control Plan": (CP_HEADERS, [["10", "Visual inspection", "", "No"]]),
    }
    return make_multi_sheet_xlsx(target, sheets)


def test_pfmea_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """Selecting the 'PFMEA' sheet by name parses that sheet's single data row."""
    workbook_path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
    parsed = parse_pfmea(workbook_path, sheet="PFMEA")
    assert len(parsed) == 1
    first = parsed[0]
    assert first.operation_id == "10"
    assert first.severity == 9
    assert first.special_characteristic is True


def test_control_plan_explicit_sheet_selection(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """Selecting the 'Control Plan' sheet by name parses that sheet's single data row."""
    workbook_path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
    parsed = parse_control_plan(workbook_path, sheet="Control Plan")
    assert len(parsed) == 1
    first = parsed[0]
    assert first.operation_id == "10"
    assert first.control_method == "Visual inspection"


def test_missing_pfmea_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """An unknown PFMEA sheet name raises ParseError naming it and the existing sheets."""
    workbook_path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
    with pytest.raises(ParseError) as raised:
        parse_pfmea(workbook_path, sheet="DoesNotExist")
    message = str(raised.value)
    assert "DoesNotExist" in message
    # The message must list the available sheets.
    assert "'PFMEA'" in message
    assert "'Cover'" in message


def test_missing_control_plan_sheet_lists_available(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """An unknown Control Plan sheet name raises ParseError naming it and an existing sheet."""
    workbook_path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
    with pytest.raises(ParseError) as raised:
        parse_control_plan(workbook_path, sheet="Nope")
    message = str(raised.value)
    assert "Nope" in message
    assert "'Control Plan'" in message


def test_default_no_sheet_is_backwards_compatible(make_xlsx, tmp_path: Path) -> None:
    """`check_files` without sheet arguments still works on single-sheet workbooks."""
    # Single-sheet workbook with no sheet argument keeps the v0.1/v0.2 behaviour.
    pfmea_path = make_xlsx(tmp_path / "pf.xlsx", PFMEA_HEADERS, [["10", "Crack", 9, "No"]])
    control_plan_path = make_xlsx(
        tmp_path / "cp.xlsx", CP_HEADERS, [["10", "Pressure gauge", "Rework", "No"]]
    )
    outcome = check_files(pfmea_path, control_plan_path)
    assert outcome.pfmea_rows == 1
    assert outcome.control_plan_rows == 1


def test_header_autodetection_with_selected_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """Header auto-detection still applies when the sheet is selected by name."""
    # The selected sheet has a leading title row before the header -> still auto-detected.
    title_row = ["PFMEA - Project X", None, None, None]
    rows_after_title = [PFMEA_HEADERS, ["10", "Crack", 9, "No"]]
    workbook_path = make_multi_sheet_xlsx(
        tmp_path / "pfmea.xlsx",
        {
            "Intro": (["x"], [["y"]]),
            "PFMEA": (title_row, rows_after_title),
        },
    )
    parsed = parse_pfmea(workbook_path, sheet="PFMEA")
    assert len(parsed) == 1
    assert parsed[0].operation_id == "10"


def test_cli_markdown_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """The CLI writes a Markdown report when both sheets are selected by name."""
    pfmea_path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
    control_plan_path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
    report_path = tmp_path / "report.md"
    argv = [
        "pfmea-control-plan",
        "--pfmea", str(pfmea_path), "--pfmea-sheet", "PFMEA",
        "--control-plan", str(control_plan_path), "--control-plan-sheet", "Control Plan",
        "--out", str(report_path),
    ]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    report_text = report_path.read_text(encoding="utf-8")
    assert "# PFMEA" in report_text


def test_cli_json_with_explicit_sheets(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """The CLI writes a JSON report when both sheets are selected by name."""
    pfmea_path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
    control_plan_path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
    report_path = tmp_path / "report.json"
    argv = [
        "pfmea-control-plan",
        "--pfmea", str(pfmea_path), "--pfmea-sheet", "PFMEA",
        "--control-plan", str(control_plan_path), "--control-plan-sheet", "Control Plan",
        "--format", "json", "--out", str(report_path),
    ]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 0
    payload = json.loads(report_path.read_text(encoding="utf-8"))
    assert "verdict" in payload
    assert "findings" in payload


def test_cli_clear_error_for_missing_sheet(make_multi_sheet_xlsx, tmp_path: Path) -> None:
    """An unknown `--pfmea-sheet` exits with code 2 and prints the available sheets."""
    pfmea_path = _pfmea_multi(make_multi_sheet_xlsx, tmp_path)
    control_plan_path = _cp_multi(make_multi_sheet_xlsx, tmp_path)
    argv = [
        "pfmea-control-plan",
        "--pfmea", str(pfmea_path), "--pfmea-sheet", "Wrong",
        "--control-plan", str(control_plan_path),
        "--out", str(tmp_path / "r.md"),
    ]
    outcome = runner.invoke(app, argv)
    assert outcome.exit_code == 2
    printed = outcome.stdout
    assert "Wrong" in printed
    assert "Available sheets" in printed
Loading