From 4d46e91fcd469cb993760915fdd1291a1f39080d Mon Sep 17 00:00:00 2001 From: Miguel Santos Date: Sat, 20 Jun 2026 21:31:59 +0100 Subject: [PATCH] feat: add more column aliases Extend the PFMEA and Control Plan header alias sets with common, unambiguous variants (Operation No / Op # / Process No, Severity Rating, Detection Method, Special Characteristics / Key Characteristic / Critical Characteristic, Control Technique / Measurement Technique, Out of Control Action, Step Description). Existing aliases and all behaviour are unchanged. Ambiguous tokens (control, method, description, number, id, status) are not added; "inspection/evaluation method" stay mapped to detection_method only. Adds parametrized alias tests + a no-regression check; missing required column still raises a clear ParseError. Scoring, matching, finding types and the Markdown/JSON output are untouched. Closes #2. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 5 + README.md | 3 +- src/quality_docs_validator/parsers/excel.py | 50 +++++++--- tests/test_aliases.py | 101 ++++++++++++++++++++ 4 files changed, 147 insertions(+), 12 deletions(-) create mode 100644 tests/test_aliases.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ea75c36..700f4b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ All notable changes to this project are documented here. The format is based on ## [Unreleased] ### Added +- More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation No`/`Op #`/ + `Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/`Key + Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`, + `Out of Control Action`). Existing aliases and behaviour are unchanged. + ([#2](https://github.com/migmcc/quality-docs-validator/issues/2)) - JSON report output via `--format json` (Markdown remains the default). The JSON includes `metadata` (tool, version, UTC timestamp, format), `inputs`, `verdict`, `score`, a `summary` (counts by severity and by finding type) and the full `findings` list. Validation, scoring and diff --git a/README.md b/README.md index 9701352..bcd6c0c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,8 @@ confidential supplier/customer data never leaves it. A single module: **PFMEA ↔ Control Plan consistency checker**. -- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + simple column aliases). +- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + common header aliases — + e.g. `Operation No`, `Op #`, `Severity Rating`, `Special Characteristic`, `Reaction Plan`). - Matches rows by `operation_id` / process step. - Applies six explicit, documented checks (see [docs/FINDINGS.md](docs/FINDINGS.md)): `UNMATCHED_PROCESS_STEP`, `MISSING_CONTROL`, `SPECIAL_CHARACTERISTIC_NOT_CONTROLLED`, diff --git a/src/quality_docs_validator/parsers/excel.py b/src/quality_docs_validator/parsers/excel.py index 30d5424..07f8201 100644 --- a/src/quality_docs_validator/parsers/excel.py +++ b/src/quality_docs_validator/parsers/excel.py @@ -22,28 +22,56 @@ class ParseError(Exception): """Raised when a file cannot be parsed into the expected document shape.""" -# Canonical field -> accepted normalised header aliases. +# Canonical field -> accepted normalised header aliases. Headers are normalised (lowercased, +# non-alphanumeric stripped) before matching, so "Operation No." == "operationno" and "Op #" == "op". +# Aliases are deliberately specific; ambiguous generic tokens (control, method, description, number, +# id, status) are not used as new aliases to avoid mis-mapping columns. PFMEA_ALIASES: dict[str, set[str]] = { - "operation_id": {"operationid", "opid", "opno", "operationno", "operation", "processnumber"}, - "process_step": {"processstep", "processfunction", "operationdescription", "step", "processname"}, + "operation_id": { + "operationid", "opid", "opno", "operationno", "operation", "processnumber", + "operationnumber", "opnumber", "op", "processno", "stepno", + }, + "process_step": { + "processstep", "processfunction", "operationdescription", "step", "processname", + "processdescription", "stepdescription", + }, "failure_mode": {"failuremode", "potentialfailuremode", "failure"}, "effect": {"effect", "potentialeffect", "effectsoffailure"}, - "severity": {"severity", "sev", "s"}, + "severity": {"severity", "sev", "s", "severityrating"}, "cause": {"cause", "potentialcause", "causeoffailure"}, "prevention_control": {"preventioncontrol", "currentpreventioncontrol", "prevention"}, - "detection_control": {"detectioncontrol", "currentdetectioncontrol", "currentcontrolsdetection"}, + "detection_control": { + "detectioncontrol", "currentdetectioncontrol", "currentcontrolsdetection", "detectionmethod", + }, "detection": {"detection", "det", "d"}, - "special_characteristic": {"specialcharacteristic", "specialchar", "classification", "sc"}, + "special_characteristic": { + "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc", + "keycharacteristic", "criticalcharacteristic", + }, } CONTROL_PLAN_ALIASES: dict[str, set[str]] = { - "operation_id": {"operationid", "opid", "opno", "operationno", "operation", "processnumber"}, - "process_step": {"processstep", "processname", "processdescription", "operationdescription", "step"}, + "operation_id": { + "operationid", "opid", "opno", "operationno", "operation", "processnumber", + "operationnumber", "opnumber", "op", "processno", "stepno", + }, + "process_step": { + "processstep", "processname", "processdescription", "operationdescription", "step", + "stepdescription", + }, "characteristic": {"characteristic", "productcharacteristic", "processcharacteristic"}, - "special_characteristic": {"specialcharacteristic", "specialchar", "classification", "sc"}, - "control_method": {"controlmethod", "control", "method", "evaluationmeasurementtechnique"}, + "special_characteristic": { + "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc", + "keycharacteristic", "criticalcharacteristic", + }, + "control_method": { + "controlmethod", "control", "method", "evaluationmeasurementtechnique", + "controltechnique", "measurementtechnique", + }, "detection_method": {"detectionmethod", "inspectionmethod", "evaluationmethod"}, - "reaction_plan": {"reactionplan", "reaction", "responseplan", "correctiveaction"}, + "reaction_plan": { + "reactionplan", "reaction", "responseplan", "correctiveaction", "outofcontrolaction", + }, } _TRUE_TOKENS = {"yes", "y", "true", "1", "x", "cc", "sc", "critical", "significant", "*", "✓", "✔"} diff --git a/tests/test_aliases.py b/tests/test_aliases.py new file mode 100644 index 0000000..be7ff5b --- /dev/null +++ b/tests/test_aliases.py @@ -0,0 +1,101 @@ +"""Parametrized tests for column-header aliases (issue #2). + +Covers the aliases added for real-world PFMEA / Control Plan templates, plus a no-regression check +that the original aliases still resolve. The header `operation_id` is required, so for non-key +fields the workbook also carries an `Operation ID` column. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from quality_docs_validator.parsers.excel import ( + ParseError, + parse_control_plan, + parse_pfmea, +) + +# (alias header text, canonical field, raw cell value, expected parsed value) +PFMEA_NEW_ALIASES = [ + ("Operation Number", "operation_id", "10", "10"), + ("Op Number", "operation_id", "10", "10"), + ("Op #", "operation_id", "10", "10"), + ("Process No", "operation_id", "10", "10"), + ("Step No", "operation_id", "10", "10"), + ("Process Description", "process_step", "Welding", "Welding"), + ("Step Description", "process_step", "Welding", "Welding"), + ("Severity Rating", "severity", 9, 9), + ("Detection Method", "detection_control", "Gauge R&R", "Gauge R&R"), + ("Special Characteristics", "special_characteristic", "Yes", True), + ("Key Characteristic", "special_characteristic", "Yes", True), + ("Critical Characteristic", "special_characteristic", "Yes", True), +] + +CONTROL_PLAN_NEW_ALIASES = [ + ("Operation Number", "operation_id", "10", "10"), + ("Op Number", "operation_id", "10", "10"), + ("Op #", "operation_id", "10", "10"), + ("Process No", "operation_id", "10", "10"), + ("Step No", "operation_id", "10", "10"), + ("Step Description", "process_step", "Welding", "Welding"), + ("Control Technique", "control_method", "SPC chart", "SPC chart"), + ("Measurement Technique", "control_method", "CMM", "CMM"), + ("Out of Control Action", "reaction_plan", "Stop line", "Stop line"), + ("Special Characteristics", "special_characteristic", "Yes", True), + ("Key Characteristic", "special_characteristic", "Yes", True), + ("Critical Characteristic", "special_characteristic", "Yes", True), +] + + +@pytest.mark.parametrize("header,field,value,expected", PFMEA_NEW_ALIASES) +def test_pfmea_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None: + if field == "operation_id": + path = make_xlsx(tmp_path / "p.xlsx", [header], [[value]]) + else: + path = make_xlsx(tmp_path / "p.xlsx", ["Operation ID", header], [["10", value]]) + rows = parse_pfmea(path) + assert getattr(rows[0], field) == expected + + +@pytest.mark.parametrize("header,field,value,expected", CONTROL_PLAN_NEW_ALIASES) +def test_control_plan_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None: + if field == "operation_id": + path = make_xlsx(tmp_path / "cp.xlsx", [header], [[value]]) + else: + path = make_xlsx(tmp_path / "cp.xlsx", ["Operation ID", header], [["10", value]]) + rows = parse_control_plan(path) + assert getattr(rows[0], field) == expected + + +def test_original_aliases_still_work(make_xlsx, tmp_path: Path) -> None: + # A spread of pre-existing aliases must keep resolving (no regression). + pfmea = make_xlsx( + tmp_path / "pf.xlsx", + ["Op No", "Process Function", "Potential Failure Mode", "Sev", "Classification"], + [["10", "Weld", "Crack", 9, "Yes"]], + ) + rows = parse_pfmea(pfmea) + assert rows[0].operation_id == "10" + assert rows[0].process_step == "Weld" + assert rows[0].severity == 9 + assert rows[0].special_characteristic is True + + cp = make_xlsx( + tmp_path / "cp.xlsx", + ["Operation No", "Control Method", "Reaction Plan"], + [["10", "CMM", "Rework"]], + ) + cp_rows = parse_control_plan(cp) + assert cp_rows[0].operation_id == "10" + assert cp_rows[0].control_method == "CMM" + assert cp_rows[0].reaction_plan == "Rework" + + +def test_missing_required_column_still_errors_clearly(make_xlsx, tmp_path: Path) -> None: + # No operation_id alias present -> clear ParseError (unchanged behaviour). + path = make_xlsx(tmp_path / "p.xlsx", ["Failure Mode", "Severity"], [["Crack", 9]]) + with pytest.raises(ParseError) as exc: + parse_pfmea(path) + assert "operation_id" in str(exc.value)