From 4d46e91fcd469cb993760915fdd1291a1f39080d Mon Sep 17 00:00:00 2001
From: Miguel Santos <migmcc@gmail.com>
Date: Sat, 20 Jun 2026 21:31:59 +0100
Subject: [PATCH] feat: add more column aliases

Extend the PFMEA and Control Plan header alias sets with common, unambiguous
variants (Operation Number / Op Number / Op # / Process No / Step No,
Process Description (PFMEA) / Step Description, Severity Rating, Detection
Method (PFMEA), Special Characteristics / Key Characteristic / Critical
Characteristic, Control Technique / Measurement Technique, Out of Control
Action). Existing aliases and all other behaviour are unchanged.

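Ambiguous tokens (control, method, description, number, id, status) are not
added as new aliases; the pre-existing Control Plan aliases "control" and
"method" are kept as-is. In the Control Plan, "Inspection Method" and
"Evaluation Method" stay mapped to detection_method only, while "Detection
Method" in a PFMEA now maps to detection_control.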

Adds parametrized alias tests plus a no-regression check; a missing required
column still raises a clear ParseError. Scoring, matching, finding types and
the Markdown/JSON output are untouched. Closes #2.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                                |   5 +
 README.md                                   |   3 +-
 src/quality_docs_validator/parsers/excel.py |  50 +++++++---
 tests/test_aliases.py                       | 101 ++++++++++++++++++++
 4 files changed, 147 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_aliases.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea75c36..700f4b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ All notable changes to this project are documented here. The format is based on
 ## [Unreleased]
 
 ### Added
+- More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation Number`/
+  `Op #`/`Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/
+  `Key Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`,
+  `Out of Control Action`). Existing aliases and behaviour are unchanged.
+  ([#2](https://github.com/migmcc/quality-docs-validator/issues/2))
 - JSON report output via `--format json` (Markdown remains the default). The JSON includes
   `metadata` (tool, version, UTC timestamp, format), `inputs`, `verdict`, `score`, a `summary`
   (counts by severity and by finding type) and the full `findings` list. Validation, scoring and
diff --git a/README.md b/README.md
index 9701352..bcd6c0c 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ confidential supplier/customer data never leaves it.
 
 A single module: **PFMEA ↔ Control Plan consistency checker**.
 
-- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + simple column aliases).
+- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + common header aliases —
+  e.g. `Operation No`, `Op #`, `Severity Rating`, `Special Characteristic`, `Reaction Plan`).
 - Matches rows by `operation_id` / process step.
 - Applies six explicit, documented checks (see [docs/FINDINGS.md](docs/FINDINGS.md)):
   `UNMATCHED_PROCESS_STEP`, `MISSING_CONTROL`, `SPECIAL_CHARACTERISTIC_NOT_CONTROLLED`,
diff --git a/src/quality_docs_validator/parsers/excel.py b/src/quality_docs_validator/parsers/excel.py
index 30d5424..07f8201 100644
--- a/src/quality_docs_validator/parsers/excel.py
+++ b/src/quality_docs_validator/parsers/excel.py
@@ -22,28 +22,56 @@ class ParseError(Exception):
     """Raised when a file cannot be parsed into the expected document shape."""
 
 
-# Canonical field -> accepted normalised header aliases.
+# Canonical field -> accepted normalised header aliases. Headers are normalised (lowercased,
+# non-alphanumeric stripped) before matching, so "Operation No." == "operationno" and "Op #" == "op".
+# Aliases are deliberately specific; ambiguous generic tokens (control, method, description, number,
+# id, status) are not used as new aliases to avoid mis-mapping columns.
 PFMEA_ALIASES: dict[str, set[str]] = {
-    "operation_id": {"operationid", "opid", "opno", "operationno", "operation", "processnumber"},
-    "process_step": {"processstep", "processfunction", "operationdescription", "step", "processname"},
+    "operation_id": {
+        "operationid", "opid", "opno", "operationno", "operation", "processnumber",
+        "operationnumber", "opnumber", "op", "processno", "stepno",
+    },
+    "process_step": {
+        "processstep", "processfunction", "operationdescription", "step", "processname",
+        "processdescription", "stepdescription",
+    },
     "failure_mode": {"failuremode", "potentialfailuremode", "failure"},
     "effect": {"effect", "potentialeffect", "effectsoffailure"},
-    "severity": {"severity", "sev", "s"},
+    "severity": {"severity", "sev", "s", "severityrating"},
     "cause": {"cause", "potentialcause", "causeoffailure"},
     "prevention_control": {"preventioncontrol", "currentpreventioncontrol", "prevention"},
-    "detection_control": {"detectioncontrol", "currentdetectioncontrol", "currentcontrolsdetection"},
+    "detection_control": {
+        "detectioncontrol", "currentdetectioncontrol", "currentcontrolsdetection", "detectionmethod",
+    },
     "detection": {"detection", "det", "d"},
-    "special_characteristic": {"specialcharacteristic", "specialchar", "classification", "sc"},
+    "special_characteristic": {
+        "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc",
+        "keycharacteristic", "criticalcharacteristic",
+    },
 }
 
 CONTROL_PLAN_ALIASES: dict[str, set[str]] = {
-    "operation_id": {"operationid", "opid", "opno", "operationno", "operation", "processnumber"},
-    "process_step": {"processstep", "processname", "processdescription", "operationdescription", "step"},
+    "operation_id": {
+        "operationid", "opid", "opno", "operationno", "operation", "processnumber",
+        "operationnumber", "opnumber", "op", "processno", "stepno",
+    },
+    "process_step": {
+        "processstep", "processname", "processdescription", "operationdescription", "step",
+        "stepdescription",
+    },
     "characteristic": {"characteristic", "productcharacteristic", "processcharacteristic"},
-    "special_characteristic": {"specialcharacteristic", "specialchar", "classification", "sc"},
-    "control_method": {"controlmethod", "control", "method", "evaluationmeasurementtechnique"},
+    "special_characteristic": {
+        "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc",
+        "keycharacteristic", "criticalcharacteristic",
+    },
+    "control_method": {
+        "controlmethod", "control", "method", "evaluationmeasurementtechnique",
+        "controltechnique", "measurementtechnique",
+    },
     "detection_method": {"detectionmethod", "inspectionmethod", "evaluationmethod"},
-    "reaction_plan": {"reactionplan", "reaction", "responseplan", "correctiveaction"},
+    "reaction_plan": {
+        "reactionplan", "reaction", "responseplan", "correctiveaction", "outofcontrolaction",
+    },
 }
 
 _TRUE_TOKENS = {"yes", "y", "true", "1", "x", "cc", "sc", "critical", "significant", "*", "✓", "✔"}
diff --git a/tests/test_aliases.py b/tests/test_aliases.py
new file mode 100644
index 0000000..be7ff5b
--- /dev/null
+++ b/tests/test_aliases.py
@@ -0,0 +1,101 @@
+"""Parametrized tests for column-header aliases (issue #2).
+
+Covers the aliases added for real-world PFMEA / Control Plan templates, plus a no-regression check
+that the original aliases still resolve. The `operation_id` field is required, so for non-key
+fields the workbook also carries an `Operation ID` column.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from quality_docs_validator.parsers.excel import (
+    ParseError,
+    parse_control_plan,
+    parse_pfmea,
+)
+
+# (alias header text, canonical field, raw cell value, expected parsed value)
+PFMEA_NEW_ALIASES = [
+    ("Operation Number", "operation_id", "10", "10"),
+    ("Op Number", "operation_id", "10", "10"),
+    ("Op #", "operation_id", "10", "10"),
+    ("Process No", "operation_id", "10", "10"),
+    ("Step No", "operation_id", "10", "10"),
+    ("Process Description", "process_step", "Welding", "Welding"),
+    ("Step Description", "process_step", "Welding", "Welding"),
+    ("Severity Rating", "severity", 9, 9),
+    ("Detection Method", "detection_control", "Gauge R&R", "Gauge R&R"),
+    ("Special Characteristics", "special_characteristic", "Yes", True),
+    ("Key Characteristic", "special_characteristic", "Yes", True),
+    ("Critical Characteristic", "special_characteristic", "Yes", True),
+]
+
+CONTROL_PLAN_NEW_ALIASES = [
+    ("Operation Number", "operation_id", "10", "10"),
+    ("Op Number", "operation_id", "10", "10"),
+    ("Op #", "operation_id", "10", "10"),
+    ("Process No", "operation_id", "10", "10"),
+    ("Step No", "operation_id", "10", "10"),
+    ("Step Description", "process_step", "Welding", "Welding"),
+    ("Control Technique", "control_method", "SPC chart", "SPC chart"),
+    ("Measurement Technique", "control_method", "CMM", "CMM"),
+    ("Out of Control Action", "reaction_plan", "Stop line", "Stop line"),
+    ("Special Characteristics", "special_characteristic", "Yes", True),
+    ("Key Characteristic", "special_characteristic", "Yes", True),
+    ("Critical Characteristic", "special_characteristic", "Yes", True),
+]
+
+
+@pytest.mark.parametrize("header,field,value,expected", PFMEA_NEW_ALIASES)
+def test_pfmea_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None:
+    if field == "operation_id":
+        path = make_xlsx(tmp_path / "p.xlsx", [header], [[value]])
+    else:
+        path = make_xlsx(tmp_path / "p.xlsx", ["Operation ID", header], [["10", value]])
+    rows = parse_pfmea(path)
+    assert getattr(rows[0], field) == expected
+
+
+@pytest.mark.parametrize("header,field,value,expected", CONTROL_PLAN_NEW_ALIASES)
+def test_control_plan_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None:
+    if field == "operation_id":
+        path = make_xlsx(tmp_path / "cp.xlsx", [header], [[value]])
+    else:
+        path = make_xlsx(tmp_path / "cp.xlsx", ["Operation ID", header], [["10", value]])
+    rows = parse_control_plan(path)
+    assert getattr(rows[0], field) == expected
+
+
+def test_original_aliases_still_work(make_xlsx, tmp_path: Path) -> None:
+    # A spread of pre-existing aliases must keep resolving (no regression).
+    pfmea = make_xlsx(
+        tmp_path / "pf.xlsx",
+        ["Op No", "Process Function", "Potential Failure Mode", "Sev", "Classification"],
+        [["10", "Weld", "Crack", 9, "Yes"]],
+    )
+    rows = parse_pfmea(pfmea)
+    assert rows[0].operation_id == "10"
+    assert rows[0].process_step == "Weld"
+    assert rows[0].severity == 9
+    assert rows[0].special_characteristic is True
+
+    cp = make_xlsx(
+        tmp_path / "cp.xlsx",
+        ["Operation No", "Control Method", "Reaction Plan"],
+        [["10", "CMM", "Rework"]],
+    )
+    cp_rows = parse_control_plan(cp)
+    assert cp_rows[0].operation_id == "10"
+    assert cp_rows[0].control_method == "CMM"
+    assert cp_rows[0].reaction_plan == "Rework"
+
+
+def test_missing_required_column_still_errors_clearly(make_xlsx, tmp_path: Path) -> None:
+    # No operation_id alias present -> clear ParseError (unchanged behaviour).
+    path = make_xlsx(tmp_path / "p.xlsx", ["Failure Mode", "Severity"], [["Crack", 9]])
+    with pytest.raises(ParseError) as exc:
+        parse_pfmea(path)
+    assert "operation_id" in str(exc.value)