Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ All notable changes to this project are documented here. The format is based on
## [Unreleased]

### Added
- More common column-header aliases for both PFMEA and Control Plan (e.g. `Operation No`/`Op #`/
`Process No`, `Severity Rating`, `Detection Method`, `Special Characteristics`/`Key
Characteristic`/`Critical Characteristic`, `Control Technique`/`Measurement Technique`,
`Out of Control Action`). Existing aliases and behaviour are unchanged.
([#2](https://github.com/migmcc/quality-docs-validator/issues/2))
- JSON report output via `--format json` (Markdown remains the default). The JSON includes
`metadata` (tool, version, UTC timestamp, format), `inputs`, `verdict`, `score`, a `summary`
(counts by severity and by finding type) and the full `findings` list. Validation, scoring and
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ confidential supplier/customer data never leaves it.

A single module: **PFMEA ↔ Control Plan consistency checker**.

- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + simple column aliases).
- Reads a PFMEA `.xlsx` and a Control Plan `.xlsx` (recommended template + common header aliases —
e.g. `Operation No`, `Op #`, `Severity Rating`, `Special Characteristic`, `Reaction Plan`).
- Matches rows by `operation_id` / process step.
- Applies six explicit, documented checks (see [docs/FINDINGS.md](docs/FINDINGS.md)):
`UNMATCHED_PROCESS_STEP`, `MISSING_CONTROL`, `SPECIAL_CHARACTERISTIC_NOT_CONTROLLED`,
Expand Down
50 changes: 39 additions & 11 deletions src/quality_docs_validator/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,56 @@ class ParseError(Exception):
"""Raised when a file cannot be parsed into the expected document shape."""


# Canonical field -> accepted normalised header aliases. Headers are normalised (lowercased,
# non-alphanumeric stripped) before matching, so "Operation No." == "operationno" and "Op #" == "op".
# Aliases are deliberately specific; ambiguous generic tokens (control, method, description, number,
# id, status) are not used as new aliases to avoid mis-mapping columns.
#
# Each canonical field appears exactly once: a dict display silently keeps only the last value for a
# repeated key, so a duplicated entry would be dead code that hides which alias set is in effect.
PFMEA_ALIASES: dict[str, set[str]] = {
    "operation_id": {
        "operationid", "opid", "opno", "operationno", "operation", "processnumber",
        "operationnumber", "opnumber", "op", "processno", "stepno",
    },
    "process_step": {
        "processstep", "processfunction", "operationdescription", "step", "processname",
        "processdescription", "stepdescription",
    },
    "failure_mode": {"failuremode", "potentialfailuremode", "failure"},
    "effect": {"effect", "potentialeffect", "effectsoffailure"},
    "severity": {"severity", "sev", "s", "severityrating"},
    "cause": {"cause", "potentialcause", "causeoffailure"},
    "prevention_control": {"preventioncontrol", "currentpreventioncontrol", "prevention"},
    # In a PFMEA, a "Detection Method" header maps to the detection *control* column.
    "detection_control": {
        "detectioncontrol", "currentdetectioncontrol", "currentcontrolsdetection", "detectionmethod",
    },
    "detection": {"detection", "det", "d"},
    "special_characteristic": {
        "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc",
        "keycharacteristic", "criticalcharacteristic",
    },
}

# Control Plan counterpart of the alias table: canonical field -> accepted normalised header aliases.
# Each canonical field is listed exactly once; a repeated key in a dict display would silently
# discard the earlier alias set.
CONTROL_PLAN_ALIASES: dict[str, set[str]] = {
    "operation_id": {
        "operationid", "opid", "opno", "operationno", "operation", "processnumber",
        "operationnumber", "opnumber", "op", "processno", "stepno",
    },
    "process_step": {
        "processstep", "processname", "processdescription", "operationdescription", "step",
        "stepdescription",
    },
    "characteristic": {"characteristic", "productcharacteristic", "processcharacteristic"},
    "special_characteristic": {
        "specialcharacteristic", "specialcharacteristics", "specialchar", "classification", "sc",
        "keycharacteristic", "criticalcharacteristic",
    },
    "control_method": {
        "controlmethod", "control", "method", "evaluationmeasurementtechnique",
        "controltechnique", "measurementtechnique",
    },
    "detection_method": {"detectionmethod", "inspectionmethod", "evaluationmethod"},
    "reaction_plan": {
        "reactionplan", "reaction", "responseplan", "correctiveaction", "outofcontrolaction",
    },
}

# Tokens that count as an affirmative flag value.
# NOTE(review): presumably compared against lowercased/stripped cell text — confirm against the
# helper that consumes this set.
_TRUE_TOKENS = {
    "yes", "y", "true", "1", "x",
    "cc", "sc", "critical", "significant",
    "*", "✓", "✔",
}
Expand Down
101 changes: 101 additions & 0 deletions tests/test_aliases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Parametrized tests for column-header aliases (issue #2).

Covers the aliases added for real-world PFMEA / Control Plan templates, plus a no-regression check
that the original aliases still resolve. The `operation_id` column is required, so when the alias
under test belongs to any other field the workbook also carries an `Operation ID` column.
"""

from __future__ import annotations

from pathlib import Path

import pytest

from quality_docs_validator.parsers.excel import (
ParseError,
parse_control_plan,
parse_pfmea,
)

# Each case: (alias header text, canonical field, raw cell value, expected parsed value).
PFMEA_NEW_ALIASES = [
    # operation_id
    ("Operation Number", "operation_id", "10", "10"),
    ("Op Number", "operation_id", "10", "10"),
    ("Op #", "operation_id", "10", "10"),
    ("Process No", "operation_id", "10", "10"),
    ("Step No", "operation_id", "10", "10"),
    # process_step
    ("Process Description", "process_step", "Welding", "Welding"),
    ("Step Description", "process_step", "Welding", "Welding"),
    # severity
    ("Severity Rating", "severity", 9, 9),
    # detection_control
    ("Detection Method", "detection_control", "Gauge R&R", "Gauge R&R"),
    # special_characteristic ("Yes" is expected to parse to True)
    ("Special Characteristics", "special_characteristic", "Yes", True),
    ("Key Characteristic", "special_characteristic", "Yes", True),
    ("Critical Characteristic", "special_characteristic", "Yes", True),
]

# Each case: (alias header text, canonical field, raw cell value, expected parsed value).
CONTROL_PLAN_NEW_ALIASES = [
    # operation_id
    ("Operation Number", "operation_id", "10", "10"),
    ("Op Number", "operation_id", "10", "10"),
    ("Op #", "operation_id", "10", "10"),
    ("Process No", "operation_id", "10", "10"),
    ("Step No", "operation_id", "10", "10"),
    # process_step
    ("Step Description", "process_step", "Welding", "Welding"),
    # control_method
    ("Control Technique", "control_method", "SPC chart", "SPC chart"),
    ("Measurement Technique", "control_method", "CMM", "CMM"),
    # reaction_plan
    ("Out of Control Action", "reaction_plan", "Stop line", "Stop line"),
    # special_characteristic ("Yes" is expected to parse to True)
    ("Special Characteristics", "special_characteristic", "Yes", True),
    ("Key Characteristic", "special_characteristic", "Yes", True),
    ("Critical Characteristic", "special_characteristic", "Yes", True),
]


@pytest.mark.parametrize("header,field,value,expected", PFMEA_NEW_ALIASES)
def test_pfmea_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None:
    """Each new PFMEA header alias resolves to its canonical field with the expected value."""
    # When the alias under test *is* the key column it stands alone; otherwise an explicit
    # "Operation ID" column is added alongside it.
    aliases_key_column = field == "operation_id"
    headers = [header] if aliases_key_column else ["Operation ID", header]
    record = [value] if aliases_key_column else ["10", value]
    workbook = make_xlsx(tmp_path / "p.xlsx", headers, [record])
    parsed = parse_pfmea(workbook)
    assert getattr(parsed[0], field) == expected


@pytest.mark.parametrize("header,field,value,expected", CONTROL_PLAN_NEW_ALIASES)
def test_control_plan_new_aliases(make_xlsx, tmp_path, header, field, value, expected) -> None:
    """Each new Control Plan header alias resolves to its canonical field with the expected value."""
    # When the alias under test *is* the key column it stands alone; otherwise an explicit
    # "Operation ID" column is added alongside it.
    aliases_key_column = field == "operation_id"
    headers = [header] if aliases_key_column else ["Operation ID", header]
    record = [value] if aliases_key_column else ["10", value]
    workbook = make_xlsx(tmp_path / "cp.xlsx", headers, [record])
    parsed = parse_control_plan(workbook)
    assert getattr(parsed[0], field) == expected


def test_original_aliases_still_work(make_xlsx, tmp_path: Path) -> None:
    """A spread of pre-existing aliases must keep resolving in both parsers (no regression)."""
    # PFMEA workbook using only aliases that pre-date the new ones.
    pfmea_headers = ["Op No", "Process Function", "Potential Failure Mode", "Sev", "Classification"]
    pfmea_book = make_xlsx(
        tmp_path / "pf.xlsx",
        pfmea_headers,
        [["10", "Weld", "Crack", 9, "Yes"]],
    )
    pfmea_rows = parse_pfmea(pfmea_book)
    first_pfmea = pfmea_rows[0]
    assert first_pfmea.operation_id == "10"
    assert first_pfmea.process_step == "Weld"
    assert first_pfmea.severity == 9
    assert first_pfmea.special_characteristic is True

    # Control Plan workbook, likewise restricted to the original aliases.
    plan_headers = ["Operation No", "Control Method", "Reaction Plan"]
    plan_book = make_xlsx(tmp_path / "cp.xlsx", plan_headers, [["10", "CMM", "Rework"]])
    plan_rows = parse_control_plan(plan_book)
    first_plan = plan_rows[0]
    assert first_plan.operation_id == "10"
    assert first_plan.control_method == "CMM"
    assert first_plan.reaction_plan == "Rework"


def test_missing_required_column_still_errors_clearly(make_xlsx, tmp_path: Path) -> None:
    """Without any operation_id alias, parsing fails with a ParseError naming the field."""
    headers_without_key = ["Failure Mode", "Severity"]
    workbook = make_xlsx(tmp_path / "p.xlsx", headers_without_key, [["Crack", 9]])
    # `match` searches str(exception) for the pattern; "operation_id" has no regex metacharacters,
    # so this is a plain substring check on the error message (unchanged behaviour).
    with pytest.raises(ParseError, match="operation_id"):
        parse_pfmea(workbook)
Loading