From 1f40dcca180ef9982c08636cbc0349f114a7e04d Mon Sep 17 00:00:00 2001 From: Michal Piotrowski Date: Sun, 26 Apr 2026 19:23:30 +0200 Subject: [PATCH] Add typed exception hierarchy and regression-corpus scaffolding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typed exception hierarchy under llm_safe_pl.errors: - LlmSafeError (base, subclass of Exception) - MappingError, InputSizeError (also subclass ValueError so existing `except ValueError` code keeps catching) - DetectorError (also subclass RuntimeError; constructor signature is (detector_name) only — never input text or cause, to prevent PII from leaking into stack traces) Mapping.from_dict / from_json and the Shield input-size guard now raise the typed classes instead of bare ValueError. Constructor-time argument validation (Shield(max_input_bytes=-1), duplicate detector names) keeps raising plain ValueError. Regression-corpus scaffolding under tests/corpora/: - pl_pii_positive/ with three labeled samples (PESEL, email, IBAN) - pl_pii_negative/ with two safe-text samples - tests/test_corpus.py discovers .txt/.json pairs at collection time, asserts the default Shield matches every labeled span in positive samples and finds zero matches in negative samples - CONTRIBUTING.md gains an "Adding to the regression corpus" section with the JSON schema and offset rules 345 tests pass, coverage 96.50%, ruff/mypy clean. 
--- CHANGELOG.md | 10 ++ CONTRIBUTING.md | 24 +++ docs/errors.md | 49 ++++++ src/llm_safe_pl/__init__.py | 5 + src/llm_safe_pl/errors.py | 34 ++++ src/llm_safe_pl/models.py | 46 ++++-- src/llm_safe_pl/shield.py | 16 +- .../pl_pii_negative/sample01_office_text.json | 1 + .../pl_pii_negative/sample01_office_text.txt | 1 + .../pl_pii_negative/sample02_short_codes.json | 1 + .../pl_pii_negative/sample02_short_codes.txt | 1 + .../sample01_pesel_simple.json | 3 + .../pl_pii_positive/sample01_pesel_simple.txt | 1 + .../sample02_email_polish.json | 3 + .../pl_pii_positive/sample02_email_polish.txt | 1 + .../pl_pii_positive/sample03_iban.json | 3 + .../corpora/pl_pii_positive/sample03_iban.txt | 1 + tests/test_corpus.py | 153 ++++++++++++++++++ tests/test_errors.py | 69 ++++++++ tests/test_public_api.py | 20 ++- tests/test_security_hardening.py | 13 ++ 21 files changed, 431 insertions(+), 24 deletions(-) create mode 100644 docs/errors.md create mode 100644 src/llm_safe_pl/errors.py create mode 100644 tests/corpora/pl_pii_negative/sample01_office_text.json create mode 100644 tests/corpora/pl_pii_negative/sample01_office_text.txt create mode 100644 tests/corpora/pl_pii_negative/sample02_short_codes.json create mode 100644 tests/corpora/pl_pii_negative/sample02_short_codes.txt create mode 100644 tests/corpora/pl_pii_positive/sample01_pesel_simple.json create mode 100644 tests/corpora/pl_pii_positive/sample01_pesel_simple.txt create mode 100644 tests/corpora/pl_pii_positive/sample02_email_polish.json create mode 100644 tests/corpora/pl_pii_positive/sample02_email_polish.txt create mode 100644 tests/corpora/pl_pii_positive/sample03_iban.json create mode 100644 tests/corpora/pl_pii_positive/sample03_iban.txt create mode 100644 tests/test_corpus.py create mode 100644 tests/test_errors.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8421dc3..37c7a45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `llm_safe_pl.errors` module with typed exception hierarchy: `LlmSafeError` (base), `MappingError` and `InputSizeError` (both also subclass `ValueError` for backwards compatibility), and `DetectorError` (also subclass of `RuntimeError`). All four are re-exported from the top-level package. See `docs/errors.md`. +- `tests/corpora/` regression-corpus scaffolding with `pl_pii_positive/` and `pl_pii_negative/` directories. `tests/test_corpus.py` discovers `.txt`/`.json` pairs at collection time and asserts current detector behavior — adding more samples strengthens regression coverage without changing test code. + +### Changed + +- `Mapping.from_dict` / `from_json` now raise `MappingError` instead of bare `ValueError` (the new class is still caught by `except ValueError`, so existing handlers keep working). +- `Shield.anonymize` / `detect` raise `InputSizeError` instead of bare `ValueError` when input exceeds `max_input_bytes` (still caught by `except ValueError`). + ## [0.2.0] - 2026-04-26 Service-pack release: a large algorithmic-perf fix and a security/hardening diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f9b17d8..6eeda3b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -53,6 +53,30 @@ Python 3.10 or newer is required. 4. Push the branch and open a pull request against `main`. 5. CI runs on every push. Please address failures before asking for review. +## Adding to the regression corpus + +The regression corpus under `tests/corpora/` is the ground truth for detector +precision and recall. Adding samples is the cheapest way to harden coverage. + +Layout: + +- `tests/corpora/pl_pii_positive/<name>.txt` — source text containing PII. +- `tests/corpora/pl_pii_positive/<name>.json` — list of objects with + `{type, start, end, value}` covering every span the default `Shield()` + must detect. Spans must not overlap. 
+- `tests/corpora/pl_pii_negative/<name>.txt` — source text that must produce + zero matches under the default `Shield()`. +- `tests/corpora/pl_pii_negative/<name>.json` — empty list, or omit the file + entirely. + +Naming: lowercase, snake_case, prefix with `sampleNN_` for sort order. +Character offsets are Python string indices (not UTF-8 bytes); negative +samples should not include strings the *current* detectors flag — wait until +the relevant fix lands before promoting an aspirational negative. + +After adding samples, run `pytest tests/test_corpus.py -v`. The loader checks +that every labeled span actually matches its `value` in the source text. + ## Commit and PR style - Write commit messages in the imperative mood ("Add PESEL validator", not "Added"). diff --git a/docs/errors.md b/docs/errors.md new file mode 100644 index 0000000..475f910 --- /dev/null +++ b/docs/errors.md @@ -0,0 +1,49 @@ +# Exception hierarchy + +`llm-safe-pl` exposes a small typed hierarchy from `llm_safe_pl.errors` (also +re-exported from the top-level package). All library errors descend from +`LlmSafeError`; specific subclasses also inherit from a relevant builtin so +existing `except ValueError` code keeps catching them. 
+ +``` +Exception +└── LlmSafeError + ├── MappingError (also subclass of ValueError) + ├── InputSizeError (also subclass of ValueError) + └── DetectorError (also subclass of RuntimeError) +``` + +## When each is raised + +| Class | Raised by | Builtin compat | +|-------------------|-------------------------------------------------|------------------| +| `MappingError` | `Mapping.from_dict` / `from_json` validation | `ValueError` | +| `InputSizeError` | `Shield.anonymize` / `detect` exceeding `max_input_bytes` | `ValueError` | +| `DetectorError` | Reserved for detector-dispatch failures; the class is exported but not yet raised internally | `RuntimeError` | + +## Why typed classes + +A bare `ValueError` doesn't tell the caller whether the problem is hostile +mapping JSON, an oversized input, or a bug — they all look the same in +`except`. The typed hierarchy lets handlers branch on cause: + +```python +from llm_safe_pl import InputSizeError, MappingError, Shield + +shield = Shield(max_input_bytes=1_000_000) +try: + result = shield.anonymize(text) +except InputSizeError: + # Caller-side: trim or reject the input. + ... +except MappingError: + # Hostile or corrupt persisted Mapping — treat as integrity failure. + ... +``` + +## `DetectorError` deliberately drops context + +`DetectorError.__init__` accepts only `detector_name` — never the input text or +an exception cause. Both can carry PII; surfacing them in a stack trace is the +class of leak the typed wrapper exists to prevent. Use `raise DetectorError(name) from None` +when re-raising a wrapped detector failure. 
diff --git a/src/llm_safe_pl/__init__.py b/src/llm_safe_pl/__init__.py index 9fb8c99..560941a 100644 --- a/src/llm_safe_pl/__init__.py +++ b/src/llm_safe_pl/__init__.py @@ -7,6 +7,7 @@ from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _version +from llm_safe_pl.errors import DetectorError, InputSizeError, LlmSafeError, MappingError from llm_safe_pl.models import AnonymizeResult, Mapping, Match, PIIType from llm_safe_pl.shield import Shield @@ -20,7 +21,11 @@ __all__ = [ "AnonymizeResult", + "DetectorError", + "InputSizeError", + "LlmSafeError", "Mapping", + "MappingError", "Match", "PIIType", "Shield", diff --git a/src/llm_safe_pl/errors.py b/src/llm_safe_pl/errors.py new file mode 100644 index 0000000..2e93000 --- /dev/null +++ b/src/llm_safe_pl/errors.py @@ -0,0 +1,34 @@ +"""Typed exception hierarchy for llm-safe-pl. + +All library errors descend from :class:`LlmSafeError`. Specific subclasses also +inherit from a relevant builtin (``ValueError`` for input/data errors, +``RuntimeError`` for dispatch failures) so legacy ``except ValueError`` code +keeps catching them. + +:class:`DetectorError` deliberately does NOT accept the original text or an +exception cause — both can carry PII. The class signature is exactly +``(detector_name)``; raise it via ``raise DetectorError(name) from None`` to +suppress the implicit cause chain. +""" + +from __future__ import annotations + + +class LlmSafeError(Exception): + """Base class for all llm-safe-pl errors.""" + + +class MappingError(LlmSafeError, ValueError): + """Raised when a Mapping fails validation (e.g. ``Mapping.from_dict``).""" + + +class InputSizeError(LlmSafeError, ValueError): + """Raised when input exceeds ``Shield(max_input_bytes=...)``.""" + + +class DetectorError(LlmSafeError, RuntimeError): + """Raised when a detector fails. 
Original text and cause are not attached.""" + + def __init__(self, detector_name: str) -> None: + super().__init__(f"detector {detector_name!r} failed") + self.detector_name = detector_name diff --git a/src/llm_safe_pl/models.py b/src/llm_safe_pl/models.py index 54cb1bb..44da8de 100644 --- a/src/llm_safe_pl/models.py +++ b/src/llm_safe_pl/models.py @@ -12,6 +12,8 @@ from enum import Enum from typing import Any +from llm_safe_pl.errors import MappingError + _TOKEN_SHAPE = re.compile(r"^\[([A-Z][A-Z_]*)_(\d+)\]$") @@ -102,55 +104,65 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> Mapping: """Load a Mapping from its JSON-dict shape with strict validation. - Raises ``ValueError`` on any of: wrong schema version, malformed - token shape, type/token-prefix mismatch, counters that don't cover - their entries, non-int counter values, missing required fields. + Raises :class:`~llm_safe_pl.errors.MappingError` on any of: wrong schema + version, malformed token shape, type/token-prefix mismatch, counters + that don't cover their entries, non-int counter values, missing + required fields. ``MappingError`` subclasses ``ValueError`` so existing + ``except ValueError`` code keeps catching it. Validation matters because Mapping JSON is the cross-process trust boundary — a tampered file should fail loudly, not silently corrupt the Mapping. 
""" if not isinstance(data, dict): - raise ValueError(f"Mapping.from_dict expected a dict, got {type(data).__name__}") + raise MappingError(f"Mapping.from_dict expected a dict, got {type(data).__name__}") version = data.get("schema_version") if version != cls.SCHEMA_VERSION: - raise ValueError(f"Unsupported mapping schema version: {version!r}") + raise MappingError(f"Unsupported mapping schema version: {version!r}") raw_counters = data.get("counters", {}) if not isinstance(raw_counters, dict): - raise ValueError(f"counters must be a dict, got {type(raw_counters).__name__}") + raise MappingError(f"counters must be a dict, got {type(raw_counters).__name__}") counters: dict[PIIType, int] = {} for t, n in raw_counters.items(): if not isinstance(n, int) or isinstance(n, bool) or n < 0: - raise ValueError(f"counter for {t!r} must be a non-negative int, got {n!r}") - counters[PIIType(t)] = n + raise MappingError(f"counter for {t!r} must be a non-negative int, got {n!r}") + try: + counters[PIIType(t)] = n + except ValueError as exc: + raise MappingError(f"unknown PII type in counters: {t!r}") from exc raw_entries = data.get("entries") if raw_entries is None: - raise ValueError("Mapping.from_dict requires an 'entries' field") + raise MappingError("Mapping.from_dict requires an 'entries' field") if not isinstance(raw_entries, list): - raise ValueError(f"entries must be a list, got {type(raw_entries).__name__}") + raise MappingError(f"entries must be a list, got {type(raw_entries).__name__}") m = cls() m._counters = counters max_per_type: dict[PIIType, int] = {} for entry in raw_entries: if not isinstance(entry, dict): - raise ValueError(f"each entry must be a dict, got {type(entry).__name__}") + raise MappingError(f"each entry must be a dict, got {type(entry).__name__}") for required in ("token", "type", "value"): if required not in entry: - raise ValueError(f"entry missing required field {required!r}: {entry!r}") + raise MappingError(f"entry missing required field 
{required!r}: {entry!r}") token = entry["token"] value = entry["value"] if not isinstance(token, str) or not isinstance(value, str): - raise ValueError(f"entry token and value must be strings: {entry!r}") - pii_type = PIIType(entry["type"]) + raise MappingError(f"entry token and value must be strings: {entry!r}") + try: + pii_type = PIIType(entry["type"]) + except ValueError as exc: + raise MappingError( + f"unknown PII type in entry {entry!r}: {entry['type']!r}" + ) from exc shape = _TOKEN_SHAPE.fullmatch(token) if shape is None: - raise ValueError(f"token {token!r} does not match [TYPE_NNN] shape") + raise MappingError(f"token {token!r} does not match [TYPE_NNN] shape") token_type_prefix = shape.group(1) if token_type_prefix != pii_type.value.upper(): - raise ValueError(f"token {token!r} prefix does not match type {pii_type.value!r}") + raise MappingError(f"token {token!r} prefix does not match type {pii_type.value!r}") counter_n = int(shape.group(2)) prev = max_per_type.get(pii_type, 0) if counter_n > prev: @@ -161,7 +173,7 @@ def from_dict(cls, data: dict[str, Any]) -> Mapping: for pii_type, observed_max in max_per_type.items(): declared = counters.get(pii_type, 0) if declared < observed_max: - raise ValueError( + raise MappingError( f"counter for {pii_type.value!r} is {declared} but entry " f"counter {observed_max} was issued" ) diff --git a/src/llm_safe_pl/shield.py b/src/llm_safe_pl/shield.py index 0a6f2a4..9a5603e 100644 --- a/src/llm_safe_pl/shield.py +++ b/src/llm_safe_pl/shield.py @@ -25,6 +25,7 @@ from llm_safe_pl.deanonymizer import Deanonymizer from llm_safe_pl.detectors import DEFAULT_DETECTORS from llm_safe_pl.detectors.base import Detector +from llm_safe_pl.errors import InputSizeError from llm_safe_pl.models import AnonymizeResult, Mapping, Match from llm_safe_pl.strategies import Strategy @@ -36,11 +37,14 @@ class Shield: detectors: Custom detector list (default: ``DEFAULT_DETECTORS``). mapping: Preloaded Mapping (default: empty Mapping). 
strategy: Anonymization strategy (only ``TOKEN`` in v0.1). - max_input_bytes: If set, ``anonymize``/``detect`` raise ``ValueError`` - for inputs whose UTF-8 byte length exceeds this. Default ``None`` - (unlimited). Recommended for hardened pipelines that ingest - untrusted text — ``Shield.anonymize`` allocates O(n) memory in - input size, so an unbounded input is a DoS vector. + max_input_bytes: If set, ``anonymize``/``detect`` raise + :class:`~llm_safe_pl.errors.InputSizeError` for inputs whose UTF-8 + byte length exceeds this. ``InputSizeError`` subclasses + ``ValueError`` so existing ``except ValueError`` code keeps + catching it. Default ``None`` (unlimited). Recommended for + hardened pipelines that ingest untrusted text — ``Shield.anonymize`` + allocates O(n) memory in input size, so an unbounded input is a + DoS vector. """ def __init__( @@ -84,7 +88,7 @@ def _check_input_size(self, text: str) -> None: return size = len(text.encode("utf-8")) if size > self._max_input_bytes: - raise ValueError(f"input is {size} bytes; max_input_bytes={self._max_input_bytes}") + raise InputSizeError(f"input is {size} bytes; max_input_bytes={self._max_input_bytes}") def anonymize(self, text: str) -> AnonymizeResult: self._check_input_size(text) diff --git a/tests/corpora/pl_pii_negative/sample01_office_text.json b/tests/corpora/pl_pii_negative/sample01_office_text.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/tests/corpora/pl_pii_negative/sample01_office_text.json @@ -0,0 +1 @@ +[] diff --git a/tests/corpora/pl_pii_negative/sample01_office_text.txt b/tests/corpora/pl_pii_negative/sample01_office_text.txt new file mode 100644 index 0000000..d50bfc1 --- /dev/null +++ b/tests/corpora/pl_pii_negative/sample01_office_text.txt @@ -0,0 +1 @@ +Sklep otwarty od 9:00 do 17:00. Spotkanie planowe 12 maja 2024. 
\ No newline at end of file diff --git a/tests/corpora/pl_pii_negative/sample02_short_codes.json b/tests/corpora/pl_pii_negative/sample02_short_codes.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/tests/corpora/pl_pii_negative/sample02_short_codes.json @@ -0,0 +1 @@ +[] diff --git a/tests/corpora/pl_pii_negative/sample02_short_codes.txt b/tests/corpora/pl_pii_negative/sample02_short_codes.txt new file mode 100644 index 0000000..4490ca1 --- /dev/null +++ b/tests/corpora/pl_pii_negative/sample02_short_codes.txt @@ -0,0 +1 @@ +Numer referencyjny: 12345. Kod produktu: 9876. ID zamowienia: ABC123. \ No newline at end of file diff --git a/tests/corpora/pl_pii_positive/sample01_pesel_simple.json b/tests/corpora/pl_pii_positive/sample01_pesel_simple.json new file mode 100644 index 0000000..eb25b63 --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample01_pesel_simple.json @@ -0,0 +1,3 @@ +[ + {"type": "pesel", "start": 7, "end": 18, "value": "44051401359"} +] diff --git a/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt b/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt new file mode 100644 index 0000000..7939ebe --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt @@ -0,0 +1 @@ +PESEL: 44051401359 \ No newline at end of file diff --git a/tests/corpora/pl_pii_positive/sample02_email_polish.json b/tests/corpora/pl_pii_positive/sample02_email_polish.json new file mode 100644 index 0000000..4680fe7 --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample02_email_polish.json @@ -0,0 +1,3 @@ +[ + {"type": "email", "start": 16, "end": 30, "value": "jan@example.pl"} +] diff --git a/tests/corpora/pl_pii_positive/sample02_email_polish.txt b/tests/corpora/pl_pii_positive/sample02_email_polish.txt new file mode 100644 index 0000000..2b44eaf --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample02_email_polish.txt @@ -0,0 +1 @@ +Skontaktuj się: jan@example.pl \ No newline at end of file diff --git 
a/tests/corpora/pl_pii_positive/sample03_iban.json b/tests/corpora/pl_pii_positive/sample03_iban.json new file mode 100644 index 0000000..0db0e65 --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample03_iban.json @@ -0,0 +1,3 @@ +[ + {"type": "iban", "start": 7, "end": 35, "value": "PL61109010140000071219812874"} +] diff --git a/tests/corpora/pl_pii_positive/sample03_iban.txt b/tests/corpora/pl_pii_positive/sample03_iban.txt new file mode 100644 index 0000000..c092990 --- /dev/null +++ b/tests/corpora/pl_pii_positive/sample03_iban.txt @@ -0,0 +1 @@ +Konto: PL61109010140000071219812874 \ No newline at end of file diff --git a/tests/test_corpus.py b/tests/test_corpus.py new file mode 100644 index 0000000..ce6996c --- /dev/null +++ b/tests/test_corpus.py @@ -0,0 +1,153 @@ +"""Regression tests against the labeled PII corpus under ``tests/corpora/``. + +The corpus is the ground truth for detector precision/recall as detector +coverage grows. Adding more samples to ``pl_pii_positive/`` and +``pl_pii_negative/`` strengthens regression coverage without changing test +code: the fixtures in this module discover ``.txt``/``.json`` pairs at +collection time. + +Format: + +- ``pl_pii_positive/<name>.txt`` — source text +- ``pl_pii_positive/<name>.json`` — list of ``{type, start, end, value}`` + objects covering the labeled spans (must not overlap) +- ``pl_pii_negative/<name>.txt`` — source text +- ``pl_pii_negative/<name>.json`` — empty list (or omit the file) + +The loader is kept inline rather than in a separate module to avoid the +sys.path gymnastics of cross-test imports; if a future test needs to reuse +the loader, lift it to a shared package then. 
+""" + +from __future__ import annotations + +import json +from collections.abc import Iterator +from dataclasses import dataclass +from itertools import pairwise +from pathlib import Path + +import pytest + +from llm_safe_pl import Shield + +_CORPORA_ROOT = Path(__file__).parent / "corpora" + + +@dataclass(frozen=True) +class ExpectedSpan: + type: str + start: int + end: int + value: str + + +@dataclass(frozen=True) +class CorpusSample: + name: str + text: str + spans: tuple[ExpectedSpan, ...] + + +def _check_no_overlap(spans: tuple[ExpectedSpan, ...], *, sample: str) -> None: + sorted_spans = sorted(spans, key=lambda s: s.start) + for a, b in pairwise(sorted_spans): + if a.end > b.start: + raise ValueError( + f"corpus sample {sample!r}: labels overlap " + f"({a.type}@{a.start}-{a.end} vs {b.type}@{b.start}-{b.end})" + ) + + +def _load_directory(name: str) -> Iterator[CorpusSample]: + directory = _CORPORA_ROOT / name + if not directory.is_dir(): + raise FileNotFoundError(f"corpus directory not found: {directory}") + for txt_path in sorted(directory.glob("*.txt")): + text = txt_path.read_text(encoding="utf-8") + json_path = txt_path.with_suffix(".json") + if json_path.is_file(): + raw = json.loads(json_path.read_text(encoding="utf-8")) + spans = tuple( + ExpectedSpan(type=e["type"], start=e["start"], end=e["end"], value=e["value"]) + for e in raw + ) + _check_no_overlap(spans, sample=txt_path.name) + else: + spans = () + yield CorpusSample(name=txt_path.stem, text=text, spans=spans) + + +_POSITIVE_SAMPLES = list(_load_directory("pl_pii_positive")) +_NEGATIVE_SAMPLES = list(_load_directory("pl_pii_negative")) + + +class TestLoader: + def test_positive_corpus_not_empty(self) -> None: + assert len(_POSITIVE_SAMPLES) >= 1, "positive corpus must not be empty" + + def test_negative_corpus_not_empty(self) -> None: + assert len(_NEGATIVE_SAMPLES) >= 1, "negative corpus must not be empty" + + def test_positive_samples_have_text(self) -> None: + for s in _POSITIVE_SAMPLES: + 
assert s.text, f"sample {s.name} has empty text" + + def test_positive_samples_have_spans(self) -> None: + for s in _POSITIVE_SAMPLES: + assert s.spans, f"positive sample {s.name} has no labeled spans" + + def test_overlap_detection_rejects_overlapping_labels(self) -> None: + spans = ( + ExpectedSpan(type="T", start=0, end=3, value="abc"), + ExpectedSpan(type="T", start=2, end=5, value="cde"), + ) + with pytest.raises(ValueError, match="overlap"): + _check_no_overlap(spans, sample="x.txt") + + def test_overlap_detection_allows_adjacent_labels(self) -> None: + spans = ( + ExpectedSpan(type="T", start=0, end=3, value="abc"), + ExpectedSpan(type="T", start=3, end=6, value="def"), + ) + # Adjacent (a.end == b.start) is fine. + _check_no_overlap(spans, sample="x.txt") + + def test_label_values_match_text_substrings(self) -> None: + for s in _POSITIVE_SAMPLES: + for span in s.spans: + assert s.text[span.start : span.end] == span.value, ( + f"sample {s.name}: label {span} does not match " + f"text[{span.start}:{span.end}]={s.text[span.start : span.end]!r}" + ) + + +class TestPositiveCorpus: + @pytest.mark.parametrize( + "sample", + _POSITIVE_SAMPLES, + ids=lambda s: s.name, + ) + def test_default_shield_finds_labeled_spans(self, sample: CorpusSample) -> None: + shield = Shield() + result = shield.detect(sample.text) + actual = {(m.start, m.end, m.type.value) for m in result} + expected = {(s.start, s.end, s.type) for s in sample.spans} + assert expected.issubset(actual), ( + f"sample {sample.name}: missing labeled spans {expected - actual}; got {actual}" + ) + + +class TestNegativeCorpus: + @pytest.mark.parametrize( + "sample", + _NEGATIVE_SAMPLES, + ids=lambda s: s.name, + ) + def test_default_shield_finds_no_matches(self, sample: CorpusSample) -> None: + shield = Shield() + result = shield.detect(sample.text) + assert len(result) == 0, ( + f"sample {sample.name}: expected zero matches, got " + f"{[(m.start, m.end, m.type.value, m.value) for m in result]}" + ) diff --git 
a/tests/test_errors.py b/tests/test_errors.py new file mode 100644 index 0000000..50e4e43 --- /dev/null +++ b/tests/test_errors.py @@ -0,0 +1,69 @@ +"""Tests for the typed exception hierarchy in ``llm_safe_pl.errors``.""" + +from __future__ import annotations + +import pytest + +from llm_safe_pl.errors import ( + DetectorError, + InputSizeError, + LlmSafeError, + MappingError, +) + + +class TestHierarchy: + def test_llm_safe_error_subclasses_exception(self) -> None: + assert issubclass(LlmSafeError, Exception) + assert not issubclass(LlmSafeError, ValueError) + + def test_mapping_error_is_value_error(self) -> None: + assert issubclass(MappingError, LlmSafeError) + assert issubclass(MappingError, ValueError) + + def test_input_size_error_is_value_error(self) -> None: + assert issubclass(InputSizeError, LlmSafeError) + assert issubclass(InputSizeError, ValueError) + + def test_detector_error_is_runtime_error(self) -> None: + assert issubclass(DetectorError, LlmSafeError) + assert issubclass(DetectorError, RuntimeError) + + +class TestExceptCompat: + def test_mapping_error_caught_as_value_error(self) -> None: + with pytest.raises(ValueError): + raise MappingError("x") + + def test_input_size_error_caught_as_value_error(self) -> None: + with pytest.raises(ValueError): + raise InputSizeError("x") + + def test_mapping_error_caught_as_llm_safe_error(self) -> None: + with pytest.raises(LlmSafeError): + raise MappingError("x") + + def test_input_size_error_caught_as_llm_safe_error(self) -> None: + with pytest.raises(LlmSafeError): + raise InputSizeError("x") + + def test_detector_error_caught_as_llm_safe_error(self) -> None: + with pytest.raises(LlmSafeError): + raise DetectorError("pesel") + + +class TestDetectorError: + def test_detector_name_attribute(self) -> None: + e = DetectorError("pesel") + assert e.detector_name == "pesel" + + def test_message_does_not_include_implicit_text(self) -> None: + e = DetectorError("pesel") + # The message includes the detector name only 
— never input text. + assert str(e) == "detector 'pesel' failed" + + def test_does_not_accept_extra_args(self) -> None: + # Signature is exactly (detector_name); a caller that tries to attach + # text or a cause via extra args should fail loudly. + with pytest.raises(TypeError): + DetectorError("pesel", "44051401359") # type: ignore[call-arg] diff --git a/tests/test_public_api.py b/tests/test_public_api.py index a4baa0c..fcf6056 100644 --- a/tests/test_public_api.py +++ b/tests/test_public_api.py @@ -4,19 +4,37 @@ def test_public_api_exports_are_importable() -> None: - from llm_safe_pl import AnonymizeResult, Mapping, Match, PIIType, Shield + from llm_safe_pl import ( + AnonymizeResult, + DetectorError, + InputSizeError, + LlmSafeError, + Mapping, + MappingError, + Match, + PIIType, + Shield, + ) assert Shield is not None assert Match is not None assert Mapping is not None assert AnonymizeResult is not None assert PIIType is not None + assert LlmSafeError is not None + assert MappingError is not None + assert InputSizeError is not None + assert DetectorError is not None def test_all_matches_expected_surface() -> None: assert set(llm_safe_pl.__all__) == { "AnonymizeResult", + "DetectorError", + "InputSizeError", + "LlmSafeError", "Mapping", + "MappingError", "Match", "PIIType", "Shield", diff --git a/tests/test_security_hardening.py b/tests/test_security_hardening.py index 0a34950..8b1d1ca 100644 --- a/tests/test_security_hardening.py +++ b/tests/test_security_hardening.py @@ -15,6 +15,7 @@ from llm_safe_pl.anonymizer import Anonymizer from llm_safe_pl.detectors.base import RegexDetector from llm_safe_pl.detectors.pesel import PeselDetector +from llm_safe_pl.errors import InputSizeError, MappingError from llm_safe_pl.models import Mapping, PIIType from llm_safe_pl.shield import Shield from llm_safe_pl.strategies import Strategy @@ -39,6 +40,12 @@ def test_rejects_non_dict(self) -> None: with pytest.raises(ValueError, match="expected a dict"): Mapping.from_dict([]) # 
type: ignore[arg-type] + def test_raises_typed_mapping_error(self) -> None: + # MappingError is the typed exception; ValueError compatibility is + # preserved via multi-inheritance. + with pytest.raises(MappingError): + Mapping.from_dict([]) # type: ignore[arg-type] + def test_rejects_wrong_schema_version(self) -> None: data = _baseline() data["schema_version"] = 2 @@ -160,6 +167,12 @@ def test_detect_respects_max_input_bytes(self) -> None: with pytest.raises(ValueError, match="max_input_bytes"): shield.detect("This is far longer than 10 bytes of text") + def test_max_input_bytes_raises_typed_error(self) -> None: + # InputSizeError is the typed exception; ValueError catching still works. + shield = Shield(max_input_bytes=10) + with pytest.raises(InputSizeError, match="max_input_bytes"): + shield.anonymize("This is far longer than 10 bytes of text") + def test_no_guard_by_default(self) -> None: shield = Shield() # Should not raise on a 10 KiB input.