From 1f40dcca180ef9982c08636cbc0349f114a7e04d Mon Sep 17 00:00:00 2001
From: Michal Piotrowski <piotrowski.michal@protonmail.com>
Date: Sun, 26 Apr 2026 19:23:30 +0200
Subject: [PATCH] Add typed exception hierarchy and regression-corpus
 scaffolding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Typed exception hierarchy under llm_safe_pl.errors:

- LlmSafeError (base, subclass of Exception)
- MappingError, InputSizeError (also subclass ValueError so existing
  `except ValueError` code keeps catching)
- DetectorError (also subclass RuntimeError; constructor signature is
  (detector_name) only — never input text or cause, to prevent PII
  from leaking into stack traces)

Mapping.from_dict / from_json and the Shield input-size guard now raise
the typed classes instead of bare ValueError. Constructor-time argument
validation (Shield(max_input_bytes=-1), duplicate detector names) keeps
raising plain ValueError.

Regression-corpus scaffolding under tests/corpora/:

- pl_pii_positive/ with three labeled samples (PESEL, email, IBAN)
- pl_pii_negative/ with two safe-text samples
- tests/test_corpus.py discovers .txt/.json pairs at collection time,
  asserts the default Shield matches every labeled span in positive
  samples and finds zero matches in negative samples
- CONTRIBUTING.md gains an "Adding to the regression corpus" section
  with the JSON schema and offset rules

345 tests pass, coverage 96.50%, ruff/mypy clean.
---
 CHANGELOG.md                                  |  10 ++
 CONTRIBUTING.md                               |  24 +++
 docs/errors.md                                |  49 ++++++
 src/llm_safe_pl/__init__.py                   |   5 +
 src/llm_safe_pl/errors.py                     |  34 ++++
 src/llm_safe_pl/models.py                     |  46 ++++--
 src/llm_safe_pl/shield.py                     |  16 +-
 .../pl_pii_negative/sample01_office_text.json |   1 +
 .../pl_pii_negative/sample01_office_text.txt  |   1 +
 .../pl_pii_negative/sample02_short_codes.json |   1 +
 .../pl_pii_negative/sample02_short_codes.txt  |   1 +
 .../sample01_pesel_simple.json                |   3 +
 .../pl_pii_positive/sample01_pesel_simple.txt |   1 +
 .../sample02_email_polish.json                |   3 +
 .../pl_pii_positive/sample02_email_polish.txt |   1 +
 .../pl_pii_positive/sample03_iban.json        |   3 +
 .../corpora/pl_pii_positive/sample03_iban.txt |   1 +
 tests/test_corpus.py                          | 153 ++++++++++++++++++
 tests/test_errors.py                          |  69 ++++++++
 tests/test_public_api.py                      |  20 ++-
 tests/test_security_hardening.py              |  13 ++
 21 files changed, 431 insertions(+), 24 deletions(-)
 create mode 100644 docs/errors.md
 create mode 100644 src/llm_safe_pl/errors.py
 create mode 100644 tests/corpora/pl_pii_negative/sample01_office_text.json
 create mode 100644 tests/corpora/pl_pii_negative/sample01_office_text.txt
 create mode 100644 tests/corpora/pl_pii_negative/sample02_short_codes.json
 create mode 100644 tests/corpora/pl_pii_negative/sample02_short_codes.txt
 create mode 100644 tests/corpora/pl_pii_positive/sample01_pesel_simple.json
 create mode 100644 tests/corpora/pl_pii_positive/sample01_pesel_simple.txt
 create mode 100644 tests/corpora/pl_pii_positive/sample02_email_polish.json
 create mode 100644 tests/corpora/pl_pii_positive/sample02_email_polish.txt
 create mode 100644 tests/corpora/pl_pii_positive/sample03_iban.json
 create mode 100644 tests/corpora/pl_pii_positive/sample03_iban.txt
 create mode 100644 tests/test_corpus.py
 create mode 100644 tests/test_errors.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8421dc3..37c7a45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- `llm_safe_pl.errors` module with typed exception hierarchy: `LlmSafeError` (base), `MappingError` and `InputSizeError` (both also subclass `ValueError` for backwards compatibility), and `DetectorError` (also subclass of `RuntimeError`). All four are re-exported from the top-level package. See `docs/errors.md`.
+- `tests/corpora/` regression-corpus scaffolding with `pl_pii_positive/` and `pl_pii_negative/` directories. `tests/test_corpus.py` discovers `.txt`/`.json` pairs at collection time and asserts current detector behavior — adding more samples strengthens regression coverage without changing test code.
+
+### Changed
+
+- `Mapping.from_dict` / `from_json` now raise `MappingError` instead of bare `ValueError` (the new class still catches as `ValueError`, so existing handlers keep working).
+- `Shield.anonymize` / `detect` raise `InputSizeError` instead of bare `ValueError` when input exceeds `max_input_bytes` (still catches as `ValueError`).
+
 ## [0.2.0] - 2026-04-26
 
 Service-pack release: a large algorithmic-perf fix and a security/hardening
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f9b17d8..6eeda3b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -53,6 +53,30 @@ Python 3.10 or newer is required.
 4. Push the branch and open a pull request against `main`.
 5. CI runs on every push. Please address failures before asking for review.
 
+## Adding to the regression corpus
+
+The regression corpus under `tests/corpora/` is the ground truth for detector
+precision and recall. Adding samples is the cheapest way to harden coverage.
+
+Layout:
+
+- `tests/corpora/pl_pii_positive/<name>.txt` — source text containing PII.
+- `tests/corpora/pl_pii_positive/<name>.json` — list of objects with
+  `{type, start, end, value}` covering every span the default `Shield()`
+  must detect. Spans must not overlap.
+- `tests/corpora/pl_pii_negative/<name>.txt` — source text that must produce
+  zero matches under the default `Shield()`.
+- `tests/corpora/pl_pii_negative/<name>.json` — empty list, or omit the file
+  entirely.
+
+Naming: lowercase, snake_case, prefix with `sampleNN_` for sort order.
+Offsets are Python string indices (not UTF-8 bytes), `end` exclusive. Negative
+samples should not include strings the *current* detectors flag — wait until
+the relevant fix lands before promoting an aspirational negative.
+
+After adding samples, run `pytest tests/test_corpus.py -v`. The suite checks
+that labels do not overlap and that each span's `value` matches the source text.
+
 ## Commit and PR style
 
 - Write commit messages in the imperative mood ("Add PESEL validator", not "Added").
diff --git a/docs/errors.md b/docs/errors.md
new file mode 100644
index 0000000..475f910
--- /dev/null
+++ b/docs/errors.md
@@ -0,0 +1,49 @@
+# Exception hierarchy
+
+`llm-safe-pl` exposes a small typed hierarchy from `llm_safe_pl.errors` (also
+re-exported from the top-level package). All library errors descend from
+`LlmSafeError`; specific subclasses also inherit from a relevant builtin so
+existing `except ValueError` code keeps catching them.
+
+```
+Exception
+└── LlmSafeError
+    ├── MappingError      (also subclass of ValueError)
+    ├── InputSizeError    (also subclass of ValueError)
+    └── DetectorError     (also subclass of RuntimeError)
+```
+
+## When each is raised
+
+| Class             | Raised by                                       | Builtin compat   |
+|-------------------|-------------------------------------------------|------------------|
+| `MappingError`    | `Mapping.from_dict` / `from_json` validation    | `ValueError`     |
+| `InputSizeError`  | `Shield.anonymize` / `detect` exceeding `max_input_bytes` | `ValueError` |
+| `DetectorError`   | Reserved for detector-dispatch failures; the class is exported but not yet raised internally | `RuntimeError` |
+
+## Why typed classes
+
+A bare `ValueError` doesn't tell the caller whether the problem is hostile
+mapping JSON, an oversized input, or a bug — they all look the same in
+`except`. The typed hierarchy lets handlers branch on cause:
+
+```python
+from llm_safe_pl import InputSizeError, Mapping, MappingError, Shield
+
+try:
+    mapping = Mapping.from_dict(saved)  # saved: dict loaded from persisted JSON
+    result = Shield(mapping=mapping, max_input_bytes=1_000_000).anonymize(text)
+except InputSizeError:
+    # Caller-side: trim or reject the input.
+    ...
+except MappingError:
+    # Hostile or corrupt persisted Mapping — treat as integrity failure.
+    ...
+```
+
+## `DetectorError` deliberately drops context
+
+`DetectorError.__init__` accepts only `detector_name` — never the input text or
+an exception cause. Both can carry PII; surfacing them in a stack trace is the
+class of leak the typed wrapper exists to prevent. Use `raise DetectorError(name) from None`
+when re-raising a wrapped detector failure.
diff --git a/src/llm_safe_pl/__init__.py b/src/llm_safe_pl/__init__.py
index 9fb8c99..560941a 100644
--- a/src/llm_safe_pl/__init__.py
+++ b/src/llm_safe_pl/__init__.py
@@ -7,6 +7,7 @@
 from importlib.metadata import PackageNotFoundError
 from importlib.metadata import version as _version
 
+from llm_safe_pl.errors import DetectorError, InputSizeError, LlmSafeError, MappingError
 from llm_safe_pl.models import AnonymizeResult, Mapping, Match, PIIType
 from llm_safe_pl.shield import Shield
 
@@ -20,7 +21,11 @@
 
 __all__ = [
     "AnonymizeResult",
+    "DetectorError",
+    "InputSizeError",
+    "LlmSafeError",
     "Mapping",
+    "MappingError",
     "Match",
     "PIIType",
     "Shield",
diff --git a/src/llm_safe_pl/errors.py b/src/llm_safe_pl/errors.py
new file mode 100644
index 0000000..2e93000
--- /dev/null
+++ b/src/llm_safe_pl/errors.py
@@ -0,0 +1,34 @@
+"""Typed exception hierarchy for llm-safe-pl.
+
+All library errors descend from :class:`LlmSafeError`. Specific subclasses also
+inherit from a relevant builtin (``ValueError`` for input/data errors,
+``RuntimeError`` for dispatch failures) so legacy ``except ValueError`` code
+keeps catching them.
+
+:class:`DetectorError` deliberately does NOT accept the original text or an
+exception cause — both can carry PII. The class signature is exactly
+``(detector_name)``; raise it via ``raise DetectorError(name) from None`` to
+suppress the implicit cause chain.
+"""
+
+from __future__ import annotations
+
+
+class LlmSafeError(Exception):
+    """Base class for all llm-safe-pl errors."""
+
+
+class MappingError(LlmSafeError, ValueError):
+    """Raised when a Mapping fails validation (e.g. ``Mapping.from_dict``)."""
+
+
+class InputSizeError(LlmSafeError, ValueError):
+    """Raised when input exceeds ``Shield(max_input_bytes=...)``."""
+
+
+class DetectorError(LlmSafeError, RuntimeError):
+    """Raised when a detector fails. Original text and cause are not attached."""
+
+    def __init__(self, detector_name: str) -> None:
+        super().__init__(f"detector {detector_name!r} failed")
+        self.detector_name = detector_name
diff --git a/src/llm_safe_pl/models.py b/src/llm_safe_pl/models.py
index 54cb1bb..44da8de 100644
--- a/src/llm_safe_pl/models.py
+++ b/src/llm_safe_pl/models.py
@@ -12,6 +12,8 @@
 from enum import Enum
 from typing import Any
 
+from llm_safe_pl.errors import MappingError
+
 _TOKEN_SHAPE = re.compile(r"^\[([A-Z][A-Z_]*)_(\d+)\]$")
 
 
@@ -102,55 +104,65 @@ def to_dict(self) -> dict[str, Any]:
     def from_dict(cls, data: dict[str, Any]) -> Mapping:
         """Load a Mapping from its JSON-dict shape with strict validation.
 
-        Raises ``ValueError`` on any of: wrong schema version, malformed
-        token shape, type/token-prefix mismatch, counters that don't cover
-        their entries, non-int counter values, missing required fields.
+        Raises :class:`~llm_safe_pl.errors.MappingError` on any of: wrong schema
+        version, malformed token shape, type/token-prefix mismatch, counters
+        that don't cover their entries, non-int counter values, missing
+        required fields. ``MappingError`` subclasses ``ValueError`` so existing
+        ``except ValueError`` code keeps catching it.
 
         Validation matters because Mapping JSON is the cross-process trust
         boundary — a tampered file should fail loudly, not silently corrupt
         the Mapping.
         """
         if not isinstance(data, dict):
-            raise ValueError(f"Mapping.from_dict expected a dict, got {type(data).__name__}")
+            raise MappingError(f"Mapping.from_dict expected a dict, got {type(data).__name__}")
         version = data.get("schema_version")
         if version != cls.SCHEMA_VERSION:
-            raise ValueError(f"Unsupported mapping schema version: {version!r}")
+            raise MappingError(f"Unsupported mapping schema version: {version!r}")
 
         raw_counters = data.get("counters", {})
         if not isinstance(raw_counters, dict):
-            raise ValueError(f"counters must be a dict, got {type(raw_counters).__name__}")
+            raise MappingError(f"counters must be a dict, got {type(raw_counters).__name__}")
         counters: dict[PIIType, int] = {}
         for t, n in raw_counters.items():
             if not isinstance(n, int) or isinstance(n, bool) or n < 0:
-                raise ValueError(f"counter for {t!r} must be a non-negative int, got {n!r}")
-            counters[PIIType(t)] = n
+                raise MappingError(f"counter for {t!r} must be a non-negative int, got {n!r}")
+            try:
+                counters[PIIType(t)] = n
+            except ValueError as exc:
+                raise MappingError(f"unknown PII type in counters: {t!r}") from exc
 
         raw_entries = data.get("entries")
         if raw_entries is None:
-            raise ValueError("Mapping.from_dict requires an 'entries' field")
+            raise MappingError("Mapping.from_dict requires an 'entries' field")
         if not isinstance(raw_entries, list):
-            raise ValueError(f"entries must be a list, got {type(raw_entries).__name__}")
+            raise MappingError(f"entries must be a list, got {type(raw_entries).__name__}")
 
         m = cls()
         m._counters = counters
         max_per_type: dict[PIIType, int] = {}
         for entry in raw_entries:
             if not isinstance(entry, dict):
-                raise ValueError(f"each entry must be a dict, got {type(entry).__name__}")
+                raise MappingError(f"each entry must be a dict, got {type(entry).__name__}")
             for required in ("token", "type", "value"):
                 if required not in entry:
-                    raise ValueError(f"entry missing required field {required!r}: {entry!r}")
+                    raise MappingError(f"entry missing required field {required!r}: {entry!r}")
             token = entry["token"]
             value = entry["value"]
             if not isinstance(token, str) or not isinstance(value, str):
-                raise ValueError(f"entry token and value must be strings: {entry!r}")
-            pii_type = PIIType(entry["type"])
+                raise MappingError(f"entry token and value must be strings: {entry!r}")
+            try:
+                pii_type = PIIType(entry["type"])
+            except ValueError as exc:  # report type/token only; entry["value"] may be raw PII
+                raise MappingError(
+                    f"unknown PII type {entry['type']!r} in entry with token {token!r}"
+                ) from exc
             shape = _TOKEN_SHAPE.fullmatch(token)
             if shape is None:
-                raise ValueError(f"token {token!r} does not match [TYPE_NNN] shape")
+                raise MappingError(f"token {token!r} does not match [TYPE_NNN] shape")
             token_type_prefix = shape.group(1)
             if token_type_prefix != pii_type.value.upper():
-                raise ValueError(f"token {token!r} prefix does not match type {pii_type.value!r}")
+                raise MappingError(f"token {token!r} prefix does not match type {pii_type.value!r}")
             counter_n = int(shape.group(2))
             prev = max_per_type.get(pii_type, 0)
             if counter_n > prev:
@@ -161,7 +173,7 @@ def from_dict(cls, data: dict[str, Any]) -> Mapping:
         for pii_type, observed_max in max_per_type.items():
             declared = counters.get(pii_type, 0)
             if declared < observed_max:
-                raise ValueError(
+                raise MappingError(
                     f"counter for {pii_type.value!r} is {declared} but entry "
                     f"counter {observed_max} was issued"
                 )
diff --git a/src/llm_safe_pl/shield.py b/src/llm_safe_pl/shield.py
index 0a6f2a4..9a5603e 100644
--- a/src/llm_safe_pl/shield.py
+++ b/src/llm_safe_pl/shield.py
@@ -25,6 +25,7 @@
 from llm_safe_pl.deanonymizer import Deanonymizer
 from llm_safe_pl.detectors import DEFAULT_DETECTORS
 from llm_safe_pl.detectors.base import Detector
+from llm_safe_pl.errors import InputSizeError
 from llm_safe_pl.models import AnonymizeResult, Mapping, Match
 from llm_safe_pl.strategies import Strategy
 
@@ -36,11 +37,14 @@ class Shield:
         detectors: Custom detector list (default: ``DEFAULT_DETECTORS``).
         mapping: Preloaded Mapping (default: empty Mapping).
         strategy: Anonymization strategy (only ``TOKEN`` in v0.1).
-        max_input_bytes: If set, ``anonymize``/``detect`` raise ``ValueError``
-            for inputs whose UTF-8 byte length exceeds this. Default ``None``
-            (unlimited). Recommended for hardened pipelines that ingest
-            untrusted text — ``Shield.anonymize`` allocates O(n) memory in
-            input size, so an unbounded input is a DoS vector.
+        max_input_bytes: If set, ``anonymize``/``detect`` raise
+            :class:`~llm_safe_pl.errors.InputSizeError` for inputs whose UTF-8
+            byte length exceeds this. ``InputSizeError`` subclasses
+            ``ValueError`` so existing ``except ValueError`` code keeps
+            catching it. Default ``None`` (unlimited). Recommended for
+            hardened pipelines that ingest untrusted text — ``Shield.anonymize``
+            allocates O(n) memory in input size, so an unbounded input is a
+            DoS vector.
     """
 
     def __init__(
@@ -84,7 +88,7 @@ def _check_input_size(self, text: str) -> None:
             return
         size = len(text.encode("utf-8"))
         if size > self._max_input_bytes:
-            raise ValueError(f"input is {size} bytes; max_input_bytes={self._max_input_bytes}")
+            raise InputSizeError(f"input is {size} bytes; max_input_bytes={self._max_input_bytes}")
 
     def anonymize(self, text: str) -> AnonymizeResult:
         self._check_input_size(text)
diff --git a/tests/corpora/pl_pii_negative/sample01_office_text.json b/tests/corpora/pl_pii_negative/sample01_office_text.json
new file mode 100644
index 0000000..fe51488
--- /dev/null
+++ b/tests/corpora/pl_pii_negative/sample01_office_text.json
@@ -0,0 +1 @@
+[]
diff --git a/tests/corpora/pl_pii_negative/sample01_office_text.txt b/tests/corpora/pl_pii_negative/sample01_office_text.txt
new file mode 100644
index 0000000..d50bfc1
--- /dev/null
+++ b/tests/corpora/pl_pii_negative/sample01_office_text.txt
@@ -0,0 +1 @@
+Sklep otwarty od 9:00 do 17:00. Spotkanie planowe 12 maja 2024.
\ No newline at end of file
diff --git a/tests/corpora/pl_pii_negative/sample02_short_codes.json b/tests/corpora/pl_pii_negative/sample02_short_codes.json
new file mode 100644
index 0000000..fe51488
--- /dev/null
+++ b/tests/corpora/pl_pii_negative/sample02_short_codes.json
@@ -0,0 +1 @@
+[]
diff --git a/tests/corpora/pl_pii_negative/sample02_short_codes.txt b/tests/corpora/pl_pii_negative/sample02_short_codes.txt
new file mode 100644
index 0000000..4490ca1
--- /dev/null
+++ b/tests/corpora/pl_pii_negative/sample02_short_codes.txt
@@ -0,0 +1 @@
+Numer referencyjny: 12345. Kod produktu: 9876. ID zamowienia: ABC123.
\ No newline at end of file
diff --git a/tests/corpora/pl_pii_positive/sample01_pesel_simple.json b/tests/corpora/pl_pii_positive/sample01_pesel_simple.json
new file mode 100644
index 0000000..eb25b63
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample01_pesel_simple.json
@@ -0,0 +1,3 @@
+[
+  {"type": "pesel", "start": 7, "end": 18, "value": "44051401359"}
+]
diff --git a/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt b/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt
new file mode 100644
index 0000000..7939ebe
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample01_pesel_simple.txt
@@ -0,0 +1 @@
+PESEL: 44051401359
\ No newline at end of file
diff --git a/tests/corpora/pl_pii_positive/sample02_email_polish.json b/tests/corpora/pl_pii_positive/sample02_email_polish.json
new file mode 100644
index 0000000..4680fe7
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample02_email_polish.json
@@ -0,0 +1,3 @@
+[
+  {"type": "email", "start": 16, "end": 30, "value": "jan@example.pl"}
+]
diff --git a/tests/corpora/pl_pii_positive/sample02_email_polish.txt b/tests/corpora/pl_pii_positive/sample02_email_polish.txt
new file mode 100644
index 0000000..2b44eaf
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample02_email_polish.txt
@@ -0,0 +1 @@
+Skontaktuj się: jan@example.pl
\ No newline at end of file
diff --git a/tests/corpora/pl_pii_positive/sample03_iban.json b/tests/corpora/pl_pii_positive/sample03_iban.json
new file mode 100644
index 0000000..0db0e65
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample03_iban.json
@@ -0,0 +1,3 @@
+[
+  {"type": "iban", "start": 7, "end": 35, "value": "PL61109010140000071219812874"}
+]
diff --git a/tests/corpora/pl_pii_positive/sample03_iban.txt b/tests/corpora/pl_pii_positive/sample03_iban.txt
new file mode 100644
index 0000000..c092990
--- /dev/null
+++ b/tests/corpora/pl_pii_positive/sample03_iban.txt
@@ -0,0 +1 @@
+Konto: PL61109010140000071219812874
\ No newline at end of file
diff --git a/tests/test_corpus.py b/tests/test_corpus.py
new file mode 100644
index 0000000..ce6996c
--- /dev/null
+++ b/tests/test_corpus.py
@@ -0,0 +1,153 @@
+"""Regression tests against the labeled PII corpus under ``tests/corpora/``.
+
+The corpus is the ground truth for detector precision/recall as detector
+coverage grows. Adding more samples to ``pl_pii_positive/`` and
+``pl_pii_negative/`` strengthens regression coverage without changing test
+code: the fixtures in this module discover ``.txt``/``.json`` pairs at
+collection time.
+
+Format:
+
+- ``pl_pii_positive/<name>.txt`` — source text
+- ``pl_pii_positive/<name>.json`` — list of ``{type, start, end, value}``
+  objects covering the labeled spans (must not overlap)
+- ``pl_pii_negative/<name>.txt`` — source text
+- ``pl_pii_negative/<name>.json`` — empty list (or omit the file)
+
+The loader is kept inline rather than in a separate module to avoid the
+sys.path gymnastics of cross-test imports; if a future test needs to reuse
+the loader, lift it to a shared package then.
+"""
+
+from __future__ import annotations
+
+import json
+from collections.abc import Iterator
+from dataclasses import dataclass
+from itertools import pairwise
+from pathlib import Path
+
+import pytest
+
+from llm_safe_pl import Shield
+
+_CORPORA_ROOT = Path(__file__).parent / "corpora"
+
+
+@dataclass(frozen=True)
+class ExpectedSpan:
+    type: str
+    start: int
+    end: int
+    value: str
+
+
+@dataclass(frozen=True)
+class CorpusSample:
+    name: str
+    text: str
+    spans: tuple[ExpectedSpan, ...]
+
+
+def _check_no_overlap(spans: tuple[ExpectedSpan, ...], *, sample: str) -> None:
+    sorted_spans = sorted(spans, key=lambda s: s.start)
+    for a, b in pairwise(sorted_spans):
+        if a.end > b.start:
+            raise ValueError(
+                f"corpus sample {sample!r}: labels overlap "
+                f"({a.type}@{a.start}-{a.end} vs {b.type}@{b.start}-{b.end})"
+            )
+
+
+def _load_directory(name: str) -> Iterator[CorpusSample]:
+    directory = _CORPORA_ROOT / name
+    if not directory.is_dir():
+        raise FileNotFoundError(f"corpus directory not found: {directory}")
+    for txt_path in sorted(directory.glob("*.txt")):
+        text = txt_path.read_text(encoding="utf-8")
+        json_path = txt_path.with_suffix(".json")
+        if json_path.is_file():
+            raw = json.loads(json_path.read_text(encoding="utf-8"))
+            spans = tuple(
+                ExpectedSpan(type=e["type"], start=e["start"], end=e["end"], value=e["value"])
+                for e in raw
+            )
+            _check_no_overlap(spans, sample=txt_path.name)
+        else:
+            spans = ()
+        yield CorpusSample(name=txt_path.stem, text=text, spans=spans)
+
+
+_POSITIVE_SAMPLES = list(_load_directory("pl_pii_positive"))
+_NEGATIVE_SAMPLES = list(_load_directory("pl_pii_negative"))
+
+
+class TestLoader:
+    def test_positive_corpus_not_empty(self) -> None:
+        assert len(_POSITIVE_SAMPLES) >= 1, "positive corpus must not be empty"
+
+    def test_negative_corpus_not_empty(self) -> None:
+        assert len(_NEGATIVE_SAMPLES) >= 1, "negative corpus must not be empty"
+
+    def test_positive_samples_have_text(self) -> None:
+        for s in _POSITIVE_SAMPLES:
+            assert s.text, f"sample {s.name} has empty text"
+
+    def test_positive_samples_have_spans(self) -> None:
+        for s in _POSITIVE_SAMPLES:
+            assert s.spans, f"positive sample {s.name} has no labeled spans"
+
+    def test_overlap_detection_rejects_overlapping_labels(self) -> None:
+        spans = (
+            ExpectedSpan(type="T", start=0, end=3, value="abc"),
+            ExpectedSpan(type="T", start=2, end=5, value="cde"),
+        )
+        with pytest.raises(ValueError, match="overlap"):
+            _check_no_overlap(spans, sample="x.txt")
+
+    def test_overlap_detection_allows_adjacent_labels(self) -> None:
+        spans = (
+            ExpectedSpan(type="T", start=0, end=3, value="abc"),
+            ExpectedSpan(type="T", start=3, end=6, value="def"),
+        )
+        # Adjacent (a.end == b.start) is fine.
+        _check_no_overlap(spans, sample="x.txt")
+
+    def test_label_values_match_text_substrings(self) -> None:
+        for s in _POSITIVE_SAMPLES:
+            for span in s.spans:
+                assert s.text[span.start : span.end] == span.value, (
+                    f"sample {s.name}: label {span} does not match "
+                    f"text[{span.start}:{span.end}]={s.text[span.start : span.end]!r}"
+                )
+
+
+class TestPositiveCorpus:
+    @pytest.mark.parametrize(
+        "sample",
+        _POSITIVE_SAMPLES,
+        ids=lambda s: s.name,
+    )
+    def test_default_shield_finds_labeled_spans(self, sample: CorpusSample) -> None:
+        shield = Shield()
+        result = shield.detect(sample.text)
+        actual = {(m.start, m.end, m.type.value) for m in result}
+        expected = {(s.start, s.end, s.type) for s in sample.spans}
+        assert expected.issubset(actual), (
+            f"sample {sample.name}: missing labeled spans {expected - actual}; got {actual}"
+        )
+
+
+class TestNegativeCorpus:
+    @pytest.mark.parametrize(
+        "sample",
+        _NEGATIVE_SAMPLES,
+        ids=lambda s: s.name,
+    )
+    def test_default_shield_finds_no_matches(self, sample: CorpusSample) -> None:
+        shield = Shield()
+        result = shield.detect(sample.text)
+        assert len(result) == 0, (
+            f"sample {sample.name}: expected zero matches, got "
+            f"{[(m.start, m.end, m.type.value, m.value) for m in result]}"
+        )
diff --git a/tests/test_errors.py b/tests/test_errors.py
new file mode 100644
index 0000000..50e4e43
--- /dev/null
+++ b/tests/test_errors.py
@@ -0,0 +1,69 @@
+"""Tests for the typed exception hierarchy in ``llm_safe_pl.errors``."""
+
+from __future__ import annotations
+
+import pytest
+
+from llm_safe_pl.errors import (
+    DetectorError,
+    InputSizeError,
+    LlmSafeError,
+    MappingError,
+)
+
+
+class TestHierarchy:
+    def test_llm_safe_error_subclasses_exception(self) -> None:
+        assert issubclass(LlmSafeError, Exception)
+        assert not issubclass(LlmSafeError, ValueError)
+
+    def test_mapping_error_is_value_error(self) -> None:
+        assert issubclass(MappingError, LlmSafeError)
+        assert issubclass(MappingError, ValueError)
+
+    def test_input_size_error_is_value_error(self) -> None:
+        assert issubclass(InputSizeError, LlmSafeError)
+        assert issubclass(InputSizeError, ValueError)
+
+    def test_detector_error_is_runtime_error(self) -> None:
+        assert issubclass(DetectorError, LlmSafeError)
+        assert issubclass(DetectorError, RuntimeError)
+
+
+class TestExceptCompat:
+    def test_mapping_error_caught_as_value_error(self) -> None:
+        with pytest.raises(ValueError):
+            raise MappingError("x")
+
+    def test_input_size_error_caught_as_value_error(self) -> None:
+        with pytest.raises(ValueError):
+            raise InputSizeError("x")
+
+    def test_mapping_error_caught_as_llm_safe_error(self) -> None:
+        with pytest.raises(LlmSafeError):
+            raise MappingError("x")
+
+    def test_input_size_error_caught_as_llm_safe_error(self) -> None:
+        with pytest.raises(LlmSafeError):
+            raise InputSizeError("x")
+
+    def test_detector_error_caught_as_llm_safe_error(self) -> None:
+        with pytest.raises(LlmSafeError):
+            raise DetectorError("pesel")
+
+
+class TestDetectorError:
+    def test_detector_name_attribute(self) -> None:
+        e = DetectorError("pesel")
+        assert e.detector_name == "pesel"
+
+    def test_message_does_not_include_implicit_text(self) -> None:
+        e = DetectorError("pesel")
+        # The message includes the detector name only — never input text.
+        assert str(e) == "detector 'pesel' failed"
+
+    def test_does_not_accept_extra_args(self) -> None:
+        # Signature is exactly (detector_name); a caller that tries to attach
+        # text or a cause via extra args should fail loudly.
+        with pytest.raises(TypeError):
+            DetectorError("pesel", "44051401359")  # type: ignore[call-arg]
diff --git a/tests/test_public_api.py b/tests/test_public_api.py
index a4baa0c..fcf6056 100644
--- a/tests/test_public_api.py
+++ b/tests/test_public_api.py
@@ -4,19 +4,37 @@
 
 
 def test_public_api_exports_are_importable() -> None:
-    from llm_safe_pl import AnonymizeResult, Mapping, Match, PIIType, Shield
+    from llm_safe_pl import (
+        AnonymizeResult,
+        DetectorError,
+        InputSizeError,
+        LlmSafeError,
+        Mapping,
+        MappingError,
+        Match,
+        PIIType,
+        Shield,
+    )
 
     assert Shield is not None
     assert Match is not None
     assert Mapping is not None
     assert AnonymizeResult is not None
     assert PIIType is not None
+    assert LlmSafeError is not None
+    assert MappingError is not None
+    assert InputSizeError is not None
+    assert DetectorError is not None
 
 
 def test_all_matches_expected_surface() -> None:
     assert set(llm_safe_pl.__all__) == {
         "AnonymizeResult",
+        "DetectorError",
+        "InputSizeError",
+        "LlmSafeError",
         "Mapping",
+        "MappingError",
         "Match",
         "PIIType",
         "Shield",
diff --git a/tests/test_security_hardening.py b/tests/test_security_hardening.py
index 0a34950..8b1d1ca 100644
--- a/tests/test_security_hardening.py
+++ b/tests/test_security_hardening.py
@@ -15,6 +15,7 @@
 from llm_safe_pl.anonymizer import Anonymizer
 from llm_safe_pl.detectors.base import RegexDetector
 from llm_safe_pl.detectors.pesel import PeselDetector
+from llm_safe_pl.errors import InputSizeError, MappingError
 from llm_safe_pl.models import Mapping, PIIType
 from llm_safe_pl.shield import Shield
 from llm_safe_pl.strategies import Strategy
@@ -39,6 +40,12 @@ def test_rejects_non_dict(self) -> None:
         with pytest.raises(ValueError, match="expected a dict"):
             Mapping.from_dict([])  # type: ignore[arg-type]
 
+    def test_raises_typed_mapping_error(self) -> None:
+        # MappingError is the typed exception; ValueError compatibility is
+        # preserved via multi-inheritance.
+        with pytest.raises(MappingError):
+            Mapping.from_dict([])  # type: ignore[arg-type]
+
     def test_rejects_wrong_schema_version(self) -> None:
         data = _baseline()
         data["schema_version"] = 2
@@ -160,6 +167,12 @@ def test_detect_respects_max_input_bytes(self) -> None:
         with pytest.raises(ValueError, match="max_input_bytes"):
             shield.detect("This is far longer than 10 bytes of text")
 
+    def test_max_input_bytes_raises_typed_error(self) -> None:
+        # InputSizeError is the typed exception; ValueError catching still works.
+        shield = Shield(max_input_bytes=10)
+        with pytest.raises(InputSizeError, match="max_input_bytes"):
+            shield.anonymize("This is far longer than 10 bytes of text")
+
     def test_no_guard_by_default(self) -> None:
         shield = Shield()
         # Should not raise on a 10 KiB input.