From ea8362958e5e397c0a9709a049e38aad409b5c7b Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Tue, 21 Apr 2026 12:07:40 +0900
Subject: [PATCH 1/9] Move mapping files into resource folder

---
 {scripts => resources}/mappings/audience.json         | 0
 {scripts => resources}/mappings/content_formats.json  | 0
 {scripts => resources}/mappings/droppable.json        | 0
 {scripts => resources}/mappings/genres.json           | 0
 {scripts => resources}/mappings/literary_themes.json  | 0
 {scripts => resources}/mappings/literary_tropes.json  | 0
 {scripts => resources}/mappings/main_topics.json      | 0
 {scripts => resources}/mappings/people_overrides.json | 0
 {scripts => resources}/mappings/places_overrides.json | 0
 {scripts => resources}/mappings/subgenres.json        | 0
 10 files changed, 0 insertions(+), 0 deletions(-)
 rename {scripts => resources}/mappings/audience.json (100%)
 rename {scripts => resources}/mappings/content_formats.json (100%)
 rename {scripts => resources}/mappings/droppable.json (100%)
 rename {scripts => resources}/mappings/genres.json (100%)
 rename {scripts => resources}/mappings/literary_themes.json (100%)
 rename {scripts => resources}/mappings/literary_tropes.json (100%)
 rename {scripts => resources}/mappings/main_topics.json (100%)
 rename {scripts => resources}/mappings/people_overrides.json (100%)
 rename {scripts => resources}/mappings/places_overrides.json (100%)
 rename {scripts => resources}/mappings/subgenres.json (100%)

diff --git a/scripts/mappings/audience.json b/resources/mappings/audience.json
similarity index 100%
rename from scripts/mappings/audience.json
rename to resources/mappings/audience.json
diff --git a/scripts/mappings/content_formats.json b/resources/mappings/content_formats.json
similarity index 100%
rename from scripts/mappings/content_formats.json
rename to resources/mappings/content_formats.json
diff --git a/scripts/mappings/droppable.json b/resources/mappings/droppable.json
similarity index 100%
rename from scripts/mappings/droppable.json
rename to resources/mappings/droppable.json
diff --git a/scripts/mappings/genres.json b/resources/mappings/genres.json
similarity index 100%
rename from scripts/mappings/genres.json
rename to resources/mappings/genres.json
diff --git a/scripts/mappings/literary_themes.json b/resources/mappings/literary_themes.json
similarity index 100%
rename from scripts/mappings/literary_themes.json
rename to resources/mappings/literary_themes.json
diff --git a/scripts/mappings/literary_tropes.json b/resources/mappings/literary_tropes.json
similarity index 100%
rename from scripts/mappings/literary_tropes.json
rename to resources/mappings/literary_tropes.json
diff --git a/scripts/mappings/main_topics.json b/resources/mappings/main_topics.json
similarity index 100%
rename from scripts/mappings/main_topics.json
rename to resources/mappings/main_topics.json
diff --git a/scripts/mappings/people_overrides.json b/resources/mappings/people_overrides.json
similarity index 100%
rename from scripts/mappings/people_overrides.json
rename to resources/mappings/people_overrides.json
diff --git a/scripts/mappings/places_overrides.json b/resources/mappings/places_overrides.json
similarity index 100%
rename from scripts/mappings/places_overrides.json
rename to resources/mappings/places_overrides.json
diff --git a/scripts/mappings/subgenres.json b/resources/mappings/subgenres.json
similarity index 100%
rename from scripts/mappings/subgenres.json
rename to resources/mappings/subgenres.json

From 49ad631dc5ac8370a5f0a16929ce3a9a9a88de51 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Tue, 21 Apr 2026 15:24:57 +0900
Subject: [PATCH 2/9] Refactor legacy migrate_subjects.py into several modules

---
 core/__init__.py                   |   5 +
 core/classifier_assembler.py       |  41 ++++++
 core/json_loader.py                |  32 +++++
 core/migrate_subject_classifier.py |  17 +++
 core/pack_registry.py              |  85 +++++++++++
 core/run_state.py                  |  22 +++
 core/subject_classifier.py         |  51 +++++++
 rule_engine/__init__.py            |   5 +
 rule_engine/base.py                |  15 ++
 rule_engine/normalization.py       |  24 ++++
 rule_packs/__init__.py             |  54 +++++++
 rule_packs/audience.py             |  22 +++
 rule_packs/content_formats.py      |  22 +++
 rule_packs/genres.py               |  22 +++
 rule_packs/literary_form.py        |  16 +++
 rule_packs/literary_themes.py      |  22 +++
 rule_packs/literary_tropes.py      |  22 +++
 rule_packs/main_topics.py          |  22 +++
 rule_packs/moods.py                |  16 +++
 rule_packs/people.py               |  23 +++
 rule_packs/places.py               |  23 +++
 rule_packs/subgenres.py            |  22 +++
 rule_packs/subject_diagnostics.py  |  41 ++++++
 rule_packs/times.py                |  21 +++
 rule_packs/utils.py                |  65 +++++++++
 rules/__init__.py                  |   8 ++
 rules/mapping_rule.py              |  17 +++
 rules/override_rule.py             |  20 +++
 rules/passthrough_rule.py          |  11 ++
 rules/prefix_rule.py               |  21 +++
 scripts/migrate_subjects.py        | 223 ++++-------------------------
 scripts/run_legacy_subjects.sh     |  22 +++
 32 files changed, 833 insertions(+), 199 deletions(-)
 create mode 100644 core/__init__.py
 create mode 100644 core/classifier_assembler.py
 create mode 100644 core/json_loader.py
 create mode 100644 core/migrate_subject_classifier.py
 create mode 100644 core/pack_registry.py
 create mode 100644 core/run_state.py
 create mode 100644 core/subject_classifier.py
 create mode 100644 rule_engine/__init__.py
 create mode 100644 rule_engine/base.py
 create mode 100644 rule_engine/normalization.py
 create mode 100644 rule_packs/__init__.py
 create mode 100644 rule_packs/audience.py
 create mode 100644 rule_packs/content_formats.py
 create mode 100644 rule_packs/genres.py
 create mode 100644 rule_packs/literary_form.py
 create mode 100644 rule_packs/literary_themes.py
 create mode 100644 rule_packs/literary_tropes.py
 create mode 100644 rule_packs/main_topics.py
 create mode 100644 rule_packs/moods.py
 create mode 100644 rule_packs/people.py
 create mode 100644 rule_packs/places.py
 create mode 100644 rule_packs/subgenres.py
 create mode 100644 rule_packs/subject_diagnostics.py
 create mode 100644 rule_packs/times.py
 create mode 100644 rule_packs/utils.py
 create mode 100644 rules/__init__.py
 create mode 100644 rules/mapping_rule.py
 create mode 100644 rules/override_rule.py
 create mode 100644 rules/passthrough_rule.py
 create mode 100644 rules/prefix_rule.py
 create mode 100755 scripts/run_legacy_subjects.sh

diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000..ee7aaf5
--- /dev/null
+++ b/core/__init__.py
@@ -0,0 +1,5 @@
+"""Core orchestration and default migration assembly."""
+
+from .subject_classifier import DEFAULT_OUTPUT_TYPES, SubjectClassifier
+
+__all__ = ["DEFAULT_OUTPUT_TYPES", "SubjectClassifier"]
diff --git a/core/classifier_assembler.py b/core/classifier_assembler.py
new file mode 100644
index 0000000..23a732e
--- /dev/null
+++ b/core/classifier_assembler.py
@@ -0,0 +1,41 @@
+"""Assembly helpers for building migration classifiers."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from core.json_loader import load_set
+from core.pack_registry import (
+    AVAILABLE_PACK_NAMES,
+    PACK_FACTORIES,
+    PACK_PRESETS,
+)
+from core.subject_classifier import SubjectClassifier
+
+
+def resolve_pack_names(enabled_packs: Iterable[str] | None) -> list[str]:
+    """Return the requested names in order, with each preset replaced by its members."""
+    resolved: list[str] = []
+    for requested in enabled_packs or ():
+        # A preset name stands for all of its member packs.  Any other name
+        # (a known pack or not) stands for itself; nothing is validated
+        # in this function.
+        members = PACK_PRESETS.get(requested, (requested,))
+        resolved.extend(members)
+    return resolved
+
+
+def build_subject_classifier(
+    enabled_packs: Iterable[str] | None = None,
+) -> SubjectClassifier:
+    """Instantiate a SubjectClassifier from pack and/or preset names, in order."""
+    pack_names = resolve_pack_names(enabled_packs)
+    unknown = sorted(name for name in pack_names if name not in PACK_FACTORIES)
+    if unknown:
+        # Raised before any factory is called, so a bad request builds nothing.
+        raise ValueError(
+            f"Unknown rule pack(s): {', '.join(unknown)}. "
+            f"Available: {', '.join(AVAILABLE_PACK_NAMES)}"
+        )
+    packs = [PACK_FACTORIES[name]() for name in pack_names]
+    return SubjectClassifier(rule_packs=packs)
diff --git a/core/json_loader.py b/core/json_loader.py
new file mode 100644
index 0000000..1553804
--- /dev/null
+++ b/core/json_loader.py
@@ -0,0 +1,32 @@
+"""JSON resource loaders for migration assembly."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from rule_engine.normalization import normalize
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+MAPPINGS_DIR = REPO_ROOT / "resources" / "mappings"
+
+
+def load_mapping(name: str) -> dict[str, str]:
+    """Return the parsed content of resources/mappings/<name>.json, or {} if absent."""
+    path = MAPPINGS_DIR / f"{name}.json"
+    if not path.exists():
+        return {}  # a missing file is treated as an empty mapping, not an error
+    with open(path, encoding="utf-8") as handle:  # JSON is UTF-8; ignore the locale default
+        return json.load(handle)
+
+
+def load_set(name: str) -> set[str]:
+    """Return the normalized entries of resources/mappings/<name>.json as a set."""
+    path = MAPPINGS_DIR / f"{name}.json"
+    if not path.exists():
+        return set()  # a missing file is treated as an empty set, not an error
+    with open(path, encoding="utf-8") as handle:  # JSON is UTF-8; ignore the locale default
+        data = json.load(handle)
+    # A JSON array contributes its items; any other top-level value its keys.
+    entries = data if isinstance(data, list) else data.keys()
+    return {normalize(item) for item in entries}
diff --git a/core/migrate_subject_classifier.py b/core/migrate_subject_classifier.py
new file mode 100644
index 0000000..38cc1a1
--- /dev/null
+++ b/core/migrate_subject_classifier.py
@@ -0,0 +1,17 @@
+"""Compatibility wrapper for migration classifier assembly."""
+
+from __future__ import annotations
+
+from core.classifier_assembler import (
+    build_subject_classifier,
+    resolve_pack_names,
+)
+from core.pack_registry import AVAILABLE_PACK_NAMES, PACK_FACTORIES, PACK_PRESETS
+
+__all__ = [
+    "AVAILABLE_PACK_NAMES",
+    "PACK_FACTORIES",
+    "PACK_PRESETS",
+    "build_subject_classifier",
+    "resolve_pack_names",
+]
diff --git a/core/pack_registry.py b/core/pack_registry.py
new file mode 100644
index 0000000..b1df0e7
--- /dev/null
+++ b/core/pack_registry.py
@@ -0,0 +1,85 @@
+"""Stable pack-name registry for migration assembly."""
+
+from __future__ import annotations
+
+from typing import Callable
+
+from core.json_loader import load_mapping, load_set
+from rule_packs import (
+    AudiencePack,
+    ContentFormatsPack,
+    GenresPack,
+    LiteraryFormPack,
+    LiteraryThemesPack,
+    LiteraryTropesPack,
+    MainTopicsPack,
+    MoodsPack,
+    PeoplePack,
+    PlacesPack,
+    SubgenresPack,
+    SUBJECT_PACK_CLASSES,
+    SubjectDiagnosticsPack,
+    TimesPack,
+)
+
+PackFactory = Callable[[], object]  # zero-argument callable that returns one pack
+# Subject pack classes keyed by their ``name`` class attribute.
+SUBJECT_PACK_BUILDERS = {pack_cls.name: pack_cls for pack_cls in SUBJECT_PACK_CLASSES}
+PACK_PRESETS: dict[str, tuple[str, ...]] = {  # preset name -> pack names it expands to
+    "subject_mappings": (  # order is preserved all the way to execution
+        "literary_form",
+        "audience",
+        "genres",
+        "subgenres",
+        "content_formats",
+        "moods",
+        "literary_themes",
+        "literary_tropes",
+        "main_topics",
+        "subject_diagnostics",  # reads the subjects the packs above left unmatched
+        "people",
+        "places",
+        "times",
+    ),
+}
+
+PACK_FACTORIES: dict[str, PackFactory] = {  # pack name -> factory; JSON is read when called
+    "literary_form": lambda: LiteraryFormPack(remove_matched_subjects=True),
+    "audience": lambda: AudiencePack(
+        mapping=load_mapping("audience"),
+        remove_matched_subjects=True,
+    ),
+    "genres": lambda: GenresPack(
+        mapping=load_mapping("genres"),
+        remove_matched_subjects=True,
+    ),
+    "subgenres": lambda: SubgenresPack(
+        mapping=load_mapping("subgenres"),
+        remove_matched_subjects=True,
+    ),
+    "content_formats": lambda: ContentFormatsPack(
+        mapping=load_mapping("content_formats"),
+        remove_matched_subjects=True,
+    ),
+    "moods": lambda: MoodsPack(remove_matched_subjects=True),
+    "literary_themes": lambda: LiteraryThemesPack(
+        mapping=load_mapping("literary_themes"),
+        remove_matched_subjects=True,
+    ),
+    "literary_tropes": lambda: LiteraryTropesPack(
+        mapping=load_mapping("literary_tropes"),
+        remove_matched_subjects=True,
+    ),
+    "main_topics": lambda: MainTopicsPack(
+        mapping=load_mapping("main_topics"),
+        remove_matched_subjects=True,
+    ),
+    "subject_diagnostics": lambda: SubjectDiagnosticsPack(
+        droppable=load_set("droppable")  # entries arrive already normalized by load_set
+    ),
+    "people": lambda: PeoplePack(overrides=load_mapping("people_overrides")),
+    "places": lambda: PlacesPack(overrides=load_mapping("places_overrides")),
+    "times": TimesPack,  # takes no arguments, so the class itself is the factory
+}
+
+AVAILABLE_PACK_NAMES = tuple(sorted({*PACK_FACTORIES, *PACK_PRESETS}))  # packs + presets
diff --git a/core/run_state.py b/core/run_state.py
new file mode 100644
index 0000000..a78fae0
--- /dev/null
+++ b/core/run_state.py
@@ -0,0 +1,22 @@
+"""Shared runtime state for sequential subject classification."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class RunState:
+    """Per-work scratchpad that rule packs read and update one after another."""
+
+    work: Mapping[str, Any]
+    result: dict[str, list[str]]
+    remaining_subjects: list[str] = field(default_factory=list)
+
+    def add(self, output_type: str, value: str) -> None:
+        """Append *value* to the *output_type* list unless it is already present."""
+        bucket = self.result.setdefault(output_type, [])
+        if value not in bucket:
+            bucket.append(value)
diff --git a/core/subject_classifier.py b/core/subject_classifier.py
new file mode 100644
index 0000000..f85aef2
--- /dev/null
+++ b/core/subject_classifier.py
@@ -0,0 +1,51 @@
+"""Reusable classification core for subject migration."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable, Mapping
+from typing import Any
+
+from core.run_state import RunState
+
+DEFAULT_OUTPUT_TYPES = (  # keys created as empty lists when no output_types are passed
+    "literary_form",
+    "audience",
+    "genres",
+    "subgenres",
+    "content_formats",
+    "moods",
+    "literary_themes",
+    "literary_tropes",
+    "main_topics",
+    "sub_topics",  # NOTE(review): no rule pack shown in this patch writes this key
+    "people",
+    "places",
+    "times",
+    "things",  # NOTE(review): no rule pack shown in this patch writes this key
+    "reading_level",
+    "classification_codes",
+    "unmapped",
+)
+
+
+class SubjectClassifier:
+    """Applies an ordered list of rule packs to one work and returns their tags."""
+
+    def __init__(
+        self,
+        rule_packs: Iterable[Any],
+        output_types: Iterable[str] | None = None,
+    ) -> None:
+        self.rule_packs = list(rule_packs)
+        self.output_types = tuple(output_types or DEFAULT_OUTPUT_TYPES)
+
+    def classify_work(self, work: Mapping[str, Any]) -> dict[str, list[str]]:
+        """Return ``{output_type: [values]}`` produced by the packs, run in order."""
+        # Every configured output type is present in the result, even when empty.
+        buckets: dict[str, list[str]] = {key: [] for key in self.output_types}
+        # Copy the subject list so packs can shrink it without touching *work*.
+        subjects = list(work.get("subjects", []))
+        state = RunState(work=work, result=buckets, remaining_subjects=subjects)
+        for pack in self.rule_packs:
+            pack.apply(state)
+        return state.result
diff --git a/rule_engine/__init__.py b/rule_engine/__init__.py
new file mode 100644
index 0000000..2386cef
--- /dev/null
+++ b/rule_engine/__init__.py
@@ -0,0 +1,5 @@
+"""Low-level rule engine primitives."""
+
+from .base import RulePack
+
+__all__ = ["RulePack"]
diff --git a/rule_engine/base.py b/rule_engine/base.py
new file mode 100644
index 0000000..552f7ea
--- /dev/null
+++ b/rule_engine/base.py
@@ -0,0 +1,15 @@
+"""Rule-pack interface for the migration core."""
+
+from __future__ import annotations
+
+from core.run_state import RunState
+
+
+class RulePack:
+    """A bounded unit of classification logic for one or more output types."""
+
+    name = ""  # identifier of the pack; concrete packs override it
+    output_types: tuple[str, ...] = ()  # result keys the concrete pack adds values to
+
+    def apply(self, state: RunState) -> None:  # must be overridden by every concrete pack
+        raise NotImplementedError
diff --git a/rule_engine/normalization.py b/rule_engine/normalization.py
new file mode 100644
index 0000000..7143e7e
--- /dev/null
+++ b/rule_engine/normalization.py
@@ -0,0 +1,24 @@
+"""Normalization and classification helpers."""
+
+from __future__ import annotations
+
+import re
+
+READING_LEVEL_RE = re.compile(  # used with .search(), so it may hit anywhere in the value
+    r"reading level.grade\s*\d+|grade\s*\d+|rl\s*\d+", re.IGNORECASE
+)  # NOTE(review): "rl\s*\d+" has no word boundary, so e.g. "Girl 2" also matches
+CLASSIFICATION_RE = re.compile(  # used with .match() on the stripped value
+    r"^[0-9]{3}(\.[0-9]+)?$|^[a-z]{1,3}\s*[0-9]+|^pr[0-9]", re.IGNORECASE
+)  # NOTE(review): the second alternative also accepts text such as "War 1939"
+
+
+def normalize(value: str) -> str:  # lookup-key form of a value
+    return value.lower().strip()  # lower-cased, surrounding whitespace removed
+
+
+def is_reading_level(value: str) -> bool:  # True if READING_LEVEL_RE occurs anywhere
+    return bool(READING_LEVEL_RE.search(value))  # search(): not anchored to the start
+
+
+def is_classification_code(value: str) -> bool:  # True if the stripped value starts like a code
+    return bool(CLASSIFICATION_RE.match(value.strip()))  # match(): anchored at the start
diff --git a/rule_packs/__init__.py b/rule_packs/__init__.py
new file mode 100644
index 0000000..4dd2638
--- /dev/null
+++ b/rule_packs/__init__.py
@@ -0,0 +1,54 @@
+"""Concrete rule-pack modules."""
+
+from .audience import AudiencePack
+from .content_formats import ContentFormatsPack
+from .genres import GenresPack
+from .literary_form import LiteraryFormPack
+from .literary_themes import LiteraryThemesPack
+from .literary_tropes import LiteraryTropesPack
+from .main_topics import MainTopicsPack
+from .moods import MoodsPack
+from .people import PeoplePack
+from .places import PlacesPack
+from .subgenres import SubgenresPack
+from .subject_diagnostics import SubjectDiagnosticsPack
+from .times import TimesPack
+
+SUBJECT_PACK_CLASSES = (  # packs built on SubjectPack (work on the shared subject list)
+    LiteraryFormPack,
+    AudiencePack,
+    GenresPack,
+    SubgenresPack,
+    ContentFormatsPack,
+    MoodsPack,
+    LiteraryThemesPack,
+    LiteraryTropesPack,
+    MainTopicsPack,
+)
+
+FIELD_PACK_CLASSES = (  # packs that read a dedicated field of the work instead
+    PeoplePack,
+    PlacesPack,
+    TimesPack,
+)
+
+ALL_PACK_CLASSES = SUBJECT_PACK_CLASSES + FIELD_PACK_CLASSES  # NOTE(review): SubjectDiagnosticsPack is in neither tuple - confirm intended
+
+__all__ = [
+    "ALL_PACK_CLASSES",
+    "AudiencePack",
+    "ContentFormatsPack",
+    "FIELD_PACK_CLASSES",
+    "GenresPack",
+    "LiteraryFormPack",
+    "LiteraryThemesPack",
+    "LiteraryTropesPack",
+    "MainTopicsPack",
+    "MoodsPack",
+    "PeoplePack",
+    "PlacesPack",
+    "SUBJECT_PACK_CLASSES",
+    "SubgenresPack",
+    "SubjectDiagnosticsPack",
+    "TimesPack",
+]
diff --git a/rule_packs/audience.py b/rule_packs/audience.py
new file mode 100644
index 0000000..165c391
--- /dev/null
+++ b/rule_packs/audience.py
@@ -0,0 +1,22 @@
+"""Rule pack for audience tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class AudiencePack(SubjectPack):  # fills the "audience" output from the subject list
+    name = "audience"
+    output_types = ("audience",)
+    output_type = "audience"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("audience"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/content_formats.py b/rule_packs/content_formats.py
new file mode 100644
index 0000000..c1260b4
--- /dev/null
+++ b/rule_packs/content_formats.py
@@ -0,0 +1,22 @@
+"""Rule pack for content_formats tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class ContentFormatsPack(SubjectPack):  # fills the "content_formats" output
+    name = "content_formats"
+    output_types = ("content_formats",)
+    output_type = "content_formats"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("format"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/genres.py b/rule_packs/genres.py
new file mode 100644
index 0000000..b110f12
--- /dev/null
+++ b/rule_packs/genres.py
@@ -0,0 +1,22 @@
+"""Rule pack for genre tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class GenresPack(SubjectPack):  # fills the "genres" output from the subject list
+    name = "genres"
+    output_types = ("genres",)
+    output_type = "genres"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("genre"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/literary_form.py b/rule_packs/literary_form.py
new file mode 100644
index 0000000..1972b30
--- /dev/null
+++ b/rule_packs/literary_form.py
@@ -0,0 +1,16 @@
+"""Rule pack for literary_form."""
+
+from __future__ import annotations
+
+from rule_packs.utils import SubjectPack
+from rules import PrefixRule
+
+
+class LiteraryFormPack(SubjectPack):  # fills "literary_form"; prefix rule only, no mapping
+    name = "literary_form"
+    output_types = ("literary_form",)
+    output_type = "literary_form"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(self, remove_matched_subjects: bool = True) -> None:
+        self.rules = (PrefixRule("form"),)  # matches subjects written as "form:<value>"
+        self.remove_matched_subjects = remove_matched_subjects  # True: matches are consumed
diff --git a/rule_packs/literary_themes.py b/rule_packs/literary_themes.py
new file mode 100644
index 0000000..2357776
--- /dev/null
+++ b/rule_packs/literary_themes.py
@@ -0,0 +1,22 @@
+"""Rule pack for literary_themes tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class LiteraryThemesPack(SubjectPack):  # fills the "literary_themes" output
+    name = "literary_themes"
+    output_types = ("literary_themes",)
+    output_type = "literary_themes"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("theme"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/literary_tropes.py b/rule_packs/literary_tropes.py
new file mode 100644
index 0000000..1c1db97
--- /dev/null
+++ b/rule_packs/literary_tropes.py
@@ -0,0 +1,22 @@
+"""Rule pack for literary_tropes tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class LiteraryTropesPack(SubjectPack):  # fills the "literary_tropes" output
+    name = "literary_tropes"
+    output_types = ("literary_tropes",)
+    output_type = "literary_tropes"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("trope"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/main_topics.py b/rule_packs/main_topics.py
new file mode 100644
index 0000000..5c4e359
--- /dev/null
+++ b/rule_packs/main_topics.py
@@ -0,0 +1,22 @@
+"""Rule pack for main_topics tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class MainTopicsPack(SubjectPack):  # fills the "main_topics" output
+    name = "main_topics"
+    output_types = ("main_topics",)
+    output_type = "main_topics"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("topic"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/moods.py b/rule_packs/moods.py
new file mode 100644
index 0000000..c436e0e
--- /dev/null
+++ b/rule_packs/moods.py
@@ -0,0 +1,16 @@
+"""Rule pack for moods tags."""
+
+from __future__ import annotations
+
+from rule_packs.utils import SubjectPack
+from rules import PrefixRule
+
+
+class MoodsPack(SubjectPack):  # fills "moods"; prefix rule only, no mapping
+    name = "moods"
+    output_types = ("moods",)
+    output_type = "moods"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(self, remove_matched_subjects: bool = True) -> None:
+        self.rules = (PrefixRule("mood"),)  # matches subjects written as "mood:<value>"
+        self.remove_matched_subjects = remove_matched_subjects  # True: matches are consumed
diff --git a/rule_packs/people.py b/rule_packs/people.py
new file mode 100644
index 0000000..e5660f1
--- /dev/null
+++ b/rule_packs/people.py
@@ -0,0 +1,23 @@
+"""Rule pack for subject_people."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from core.run_state import RunState
+from rule_engine.base import RulePack
+from rules import OverrideRule
+
+
+class PeoplePack(RulePack):
+    name = "people"
+    output_types = ("people",)
+
+    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
+        self.rule = OverrideRule(overrides)
+
+    def apply(self, state: RunState) -> None:
+        """Add each non-blank ``subject_people`` entry (override applied) to ``people``."""
+        for entry in state.work.get("subject_people", []):
+            if (person := self.rule.apply(entry)) is not None:
+                state.add("people", person)
diff --git a/rule_packs/places.py b/rule_packs/places.py
new file mode 100644
index 0000000..0a13464
--- /dev/null
+++ b/rule_packs/places.py
@@ -0,0 +1,23 @@
+"""Rule pack for subject_places."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from core.run_state import RunState
+from rule_engine.base import RulePack
+from rules import OverrideRule
+
+
+class PlacesPack(RulePack):
+    name = "places"
+    output_types = ("places",)
+
+    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
+        self.rule = OverrideRule(overrides)
+
+    def apply(self, state: RunState) -> None:
+        """Add each non-blank ``subject_places`` entry (override applied) to ``places``."""
+        for entry in state.work.get("subject_places", []):
+            if (place := self.rule.apply(entry)) is not None:
+                state.add("places", place)
diff --git a/rule_packs/subgenres.py b/rule_packs/subgenres.py
new file mode 100644
index 0000000..eb832c5
--- /dev/null
+++ b/rule_packs/subgenres.py
@@ -0,0 +1,22 @@
+"""Rule pack for subgenre tags."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_packs.utils import SubjectPack
+from rules import MappingRule, PrefixRule
+
+
+class SubgenresPack(SubjectPack):  # fills the "subgenres" output from the subject list
+    name = "subgenres"
+    output_types = ("subgenres",)
+    output_type = "subgenres"  # bucket SubjectPack.apply() writes matches to
+
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,  # lookup table handed to MappingRule
+        remove_matched_subjects: bool = True,  # True: matched subjects are consumed
+    ) -> None:
+        self.rules = (PrefixRule("subgenre"), MappingRule(mapping))  # first hit wins
+        self.remove_matched_subjects = remove_matched_subjects
diff --git a/rule_packs/subject_diagnostics.py b/rule_packs/subject_diagnostics.py
new file mode 100644
index 0000000..4e6a0f7
--- /dev/null
+++ b/rule_packs/subject_diagnostics.py
@@ -0,0 +1,41 @@
+"""Rule pack for dropped, reading-level, classification, and unmapped subjects."""
+
+from __future__ import annotations
+
+from core.run_state import RunState
+from rule_engine.base import RulePack
+from rule_engine.normalization import (
+    is_classification_code,
+    is_reading_level,
+    normalize,
+)
+
+
+class SubjectDiagnosticsPack(RulePack):
+    """Sorts the subjects still in ``remaining_subjects`` into diagnostic outputs."""
+
+    name = "subject_diagnostics"
+    output_types = ("reading_level", "classification_codes", "unmapped")
+
+    def __init__(self, droppable: set[str] | None = None) -> None:
+        self.droppable = set(droppable or ())
+
+    def apply(self, state: RunState) -> None:
+        """File each remaining subject under at most one diagnostic output.
+
+        Subjects whose normalized form is in ``droppable`` are skipped.  The
+        rest go to ``reading_level``, ``classification_codes`` or ``unmapped``
+        (first test that applies), stored stripped; blank values are ignored.
+        ``state.remaining_subjects`` itself is left unchanged.
+        """
+        for raw in state.remaining_subjects:
+            if normalize(raw) in self.droppable:
+                continue
+            if is_reading_level(raw):
+                bucket = "reading_level"
+            elif is_classification_code(raw):
+                bucket = "classification_codes"
+            else:
+                bucket = "unmapped"
+            if value := raw.strip():
+                state.add(bucket, value)
diff --git a/rule_packs/times.py b/rule_packs/times.py
new file mode 100644
index 0000000..c50a61a
--- /dev/null
+++ b/rule_packs/times.py
@@ -0,0 +1,21 @@
+"""Rule pack for subject_times."""
+
+from __future__ import annotations
+
+from core.run_state import RunState
+from rule_engine.base import RulePack
+from rules import PassthroughRule
+
+
+class TimesPack(RulePack):
+    name = "times"
+    output_types = ("times",)
+
+    def __init__(self) -> None:
+        self.rule = PassthroughRule()
+
+    def apply(self, state: RunState) -> None:
+        """Add each non-blank ``subject_times`` entry, trimmed, to ``times``."""
+        for entry in state.work.get("subject_times", []):
+            if (period := self.rule.apply(entry)) is not None:
+                state.add("times", period)
diff --git a/rule_packs/utils.py b/rule_packs/utils.py
new file mode 100644
index 0000000..c27827e
--- /dev/null
+++ b/rule_packs/utils.py
@@ -0,0 +1,65 @@
+"""Shared helpers for subject-based packs."""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Protocol
+
+from core.run_state import RunState
+from rule_engine.base import RulePack
+
+
+class SubjectValueRule(Protocol):  # structural type: any object with a match() method
+    def match(self, raw: str) -> str | None: ...  # None is treated as "no match"
+
+
+def classify_subject_value(raw: str, rules: Iterable[SubjectValueRule]) -> str | None:
+    """Return the first non-None ``match(raw)`` result among *rules*, or None."""
+    # Generators keep this lazy: rules after the first hit are never consulted.
+    results = (rule.match(raw) for rule in rules)
+    hits = (result for result in results if result is not None)
+    return next(hits, None)
+
+
+def classify_subject_values(
+    state: RunState,
+    output_type: str,
+    rules: Iterable[SubjectValueRule],
+) -> None:
+    """Record each rule-matched subject under *output_type* and drop it from the state."""
+    rules = tuple(rules)  # scanned once per subject, so a one-shot iterator is snapshotted
+    pairs = [(s, classify_subject_value(s, rules)) for s in state.remaining_subjects]
+    for _, match in pairs:
+        if match is not None:
+            state.add(output_type, match)
+    # Subjects no rule claimed stay available, in order, to the packs that run later.
+    state.remaining_subjects = [raw for raw, match in pairs if match is None]
+
+
+def apply_subject_pack(
+    state: RunState,
+    output_type: str,
+    rules: Iterable[SubjectValueRule],
+    remove_matched_subjects: bool,
+) -> None:
+    """Tag matching subjects as *output_type*; optionally consume them from the state."""
+    rules = tuple(rules)  # scanned once per subject, so a one-shot iterator is snapshotted
+    if remove_matched_subjects:
+        return classify_subject_values(state, output_type, rules)
+    for raw in state.remaining_subjects:
+        if (match := classify_subject_value(raw, rules)) is not None:
+            state.add(output_type, match)
+
+
+class SubjectPack(RulePack):
+    """Base for packs that classify the shared ``remaining_subjects`` list."""
+
+    output_type = ""
+
+    def apply(self, state: RunState) -> None:
+        """Run ``self.rules`` over the subjects left in *state*.
+
+        ``rules`` and ``remove_matched_subjects`` come from the subclass ``__init__``.
+        """
+        pack_rules, consume = self.rules, self.remove_matched_subjects
+        apply_subject_pack(state, self.output_type, pack_rules, consume)
diff --git a/rules/__init__.py b/rules/__init__.py
new file mode 100644
index 0000000..9cbc515
--- /dev/null
+++ b/rules/__init__.py
@@ -0,0 +1,8 @@
+"""Composable rule units for pack implementations."""
+
+from .mapping_rule import MappingRule
+from .override_rule import OverrideRule
+from .passthrough_rule import PassthroughRule
+from .prefix_rule import PrefixRule
+
+__all__ = ["MappingRule", "OverrideRule", "PassthroughRule", "PrefixRule"]
diff --git a/rules/mapping_rule.py b/rules/mapping_rule.py
new file mode 100644
index 0000000..2cb85d3
--- /dev/null
+++ b/rules/mapping_rule.py
@@ -0,0 +1,17 @@
+"""Direct mapping lookups for normalized values."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_engine.normalization import normalize
+
+
+class MappingRule:
+    """Match normalized input values against a provided mapping."""
+
+    def __init__(self, mapping: Mapping[str, str] | None = None) -> None:
+        self.mapping = dict(mapping or {})  # private copy; keys are stored exactly as given
+
+    def match(self, raw: str) -> str | None:
+        return self.mapping.get(normalize(raw))  # NOTE(review): only already-normalized keys can hit
diff --git a/rules/override_rule.py b/rules/override_rule.py
new file mode 100644
index 0000000..0fe74c1
--- /dev/null
+++ b/rules/override_rule.py
@@ -0,0 +1,20 @@
+"""Override-based normalization for field values."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rule_engine.normalization import normalize
+
+
+class OverrideRule:
+    """Normalize a field value using overrides with raw fallback."""
+
+    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
+        self.overrides = dict(overrides or {})
+
+    def apply(self, raw: str) -> str | None:
+        cleaned = raw.strip()
+        if not cleaned:
+            return None
+        return self.overrides.get(normalize(raw), cleaned)
diff --git a/rules/passthrough_rule.py b/rules/passthrough_rule.py
new file mode 100644
index 0000000..5dbb88b
--- /dev/null
+++ b/rules/passthrough_rule.py
@@ -0,0 +1,11 @@
+"""Passthrough normalization for field values."""
+
+from __future__ import annotations
+
+
+class PassthroughRule:
+    """Return cleaned field values without additional transformation."""
+
+    def apply(self, raw: str) -> str | None:
+        cleaned = raw.strip()
+        return cleaned or None
diff --git a/rules/prefix_rule.py b/rules/prefix_rule.py
new file mode 100644
index 0000000..11b49f9
--- /dev/null
+++ b/rules/prefix_rule.py
@@ -0,0 +1,21 @@
+"""Prefix-based matching for subject values."""
+
+from __future__ import annotations
+
+
+class PrefixRule:
+    """Match values like ``theme:love`` and return the normalized payload."""
+
+    def __init__(self, prefix: str) -> None:
+        self.prefix = prefix
+
+    def match(self, raw: str) -> str | None:
+        if not self.prefix or ":" not in raw:
+            return None
+        prefix, _, value = raw.partition(":")
+        if prefix.strip().lower() != self.prefix:
+            return None
+        cleaned = value.strip()
+        if not cleaned:
+            return None
+        return cleaned.title()
diff --git a/scripts/migrate_subjects.py b/scripts/migrate_subjects.py
index 4ec609b..57e8018 100644
--- a/scripts/migrate_subjects.py
+++ b/scripts/migrate_subjects.py
@@ -15,219 +15,32 @@
 import argparse
 import json
 import os
-import re
 import sys
 from pathlib import Path
 
-import requests
+REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from core.classifier_assembler import build_subject_classifier
+from core.pack_registry import AVAILABLE_PACK_NAMES
 
 # ---------------------------------------------------------------------------
 # Paths
 # ---------------------------------------------------------------------------
 
-REPO_ROOT = Path(__file__).parent.parent
-MAPPINGS_DIR = Path(__file__).parent / "mappings"
-
 OL_WORK_URL = "https://openlibrary.org/works/{work_id}.json"
 
 
-# ---------------------------------------------------------------------------
-# Load mappings
-# ---------------------------------------------------------------------------
-
-def load_mapping(name: str) -> dict[str, str]:
-    """Load a JSON mapping file from scripts/mappings/."""
-    path = MAPPINGS_DIR / f"{name}.json"
-    if not path.exists():
-        return {}
-    with open(path) as f:
-        return json.load(f)
-
-
-def load_set(name: str) -> set[str]:
-    """Load a JSON list file as a set (e.g. droppable.json)."""
-    path = MAPPINGS_DIR / f"{name}.json"
-    if not path.exists():
-        return set()
-    with open(path) as f:
-        data = json.load(f)
-    if isinstance(data, list):
-        return {s.lower().strip() for s in data}
-    return set(data.keys())
-
-
-# ---------------------------------------------------------------------------
-# Normalization helpers
-# ---------------------------------------------------------------------------
-
-READING_LEVEL_RE = re.compile(
-    r"reading level.grade\s*\d+|grade\s*\d+|rl\s*\d+", re.IGNORECASE
-)
-CLASSIFICATION_RE = re.compile(
-    r"^[0-9]{3}(\.[0-9]+)?$|^[a-z]{1,3}\s*[0-9]+|^pr[0-9]", re.IGNORECASE
-)
-
-
-def normalize(s: str) -> str:
-    """Lowercase and strip a subject string for mapping lookup."""
-    return s.lower().strip()
-
-
-def is_reading_level(s: str) -> bool:
-    return bool(READING_LEVEL_RE.search(s))
-
-
-def is_classification_code(s: str) -> bool:
-    return bool(CLASSIFICATION_RE.match(s.strip()))
-
-
-# ---------------------------------------------------------------------------
-# Core classifier
-# ---------------------------------------------------------------------------
-
-class SubjectClassifier:
-    def __init__(self):
-        self.genres_map = load_mapping("genres")
-        self.subgenres_map = load_mapping("subgenres")
-        self.formats_map = load_mapping("content_formats")
-        self.themes_map = load_mapping("literary_themes")
-        self.tropes_map = load_mapping("literary_tropes")
-        self.topics_map = load_mapping("main_topics")
-        self.audience_map = load_mapping("audience")
-        self.droppable = load_set("droppable")
-        self.people_overrides = load_mapping("people_overrides")
-        self.places_overrides = load_mapping("places_overrides")
-
-    def classify_subject(self, raw: str) -> tuple[str, str | None]:
-        """
-        Classify a single subject string.
-
-        Returns (type, canonical_value) where type is one of:
-          literary_form, genres, subgenres, content_formats, literary_themes,
-          literary_tropes, main_topics, audience, reading_level,
-          classification_code, drop, unmapped
-        """
-        key = normalize(raw)
-
-        # Audience strings (before hard drops, since some overlap)
-        if key in self.audience_map:
-            return ("audience", self.audience_map[key])
-
-        # Hard drops
-        if key in self.droppable:
-            return ("drop", None)
-
-        # Reading levels
-        if is_reading_level(raw):
-            return ("reading_level", raw.strip())
-
-        # Classification codes (Dewey, LC call numbers)
-        if is_classification_code(raw):
-            return ("classification_code", raw.strip())
-
-        # Explicit prefix-typed tags (e.g. "form:novel", "genre:tragedy")
-        if ":" in raw:
-            prefix, _, value = raw.partition(":")
-            prefix = prefix.strip().lower()
-            value = value.strip()
-            type_map = {
-                "form": "literary_form",
-                "audience": "audience",
-                "genre": "genres",
-                "subgenre": "subgenres",
-                "format": "content_formats",
-                "theme": "literary_themes",
-                "trope": "literary_tropes",
-                "topic": "main_topics",
-                "mood": "moods",
-            }
-            if prefix in type_map:
-                return (type_map[prefix], value.title())
-
-        # Mapping lookups (in priority order)
-        if key in self.genres_map:
-            return ("genres", self.genres_map[key])
-        if key in self.subgenres_map:
-            return ("subgenres", self.subgenres_map[key])
-        if key in self.formats_map:
-            return ("content_formats", self.formats_map[key])
-        if key in self.themes_map:
-            return ("literary_themes", self.themes_map[key])
-        if key in self.tropes_map:
-            return ("literary_tropes", self.tropes_map[key])
-        if key in self.topics_map:
-            return ("main_topics", self.topics_map[key])
-
-        return ("unmapped", raw.strip())
-
-    def classify_work(self, work: dict) -> dict:
-        """
-        Given a work JSON dict (from OL API), produce a structured tag output.
-        """
-        result: dict[str, list] = {
-            "literary_form": [],
-            "audience": [],
-            "genres": [],
-            "subgenres": [],
-            "content_formats": [],
-            "moods": [],
-            "literary_themes": [],
-            "literary_tropes": [],
-            "main_topics": [],
-            "sub_topics": [],
-            "people": [],
-            "places": [],
-            "times": [],
-            "things": [],
-            "reading_level": [],
-            "classification_codes": [],
-            "unmapped": [],
-        }
-
-        # Classify flat subjects
-        for raw in work.get("subjects", []):
-            tag_type, value = self.classify_subject(raw)
-            if tag_type == "drop" or value is None:
-                continue
-            if tag_type == "reading_level":
-                result["reading_level"].append(value)
-            elif tag_type == "classification_code":
-                result["classification_codes"].append(value)
-            elif tag_type in result:
-                if value not in result[tag_type]:
-                    result[tag_type].append(value)
-            else:
-                result["unmapped"].append(raw)
-
-        # subject_people → canonical names
-        for raw in work.get("subject_people", []):
-            key = normalize(raw)
-            canonical = self.people_overrides.get(key, raw.strip())
-            if canonical not in result["people"]:
-                result["people"].append(canonical)
-
-        # subject_places → canonical places
-        for raw in work.get("subject_places", []):
-            key = normalize(raw)
-            canonical = self.places_overrides.get(key, raw.strip())
-            if canonical not in result["places"]:
-                result["places"].append(canonical)
-
-        # subject_times → pass through (times are free-form)
-        for raw in work.get("subject_times", []):
-            cleaned = raw.strip()
-            if cleaned and cleaned not in result["times"]:
-                result["times"].append(cleaned)
-
-        return result
-
-
 # ---------------------------------------------------------------------------
 # Fetching
 # ---------------------------------------------------------------------------
 
+
 def fetch_work(work_id: str) -> dict:
     """Fetch a work JSON from Open Library."""
+    import requests
+
     work_id = work_id.replace("/works/", "").strip()
     if not work_id.endswith(".json"):
         url = OL_WORK_URL.format(work_id=work_id)
@@ -247,6 +60,7 @@ def load_work_file(path: str) -> dict:
 # Output
 # ---------------------------------------------------------------------------
 
+
 def print_result(work_id: str, result: dict):
     print(f"\n=== {work_id} ===")
     for key, values in result.items():
@@ -268,6 +82,7 @@ def write_result(work_id: str, result: dict, output_dir: str):
 # CLI
 # ---------------------------------------------------------------------------
 
+
 def main():
     parser = argparse.ArgumentParser(
         description="Migrate OL legacy subjects to canonical typed tags."
@@ -277,11 +92,21 @@ def main():
     group.add_argument("--file", help="Path to a local work JSON file")
     group.add_argument("--batch", help="Path to newline-delimited OL Work IDs file")
 
-    parser.add_argument("--output", default="output", help="Output directory for batch mode")
-    parser.add_argument("--dry-run", action="store_true", help="Print results, don't write files")
+    parser.add_argument(
+        "--output", default="output", help="Output directory for batch mode"
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true", help="Print results, don't write files"
+    )
+    parser.add_argument(
+        "--pack",
+        action="append",
+        choices=AVAILABLE_PACK_NAMES,
+        help="Enable only the named rule pack. Repeat to combine multiple packs.",
+    )
 
     args = parser.parse_args()
-    classifier = SubjectClassifier()
+    classifier = build_subject_classifier(args.pack)
 
     if args.work:
         print(f"Fetching {args.work}...")
diff --git a/scripts/run_legacy_subjects.sh b/scripts/run_legacy_subjects.sh
new file mode 100755
index 0000000..829baa4
--- /dev/null
+++ b/scripts/run_legacy_subjects.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+python3 "${REPO_ROOT}/scripts/migrate_subjects.py" \
+  --pack literary_form \
+  --pack audience \
+  --pack genres \
+  --pack subgenres \
+  --pack content_formats \
+  --pack moods \
+  --pack literary_themes \
+  --pack literary_tropes \
+  --pack main_topics \
+  --pack subject_diagnostics \
+  --pack people \
+  --pack places \
+  --pack times \
+  "$@"

From e656611eb32fb77f6029310e337f5ea4558b8bd3 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Tue, 21 Apr 2026 15:25:40 +0900
Subject: [PATCH 3/9] Update README.md

---
 scripts/README.md | 63 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 5 deletions(-)

diff --git a/scripts/README.md b/scripts/README.md
index 97c6ba7..5ac204c 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -14,10 +14,11 @@ Open Library works currently have a flat `subjects` list (plus `subject_people`,
 
 ### `migrate_subjects.py`
 
-The main migration tool. Given a work's OL JSON, it:
+The current runner/compatibility entry point. Given a work's OL JSON, it:
 
 1. Loads the legacy `subjects`, `subject_people`, `subject_places`, and `subject_times` lists
-2. Applies rule-based and keyword matching to classify each string into the correct canonical type
+2. Builds a `SubjectClassifier` from one or more enabled rule packs
+3. Applies rule-based and keyword matching to classify each string into the correct canonical type
-3. Outputs a structured tag object ready for import into the new schema
+4. Outputs a structured tag object ready for import into the new schema
 
 **Usage:**
@@ -28,13 +29,29 @@ python scripts/migrate_subjects.py --work OL82563W
 # From a local JSON file
 python scripts/migrate_subjects.py --file work.json
 
+# Legacy-compatible fixed-order wrapper
+./scripts/run_legacy_subjects.sh --file work.json
+
 # Batch from a newline-delimited list of OL IDs
 python scripts/migrate_subjects.py --batch ol_ids.txt --output output/
 
 # Dry run (print proposed mappings without writing)
 python scripts/migrate_subjects.py --work OL82563W --dry-run
+
+# Run the old full sequence explicitly through the wrapper
+./scripts/run_legacy_subjects.sh --file work.json --dry-run
+
+# Run only a subset of rule packs
+python scripts/migrate_subjects.py --file work.json --pack genres --pack content_formats --pack subject_diagnostics --dry-run
+
+# Run a single tag-type module
+python scripts/migrate_subjects.py --file work.json --pack content_formats --dry-run
 ```
 
+`migrate_subjects.py` no longer enables a default full preset when `--pack` is omitted. If you want the old full sequence, use `run_legacy_subjects.sh` or pass the pack list explicitly.
+
+`run_legacy_subjects.sh` is just a thin wrapper around `migrate_subjects.py` with the pack order written out explicitly, so it is easy to inspect and change. Any extra CLI args are forwarded as-is.
+
 **Output format:**
 ```json
 {
@@ -60,12 +77,49 @@ The `unmapped` field collects strings that couldn't be classified — these are
 
 ---
 
+### Architecture
+
+The reusable classification core now lives outside the script entry point:
+
+```text
+core/
+  json_loader.py                # JSON resource loading for default assembly
+  subject_classifier.py         # public work-level orchestration core
+  pack_registry.py              # stable pack names -> factories / presets
+  classifier_assembler.py       # pack resolution + classifier assembly
+  migrate_subject_classifier.py # compatibility shim for older imports
+rule_engine/
+  base.py                       # RulePack interface
+  normalization.py              # shared text normalization helpers
+rules/
+  prefix_rule.py                # subject prefix matching
+  mapping_rule.py               # normalized direct mapping
+  override_rule.py              # override-based field normalization
+  passthrough_rule.py           # cleaned passthrough fields
+rule_packs/
+  genres.py                     # one module per tag type (partial listing)
+  content_formats.py
+  audience.py
+  literary_themes.py
+  literary_tropes.py
+  main_topics.py
+  people.py
+  places.py
+  times.py
+config/
+  packs/                        # future static pack configs
+```
+
+`scripts/migrate_subjects.py` remains the operational entry point, but classification logic is now encapsulated in the shared core so future runners can reuse it.
+
+The classification core itself is kept narrow: `SubjectClassifier` consumes a normalized `work` object plus already-constructed packs, and returns a result. JSON resource loading now lives in the default assembly layer rather than inside individual packs.
+
 ### Adding Mapping Rules
 
-Mappings live in `scripts/mappings/`. Each file covers one tag type:
+Mappings live in `resources/mappings/`. Each file covers one tag type:
 
 ```
-scripts/
+resources/
   mappings/
     genres.json          # legacy string → canonical genre
     subgenres.json        # legacy string → canonical subgenre
@@ -89,7 +143,6 @@ Each mapping file is a JSON object where keys are legacy strings (lowercase, str
 }
 ```
 
-To add a new mapping: edit the appropriate file and open a PR. No code changes needed for new string mappings.
 
 ---
 

From 025dd517ba43e5bf0bb867056df0c2e72aa7df56 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 00:22:57 +0900
Subject: [PATCH 4/9] Move pack default assembly into pack-owned factories

---
 core/pack_registry.py             | 50 ++++++++-----------------------
 rule_packs/audience.py            |  8 +++++
 rule_packs/content_formats.py     |  8 +++++
 rule_packs/genres.py              |  8 +++++
 rule_packs/literary_form.py       |  4 +++
 rule_packs/literary_themes.py     |  8 +++++
 rule_packs/literary_tropes.py     |  8 +++++
 rule_packs/main_topics.py         |  8 +++++
 rule_packs/moods.py               |  4 +++
 rule_packs/people.py              |  5 ++++
 rule_packs/places.py              |  5 ++++
 rule_packs/subgenres.py           |  8 +++++
 rule_packs/subject_diagnostics.py |  5 ++++
 rule_packs/times.py               |  4 +++
 14 files changed, 96 insertions(+), 37 deletions(-)

diff --git a/core/pack_registry.py b/core/pack_registry.py
index b1df0e7..3130d31 100644
--- a/core/pack_registry.py
+++ b/core/pack_registry.py
@@ -4,7 +4,6 @@
 
 from typing import Callable
 
-from core.json_loader import load_mapping, load_set
 from rule_packs import (
     AudiencePack,
     ContentFormatsPack,
@@ -44,42 +43,19 @@
 }
 
 PACK_FACTORIES: dict[str, PackFactory] = {
-    "literary_form": lambda: LiteraryFormPack(remove_matched_subjects=True),
-    "audience": lambda: AudiencePack(
-        mapping=load_mapping("audience"),
-        remove_matched_subjects=True,
-    ),
-    "genres": lambda: GenresPack(
-        mapping=load_mapping("genres"),
-        remove_matched_subjects=True,
-    ),
-    "subgenres": lambda: SubgenresPack(
-        mapping=load_mapping("subgenres"),
-        remove_matched_subjects=True,
-    ),
-    "content_formats": lambda: ContentFormatsPack(
-        mapping=load_mapping("content_formats"),
-        remove_matched_subjects=True,
-    ),
-    "moods": lambda: MoodsPack(remove_matched_subjects=True),
-    "literary_themes": lambda: LiteraryThemesPack(
-        mapping=load_mapping("literary_themes"),
-        remove_matched_subjects=True,
-    ),
-    "literary_tropes": lambda: LiteraryTropesPack(
-        mapping=load_mapping("literary_tropes"),
-        remove_matched_subjects=True,
-    ),
-    "main_topics": lambda: MainTopicsPack(
-        mapping=load_mapping("main_topics"),
-        remove_matched_subjects=True,
-    ),
-    "subject_diagnostics": lambda: SubjectDiagnosticsPack(
-        droppable=load_set("droppable")
-    ),
-    "people": lambda: PeoplePack(overrides=load_mapping("people_overrides")),
-    "places": lambda: PlacesPack(overrides=load_mapping("places_overrides")),
-    "times": TimesPack,
+    "literary_form": LiteraryFormPack.default,
+    "audience": AudiencePack.default,
+    "genres": GenresPack.default,
+    "subgenres": SubgenresPack.default,
+    "content_formats": ContentFormatsPack.default,
+    "moods": MoodsPack.default,
+    "literary_themes": LiteraryThemesPack.default,
+    "literary_tropes": LiteraryTropesPack.default,
+    "main_topics": MainTopicsPack.default,
+    "subject_diagnostics": SubjectDiagnosticsPack.default,
+    "people": PeoplePack.default,
+    "places": PlacesPack.default,
+    "times": TimesPack.default,
 }
 
 AVAILABLE_PACK_NAMES = tuple(sorted({*PACK_FACTORIES, *PACK_PRESETS}))
diff --git a/rule_packs/audience.py b/rule_packs/audience.py
index 165c391..8f59146 100644
--- a/rule_packs/audience.py
+++ b/rule_packs/audience.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("audience"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "AudiencePack":
+        return cls(
+            mapping=load_mapping("audience"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/content_formats.py b/rule_packs/content_formats.py
index c1260b4..c96e197 100644
--- a/rule_packs/content_formats.py
+++ b/rule_packs/content_formats.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("format"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "ContentFormatsPack":
+        return cls(
+            mapping=load_mapping("content_formats"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/genres.py b/rule_packs/genres.py
index b110f12..1812744 100644
--- a/rule_packs/genres.py
+++ b/rule_packs/genres.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("genre"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "GenresPack":
+        return cls(
+            mapping=load_mapping("genres"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/literary_form.py b/rule_packs/literary_form.py
index 1972b30..8c02391 100644
--- a/rule_packs/literary_form.py
+++ b/rule_packs/literary_form.py
@@ -14,3 +14,7 @@ class LiteraryFormPack(SubjectPack):
     def __init__(self, remove_matched_subjects: bool = True) -> None:
         self.rules = (PrefixRule("form"),)
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "LiteraryFormPack":
+        return cls(remove_matched_subjects=True)
diff --git a/rule_packs/literary_themes.py b/rule_packs/literary_themes.py
index 2357776..7ccb6bb 100644
--- a/rule_packs/literary_themes.py
+++ b/rule_packs/literary_themes.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("theme"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "LiteraryThemesPack":
+        return cls(
+            mapping=load_mapping("literary_themes"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/literary_tropes.py b/rule_packs/literary_tropes.py
index 1c1db97..9a18f81 100644
--- a/rule_packs/literary_tropes.py
+++ b/rule_packs/literary_tropes.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("trope"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "LiteraryTropesPack":
+        return cls(
+            mapping=load_mapping("literary_tropes"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/main_topics.py b/rule_packs/main_topics.py
index 5c4e359..68d6320 100644
--- a/rule_packs/main_topics.py
+++ b/rule_packs/main_topics.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("topic"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "MainTopicsPack":
+        return cls(
+            mapping=load_mapping("main_topics"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/moods.py b/rule_packs/moods.py
index c436e0e..d6a2731 100644
--- a/rule_packs/moods.py
+++ b/rule_packs/moods.py
@@ -14,3 +14,7 @@ class MoodsPack(SubjectPack):
     def __init__(self, remove_matched_subjects: bool = True) -> None:
         self.rules = (PrefixRule("mood"),)
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "MoodsPack":
+        return cls(remove_matched_subjects=True)
diff --git a/rule_packs/people.py b/rule_packs/people.py
index e5660f1..2ad387e 100644
--- a/rule_packs/people.py
+++ b/rule_packs/people.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from core.run_state import RunState
 from rule_engine.base import RulePack
 from rules import OverrideRule
@@ -21,3 +22,7 @@ def apply(self, state: RunState) -> None:
             value = self.rule.apply(raw)
             if value is not None:
                 state.add("people", value)
+
+    @classmethod
+    def default(cls) -> "PeoplePack":
+        return cls(overrides=load_mapping("people_overrides"))
diff --git a/rule_packs/places.py b/rule_packs/places.py
index 0a13464..1757eba 100644
--- a/rule_packs/places.py
+++ b/rule_packs/places.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from core.run_state import RunState
 from rule_engine.base import RulePack
 from rules import OverrideRule
@@ -21,3 +22,7 @@ def apply(self, state: RunState) -> None:
             value = self.rule.apply(raw)
             if value is not None:
                 state.add("places", value)
+
+    @classmethod
+    def default(cls) -> "PlacesPack":
+        return cls(overrides=load_mapping("places_overrides"))
diff --git a/rule_packs/subgenres.py b/rule_packs/subgenres.py
index eb832c5..97fb428 100644
--- a/rule_packs/subgenres.py
+++ b/rule_packs/subgenres.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Mapping
 
+from core.json_loader import load_mapping
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
@@ -20,3 +21,10 @@ def __init__(
     ) -> None:
         self.rules = (PrefixRule("subgenre"), MappingRule(mapping))
         self.remove_matched_subjects = remove_matched_subjects
+
+    @classmethod
+    def default(cls) -> "SubgenresPack":
+        return cls(
+            mapping=load_mapping("subgenres"),
+            remove_matched_subjects=True,
+        )
diff --git a/rule_packs/subject_diagnostics.py b/rule_packs/subject_diagnostics.py
index 4e6a0f7..adfa89c 100644
--- a/rule_packs/subject_diagnostics.py
+++ b/rule_packs/subject_diagnostics.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from core.json_loader import load_set
 from core.run_state import RunState
 from rule_engine.base import RulePack
 from rule_engine.normalization import (
@@ -39,3 +40,7 @@ def apply(self, state: RunState) -> None:
             value = raw.strip()
             if value:
                 state.add("unmapped", value)
+
+    @classmethod
+    def default(cls) -> "SubjectDiagnosticsPack":
+        return cls(droppable=load_set("droppable"))
diff --git a/rule_packs/times.py b/rule_packs/times.py
index c50a61a..6808ca8 100644
--- a/rule_packs/times.py
+++ b/rule_packs/times.py
@@ -19,3 +19,7 @@ def apply(self, state: RunState) -> None:
             value = self.rule.apply(raw)
             if value is not None:
                 state.add("times", value)
+
+    @classmethod
+    def default(cls) -> "TimesPack":
+        return cls()

From 6002bd6f164708f34408a3284a90fc8d582b4039 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 00:39:29 +0900
Subject: [PATCH 5/9] Print remaining and removed subjects in
 migrate_subjects.py

---
 core/run_state.py           | 22 ++++++++++++++++
 core/subject_classifier.py  | 18 +++++++++++--
 rule_packs/utils.py         | 13 ++++++++++
 scripts/migrate_subjects.py | 52 +++++++++++++++++++++++++------------
 4 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/core/run_state.py b/core/run_state.py
index a78fae0..a2f524e 100644
--- a/core/run_state.py
+++ b/core/run_state.py
@@ -13,10 +13,32 @@ class RunState:
 
     work: Mapping[str, Any]
     result: dict[str, list[str]]
+    original_subjects: list[str] = field(default_factory=list)
     remaining_subjects: list[str] = field(default_factory=list)
+    removed_subjects: list[str] = field(default_factory=list)
+    subject_matches: list[dict[str, str]] = field(default_factory=list)
 
     def add(self, output_type: str, value: str) -> None:
         if output_type not in self.result:
             self.result[output_type] = []
         if value not in self.result[output_type]:
             self.result[output_type].append(value)
+
+    def record_subject_match(
+        self,
+        raw: str,
+        output_type: str,
+        value: str,
+        action: str,
+    ) -> None:
+        self.subject_matches.append(
+            {
+                "subject": raw,
+                "output_type": output_type,
+                "value": value,
+                "action": action,
+            }
+        )
+
+    def record_removed_subject(self, raw: str) -> None:
+        self.removed_subjects.append(raw)
diff --git a/core/subject_classifier.py b/core/subject_classifier.py
index f85aef2..c1ee98e 100644
--- a/core/subject_classifier.py
+++ b/core/subject_classifier.py
@@ -40,12 +40,26 @@ def __init__(
         self.output_types = tuple(output_types or DEFAULT_OUTPUT_TYPES)
 
     def classify_work(self, work: Mapping[str, Any]) -> dict[str, list[str]]:
+        """Return only the proposed tags for compatibility callers."""
+        return self.classify_work_report(work)["proposed_tags"]
+
+    def classify_work_report(self, work: Mapping[str, Any]) -> dict[str, Any]:
         """Run the enabled rule packs against a normalized work object."""
+        original_subjects = list(work.get("subjects", []))
         state = RunState(
             work=work,
             result={tag_type: [] for tag_type in self.output_types},
-            remaining_subjects=list(work.get("subjects", [])),
+            original_subjects=original_subjects,
+            remaining_subjects=list(original_subjects),
         )
         for pack in self.rule_packs:
             pack.apply(state)
-        return state.result
+        return {
+            "proposed_tags": state.result,
+            "subject_proposal": {
+                "original": state.original_subjects,
+                "removed": state.removed_subjects,
+                "remaining": state.remaining_subjects,
+            },
+            "subject_matches": state.subject_matches,
+        }
diff --git a/rule_packs/utils.py b/rule_packs/utils.py
index c27827e..221f5ca 100644
--- a/rule_packs/utils.py
+++ b/rule_packs/utils.py
@@ -33,6 +33,13 @@ def classify_subject_values(
             next_subjects.append(raw)
             continue
         state.add(output_type, match)
+        state.record_subject_match(
+            raw=raw,
+            output_type=output_type,
+            value=match,
+            action="move",
+        )
+        state.record_removed_subject(raw)
     state.remaining_subjects = next_subjects
 
 
@@ -49,6 +56,12 @@ def apply_subject_pack(
         match = classify_subject_value(raw, rules)
         if match is not None:
             state.add(output_type, match)
+            state.record_subject_match(
+                raw=raw,
+                output_type=output_type,
+                value=match,
+                action="extract_only",
+            )
 
 
 class SubjectPack(RulePack):
diff --git a/scripts/migrate_subjects.py b/scripts/migrate_subjects.py
index 57e8018..fe64208 100644
--- a/scripts/migrate_subjects.py
+++ b/scripts/migrate_subjects.py
@@ -61,20 +61,38 @@ def load_work_file(path: str) -> dict:
 # ---------------------------------------------------------------------------
 
 
-def print_result(work_id: str, result: dict):
+def print_report(work_id: str, report: dict):
     print(f"\n=== {work_id} ===")
-    for key, values in result.items():
+    print("  proposed_tags:")
+    for key, values in report["proposed_tags"].items():
         if values:
-            print(f"  {key}:")
+            print(f"    {key}:")
             for v in values:
-                print(f"    - {v}")
-
-
-def write_result(work_id: str, result: dict, output_dir: str):
+                print(f"      - {v}")
+
+    subject_proposal = report["subject_proposal"]
+    print("  subject_proposal:")
+    for key in ("removed", "remaining"):
+        values = subject_proposal[key]
+        print(f"    {key}:")
+        for value in values:
+            print(f"      - {value}")
+
+    if report["subject_matches"]:
+        print("  subject_matches:")
+        for match in report["subject_matches"]:
+            print(
+                "    - "
+                f"{match['subject']} -> {match['output_type']}:{match['value']} "
+                f"({match['action']})"
+            )
+
+
+def write_report(work_id: str, report: dict, output_dir: str):
     os.makedirs(output_dir, exist_ok=True)
     out_path = Path(output_dir) / f"{work_id}.json"
     with open(out_path, "w") as f:
-        json.dump({"work_id": work_id, **result}, f, indent=2)
+        json.dump({"work_id": work_id, **report}, f, indent=2)
     print(f"Written: {out_path}")
 
 
@@ -111,20 +129,20 @@ def main():
     if args.work:
         print(f"Fetching {args.work}...")
         work = fetch_work(args.work)
-        result = classifier.classify_work(work)
+        report = classifier.classify_work_report(work)
         if args.dry_run:
-            print_result(args.work, result)
+            print_report(args.work, report)
         else:
-            write_result(args.work, result, args.output)
+            write_report(args.work, report, args.output)
 
     elif args.file:
         work = load_work_file(args.file)
         work_id = work.get("key", Path(args.file).stem).split("/")[-1]
-        result = classifier.classify_work(work)
+        report = classifier.classify_work_report(work)
         if args.dry_run:
-            print_result(work_id, result)
+            print_report(work_id, report)
         else:
-            write_result(work_id, result, args.output)
+            write_report(work_id, report, args.output)
 
     elif args.batch:
         with open(args.batch) as f:
@@ -134,11 +152,11 @@ def main():
             try:
                 print(f"Processing {work_id}...")
                 work = fetch_work(work_id)
-                result = classifier.classify_work(work)
+                report = classifier.classify_work_report(work)
                 if args.dry_run:
-                    print_result(work_id, result)
+                    print_report(work_id, report)
                 else:
-                    write_result(work_id, result, args.output)
+                    write_report(work_id, report, args.output)
             except Exception as e:
                 print(f"ERROR processing {work_id}: {e}", file=sys.stderr)
 

From 1830c7cfaa405a11bf63ca6d24afa9507d2825dd Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 00:56:53 +0900
Subject: [PATCH 6/9] Support rule-level move and extract-only subject actions

---
 core/run_state.py                 |  4 +++
 rule_packs/subject_diagnostics.py |  2 ++
 rule_packs/utils.py               | 56 ++++++++++++++-----------------
 rules/__init__.py                 |  9 ++++-
 rules/mapping_rule.py             | 15 +++++++--
 rules/match_result.py             | 13 +++++++
 rules/prefix_rule.py              |  9 +++--
 7 files changed, 71 insertions(+), 37 deletions(-)
 create mode 100644 rules/match_result.py

diff --git a/core/run_state.py b/core/run_state.py
index a2f524e..658d5b4 100644
--- a/core/run_state.py
+++ b/core/run_state.py
@@ -16,6 +16,7 @@ class RunState:
     original_subjects: list[str] = field(default_factory=list)
     remaining_subjects: list[str] = field(default_factory=list)
     removed_subjects: list[str] = field(default_factory=list)
+    retained_matched_subjects: set[str] = field(default_factory=set)
     subject_matches: list[dict[str, str]] = field(default_factory=list)
 
     def add(self, output_type: str, value: str) -> None:
@@ -42,3 +43,6 @@ def record_subject_match(
 
     def record_removed_subject(self, raw: str) -> None:
         self.removed_subjects.append(raw)
+
+    def record_retained_subject(self, raw: str) -> None:
+        self.retained_matched_subjects.add(raw.lower().strip())
diff --git a/rule_packs/subject_diagnostics.py b/rule_packs/subject_diagnostics.py
index adfa89c..a7849cd 100644
--- a/rule_packs/subject_diagnostics.py
+++ b/rule_packs/subject_diagnostics.py
@@ -24,6 +24,8 @@ def apply(self, state: RunState) -> None:
             key = normalize(raw)
             if key in self.droppable:
                 continue
+            if key in state.retained_matched_subjects:
+                continue
 
             if is_reading_level(raw):
                 value = raw.strip()
diff --git a/rule_packs/utils.py b/rule_packs/utils.py
index 221f5ca..00943b1 100644
--- a/rule_packs/utils.py
+++ b/rule_packs/utils.py
@@ -7,63 +7,59 @@
 
 from core.run_state import RunState
 from rule_engine.base import RulePack
+from rules import RuleMatch
 
 
 class SubjectValueRule(Protocol):
-    def match(self, raw: str) -> str | None: ...
+    def match(self, raw: str) -> RuleMatch | str | None: ...
 
 
-def classify_subject_value(raw: str, rules: Iterable[SubjectValueRule]) -> str | None:
+def _coerce_match(match: RuleMatch | str, default_action: str) -> RuleMatch:
+    if isinstance(match, RuleMatch):
+        return match
+    return RuleMatch(value=match, action=default_action)
+
+
+def classify_subject_value(
+    raw: str,
+    rules: Iterable[SubjectValueRule],
+    default_action: str,
+) -> RuleMatch | None:
     for rule in rules:
         match = rule.match(raw)
         if match is not None:
-            return match
+            return _coerce_match(match, default_action)
     return None
 
 
-def classify_subject_values(
+def apply_subject_pack(
     state: RunState,
     output_type: str,
     rules: Iterable[SubjectValueRule],
+    remove_matched_subjects: bool,
 ) -> None:
+    default_action = "move" if remove_matched_subjects else "extract_only"
     next_subjects: list[str] = []
     for raw in state.remaining_subjects:
-        match = classify_subject_value(raw, rules)
+        match = classify_subject_value(raw, rules, default_action=default_action)
         if match is None:
             next_subjects.append(raw)
             continue
-        state.add(output_type, match)
+        state.add(output_type, match.value)
         state.record_subject_match(
             raw=raw,
             output_type=output_type,
-            value=match,
-            action="move",
+            value=match.value,
+            action=match.action,
         )
-        state.record_removed_subject(raw)
+        if match.action == "move":
+            state.record_removed_subject(raw)
+            continue
+        state.record_retained_subject(raw)
+        next_subjects.append(raw)
     state.remaining_subjects = next_subjects
 
 
-def apply_subject_pack(
-    state: RunState,
-    output_type: str,
-    rules: Iterable[SubjectValueRule],
-    remove_matched_subjects: bool,
-) -> None:
-    if remove_matched_subjects:
-        classify_subject_values(state, output_type, rules)
-        return
-    for raw in state.remaining_subjects:
-        match = classify_subject_value(raw, rules)
-        if match is not None:
-            state.add(output_type, match)
-            state.record_subject_match(
-                raw=raw,
-                output_type=output_type,
-                value=match,
-                action="extract_only",
-            )
-
-
 class SubjectPack(RulePack):
     """Small helper for packs that operate on the shared subject sequence."""
 
diff --git a/rules/__init__.py b/rules/__init__.py
index 9cbc515..ed93b0e 100644
--- a/rules/__init__.py
+++ b/rules/__init__.py
@@ -1,8 +1,15 @@
 """Composable rule units for pack implementations."""
 
+from .match_result import RuleMatch
 from .mapping_rule import MappingRule
 from .override_rule import OverrideRule
 from .passthrough_rule import PassthroughRule
 from .prefix_rule import PrefixRule
 
-__all__ = ["MappingRule", "OverrideRule", "PassthroughRule", "PrefixRule"]
+__all__ = [
+    "MappingRule",
+    "OverrideRule",
+    "PassthroughRule",
+    "PrefixRule",
+    "RuleMatch",
+]
diff --git a/rules/mapping_rule.py b/rules/mapping_rule.py
index 2cb85d3..2fd1bc7 100644
--- a/rules/mapping_rule.py
+++ b/rules/mapping_rule.py
@@ -5,13 +5,22 @@
 from collections.abc import Mapping
 
 from rule_engine.normalization import normalize
+from rules.match_result import RuleMatch
 
 
 class MappingRule:
     """Match normalized input values against a provided mapping."""
 
-    def __init__(self, mapping: Mapping[str, str] | None = None) -> None:
+    def __init__(
+        self,
+        mapping: Mapping[str, str] | None = None,
+        default_action: str = "move",
+    ) -> None:
         self.mapping = dict(mapping or {})
+        self.default_action = default_action
 
-    def match(self, raw: str) -> str | None:
-        return self.mapping.get(normalize(raw))
+    def match(self, raw: str) -> RuleMatch | None:
+        value = self.mapping.get(normalize(raw))
+        if value is None:
+            return None
+        return RuleMatch(value=value, action=self.default_action)
diff --git a/rules/match_result.py b/rules/match_result.py
new file mode 100644
index 0000000..5a3a16f
--- /dev/null
+++ b/rules/match_result.py
@@ -0,0 +1,13 @@
+"""Structured subject-match results with per-rule actions."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class RuleMatch:
+    """A normalized match value plus the subject-handling action to apply."""
+
+    value: str
+    action: str
diff --git a/rules/prefix_rule.py b/rules/prefix_rule.py
index 11b49f9..8ec6b9a 100644
--- a/rules/prefix_rule.py
+++ b/rules/prefix_rule.py
@@ -2,14 +2,17 @@
 
 from __future__ import annotations
 
+from rules.match_result import RuleMatch
+
 
 class PrefixRule:
     """Match values like ``theme:love`` and return the normalized payload."""
 
-    def __init__(self, prefix: str) -> None:
+    def __init__(self, prefix: str, action: str = "move") -> None:
         self.prefix = prefix
+        self.action = action
 
-    def match(self, raw: str) -> str | None:
+    def match(self, raw: str) -> RuleMatch | None:
         if not self.prefix or ":" not in raw:
             return None
         prefix, _, value = raw.partition(":")
@@ -18,4 +21,4 @@ def match(self, raw: str) -> str | None:
         cleaned = value.strip()
         if not cleaned:
             return None
-        return cleaned.title()
+        return RuleMatch(value=cleaned.title(), action=self.action)

From 98fd8135fbacd66948c6d52d39b153e00a74499f Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 00:57:52 +0900
Subject: [PATCH 7/9] Finish rule pack for content_formats

---
 rule_packs/content_formats.py | 40 +++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/rule_packs/content_formats.py b/rule_packs/content_formats.py
index c96e197..e4d271d 100644
--- a/rule_packs/content_formats.py
+++ b/rule_packs/content_formats.py
@@ -8,6 +8,19 @@
 from rule_packs.utils import SubjectPack
 from rules import MappingRule, PrefixRule
 
+MOVE = "move"
+EXTRACT_ONLY = "extract_only"
+
+# First-pass direct-match policies based on current dry-run evidence.
+MOVE_TAGS = frozenset(
+    {
+        "Memoir",
+        "Anthology",
+        "Letters",
+        "Dictionary",
+    }
+)
+
 
 class ContentFormatsPack(SubjectPack):
     name = "content_formats"
@@ -16,15 +29,30 @@ class ContentFormatsPack(SubjectPack):
 
     def __init__(
         self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
+        move_mapping: Mapping[str, str] | None = None,
+        extract_only_mapping: Mapping[str, str] | None = None,
     ) -> None:
-        self.rules = (PrefixRule("format"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
+        self.rules = (
+            PrefixRule("format", action=EXTRACT_ONLY),
+            MappingRule(move_mapping, default_action=MOVE),
+            MappingRule(extract_only_mapping, default_action=EXTRACT_ONLY),
+        )
+        self.remove_matched_subjects = False
 
     @classmethod
     def default(cls) -> "ContentFormatsPack":
+        mapping = load_mapping("content_formats")
+        move_mapping = {
+            legacy: canonical
+            for legacy, canonical in mapping.items()
+            if canonical in MOVE_TAGS
+        }
+        extract_only_mapping = {
+            legacy: canonical
+            for legacy, canonical in mapping.items()
+            if canonical not in MOVE_TAGS
+        }
         return cls(
-            mapping=load_mapping("content_formats"),
-            remove_matched_subjects=True,
+            move_mapping=move_mapping,
+            extract_only_mapping=extract_only_mapping,
         )

From 335c71f4deb00bd79ee882c836cadf1628490d27 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 01:27:49 +0900
Subject: [PATCH 8/9] Delete unnecessary abstract layer

---
 core/__init__.py                         |   5 -
 core/classifier_assembler.py             |  41 -------
 core/migrate_subject_classifier.py       |  17 ---
 core/pack_registry.py                    |  61 ----------
 resources/mappings/audience.json         |  23 ----
 resources/mappings/genres.json           |  75 ------------
 resources/mappings/literary_themes.json  |  66 -----------
 resources/mappings/literary_tropes.json  |  27 -----
 resources/mappings/main_topics.json      |  61 ----------
 resources/mappings/people_overrides.json |  10 --
 resources/mappings/places_overrides.json |  13 --
 resources/mappings/subgenres.json        |  60 ----------
 rule_packs/__init__.py                   |  40 +------
 rule_packs/audience.py                   |  30 -----
 rule_packs/genres.py                     |  30 -----
 rule_packs/literary_form.py              |  20 ----
 rule_packs/literary_themes.py            |  30 -----
 rule_packs/literary_tropes.py            |  30 -----
 rule_packs/main_topics.py                |  30 -----
 rule_packs/moods.py                      |  20 ----
 rule_packs/people.py                     |  28 -----
 rule_packs/places.py                     |  28 -----
 rule_packs/subgenres.py                  |  30 -----
 rule_packs/times.py                      |  25 ----
 rules/__init__.py                        |   4 -
 rules/override_rule.py                   |  20 ----
 rules/passthrough_rule.py                |  11 --
 scripts/README.md                        | 145 +++++++++++------------
 scripts/migrate_subjects.py              |  39 +++++-
 scripts/run_legacy_subjects.sh           |  22 ----
 30 files changed, 109 insertions(+), 932 deletions(-)
 delete mode 100644 core/__init__.py
 delete mode 100644 core/classifier_assembler.py
 delete mode 100644 core/migrate_subject_classifier.py
 delete mode 100644 core/pack_registry.py
 delete mode 100644 resources/mappings/audience.json
 delete mode 100644 resources/mappings/genres.json
 delete mode 100644 resources/mappings/literary_themes.json
 delete mode 100644 resources/mappings/literary_tropes.json
 delete mode 100644 resources/mappings/main_topics.json
 delete mode 100644 resources/mappings/people_overrides.json
 delete mode 100644 resources/mappings/places_overrides.json
 delete mode 100644 resources/mappings/subgenres.json
 delete mode 100644 rule_packs/audience.py
 delete mode 100644 rule_packs/genres.py
 delete mode 100644 rule_packs/literary_form.py
 delete mode 100644 rule_packs/literary_themes.py
 delete mode 100644 rule_packs/literary_tropes.py
 delete mode 100644 rule_packs/main_topics.py
 delete mode 100644 rule_packs/moods.py
 delete mode 100644 rule_packs/people.py
 delete mode 100644 rule_packs/places.py
 delete mode 100644 rule_packs/subgenres.py
 delete mode 100644 rule_packs/times.py
 delete mode 100644 rules/override_rule.py
 delete mode 100644 rules/passthrough_rule.py
 delete mode 100755 scripts/run_legacy_subjects.sh

diff --git a/core/__init__.py b/core/__init__.py
deleted file mode 100644
index ee7aaf5..0000000
--- a/core/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Core orchestration and default migration assembly."""
-
-from .subject_classifier import DEFAULT_OUTPUT_TYPES, SubjectClassifier
-
-__all__ = ["DEFAULT_OUTPUT_TYPES", "SubjectClassifier"]
diff --git a/core/classifier_assembler.py b/core/classifier_assembler.py
deleted file mode 100644
index 23a732e..0000000
--- a/core/classifier_assembler.py
+++ /dev/null
@@ -1,41 +0,0 @@
-"""Assembly helpers for building migration classifiers."""
-
-from __future__ import annotations
-
-from collections.abc import Iterable
-
-from core.json_loader import load_set
-from core.pack_registry import (
-    AVAILABLE_PACK_NAMES,
-    PACK_FACTORIES,
-    PACK_PRESETS,
-)
-from core.subject_classifier import SubjectClassifier
-
-
-def resolve_pack_names(enabled_packs: Iterable[str] | None) -> list[str]:
-    """Expand presets into concrete stable pack names."""
-    selected = list(enabled_packs or [])
-    expanded: list[str] = []
-    for name in selected:
-        if name in PACK_PRESETS:
-            expanded.extend(PACK_PRESETS[name])
-            continue
-        expanded.append(name)
-    return expanded
-
-
-def build_subject_classifier(
-    enabled_packs: Iterable[str] | None = None,
-) -> SubjectClassifier:
-    """Build the migration classifier from an explicit pack-name list."""
-    selected = resolve_pack_names(enabled_packs)
-    missing = [name for name in selected if name not in PACK_FACTORIES]
-    if missing:
-        available = ", ".join(AVAILABLE_PACK_NAMES)
-        missing_display = ", ".join(sorted(missing))
-        raise ValueError(
-            f"Unknown rule pack(s): {missing_display}. Available: {available}"
-        )
-
-    return SubjectClassifier(rule_packs=[PACK_FACTORIES[name]() for name in selected])
diff --git a/core/migrate_subject_classifier.py b/core/migrate_subject_classifier.py
deleted file mode 100644
index 38cc1a1..0000000
--- a/core/migrate_subject_classifier.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""Compatibility wrapper for migration classifier assembly."""
-
-from __future__ import annotations
-
-from core.classifier_assembler import (
-    build_subject_classifier,
-    resolve_pack_names,
-)
-from core.pack_registry import AVAILABLE_PACK_NAMES, PACK_FACTORIES, PACK_PRESETS
-
-__all__ = [
-    "AVAILABLE_PACK_NAMES",
-    "PACK_FACTORIES",
-    "PACK_PRESETS",
-    "build_subject_classifier",
-    "resolve_pack_names",
-]
diff --git a/core/pack_registry.py b/core/pack_registry.py
deleted file mode 100644
index 3130d31..0000000
--- a/core/pack_registry.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Stable pack-name registry for migration assembly."""
-
-from __future__ import annotations
-
-from typing import Callable
-
-from rule_packs import (
-    AudiencePack,
-    ContentFormatsPack,
-    GenresPack,
-    LiteraryFormPack,
-    LiteraryThemesPack,
-    LiteraryTropesPack,
-    MainTopicsPack,
-    MoodsPack,
-    PeoplePack,
-    PlacesPack,
-    SubgenresPack,
-    SUBJECT_PACK_CLASSES,
-    SubjectDiagnosticsPack,
-    TimesPack,
-)
-
-PackFactory = Callable[[], object]
-
-SUBJECT_PACK_BUILDERS = {pack_cls.name: pack_cls for pack_cls in SUBJECT_PACK_CLASSES}
-PACK_PRESETS: dict[str, tuple[str, ...]] = {
-    "subject_mappings": (
-        "literary_form",
-        "audience",
-        "genres",
-        "subgenres",
-        "content_formats",
-        "moods",
-        "literary_themes",
-        "literary_tropes",
-        "main_topics",
-        "subject_diagnostics",
-        "people",
-        "places",
-        "times",
-    ),
-}
-
-PACK_FACTORIES: dict[str, PackFactory] = {
-    "literary_form": LiteraryFormPack.default,
-    "audience": AudiencePack.default,
-    "genres": GenresPack.default,
-    "subgenres": SubgenresPack.default,
-    "content_formats": ContentFormatsPack.default,
-    "moods": MoodsPack.default,
-    "literary_themes": LiteraryThemesPack.default,
-    "literary_tropes": LiteraryTropesPack.default,
-    "main_topics": MainTopicsPack.default,
-    "subject_diagnostics": SubjectDiagnosticsPack.default,
-    "people": PeoplePack.default,
-    "places": PlacesPack.default,
-    "times": TimesPack.default,
-}
-
-AVAILABLE_PACK_NAMES = tuple(sorted({*PACK_FACTORIES, *PACK_PRESETS}))
diff --git a/resources/mappings/audience.json b/resources/mappings/audience.json
deleted file mode 100644
index b80e84a..0000000
--- a/resources/mappings/audience.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "juvenile fiction": "Juvenile",
-  "juvenile literature": "Juvenile",
-  "juvenile nonfiction": "Juvenile",
-  "children's fiction": "Children",
-  "children's literature": "Children",
-  "children's nonfiction": "Children",
-  "children's stories": "Children",
-  "picture books": "Children",
-  "board books": "Preschool",
-  "baby books": "Preschool",
-  "young adult fiction": "Young Adult",
-  "young adult literature": "Young Adult",
-  "young adult nonfiction": "Young Adult",
-  "teen fiction": "Young Adult",
-  "teenage fiction": "Young Adult",
-  "ya fiction": "Young Adult",
-  "readers (adult)": "Adult",
-  "adult fiction": "Adult",
-  "academic": "Academic",
-  "scholarly": "Academic",
-  "textbooks": "Academic"
-}
diff --git a/resources/mappings/genres.json b/resources/mappings/genres.json
deleted file mode 100644
index 7220cbb..0000000
--- a/resources/mappings/genres.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-  "absurdist fiction": "Absurd",
-  "absurdism": "Absurd",
-  "action": "Action",
-  "action and adventure": "Adventure",
-  "adventure": "Adventure",
-  "adventure fiction": "Adventure",
-  "adventure stories": "Adventure",
-  "black comedy": "Comedy",
-  "british comedy": "Comedy",
-  "comedy": "Comedy",
-  "comic fiction": "Comedy",
-  "comedies": "Comedy",
-  "crime": "Crime",
-  "crime fiction": "Crime",
-  "crime stories": "Crime",
-  "criminal fiction": "Crime",
-  "detective and mystery stories": "Mystery",
-  "detective fiction": "Mystery",
-  "detective stories": "Mystery",
-  "drama": "Drama",
-  "dramatic fiction": "Drama",
-  "erotica": "Erotica",
-  "erotic fiction": "Erotica",
-  "fantasy": "Fantasy",
-  "fantasy fiction": "Fantasy",
-  "fantasy stories": "Fantasy",
-  "historical": "Historical",
-  "historical fiction": "Historical",
-  "historical novel": "Historical",
-  "fiction, historical": "Historical",
-  "horror": "Horror",
-  "horror fiction": "Horror",
-  "horror stories": "Horror",
-  "humor": "Humor",
-  "humorous fiction": "Humor",
-  "humorous stories": "Humor",
-  "lgbtq fiction": "LGBTQ+",
-  "gay fiction": "LGBTQ+",
-  "lesbian fiction": "LGBTQ+",
-  "queer fiction": "LGBTQ+",
-  "classic fiction": "Literary",
-  "classic literature": "Literary",
-  "classics": "Literary",
-  "literary fiction": "Literary",
-  "literature": "Literary",
-  "mystery fiction": "Mystery",
-  "mystery": "Mystery",
-  "mystery and suspense": "Mystery",
-  "mysteries": "Mystery",
-  "whodunit": "Mystery",
-  "mythology": "Mythology",
-  "mythological fiction": "Mythology",
-  "romance": "Romance",
-  "romance fiction": "Romance",
-  "romantic fiction": "Romance",
-  "love stories": "Romance",
-  "love story": "Romance",
-  "man-woman relationships, fiction": "Romance",
-  "satire": "Satire",
-  "satirical fiction": "Satire",
-  "science fiction": "Sci-Fi",
-  "sci-fi": "Sci-Fi",
-  "sf": "Sci-Fi",
-  "speculative fiction": "Sci-Fi",
-  "suspense fiction": "Thriller",
-  "thriller": "Thriller",
-  "thrillers": "Thriller",
-  "thriller fiction": "Thriller",
-  "tragedy": "Tragedy",
-  "tragic fiction": "Tragedy",
-  "western": "Western",
-  "westerns": "Western",
-  "western stories": "Western"
-}
diff --git a/resources/mappings/literary_themes.json b/resources/mappings/literary_themes.json
deleted file mode 100644
index 7b8120f..0000000
--- a/resources/mappings/literary_themes.json
+++ /dev/null
@@ -1,66 +0,0 @@
-{
-  "betrayal": "Betrayal",
-  "class conflict": "Class",
-  "class struggle": "Class",
-  "class warfare": "Class",
-  "class wars": "Class",
-  "social class": "Class",
-  "social conflict": "Class",
-  "coming of age": "Coming of Age",
-  "loss of innocence": "Coming of Age",
-  "death": "Death",
-  "death and dying": "Death",
-  "mortality": "Death",
-  "desire": "Desire",
-  "longing": "Desire",
-  "obsession": "Obsession",
-  "duty": "Duty",
-  "loyalty": "Duty",
-  "fate": "Fate",
-  "determinism": "Fate",
-  "freedom": "Freedom",
-  "liberation": "Freedom",
-  "autonomy": "Freedom",
-  "gender": "Gender",
-  "gender roles": "Gender",
-  "grief": "Grief",
-  "mourning": "Grief",
-  "guilt": "Guilt",
-  "remorse": "Guilt",
-  "identity": "Identity",
-  "self-discovery": "Identity",
-  "innocence": "Innocence",
-  "justice": "Justice",
-  "injustice": "Justice",
-  "love": "Love",
-  "romantic love": "Love",
-  "memory": "Memory",
-  "nostalgia": "Memory",
-  "mortality": "Mortality",
-  "nature": "Nature",
-  "environment": "Nature",
-  "power": "Power",
-  "power dynamics": "Power",
-  "abuse of power": "Power",
-  "state power": "Power",
-  "race": "Race",
-  "racism": "Race",
-  "racial identity": "Race",
-  "redemption": "Redemption",
-  "atonement": "Redemption",
-  "rejection": "Rejection",
-  "rejection (psychology)": "Rejection",
-  "rejet (psychologie)": "Rejection",
-  "revenge": "Revenge",
-  "vengeance": "Revenge",
-  "vengeance -- fiction": "Revenge",
-  "revenge -- fiction": "Revenge",
-  "sacrifice": "Sacrifice",
-  "survival": "Survival",
-  "truth": "Truth",
-  "deception": "Truth",
-  "honesty": "Truth",
-  "violence": "Violence",
-  "war": "War",
-  "warfare": "War"
-}
diff --git a/resources/mappings/literary_tropes.json b/resources/mappings/literary_tropes.json
deleted file mode 100644
index 566f9b4..0000000
--- a/resources/mappings/literary_tropes.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "foundlings": "Foundlings",
-  "foundlings in fiction": "Foundlings",
-  "foundlings -- fiction": "Foundlings",
-  "enfants trouvés": "Foundlings",
-  "orphans": "Orphan Protagonist",
-  "orphans in fiction": "Orphan Protagonist",
-  "orphans -- fiction": "Orphan Protagonist",
-  "triangles (interpersonal relations)": "Love Triangles",
-  "triangles (interpersonal relationships)": "Love Triangles",
-  "triangles (interpersonal relations)--fiction": "Love Triangles",
-  "triangle (relations humaines)": "Love Triangles",
-  "love triangle": "Love Triangles",
-  "love triangles": "Love Triangles",
-  "inheritance and succession": "Inheritance and Succession",
-  "unreliable narrator": "Unreliable Narrator",
-  "enemies to lovers": "Enemies to Lovers",
-  "found family": "Found Family",
-  "chosen one": "Chosen One",
-  "antihero": "Antihero",
-  "slow burn": "Slow Burn",
-  "redemption arc": "Redemption Arc",
-  "forbidden love": "Forbidden Love",
-  "fake identity": "Fake Identity",
-  "frame narrative": "Frame Narrative",
-  "epistolary": "Epistolary Structure"
-}
diff --git a/resources/mappings/main_topics.json b/resources/mappings/main_topics.json
deleted file mode 100644
index 17d6dd6..0000000
--- a/resources/mappings/main_topics.json
+++ /dev/null
@@ -1,61 +0,0 @@
-{
-  "interpersonal relations": "Interpersonal relations",
-  "interpersonal relationships": "Interpersonal relations",
-  "interpersonal relations, fiction": "Interpersonal relations",
-  "family life": "Family life",
-  "families": "Family life",
-  "family": "Family life",
-  "fiction, family life, general": "Family life",
-  "social conditions": "Social conditions",
-  "social life and customs": "Social life and customs",
-  "manners and customs": "Manners and customs",
-  "mœurs et coutumes": "Manners and customs",
-  "inheritance": "Inheritance",
-  "inheritance and succession": "Inheritance",
-  "debt": "Debt",
-  "slavery": "Slavery",
-  "slavery in fiction": "Slavery",
-  "education": "Education",
-  "class": "Class",
-  "social class": "Class",
-  "war": "War",
-  "battles": "War",
-  "religion": "Religion",
-  "church": "Religion",
-  "magic": "Magic",
-  "witchcraft": "Witchcraft",
-  "medicine": "Medicine",
-  "technology": "Technology",
-  "politics": "Politics",
-  "government": "Politics",
-  "law": "Law",
-  "justice": "Justice",
-  "trade": "Trade",
-  "economics": "Economics",
-  "labor": "Labor",
-  "work": "Labor",
-  "immigration": "Immigration",
-  "colonialism": "Colonialism",
-  "empire": "Imperialism",
-  "imperialism": "Imperialism",
-  "race": "Race",
-  "gender": "Gender",
-  "feminism": "Feminism",
-  "sexuality": "Sexuality",
-  "language": "Language",
-  "art": "Art",
-  "music": "Music",
-  "science": "Science",
-  "nature": "Nature",
-  "environment": "Environment",
-  "travel": "Travel",
-  "exploration": "Exploration",
-  "philosophy": "Philosophy",
-  "ethics": "Ethics",
-  "morality": "Ethics",
-  "psychology": "Psychology",
-  "mental health": "Mental health",
-  "poverty": "Poverty",
-  "wealth": "Wealth",
-  "power": "Power"
-}
diff --git a/resources/mappings/people_overrides.json b/resources/mappings/people_overrides.json
deleted file mode 100644
index c8576ab..0000000
--- a/resources/mappings/people_overrides.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "heathcliff (fictitious character)": "Heathcliff",
-  "heathcliff (fictitious character : brontë)": "Heathcliff",
-  "heathcliff (fictitious character : bronte)": "Heathcliff",
-  "catherine earnshawm (fictitious character)": "Catherine Earnshaw",
-  "harry potter (fictitious character)": "Harry Potter",
-  "hermione granger (fictitious character)": "Hermione Granger",
-  "ron weasley (fictitious character)": "Ron Weasley",
-  "beuve de hanstone (legendary character)": "Beuve de Hanstone"
-}
diff --git a/resources/mappings/places_overrides.json b/resources/mappings/places_overrides.json
deleted file mode 100644
index 145e981..0000000
--- a/resources/mappings/places_overrides.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "yorkshire (england)": "Yorkshire",
-  "yorkshire (england) -- fiction": "Yorkshire",
-  "england, fiction": "England",
-  "england -- fiction": "England",
-  "london (england)": "London",
-  "new york (n.y.)": "New York City",
-  "new york (state)": "New York State",
-  "united states": "United States",
-  "great britain": "Great Britain",
-  "france": "France",
-  "paris (france)": "Paris"
-}
diff --git a/resources/mappings/subgenres.json b/resources/mappings/subgenres.json
deleted file mode 100644
index 3c271e0..0000000
--- a/resources/mappings/subgenres.json
+++ /dev/null
@@ -1,60 +0,0 @@
-{
-  "apocalyptic fiction": "Apocalyptic",
-  "apocalyptic": "Apocalyptic",
-  "biopunk": "Biopunk",
-  "climate fiction": "Cli-fi",
-  "cli-fi": "Cli-fi",
-  "bildungsroman": "Coming of Age",
-  "coming-of-age": "Coming of Age",
-  "coming of age": "Coming of Age",
-  "coming of age fiction": "Coming of Age",
-  "cyberpunk": "Cyberpunk",
-  "detective fiction": "Detective",
-  "detective stories": "Detective",
-  "dystopia": "Dystopian",
-  "dystopian": "Dystopian",
-  "dystopian fiction": "Dystopian",
-  "epistolary fiction": "Epistolary",
-  "epistolary": "Epistolary",
-  "epistolary novel": "Epistolary",
-  "epic": "Epic",
-  "epic fantasy": "Epic",
-  "espionage": "Espionage",
-  "spy fiction": "Espionage",
-  "spy stories": "Espionage",
-  "family saga": "Family Saga",
-  "family sagas": "Family Saga",
-  "saga": "Family Saga",
-  "futurism": "Futurism",
-  "futuristic fiction": "Futurism",
-  "gonzo": "Gonzo",
-  "gonzo journalism": "Gonzo",
-  "gothic": "Gothic",
-  "gothic fiction": "Gothic",
-  "gothic horror": "Gothic",
-  "gothic romance": "Gothic",
-  "english gothic fiction": "Gothic",
-  "british gothic": "Gothic",
-  "southern gothic": "Gothic",
-  "historical fiction": "Historical",
-  "fiction, historical": "Historical",
-  "locked room mystery": "Locked Room",
-  "locked-room": "Locked Room",
-  "melodrama": "Melodrama",
-  "picaresque": "Picaresque",
-  "picaresque novel": "Picaresque",
-  "post-apocalyptic": "Post-Apocalyptic",
-  "post-apocalyptic fiction": "Post-Apocalyptic",
-  "post apocalyptic": "Post-Apocalyptic",
-  "psychological": "Psychological",
-  "psychological fiction": "Psychological",
-  "psychological thriller": "Psychological",
-  "psychological horror": "Psychological",
-  "fiction, psychological": "Psychological",
-  "space opera": "Space Opera",
-  "steampunk": "Steampunk",
-  "true crime": "True Crime",
-  "utopian": "Utopian",
-  "utopian fiction": "Utopian",
-  "utopia": "Utopian"
-}
diff --git a/rule_packs/__init__.py b/rule_packs/__init__.py
index 4dd2638..436daac 100644
--- a/rule_packs/__init__.py
+++ b/rule_packs/__init__.py
@@ -1,54 +1,18 @@
 """Concrete rule-pack modules."""
 
-from .audience import AudiencePack
 from .content_formats import ContentFormatsPack
-from .genres import GenresPack
-from .literary_form import LiteraryFormPack
-from .literary_themes import LiteraryThemesPack
-from .literary_tropes import LiteraryTropesPack
-from .main_topics import MainTopicsPack
-from .moods import MoodsPack
-from .people import PeoplePack
-from .places import PlacesPack
-from .subgenres import SubgenresPack
 from .subject_diagnostics import SubjectDiagnosticsPack
-from .times import TimesPack
 
-SUBJECT_PACK_CLASSES = (
-    LiteraryFormPack,
-    AudiencePack,
-    GenresPack,
-    SubgenresPack,
-    ContentFormatsPack,
-    MoodsPack,
-    LiteraryThemesPack,
-    LiteraryTropesPack,
-    MainTopicsPack,
-)
+SUBJECT_PACK_CLASSES = (ContentFormatsPack,)
 
-FIELD_PACK_CLASSES = (
-    PeoplePack,
-    PlacesPack,
-    TimesPack,
-)
+FIELD_PACK_CLASSES = ()
 
 ALL_PACK_CLASSES = SUBJECT_PACK_CLASSES + FIELD_PACK_CLASSES
 
 __all__ = [
     "ALL_PACK_CLASSES",
-    "AudiencePack",
     "ContentFormatsPack",
     "FIELD_PACK_CLASSES",
-    "GenresPack",
-    "LiteraryFormPack",
-    "LiteraryThemesPack",
-    "LiteraryTropesPack",
-    "MainTopicsPack",
-    "MoodsPack",
-    "PeoplePack",
-    "PlacesPack",
     "SUBJECT_PACK_CLASSES",
-    "SubgenresPack",
     "SubjectDiagnosticsPack",
-    "TimesPack",
 ]
diff --git a/rule_packs/audience.py b/rule_packs/audience.py
deleted file mode 100644
index 8f59146..0000000
--- a/rule_packs/audience.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for audience tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class AudiencePack(SubjectPack):
-    name = "audience"
-    output_types = ("audience",)
-    output_type = "audience"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("audience"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "AudiencePack":
-        return cls(
-            mapping=load_mapping("audience"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/genres.py b/rule_packs/genres.py
deleted file mode 100644
index 1812744..0000000
--- a/rule_packs/genres.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for genre tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class GenresPack(SubjectPack):
-    name = "genres"
-    output_types = ("genres",)
-    output_type = "genres"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("genre"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "GenresPack":
-        return cls(
-            mapping=load_mapping("genres"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/literary_form.py b/rule_packs/literary_form.py
deleted file mode 100644
index 8c02391..0000000
--- a/rule_packs/literary_form.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""Rule pack for literary_form."""
-
-from __future__ import annotations
-
-from rule_packs.utils import SubjectPack
-from rules import PrefixRule
-
-
-class LiteraryFormPack(SubjectPack):
-    name = "literary_form"
-    output_types = ("literary_form",)
-    output_type = "literary_form"
-
-    def __init__(self, remove_matched_subjects: bool = True) -> None:
-        self.rules = (PrefixRule("form"),)
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "LiteraryFormPack":
-        return cls(remove_matched_subjects=True)
diff --git a/rule_packs/literary_themes.py b/rule_packs/literary_themes.py
deleted file mode 100644
index 7ccb6bb..0000000
--- a/rule_packs/literary_themes.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for literary_themes tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class LiteraryThemesPack(SubjectPack):
-    name = "literary_themes"
-    output_types = ("literary_themes",)
-    output_type = "literary_themes"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("theme"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "LiteraryThemesPack":
-        return cls(
-            mapping=load_mapping("literary_themes"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/literary_tropes.py b/rule_packs/literary_tropes.py
deleted file mode 100644
index 9a18f81..0000000
--- a/rule_packs/literary_tropes.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for literary_tropes tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class LiteraryTropesPack(SubjectPack):
-    name = "literary_tropes"
-    output_types = ("literary_tropes",)
-    output_type = "literary_tropes"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("trope"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "LiteraryTropesPack":
-        return cls(
-            mapping=load_mapping("literary_tropes"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/main_topics.py b/rule_packs/main_topics.py
deleted file mode 100644
index 68d6320..0000000
--- a/rule_packs/main_topics.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for main_topics tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class MainTopicsPack(SubjectPack):
-    name = "main_topics"
-    output_types = ("main_topics",)
-    output_type = "main_topics"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("topic"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "MainTopicsPack":
-        return cls(
-            mapping=load_mapping("main_topics"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/moods.py b/rule_packs/moods.py
deleted file mode 100644
index d6a2731..0000000
--- a/rule_packs/moods.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""Rule pack for moods tags."""
-
-from __future__ import annotations
-
-from rule_packs.utils import SubjectPack
-from rules import PrefixRule
-
-
-class MoodsPack(SubjectPack):
-    name = "moods"
-    output_types = ("moods",)
-    output_type = "moods"
-
-    def __init__(self, remove_matched_subjects: bool = True) -> None:
-        self.rules = (PrefixRule("mood"),)
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "MoodsPack":
-        return cls(remove_matched_subjects=True)
diff --git a/rule_packs/people.py b/rule_packs/people.py
deleted file mode 100644
index 2ad387e..0000000
--- a/rule_packs/people.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""Rule pack for subject_people."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from core.run_state import RunState
-from rule_engine.base import RulePack
-from rules import OverrideRule
-
-
-class PeoplePack(RulePack):
-    name = "people"
-    output_types = ("people",)
-
-    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
-        self.rule = OverrideRule(overrides)
-
-    def apply(self, state: RunState) -> None:
-        for raw in state.work.get("subject_people", []):
-            value = self.rule.apply(raw)
-            if value is not None:
-                state.add("people", value)
-
-    @classmethod
-    def default(cls) -> "PeoplePack":
-        return cls(overrides=load_mapping("people_overrides"))
diff --git a/rule_packs/places.py b/rule_packs/places.py
deleted file mode 100644
index 1757eba..0000000
--- a/rule_packs/places.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""Rule pack for subject_places."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from core.run_state import RunState
-from rule_engine.base import RulePack
-from rules import OverrideRule
-
-
-class PlacesPack(RulePack):
-    name = "places"
-    output_types = ("places",)
-
-    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
-        self.rule = OverrideRule(overrides)
-
-    def apply(self, state: RunState) -> None:
-        for raw in state.work.get("subject_places", []):
-            value = self.rule.apply(raw)
-            if value is not None:
-                state.add("places", value)
-
-    @classmethod
-    def default(cls) -> "PlacesPack":
-        return cls(overrides=load_mapping("places_overrides"))
diff --git a/rule_packs/subgenres.py b/rule_packs/subgenres.py
deleted file mode 100644
index 97fb428..0000000
--- a/rule_packs/subgenres.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""Rule pack for subgenre tags."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
-from rules import MappingRule, PrefixRule
-
-
-class SubgenresPack(SubjectPack):
-    name = "subgenres"
-    output_types = ("subgenres",)
-    output_type = "subgenres"
-
-    def __init__(
-        self,
-        mapping: Mapping[str, str] | None = None,
-        remove_matched_subjects: bool = True,
-    ) -> None:
-        self.rules = (PrefixRule("subgenre"), MappingRule(mapping))
-        self.remove_matched_subjects = remove_matched_subjects
-
-    @classmethod
-    def default(cls) -> "SubgenresPack":
-        return cls(
-            mapping=load_mapping("subgenres"),
-            remove_matched_subjects=True,
-        )
diff --git a/rule_packs/times.py b/rule_packs/times.py
deleted file mode 100644
index 6808ca8..0000000
--- a/rule_packs/times.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""Rule pack for subject_times."""
-
-from __future__ import annotations
-
-from core.run_state import RunState
-from rule_engine.base import RulePack
-from rules import PassthroughRule
-
-
-class TimesPack(RulePack):
-    name = "times"
-    output_types = ("times",)
-
-    def __init__(self) -> None:
-        self.rule = PassthroughRule()
-
-    def apply(self, state: RunState) -> None:
-        for raw in state.work.get("subject_times", []):
-            value = self.rule.apply(raw)
-            if value is not None:
-                state.add("times", value)
-
-    @classmethod
-    def default(cls) -> "TimesPack":
-        return cls()
diff --git a/rules/__init__.py b/rules/__init__.py
index ed93b0e..e597b50 100644
--- a/rules/__init__.py
+++ b/rules/__init__.py
@@ -2,14 +2,10 @@
 
 from .match_result import RuleMatch
 from .mapping_rule import MappingRule
-from .override_rule import OverrideRule
-from .passthrough_rule import PassthroughRule
 from .prefix_rule import PrefixRule
 
 __all__ = [
     "MappingRule",
-    "OverrideRule",
-    "PassthroughRule",
     "PrefixRule",
     "RuleMatch",
 ]
diff --git a/rules/override_rule.py b/rules/override_rule.py
deleted file mode 100644
index 0fe74c1..0000000
--- a/rules/override_rule.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""Override-based normalization for field values."""
-
-from __future__ import annotations
-
-from collections.abc import Mapping
-
-from rule_engine.normalization import normalize
-
-
-class OverrideRule:
-    """Normalize a field value using overrides with raw fallback."""
-
-    def __init__(self, overrides: Mapping[str, str] | None = None) -> None:
-        self.overrides = dict(overrides or {})
-
-    def apply(self, raw: str) -> str | None:
-        cleaned = raw.strip()
-        if not cleaned:
-            return None
-        return self.overrides.get(normalize(raw), cleaned)
diff --git a/rules/passthrough_rule.py b/rules/passthrough_rule.py
deleted file mode 100644
index 5dbb88b..0000000
--- a/rules/passthrough_rule.py
+++ /dev/null
@@ -1,11 +0,0 @@
-"""Passthrough normalization for field values."""
-
-from __future__ import annotations
-
-
-class PassthroughRule:
-    """Return cleaned field values without additional transformation."""
-
-    def apply(self, raw: str) -> str | None:
-        cleaned = raw.strip()
-        return cleaned or None
diff --git a/scripts/README.md b/scripts/README.md
index 5ac204c..98f726e 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,12 +1,22 @@
 # scripts
 
-Tools for migrating Open Library's legacy subject strings to canonical typed tags.
+Tools for running the current subject-migration dry runs.
 
 ---
 
 ## Overview
 
-Open Library works currently have a flat `subjects` list (plus `subject_people`, `subject_places`, `subject_times`) containing a mix of genres, themes, tropes, catalog codes, reading levels, and noise. These scripts help convert that legacy data into structured, typed canonical tags.
+The current migration scope is intentionally narrow:
+
+- `content_formats` is the only actively developed type-specific migration pack
+- `subject_diagnostics` is kept as a minimal QA/support pack
+
+The script generates subject-migration proposals from Open Library work JSON and shows:
+
+- which structured tags would be proposed
+- which legacy subjects would be removed
+- which legacy subjects would remain
+- which subjects matched with `move` vs `extract_only`
 
 ---
 
@@ -14,12 +24,11 @@ Open Library works currently have a flat `subjects` list (plus `subject_people`,
 
 ### `migrate_subjects.py`
 
-The current runner/compatibility entry point. Given a work's OL JSON, it:
+The current runner. Given a work's OL JSON, it:
 
 1. Loads the legacy `subjects`, `subject_people`, `subject_places`, and `subject_times` lists
-2. Builds a `SubjectClassifier` from one or more enabled rule packs
-3. Applies rule-based and keyword matching to classify each string into the correct canonical type
-3. Outputs a structured tag object ready for import into the new schema
+2. Applies the currently enabled subject packs
+3. Outputs a proposal-style run report for review
 
 **Usage:**
 ```bash
@@ -29,122 +38,110 @@ python scripts/migrate_subjects.py --work OL82563W
 # From a local JSON file
 python scripts/migrate_subjects.py --file work.json
 
-# Legacy-compatible fixed-order wrapper
-./scripts/run_legacy_subjects.sh --file work.json
-
 # Batch from a newline-delimited list of OL IDs
 python scripts/migrate_subjects.py --batch ol_ids.txt --output output/
 
 # Dry run (print proposed mappings without writing)
 python scripts/migrate_subjects.py --work OL82563W --dry-run
 
-# Run the old full sequence explicitly through the wrapper
-./scripts/run_legacy_subjects.sh --file work.json --dry-run
-
-# Run only a subset of rule packs
-python scripts/migrate_subjects.py --file work.json --pack genres --pack content_formats --pack subject_diagnostics --dry-run
-
-# Run a single tag-type module
+# Run only content_formats
 python scripts/migrate_subjects.py --file work.json --pack content_formats --dry-run
+
+# Run content_formats plus diagnostics
+python scripts/migrate_subjects.py --file work.json --pack subject_mappings --dry-run
 ```
 
-`migrate_subjects.py` no longer enables a default full preset when `--pack` is omitted. If you want the old full sequence, use `run_legacy_subjects.sh` or pass the pack list explicitly.
+Available packs:
 
-`run_legacy_subjects.sh` is just a thin wrapper around `migrate_subjects.py` with the pack order written out explicitly, so it is easy to inspect and change. Any extra CLI args are forwarded as-is.
+- `content_formats`
+- `subject_diagnostics`
+- `subject_mappings` (preset for both)
 
 **Output format:**
 ```json
 {
   "work_id": "OL82563W",
-  "literary_form": ["Fiction"],
-  "genres": ["Tragedy", "Gothic", "Romance"],
-  "subgenres": ["Psychological", "Historical"],
-  "content_formats": ["Novel"],
-  "moods": [],
-  "literary_themes": ["Love", "Revenge", "Death"],
-  "literary_tropes": ["Foundlings", "Love Triangles"],
-  "main_topics": ["Interpersonal relations", "Family life", "Class"],
-  "sub_topics": ["Country life", "Rural families", "Landscape"],
-  "people": ["Heathcliff", "Catherine Earnshaw"],
-  "places": ["Yorkshire", "England"],
-  "times": [],
-  "things": [],
-  "unmapped": ["Pr4172 .w7 2009c", "823/.8", "Zhang pian xiao shuo"]
+  "proposed_tags": {
+    "content_formats": ["Memoir", "Biography"],
+    "reading_level": ["Grade 4"],
+    "unmapped": ["abc"]
+  },
+  "subject_proposal": {
+    "original": ["Memoirs", "Biography", "abc", "Grade 4"],
+    "removed": ["Memoirs"],
+    "remaining": ["Biography", "abc", "Grade 4"]
+  },
+  "subject_matches": [
+    {
+      "subject": "Memoirs",
+      "output_type": "content_formats",
+      "value": "Memoir",
+      "action": "move"
+    },
+    {
+      "subject": "Biography",
+      "output_type": "content_formats",
+      "value": "Biography",
+      "action": "extract_only"
+    }
+  ]
 }
 ```
 
-The `unmapped` field collects strings that couldn't be classified — these are candidates for manual review or the `other` / droppable bucket.
+This report is meant for dry-run review and QA, not as a final persisted work format.
 
 ---
 
 ### Architecture
 
-The reusable classification core now lives outside the script entry point:
+The current implementation is intentionally small and only supports the present migration scope:
 
 ```text
 core/
-  json_loader.py                # JSON resource loading for default assembly
-  subject_classifier.py         # public work-level orchestration core
-  pack_registry.py              # stable pack names -> factories / presets
-  classifier_assembler.py       # pack resolution + classifier assembly
-  migrate_subject_classifier.py # compatibility shim for older imports
+  json_loader.py         # JSON resource loading
+  run_state.py           # shared run/proposal state
+  subject_classifier.py  # work-level orchestration + report output
 rule_engine/
-  base.py                       # RulePack interface
-  normalization.py              # shared text normalization helpers
+  base.py                # RulePack interface
+  normalization.py       # shared text normalization helpers
 rules/
-  prefix_rule.py                # subject prefix matching
-  mapping_rule.py               # normalized direct mapping
-  override_rule.py              # override-based field normalization
-  passthrough_rule.py           # cleaned passthrough fields
+  match_result.py        # structured value + action matches
+  mapping_rule.py        # normalized mapping matches
+  prefix_rule.py         # prefix-based matches
 rule_packs/
-  genres.py                     # one module per tag type
-  content_formats.py
-  audience.py
-  literary_themes.py
-  literary_tropes.py
-  main_topics.py
-  people.py
-  places.py
-  times.py
-config/
-  packs/                        # future static pack configs
+  content_formats.py     # current migration logic under active development
+  subject_diagnostics.py # minimal QA/support pack
+  utils.py               # shared subject-pack execution helper
 ```
 
-`scripts/migrate_subjects.py` remains the operational entry point, but classification logic is now encapsulated in the shared core so future runners can reuse it.
-
-The classification core itself is kept narrow: `SubjectClassifier` consumes a normalized `work` object plus already-constructed packs, and returns a result. JSON resource loading now lives in the default assembly layer rather than inside individual packs.
+`scripts/migrate_subjects.py` remains the operational entry point and keeps the pack selection local to the script.
 
 ### Adding Mapping Rules
 
-Mappings live in `resources/mappings/`. Each file covers one tag type:
+Mappings live in `resources/mappings/`.
 
 ```
 resources/
   mappings/
-    genres.json          # legacy string → canonical genre
-    subgenres.json        # legacy string → canonical subgenre
     content_formats.json  # legacy string → canonical format
-    literary_themes.json  # legacy string → canonical theme
-    literary_tropes.json  # legacy string → canonical trope
     droppable.json        # strings to discard (reading levels, codes, etc.)
-    people_overrides.json # OL people string → canonical name
-    places_overrides.json # OL place string → canonical place
 ```
 
-Each mapping file is a JSON object where keys are legacy strings (lowercase, stripped) and values are the canonical tag:
+`content_formats.json` is a JSON object where keys are legacy subject strings (lowercase, stripped) and values are canonical content format tags:
 
 ```json
 {
-  "historical fiction": "Historical",
-  "fiction, historical": "Historical",
-  "psychological fiction": "Psychological",
-  "gothic fiction": "Gothic",
-  "english gothic fiction": "Gothic"
+  "memoirs": "Memoir",
+  "biography": "Biography",
+  "letters": "Letters",
+  "novels": "Novel"
 }
 ```
 
+`ContentFormatsPack` then splits those mappings into:
 
----
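+- `move` cases (the tag is proposed and the legacy subject is removed) for currently clean first-pass formats
+- `extract_only` cases (the tag is proposed but the legacy subject is kept) for overlapping or not-yet-approved removals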
 
 ## Development
 
@@ -160,5 +157,3 @@ Requirements: `requests`, `tqdm` (for batch progress)
 ## Data Sources
 
 - OL Work JSON: `https://openlibrary.org/works/{OL_ID}.json`
-- OL Search API: `https://openlibrary.org/search.json`
-- Tag objects: `https://openlibrary.org/tags/{TAG_ID}.json`
diff --git a/scripts/migrate_subjects.py b/scripts/migrate_subjects.py
index fe64208..b858b3a 100644
--- a/scripts/migrate_subjects.py
+++ b/scripts/migrate_subjects.py
@@ -17,13 +17,15 @@
 import os
 import sys
 from pathlib import Path
+from typing import Callable
 
 REPO_ROOT = Path(__file__).resolve().parent.parent
 if str(REPO_ROOT) not in sys.path:
     sys.path.insert(0, str(REPO_ROOT))
 
-from core.classifier_assembler import build_subject_classifier
-from core.pack_registry import AVAILABLE_PACK_NAMES
+from core.subject_classifier import SubjectClassifier
+from rule_packs.content_formats import ContentFormatsPack
+from rule_packs.subject_diagnostics import SubjectDiagnosticsPack
 
 # ---------------------------------------------------------------------------
 # Paths
@@ -31,6 +33,39 @@
 
 OL_WORK_URL = "https://openlibrary.org/works/{work_id}.json"
 
+PackFactory = Callable[[], object]
+PACK_PRESETS: dict[str, tuple[str, ...]] = {
+    "subject_mappings": ("content_formats", "subject_diagnostics"),
+}
+PACK_FACTORIES: dict[str, PackFactory] = {
+    "content_formats": ContentFormatsPack.default,
+    "subject_diagnostics": SubjectDiagnosticsPack.default,
+}
+AVAILABLE_PACK_NAMES = tuple(sorted({*PACK_FACTORIES, *PACK_PRESETS}))
+
+
+def resolve_pack_names(enabled_packs: list[str] | None) -> list[str]:
+    selected = list(enabled_packs or [])
+    expanded: list[str] = []
+    for name in selected:
+        if name in PACK_PRESETS:
+            expanded.extend(PACK_PRESETS[name])
+            continue
+        expanded.append(name)
+    return expanded
+
+
+def build_subject_classifier(enabled_packs: list[str] | None = None) -> SubjectClassifier:
+    selected = resolve_pack_names(enabled_packs)
+    missing = [name for name in selected if name not in PACK_FACTORIES]
+    if missing:
+        available = ", ".join(AVAILABLE_PACK_NAMES)
+        missing_display = ", ".join(sorted(missing))
+        raise ValueError(
+            f"Unknown rule pack(s): {missing_display}. Available: {available}"
+        )
+    return SubjectClassifier(rule_packs=[PACK_FACTORIES[name]() for name in selected])
+
 
 # ---------------------------------------------------------------------------
 # Fetching
diff --git a/scripts/run_legacy_subjects.sh b/scripts/run_legacy_subjects.sh
deleted file mode 100755
index 829baa4..0000000
--- a/scripts/run_legacy_subjects.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-
-python3 "${REPO_ROOT}/scripts/migrate_subjects.py" \
-  --pack literary_form \
-  --pack audience \
-  --pack genres \
-  --pack subgenres \
-  --pack content_formats \
-  --pack moods \
-  --pack literary_themes \
-  --pack literary_tropes \
-  --pack main_topics \
-  --pack subject_diagnostics \
-  --pack people \
-  --pack places \
-  --pack times \
-  "$@"

From d5b7c2922fe6376f3d2e108986275779e2845534 Mon Sep 17 00:00:00 2001
From: Kaftow <166228791+Kaftow@users.noreply.github.com>
Date: Wed, 22 Apr 2026 02:07:23 +0900
Subject: [PATCH 9/9] Minimize rule_pack package

---
 demo_content_formats.json                     | 20 ++++++++++
 rule_packs/content_formats.py                 |  5 +--
 rule_packs/{utils.py => subject_migration.py} | 38 +++++++------------
 3 files changed, 35 insertions(+), 28 deletions(-)
 create mode 100644 demo_content_formats.json
 rename rule_packs/{utils.py => subject_migration.py} (52%)

diff --git a/demo_content_formats.json b/demo_content_formats.json
new file mode 100644
index 0000000..0155f48
--- /dev/null
+++ b/demo_content_formats.json
@@ -0,0 +1,20 @@
+{
+  "key": "/works/OLDEMO1W",
+  "subjects": [
+    "Memoirs",
+    "Anthology",
+    "Letters",
+    "Dictionary",
+    "Biography",
+    "Autobiography",
+    "Manga",
+    "Encyclopedia",
+    "Novel",
+    "format:Diary",
+    "abc",
+    "Grade 4"
+  ],
+  "subject_people": [],
+  "subject_places": [],
+  "subject_times": []
+}
diff --git a/rule_packs/content_formats.py b/rule_packs/content_formats.py
index e4d271d..6131bbb 100644
--- a/rule_packs/content_formats.py
+++ b/rule_packs/content_formats.py
@@ -5,7 +5,7 @@
 from collections.abc import Mapping
 
 from core.json_loader import load_mapping
-from rule_packs.utils import SubjectPack
+from rule_packs.subject_migration import SubjectMigrationPack
 from rules import MappingRule, PrefixRule
 
 MOVE = "move"
@@ -22,7 +22,7 @@
 )
 
 
-class ContentFormatsPack(SubjectPack):
+class ContentFormatsPack(SubjectMigrationPack):
     name = "content_formats"
     output_types = ("content_formats",)
     output_type = "content_formats"
@@ -37,7 +37,6 @@ def __init__(
             MappingRule(move_mapping, default_action=MOVE),
             MappingRule(extract_only_mapping, default_action=EXTRACT_ONLY),
         )
-        self.remove_matched_subjects = False
 
     @classmethod
     def default(cls) -> "ContentFormatsPack":
diff --git a/rule_packs/utils.py b/rule_packs/subject_migration.py
similarity index 52%
rename from rule_packs/utils.py
rename to rule_packs/subject_migration.py
index 00943b1..7b4aa69 100644
--- a/rule_packs/utils.py
+++ b/rule_packs/subject_migration.py
@@ -1,4 +1,4 @@
-"""Shared helpers for subject-based packs."""
+"""Shared helpers for subject-driven migration packs."""
 
 from __future__ import annotations
 
@@ -10,38 +10,26 @@
 from rules import RuleMatch
 
 
-class SubjectValueRule(Protocol):
-    def match(self, raw: str) -> RuleMatch | str | None: ...
+class SubjectMatchRule(Protocol):
+    def match(self, raw: str) -> RuleMatch | None: ...
 
 
-def _coerce_match(match: RuleMatch | str, default_action: str) -> RuleMatch:
-    if isinstance(match, RuleMatch):
-        return match
-    return RuleMatch(value=match, action=default_action)
-
-
-def classify_subject_value(
-    raw: str,
-    rules: Iterable[SubjectValueRule],
-    default_action: str,
-) -> RuleMatch | None:
+def first_match(raw: str, rules: Iterable[SubjectMatchRule]) -> RuleMatch | None:
     for rule in rules:
         match = rule.match(raw)
         if match is not None:
-            return _coerce_match(match, default_action)
+            return match
     return None
 
 
-def apply_subject_pack(
+def apply_subject_migration(
     state: RunState,
     output_type: str,
-    rules: Iterable[SubjectValueRule],
-    remove_matched_subjects: bool,
+    rules: Iterable[SubjectMatchRule],
 ) -> None:
-    default_action = "move" if remove_matched_subjects else "extract_only"
     next_subjects: list[str] = []
     for raw in state.remaining_subjects:
-        match = classify_subject_value(raw, rules, default_action=default_action)
+        match = first_match(raw, rules)
         if match is None:
             next_subjects.append(raw)
             continue
@@ -60,15 +48,15 @@ def apply_subject_pack(
     state.remaining_subjects = next_subjects
 
 
-class SubjectPack(RulePack):
-    """Small helper for packs that operate on the shared subject sequence."""
+class SubjectMigrationPack(RulePack):
+    """Base class for packs that migrate legacy subjects into structured tags."""
 
-    output_type = ""
+    output_type: str = ""
+    rules: Iterable[SubjectMatchRule]
 
     def apply(self, state: RunState) -> None:
-        apply_subject_pack(
+        apply_subject_migration(
             state,
             output_type=self.output_type,
             rules=self.rules,
-            remove_matched_subjects=self.remove_matched_subjects,
         )