hidai25 · hidai25 · May 27, 2026 · May 27, 2026
diff --git a/evalview/core/chaos.py b/evalview/core/chaos.py
@@ -30,7 +30,7 @@
 
 import hashlib
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 
 # ── Mode constants ──────────────────────────────────────────────────────────
@@ -193,7 +193,7 @@ def build_scenario(
     The "one disruption per step" rule keeps the simulator
     deterministic and the scenario easy for humans to read.
     """
-    seen: set[int] = set()
+    seen: Set[int] = set()
     unique: List[ChaosDisruption] = []
     for d in disruptions:
         if d.step_index in seen:
@@ -233,7 +233,7 @@ def random_scenario(
         raise ValueError("max_steps must be positive")
 
     disruptions: List[ChaosDisruption] = []
-    used_steps: set[int] = set()
+    used_steps: Set[int] = set()
     for i in range(n_disruptions):
         mode = modes[_seeded_choice(seed, "mode", i) % len(modes)]
         # Pick a step that isn't taken yet. After many collisions we

diff --git a/evalview/core/fleet.py b/evalview/core/fleet.py
@@ -26,7 +26,7 @@
 import statistics
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
 
 
 # ── Defaults ────────────────────────────────────────────────────────────────
@@ -216,7 +216,7 @@ def summarize_instance(name: str, entries: Sequence[Dict[str, Any]]) -> Instance
     cost = 0.0
     first_seen: Optional[str] = None
     last_seen: Optional[str] = None
-    failing: set[str] = set()
+    failing: Set[str] = set()
 
     for e in entries:
         if "total_tests" not in e:
@@ -416,7 +416,7 @@ def discover_history_files(
             found.extend(sorted(dp.glob("*.jsonl")))
 
     # Dedup while preserving order.
-    seen: set[Path] = set()
+    seen: Set[Path] = set()
     out: List[Path] = []
     for p in found:
         resolved = p.resolve()

diff --git a/evalview/core/freshness.py b/evalview/core/freshness.py
@@ -25,7 +25,9 @@
 import re
 from dataclasses import dataclass
 from datetime import datetime, timezone
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Sequence, Tuple
+
+from evalview.core.text import STOPWORDS as _STOPWORDS
 
 
 # ── Tunables ────────────────────────────────────────────────────────────────
@@ -49,26 +51,10 @@
 _MAX_EXAMPLES_PER_CLUSTER = 5
 
 
-# A short English stoplist. Keeping this tiny on purpose: Jaccard is already
-# coarse and overly aggressive stopword filtering throws away signal. These
-# are the words whose presence is least informative for query similarity.
-_STOPWORDS = frozenset({
-    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
-    "do", "does", "did", "doing", "have", "has", "had", "having",
-    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
-    "my", "your", "his", "its", "our", "their", "mine", "yours", "ours",
-    "this", "that", "these", "those",
-    "and", "or", "but", "if", "then", "else", "of", "in", "on", "at", "to",
-    "for", "with", "by", "from", "as", "about", "into", "than",
-    "can", "could", "would", "should", "will", "shall", "may", "might", "must",
-    "not", "no", "so", "just", "also", "very", "really", "please",
-})
-
-
 # ── Tokenization & similarity ───────────────────────────────────────────────
 
 
-def normalize_query(query: str) -> frozenset[str]:
+def normalize_query(query: str) -> FrozenSet[str]:
     """Lower-case, strip punctuation, collapse numbers, drop stopwords, tokenize.
 
     The returned set is used directly for Jaccard similarity. We deliberately
@@ -93,7 +79,7 @@ def normalize_query(query: str) -> frozenset[str]:
     return frozenset(tokens)
 
 
-def jaccard(a: frozenset[str], b: frozenset[str]) -> float:
+def jaccard(a: FrozenSet[str], b: FrozenSet[str]) -> float:
     """Standard Jaccard set similarity in ``[0.0, 1.0]``.
 
     Defined as ``|A ∩ B| / |A ∪ B|``. Returns 0.0 when both sets are empty
@@ -169,7 +155,7 @@ def compute_coverage(
     Jaccard similarity. If the suite is empty, every production query is
     classified as uncovered with ``similarity == 0.0``.
     """
-    suite_tokens: List[Tuple[str, frozenset[str]]] = [
+    suite_tokens: List[Tuple[str, FrozenSet[str]]] = [
         (q, normalize_query(q)) for q in suite_queries if q
     ]
 
@@ -230,7 +216,7 @@ def examples(self, limit: int = _MAX_EXAMPLES_PER_CLUSTER) -> List[str]:
 
 def _pick_representative(
     members: Sequence[str],
-    token_cache: Dict[str, frozenset[str]],
+    token_cache: Dict[str, FrozenSet[str]],
 ) -> Tuple[str, float]:
     """Return ``(representative, avg_intra_similarity)`` for a cluster.
 
@@ -279,11 +265,11 @@ def cluster_queries(
     volumes typical of an early-production agent (hundreds to low thousands),
     this is plenty fast.
     """
-    token_cache: Dict[str, frozenset[str]] = {}
+    token_cache: Dict[str, FrozenSet[str]] = {}
     # Use ``id(seed)`` would be unstable across runs; instead key by the
     # seed string itself, with a counter as tiebreaker for duplicates.
     cluster_members: List[List[str]] = []
-    cluster_seeds: List[frozenset[str]] = []
+    cluster_seeds: List[FrozenSet[str]] = []
 
     for q in queries:
         if not q:

diff --git a/evalview/core/goal_drift.py b/evalview/core/goal_drift.py
@@ -31,7 +31,9 @@
 
 import re
 from dataclasses import dataclass, field
-from typing import Callable, Iterable, List, Optional, Sequence, Tuple
+from typing import Callable, FrozenSet, Iterable, List, Optional, Sequence, Tuple
+
+from evalview.core.text import STOPWORDS as _STOPWORDS
 
 
 # ── Tunables ────────────────────────────────────────────────────────────────
@@ -47,21 +49,6 @@
 _MAX_TEXT_CHARS = 4096
 
 
-# Mirror the small stoplist in evalview.core.freshness so the two modules
-# behave consistently when a future refactor unifies them.
-_STOPWORDS = frozenset({
-    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
-    "do", "does", "did", "doing", "have", "has", "had", "having",
-    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
-    "my", "your", "his", "its", "our", "their", "mine", "yours", "ours",
-    "this", "that", "these", "those",
-    "and", "or", "but", "if", "then", "else", "of", "in", "on", "at", "to",
-    "for", "with", "by", "from", "as", "about", "into", "than",
-    "can", "could", "would", "should", "will", "shall", "may", "might", "must",
-    "not", "no", "so", "just", "also", "very", "really", "please",
-})
-
-
 # ── Data shapes ─────────────────────────────────────────────────────────────
 
 
@@ -106,7 +93,7 @@ def severity(self) -> str:
 # ── Tokenization (kept local to avoid coupling to freshness module) ─────────
 
 
-def _tokens(text: str) -> frozenset[str]:
+def _tokens(text: str) -> FrozenSet[str]:
     """Lower / strip / collapse digits / drop stopwords → token set.
 
     Same digit normalization as the freshness module: order numbers and
@@ -124,7 +111,7 @@ def _tokens(text: str) -> frozenset[str]:
     )
 
 
-def _jaccard(a: frozenset[str], b: frozenset[str]) -> float:
+def _jaccard(a: FrozenSet[str], b: FrozenSet[str]) -> float:
     if not a or not b:
         return 0.0
     union = len(a | b)

diff --git a/evalview/core/retrieval_lineage.py b/evalview/core/retrieval_lineage.py
@@ -27,23 +27,12 @@
 
 import re
 from dataclasses import dataclass, field
-from typing import Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Callable, Dict, FrozenSet, List, Optional, Sequence, Tuple
 
+from evalview.core.text import STOPWORDS as _STOPWORDS
 
-# ── Tunables ────────────────────────────────────────────────────────────────
 
-# Stoplist mirrors the freshness/goal_drift modules — keep them in sync.
-_STOPWORDS = frozenset({
-    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
-    "do", "does", "did", "doing", "have", "has", "had", "having",
-    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
-    "my", "your", "his", "its", "our", "their", "mine", "yours", "ours",
-    "this", "that", "these", "those",
-    "and", "or", "but", "if", "then", "else", "of", "in", "on", "at", "to",
-    "for", "with", "by", "from", "as", "about", "into", "than",
-    "can", "could", "would", "should", "will", "shall", "may", "might", "must",
-    "not", "no", "so", "just", "also", "very", "really", "please",
-})
+# ── Tunables ────────────────────────────────────────────────────────────────
 
 _MAX_TEXT_CHARS = 8192
 
@@ -139,7 +128,7 @@ class StaleMemoryFlag:
 # ── Tokenization ────────────────────────────────────────────────────────────
 
 
-def _tokens(text: str) -> frozenset[str]:
+def _tokens(text: str) -> FrozenSet[str]:
     if not text:
         return frozenset()
     truncated = text[:_MAX_TEXT_CHARS].lower()
@@ -151,7 +140,7 @@ def _tokens(text: str) -> frozenset[str]:
     )
 
 
-def _overlap(chunk_tokens: frozenset[str], output_tokens: frozenset[str]) -> float:
+def _overlap(chunk_tokens: FrozenSet[str], output_tokens: FrozenSet[str]) -> float:
     """Fraction of chunk tokens that appear in the output.
 
     This is *recall on the chunk*, not Jaccard. We want "did the output

diff --git a/evalview/core/text.py b/evalview/core/text.py
@@ -0,0 +1,23 @@
+"""Tiny shared text helpers used by the Jaccard-style analyses.
+
+Centralizing the stoplist here keeps ``freshness``, ``goal_drift``, and
+``retrieval_lineage`` consistent. Kept intentionally small: overlap on a set
+of word tokens is already coarse, and aggressive stopword filtering throws
+away signal.
+"""
+from __future__ import annotations
+
+from typing import FrozenSet
+
+
+STOPWORDS: FrozenSet[str] = frozenset({  # lower-case; compare lower-cased tokens
+    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+    "do", "does", "did", "doing", "have", "has", "had", "having",
+    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
+    "my", "your", "his", "its", "our", "their", "mine", "yours", "ours",
+    "this", "that", "these", "those",
+    "and", "or", "but", "if", "then", "else", "of", "in", "on", "at", "to",
+    "for", "with", "by", "from", "as", "about", "into", "than",
+    "can", "could", "would", "should", "will", "shall", "may", "might", "must",
+    "not", "no", "so", "just", "also", "very", "really", "please",
+})