tuirk · dependabot · Jun 6, 2026 · Jun 7, 2026
@@ -73,9 +73,9 @@ updates:
         versions: [">=0.5.0"]
       - dependency-name: numpy
         versions: [">=2.0.0"]
-      # transformers 5.x breaks torch 2.5.1 + Dockerfile 4.x assert; cap in requirements.txt.
+      # transformers >=5.10 imports torch.float8_e8m0fnu; torch 2.6.0 lacks it.
       - dependency-name: transformers
-        versions: [">=5.0.0"]
+        versions: [">=5.10.0"]
       # pyrate-limiter 4.x API change; defer until rate-limiter path is retested.
       - dependency-name: pyrate-limiter
         versions: [">=4.0.0"]

@@ -63,6 +63,9 @@ jobs:
           python-version: '3.11'
           cache: 'pip'
           cache-dependency-path: nlp-service/requirements.txt
+      # Match nlp-service/Dockerfile: CPU torch before requirements (avoids
+      # CUDA wheel + keeps transformers 5.x compatible with torch 2.6.x).
+      - run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu
       - run: pip install -r requirements.txt
         working-directory: nlp-service
       - run: python -m pytest

@@ -10,6 +10,13 @@ ship with a `migrate.py` step that runs at boot.
 
 ## [Unreleased]
 
+### Security
+
+- **nlp-service:** bump `torch` to 2.6.0 (CPU) and exact-pin `transformers`
+  to 5.9.0 to clear Scorecard alert #70 (27 OSV IDs). Keep below 5.10 because
+  5.10+ requires torch float8 symbols 2.6.0 lacks. CI `unit-tests-nlp` now
+  installs the CPU torch wheel before `requirements.txt` (Dockerfile parity).
+
 ## [0.2.2] — 2026-05-30
 
 Incremental release over 0.2.1: pre-compile health checks, DeepSeek V4 Flash,

@@ -109,7 +109,7 @@ improvement at disproportionate maintenance cost for this project.
 
 | Alert | File:Line |
 |---|---|
-| 40 | nlp-service/Dockerfile:46 — `pip install ... torch==2.5.1 --index-url <cpu>` (was :44 pre-#121) |
+| 40 | nlp-service/Dockerfile:46 — `pip install ... torch==2.6.0 --index-url <cpu>` |
 | 41 | nlp-service/Dockerfile:48 — `pip install -r requirements.txt` (was :46 pre-#121) |
 | 62 | .github/workflows/integration-test.yml:67 — `pip install -r requirements.txt` |
 | 71 | nlp-service/Dockerfile:48 — same finding as #41 (Scorecard re-file after line shift) |
@@ -134,6 +134,18 @@ Re-evaluate when:
 - Upstream PyPI compromise affects any pinned dependency.
 - The pip dependency surface shrinks below ~30 transitive deps.
 
+## Bucket E — resolved (alert #70, VulnerabilitiesID)
+
+**Rule:** `VulnerabilitiesID` — OSV scan of dependency manifests.
+
+Scorecard #70 reported 27 open OSV IDs; triage (2026-06-06) found all 27 in
+`transformers` on PyPI. npm lockfiles (`app`, `cli`, `mcp-server`) were clean.
+Fix: bump `torch` to **2.6.0** (CPU wheel) and exact-pin `transformers` to
+**5.9.0** in [nlp-service/requirements.txt](../../nlp-service/requirements.txt).
+The exact pin avoids Scorecard/OSV treating a broad vulnerable range as still
+affected; 5.10+ still needs torch float8 symbols 2.6 lacks. Alert should
+auto-close on the next Scorecard run after merge.
+
 ## Bucket D — tracked TODO (1 alert, left open)
 
 **Alert 27 — `BranchProtectionID`.**

@@ -36,19 +36,19 @@ COPY requirements.txt /app/requirements.txt
 # (~2 GB) from PyPI, bloating the image to 10 GB+. sentence-transformers and
 # keybert find torch already installed and skip their own resolution.
 #
-# Torch MUST be pinned. sentence-transformers==3.3.1 (Nov 2024) calls
-# `.to(device)` on meta-device-initialised weights; torch >=2.7 raises
-# NotImplementedError("Cannot copy out of meta tensor") and requires
-# `.to_empty()` instead. An unpinned install after a --no-cache rebuild
-# pulls the latest torch and breaks /resolve/embedding + /extract/keybert
-# at runtime — and the unit tests stub sentence_transformers, so nothing
-# catches the drift.
-RUN pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
+# Torch MUST be pinned. sentence-transformers calls `.to(device)` on meta-
+# device-initialised weights; torch >=2.7 raises NotImplementedError
+# ("Cannot copy out of meta tensor") and requires `.to_empty()` instead.
+# 2.6.x has the float8 symbols transformers <5.10 needs; 2.5.1 does not.
+# An unpinned install after a --no-cache rebuild pulls the latest torch
+# and breaks /resolve/embedding + /extract/keybert at runtime — and unit
+# tests stub sentence_transformers, so nothing catches the drift.
+RUN pip install --no-cache-dir torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu
 
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Fail the image build if torch/transformers pins drift (pytest stubs hide this).
-RUN python -c "import transformers; from sentence_transformers import SentenceTransformer; assert transformers.__version__.startswith('4.')"
+RUN python -c "import transformers; from sentence_transformers import SentenceTransformer; assert transformers.__version__.startswith('5.')"
 
 # Override markitdown's youtube-transcript-api~=1.0.0 pin. 1.0.x returns empty
 # timedtext bodies on current YouTube (ParseError) even from residential IPs;

@@ -25,11 +25,12 @@ rake-nltk==1.0.6
 yake==0.7.3
 keybert==0.9.0
 sentence-transformers==5.5.0
-# Dockerfile pins torch==2.5.1 (meta-tensor compat). transformers 5.x imports
-# torch.float8_e8m0fnu at load time, which 2.5.1 lacks — breaks
-# /resolve/embedding on fresh pip installs. Cap at 4.x; pytest stubs ST so
-# only test_embedding_import_smoke.py catches drift.
-transformers>=4.41.0,<5.0.0
+# Dockerfile pins torch==2.6.0 (CPU wheel). torch 2.5.1 breaks transformers
+# 5.x imports; torch >=2.7 breaks sentence-transformers meta-device init
+# (NotImplementedError on .to(device)).
+# Exact pin keeps Scorecard/OSV from treating a broad range as still affected
+# (alert #70) and stays below 5.10, which imports torch.float8_e8m0fnu.
+transformers==5.9.0
 # pytextrank: spaCy-native TextRank component (PyPI-available, no git dep).
 pytextrank==3.3.0
 scikit-learn==1.8.0
@@ -39,7 +40,7 @@ nltk==3.9.4
 rapidfuzz>=3.14.5
 # Recovers truncated JSON from Gemini 2.5 Flash repetition-loop bug
 # (see issue in repo) — salvages already-billed output on parse failure.
-json-repair>=0.59.10
+json-repair>=0.60.1
 # Commit 7: Chroma vector store (embedded, no separate server). 0.4.x API:
 # PersistentClient, get_or_create_collection, cosine distance.
 chromadb==0.4.24

@@ -1,12 +1,12 @@
 """Smoke test: real sentence-transformers import stack (not conftest-stubbed).
 
 conftest.py stubs sentence_transformers for speed, so a fresh ``pip install``
-can pull transformers 5.x incompatible with the Dockerfile's torch==2.5.1 pin
+can pull transformers/torch versions incompatible with the Dockerfile pins
 without any unit test failing. This runs the import in a subprocess so the
 stub never applies.
 
-Regression: resolve 500 ``embedding_failed`` when transformers 5.10+ met
-torch 2.5.1 after a Docker requirements-layer cache bust (2026-06-06).
+Regression: resolve 500 ``embedding_failed`` when transformers 5.x met
+torch 2.5.1 (missing float8) or torch >=2.7 (meta-tensor .to(device)).
 """
 
 from __future__ import annotations
@@ -19,7 +19,7 @@ def test_sentence_transformer_imports_with_pinned_torch() -> None:
     script = (
         "import transformers; "
         "from sentence_transformers import SentenceTransformer; "
-        "assert transformers.__version__.startswith('4.'), transformers.__version__; "
+        "assert transformers.__version__.startswith('5.'), transformers.__version__; "
         "print('ok')"
     )
     result = subprocess.run(

@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""Map Scorecard #70 OSV IDs to transformers fix boundaries."""
+from __future__ import annotations
+
+import json
+import urllib.request
+
+IDS = [
+    "PYSEC-2023-299", "GHSA-282v-666c-3fvg",
+    "GHSA-37mw-44qp-f5jm", "GHSA-37q5-v5qm-c9v8",
+    "PYSEC-2023-300", "GHSA-3863-2447-669p",
+    "GHSA-4w7r-h757-3r74", "GHSA-59p9-h35m-wg4g",
+    "GHSA-69w3-r845-3855", "GHSA-6rvg-6v2m-4j46",
+    "GHSA-9356-575x-2w9m", "GHSA-fpwr-67px-3qhx",
+    "PYSEC-2024-229", "GHSA-hxxf-235m-72v3",
+    "GHSA-jjph-296x-mrcr", "GHSA-phhr-52qp-3mj4",
+    "GHSA-q2wp-rjmx-x6x9",
+    "PYSEC-2025-40", "GHSA-qq3j-4f4f-9583",
+    "PYSEC-2024-227", "GHSA-qxrp-vhvm-j765",
+    "GHSA-rcv9-qm8p-9p6j",
+    "PYSEC-2023-301", "GHSA-v68g-wm8c-6x7j",
+    "PYSEC-2024-228", "GHSA-wrfc-pvp9-mr9g",
+    "PYSEC-2025-211", "PYSEC-2025-212", "PYSEC-2025-213",
+    "PYSEC-2025-214", "PYSEC-2025-215", "PYSEC-2025-216",
+    "PYSEC-2025-217", "PYSEC-2025-218",
+]
+
+seen: set[str] = set()
+max_la = ""
+needs_5x: list[str] = []
+for vid in IDS:
+    if vid in seen:
+        continue
+    seen.add(vid)
+    with urllib.request.urlopen(f"https://api.osv.dev/v1/vulns/{vid}", timeout=20) as r:
+        v = json.load(r)
+    pkg = v["affected"][0]
+    la = fix = None
+    for rng in pkg.get("ranges", []):
+        for ev in rng.get("events", []):
+            if "last_affected" in ev:
+                la = ev["last_affected"]
+            if "fixed" in ev:
+                fix = ev["fixed"]
+    if la and (not max_la or la > max_la):
+        max_la = la
+    if fix and fix.startswith("5."):
+        needs_5x.append(f"{vid} (fix {fix})")
+    print(f"{vid:22} last_affected={la or '-':10} fixed={fix or '-'}")
+
+print(f"\nMax last_affected in 4.x: {max_la}")
+print(f"Need 5.x to fix: {len(needs_5x)}")
+for x in needs_5x:
+    print(f"  {x}")