diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index d6d306a..60ddd05 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -10,7 +10,6 @@ on:
 env:
   REGISTRY: ghcr.io
   IMAGE_NAME: ${{ github.repository }}
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 
 jobs:
   build-backend:
@@ -20,7 +19,7 @@ jobs:
       packages: write
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -59,7 +58,7 @@ jobs:
       packages: write
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 2990e78..9b3eca3 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -4,9 +4,6 @@ on:
   push:
   pull_request:
 
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
   docs:
     runs-on: ubuntu-22.04
@@ -17,9 +14,9 @@ jobs:
         poetry-version: ["2.1.0"]
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 61d5b39..142365c 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -5,9 +5,6 @@ on:
     branches: [main]
   pull_request:
 
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
   integration:
     runs-on: ubuntu-22.04
@@ -33,9 +30,9 @@ jobs:
           --health-retries 5
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index a757709..ae0079d 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -4,9 +4,6 @@ on:
   push:
   pull_request:
 
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
   lint:
     runs-on: ubuntu-22.04
@@ -17,9 +14,9 @@ jobs:
         poetry-version: ["2.1.0"]
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b1ed5d4..db75c06 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -4,9 +4,6 @@ on:
   push:
   pull_request:
 
-env:
-  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
-
 jobs:
   test:
     runs-on: ubuntu-22.04
@@ -17,9 +14,9 @@ jobs:
         poetry-version: ["2.1.0"]
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 
diff --git a/Dockerfile b/Dockerfile
index 0df0128..17c1ecc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,5 @@
-FROM python:3.12-slim
+# ── Stage 1: build dependencies ──────────────────────────────────────────────
+FROM python:3.12-slim AS builder
 
 RUN apt-get update && apt-get install -y \
     build-essential \
@@ -8,16 +9,30 @@ RUN apt-get update && apt-get install -y \
 
 WORKDIR /app
 
-# Install Poetry and dependencies first (layer cache)
 RUN pip install --no-cache-dir poetry==2.1.0
 
 COPY pyproject.toml poetry.lock ./
 RUN poetry config virtualenvs.create false \
     && poetry install --without dev --no-root --no-interaction --no-ansi
 
-# Copy source
 COPY protea/ ./protea/
 RUN poetry install --without dev --no-interaction --no-ansi
+
+# ── Stage 2: runtime (installs only libpq5, without recommended extras) ─────
+FROM python:3.12-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libpq5 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy installed packages from builder
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY protea/ ./protea/
 COPY scripts/ ./scripts/
 COPY alembic/ ./alembic/
 COPY alembic.ini ./
@@ -25,7 +40,10 @@ COPY alembic.ini ./
 ENV PYTHONUNBUFFERED=1
 EXPOSE 8000
 
+HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
+
 # Default: API server
 # Override CMD to run a worker:
 #   docker run protea python scripts/worker.py --queue protea.jobs
-CMD ["uvicorn", "protea.api.app:app", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["uvicorn", "protea.api.app:create_app", "--factory", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/RERANKER.md b/RERANKER.md
new file mode 100644
index 0000000..2301546
--- /dev/null
+++ b/RERANKER.md
@@ -0,0 +1,188 @@
+# Temporal Holdout Re-Ranker for GO Term Prediction
+
+## Motivación
+
+El pipeline actual de PROTEA transfiere anotaciones GO mediante KNN sobre embeddings ESM, usando un scoring heurístico que combina distancia de embedding y pesos de evidencia. Este scoring no está optimizado para la métrica objetivo (Fmax) ni para el comportamiento real de las anotaciones GO a lo largo del tiempo.
+
+La hipótesis central es que existe una señal aprendible: **dado el contexto de una predicción KNN, ¿acabará este GO term apareciendo en el siguiente release de GOA para esta proteína?** Esta señal puede extraerse directamente del mecanismo de holdout temporal que ya implementa PROTEA.
+
+---
+
+## Formulación del Problema
+
+Sea $\mathcal{G}_N$ el conjunto de anotaciones GO en el release $N$ de GOA (Swiss-Prot reviewed). Para cada par consecutivo $(\mathcal{G}_N, \mathcal{G}_{N+1})$, el delta temporal es:
+
+$$\Delta_{N \to N+1} = \{(p, t) \mid (p, t) \in \mathcal{G}_{N+1} \setminus \mathcal{G}_N\}$$
+
+El re-ranker aprende una función:
+
+$$f(q, t, \mathcal{N}_K(q)) \to \hat{y} \in [0, 1]$$
+
+donde:
+- $q$ es la proteína query (representada por su embedding ESM)
+- $t$ es el GO term candidato
+- $\mathcal{N}_K(q)$ es el conjunto de los $K$ vecinos más cercanos a $q$ en el espacio de embeddings, tomados de las proteínas de referencia anotadas en $\mathcal{G}_N$
+- $\hat{y}$ es la probabilidad de que $(q, t) \in \Delta_{N \to N+1}$
+
+---
+
+## Protocolo de Entrenamiento
+
+Se utiliza validación cruzada temporal con múltiples splits históricos de GOA:
+
+```
+Training splits:
+  GOA_190 → GOA_195
+  GOA_195 → GOA_200
+  GOA_200 → GOA_205
+  GOA_205 → GOA_211
+  GOA_211 → GOA_215
+  GOA_215 → GOA_220
+
+Test split (holdout estricto, nunca visto durante training):
+  GOA_220 → GOA_229
+```
+
+Para cada split se generan ejemplos etiquetados: positivos $(y=1)$ si el par (proteína, GO term) aparece en el delta, negativos $(y=0)$ en caso contrario. El desbalanceo esperado es aproximadamente 1:10, manejable con técnicas estándar.
+
+---
+
+## Arquitectura: Cross-Attention Re-Ranker
+
+El modelo procesa cada par (query, GO term) usando el contexto completo de los vecinos KNN que contribuyeron a esa predicción.
+
+```
+Inputs por predicción (query_protein, go_term):
+  query_embedding       float32[D]       ESM embedding del query (D=480 para esmc_300m)
+  neighbor_embeddings   float32[K × D]   ESM embeddings de los K vecinos contribuyentes
+  tabular_features      float32[K × F]   distancia, evidencia, alineamiento, taxonomía...
+  go_term_embedding     float32[G]       embedding semántico del GO term (G=64)
+
+Arquitectura:
+  1. query_proj(query_embedding)          →  q        [H=256]
+  2. ref_proj(neighbor_embeddings)        →  tokens   [K × H]
+  3. feature_encoder(tabular_features)   →  (sumado a tokens)
+  4. CrossAttention(q, tokens, tokens)   →  context  [H]
+  5. MLP([q ‖ context ‖ go_emb ‖ agg_features])  →  score  [1]
+```
+
+La atención cruzada permite al modelo aprender **qué vecinos son más informativos para este query concreto**, en lugar de agregar los scores de forma heurística.
+
+### GO Term Embeddings
+
+Los embeddings de los GO terms se aprenden a partir de la estructura del DAG de GO (relaciones `is_a` / `part_of`) mediante Node2Vec o TransE, de forma que términos semánticamente relacionados (padre-hijo) tengan representaciones similares. El DAG ya está disponible en PROTEA a través de los modelos `GOTerm` y `GOTermRelationship`.
+
+---
+
+## Feature Vector
+
+Cada predicción (query, GO term) se caracteriza por las siguientes features tabulares, computadas por vecino que contribuyó a la predicción:
+
+| Feature | Descripción | Estado |
+|---|---|---|
+| `distance` | Distancia coseno en espacio de embeddings | Existente |
+| `evidence_weight` | Peso del código de evidencia (IDA > IEA) | Existente |
+| `identity_nw / sw` | Identidad de secuencia (alineamiento NW/SW) | Existente (opcional) |
+| `similarity_nw / sw` | Similitud de secuencia (alineamiento NW/SW) | Existente (opcional) |
+| `taxonomic_distance` | Distancia taxonómica entre query y referencia | Existente (opcional) |
+| `vote_count` | Número de vecinos que coinciden en este GO term | **Nuevo** |
+| `k_position` | Posición del vecino más cercano que predijo este término | **Nuevo** |
+| `go_term_frequency` | Frecuencia del término en el annotation set de referencia | **Nuevo** |
+| `ref_annotation_density` | Número de GO terms de la proteína de referencia | **Nuevo** |
+| `neighbor_distance_std` | Desviación estándar de las distancias a los K vecinos | **Nuevo** |
+
+---
+
+## Función de Pérdida
+
+Se utiliza **LambdaRank** en lugar de binary cross-entropy, ya que optimiza directamente el orden de las predicciones (proxy de NDCG / Fmax) en lugar de la calibración de probabilidades.
+
+Para cada proteína query, las predicciones GO se rankean conjuntamente:
+- Positivos: GO terms en $\Delta_{N \to N+1}$
+- Negativos: GO terms predichos pero no en el delta
+
+---
+
+## Pipeline de Datos: WebDataset
+
+El volumen de datos (múltiples splits × ~1.35M predicciones por split × embeddings de 480 dim) requiere un pipeline de datos eficiente. Se propone almacenar los ejemplos de entrenamiento en formato **WebDataset** (shards tar), con un shard por split GOA:
+
+```
+reranker_data/
+  splits/
+    goa190_to_195.tar       # ~2GB por shard
+    goa195_to_200.tar
+    ...
+    goa220_to_229.tar       # test split — no tocar durante training
+  models/
+    reranker_v1.pt
+    reranker_v1_config.json
+```
+
+Cada muestra en el WebDataset es **una proteína query** con todas sus predicciones GO para ese split:
+
+```python
+{
+    "query_accession": "P12345",
+    "query_embedding": float32[480],
+    "go_term_ids": ["GO:0006915", "GO:0005737", ...],   # N_preds
+    "neighbor_embeddings": float32[N_preds, K, 480],
+    "tabular_features": float32[N_preds, K, F],
+    "labels": int8[N_preds],                             # 1 si en delta, 0 si no
+}
+```
+
+El streaming de WebDataset permite entrenar sin cargar todo en RAM.
+
+---
+
+## Stack Tecnológico
+
+| Componente | Tecnología |
+|---|---|
+| Modelo | PyTorch |
+| Data pipeline | WebDataset + torch.utils.data |
+| Baseline comparación | LightGBM (binary + LambdaRank) |
+| GO embeddings | Node2Vec / PyTorch Geometric |
+| Seguimiento experimentos | wandb |
+| Embeddings proteína | ESM2 / ESMC (ya en PROTEA) |
+
+---
+
+## Integración en PROTEA
+
+Una vez entrenado, el re-ranker se integra en el pipeline existente:
+
+1. Nuevo modelo ORM `RerankingModel`: almacena pesos serializados y metadata de entrenamiento
+2. Campo `reranker_id` (nullable) en `PredictionSet`
+3. Si `reranker_id` presente: `store_predictions` aplica el modelo y sobreescribe `score` con $\hat{y}$
+4. El threshold de Fmax se calcula igual que ahora sobre los nuevos scores
+5. UI: selector de re-ranker en la pantalla de predicción
+
+---
+
+## Experimentos y Ablaciones
+
+El diseño permite comparar directamente:
+
+| Configuración | Descripción |
+|---|---|
+| **Baseline** | KNN + scoring heurístico actual |
+| **LightGBM tabular** | Re-ranker con features tabulares sin embeddings |
+| **LightGBM + derived** | Features tabulares + features derivadas del embedding (density, std) |
+| **MLP cross-encoder** | Arquitectura completa sin cross-attention |
+| **Cross-attention (propuesto)** | Arquitectura completa |
+| **+ GO DAG embeddings** | Ablación: ¿aportan los go_term_emb? |
+| **+ temporal CV** | Ablación: ¿mejora añadir más splits históricos? |
+
+La métrica principal es **Fmax promedio sobre los 9 settings** (NK/LK/PK × BPO/MFO/CCO) en el test split GOA220→229.
+
+---
+
+## Valor para la Tesis
+
+1. **Científicamente honesto**: el mismo mecanismo temporal que se usa para evaluar se usa para entrenar. No hay data leakage.
+2. **Comprobable y cuantificable**: Fmax(baseline KNN) vs Fmax(re-ranker) en benchmark idéntico.
+3. **Interpretable**: las feature importances (LightGBM) o los pesos de atención (cross-attention) revelan qué aspectos de una predicción KNN son más predictivos de anotaciones futuras.
+4. **Generalizable**: el re-ranker aprende sobre distribuciones temporales de anotaciones GO, no sobre una proteína concreta — debería generalizar a proteínas no vistas.
+5. **Extensible**: la arquitectura admite incorporar embeddings de secuencia de mayor calidad (ESM3, ProstT5) sin cambiar el pipeline.
diff --git a/alembic/env.py b/alembic/env.py
index 6cb72b8..ba6ce44 100644
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -1,8 +1,7 @@
 from logging.config import fileConfig
 from pathlib import Path
 
-from sqlalchemy import engine_from_config
-from sqlalchemy import pool
+from sqlalchemy import engine_from_config, pool
 
 from alembic import context
 
@@ -17,14 +16,14 @@
 
 # Wire PROTEA's ORM metadata so autogenerate works.
 # All model modules must be imported before Base.metadata is used.
-from protea.infrastructure.orm.base import Base
-import protea.infrastructure.orm.models  # noqa: F401 — registers all mappers
+import protea.infrastructure.orm.models  # noqa: E402, F401 — registers all mappers
+from protea.infrastructure.orm.base import Base  # noqa: E402
 
 target_metadata = Base.metadata
 
 # Override the DB URL from PROTEA's settings rather than relying on the
 # placeholder value in alembic.ini.
-from protea.infrastructure.settings import load_settings
+from protea.infrastructure.settings import load_settings  # noqa: E402
 
 _project_root = Path(__file__).resolve().parents[1]
 _settings = load_settings(_project_root)
diff --git a/alembic/versions/110a5b8cfbb9_add_reranker_model_id_to_evaluation_.py b/alembic/versions/110a5b8cfbb9_add_reranker_model_id_to_evaluation_.py
new file mode 100644
index 0000000..4164b2d
--- /dev/null
+++ b/alembic/versions/110a5b8cfbb9_add_reranker_model_id_to_evaluation_.py
@@ -0,0 +1,36 @@
+"""add reranker_model_id to evaluation_result
+
+Revision ID: 110a5b8cfbb9
+Revises: ba9966bd453e
+Create Date: 2026-03-19 10:52:11.951459
+
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = '110a5b8cfbb9'
+down_revision: str | Sequence[str] | None = 'ba9966bd453e'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add nullable ``evaluation_result.reranker_model_id`` plus its index and a FK to ``reranker_model.id`` (ON DELETE SET NULL)."""
+    op.add_column('evaluation_result', sa.Column('reranker_model_id', sa.UUID(), nullable=True))
+    op.create_index(op.f('ix_evaluation_result_reranker_model_id'), 'evaluation_result', ['reranker_model_id'], unique=False)
+    # Name the FK explicitly so it can later be dropped by name; this is the name PostgreSQL
+    # generates for an unnamed FK (<table>_<column>_fkey), so already-migrated databases match.
+    op.create_foreign_key(op.f('evaluation_result_reranker_model_id_fkey'), 'evaluation_result', 'reranker_model', ['reranker_model_id'], ['id'], ondelete='SET NULL')
+
+
+def downgrade() -> None:
+    """Drop the FK, the index and the ``evaluation_result.reranker_model_id`` column, in that order."""
+    # drop_constraint() needs a real name (None cannot be rendered as DROP CONSTRAINT). Use PostgreSQL's
+    # default name for this FK (<table>_<column>_fkey). NOTE(review): adjust if a naming_convention is configured.
+    op.drop_constraint(op.f('evaluation_result_reranker_model_id_fkey'), 'evaluation_result', type_='foreignkey')
+    op.drop_index(op.f('ix_evaluation_result_reranker_model_id'), table_name='evaluation_result')
+    op.drop_column('evaluation_result', 'reranker_model_id')
diff --git a/alembic/versions/1f0ac8aa38a4_add_evaluation_set.py b/alembic/versions/1f0ac8aa38a4_add_evaluation_set.py
index d816d02..9602ee8 100644
--- a/alembic/versions/1f0ac8aa38a4_add_evaluation_set.py
+++ b/alembic/versions/1f0ac8aa38a4_add_evaluation_set.py
@@ -5,17 +5,18 @@
 Create Date: 2026-03-12 22:13:05.918342
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 # revision identifiers, used by Alembic.
 revision: str = '1f0ac8aa38a4'
-down_revision: Union[str, Sequence[str], None] = 'a7b8c9d0e1f2'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = 'a7b8c9d0e1f2'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/3505bfa74df6_add_aspect_to_reranker_model_and_.py b/alembic/versions/3505bfa74df6_add_aspect_to_reranker_model_and_.py
new file mode 100644
index 0000000..98f21bd
--- /dev/null
+++ b/alembic/versions/3505bfa74df6_add_aspect_to_reranker_model_and_.py
@@ -0,0 +1,35 @@
+"""add aspect to reranker_model and reranker_config to evaluation_result
+
+Revision ID: 3505bfa74df6
+Revises: 110a5b8cfbb9
+Create Date: 2026-03-19 15:16:18.474851
+
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = '3505bfa74df6'
+down_revision: str | Sequence[str] | None = '110a5b8cfbb9'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add nullable ``evaluation_result.reranker_config`` (JSONB) and nullable ``reranker_model.aspect`` (String(3))."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('evaluation_result', sa.Column('reranker_config', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    op.add_column('reranker_model', sa.Column('aspect', sa.String(length=3), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop ``reranker_model.aspect`` and ``evaluation_result.reranker_config`` (reverse of upgrade); stored values are discarded."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('reranker_model', 'aspect')
+    op.drop_column('evaluation_result', 'reranker_config')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/3884c47fe946_add_reranker_feature_columns_to_go_.py b/alembic/versions/3884c47fe946_add_reranker_feature_columns_to_go_.py
new file mode 100644
index 0000000..fa60e23
--- /dev/null
+++ b/alembic/versions/3884c47fe946_add_reranker_feature_columns_to_go_.py
@@ -0,0 +1,40 @@
+"""add reranker feature columns to go_prediction
+
+Revision ID: 3884c47fe946
+Revises: 5fc2eb0f986d
+Create Date: 2026-03-18 13:40:17.716092
+
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = '3884c47fe946'
+down_revision: str | Sequence[str] | None = '5fc2eb0f986d'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add five nullable re-ranker feature columns to ``go_prediction`` (four Integer, one Float ``neighbor_distance_std``)."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('go_prediction', sa.Column('vote_count', sa.Integer(), nullable=True))
+    op.add_column('go_prediction', sa.Column('k_position', sa.Integer(), nullable=True))
+    op.add_column('go_prediction', sa.Column('go_term_frequency', sa.Integer(), nullable=True))
+    op.add_column('go_prediction', sa.Column('ref_annotation_density', sa.Integer(), nullable=True))
+    op.add_column('go_prediction', sa.Column('neighbor_distance_std', sa.Float(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the five re-ranker feature columns from ``go_prediction`` in reverse order of creation."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('go_prediction', 'neighbor_distance_std')
+    op.drop_column('go_prediction', 'ref_annotation_density')
+    op.drop_column('go_prediction', 'go_term_frequency')
+    op.drop_column('go_prediction', 'k_position')
+    op.drop_column('go_prediction', 'vote_count')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/47de89cf6fec_add_evaluation_result.py b/alembic/versions/47de89cf6fec_add_evaluation_result.py
index e7c0792..9376c6c 100644
--- a/alembic/versions/47de89cf6fec_add_evaluation_result.py
+++ b/alembic/versions/47de89cf6fec_add_evaluation_result.py
@@ -5,17 +5,18 @@
 Create Date: 2026-03-12 22:27:34.042479
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 # revision identifiers, used by Alembic.
 revision: str = '47de89cf6fec'
-down_revision: Union[str, Sequence[str], None] = '1f0ac8aa38a4'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = '1f0ac8aa38a4'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/489835ed5b31_add_composite_index_pga_set_accession.py b/alembic/versions/489835ed5b31_add_composite_index_pga_set_accession.py
index b99dc62..de393fa 100644
--- a/alembic/versions/489835ed5b31_add_composite_index_pga_set_accession.py
+++ b/alembic/versions/489835ed5b31_add_composite_index_pga_set_accession.py
@@ -5,17 +5,15 @@
 Create Date: 2026-03-15 11:17:30.865922
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 from alembic import op
-import sqlalchemy as sa
-
 
 # revision identifiers, used by Alembic.
 revision: str = '489835ed5b31'
-down_revision: Union[str, Sequence[str], None] = '7737a352d4fe'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = '7737a352d4fe'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/4f38043a5e41_add_parent_job_id.py b/alembic/versions/4f38043a5e41_add_parent_job_id.py
index 56d6655..ee3c908 100644
--- a/alembic/versions/4f38043a5e41_add_parent_job_id.py
+++ b/alembic/versions/4f38043a5e41_add_parent_job_id.py
@@ -5,17 +5,17 @@
 Create Date: 2026-03-09 11:55:12.264352
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 
+from alembic import op
 
 # revision identifiers, used by Alembic.
 revision: str = '4f38043a5e41'
-down_revision: Union[str, Sequence[str], None] = 'a1b2c3d4e5f6'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = 'a1b2c3d4e5f6'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/513355a1d933_add_scoring_config_id_to_evaluation_.py b/alembic/versions/513355a1d933_add_scoring_config_id_to_evaluation_.py
index 1890a22..4b856ae 100644
--- a/alembic/versions/513355a1d933_add_scoring_config_id_to_evaluation_.py
+++ b/alembic/versions/513355a1d933_add_scoring_config_id_to_evaluation_.py
@@ -5,17 +5,17 @@
 Create Date: 2026-03-15 12:37:19.930750
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 
+from alembic import op
 
 # revision identifiers, used by Alembic.
 revision: str = '513355a1d933'
-down_revision: Union[str, Sequence[str], None] = '489835ed5b31'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = '489835ed5b31'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/54e758c210c8_add_ia_url_to_ontology_snapshot.py b/alembic/versions/54e758c210c8_add_ia_url_to_ontology_snapshot.py
index cde8fca..8722240 100644
--- a/alembic/versions/54e758c210c8_add_ia_url_to_ontology_snapshot.py
+++ b/alembic/versions/54e758c210c8_add_ia_url_to_ontology_snapshot.py
@@ -5,17 +5,18 @@
 Create Date: 2026-03-16 11:42:10.636169
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 # revision identifiers, used by Alembic.
 revision: str = '54e758c210c8'
-down_revision: Union[str, Sequence[str], None] = 'c1d2e3f4a5b6'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = 'c1d2e3f4a5b6'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/5fc2eb0f986d_add_composite_indexes_for_knn.py b/alembic/versions/5fc2eb0f986d_add_composite_indexes_for_knn.py
new file mode 100644
index 0000000..1fcab83
--- /dev/null
+++ b/alembic/versions/5fc2eb0f986d_add_composite_indexes_for_knn.py
@@ -0,0 +1,37 @@
+"""add composite indexes for KNN performance
+
+Revision ID: 5fc2eb0f986d
+Revises: 54e758c210c8
+Create Date: 2026-03-18 12:00:00.000000
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "5fc2eb0f986d"
+down_revision: str = "54e758c210c8"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Composite index for KNN GO transfer: queries are scoped to one annotation_set_id and filtered by protein_accession.
+    # NOTE(review): revision 489835ed5b31 is named "add_composite_index_pga_set_accession" — confirm it does not already create ix_pga_set_accession.
+    op.create_index(
+        "ix_pga_set_accession",
+        "protein_go_annotation",
+        ["annotation_set_id", "protein_accession"],
+    )
+
+    # Composite index for prediction export and evaluation: queries filter
+    # by prediction_set_id then protein_accession.
+    op.create_index(
+        "ix_go_prediction_set_accession",
+        "go_prediction",
+        ["prediction_set_id", "protein_accession"],
+    )
+
+
+def downgrade() -> None:  # drops both composite indexes, in reverse order of creation
+    op.drop_index("ix_go_prediction_set_accession", table_name="go_prediction")
+    op.drop_index("ix_pga_set_accession", table_name="protein_go_annotation")
diff --git a/alembic/versions/7737a352d4fe_merge_scoring_config_branch.py b/alembic/versions/7737a352d4fe_merge_scoring_config_branch.py
index f759c30..e8c1d3f 100644
--- a/alembic/versions/7737a352d4fe_merge_scoring_config_branch.py
+++ b/alembic/versions/7737a352d4fe_merge_scoring_config_branch.py
@@ -5,17 +5,13 @@
 Create Date: 2026-03-15 10:11:56.507967
 
 """
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-
+from collections.abc import Sequence
 
 # revision identifiers, used by Alembic.
 revision: str = '7737a352d4fe'
-down_revision: Union[str, Sequence[str], None] = ('47de89cf6fec', 'b1c2d3e4f5a6')
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = ('47de89cf6fec', 'b1c2d3e4f5a6')
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/7c19ca08d5d4_add_support_entry_table.py b/alembic/versions/7c19ca08d5d4_add_support_entry_table.py
index 599214c..b298e7a 100644
--- a/alembic/versions/7c19ca08d5d4_add_support_entry_table.py
+++ b/alembic/versions/7c19ca08d5d4_add_support_entry_table.py
@@ -5,17 +5,17 @@
 Create Date: 2026-03-15 12:42:43.832417
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
 import sqlalchemy as sa
 
+from alembic import op
 
 # revision identifiers, used by Alembic.
 revision: str = '7c19ca08d5d4'
-down_revision: Union[str, Sequence[str], None] = '513355a1d933'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = '513355a1d933'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/a1b2c3d4e5f6_add_esm3c_chunking_normalize_residues.py b/alembic/versions/a1b2c3d4e5f6_add_esm3c_chunking_normalize_residues.py
index 7dd4e56..7b1ed45 100644
--- a/alembic/versions/a1b2c3d4e5f6_add_esm3c_chunking_normalize_residues.py
+++ b/alembic/versions/a1b2c3d4e5f6_add_esm3c_chunking_normalize_residues.py
@@ -19,16 +19,17 @@
     + uq_seq_embedding_seq_config_chunk  (sequence_id, embedding_config_id, chunk_index_s)
 """
 
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 import sqlalchemy as sa
+
 from alembic import op
 
 # revision identifiers, used by Alembic.
 revision: str = "a1b2c3d4e5f6"
-down_revision: Union[str, Sequence[str], None] = "cdd8510858db"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = "cdd8510858db"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/a7b8c9d0e1f2_add_feature_engineering_to_go_prediction.py b/alembic/versions/a7b8c9d0e1f2_add_feature_engineering_to_go_prediction.py
index 7d88cb1..184c96a 100644
--- a/alembic/versions/a7b8c9d0e1f2_add_feature_engineering_to_go_prediction.py
+++ b/alembic/versions/a7b8c9d0e1f2_add_feature_engineering_to_go_prediction.py
@@ -6,15 +6,16 @@
 """
 from __future__ import annotations
 
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 import sqlalchemy as sa
+
 from alembic import op
 
 revision: str = "a7b8c9d0e1f2"
-down_revision: Union[str, Sequence[str], None] = "f1a2b3c4d5e6"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = "f1a2b3c4d5e6"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/b1c2d3e4f5a6_add_scoring_config.py b/alembic/versions/b1c2d3e4f5a6_add_scoring_config.py
index 5eae559..0de80aa 100644
--- a/alembic/versions/b1c2d3e4f5a6_add_scoring_config.py
+++ b/alembic/versions/b1c2d3e4f5a6_add_scoring_config.py
@@ -6,10 +6,11 @@
 """
 from __future__ import annotations
 
-from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 revision = "b1c2d3e4f5a6"
 down_revision = "a7b8c9d0e1f2"
 branch_labels = None
diff --git a/alembic/versions/ba9966bd453e_add_reranker_model_table.py b/alembic/versions/ba9966bd453e_add_reranker_model_table.py
new file mode 100644
index 0000000..7f9e0a5
--- /dev/null
+++ b/alembic/versions/ba9966bd453e_add_reranker_model_table.py
@@ -0,0 +1,51 @@
+"""add reranker_model table
+
+Revision ID: ba9966bd453e
+Revises: 3884c47fe946
+Create Date: 2026-03-18 13:57:29.263810
+
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = 'ba9966bd453e'
+down_revision: str | Sequence[str] | None = '3884c47fe946'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the reranker_model table plus indexes on its two nullable foreign-key columns."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('reranker_model',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('name', sa.String(length=255), nullable=False),
+    sa.Column('prediction_set_id', sa.UUID(), nullable=True),
+    sa.Column('evaluation_set_id', sa.UUID(), nullable=True),
+    sa.Column('category', sa.String(length=10), nullable=False),
+    sa.Column('model_data', sa.Text(), nullable=False),
+    sa.Column('metrics', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+    sa.Column('feature_importance', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.ForeignKeyConstraint(['evaluation_set_id'], ['evaluation_set.id'], ondelete='SET NULL'),
+    sa.ForeignKeyConstraint(['prediction_set_id'], ['prediction_set.id'], ondelete='SET NULL'),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('name')
+    )
+    op.create_index(op.f('ix_reranker_model_evaluation_set_id'), 'reranker_model', ['evaluation_set_id'], unique=False)
+    op.create_index(op.f('ix_reranker_model_prediction_set_id'), 'reranker_model', ['prediction_set_id'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop both reranker_model indexes, then the reranker_model table itself."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_reranker_model_prediction_set_id'), table_name='reranker_model')
+    op.drop_index(op.f('ix_reranker_model_evaluation_set_id'), table_name='reranker_model')
+    op.drop_table('reranker_model')
+    # ### end Alembic commands ###
diff --git a/alembic/versions/c1d2e3f4a5b6_add_evidence_weights_to_scoring_config.py b/alembic/versions/c1d2e3f4a5b6_add_evidence_weights_to_scoring_config.py
index 4a3a4c9..fa88ddc 100644
--- a/alembic/versions/c1d2e3f4a5b6_add_evidence_weights_to_scoring_config.py
+++ b/alembic/versions/c1d2e3f4a5b6_add_evidence_weights_to_scoring_config.py
@@ -20,9 +20,10 @@
 from __future__ import annotations
 
 import sqlalchemy as sa
-from alembic import op
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 revision = "c1d2e3f4a5b6"
 down_revision = "7c19ca08d5d4"
 branch_labels = None
diff --git a/alembic/versions/c3d4e5f6a7b8_add_query_set.py b/alembic/versions/c3d4e5f6a7b8_add_query_set.py
index 6ff0e6e..98a6ffd 100644
--- a/alembic/versions/c3d4e5f6a7b8_add_query_set.py
+++ b/alembic/versions/c3d4e5f6a7b8_add_query_set.py
@@ -5,16 +5,16 @@
 Create Date: 2026-03-10 00:00:00.000000
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 import sqlalchemy as sa
+
 from alembic import op
-from sqlalchemy.dialects import postgresql
 
 revision: str = "c3d4e5f6a7b8"
-down_revision: Union[str, Sequence[str], None] = "4f38043a5e41"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = "4f38043a5e41"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/cdd8510858db_initial_schema.py b/alembic/versions/cdd8510858db_initial_schema.py
index 5125f7e..dfd4e5a 100644
--- a/alembic/versions/cdd8510858db_initial_schema.py
+++ b/alembic/versions/cdd8510858db_initial_schema.py
@@ -1,22 +1,23 @@
 """initial_schema
 
 Revision ID: cdd8510858db
-Revises: 
+Revises:
 Create Date: 2026-03-08 11:32:48.937483
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
-from alembic import op
-import sqlalchemy as sa
 import pgvector.sqlalchemy
+import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 
+from alembic import op
+
 # revision identifiers, used by Alembic.
 revision: str = 'cdd8510858db'
-down_revision: Union[str, Sequence[str], None] = None
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = None
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/d4e5f6a7b8c9_add_query_set_id_to_prediction_set.py b/alembic/versions/d4e5f6a7b8c9_add_query_set_id_to_prediction_set.py
index f1cbc97..bf51266 100644
--- a/alembic/versions/d4e5f6a7b8c9_add_query_set_id_to_prediction_set.py
+++ b/alembic/versions/d4e5f6a7b8c9_add_query_set_id_to_prediction_set.py
@@ -5,15 +5,16 @@
 Create Date: 2026-03-10 00:00:00.000000
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 import sqlalchemy as sa
+
 from alembic import op
 
 revision: str = "d4e5f6a7b8c9"
-down_revision: Union[str, Sequence[str], None] = "c3d4e5f6a7b8"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = "c3d4e5f6a7b8"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/e5f6a7b8c9d0_drop_go_prediction_protein_fk.py b/alembic/versions/e5f6a7b8c9d0_drop_go_prediction_protein_fk.py
index 84835e4..c8a2b96 100644
--- a/alembic/versions/e5f6a7b8c9d0_drop_go_prediction_protein_fk.py
+++ b/alembic/versions/e5f6a7b8c9d0_drop_go_prediction_protein_fk.py
@@ -5,14 +5,14 @@
 Create Date: 2026-03-10 00:00:00.000000
 
 """
-from typing import Sequence, Union
+from collections.abc import Sequence
 
 from alembic import op
 
 revision: str = "e5f6a7b8c9d0"
-down_revision: Union[str, Sequence[str], None] = "d4e5f6a7b8c9"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
+down_revision: str | Sequence[str] | None = "d4e5f6a7b8c9"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
 
 
 def upgrade() -> None:
diff --git a/alembic/versions/f1a2b3c4d5e6_add_go_term_relationship.py b/alembic/versions/f1a2b3c4d5e6_add_go_term_relationship.py
index a794190..76b74d6 100644
--- a/alembic/versions/f1a2b3c4d5e6_add_go_term_relationship.py
+++ b/alembic/versions/f1a2b3c4d5e6_add_go_term_relationship.py
@@ -6,9 +6,10 @@
 """
 from __future__ import annotations
 
-from alembic import op
 import sqlalchemy as sa
 
+from alembic import op
+
 revision = "f1a2b3c4d5e6"
 down_revision = "e5f6a7b8c9d0"
 branch_labels = None
diff --git a/apps/web/app/[locale]/annotations/page.tsx b/apps/web/app/[locale]/annotations/page.tsx
index 73be48b..0affb9e 100644
--- a/apps/web/app/[locale]/annotations/page.tsx
+++ b/apps/web/app/[locale]/annotations/page.tsx
@@ -201,12 +201,12 @@ export default function AnnotationsPage() {
         <h1 className="text-xl font-semibold">{t("title")}</h1>
       </div>
 
-      <div className="flex gap-1 border-b mb-6 overflow-x-auto">
+      <div className="border-b mb-6 overflow-hidden"><div className="flex gap-1 overflow-x-auto">
         {tabs.map((tab) => (
           <button
             key={tab.key}
             onClick={() => setActiveTab(tab.key)}
-            className={`px-4 py-2 text-sm font-medium border-b-2 transition-colors ${
+            className={`px-3 sm:px-4 py-2 text-sm font-medium border-b-2 transition-colors whitespace-nowrap ${
               activeTab === tab.key
                 ? "border-blue-600 text-blue-600"
                 : "border-transparent text-gray-500 hover:text-gray-700"
@@ -215,7 +215,7 @@ export default function AnnotationsPage() {
             {tab.label}
           </button>
         ))}
-      </div>
+      </div></div>
 
       {/* ── Annotation Sets ── */}
       {activeTab === "sets" && (
@@ -226,7 +226,51 @@ export default function AnnotationsPage() {
               {t("setsTab.refresh")}
             </button>
           </div>
-          <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
+          {/* Mobile card list */}
+          <div className="lg:hidden space-y-2">
+            {loadingSets && Array.from({ length: 3 }).map((_, i) => (
+              <div key={i} className="rounded-lg border bg-white p-4 shadow-sm animate-pulse">
+                <div className="h-4 bg-gray-200 rounded w-1/3 mb-2" />
+                <div className="h-3 bg-gray-100 rounded w-2/3" />
+              </div>
+            ))}
+            {!loadingSets && sets.length === 0 && (
+              <div className="rounded-lg border bg-white px-4 py-8 text-center text-sm text-gray-400 shadow-sm">
+                {t("setsTab.noSetsFound")}
+              </div>
+            )}
+            {sets.map((a) => (
+              <div key={a.id} className="rounded-lg border bg-white p-4 shadow-sm">
+                <div className="flex items-center justify-between mb-1">
+                  <span className="font-medium text-gray-800">{a.source}</span>
+                  <button
+                    onClick={() => handleDeleteSet(a.id)}
+                    className="rounded border border-red-200 px-2 py-1 text-xs text-red-600 hover:bg-red-50 transition-colors"
+                  >
+                    {t("setsTab.delete")}
+                  </button>
+                </div>
+                <p className="text-xs text-gray-500">{a.source_version ?? "—"} · {(a.annotation_count ?? 0).toLocaleString()} annotations</p>
+                <div className="mt-1 flex flex-wrap gap-1">
+                  {a.meta && Object.entries(a.meta).map(([k, v]) => (
+                    <span key={k} className="rounded bg-gray-100 px-1.5 py-0.5 text-xs text-gray-600">
+                      {k}: {Array.isArray(v) ? v.join(", ") : String(v)}
+                    </span>
+                  ))}
+                </div>
+                <div className="mt-1 flex items-center gap-2 text-xs text-gray-400">
+                  <span className="font-mono">{shortId(a.id)}</span>
+                  <span>{formatDate(a.created_at)}</span>
+                  {a.job_id && (
+                    <Link href={`/jobs/${a.job_id}`} className="text-blue-400 hover:text-blue-600">↗</Link>
+                  )}
+                </div>
+              </div>
+            ))}
+          </div>
+
+          {/* Desktop table */}
+          <div className="hidden lg:block overflow-x-auto rounded-lg border bg-white shadow-sm">
             <div className="grid grid-cols-[80px_100px_140px_100px_1fr_160px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
               <div>{t("setsTab.tableHeaders.id")}</div><div>{t("setsTab.tableHeaders.source")}</div><div>{t("setsTab.tableHeaders.version")}</div><div>{t("setsTab.tableHeaders.annotations")}</div><div>{t("setsTab.tableHeaders.meta")}</div><div>{t("setsTab.tableHeaders.created")}</div><div></div>
             </div>
@@ -278,7 +322,70 @@ export default function AnnotationsPage() {
               {t("snapshotsTab.refresh")}
             </button>
           </div>
-          <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
+          {/* Mobile card list */}
+          <div className="lg:hidden space-y-2">
+            {loadingSnaps && Array.from({ length: 2 }).map((_, i) => (
+              <div key={i} className="rounded-lg border bg-white p-4 shadow-sm animate-pulse">
+                <div className="h-4 bg-gray-200 rounded w-1/3 mb-2" />
+                <div className="h-3 bg-gray-100 rounded w-2/3" />
+              </div>
+            ))}
+            {!loadingSnaps && snapshots.length === 0 && (
+              <div className="rounded-lg border bg-white px-4 py-8 text-center text-sm text-gray-400 shadow-sm">
+                {t("snapshotsTab.noSnapshotsFound")}
+              </div>
+            )}
+            {snapshots.map((s) => (
+              <div key={s.id} className="rounded-lg border bg-white p-4 shadow-sm space-y-2">
+                <div className="flex items-center justify-between">
+                  <span className="font-medium text-gray-800">{s.obo_version}</span>
+                  <span className="text-xs text-gray-400">{(s.go_term_count ?? 0).toLocaleString()} terms</span>
+                </div>
+                <div className="min-w-0">
+                  {iaEditId === s.id ? (
+                    <div className="flex flex-col gap-1">
+                      <input
+                        autoFocus
+                        type="text"
+                        value={iaEditValue}
+                        onChange={(e) => setIaEditValue(e.target.value)}
+                        placeholder="https://…/IA_cafa6.tsv or file path"
+                        className="w-full rounded border px-2 py-1.5 text-xs focus:outline-none focus:ring-1 focus:ring-blue-500"
+                        onKeyDown={(e) => {
+                          if (e.key === "Enter") handleSaveIa(s.id);
+                          if (e.key === "Escape") setIaEditId(null);
+                        }}
+                      />
+                      <div className="flex gap-1">
+                        <button onClick={() => handleSaveIa(s.id)} disabled={iaSaving} className="rounded bg-blue-600 px-2 py-1 text-xs text-white hover:bg-blue-700 disabled:opacity-50">{t("snapshotsTab.save")}</button>
+                        <button onClick={() => setIaEditId(null)} className="rounded border px-2 py-1 text-xs text-gray-500 hover:bg-gray-50">{t("snapshotsTab.cancel")}</button>
+                      </div>
+                    </div>
+                  ) : (
+                    <button
+                      onClick={() => { setIaEditId(s.id); setIaEditValue(s.ia_url ?? ""); }}
+                      className="w-full text-left flex items-center gap-2 rounded px-1 py-0.5 hover:bg-gray-50 active:bg-gray-100 transition-colors"
+                      title={t("snapshotsTab.editTooltip")}
+                    >
+                      {s.ia_url ? (
+                        <span className="truncate text-xs text-gray-500 font-mono flex-1">{s.ia_url}</span>
+                      ) : (
+                        <span className="text-xs text-amber-500 italic flex-1">{t("snapshotsTab.notSet")}</span>
+                      )}
+                      <span className="shrink-0 text-gray-400 text-xs">✎</span>
+                    </button>
+                  )}
+                </div>
+                <div className="flex items-center gap-2 text-xs text-gray-400">
+                  <span className="font-mono">{shortId(s.id)}</span>
+                  <span>{formatDate(s.loaded_at)}</span>
+                </div>
+              </div>
+            ))}
+          </div>
+
+          {/* Desktop table */}
+          <div className="hidden lg:block overflow-x-auto rounded-lg border bg-white shadow-sm">
             <div className="grid grid-cols-[80px_160px_100px_minmax(160px,1fr)_160px] min-w-[700px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
               <div>{t("snapshotsTab.tableHeaders.id")}</div><div>{t("snapshotsTab.tableHeaders.version")}</div><div>{t("snapshotsTab.tableHeaders.goTerms")}</div><div>{t("snapshotsTab.tableHeaders.iaUrl")}</div><div>{t("snapshotsTab.tableHeaders.loaded")}</div>
             </div>
diff --git a/apps/web/app/[locale]/embeddings/page.tsx b/apps/web/app/[locale]/embeddings/page.tsx
index 7110cb8..d3411d0 100644
--- a/apps/web/app/[locale]/embeddings/page.tsx
+++ b/apps/web/app/[locale]/embeddings/page.tsx
@@ -5,12 +5,14 @@ import Link from "next/link";
 import { useTranslations } from "next-intl";
 import { useToast } from "@/components/Toast";
 import { SkeletonTableRow } from "@/components/Skeleton";
+import { ContextBanner } from "@/components/ContextBanner";
 import {
   listEmbeddingConfigs,
   createEmbeddingConfig,
   deleteEmbeddingConfig,
   createJob,
   listQuerySets,
+  getProteinStats,
   EmbeddingConfig,
   QuerySet,
 } from "@/lib/api";
@@ -95,6 +97,7 @@ export default function EmbeddingsPage() {
   const [cmpResult, setCmpResult] = useState<{ id: string; status: string } | null>(null);
   const [cmpError, setCmpError] = useState("");
   const [cmpSubmitting, setCmpSubmitting] = useState(false);
+  const [proteinCount, setProteinCount] = useState<number | null>(null);
 
   async function loadAll() {
     setLoading(true);
@@ -106,6 +109,7 @@ export default function EmbeddingsPage() {
       ]);
       setConfigs(cfgs);
       setQuerySets(qsets);
+      getProteinStats().then((s) => setProteinCount(s.total ?? 0)).catch(() => {});
       if (cfgs.length > 0 && !cmpConfigId) setCmpConfigId(cfgs[0].id);
     } catch (e: any) {
       setError(String(e));
@@ -224,6 +228,16 @@ export default function EmbeddingsPage() {
         <h1 className="text-xl font-semibold">{t("title")}</h1>
       </div>
 
+      <ContextBanner
+        title="Compute protein embeddings using language models"
+        description="Convert protein sequences into numerical vectors (ESM-2, ESMC, ProT5). These embeddings enable KNN-based GO term prediction."
+        prerequisites={proteinCount !== null ? [
+          { label: `${proteinCount} proteins loaded`, met: proteinCount > 0, href: "/proteins" },
+          { label: `${configs.length} embedding config(s)`, met: configs.length > 0 },
+        ] : undefined}
+        nextStep={{ label: "Functional Annotation", href: "/functional-annotation" }}
+      />
+
       {error && (
         <pre className="mb-4 whitespace-pre-wrap rounded-md border border-red-200 bg-red-50 p-3 text-sm text-red-700">
           {error}
@@ -475,53 +489,92 @@ export default function EmbeddingsPage() {
               {Array.from({ length: 3 }).map((_, i) => <SkeletonTableRow key={i} cols={9} />)}
             </div>
           ) : (
-            <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
-              <div className="grid grid-cols-[1fr_140px_80px_100px_80px_80px_60px_160px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
-                <div>{t("configsTab.tableHeaders.description")}</div>
-                <div>{t("configsTab.tableHeaders.model")}</div>
-                <div>{t("configsTab.tableHeaders.backend")}</div>
-                <div>{t("configsTab.tableHeaders.layers")}</div>
-                <div>{t("configsTab.tableHeaders.agg")}</div>
-                <div>{t("configsTab.tableHeaders.pool")}</div>
-                <div>{t("configsTab.tableHeaders.norm")}</div>
-                <div>{t("configsTab.tableHeaders.created")}</div>
-                <div></div>
-              </div>
-              {configs.map((c) => (
-                <div
-                  key={c.id}
-                  className="grid grid-cols-[1fr_140px_80px_100px_80px_80px_60px_160px_60px] gap-2 border-b px-4 py-3 text-sm last:border-0 items-center"
-                >
-                  <div className="text-gray-700 truncate" title={c.description ?? c.model_name}>
-                    {c.description || <span className="text-gray-400 italic">—</span>}
+            <>
+              {/* Mobile card list */}
+              <div className="lg:hidden space-y-2">
+                {configs.map((c) => (
+                  <div key={c.id} className="rounded-lg border bg-white p-4 shadow-sm">
+                    <div className="flex items-center justify-between mb-1">
+                      <span className="text-sm font-medium text-gray-800 truncate">
+                        {c.description || <span className="text-gray-400 italic">—</span>}
+                      </span>
+                      <button
+                        onClick={() => handleDeleteConfig(c.id)}
+                        className="text-gray-400 hover:text-red-600 transition-colors ml-2"
+                        title="Delete config"
+                      >
+                        ✕
+                      </button>
+                    </div>
+                    <p className="font-mono text-xs text-gray-500 truncate" title={c.model_name}>{c.model_name}</p>
+                    <div className="mt-2 flex flex-wrap gap-x-3 gap-y-1 text-xs text-gray-600">
+                      <span>{c.model_backend}</span>
+                      <span>layers [{c.layer_indices.join(", ")}]</span>
+                      <span>{c.layer_agg}/{c.pooling}</span>
+                      <span>{c.normalize ? "norm" : "no norm"}</span>
+                    </div>
+                    <p className="mt-1 text-xs text-gray-400">{formatDate(c.created_at)}</p>
                   </div>
-                  <div className="font-mono text-xs text-gray-500 truncate" title={c.model_name}>{c.model_name}</div>
-                  <div className="text-gray-600">{c.model_backend}</div>
-                  <div className="font-mono text-xs text-gray-500">[{c.layer_indices.join(", ")}]</div>
-                  <div className="text-gray-600">{c.layer_agg}</div>
-                  <div className="text-gray-600">{c.pooling}</div>
-                  <div className="text-gray-600">{c.normalize ? "yes" : "no"}</div>
-                  <div className="text-xs text-gray-400">{formatDate(c.created_at)}</div>
-                  <div>
-                    <button
-                      onClick={() => handleDeleteConfig(c.id)}
-                      className="text-gray-400 hover:text-red-600 transition-colors"
-                      title="Delete config"
-                    >
-                      ✕
+                ))}
+                {configs.length === 0 && (
+                  <div className="rounded-lg border bg-white px-4 py-8 text-center text-sm text-gray-400 shadow-sm">
+                    {t("configsTab.noConfigs")}{" "}
+                    <button onClick={() => setShowConfigForm(true)} className="text-blue-600 underline">
+                      ↑
                     </button>
                   </div>
+                )}
+              </div>
+
+              {/* Desktop table */}
+              <div className="hidden lg:block overflow-x-auto rounded-lg border bg-white shadow-sm">
+                <div className="grid grid-cols-[1fr_140px_80px_100px_80px_80px_60px_160px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
+                  <div>{t("configsTab.tableHeaders.description")}</div>
+                  <div>{t("configsTab.tableHeaders.model")}</div>
+                  <div>{t("configsTab.tableHeaders.backend")}</div>
+                  <div>{t("configsTab.tableHeaders.layers")}</div>
+                  <div>{t("configsTab.tableHeaders.agg")}</div>
+                  <div>{t("configsTab.tableHeaders.pool")}</div>
+                  <div>{t("configsTab.tableHeaders.norm")}</div>
+                  <div>{t("configsTab.tableHeaders.created")}</div>
+                  <div></div>
                 </div>
-              ))}
-              {configs.length === 0 && (
-                <div className="px-4 py-8 text-center text-sm text-gray-400">
-                  {t("configsTab.noConfigs")}{" "}
-                  <button onClick={() => setShowConfigForm(true)} className="text-blue-600 underline">
-                    ↑
-                  </button>
-                </div>
-              )}
-            </div>
+                {configs.map((c) => (
+                  <div
+                    key={c.id}
+                    className="grid grid-cols-[1fr_140px_80px_100px_80px_80px_60px_160px_60px] gap-2 border-b px-4 py-3 text-sm last:border-0 items-center"
+                  >
+                    <div className="text-gray-700 truncate" title={c.description ?? c.model_name}>
+                      {c.description || <span className="text-gray-400 italic">—</span>}
+                    </div>
+                    <div className="font-mono text-xs text-gray-500 truncate" title={c.model_name}>{c.model_name}</div>
+                    <div className="text-gray-600">{c.model_backend}</div>
+                    <div className="font-mono text-xs text-gray-500">[{c.layer_indices.join(", ")}]</div>
+                    <div className="text-gray-600">{c.layer_agg}</div>
+                    <div className="text-gray-600">{c.pooling}</div>
+                    <div className="text-gray-600">{c.normalize ? "yes" : "no"}</div>
+                    <div className="text-xs text-gray-400">{formatDate(c.created_at)}</div>
+                    <div>
+                      <button
+                        onClick={() => handleDeleteConfig(c.id)}
+                        className="text-gray-400 hover:text-red-600 transition-colors"
+                        title="Delete config"
+                      >
+                        ✕
+                      </button>
+                    </div>
+                  </div>
+                ))}
+                {configs.length === 0 && (
+                  <div className="px-4 py-8 text-center text-sm text-gray-400">
+                    {t("configsTab.noConfigs")}{" "}
+                    <button onClick={() => setShowConfigForm(true)} className="text-blue-600 underline">
+                      ↑
+                    </button>
+                  </div>
+                )}
+              </div>
+            </>
           )}
         </div>
       )}
@@ -572,7 +625,7 @@ export default function EmbeddingsPage() {
                   </select>
                 </div>
 
-                <div className="grid grid-cols-2 gap-3">
+                <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                   <div>
                     <label className={labelClass}>
                       {t("computeTab.queueBatchSizeLabel")}{" "}
diff --git a/apps/web/app/[locale]/evaluation/page.tsx b/apps/web/app/[locale]/evaluation/page.tsx
index a061dbd..25af270 100644
--- a/apps/web/app/[locale]/evaluation/page.tsx
+++ b/apps/web/app/[locale]/evaluation/page.tsx
@@ -2,8 +2,12 @@
 
 import { useEffect, useState } from "react";
 import { useTranslations } from "next-intl";
-import { listAnnotationSets, listPredictionSets, listScoringConfigs, baseUrl } from "@/lib/api";
-import type { AnnotationSet, PredictionSet, ScoringConfig } from "@/lib/api";
+import { ContextBanner } from "@/components/ContextBanner";
+import {
+  listAnnotationSets, listPredictionSets, listScoringConfigs,
+  listRerankers, baseUrl,
+} from "@/lib/api";
+import type { AnnotationSet, PredictionSet, ScoringConfig, RerankerModel } from "@/lib/api";
 
 const labelClass = "block text-sm font-medium text-gray-700 mb-1";
 const selectClass =
@@ -29,6 +33,19 @@ function InfoTooltip({ text }: { text: string }) {
   );
 }
 
+function RichTooltip({ children }: { children: React.ReactNode }) {
+  return (
+    <span className="relative inline-block group ml-1 align-middle">
+      <span className="inline-flex items-center justify-center w-4 h-4 rounded-full bg-blue-100 text-blue-500 text-[10px] font-bold cursor-help select-none">
+        ℹ
+      </span>
+      <span className="pointer-events-none absolute bottom-full left-1/2 -translate-x-1/2 mb-1.5 z-10 hidden group-hover:block w-72 rounded-md border border-gray-200 bg-white px-3 py-2.5 text-xs text-gray-600 shadow-xl leading-relaxed">
+        {children}
+      </span>
+    </span>
+  );
+}
+
 type NsMetrics = {
   fmax: number;
   precision: number;
@@ -45,6 +62,8 @@ type EvaluationResult = {
   evaluation_set_id: string;
   prediction_set_id: string;
   scoring_config_id: string | null;
+  reranker_model_id: string | null;
+  reranker_config: Record<string, Record<string, string>> | null;
   job_id: string | null;
   created_at: string;
   results: Record<string, SettingResults>; // NK | LK | PK
@@ -87,9 +106,13 @@ function setLabel(s: AnnotationSet) {
 }
 
 function predLabel(p: PredictionSet) {
+  const parts: string[] = [];
+  if (p.embedding_config_name) parts.push(p.embedding_config_name);
+  if (p.annotation_set_label) parts.push(p.annotation_set_label);
+  parts.push(`k=${p.limit_per_entry}`);
+  if (p.prediction_count != null) parts.push(`${p.prediction_count.toLocaleString()} preds`);
   const date = new Date(p.created_at).toLocaleDateString();
-  const count = p.prediction_count != null ? ` · ${p.prediction_count.toLocaleString()} preds.` : "";
-  return `${p.id.slice(0, 8)}… · ${date}${count}`;
+  return `${parts.join(" · ")} · ${date} (${p.id.slice(0, 8)}…)`;
 }
 
 function evalLabel(e: EvaluationSet, annotationSets: AnnotationSet[]) {
@@ -157,7 +180,7 @@ function ResultsTable({ results }: { results: Record<string, SettingResults> })
             {setting}
             {SETTING_TOOLTIPS[setting] && <InfoTooltip text={SETTING_TOOLTIPS[setting]} />}
           </div>
-          <div className="grid grid-cols-3 gap-3">
+          <div className="grid grid-cols-1 sm:grid-cols-3 gap-3">
             {["BPO", "MFO", "CCO"].map((ns) => {
               const m = results[setting]?.[ns];
               if (!m) return null;
@@ -178,7 +201,10 @@ function ResultsTable({ results }: { results: Record<string, SettingResults> })
                       <span className="text-gray-700">{m.recall.toFixed(3)}</span>
                     </div>
                     <div className="flex justify-between">
-                      <span className="text-gray-500">{t("resultMetrics.coverage")}</span>
+                      <span className="text-gray-500 flex items-center gap-1">
+                        {t("resultMetrics.coverage")}
+                        <InfoTooltip text="Fraction of benchmark proteins for which at least one prediction was submitted at the Fmax threshold. Values above 100% can occur in the PK setting: cafaeval shrinks the PK denominator by removing proteins whose new terms are excluded via the -known file, while PROTEA already subtracts those terms when building the ground truth — this double-accounting inflates the ratio. NK and LK coverage is always ≤ 100%." />
+                      </span>
                       <span className="text-gray-700">{(m.coverage * 100).toFixed(1)}%</span>
                     </div>
                     <div className="flex justify-between">
@@ -201,6 +227,7 @@ function EvaluationSetCard({
   annotationSets,
   predictionSets,
   scoringConfigs,
+  rerankers: initialRerankers,
   isSelected,
   onSelect,
   onDeleted,
@@ -209,6 +236,7 @@ function EvaluationSetCard({
   annotationSets: AnnotationSet[];
   predictionSets: PredictionSet[];
   scoringConfigs: ScoringConfig[];
+  rerankers: RerankerModel[];
   isSelected: boolean;
   onSelect: () => void;
   onDeleted: () => void;
@@ -219,6 +247,14 @@ function EvaluationSetCard({
   const [predSetId, setPredSetId] = useState("");
   const [maxDistance, setMaxDistance] = useState("");
   const [scoringConfigId, setScoringConfigId] = useState("");
+  // 3×3 reranker grid: category (nk/lk/pk) × aspect (bpo/mfo/cco); "" means no reranker is selected for that cell
+  const [rrGrid, setRrGrid] = useState<Record<string, Record<string, string>>>({
+    nk: { bpo: "", mfo: "", cco: "" },
+    lk: { bpo: "", mfo: "", cco: "" },
+    pk: { bpo: "", mfo: "", cco: "" },
+  });
+  const setRrCell = (cat: string, asp: string, val: string) =>
+    setRrGrid((prev) => ({ ...prev, [cat]: { ...prev[cat], [asp]: val } }));
   const [running, setRunning] = useState(false);
   const [runError, setRunError] = useState("");
   const [pendingJobId, setPendingJobId] = useState<string | null>(null);
@@ -277,7 +313,24 @@ function EvaluationSetCard({
     try {
       const body: Record<string, any> = { prediction_set_id: predSetId };
       if (maxDistance) body.max_distance = parseFloat(maxDistance);
-      if (scoringConfigId) body.scoring_config_id = scoringConfigId;
+      // Build nested rerankers mapping from the 3×3 grid
+      const rerankers: Record<string, Record<string, string>> = {};
+      let hasAnyReranker = false;
+      for (const cat of ["nk", "lk", "pk"]) {
+        const catMap: Record<string, string> = {};
+        for (const asp of ["bpo", "mfo", "cco"]) {
+          if (rrGrid[cat]?.[asp]) {
+            catMap[asp] = rrGrid[cat][asp];
+            hasAnyReranker = true;
+          }
+        }
+        if (Object.keys(catMap).length > 0) rerankers[cat] = catMap;
+      }
+      if (hasAnyReranker) {
+        body.rerankers = rerankers;
+      } else if (scoringConfigId) {
+        body.scoring_config_id = scoringConfigId;
+      }
       const res = await apiFetch<{ id: string; status: string }>(
         `/annotations/evaluation-sets/${e.id}/run`,
         {
@@ -305,7 +358,7 @@ function EvaluationSetCard({
         className="cursor-pointer p-4 hover:bg-gray-50 rounded-t-lg"
         onClick={onSelect}
       >
-        <div className="flex items-center justify-between">
+        <div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-2">
           <div className="text-sm font-medium text-gray-800">{evalLabel(e, annotationSets)}</div>
           <button
             onClick={async (ev) => {
@@ -319,7 +372,7 @@ function EvaluationSetCard({
             {t("evaluationSetCard.delete")}
           </button>
         </div>
-        <div className="mt-3 grid grid-cols-4 gap-2">
+        <div className="mt-3 grid grid-cols-2 sm:grid-cols-4 gap-2">
           <StatBadge
             label={t("evaluationSetCard.deltaProteins")}
             value={e.stats.delta_proteins}
@@ -414,7 +467,7 @@ function EvaluationSetCard({
           {/* Run evaluation */}
           <div className="space-y-3">
             <p className="text-xs font-medium text-gray-500">{t("evaluationSetCard.runCafaEvaluator")}</p>
-            <div className="grid grid-cols-3 gap-3">
+            <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
               <div>
                 <label className={labelClass}>{t("evaluationSetCard.predictionSetLabel")}</label>
                 <select
@@ -428,22 +481,6 @@ function EvaluationSetCard({
                   ))}
                 </select>
               </div>
-              <div>
-                <label className={labelClass}>
-                  {t("evaluationSetCard.scoringConfigLabel")}
-                  <InfoTooltip text="Scoring formula applied to compute CAFA prediction scores. If omitted, falls back to 1 − cosine_distance / 2." />
-                </label>
-                <select
-                  value={scoringConfigId}
-                  onChange={(ev) => setScoringConfigId(ev.target.value)}
-                  className={selectClass}
-                >
-                  <option value="">{t("evaluationSetCard.fallbackFormula")}</option>
-                  {scoringConfigs.map((c) => (
-                    <option key={c.id} value={c.id}>{c.name}</option>
-                  ))}
-                </select>
-              </div>
               <div>
                 <label className={labelClass}>{t("evaluationSetCard.maxDistanceLabel")}</label>
                 <input
@@ -454,6 +491,75 @@ function EvaluationSetCard({
                 />
               </div>
             </div>
+
+            {/* Scoring method — 3×3 grid (category × aspect) */}
+            <div>
+              <label className={labelClass}>
+                Re-ranker models
+                <InfoTooltip text="Select a trained LightGBM re-ranker for each cell (category × aspect). Cells left empty fall back to the scoring config or default 1 − distance/2. Models trained for a specific aspect are shown with their aspect badge." />
+              </label>
+              {initialRerankers.length > 0 && (
+                <div className="overflow-x-auto">
+                  <table className="w-full text-xs border-collapse">
+                    <thead>
+                      <tr>
+                        <th className="px-2 py-1 text-left text-gray-500 font-medium"></th>
+                        <th className="px-2 py-1 text-center text-gray-600 font-semibold">BPO</th>
+                        <th className="px-2 py-1 text-center text-gray-600 font-semibold">MFO</th>
+                        <th className="px-2 py-1 text-center text-gray-600 font-semibold">CCO</th>
+                      </tr>
+                    </thead>
+                    <tbody>
+                      {(["nk", "lk", "pk"] as const).map((cat) => (
+                        <tr key={cat}>
+                          <td className="px-2 py-1.5 font-semibold text-gray-700 uppercase">{cat}</td>
+                          {(["bpo", "mfo", "cco"] as const).map((asp) => {
+                            // Show models matching this category+aspect, or category+null (all-aspect models)
+                            const candidates = initialRerankers.filter(
+                              (r) => r.category === cat && (r.aspect === asp || r.aspect === null)
+                            );
+                            return (
+                              <td key={asp} className="px-1 py-1">
+                                <select
+                                  value={rrGrid[cat]?.[asp] ?? ""}
+                                  onChange={(ev) => { setRrCell(cat, asp, ev.target.value); if (ev.target.value) setScoringConfigId(""); }}
+                                  className="w-full rounded border border-gray-300 px-1.5 py-1 text-xs focus:outline-none focus:ring-1 focus:ring-blue-400"
+                                >
+                                  <option value="">—</option>
+                                  {candidates.map((r) => (
+                                    <option key={r.id} value={r.id}>
+                                      {r.name}{r.aspect ? ` [${r.aspect.toUpperCase()}]` : " [all]"} · AUC {r.metrics.val_auc?.toFixed(3)}
+                                    </option>
+                                  ))}
+                                </select>
+                              </td>
+                            );
+                          })}
+                        </tr>
+                      ))}
+                    </tbody>
+                  </table>
+                </div>
+              )}
+              {(() => {
+                const hasAnyRr = Object.values(rrGrid).some((catMap) => Object.values(catMap).some(Boolean));
+                return scoringConfigs.length > 0 && !hasAnyRr ? (
+                  <div className="mt-2">
+                    <label className="text-xs text-gray-500 mb-0.5 block">Scoring config (alternative to re-ranker)</label>
+                    <select
+                      value={scoringConfigId}
+                      onChange={(ev) => setScoringConfigId(ev.target.value)}
+                      className={selectClass}
+                    >
+                      <option value="">Default (1 − distance / 2)</option>
+                      {scoringConfigs.map((c) => (
+                        <option key={c.id} value={c.id}>{c.name}</option>
+                      ))}
+                    </select>
+                  </div>
+                ) : null;
+              })()}
+            </div>
             {runError && (
               <p className="rounded border border-red-200 bg-red-50 px-3 py-2 text-sm text-red-700">
                 {runError}
@@ -512,22 +618,87 @@ function EvaluationSetCard({
                 {results.map((r) => {
                   const pred = predictionSets.find((p) => p.id === r.prediction_set_id);
                   const sc = scoringConfigs.find((c) => c.id === r.scoring_config_id);
+                  const hasReranker = !!r.reranker_model_id;
+                  const rr = initialRerankers.find((m) => m.id === r.reranker_model_id);
                   return (
                     <div key={r.id} className="rounded-lg border border-gray-200 bg-white p-4 space-y-3">
                       {/* Meta header */}
-                      <div className="flex items-start justify-between gap-2">
+                      <div className="flex flex-col sm:flex-row sm:items-start sm:justify-between gap-2">
                         <div className="space-y-0.5 text-xs text-gray-500 min-w-0">
-                          <div>
+                          <div className="flex items-center gap-0.5">
                             <span className="font-medium text-gray-700">{t("evaluationSetCard.predictionSet")} </span>
                             {pred
                               ? <span title={r.prediction_set_id}>{r.prediction_set_id.slice(0, 8)}… · {new Date(pred.created_at).toLocaleDateString()}{pred.prediction_count != null ? ` · ${pred.prediction_count.toLocaleString()} preds.` : ""}</span>
                               : <span className="font-mono">{r.prediction_set_id.slice(0, 8)}…</span>
                             }
+                            {pred && (
+                              <RichTooltip>
+                                <div className="space-y-1.5">
+                                  <div className="font-semibold text-gray-700 border-b border-gray-100 pb-1 mb-1">Prediction Set</div>
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Config</span>
+                                    <span className="text-right">{pred.embedding_config_name ?? pred.embedding_config_id.slice(0, 8) + "…"}</span>
+                                  </div>
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Annotations</span>
+                                    <span className="text-right">{pred.annotation_set_label ?? pred.annotation_set_id.slice(0, 8) + "…"}</span>
+                                  </div>
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Ontology</span>
+                                    <span className="text-right">{pred.ontology_snapshot_version ?? pred.ontology_snapshot_id.slice(0, 8) + "…"}</span>
+                                  </div>
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Max dist.</span>
+                                    <span>{pred.distance_threshold ?? "—"}</span>
+                                  </div>
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Limit/entry</span>
+                                    <span>{pred.limit_per_entry}</span>
+                                  </div>
+                                </div>
+                              </RichTooltip>
+                            )}
                           </div>
-                          <div>
+                          <div className="flex items-center gap-0.5 flex-wrap">
                             <span className="font-medium text-gray-700">{t("evaluationSetCard.scoring")} </span>
-                            {sc ? sc.name : <span className="italic text-gray-400">{t("evaluationSetCard.fallbackFormula")}</span>}
-                            {sc?.description && <InfoTooltip text={sc.description} />}
+                            {r.reranker_config ? (
+                              <span className="inline-flex items-center gap-1 flex-wrap">
+                                <span className="rounded-full bg-indigo-50 border border-indigo-100 px-1.5 py-0.5 text-[10px] font-medium text-indigo-700">Re-ranker</span>
+                                {Object.entries(r.reranker_config).map(([cat, aspMap]) => (
+                                  <span key={cat} className="text-[10px] text-gray-500">
+                                    {cat.toUpperCase()}({Object.keys(aspMap).map(a => a.toUpperCase()).join(",")})
+                                  </span>
+                                ))}
+                              </span>
+                            ) : hasReranker ? (
+                              <span className="inline-flex items-center gap-1">
+                                <span className="rounded-full bg-indigo-50 border border-indigo-100 px-1.5 py-0.5 text-[10px] font-medium text-indigo-700">Re-ranker</span>
+                                {rr ? rr.name : "model"}
+                              </span>
+                            ) : sc ? sc.name : <span className="italic text-gray-400">{t("evaluationSetCard.fallbackFormula")}</span>}
+                            {sc && !hasReranker && (
+                              <RichTooltip>
+                                <div className="space-y-1.5">
+                                  <div className="font-semibold text-gray-700 border-b border-gray-100 pb-1 mb-1">{sc.name}</div>
+                                  {sc.description && <div className="italic text-gray-500 mb-1">{sc.description}</div>}
+                                  <div className="flex justify-between gap-3">
+                                    <span className="text-gray-400">Formula</span>
+                                    <code className="text-blue-600 text-[10px]">{sc.formula}</code>
+                                  </div>
+                                  {Object.keys(sc.weights).length > 0 && (
+                                    <div>
+                                      <div className="text-gray-400 mb-0.5">Weights</div>
+                                      {Object.entries(sc.weights).map(([k, v]) => (
+                                        <div key={k} className="flex justify-between gap-3 pl-2">
+                                          <span className="text-gray-500">{k}</span>
+                                          <span className="font-mono">{v}</span>
+                                        </div>
+                                      ))}
+                                    </div>
+                                  )}
+                                </div>
+                              </RichTooltip>
+                            )}
                           </div>
                           <div className="text-gray-400">{new Date(r.created_at).toLocaleString()}</div>
                         </div>
@@ -559,6 +730,7 @@ function EvaluationSetCard({
               </div>
             )}
           </div>
+
         </div>
       )}
     </div>
@@ -571,6 +743,7 @@ export default function EvaluationPage() {
   const [predictionSets, setPredictionSets] = useState<PredictionSet[]>([]);
   const [evaluationSets, setEvaluationSets] = useState<EvaluationSet[]>([]);
   const [scoringConfigs, setScoringConfigs] = useState<ScoringConfig[]>([]);
+  const [rerankers, setRerankers] = useState<RerankerModel[]>([]);
   const [loading, setLoading] = useState(true);
 
   const [oldSetId, setOldSetId] = useState("");
@@ -580,12 +753,13 @@ export default function EvaluationPage() {
   const [selectedEvalId, setSelectedEvalId] = useState("");
 
   const reload = () =>
-    Promise.all([listAnnotationSets(), listPredictionSets(), listEvaluationSets(), listScoringConfigs()])
-      .then(([ann, pred, ev, sc]) => {
+    Promise.all([listAnnotationSets(), listPredictionSets(), listEvaluationSets(), listScoringConfigs(), listRerankers()])
+      .then(([ann, pred, ev, sc, rr]) => {
         setAnnotationSets(ann);
         setPredictionSets(pred);
         setEvaluationSets(ev);
         setScoringConfigs(sc);
+        setRerankers(rr);
       })
       .finally(() => setLoading(false));
 
@@ -616,9 +790,19 @@ export default function EvaluationPage() {
   if (loading) return <div className="p-8 text-sm text-gray-500">Loading…</div>;
 
   return (
-    <div className="p-8 max-w-3xl space-y-10">
+    <div className="px-4 sm:px-8 py-6 sm:py-8 max-w-3xl space-y-8 sm:space-y-10">
       <h1 className="text-xl font-semibold text-gray-900">{t("title")}</h1>
 
+      <ContextBanner
+        title="Benchmark prediction quality with CAFA metrics"
+        description="Compare two GOA releases (temporal holdout) to evaluate prediction accuracy. Computes Fmax, precision, recall, and coverage per aspect and category."
+        prerequisites={[
+          { label: `${annotationSets.length} annotation set(s)`, met: annotationSets.length >= 2, href: "/annotations" },
+          { label: `${predictionSets.length} prediction set(s)`, met: predictionSets.length > 0, href: "/functional-annotation" },
+        ]}
+        nextStep={{ label: "Scoring configs", href: "/scoring" }}
+      />
+
       {/* ── Generate Evaluation Set ───────────────────────────────── */}
       <section className="rounded-lg border border-gray-200 p-6 space-y-5">
         <div>
@@ -627,7 +811,7 @@ export default function EvaluationPage() {
             {t("generateSection.description")}
           </p>
         </div>
-        <div className="grid grid-cols-2 gap-4">
+        <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
           <div>
             <label className={labelClass}>{t("generateSection.oldSetLabel")}</label>
             <select value={oldSetId} onChange={(e) => setOldSetId(e.target.value)} className={selectClass}>
@@ -669,6 +853,7 @@ export default function EvaluationPage() {
               annotationSets={annotationSets}
               predictionSets={predictionSets}
               scoringConfigs={scoringConfigs}
+              rerankers={rerankers}
               isSelected={selectedEvalId === e.id}
               onSelect={() => setSelectedEvalId(e.id === selectedEvalId ? "" : e.id)}
               onDeleted={() => setEvaluationSets((prev) => prev.filter((x) => x.id !== e.id))}
diff --git a/apps/web/app/[locale]/functional-annotation/[id]/page.tsx b/apps/web/app/[locale]/functional-annotation/[id]/page.tsx
index 28c0ff7..1f403a6 100644
--- a/apps/web/app/[locale]/functional-annotation/[id]/page.tsx
+++ b/apps/web/app/[locale]/functional-annotation/[id]/page.tsx
@@ -4,6 +4,7 @@ import { use, useEffect, useState } from "react";
 import Link from "next/link";
 import { useToast } from "@/components/Toast";
 import { SkeletonTableRow } from "@/components/Skeleton";
+import { Breadcrumbs } from "@/components/Breadcrumbs";
 import {
   getPredictionSet,
   getPredictionSetProteins,
@@ -295,139 +296,217 @@ function scoreColor(score: number): string {
   return "text-red-500";
 }
 
-function PredictionTable({ preds, annotatedGoIds, scoringConfig }: { preds: Prediction[]; annotatedGoIds: Set<string>; scoringConfig?: ScoringConfig }) {
+// Badge classes for an evidence code, tiered by its weight (fallback weight when the code has no entry): >= 1.0 green, >= 0.7 blue, >= 0.5 yellow, otherwise gray.
+function evidenceBadgeClass(code: string): string {
+  const weight = DEFAULT_EVIDENCE_WEIGHTS[code] ?? DEFAULT_EVIDENCE_WEIGHT_FALLBACK;
+  return weight >= 1.0 ? "bg-green-100 text-green-700 border-green-200"
+    : weight >= 0.7 ? "bg-blue-100 text-blue-700 border-blue-200"
+    : weight >= 0.5 ? "bg-yellow-100 text-yellow-700 border-yellow-200"
+    : "bg-gray-100 text-gray-500 border-gray-200";
+}
+
+type GroupedAnnotation = { go_id: string; name: string | null; aspect: string | null; evidence_codes: string[] }; // one entry per GO term, as built by groupAnnotations: first-seen name/aspect plus the distinct evidence codes
+
+// Collapses annotations into one entry per GO id (name/aspect from the first occurrence), collecting each distinct non-empty evidence code in encounter order.
+function groupAnnotations(anns: ProteinAnnotation[]): Map<string, GroupedAnnotation> {
+  const byGoId = new Map<string, GroupedAnnotation>();
+  for (const { go_id, name, aspect, evidence_code } of anns) {
+    const entry = byGoId.get(go_id);
+    if (!entry) {
+      byGoId.set(go_id, { go_id, name, aspect, evidence_codes: evidence_code ? [evidence_code] : [] });
+      continue;
+    }
+    if (evidence_code && !entry.evidence_codes.includes(evidence_code)) entry.evidence_codes.push(evidence_code);
+  }
+  return byGoId;
+}
+
+function PredictionTable({ preds, knownByGoId, scoringConfig }: {
+  preds: Prediction[];
+  knownByGoId: Map<string, GroupedAnnotation>;
+  scoringConfig?: ScoringConfig;
+}) {
   const hasAlignment = preds.some((p) => p.identity_nw != null);
   const hasTaxonomy = preds.some((p) => p.taxonomic_relation != null);
+  const hasReranker = preds.some((p) => p.vote_count != null);
   const hasScore = !!scoringConfig;
-  const [expanded, setExpanded] = useState<number | null>(null);
+  const hasDetail = hasAlignment || hasTaxonomy || hasReranker;
+  const [expanded, setExpanded] = useState<string | null>(null);
 
-  // Sort by score descending when a config is active
-  const sortedPreds = hasScore
+  // Sort: by score desc if config active, else by distance asc
+  const sorted = hasScore
     ? [...preds].sort((a, b) => computeScore(b, scoringConfig!) - computeScore(a, scoringConfig!))
-    : preds;
+    : [...preds].sort((a, b) => a.distance - b.distance);
 
-  // Column layout adapts to available features
-  const baseGrid = hasScore
-    ? (hasAlignment || hasTaxonomy
-        ? "grid-cols-[60px_80px_1fr_100px_65px_70px_70px]"
-        : "grid-cols-[60px_90px_1fr_110px_75px]")
-    : (hasAlignment || hasTaxonomy
-        ? "grid-cols-[80px_1fr_100px_65px_70px_70px]"
-        : "grid-cols-[90px_1fr_110px_75px]");
+  // Tailwind requires complete class strings — no dynamic construction
+  const gridClass = hasScore
+    ? "grid-cols-[60px_90px_1fr_110px_60px_60px_65px]"
+    : "grid-cols-[90px_1fr_110px_60px_60px_65px]";
 
   return (
-    <div className="overflow-hidden rounded-md border bg-white">
-      <div className={`grid ${baseGrid} gap-1 border-b bg-gray-50 px-3 py-1.5 text-xs font-semibold uppercase tracking-wide text-gray-400`}>
+    <div className="rounded-md border bg-white text-xs">
+      {/* Desktop header */}
+      <div className={`hidden lg:grid ${gridClass} gap-x-3 border-b bg-gray-50 px-3 py-1.5 font-semibold uppercase tracking-wide text-gray-400`}>
         {hasScore && <div>Score</div>}
         <div>GO ID</div>
         <div>Name</div>
-        <div>Ref. Protein</div>
+        <div>Via (ref)</div>
+        <div>Pred. ev.</div>
+        <div>Known ev.</div>
         <div>Dist</div>
-        {hasAlignment && <><div>NW id%</div><div>SW id%</div></>}
-        {hasTaxonomy && !hasAlignment && <><div>Relation</div><div>Tax dist</div></>}
       </div>
-      {sortedPreds.length === 0 ? (
-        <p className="px-3 py-3 text-xs text-gray-300">—</p>
-      ) : sortedPreds.map((pred, i) => (
-        <div key={i}>
-          <div
-            className={`grid ${baseGrid} gap-1 border-b px-3 py-2 text-xs last:border-0 items-center
-              ${annotatedGoIds.has(pred.go_id) ? "bg-green-50" : ""}
-              ${(hasAlignment || hasTaxonomy) ? "cursor-pointer hover:bg-blue-50/40" : ""}`}
-            onClick={() => (hasAlignment || hasTaxonomy) ? setExpanded(expanded === i ? null : i) : undefined}
-          >
-            {hasScore && (
-              <span className={`font-mono ${scoreColor(computeScore(pred, scoringConfig!))}`}>
-                {computeScore(pred, scoringConfig!).toFixed(3)}
-              </span>
-            )}
-            <span className="font-mono text-blue-600">{pred.go_id}</span>
-            <span className="text-gray-700 truncate" title={pred.name ?? ""}>{pred.name ?? "—"}</span>
-            <Link
-              href={`/proteins/${pred.ref_protein_accession}`}
-              className="font-mono text-xs text-blue-500 hover:underline truncate"
-              title={pred.ref_protein_accession}
-              onClick={(e) => e.stopPropagation()}
+
+      {sorted.length === 0 ? (
+        <p className="px-3 py-3 text-gray-300">—</p>
+      ) : sorted.map((pred) => {
+        const isExpanded = expanded === pred.go_id;
+        const knownAnn = knownByGoId.get(pred.go_id);
+
+        return (
+          <div key={pred.go_id}>
+            {/* ── Mobile card ── */}
+            <div
+              className={`lg:hidden border-b px-3 py-2.5 ${knownAnn ? "bg-green-50" : ""} ${hasDetail ? "cursor-pointer active:bg-blue-50/40" : ""}`}
+              onClick={() => hasDetail ? setExpanded(isExpanded ? null : pred.go_id) : undefined}
             >
-              {pred.ref_protein_accession}
-            </Link>
-            <span className="font-mono text-gray-500">{pred.distance.toFixed(4)}</span>
-            {hasAlignment && (
-              <>
-                <span className="font-mono text-gray-600">{pct(pred.identity_nw)}</span>
-                <span className="font-mono text-gray-600">{pct(pred.identity_sw)}</span>
-              </>
-            )}
-            {hasTaxonomy && !hasAlignment && (
-              <>
-                <span className={`rounded px-1 py-0.5 text-xs font-medium ${RELATION_COLORS[pred.taxonomic_relation ?? ""] ?? "bg-gray-50 text-gray-500"}`}>
-                  {pred.taxonomic_relation ?? "—"}
-                </span>
-                <span className="font-mono text-gray-500">{pred.taxonomic_distance ?? "—"}</span>
-              </>
-            )}
-          </div>
-          {expanded === i && (hasAlignment || hasTaxonomy) && (
-            <div className="border-b bg-gray-50 px-4 py-3 text-xs text-gray-600 grid grid-cols-2 gap-4">
-              {hasAlignment && (
-                <div>
-                  <p className="font-semibold text-gray-400 uppercase tracking-wide mb-1.5">Alignment</p>
-                  <table className="w-full text-xs">
-                    <thead>
-                      <tr className="text-gray-400">
-                        <th className="text-left font-medium pr-3 pb-1">Metric</th>
-                        <th className="text-right font-medium pr-3 pb-1">NW (global)</th>
-                        <th className="text-right font-medium pb-1">SW (local)</th>
-                      </tr>
-                    </thead>
-                    <tbody className="font-mono">
-                      <tr><td className="pr-3 text-gray-500 font-sans">Identity</td><td className="text-right pr-3">{pct(pred.identity_nw)}</td><td className="text-right">{pct(pred.identity_sw)}</td></tr>
-                      <tr><td className="pr-3 text-gray-500 font-sans">Similarity</td><td className="text-right pr-3">{pct(pred.similarity_nw)}</td><td className="text-right">{pct(pred.similarity_sw)}</td></tr>
-                      <tr><td className="pr-3 text-gray-500 font-sans">Score</td><td className="text-right pr-3">{pred.alignment_score_nw?.toFixed(0) ?? "—"}</td><td className="text-right">{pred.alignment_score_sw?.toFixed(0) ?? "—"}</td></tr>
-                      <tr><td className="pr-3 text-gray-500 font-sans">Gaps</td><td className="text-right pr-3">{pct(pred.gaps_pct_nw)}</td><td className="text-right">{pct(pred.gaps_pct_sw)}</td></tr>
-                      <tr><td className="pr-3 text-gray-500 font-sans">Aln length</td><td className="text-right pr-3">{pred.alignment_length_nw ?? "—"}</td><td className="text-right">{pred.alignment_length_sw ?? "—"}</td></tr>
-                      <tr><td className="pr-3 text-gray-500 font-sans">Seq length</td><td className="text-right pr-3">{pred.length_query ?? "—"} (q)</td><td className="text-right">{pred.length_ref ?? "—"} (r)</td></tr>
-                    </tbody>
-                  </table>
+              <div className="flex items-start justify-between gap-2 mb-1">
+                <span className="font-mono text-blue-600">{pred.go_id}</span>
+                <div className="flex items-center gap-1.5 shrink-0">
+                  {hasScore && (
+                    <span className={`font-mono text-[10px] ${scoreColor(computeScore(pred, scoringConfig!))}`}>
+                      {computeScore(pred, scoringConfig!).toFixed(3)}
+                    </span>
+                  )}
+                  <span className="font-mono text-gray-500 text-[10px]">{pred.distance.toFixed(4)}</span>
+                  {hasDetail && <span className="text-gray-300 text-[10px]">{isExpanded ? "▲" : "▼"}</span>}
                 </div>
+              </div>
+              <p className="text-gray-700 leading-snug text-xs mb-1">{pred.name ?? "—"}</p>
+              <div className="flex flex-wrap items-center gap-2 text-[10px]">
+                <span className="text-gray-400">via</span>
+                <Link
+                  href={`/proteins/${pred.ref_protein_accession}`}
+                  className="font-mono text-blue-500 hover:underline"
+                  onClick={(e) => e.stopPropagation()}
+                >{pred.ref_protein_accession}</Link>
+                {pred.evidence_code && (
+                  <span className={`rounded border px-1 py-0.5 font-mono font-medium ${evidenceBadgeClass(pred.evidence_code)}`}>
+                    {pred.evidence_code}
+                  </span>
+                )}
+                {knownAnn && knownAnn.evidence_codes.map((ec) => (
+                  <span key={ec} className={`rounded border px-1 py-0.5 font-mono font-medium ${evidenceBadgeClass(ec)}`}>
+                    {ec}
+                  </span>
+                ))}
+              </div>
+            </div>
+
+            {/* ── Desktop row ── */}
+            <div
+              className={`hidden lg:grid ${gridClass} gap-x-3 border-b px-3 py-2 last:border-0 items-start
+                ${knownAnn ? "bg-green-50" : ""}
+                ${hasDetail ? "cursor-pointer hover:bg-blue-50/40" : ""}`}
+              onClick={() => hasDetail ? setExpanded(isExpanded ? null : pred.go_id) : undefined}
+            >
+              {hasScore && (
+                <span className={`font-mono ${scoreColor(computeScore(pred, scoringConfig!))}`}>
+                  {computeScore(pred, scoringConfig!).toFixed(3)}
+                </span>
               )}
-              {hasTaxonomy && (
-                <div>
-                  <p className="font-semibold text-gray-400 uppercase tracking-wide mb-1.5">Taxonomy</p>
-                  <div className="space-y-1.5">
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">Relation</span>
-                      <span className={`rounded px-1.5 py-0.5 font-medium ${RELATION_COLORS[pred.taxonomic_relation ?? ""] ?? "bg-gray-50 text-gray-500"}`}>
-                        {pred.taxonomic_relation ?? "—"}
-                      </span>
-                    </div>
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">Distance</span>
-                      <span className="font-mono">{pred.taxonomic_distance ?? "—"}</span>
-                    </div>
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">Common ancestors</span>
-                      <span className="font-mono">{pred.taxonomic_common_ancestors ?? "—"}</span>
-                    </div>
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">LCA taxid</span>
-                      <span className="font-mono">{pred.taxonomic_lca ?? "—"}</span>
-                    </div>
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">Query taxid</span>
-                      <span className="font-mono">{pred.query_taxonomy_id ?? "—"}</span>
+              <span className="font-mono text-blue-600">{pred.go_id}</span>
+              <span className="text-gray-700 leading-snug">{pred.name ?? "—"}</span>
+              <div className="flex items-start gap-1">
+                <Link
+                  href={`/proteins/${pred.ref_protein_accession}`}
+                  className="font-mono text-blue-500 hover:underline"
+                  onClick={(e) => e.stopPropagation()}
+                >{pred.ref_protein_accession}</Link>
+                {hasDetail && <span className="text-gray-300 text-[10px] mt-0.5">{isExpanded ? "▲" : "▼"}</span>}
+              </div>
+              <div>
+                {pred.evidence_code ? (
+                  <span className={`rounded border px-1 py-0.5 text-[10px] font-mono font-medium ${evidenceBadgeClass(pred.evidence_code)}`}>
+                    {pred.evidence_code}
+                  </span>
+                ) : <span className="text-gray-300">—</span>}
+              </div>
+              <div className="flex flex-wrap gap-0.5">
+                {knownAnn ? knownAnn.evidence_codes.map((ec) => (
+                  <span key={ec} className={`rounded border px-1 py-0.5 text-[10px] font-mono font-medium ${evidenceBadgeClass(ec)}`}>
+                    {ec}
+                  </span>
+                )) : <span className="text-gray-300">—</span>}
+              </div>
+              <span className="font-mono text-gray-500">{pred.distance.toFixed(4)}</span>
+            </div>
+
+            {/* Expanded: alignment + taxonomy + reranker detail */}
+            {isExpanded && hasDetail && (
+              <div className="border-b bg-gray-50 px-4 py-3 grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4 sm:gap-6">
+                {hasAlignment && (
+                  <div>
+                    <p className="text-[10px] font-semibold uppercase tracking-wide text-gray-400 mb-2">
+                      Alignment — query vs {pred.ref_protein_accession}
+                    </p>
+                    <table className="w-full text-xs">
+                      <thead>
+                        <tr className="text-gray-400">
+                          <th className="text-left font-medium pr-4 pb-1">Metric</th>
+                          <th className="text-right font-medium pr-4 pb-1">NW (global)</th>
+                          <th className="text-right font-medium pb-1">SW (local)</th>
+                        </tr>
+                      </thead>
+                      <tbody className="font-mono">
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Identity</td><td className="text-right pr-4">{pct(pred.identity_nw)}</td><td className="text-right">{pct(pred.identity_sw)}</td></tr>
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Similarity</td><td className="text-right pr-4">{pct(pred.similarity_nw)}</td><td className="text-right">{pct(pred.similarity_sw)}</td></tr>
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Score</td><td className="text-right pr-4">{pred.alignment_score_nw?.toFixed(0) ?? "—"}</td><td className="text-right">{pred.alignment_score_sw?.toFixed(0) ?? "—"}</td></tr>
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Gaps</td><td className="text-right pr-4">{pct(pred.gaps_pct_nw)}</td><td className="text-right">{pct(pred.gaps_pct_sw)}</td></tr>
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Aln length</td><td className="text-right pr-4">{pred.alignment_length_nw ?? "—"}</td><td className="text-right">{pred.alignment_length_sw ?? "—"}</td></tr>
+                        <tr><td className="pr-4 text-gray-500 font-sans py-0.5">Seq length</td><td className="text-right pr-4">{pred.length_query ?? "—"} (q)</td><td className="text-right">{pred.length_ref ?? "—"} (r)</td></tr>
+                      </tbody>
+                    </table>
+                  </div>
+                )}
+                {hasTaxonomy && (
+                  <div>
+                    <p className="text-[10px] font-semibold uppercase tracking-wide text-gray-400 mb-2">
+                      Taxonomy — query vs {pred.ref_protein_accession}
+                    </p>
+                    <div className="space-y-1.5 text-xs">
+                      <div className="flex justify-between">
+                        <span className="text-gray-500">Relation</span>
+                        <span className={`rounded px-1.5 py-0.5 font-medium ${RELATION_COLORS[pred.taxonomic_relation ?? ""] ?? "bg-gray-50 text-gray-500"}`}>
+                          {pred.taxonomic_relation ?? "—"}
+                        </span>
+                      </div>
+                      <div className="flex justify-between"><span className="text-gray-500">Distance</span><span className="font-mono">{pred.taxonomic_distance ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">Common ancestors</span><span className="font-mono">{pred.taxonomic_common_ancestors ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">LCA taxid</span><span className="font-mono">{pred.taxonomic_lca ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">Query taxid</span><span className="font-mono">{pred.query_taxonomy_id ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">Ref taxid</span><span className="font-mono">{pred.ref_taxonomy_id ?? "—"}</span></div>
                     </div>
-                    <div className="flex justify-between">
-                      <span className="text-gray-500">Ref taxid</span>
-                      <span className="font-mono">{pred.ref_taxonomy_id ?? "—"}</span>
+                  </div>
+                )}
+                {hasReranker && (
+                  <div>
+                    <p className="text-[10px] font-semibold uppercase tracking-wide text-gray-400 mb-2">
+                      Re-ranker features
+                    </p>
+                    <div className="space-y-1.5 text-xs">
+                      <div className="flex justify-between"><span className="text-gray-500">Vote count</span><span className="font-mono">{pred.vote_count ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">K position</span><span className="font-mono">{pred.k_position ?? "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">GO term frequency</span><span className="font-mono">{pred.go_term_frequency != null ? pred.go_term_frequency.toFixed(4) : "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">Ref annotation density</span><span className="font-mono">{pred.ref_annotation_density != null ? pred.ref_annotation_density.toFixed(4) : "—"}</span></div>
+                      <div className="flex justify-between"><span className="text-gray-500">Neighbor dist std</span><span className="font-mono">{pred.neighbor_distance_std != null ? pred.neighbor_distance_std.toFixed(4) : "—"}</span></div>
                     </div>
                   </div>
-                </div>
-              )}
-            </div>
-          )}
-        </div>
-      ))}
+                )}
+              </div>
+            )}
+          </div>
+        );
+      })}
     </div>
   );
 }
@@ -471,8 +550,7 @@ function ProteinDetail({
       setLoadingGraph(false);
     }
   }
-  const annotatedGoIds = new Set(annotations.map((a) => a.go_id));
-  const predictedGoIds = new Set(predictions.map((p) => p.go_id));
+  const knownByGoId = groupAnnotations(annotations);
 
   const predByAspect: Record<string, Prediction[]> = { F: [], P: [], C: [], other: [] };
   for (const p of predictions) {
@@ -480,21 +558,19 @@ function ProteinDetail({
     predByAspect[key].push(p);
   }
 
-  const annByAspect: Record<string, ProteinAnnotation[]> = { F: [], P: [], C: [], other: [] };
-  for (const a of annotations) {
-    const key = a.aspect && annByAspect[a.aspect] ? a.aspect : "other";
-    annByAspect[key].push(a);
-  }
+  const aspects = (["F", "P", "C"] as const).filter((asp) => predByAspect[asp].length > 0);
 
-  const aspects = (["F", "P", "C"] as const).filter(
-    (asp) => predByAspect[asp].length > 0 || annByAspect[asp].length > 0
-  );
+  const predictedGoIds = new Set(predictions.map((p) => p.go_id));
+  const annotatedGoIds = new Set(annotations.map((a) => a.go_id));
+  const totalMatches = Array.from(predictedGoIds).filter((goId) => knownByGoId.has(goId)).length; // distinct predicted GO ids that also appear in the known annotations
+  const totalUniquePredicted = predictedGoIds.size; // distinct GO ids: the same GO id may be predicted more than once, so predictions.length would over-count
 
-  const totalMatches = predictions.filter((p) => annotatedGoIds.has(p.go_id)).length;
+  // Known terms not covered by any prediction
+  const uncoveredKnown = Array.from(knownByGoId.values()).filter((a) => !predictedGoIds.has(a.go_id));
 
   return (
     <div className="mt-4 rounded-lg border bg-gray-50 p-4">
-      <div className="flex items-center gap-3 mb-4">
+      <div className="flex flex-wrap items-center gap-2 sm:gap-3 mb-4">
         <span className="font-mono font-semibold text-gray-900">{accession}</span>
         {inDb && (
           <Link href={`/proteins/${accession}`} className="text-xs text-blue-500 hover:underline">
@@ -502,16 +578,16 @@ function ProteinDetail({
           </Link>
         )}
         {annotations.length > 0 && predictions.length > 0 && (
-          <span className="text-xs text-green-700 font-medium ml-auto">
-            {totalMatches} / {predictions.length} predicted match known annotations
+          <span className="text-xs text-green-700 font-medium sm:ml-auto">
+            {totalMatches} / {totalUniquePredicted} match known
           </span>
         )}
         {ontologySnapshotId && predictions.length > 0 && (
-          <button onClick={toggleGraph} className="rounded border bg-white px-2 py-1 text-xs hover:bg-gray-50 ml-2">
+          <button onClick={toggleGraph} className="rounded border bg-white px-2 py-1 text-xs hover:bg-gray-50">
             {loadingGraph ? "Loading…" : showGraph ? "Hide graph" : "GO graph"}
           </button>
         )}
-        <button onClick={onClose} className="text-gray-400 hover:text-gray-600 text-lg leading-none ml-2">×</button>
+        <button onClick={onClose} className="text-gray-400 hover:text-gray-600 text-lg leading-none ml-auto sm:ml-2">×</button>
       </div>
 
       {loading && <p className="text-sm text-gray-400">Loading…</p>}
@@ -533,38 +609,46 @@ function ProteinDetail({
 
       {!loading && aspects.map((asp) => {
         const preds = predByAspect[asp];
-        const anns = annByAspect[asp];
+        const uniquePredCount = new Set(preds.map((p) => p.go_id)).size;
+        const knownInAspect = Array.from(knownByGoId.values()).filter((a) => a.aspect === asp);
         return (
           <div key={asp} className="mb-5 last:mb-0">
             <div className="flex items-center gap-2 mb-2">
               <AspectBadge aspect={asp} />
               <span className="text-xs font-semibold text-gray-600">{ASPECT_LABELS[asp]}</span>
-              <span className="text-xs text-gray-400 ml-1">{preds.length} predicted · {anns.length} known</span>
+              <span className="text-xs text-gray-400 ml-1">{uniquePredCount} predicted · {knownInAspect.length} known</span>
             </div>
-            <div className="grid grid-cols-2 gap-3">
-              {/* Predictions */}
-              <PredictionTable preds={preds} annotatedGoIds={annotatedGoIds} scoringConfig={scoringConfig} />
-
+            <PredictionTable preds={preds} knownByGoId={knownByGoId} scoringConfig={scoringConfig} />
+          </div>
+        );
+      })}
 
-              {/* Known annotations */}
-              <div className="overflow-hidden rounded-md border bg-white">
-                <div className="grid grid-cols-[90px_1fr_60px] gap-1 border-b bg-gray-50 px-3 py-1.5 text-xs font-semibold uppercase tracking-wide text-gray-400">
-                  <div>GO ID</div><div>Name</div><div>Evidence</div>
+      {/* Known terms with no matching prediction */}
+      {!loading && uncoveredKnown.length > 0 && (
+        <div className="mt-4">
+          <p className="text-xs font-semibold text-gray-500 mb-2">
+            Known annotations not covered by any prediction ({uncoveredKnown.length})
+          </p>
+          <div className="overflow-x-auto rounded-md border bg-white text-xs">
+            <div className="grid grid-cols-[90px_1fr_80px] gap-2 border-b bg-gray-50 px-3 py-1.5 font-semibold uppercase tracking-wide text-gray-400">
+              <div>GO ID</div><div>Name</div><div>Evidence</div>
+            </div>
+            {uncoveredKnown.map((ann) => (
+              <div key={ann.go_id} className="grid grid-cols-[90px_1fr_80px] gap-2 border-b px-3 py-2 last:border-0 items-start">
+                <span className="font-mono text-blue-600 pt-0.5">{ann.go_id}</span>
+                <span className="text-gray-700 leading-snug">{ann.name ?? "—"}</span>
+                <div className="flex flex-wrap gap-0.5 justify-end">
+                  {ann.evidence_codes.map((ec) => (
+                    <span key={ec} className={`rounded border px-1 py-0.5 text-[10px] font-mono font-medium ${evidenceBadgeClass(ec)}`}>
+                      {ec}
+                    </span>
+                  ))}
                 </div>
-                {anns.length === 0 ? (
-                  <p className="px-3 py-3 text-xs text-gray-300">—</p>
-                ) : anns.map((ann, i) => (
-                  <div key={i} className={`grid grid-cols-[90px_1fr_60px] gap-1 border-b px-3 py-2 text-xs last:border-0 items-center ${predictedGoIds.has(ann.go_id) ? "bg-green-50" : ""}`}>
-                    <span className="font-mono text-blue-600">{ann.go_id}</span>
-                    <span className="text-gray-700 truncate" title={ann.name ?? ""}>{ann.name ?? "—"}</span>
-                    <span className="text-gray-500">{ann.evidence_code ?? "—"}</span>
-                  </div>
-                ))}
               </div>
-            </div>
+            ))}
           </div>
-        );
-      })}
+        </div>
+      )}
     </div>
   );
 }
@@ -679,6 +763,7 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
   const [activeTab, setActiveTab] = useState<Tab>("proteins");
   const [annotationSetId, setAnnotationSetId] = useState<string | null>(null);
   const [ontologySnapshotId, setOntologySnapshotId] = useState<string | null>(null);
+  const [limitPerEntry, setLimitPerEntry] = useState<number | null>(null);
 
   // Scoring
   const [scoringConfigs, setScoringConfigs] = useState<ScoringConfig[]>([]);
@@ -715,6 +800,7 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
       .then((ps) => {
         setAnnotationSetId(ps.annotation_set_id);
         setOntologySnapshotId(ps.ontology_snapshot_id);
+        setLimitPerEntry(ps.limit_per_entry);
       })
       .catch(() => {});
     listScoringConfigs()
@@ -753,7 +839,10 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
   }
 
   useEffect(() => {
-    if (activeTab === "proteins") loadProteins(0, "");
+    if (activeTab === "proteins") {
+      loadProteins(0, "");
+      if (!distribution) loadDistribution();
+    }
     if (activeTab === "distribution") loadDistribution();
   }, [activeTab]);
 
@@ -797,37 +886,40 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
 
   return (
     <>
-      <div className="mb-6 flex items-start justify-between gap-4">
+      <div className="mb-6 space-y-3">
+        <Breadcrumbs />
         <div>
-          <Link href="/functional-annotation" className="text-sm text-gray-400 hover:text-gray-600">← Functional Annotation</Link>
           <h1 className="text-xl font-semibold mt-2">
             Prediction Set <span className="font-mono text-base text-gray-500">{shortId(setId)}…</span>
           </h1>
+          {limitPerEntry != null && (
+            <p className="text-xs text-gray-400 mt-0.5">k = {limitPerEntry}</p>
+          )}
         </div>
-        <div className="flex flex-col items-end gap-2">
-          <div className="flex items-center gap-2">
-            <div className="flex items-center gap-1.5">
-              <label className="text-xs text-gray-500 whitespace-nowrap">Scoring</label>
-              <select
-                value={selectedConfigId}
-                onChange={(e) => setSelectedConfigId(e.target.value)}
-                className="rounded-md border bg-white px-2 py-1.5 text-sm text-gray-700 shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
-              >
-                <option value="">Raw distance</option>
-                {scoringConfigs.map((c) => (
-                  <option key={c.id} value={c.id}>{c.name}</option>
-                ))}
-                <option value={CUSTOM_ID}>Custom…</option>
-              </select>
-            </div>
-            <DownloadButton
-              setId={setId}
-              scoringConfigId={
-                selectedConfigId && selectedConfigId !== CUSTOM_ID ? selectedConfigId : undefined
-              }
-              customBlocked={selectedConfigId === CUSTOM_ID}
-            />
+        <div className="flex flex-wrap items-center gap-2">
+          <div className="flex items-center gap-1.5">
+            <label className="text-xs text-gray-500 whitespace-nowrap">Scoring</label>
+            <select
+              value={selectedConfigId}
+              onChange={(e) => setSelectedConfigId(e.target.value)}
+              className="rounded-md border bg-white px-2 py-1.5 text-sm text-gray-700 shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
+            >
+              <option value="">Raw distance</option>
+              {scoringConfigs.map((c) => (
+                <option key={c.id} value={c.id}>{c.name}</option>
+              ))}
+              <option value={CUSTOM_ID}>Custom…</option>
+            </select>
           </div>
+          <DownloadButton
+            setId={setId}
+            scoringConfigId={
+              selectedConfigId && selectedConfigId !== CUSTOM_ID ? selectedConfigId : undefined
+            }
+            customBlocked={selectedConfigId === CUSTOM_ID}
+          />
+        </div>
+        <div>
           {selectedConfigId && (
             <WeightPanel
               config={selectedConfigId !== CUSTOM_ID ? selectedConfig : undefined}
@@ -868,17 +960,35 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
         ))}
       </div>
 
+      {/* ── Executive summary ── */}
+      {activeTab === "proteins" && distribution && (
+        <div className="grid grid-cols-2 sm:grid-cols-4 gap-3 mb-6">
+          <div className="rounded-lg border bg-white p-3 text-center">
+            <div className="text-xl font-bold text-gray-900 tabular-nums">{proteinTotal.toLocaleString()}</div>
+            <div className="text-xs text-gray-500">Proteins</div>
+          </div>
+          {(["P", "F", "C"] as const).map((aspect) => (
+            <div key={aspect} className="rounded-lg border bg-white p-3 text-center">
+              <div className="text-xl font-bold text-gray-900 tabular-nums">
+                {(distribution.aspect_totals[aspect] ?? 0).toLocaleString()}
+              </div>
+              <div className="text-xs text-gray-500">{ASPECT_LABELS[aspect]}</div>
+            </div>
+          ))}
+        </div>
+      )}
+
       {/* ── Proteins ── */}
       {activeTab === "proteins" && (
         <div>
-          <div className="flex items-center gap-3 mb-4">
-            <form onSubmit={handleProteinSearch} className="flex gap-2">
+          <div className="flex flex-wrap items-center gap-3 mb-4">
+            <form onSubmit={handleProteinSearch} className="flex gap-2 flex-1 min-w-0">
               <input
                 type="text"
                 value={proteinSearchInput}
                 onChange={(e) => setProteinSearchInput(e.target.value)}
                 placeholder="Filter by accession…"
-                className="rounded-md border px-3 py-1.5 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 w-56"
+                className="rounded-md border px-3 py-1.5 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 w-full sm:w-56"
               />
               <button type="submit" className="rounded-md border bg-white px-3 py-1.5 text-sm hover:bg-gray-50">
                 Filter
@@ -890,16 +1000,76 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
                 </button>
               )}
             </form>
-            <span className="ml-auto text-sm text-gray-400">{proteinTotal.toLocaleString()} proteins</span>
+            <span className="text-sm text-gray-400">{proteinTotal.toLocaleString()} proteins</span>
+          </div>
+
+          {/* Mobile card list */}
+          <div className="lg:hidden space-y-2">
+            {loadingProteins && Array.from({ length: 4 }).map((_, i) => (
+              <div key={i} className="rounded-lg border bg-white p-4 shadow-sm animate-pulse">
+                <div className="h-4 bg-gray-200 rounded w-1/3 mb-2" />
+                <div className="h-3 bg-gray-100 rounded w-2/3" />
+              </div>
+            ))}
+            {!loadingProteins && proteins.length === 0 && (
+              <div className="rounded-lg border bg-white px-4 py-12 text-center text-sm text-gray-400 shadow-sm">No proteins found.</div>
+            )}
+            {!loadingProteins && proteins.map((p) => (
+              <div key={p.accession} className="rounded-lg border bg-white shadow-sm overflow-hidden">
+                <div
+                  className={`p-4 cursor-pointer transition-colors ${
+                    selectedAccession === p.accession ? "bg-blue-50" : "hover:bg-gray-50"
+                  }`}
+                  onClick={() => selectProtein(p.accession, p.in_db)}
+                >
+                  <div className="flex items-center justify-between mb-1">
+                    <div className="flex items-center gap-1.5">
+                      <span className={`inline-block w-2 h-2 rounded-full ${
+                        p.min_distance == null ? "bg-gray-300"
+                        : p.min_distance < 0.3 ? "bg-green-500"
+                        : p.min_distance < 0.6 ? "bg-amber-400"
+                        : "bg-red-500"
+                      }`} title={`min distance: ${p.min_distance?.toFixed(4) ?? "N/A"}`} />
+                      {p.in_db ? (
+                        <Link href={`/proteins/${p.accession}`} className="font-mono text-sm text-blue-600 hover:underline" onClick={(e) => e.stopPropagation()}>
+                          {p.accession}
+                        </Link>
+                      ) : (
+                        <span className="font-mono text-sm text-gray-700">{p.accession}</span>
+                      )}
+                    </div>
+                    <span className="text-xs text-gray-500">{p.go_count} predicted</span>
+                  </div>
+                  <div className="flex gap-4 text-xs text-gray-500">
+                    <span>dist: {p.min_distance?.toFixed(4) ?? "—"}</span>
+                    <span>known/pred: {p.annotation_count}/{p.go_count}</span>
+                  </div>
+                </div>
+                {selectedAccession === p.accession && (
+                  <div className="border-t px-4 pb-4">
+                    <ProteinDetail
+                      accession={p.accession}
+                      inDb={p.in_db}
+                      predictions={predictions}
+                      annotations={knownAnnotations}
+                      loading={loadingDetail}
+                      onClose={() => setSelectedAccession(null)}
+                      ontologySnapshotId={ontologySnapshotId}
+                      scoringConfig={selectedConfig}
+                    />
+                  </div>
+                )}
+              </div>
+            ))}
           </div>
 
-          <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
-            <div className="grid grid-cols-[160px_90px_120px_90px_90px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
+          {/* Desktop table */}
+          <div className="hidden lg:block overflow-x-auto rounded-lg border bg-white shadow-sm">
+            <div className="grid grid-cols-[160px_90px_120px_120px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
               <div>Accession</div>
               <div>Predicted</div>
               <div>Min Distance</div>
-              <div>Known</div>
-              <div>Matches</div>
+              <div>Known / Pred.</div>
             </div>
 
             {loadingProteins && Array.from({ length: 8 }).map((_, i) => <SkeletonTableRow key={i} cols={5} />)}
@@ -911,12 +1081,18 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
             {!loadingProteins && proteins.map((p) => (
               <div key={p.accession}>
                 <div
-                  className={`grid grid-cols-[160px_90px_120px_90px_90px] gap-2 border-b px-4 py-3 text-sm items-center cursor-pointer transition-colors ${
+                  className={`grid grid-cols-[160px_90px_120px_120px] gap-2 border-b px-4 py-3 text-sm items-center cursor-pointer transition-colors ${
                     selectedAccession === p.accession ? "bg-blue-50" : "hover:bg-gray-50"
                   }`}
                   onClick={() => selectProtein(p.accession, p.in_db)}
                 >
                   <div className="flex items-center gap-2">
+                    <span className={`inline-block w-2 h-2 rounded-full flex-shrink-0 ${
+                      p.min_distance == null ? "bg-gray-300"
+                      : p.min_distance < 0.3 ? "bg-green-500"
+                      : p.min_distance < 0.6 ? "bg-amber-400"
+                      : "bg-red-500"
+                    }`} title={`min distance: ${p.min_distance?.toFixed(4) ?? "N/A"}`} />
                     {p.in_db ? (
                       <Link
                         href={`/proteins/${p.accession}`}
@@ -931,11 +1107,12 @@ export default function PredictionSetDetailPage({ params }: { params: Promise<{
                   </div>
                   <div className="text-gray-700 font-medium">{p.go_count}</div>
                   <div className="text-gray-600 font-mono text-xs">{p.min_distance?.toFixed(4) ?? "—"}</div>
-                  <div className={`text-sm ${p.annotation_count > 0 ? "text-gray-700" : "text-gray-300"}`}>
-                    {p.annotation_count > 0 ? p.annotation_count : "—"}
-                  </div>
-                  <div className={`font-medium text-sm ${p.match_count > 0 ? "text-green-700" : "text-gray-300"}`}>
-                    {p.match_count > 0 ? p.match_count : "—"}
+                  <div className="text-sm font-mono">
+                    {p.annotation_count > 0
+                      ? <span className="text-gray-700">{p.annotation_count}</span>
+                      : <span className="text-gray-300">0</span>}
+                    <span className="text-gray-300 mx-1">/</span>
+                    <span className="text-gray-700">{p.go_count}</span>
                   </div>
                 </div>
 
diff --git a/apps/web/app/[locale]/functional-annotation/page.tsx b/apps/web/app/[locale]/functional-annotation/page.tsx
index 6aca0b0..92626b4 100644
--- a/apps/web/app/[locale]/functional-annotation/page.tsx
+++ b/apps/web/app/[locale]/functional-annotation/page.tsx
@@ -5,6 +5,7 @@ import Link from "next/link";
 import { useTranslations } from "next-intl";
 import { useToast } from "@/components/Toast";
 import { SkeletonTableRow } from "@/components/Skeleton";
+import { ContextBanner } from "@/components/ContextBanner";
 import {
   listEmbeddingConfigs,
   launchPredictGoTerms,
@@ -163,6 +164,17 @@ export default function FunctionalAnnotationPage() {
         <h1 className="text-xl font-semibold">{t("title")}</h1>
       </div>
 
+      <ContextBanner
+        title="Predict GO terms by embedding similarity"
+        description="Uses KNN search to transfer GO annotations from similar proteins. Requires computed embeddings and a loaded annotation set."
+        prerequisites={!loading ? [
+          { label: `${configs.length} embedding config(s)`, met: configs.length > 0, href: "/embeddings" },
+          { label: `${annotationSets.length} annotation set(s)`, met: annotationSets.length > 0, href: "/annotations" },
+          { label: `${ontologySnapshots.length} ontology snapshot(s)`, met: ontologySnapshots.length > 0, href: "/annotations" },
+        ] : undefined}
+        nextStep={{ label: "Evaluation", href: "/evaluation" }}
+      />
+
       <div className="flex gap-1 border-b mb-6 overflow-x-auto">
         {tabs.map((tab) => (
           <button
@@ -238,7 +250,7 @@ export default function FunctionalAnnotationPage() {
                   </select>
                 </div>
 
-                <div className="grid grid-cols-3 gap-3">
+                <div className="grid grid-cols-1 sm:grid-cols-3 gap-3">
                   <div>
                     <label className={labelClass}>{t("predictTab.limitPerEntryLabel")}</label>
                     <input
@@ -324,7 +336,7 @@ export default function FunctionalAnnotationPage() {
                 {/* Search Backend */}
                 <div className="rounded-md border border-gray-200 bg-gray-50 p-4 space-y-3">
                   <p className="text-xs font-semibold uppercase tracking-wide text-gray-500">{t("predictTab.searchBackend")}</p>
-                  <div className="grid grid-cols-2 gap-3">
+                  <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                     <div>
                       <label className={labelClass}>{t("predictTab.searchBackendLabel")}</label>
                       <select value={predSearchBackend} onChange={(e) => setPredSearchBackend(e.target.value)} className={inputClass}>
@@ -352,7 +364,7 @@ export default function FunctionalAnnotationPage() {
                         </select>
                       </div>
                       {predFaissIndex === "IVFFlat" && (
-                        <div className="grid grid-cols-2 gap-3">
+                        <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                           <div>
                             <label className={labelClass}>{t("predictTab.nlistLabel")}</label>
                             <input type="number" value={predFaissNlist} onChange={(e) => setPredFaissNlist(parseInt(e.target.value, 10))} min={1} className={inputClass} />
@@ -364,7 +376,7 @@ export default function FunctionalAnnotationPage() {
                         </div>
                       )}
                       {predFaissIndex === "HNSW" && (
-                        <div className="grid grid-cols-2 gap-3">
+                        <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                           <div>
                             <label className={labelClass}>{t("predictTab.mLabel")}</label>
                             <input type="number" value={predFaissHnswM} onChange={(e) => setPredFaissHnswM(parseInt(e.target.value, 10))} min={2} className={inputClass} />
@@ -422,13 +434,14 @@ export default function FunctionalAnnotationPage() {
           </div>
 
           <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
-            <div className="grid grid-cols-[80px_100px_100px_100px_90px_120px_160px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
+            <div className="grid grid-cols-[80px_100px_100px_100px_90px_80px_50px_160px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
               <div>{t("resultsTab.tableHeaders.id")}</div>
               <div>{t("resultsTab.tableHeaders.config")}</div>
               <div>{t("resultsTab.tableHeaders.annotationSet")}</div>
               <div>{t("resultsTab.tableHeaders.snapshot")}</div>
               <div>{t("resultsTab.tableHeaders.goTerms")}</div>
               <div>{t("resultsTab.tableHeaders.distanceThreshold")}</div>
+              <div>{t("resultsTab.tableHeaders.k")}</div>
               <div>{t("resultsTab.tableHeaders.created")}</div>
               <div></div>
             </div>
@@ -436,7 +449,7 @@ export default function FunctionalAnnotationPage() {
             {predictionSets.map((ps) => (
               <div
                 key={ps.id}
-                className="grid grid-cols-[80px_100px_100px_100px_90px_120px_160px_60px] gap-2 border-b px-4 py-3 text-sm last:border-0 items-center"
+                className="grid grid-cols-[80px_100px_100px_100px_90px_80px_50px_160px_60px] gap-2 border-b px-4 py-3 text-sm last:border-0 items-center"
               >
                 <div className="font-mono text-xs">
                   <Link href={`/functional-annotation/${ps.id}`} className="text-blue-600 hover:underline" title={ps.id}>
@@ -450,6 +463,7 @@ export default function FunctionalAnnotationPage() {
                 <div className="text-gray-600">
                   {ps.distance_threshold != null ? ps.distance_threshold : <span className="text-gray-400">—</span>}
                 </div>
+                <div className="text-gray-600">{ps.limit_per_entry}</div>
                 <div className="text-xs text-gray-400">{formatDate(ps.created_at)}</div>
                 <div className="flex justify-end">
                   <button
diff --git a/apps/web/app/[locale]/jobs/[id]/page.tsx b/apps/web/app/[locale]/jobs/[id]/page.tsx
index 9de6abf..7d1d68f 100644
--- a/apps/web/app/[locale]/jobs/[id]/page.tsx
+++ b/apps/web/app/[locale]/jobs/[id]/page.tsx
@@ -8,6 +8,7 @@ import { StatusBadge } from "@/components/StatusBadge";
 import { EventTimeline } from "@/components/EventTimeline";
 import { useToast } from "@/components/Toast";
 import { useTranslations } from "next-intl";
+import { Breadcrumbs } from "@/components/Breadcrumbs";
 
 const TERMINAL = ["succeeded", "failed", "cancelled"];
 
@@ -140,8 +141,8 @@ export default function JobDetail({ params }: { params: Promise<{ id: string }>
   return (
     <div>
       {/* Header */}
+      <Breadcrumbs />
       <div className="flex flex-wrap items-center gap-3">
-        <Link href="/jobs" className="text-sm text-gray-500 hover:text-gray-800">{t("jobDetail.backToJobs")}</Link>
         <h1 className="text-xl font-semibold">{t("jobDetail.title")}</h1>
         {isLive && (
           <span className="flex items-center gap-1 text-xs text-blue-600">
diff --git a/apps/web/app/[locale]/layout.tsx b/apps/web/app/[locale]/layout.tsx
index 0b99bd9..1fa1b1e 100644
--- a/apps/web/app/[locale]/layout.tsx
+++ b/apps/web/app/[locale]/layout.tsx
@@ -6,6 +6,7 @@ import { NavLinks } from "@/components/NavLinks";
 import { SupportButton } from "@/components/SupportButton";
 import { ToastProvider } from "@/components/Toast";
 import { UsagePolicyModal } from "@/components/UsagePolicyModal";
+import { FloatingJobsWidget } from "@/components/FloatingJobsWidget";
 import { NextIntlClientProvider } from "next-intl";
 import { getMessages } from "next-intl/server";
 import { LanguageSwitcher } from "@/components/LanguageSwitcher";
@@ -33,19 +34,31 @@ export default async function LocaleLayout({
         <NextIntlClientProvider messages={messages}>
           <UsagePolicyModal />
           <ToastProvider>
-            <header className="relative border-b bg-white px-4 sm:px-6 py-3 flex items-center gap-3">
-              <span className="text-lg font-bold tracking-tight text-blue-700">PROTEA</span>
+            <header className="relative border-b bg-white px-4 sm:px-6 py-3 flex items-center gap-2 sm:gap-3">
+              <a href="/" className="text-lg font-bold tracking-tight text-blue-700 hover:text-blue-800 transition-colors">PROTEA</a>
               <span className="hidden lg:inline text-gray-300">|</span>
-              <NavLinks />
+              <NavLinks
+                mobileExtras={
+                  <>
+                    <LanguageSwitcher />
+                    <ResetDbButton />
+                  </>
+                }
+              />
               <div className="ml-auto flex items-center gap-2 sm:gap-3">
-                <LanguageSwitcher />
+                <div className="hidden lg:flex items-center gap-2">
+                  <LanguageSwitcher />
+                </div>
                 <SupportButton />
-                <ResetDbButton />
+                <div className="hidden lg:block">
+                  <ResetDbButton />
+                </div>
               </div>
             </header>
             <main className="mx-auto max-w-5xl px-4 sm:px-6 py-4 sm:py-6">
               {children}
             </main>
+            <FloatingJobsWidget />
           </ToastProvider>
         </NextIntlClientProvider>
       </body>
diff --git a/apps/web/app/[locale]/page.tsx b/apps/web/app/[locale]/page.tsx
index fe3f8ed..9f3f9fd 100644
--- a/apps/web/app/[locale]/page.tsx
+++ b/apps/web/app/[locale]/page.tsx
@@ -1,5 +1,345 @@
-import { redirect } from "next/navigation";
+"use client";
 
-export default function Home({ params }: { params: Promise<{ locale: string }> }) {
-  redirect("/jobs");
+import { useEffect, useState } from "react";
+import { useRouter } from "next/navigation";
+import { useTranslations } from "next-intl";
+import Link from "next/link";
+import { getShowcase, type ShowcaseData } from "../../lib/api";
+import { AnnotateForm } from "../../components/AnnotateForm";
+
+const ASPECTS = ["MFO", "BPO", "CCO"] as const; // aspect codes; HomePage renders cards and table columns in this order
+const ASPECT_COLORS: Record<string, string> = { // colour name spliced into the --color-<name>-200 / -50 CSS variables of each card
+  MFO: "blue",
+  BPO: "green",
+  CCO: "purple",
+};
+const ASPECT_LABELS: Record<string, string> = { // fixed English captions shown under each Fmax card
+  MFO: "Molecular Function",
+  BPO: "Biological Process",
+  CCO: "Cellular Component",
+};
+
+const CATEGORIES = ["NK", "LK", "PK"] as const; // category codes offered as tabs (only those with data are rendered)
+const CATEGORY_LABELS: Record<string, string> = { // fixed English names used as tab tooltip and caption
+  NK: "No Knowledge",
+  LK: "Limited Knowledge",
+  PK: "Partial Knowledge",
+};
+
+const METHOD_KEYS: Record<string, string> = { // method id -> message key passed to t() in the "home" namespace
+  knn_baseline: "knnBaseline",
+  knn_scored: "knnScored",
+  knn_reranker: "knnReranker",
+};
+
+const STAGE_ICONS: Record<string, string> = { // stage name -> short text glyph drawn in the pipeline box
+  sequences: "Aa",
+  embeddings: "E",
+  predictions: "K",
+  reranker_models: "R",
+  evaluations: "F",
+};
+
+const STAGE_I18N: Record<string, string> = { // stage name -> message key passed to t() in the "home" namespace
+  sequences: "stageSequences",
+  embeddings: "stageEmbeddings",
+  predictions: "stageKnn",
+  reranker_models: "stageReranker",
+  evaluations: "stageEvaluation",
+};
+
+/** Returns a non-empty, displayable message for whatever a failed showcase request rejected with. */
+const describeError = (e: any): string =>
+  e?.message || (typeof e === "string" && e) || "Unknown error";
+
+/**
+ * Landing page: annotate form, best Fmax per aspect, method comparison table,
+ * pipeline diagram and entity counts, all rendered from one `getShowcase()` response.
+ */
+export default function HomePage() {
+  const t = useTranslations("home");
+  const router = useRouter();
+  const [data, setData] = useState<ShowcaseData | null>(null);
+  const [error, setError] = useState<string | null>(null);
+  const [activeCategory, setActiveCategory] = useState<string>("NK");
+
+  useEffect(() => {
+    getShowcase().then(setData).catch((e) => setError(describeError(e)));
+  }, []);
+
+  if (error) {
+    return (
+      <div className="max-w-5xl mx-auto px-4 sm:px-6 py-12">
+        <div className="rounded-lg border border-red-200 bg-red-50 p-6 text-center">
+          <p className="text-red-800 text-sm">{error}</p>
+          <button
+            onClick={() => { setError(null); getShowcase().then(setData).catch((e) => setError(describeError(e))); }}
+            className="mt-3 text-sm text-red-600 underline hover:text-red-800"
+          >
+            Retry
+          </button>
+        </div>
+      </div>
+    );
+  }
+
+  if (!data) {
+    return (
+      <div className="max-w-5xl mx-auto px-4 sm:px-6 py-12 space-y-8">
+        <div className="h-8 w-96 bg-gray-100 rounded animate-pulse" />
+        <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
+          {[0, 1, 2].map((i) => (
+            <div key={i} className="h-32 bg-gray-100 rounded-lg animate-pulse" />
+          ))}
+        </div>
+        <div className="h-48 bg-gray-100 rounded-lg animate-pulse" />
+      </div>
+    );
+  }
+
+  const hasFmax = data.best_fmax && Object.keys(data.best_fmax).length > 0;
+
+  // Available categories (only those with data)
+  const availableCategories = CATEGORIES.filter(
+    (cat) => data.best_fmax?.[cat] || data.method_comparison?.[cat]
+  );
+
+  // Current category data. When the selected category (initially "NK") has no
+  // data, show the first one that does so the tabs and cards never start empty.
+  const shownCategory = availableCategories.some((cat) => cat === activeCategory)
+    ? activeCategory
+    : availableCategories[0] ?? activeCategory;
+  const catFmax = data.best_fmax?.[shownCategory] ?? {};
+  const catMethods = data.method_comparison?.[shownCategory] ?? [];
+  const baseline = catMethods.find((m) => m.method === "knn_baseline");
+
+  return (
+    <div className="max-w-5xl mx-auto px-4 sm:px-6 py-8 space-y-10">
+      {/* ── Hero ──────────────────────────────────────────────────── */}
+      <section className="text-center space-y-3">
+        <h1 className="text-3xl sm:text-4xl font-bold text-gray-900 tracking-tight">PROTEA</h1>
+        <p className="text-lg text-gray-500 max-w-2xl mx-auto">{t("subtitle")}</p>
+      </section>
+
+      {/* ── Annotate form ─────────────────────────────────────────── */}
+      <AnnotateForm />
+
+      {/* ── Category tabs ─────────────────────────────────────────── */}
+      {hasFmax ? (
+        <>
+          <section>
+            <div className="flex items-center gap-4 mb-4">
+              <h2 className="text-sm font-medium text-gray-500 uppercase tracking-wider">{t("bestResults")}</h2>
+              <div className="flex gap-1 rounded-lg bg-gray-100 p-0.5">
+                {availableCategories.map((cat) => (
+                  <button
+                    key={cat}
+                    onClick={() => setActiveCategory(cat)}
+                    className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${
+                      shownCategory === cat
+                        ? "bg-white text-gray-900 shadow-sm"
+                        : "text-gray-500 hover:text-gray-700"
+                    }`}
+                    title={CATEGORY_LABELS[cat]}
+                  >
+                    {cat}
+                  </button>
+                ))}
+              </div>
+              <span className="text-xs text-gray-400" title={CATEGORY_LABELS[shownCategory]}>
+                {CATEGORY_LABELS[shownCategory]}
+              </span>
+            </div>
+
+            {/* ── Fmax cards ────────────────────────────────────────── */}
+            <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
+              {ASPECTS.map((aspect) => {
+                const d = catFmax[aspect];
+                if (!d) return null;
+                const color = ASPECT_COLORS[aspect];
+                return (
+                  <div
+                    key={aspect}
+                    className="rounded-xl border-2 p-5 text-center"
+                    style={{
+                      borderColor: `var(--color-${color}-200, #bfdbfe)`,
+                      backgroundColor: `var(--color-${color}-50, #eff6ff)`,
+                    }}
+                  >
+                    <div className="text-4xl font-bold text-gray-900 tabular-nums">
+                      {d.fmax.toFixed(2)}
+                    </div>
+                    <div className="text-sm font-semibold text-gray-600 mt-1">
+                      {t("fmax")} {aspect}
+                    </div>
+                    <div className="text-xs text-gray-400 mt-1">
+                      {ASPECT_LABELS[aspect]}
+                    </div>
+                    <div className="text-xs text-gray-400 mt-1">
+                      {d.method_label}
+                    </div>
+                  </div>
+                );
+              })}
+            </div>
+          </section>
+
+          {/* ── Method comparison table ───────────────────────────── */}
+          {catMethods.length > 0 && (
+            <section>
+              <h2 className="text-sm font-medium text-gray-500 uppercase tracking-wider mb-3">
+                {t("methodComparison")}
+                <span className="ml-2 text-xs font-normal normal-case text-gray-400">
+                  ({shownCategory})
+                </span>
+              </h2>
+              <div className="overflow-x-auto rounded-lg border">
+                <table className="w-full text-sm">
+                  <thead>
+                    <tr className="bg-gray-50 text-left">
+                      <th className="px-4 py-3 font-medium text-gray-600">{t("method")}</th>
+                      {ASPECTS.map((a) => (
+                        <th key={a} className="px-4 py-3 font-medium text-gray-600 text-center">
+                          {a}
+                        </th>
+                      ))}
+                    </tr>
+                  </thead>
+                  <tbody>
+                    {catMethods.map((row, i) => {
+                      // Highlight the row when it is the best_fmax method for at least one aspect.
+                      const isBest = ASPECTS.some((a) => catFmax[a]?.method === row.method);
+                      return (
+                        <tr
+                          key={row.method}
+                          className={`border-t ${isBest ? "bg-blue-50" : i % 2 === 0 ? "bg-white" : "bg-gray-50/50"}`}
+                        >
+                          <td className="px-4 py-3 font-medium text-gray-900">
+                            {METHOD_KEYS[row.method] ? t(METHOD_KEYS[row.method] as any) : row.method}
+                            {isBest && (
+                              <span className="ml-2 text-xs text-blue-600 font-normal">best</span>
+                            )}
+                          </td>
+                          {ASPECTS.map((aspect) => {
+                            const val = (row as any)[aspect]?.fmax;
+                            const baseVal = baseline ? (baseline as any)[aspect]?.fmax : null;
+                            const delta = val != null && baseVal != null && row.method !== "knn_baseline"
+                              ? val - baseVal
+                              : null;
+                            return (
+                              <td key={aspect} className="px-4 py-3 text-center tabular-nums">
+                                {val != null ? (
+                                  <span>
+                                    <span className="font-semibold">{val.toFixed(3)}</span>
+                                    {delta != null && (
+                                      <span className={`ml-1.5 text-xs ${delta > 0 ? "text-green-600" : delta < 0 ? "text-red-600" : "text-gray-400"}`}>
+                                        {delta > 0 ? "+" : ""}{delta.toFixed(3)}
+                                      </span>
+                                    )}
+                                  </span>
+                                ) : (
+                                  <span className="text-gray-300">&mdash;</span>
+                                )}
+                              </td>
+                            );
+                          })}
+                        </tr>
+                      );
+                    })}
+                  </tbody>
+                </table>
+              </div>
+            </section>
+          )}
+        </>
+      ) : (
+        <section className="rounded-xl border-2 border-dashed border-gray-200 bg-gray-50 p-8 text-center">
+          <p className="text-gray-500">{t("noDataYet")}</p>
+          <Link
+            href="/proteins"
+            className="mt-4 inline-block rounded-md bg-blue-600 px-4 py-2 text-sm font-medium text-white hover:bg-blue-700 transition-colors"
+          >
+            {t("getStarted")}
+          </Link>
+        </section>
+      )}
+
+      {/* ── Pipeline diagram ──────────────────────────────────────── */}
+      <section>
+        <h2 className="text-sm font-medium text-gray-500 uppercase tracking-wider mb-3">{t("pipeline")}</h2>
+        <div className="flex flex-col sm:flex-row items-center justify-center gap-2 sm:gap-0">
+          {data.pipeline_stages.map((stage, i) => (
+            <div key={stage.name} className="flex flex-col sm:flex-row items-center">
+              {i > 0 && (
+                <div className="text-gray-300 text-xl sm:mx-2 rotate-90 sm:rotate-0 my-1 sm:my-0 select-none">
+                  &rarr;
+                </div>
+              )}
+              <button
+                onClick={() => router.push(stage.href)}
+                className="group relative flex flex-col items-center justify-center w-28 h-20 rounded-lg border-2 border-gray-200 bg-white hover:border-blue-400 hover:shadow-md transition-all cursor-pointer"
+              >
+                <span className="text-xs font-bold text-gray-400 group-hover:text-blue-500 transition-colors">
+                  {STAGE_ICONS[stage.name] ?? stage.name.slice(0, 3).toUpperCase()}
+                </span>
+                <span className="text-xs font-medium text-gray-700 mt-1">
+                  {STAGE_I18N[stage.name] ? t(STAGE_I18N[stage.name] as any) : stage.name}
+                </span>
+                <span className="text-[10px] text-gray-400 tabular-nums mt-0.5">
+                  {stage.count.toLocaleString()}
+                </span>
+              </button>
+            </div>
+          ))}
+          {/* LLM stage (future) */}
+          <div className="flex flex-col sm:flex-row items-center">
+            <div className="text-gray-300 text-xl sm:mx-2 rotate-90 sm:rotate-0 my-1 sm:my-0 select-none">
+              &rarr;
+            </div>
+            <div className="flex flex-col items-center justify-center w-28 h-20 rounded-lg border-2 border-dashed border-gray-200 bg-gray-50">
+              <span className="text-xs font-bold text-gray-300">LLM</span>
+              <span className="text-xs font-medium text-gray-400 mt-1">{t("stageLlm")}</span>
+              <span className="text-[10px] text-gray-300 mt-0.5">soon</span>
+            </div>
+          </div>
+        </div>
+      </section>
+
+      {/* ── Stats bar ─────────────────────────────────────────────── */}
+      <section>
+        <h2 className="text-sm font-medium text-gray-500 uppercase tracking-wider mb-3">{t("stats")}</h2>
+        <div className="grid grid-cols-2 sm:grid-cols-4 gap-3">
+          {([
+            ["proteins", data.counts.proteins],
+            ["sequences", data.counts.sequences],
+            ["embeddings", data.counts.embeddings],
+            ["predictions", data.counts.predictions],
+          ] as [string, number][]).map(([key, count]) => (
+            <div key={key} className="rounded-lg border bg-white p-3 text-center">
+              <div className="text-2xl font-bold text-gray-900 tabular-nums">
+                {count.toLocaleString()}
+              </div>
+              <div className="text-xs text-gray-500 mt-1">{t(key as any)}</div>
+            </div>
+          ))}
+        </div>
+      </section>
+
+      {/* ── CTAs ──────────────────────────────────────────────────── */}
+      <section className="flex flex-col sm:flex-row items-center justify-center gap-3 pt-2">
+        <Link
+          href="/evaluation"
+          className="rounded-md bg-blue-600 px-6 py-2.5 text-sm font-medium text-white hover:bg-blue-700 transition-colors"
+        >
+          {t("exploreResults")}
+        </Link>
+        <a
+          href="#annotate-form"
+          className="rounded-md border border-gray-300 bg-white px-6 py-2.5 text-sm font-medium text-gray-700 hover:bg-gray-50 transition-colors"
+        >
+          {t("annotateProteins")}
+        </a>
+      </section>
+    </div>
+  );
 }
diff --git a/apps/web/app/[locale]/proteins/[accession]/page.tsx b/apps/web/app/[locale]/proteins/[accession]/page.tsx
index 2c5b4c0..5ca2158 100644
--- a/apps/web/app/[locale]/proteins/[accession]/page.tsx
+++ b/apps/web/app/[locale]/proteins/[accession]/page.tsx
@@ -4,6 +4,7 @@ import { use, useEffect, useState } from "react";
 import Link from "next/link";
 import { useToast } from "@/components/Toast";
 import { useTranslations } from "next-intl";
+import { Breadcrumbs } from "@/components/Breadcrumbs";
 import { getProtein, getProteinAnnotations, getGoSubgraph, listOntologySnapshots, ProteinDetail, ProteinAnnotation, GoSubgraph } from "@/lib/api";
 import dynamic from "next/dynamic";
 const GoGraph = dynamic(() => import("@/components/GoGraph"), { ssr: false });
@@ -88,7 +89,7 @@ export default function ProteinDetailPage({ params }: { params: Promise<{ access
     <>
       {/* Header */}
       <div className="mb-6">
-        <Link href="/proteins" className="text-sm text-gray-400 hover:text-gray-600">{t("backToProteins")}</Link>
+        <Breadcrumbs />
         <div className="flex items-start gap-4 mt-2">
           <div>
             <h1 className="text-2xl font-bold text-gray-900 font-mono">{protein.accession}</h1>
diff --git a/apps/web/app/[locale]/proteins/page.tsx b/apps/web/app/[locale]/proteins/page.tsx
index 4360894..6334e8e 100644
--- a/apps/web/app/[locale]/proteins/page.tsx
+++ b/apps/web/app/[locale]/proteins/page.tsx
@@ -236,8 +236,41 @@ export default function ProteinsPage() {
             <span className="ml-auto text-sm text-gray-400">{t("browseTab.totalProteins", { count: total.toLocaleString() })}</span>
           </div>
 
-          {/* Table */}
-          <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
+          {/* Mobile card list */}
+          <div className="lg:hidden space-y-2">
+            {loadingBrowse && Array.from({ length: 4 }).map((_, i) => (
+              <div key={i} className="rounded-lg border bg-white p-4 shadow-sm animate-pulse">
+                <div className="h-4 bg-gray-200 rounded w-1/3 mb-2" />
+                <div className="h-3 bg-gray-100 rounded w-2/3" />
+              </div>
+            ))}
+            {!loadingBrowse && proteins.length === 0 && (
+              <div className="rounded-lg border bg-white px-4 py-12 text-center text-sm text-gray-400 shadow-sm">
+                {t("browseTab.noProteinsCta")}
+              </div>
+            )}
+            {!loadingBrowse && proteins.map((p) => (
+              <Link
+                key={p.accession}
+                href={`/proteins/${p.accession}`}
+                className="block rounded-lg border bg-white p-4 shadow-sm hover:bg-blue-50 transition-colors"
+              >
+                <div className="flex items-center justify-between mb-1">
+                  <span className="font-mono text-sm text-blue-600">{p.accession}</span>
+                  <ReviewedBadge reviewed={p.reviewed} />
+                </div>
+                <p className="text-sm font-medium text-gray-800 truncate">{p.gene_name ?? "—"}</p>
+                <p className="text-xs text-gray-500 truncate">{p.organism ?? "—"}</p>
+                <div className="mt-1 flex gap-3 text-xs text-gray-400">
+                  <span>{p.entry_name ?? "—"}</span>
+                  {p.length != null && <span>{p.length.toLocaleString()} aa</span>}
+                </div>
+              </Link>
+            ))}
+          </div>
+
+          {/* Desktop table */}
+          <div className="hidden lg:block overflow-x-auto rounded-lg border bg-white shadow-sm">
             <div className="grid grid-cols-[130px_140px_120px_1fr_80px_110px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
               <div>{t("browseTab.tableHeaders.accession")}</div>
               <div>{t("browseTab.tableHeaders.entryName")}</div>
@@ -353,7 +386,7 @@ export default function ProteinsPage() {
                 <input type="text" value={searchCriteria} onChange={(e) => setSearchCriteria(e.target.value)} required className={inputClass} placeholder="organism_id:9606 AND reviewed:true" />
                 <p className="mt-1 text-xs text-gray-400">{t("insertTab.searchCriteriaHelper")}</p>
               </div>
-              <div className="grid grid-cols-2 gap-3">
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                 <div>
                   <label className={labelClass}>{t("insertTab.pageSizeLabel")}</label>
                   <input type="number" value={pageSize} onChange={(e) => setPageSize(parseInt(e.target.value, 10))} min={1} className={inputClass} />
@@ -395,7 +428,7 @@ export default function ProteinsPage() {
                 <input type="text" value={metaCriteria} onChange={(e) => setMetaCriteria(e.target.value)} required className={inputClass} placeholder="organism_id:9606 AND reviewed:true" />
                 <p className="mt-1 text-xs text-gray-400">{t("metadataTab.searchCriteriaHelper")}</p>
               </div>
-              <div className="grid grid-cols-2 gap-3">
+              <div className="grid grid-cols-1 sm:grid-cols-2 gap-3">
                 <div>
                   <label className={labelClass}>{t("metadataTab.pageSizeLabel")}</label>
                   <input type="number" value={metaPageSize} onChange={(e) => setMetaPageSize(parseInt(e.target.value, 10))} min={1} className={inputClass} />
diff --git a/apps/web/app/[locale]/query-sets/page.tsx b/apps/web/app/[locale]/query-sets/page.tsx
index fc1a63b..49b6c00 100644
--- a/apps/web/app/[locale]/query-sets/page.tsx
+++ b/apps/web/app/[locale]/query-sets/page.tsx
@@ -117,7 +117,7 @@ export default function QuerySetsPage() {
 
       {/* List */}
       <div className="overflow-x-auto rounded-lg border bg-white shadow-sm">
-        <div className="grid grid-cols-[1fr_100px_160px_80px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
+        <div className="grid grid-cols-[1fr_80px_140px_60px] gap-2 border-b bg-gray-50 px-4 py-2.5 text-xs font-semibold uppercase tracking-wide text-gray-500">
           <div>{t("tableHeaders.name")}</div>
           <div>{t("tableHeaders.sequences")}</div>
           <div>{t("tableHeaders.created")}</div>
@@ -143,7 +143,7 @@ export default function QuerySetsPage() {
         {sets.map((qs) => (
           <div key={qs.id} className="border-b last:border-0">
             <div
-              className="grid grid-cols-[1fr_100px_160px_80px] gap-2 px-4 py-3 text-sm items-center hover:bg-blue-50 cursor-pointer transition-colors"
+              className="grid grid-cols-[1fr_80px_140px_60px] gap-2 px-4 py-3 text-sm items-center hover:bg-blue-50 cursor-pointer transition-colors"
               onClick={() => setExpandedId(expandedId === qs.id ? null : qs.id)}
             >
               <div>
diff --git a/apps/web/app/[locale]/reranker/page.tsx b/apps/web/app/[locale]/reranker/page.tsx
new file mode 100644
index 0000000..edf0751
--- /dev/null
+++ b/apps/web/app/[locale]/reranker/page.tsx
@@ -0,0 +1,574 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import { ContextBanner } from "@/components/ContextBanner";
+import {
+  baseUrl,
+  listPredictionSets,
+  listAnnotationSets,
+  listRerankers,
+  trainReranker,
+  deleteReranker,
+  getRerankedTsvUrl,
+  getRerankerMetrics,
+  getTrainingDataTsvUrl,
+} from "@/lib/api";
+import type { PredictionSet, AnnotationSet, RerankerModel } from "@/lib/api";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+async function apiFetch<T>(path: string, init?: RequestInit): Promise<T> {
+  const response = await fetch(`${baseUrl()}${path}`, { cache: "no-store", ...init }); // uncached unless `init` overrides it
+  if (response.ok) return response.json();
+  throw new Error(await response.text()); // non-2xx: the body text becomes the error message
+}
+
+type EvaluationSet = {
+  id: string;
+  old_annotation_set_id: string;
+  new_annotation_set_id: string;
+  created_at: string;
+  stats: Record<string, number>;
+};
+
+const listEvaluationSets = () => apiFetch<EvaluationSet[]>("/annotations/evaluation-sets");
+
+function shortId(id: string) { return id.substring(0, 8); } // first 8 characters, used as a compact display id
+
+function predLabel(p: PredictionSet) {
+  // Builds "<config> · <annotation label> · k=N · M preds (idprefix…)"; empty optional parts are skipped.
+  const count = p.prediction_count == null ? [] : [`${p.prediction_count.toLocaleString()} preds`];
+  const head = [p.embedding_config_name, p.annotation_set_label].filter((s) => !!s);
+  const summary = [...head, `k=${p.limit_per_entry}`, ...count].join(" · ");
+  const idPrefix = shortId(p.id);
+  return `${summary} (${idPrefix}…)`;
+}
+
+function evalLabel(es: EvaluationSet, annotationSets: AnnotationSet[]) {
+  const describe = (setId: string) => { // "[SOURCE] version" when the set is known, otherwise the id prefix
+    const hit = annotationSets.find((candidate) => candidate.id === setId);
+    return hit ? `[${hit.source.toUpperCase()}] ${hit.source_version ?? "?"}` : shortId(setId);
+  };
+  const deltaCount = es.stats.delta_proteins ?? "?";
+  return `${describe(es.old_annotation_set_id)} → ${describe(es.new_annotation_set_id)} · ${deltaCount} delta proteins (${shortId(es.id)}…)`;
+}
+
+const labelClass = "block text-sm font-medium text-gray-700 mb-1";
+const selectClass =
+  "w-full rounded-md border border-gray-300 px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500";
+const btnPrimary =
+  "rounded-md bg-blue-600 px-4 py-2 text-sm font-medium text-white hover:bg-blue-700 disabled:opacity-50 transition-colors";
+const btnDanger =
+  "rounded-md bg-red-50 border border-red-200 px-3 py-1.5 text-xs font-medium text-red-600 hover:bg-red-100 transition-colors";
+
+const CATEGORY_HINTS: Record<string, string> = {
+  nk: "No Knowledge: proteins with zero GO annotations at t0. Hardest setting — measures pure prediction ability.",
+  lk: "Limited Knowledge: proteins annotated in some GO namespaces but not all at t0. New annotations in previously empty namespaces.",
+  pk: "Partial Knowledge: proteins that already had annotations in a namespace at t0 and gained new ones at t1.",
+};
+
+const ASPECT_LABELS: Record<string, string> = {
+  bpo: "BPO (Biological Process)",
+  mfo: "MFO (Molecular Function)",
+  cco: "CCO (Cellular Component)",
+};
+
+// ---------------------------------------------------------------------------
+// Feature importance bar chart
+// ---------------------------------------------------------------------------
+
+function FeatureImportanceChart({ importance }: { importance: Record<string, number> }) {
+  // Rank features by descending value and drop non-positive ones; bar widths are relative to the top entry.
+  const ranked = Object.entries(importance).sort((lhs, rhs) => rhs[1] - lhs[1]).filter((pair) => pair[1] > 0);
+  if (!ranked.length) return <p className="text-xs text-gray-400">No feature importance data</p>;
+  const [, top] = ranked[0];
+  const compact = (x: number) => (x >= 1000 ? `${(x / 1000).toFixed(1)}k` : x.toFixed(0));
+
+  return (
+    <div className="space-y-1">
+      {ranked.map(([feature, score]) => {
+        const barWidth = `${Math.round((score / top) * 100)}%`;
+        return (
+          <div key={feature} className="flex items-center gap-2 text-xs">
+            <span className="w-40 shrink-0 text-right text-gray-600 truncate" title={feature}>{feature}</span>
+            <div className="flex-1 h-4 bg-gray-100 rounded overflow-hidden">
+              <div className="h-4 bg-blue-400 rounded" style={{ width: barWidth }} />
+            </div>
+            <span className="w-20 shrink-0 font-mono text-gray-500 text-right">
+              {compact(score)}
+            </span>
+          </div>
+        );
+      })}
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Metrics display
+// ---------------------------------------------------------------------------
+
+function MetricsBadge({ label, value, suffix }: { label: string; value: number | string | undefined; suffix?: string }) {
+  if (value == null) return null; // render nothing for a missing metric (undefined, or null from a JSON payload)
+  const formatted = typeof value !== "number" ? value : Number.isInteger(value) ? value.toLocaleString() : value.toFixed(4); // whole-number counts get no ".0000" padding
+  return (
+    <div className="rounded-lg border bg-white p-3 shadow-sm text-center">
+      <p className="text-[10px] font-semibold uppercase tracking-wide text-gray-400">{label}</p>
+      <p className="text-lg font-bold text-gray-900 mt-0.5">{formatted}{suffix}</p>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Reranker card
+// ---------------------------------------------------------------------------
+
+function RerankerCard({
+  model,
+  predictionSets,
+  evaluationSets,
+  annotationSets,
+  onDelete,
+}: {
+  model: RerankerModel;
+  predictionSets: PredictionSet[];
+  evaluationSets: EvaluationSet[];
+  annotationSets: AnnotationSet[];
+  onDelete: () => void;
+}) {
+  const [expanded, setExpanded] = useState(false);
+  const [metricsLoading, setMetricsLoading] = useState(false);
+  const [metrics, setMetrics] = useState<Record<string, any> | null>(null);
+  const [metricsError, setMetricsError] = useState<string | null>(null);
+  const [deleting, setDeleting] = useState(false);
+
+  // For computing metrics on a different prediction set
+  const [metricsPsId, setMetricsPsId] = useState(model.prediction_set_id ?? "");
+  const [metricsEsId, setMetricsEsId] = useState(model.evaluation_set_id ?? "");
+  const [metricsCategory, setMetricsCategory] = useState(model.category);
+
+  async function handleComputeMetrics() {
+    if (!(metricsPsId && metricsEsId)) return; // both selections are required (mirrors the button's disabled rule)
+    setMetricsLoading(true);
+    setMetricsError(null);
+    setMetrics(null); // drop the previous result while the new request is in flight
+    try {
+      setMetrics(await getRerankerMetrics(metricsPsId, model.id, metricsEsId, metricsCategory));
+    } catch (e: any) {
+      const reason = e.message ?? "Failed to compute metrics";
+      setMetricsError(reason);
+    } finally {
+      setMetricsLoading(false);
+    }
+  }
+
+  async function handleDelete() {
+    if (!confirm(`Delete reranker "${model.name}"?`)) return;
+    setDeleting(true);
+    try {
+      await deleteReranker(model.id);
+      onDelete(); // notify the owner only after the server-side delete succeeded
+    } catch (e: any) {
+      setDeleting(false); // re-enable the button so the user can retry
+      alert(e?.message ?? "Failed to delete reranker"); // report the failure instead of swallowing it silently
+    }
+  }
+  const m = model.metrics;
+
+  return (
+    <div className="rounded-lg border bg-white shadow-sm overflow-hidden">
+      <div
+        className="px-4 py-3 cursor-pointer hover:bg-gray-50 transition-colors"
+        onClick={() => setExpanded(!expanded)}
+      >
+        <div className="flex items-center justify-between">
+          <div className="flex items-center gap-3">
+            <span className="font-semibold text-gray-900">{model.name}</span>
+            <span className="rounded-full border px-2 py-0.5 text-xs font-medium bg-indigo-50 text-indigo-700 border-indigo-100 uppercase">
+              {model.category}
+            </span>
+            {model.aspect && (
+              <span className="rounded-full border px-2 py-0.5 text-xs font-medium bg-amber-50 text-amber-700 border-amber-100 uppercase">
+                {model.aspect}
+              </span>
+            )}
+          </div>
+          <div className="flex items-center gap-3">
+            <span className="text-xs text-gray-400">{new Date(model.created_at).toLocaleDateString()}</span>
+            <span className="text-gray-300 text-xs">{expanded ? "▲" : "▼"}</span>
+          </div>
+        </div>
+        <div className="flex flex-wrap gap-4 mt-2 text-xs text-gray-500">
+          <span>AUC: <strong className="text-gray-700">{m.val_auc?.toFixed(4) ?? "—"}</strong></span>
+          <span>F1: <strong className="text-gray-700">{m.val_f1?.toFixed(4) ?? "—"}</strong></span>
+          <span>Precision: <strong className="text-gray-700">{m.val_precision?.toFixed(4) ?? "—"}</strong></span>
+          <span>Recall: <strong className="text-gray-700">{m.val_recall?.toFixed(4) ?? "—"}</strong></span>
+          <span>Positive rate: <strong className="text-gray-700">{m.positive_rate != null ? `${(m.positive_rate * 100).toFixed(2)}%` : "—"}</strong></span>
+        </div>
+      </div>
+
+      {expanded && (
+        <div className="border-t px-4 py-4 space-y-5">
+          {/* Validation metrics */}
+          <div>
+            <p className="text-xs font-semibold uppercase tracking-wide text-gray-400 mb-2">Validation metrics</p>
+            <div className="grid grid-cols-2 sm:grid-cols-4 gap-2">
+              <MetricsBadge label="AUC" value={m.val_auc} />
+              <MetricsBadge label="Log-loss" value={m.val_logloss} />
+              <MetricsBadge label="F1" value={m.val_f1} />
+              <MetricsBadge label="Best iteration" value={m.best_iteration} />
+            </div>
+            <div className="flex flex-wrap gap-4 mt-2 text-xs text-gray-500">
+              <span>Train samples: {m.train_samples?.toLocaleString()}</span>
+              <span>Val samples: {m.val_samples?.toLocaleString()}</span>
+            </div>
+          </div>
+
+          {/* Feature importance */}
+          <div>
+            <p className="text-xs font-semibold uppercase tracking-wide text-gray-400 mb-2">Feature importance (gain)</p>
+            <FeatureImportanceChart importance={model.feature_importance} />
+          </div>
+
+          {/* Download reranked TSV */}
+          {model.prediction_set_id && (
+            <div>
+              <p className="text-xs font-semibold uppercase tracking-wide text-gray-400 mb-2">Download re-ranked predictions</p>
+              <a
+                href={getRerankedTsvUrl(model.prediction_set_id, model.id)}
+                download={`reranked_${shortId(model.id)}.tsv`}
+                className="inline-flex items-center gap-1.5 rounded-md border bg-white px-3 py-1.5 text-sm font-medium text-gray-700 shadow-sm hover:bg-gray-50"
+              >
+                ↓ Download reranked TSV
+              </a>
+            </div>
+          )}
+
+          {/* Compute CAFA metrics */}
+          <div>
+            <p className="text-xs font-semibold uppercase tracking-wide text-gray-400 mb-2">Compute CAFA metrics</p>
+            <div className="grid grid-cols-1 sm:grid-cols-3 gap-2 mb-2">
+              <div>
+                <label className="text-xs text-gray-500 mb-0.5 block">Prediction set</label>
+                <select value={metricsPsId} onChange={(e) => setMetricsPsId(e.target.value)} className={selectClass}>
+                  <option value="">Select...</option>
+                  {predictionSets.map((ps) => (
+                    <option key={ps.id} value={ps.id}>{predLabel(ps)}</option>
+                  ))}
+                </select>
+              </div>
+              <div>
+                <label className="text-xs text-gray-500 mb-0.5 block">Evaluation set</label>
+                <select value={metricsEsId} onChange={(e) => setMetricsEsId(e.target.value)} className={selectClass}>
+                  <option value="">Select...</option>
+                  {evaluationSets.map((es) => (
+                    <option key={es.id} value={es.id}>{evalLabel(es, annotationSets)}</option>
+                  ))}
+                </select>
+              </div>
+              <div>
+                <label className="text-xs text-gray-500 mb-0.5 block">Category</label>
+                <select value={metricsCategory} onChange={(e) => setMetricsCategory(e.target.value)} className={selectClass}>
+                  <option value="nk">NK (No Knowledge)</option>
+                  <option value="lk">LK (Limited Knowledge)</option>
+                  <option value="pk">PK (Partial Knowledge)</option>
+                </select>
+                <p className="text-[10px] text-gray-400 mt-1 leading-snug">{CATEGORY_HINTS[metricsCategory]}</p>
+              </div>
+            </div>
+            <button
+              onClick={handleComputeMetrics}
+              disabled={!metricsPsId || !metricsEsId || metricsLoading}
+              className={btnPrimary}
+            >
+              {metricsLoading ? "Computing... (this may take 30-60s)" : "Compute metrics"}
+            </button>
+            {metricsError && <p className="text-xs text-red-500 mt-2">{metricsError}</p>}
+            {metrics && (
+              <div className="mt-3 rounded-md border bg-gray-50 p-3">
+                <div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-2">
+                  <MetricsBadge label="Fmax" value={metrics.fmax} />
+                  <MetricsBadge label="AUC-PR" value={metrics.auc_pr} />
+                  <MetricsBadge label="Threshold" value={metrics.threshold_at_fmax} />
+                  <MetricsBadge label="GT proteins" value={metrics.n_ground_truth_proteins} />
+                  <MetricsBadge label="Pred. proteins" value={metrics.n_predicted_proteins} />
+                  <MetricsBadge label="Predictions" value={metrics.n_predictions} />
+                </div>
+                {metrics.curve && metrics.curve.length > 0 && (
+                  <p className="text-[10px] text-gray-400 mt-2">{metrics.curve.length} PR curve points computed</p>
+                )}
+              </div>
+            )}
+          </div>
+
+          {/* Source info */}
+          <div className="flex flex-wrap gap-4 text-xs text-gray-400 border-t pt-3">
+            <span>Prediction set: <span className="font-mono">{model.prediction_set_id ? shortId(model.prediction_set_id) : "—"}</span></span>
+            <span>Evaluation set: <span className="font-mono">{model.evaluation_set_id ? shortId(model.evaluation_set_id) : "—"}</span></span>
+            <span>ID: <span className="font-mono">{shortId(model.id)}</span></span>
+          </div>
+
+          {/* Delete */}
+          <div className="border-t pt-3">
+            <button onClick={handleDelete} disabled={deleting} className={btnDanger}>
+              {deleting ? "Deleting..." : "Delete reranker"}
+            </button>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Main page
+// ---------------------------------------------------------------------------
+
+export default function RerankerPage() {
+  const [rerankers, setRerankers] = useState<RerankerModel[]>([]);
+  const [predictionSets, setPredictionSets] = useState<PredictionSet[]>([]);
+  const [evaluationSets, setEvaluationSets] = useState<EvaluationSet[]>([]);
+  const [annotationSets, setAnnotationSets] = useState<AnnotationSet[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+
+  // Train form
+  const [trainName, setTrainName] = useState("");
+  const [trainPsId, setTrainPsId] = useState("");
+  const [trainEsId, setTrainEsId] = useState("");
+  const [trainCategory, setTrainCategory] = useState("nk");
+  const [trainAspect, setTrainAspect] = useState("");
+  const [trainNegPosRatio, setTrainNegPosRatio] = useState("");
+  const [extraPairs, setExtraPairs] = useState<{ psId: string; esId: string }[]>([]);
+  const [training, setTraining] = useState(false);
+  const [trainError, setTrainError] = useState<string | null>(null);
+
+  async function loadAll() {
+    // Refresh every collection the page renders. The four requests are
+    // started together and applied together: if any one rejects, none of
+    // the lists is updated and the failure message is stored in `error`.
+    // `loading` is reset in all cases.
+    setLoading(true);
+    setError(null);
+    try {
+      const pending = [listRerankers(), listPredictionSets(), listEvaluationSets(), listAnnotationSets()] as const;
+      const [models, predictions, evaluations, annotations] = await Promise.all(pending);
+      setRerankers(models);
+      setPredictionSets(predictions);
+      setEvaluationSets(evaluations);
+      setAnnotationSets(annotations);
+    } catch (e: any) {
+      setError(e.message ?? "Failed to load data");
+    } finally {
+      setLoading(false);
+    }
+  }
+
+  useEffect(() => { loadAll(); }, []);
+
+  async function handleTrain() {
+    const name = trainName.trim();
+    if (!name || !trainPsId || !trainEsId) return;
+    setTraining(true);
+    setTrainError(null);
+    try {
+      // Only fully specified extra pairs are sent; half-filled rows are ignored.
+      const extras = extraPairs.flatMap(({ psId, esId }) => (psId && esId ? [{ prediction_set_id: psId, evaluation_set_id: esId }] : []));
+      const created = await trainReranker({
+        name,
+        prediction_set_id: trainPsId,
+        evaluation_set_id: trainEsId,
+        category: trainCategory,
+        aspect: trainAspect || null,
+        neg_pos_ratio: trainNegPosRatio ? parseFloat(trainNegPosRatio) : null,
+        extra_pairs: extras.length > 0 ? extras : undefined,
+      });
+      setRerankers((current) => [...current, created]);
+      setTrainName("");
+    } catch (e: any) {
+      setTrainError(e.message ?? "Training failed");
+    } finally {
+      setTraining(false);
+    }
+  }
+
+  return (
+    <>
+      <h1 className="text-xl font-semibold mb-1">Re-ranker Models</h1>
+
+      <ContextBanner
+        title="Train a LightGBM model to re-rank KNN predictions"
+        description="Uses features like alignment scores, taxonomic distance, and embedding similarity to learn an optimal ranking. Requires a prediction set and evaluation set for training."
+        prerequisites={[
+          { label: `${predictionSets.length} prediction set(s)`, met: predictionSets.length > 0, href: "/functional-annotation" },
+          { label: `${evaluationSets.length} evaluation set(s)`, met: evaluationSets.length > 0, href: "/evaluation" },
+        ]}
+        nextStep={{ label: "Evaluation", href: "/evaluation" }}
+      />
+      <p className="text-sm text-gray-500 mb-6">
+        LightGBM binary classifiers trained on temporal holdout data (CAFA protocol).
+        A re-ranker uses alignment, taxonomy, and aggregate features to re-score GO predictions
+        with calibrated probabilities, replacing the raw embedding distance ranking.
+      </p>
+
+      {/* Train new reranker */}
+      <div className="rounded-lg border bg-white p-5 shadow-sm mb-6">
+        <h2 className="text-sm font-semibold text-gray-700 mb-4">Train new re-ranker</h2>
+        <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-6 gap-3 mb-3">
+          <div>
+            <label className={labelClass}>Name</label>
+            <input
+              type="text"
+              value={trainName}
+              onChange={(e) => setTrainName(e.target.value)}
+              placeholder="e.g. reranker-nk-bpo-v1"
+              className="w-full rounded-md border border-gray-300 px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
+            />
+          </div>
+          <div>
+            <label className={labelClass}>Prediction set</label>
+            <select value={trainPsId} onChange={(e) => setTrainPsId(e.target.value)} className={selectClass}>
+              <option value="">Select...</option>
+              {predictionSets.map((ps) => (
+                <option key={ps.id} value={ps.id}>{predLabel(ps)}</option>
+              ))}
+            </select>
+          </div>
+          <div>
+            <label className={labelClass}>Evaluation set</label>
+            <select value={trainEsId} onChange={(e) => setTrainEsId(e.target.value)} className={selectClass}>
+              <option value="">Select...</option>
+              {evaluationSets.map((es) => (
+                <option key={es.id} value={es.id}>{evalLabel(es, annotationSets)}</option>
+              ))}
+            </select>
+          </div>
+          <div>
+            <label className={labelClass}>Category</label>
+            <select value={trainCategory} onChange={(e) => setTrainCategory(e.target.value)} className={selectClass}>
+              <option value="nk">NK (No Knowledge)</option>
+              <option value="lk">LK (Limited Knowledge)</option>
+              <option value="pk">PK (Partial Knowledge)</option>
+            </select>
+            <p className="text-[10px] text-gray-400 mt-1 leading-snug">{CATEGORY_HINTS[trainCategory]}</p>
+          </div>
+          <div>
+            <label className={labelClass}>Aspect</label>
+            <select value={trainAspect} onChange={(e) => setTrainAspect(e.target.value)} className={selectClass}>
+              <option value="">All aspects</option>
+              <option value="bpo">BPO (Biological Process)</option>
+              <option value="mfo">MFO (Molecular Function)</option>
+              <option value="cco">CCO (Cellular Component)</option>
+            </select>
+          </div>
+          <div>
+            <label className={labelClass}>Neg:Pos ratio</label>
+            <input
+              type="number" min="1" step="1" placeholder="all (no limit)"
+              value={trainNegPosRatio}
+              onChange={(e) => setTrainNegPosRatio(e.target.value)}
+              className="w-full rounded-md border border-gray-300 px-3 py-2 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
+            />
+          </div>
+        </div>
+
+        {/* Extra training pairs */}
+        <div className="mb-3">
+          <div className="flex items-center gap-2 mb-2">
+            <label className="text-xs font-medium text-gray-600">Additional training pairs (multi-temporal)</label>
+            <button
+              type="button"
+              onClick={() => setExtraPairs((prev) => [...prev, { psId: "", esId: "" }])}
+              className="rounded border border-gray-300 bg-white px-2 py-0.5 text-xs text-gray-600 hover:bg-gray-50"
+            >
+              + Add pair
+            </button>
+          </div>
+          {extraPairs.map((pair, i) => (
+            <div key={i} className="grid grid-cols-[1fr_1fr_auto] gap-2 mb-1.5">
+              <select
+                value={pair.psId}
+                onChange={(e) => setExtraPairs((prev) => prev.map((p, j) => j === i ? { ...p, psId: e.target.value } : p))}
+                className={selectClass}
+              >
+                <option value="">Prediction set...</option>
+                {predictionSets.map((ps) => (
+                  <option key={ps.id} value={ps.id}>{predLabel(ps)}</option>
+                ))}
+              </select>
+              <select
+                value={pair.esId}
+                onChange={(e) => setExtraPairs((prev) => prev.map((p, j) => j === i ? { ...p, esId: e.target.value } : p))}
+                className={selectClass}
+              >
+                <option value="">Evaluation set...</option>
+                {evaluationSets.map((es) => (
+                  <option key={es.id} value={es.id}>{evalLabel(es, annotationSets)}</option>
+                ))}
+              </select>
+              <button
+                type="button"
+                onClick={() => setExtraPairs((prev) => prev.filter((_, j) => j !== i))}
+                className="rounded border border-red-200 px-2 py-1 text-xs text-red-500 hover:bg-red-50"
+              >
+                x
+              </button>
+            </div>
+          ))}
+          {extraPairs.length > 0 && (
+            <p className="text-[10px] text-gray-400 mt-1">
+              Data from all pairs will be concatenated before training a single model.
+              {extraPairs.filter((p) => p.psId && p.esId).length > 0 &&
+                ` (${1 + extraPairs.filter((p) => p.psId && p.esId).length} pairs total)`}
+            </p>
+          )}
+        </div>
+
+        <div className="flex items-center gap-3">
+          <button
+            onClick={handleTrain}
+            disabled={!trainName.trim() || !trainPsId || !trainEsId || training}
+            className={btnPrimary}
+          >
+            {training ? "Training… (this may take 1-2 min)" : "Train"}
+          </button>
+          {trainPsId && trainEsId && (
+            <a
+              href={getTrainingDataTsvUrl(trainPsId, trainEsId, trainCategory)}
+              download={`training_data_${shortId(trainPsId)}_${trainCategory}.tsv`}
+              className="text-xs text-blue-600 hover:underline"
+            >
+              ↓ Preview training data TSV
+            </a>
+          )}
+        </div>
+        {trainError && <p className="text-xs text-red-500 mt-2">{trainError}</p>}
+      </div>
+
+      {/* List of rerankers — the empty-state hint is hidden after a load failure so it cannot contradict the error */}
+      {loading && <p className="text-sm text-gray-400">Loading...</p>}
+      {error && <p className="text-sm text-red-500">{error}</p>}
+
+      {!loading && !error && rerankers.length === 0 && (
+        <div className="rounded-lg border bg-white px-4 py-12 text-center text-sm text-gray-400 shadow-sm">
+          No re-ranker models trained yet. Use the form above to train one.
+        </div>
+      )}
+
+      <div className="space-y-3">
+        {rerankers.map((model) => (
+          <RerankerCard
+            key={model.id}
+            model={model}
+            predictionSets={predictionSets}
+            evaluationSets={evaluationSets}
+            annotationSets={annotationSets}
+            onDelete={() => setRerankers((prev) => prev.filter((r) => r.id !== model.id))}
+          />
+        ))}
+      </div>
+    </>
+  );
+}
diff --git a/apps/web/components/AnnotateForm.tsx b/apps/web/components/AnnotateForm.tsx
new file mode 100644
index 0000000..e28e1cf
--- /dev/null
+++ b/apps/web/components/AnnotateForm.tsx
@@ -0,0 +1,302 @@
+"use client";
+
+import { useState, useRef, useCallback, useEffect } from "react";
+import { useRouter } from "next/navigation";
+import { useTranslations } from "next-intl";
+import {
+  annotateProteins,
+  getJob,
+  launchPredictGoTerms,
+  listPredictionSets,
+  type AnnotateResult,
+} from "@/lib/api";
+
+type Stage = "idle" | "uploading" | "embedding" | "predicting" | "done" | "error";
+
+const POLL_MS = 3_000;
+
+const EXAMPLE_FASTA = `>sp|P04637|P53_HUMAN Cellular tumor antigen p53
+MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP
+DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYPQGLNGTVNLPGRNSFEV
+RVCACPGRDRRTEEENLHKTTGIDSFLHPEVEYFTPETDPAGPMCSRHFYQLAKTCPVQLW
+VDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHERCTCGGNHGISTTTGICLICQFFLVHKP
+>sp|P38398|BRCA1_HUMAN Breast cancer type 1 susceptibility protein
+MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQC
+PLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEV
+SIIQSMGYRNRAKRLLQSEPENPSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELG`;
+
+export function AnnotateForm() {
+  const t = useTranslations("home");
+  const router = useRouter();
+
+  const [fasta, setFasta] = useState("");
+  const [stage, setStage] = useState<Stage>("idle");
+  const [error, setError] = useState<string | null>(null);
+  const [progress, setProgress] = useState<string>("");
+  const [predictionSetId, setPredictionSetId] = useState<string | null>(null);
+  const [rerankerId, setRerankerId] = useState<string | null>(null);
+  const fileRef = useRef<HTMLInputElement>(null);
+  const abortRef = useRef(false);
+
+  // Drag-and-drop state
+  const [dragOver, setDragOver] = useState(false);
+
+  const handleFile = (file: File) => {
+    const reader = new FileReader();
+    reader.onload = (e) => {
+      const text = e.target?.result;
+      if (typeof text === "string") setFasta(text);
+    };
+    reader.readAsText(file);
+  };
+
+  const handleDrop = (e: React.DragEvent) => {
+    e.preventDefault();
+    setDragOver(false);
+    const file = e.dataTransfer.files?.[0];
+    if (file) handleFile(file);
+  };
+
+  const pollJob = useCallback(
+    async (jobId: string): Promise<"succeeded" | "failed"> => {
+      // Poll until the job settles or the component unmounts; give up after 20 consecutive getJob errors (~1 min).
+      let misses = 0;
+      while (!abortRef.current) {
+        try {
+          const job = await getJob(jobId);
+          misses = 0; // a successful read means the earlier errors were transient
+          if (job.progress_total && job.progress_current) setProgress(`${Math.round((job.progress_current / job.progress_total) * 100)}%`);
+          if (job.status === "succeeded") return "succeeded";
+          if (job.status === "failed" || job.status === "cancelled") return "failed";
+        } catch {
+          if (++misses >= 20) return "failed"; // persistent failure: stop instead of polling forever
+        }
+        await new Promise((r) => setTimeout(r, POLL_MS));
+      }
+      return "failed";
+    },
+    [],
+  );
+
+  const handleSubmit = async () => {
+    if (!fasta.trim()) return;
+    abortRef.current = false;
+    setError(null);
+    setStage("uploading");
+    setProgress("");
+
+    try {
+      // Step 1: Upload FASTA + create embedding job
+      setProgress(t("annotateUploading" as any));
+      const result: AnnotateResult = await annotateProteins({
+        fastaText: fasta,
+        name: `Annotation ${new Date().toISOString().slice(0, 16)}`,
+      });
+
+      // Step 2: Poll embedding job
+      setStage("embedding");
+      setProgress("0%");
+      const embedResult = await pollJob(result.embedding_job_id);
+      if (embedResult === "failed") {
+        throw new Error("Embedding computation failed");
+      }
+
+      // Step 3: Launch prediction
+      setStage("predicting");
+      setProgress("0%");
+      const predictJob = await launchPredictGoTerms(result.predict_payload as Parameters<typeof launchPredictGoTerms>[0]);
+
+      // Step 4: Poll prediction job
+      const predictResult = await pollJob(predictJob.id);
+      if (predictResult === "failed") {
+        throw new Error("Prediction failed");
+      }
+
+      // Step 5: Find the prediction set created for this query_set
+      const sets = await listPredictionSets();
+      const match = sets.find(
+        (s) =>
+          (s as any).query_set_id === result.query_set_id &&
+          s.embedding_config_id === result.embedding_config_id,
+      );
+      if (match) {
+        setPredictionSetId(match.id);
+      }
+      if (result.reranker_id) {
+        setRerankerId(result.reranker_id);
+      }
+
+      setStage("done");
+      setProgress("");
+    } catch (err: any) {
+      setStage("error");
+      setError(err?.message ?? "Unknown error");
+    }
+  };
+
+  // Auto-redirect when done
+  useEffect(() => {
+    if (stage === "done" && predictionSetId) {
+      const timer = setTimeout(() => {
+        const qs = rerankerId ? `?reranker_id=${rerankerId}` : "";
+        router.push(`/functional-annotation/${predictionSetId}${qs}`);
+      }, 1500);
+      return () => clearTimeout(timer);
+    }
+  }, [stage, predictionSetId, rerankerId, router]);
+
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      abortRef.current = true;
+    };
+  }, []);
+
+  const isRunning = stage === "uploading" || stage === "embedding" || stage === "predicting";
+
+  return (
+    <section className="rounded-2xl border-2 border-blue-100 bg-gradient-to-b from-blue-50/60 to-white p-6 sm:p-8">
+      <h2 className="text-xl sm:text-2xl font-bold text-gray-900 mb-1">
+        {t("annotateTitle" as any)}
+      </h2>
+      <p className="text-sm text-gray-500 mb-5">
+        {t("annotateDescription" as any)}
+      </p>
+
+      {/* FASTA input */}
+      <div
+        className={`relative rounded-lg border-2 transition-colors ${
+          dragOver
+            ? "border-blue-400 bg-blue-50"
+            : "border-gray-200 bg-white"
+        }`}
+        onDragOver={(e) => {
+          e.preventDefault();
+          setDragOver(true);
+        }}
+        onDragLeave={() => setDragOver(false)}
+        onDrop={handleDrop}
+      >
+        <textarea
+          value={fasta}
+          onChange={(e) => setFasta(e.target.value)}
+          placeholder={t("annotatePlaceholder" as any)}
+          rows={6}
+          disabled={isRunning}
+          className="w-full rounded-lg p-4 text-xs font-mono text-gray-700 placeholder:text-gray-400 focus:outline-none focus:ring-2 focus:ring-blue-300 resize-y disabled:opacity-50 disabled:cursor-not-allowed bg-transparent"
+        />
+        {!fasta && !isRunning && (
+          <div className="absolute bottom-3 right-3 flex gap-2">
+            <button
+              type="button"
+              onClick={() => setFasta(EXAMPLE_FASTA)}
+              className="text-xs text-blue-500 hover:text-blue-700 underline"
+            >
+              {t("annotateTryExample" as any)}
+            </button>
+            <button
+              type="button"
+              onClick={() => fileRef.current?.click()}
+              className="text-xs text-gray-500 hover:text-gray-700 underline"
+            >
+              {t("annotateUploadFile" as any)}
+            </button>
+          </div>
+        )}
+      </div>
+      <input
+        ref={fileRef}
+        type="file"
+        accept=".fasta,.fa,.faa,.txt"
+        className="hidden"
+        onChange={(e) => {
+          const file = e.target.files?.[0];
+          if (file) handleFile(file);
+        }}
+      />
+
+      {/* Action row */}
+      <div className="mt-4 flex items-center gap-4">
+        <button
+          onClick={handleSubmit}
+          disabled={!fasta.trim() || isRunning}
+          className="rounded-lg bg-blue-600 px-6 py-2.5 text-sm font-semibold text-white hover:bg-blue-700 transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+        >
+          {isRunning ? (
+            <span className="flex items-center gap-2">
+              <svg
+                className="animate-spin h-4 w-4"
+                viewBox="0 0 24 24"
+                fill="none"
+              >
+                <circle
+                  className="opacity-25"
+                  cx="12"
+                  cy="12"
+                  r="10"
+                  stroke="currentColor"
+                  strokeWidth="4"
+                />
+                <path
+                  className="opacity-75"
+                  fill="currentColor"
+                  d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
+                />
+              </svg>
+              {stage === "uploading" && t("annotateUploading" as any)}
+              {stage === "embedding" && t("annotateEmbedding" as any)}
+              {stage === "predicting" && t("annotatePredicting" as any)}
+            </span>
+          ) : (
+            t("annotateButton" as any)
+          )}
+        </button>
+
+        {isRunning && progress && (
+          <span className="text-sm text-gray-500 tabular-nums">{progress}</span>
+        )}
+
+        {stage === "done" && (
+          <span className="text-sm text-green-600 font-medium">
+            {t("annotateDone" as any)}
+          </span>
+        )}
+
+        {stage === "error" && (
+          <span className="text-sm text-red-600">{error}</span>
+        )}
+      </div>
+
+      {/* Progress bar */}
+      {isRunning && (
+        <div className="mt-3">
+          <div className="flex gap-1">
+            {(["uploading", "embedding", "predicting"] as const).map((s) => {
+              const active = stage === s;
+              const done =
+                (s === "uploading" && (stage === "embedding" || stage === "predicting")) ||
+                (s === "embedding" && stage === "predicting");
+              return (
+                <div
+                  key={s}
+                  className={`h-1.5 flex-1 rounded-full transition-colors ${
+                    done
+                      ? "bg-blue-500"
+                      : active
+                        ? "bg-blue-300 animate-pulse"
+                        : "bg-gray-200"
+                  }`}
+                />
+              );
+            })}
+          </div>
+          <div className="flex justify-between mt-1 text-[10px] text-gray-400">
+            <span>{t("annotateStepUpload" as any)}</span>
+            <span>{t("annotateStepEmbed" as any)}</span>
+            <span>{t("annotateStepPredict" as any)}</span>
+          </div>
+        </div>
+      )}
+    </section>
+  );
+}
diff --git a/apps/web/components/Breadcrumbs.tsx b/apps/web/components/Breadcrumbs.tsx
new file mode 100644
index 0000000..218a393
--- /dev/null
+++ b/apps/web/components/Breadcrumbs.tsx
@@ -0,0 +1,73 @@
+"use client";
+
+import Link from "next/link";
+import { usePathname } from "next/navigation";
+
+// Display labels for known route segments. Segments missing from this map are
+// shown verbatim, or shortened when they are the last crumb (see Breadcrumbs).
+const ROUTE_LABELS: Record<string, string> = {
+  "functional-annotation": "Functional Annotation",
+  proteins: "Proteins",
+  jobs: "Jobs",
+  embeddings: "Embeddings",
+  annotations: "Annotations",
+  evaluation: "Evaluation",
+  scoring: "Scoring",
+  reranker: "Re-ranker",
+  "query-sets": "Query Sets",
+  maintenance: "Maintenance",
+};
+
+/**
+ * Renders a breadcrumb trail derived from the current pathname.
+ *
+ * A leading two-letter locale segment is dropped, then a "Home" crumb is
+ * followed by one crumb per remaining path segment. Returns null when fewer
+ * than two segments remain.
+ *
+ * The final crumb is plain text marked aria-current="page"; the "/" separators
+ * are hidden from assistive technology.
+ */
+export function Breadcrumbs() {
+  const pathname = usePathname();
+  // Remove locale prefix
+  const stripped = pathname.replace(/^\/[a-z]{2}(?=\/|$)/, "") || "/";
+  const segments = stripped.split("/").filter(Boolean);
+
+  if (segments.length < 2) return null;
+
+  const crumbs: { label: string; href: string }[] = [
+    { label: "Home", href: "/" },
+  ];
+
+  let path = "";
+  for (let i = 0; i < segments.length; i++) {
+    path += `/${segments[i]}`;
+    const isLast = i === segments.length - 1;
+    // Known routes use their label; an unknown final segment longer than 12
+    // characters is cut to its first 8 characters plus "...".
+    const label = ROUTE_LABELS[segments[i]] ?? (
+      isLast && segments[i].length > 12
+        ? `${segments[i].slice(0, 8)}...`
+        : segments[i]
+    );
+    crumbs.push({ label, href: path });
+  }
+
+  return (
+    <nav className="flex items-center gap-1 text-xs text-gray-400 mb-3" aria-label="Breadcrumb">
+      {crumbs.map((crumb, i) => (
+        <span key={crumb.href} className="flex items-center gap-1">
+          {i > 0 && <span aria-hidden="true">/</span>}
+          {i < crumbs.length - 1 ? (
+            <Link href={crumb.href} className="hover:text-gray-600 transition-colors">
+              {crumb.label}
+            </Link>
+          ) : (
+            <span className="text-gray-600 font-medium" aria-current="page">{crumb.label}</span>
+          )}
+        </span>
+      ))}
+    </nav>
+  );
+}
diff --git a/apps/web/components/ContextBanner.tsx b/apps/web/components/ContextBanner.tsx
new file mode 100644
index 0000000..b87a7e6
--- /dev/null
+++ b/apps/web/components/ContextBanner.tsx
@@ -0,0 +1,79 @@
+"use client";
+
+import { useState } from "react";
+import Link from "next/link";
+
+/** One entry of the banner's prerequisite checklist. */
+export type Prerequisite = {
+  label: string;
+  met: boolean;
+  // Only used while `met` is false: the label is then rendered as a link.
+  href?: string;
+};
+
+type ContextBannerProps = {
+  title: string;
+  description: string;
+  prerequisites?: Prerequisite[];
+  nextStep?: { label: string; href: string };
+};
+
+/**
+ * Collapsible info banner: a title row that toggles a body containing the
+ * description, an optional prerequisite checklist and an optional "Next" link.
+ * The banner starts expanded; the toggle exposes its state via aria-expanded.
+ */
+export function ContextBanner({ title, description, prerequisites, nextStep }: ContextBannerProps) {
+  const [collapsed, setCollapsed] = useState(false);
+
+  return (
+    <div className="rounded-lg border border-blue-200 bg-blue-50 mb-6">
+      <button
+        type="button"
+        aria-expanded={!collapsed}
+        onClick={() => setCollapsed((v) => !v)}
+        className="w-full flex items-center justify-between px-4 py-3 text-left"
+      >
+        <div className="flex items-center gap-2">
+          <span className="text-blue-600 text-sm" aria-hidden="true">&#9432;</span>
+          <span className="text-sm font-medium text-blue-900">{title}</span>
+        </div>
+        <span className="text-xs text-blue-400" aria-hidden="true">{collapsed ? "+" : "-"}</span>
+      </button>
+
+      {!collapsed && (
+        <div className="px-4 pb-3 space-y-2">
+          <p className="text-sm text-gray-600">{description}</p>
+
+          {prerequisites && prerequisites.length > 0 && (
+            <div className="flex flex-wrap gap-x-4 gap-y-1">
+              {prerequisites.map((p) => (
+                <span key={p.label} className="inline-flex items-center gap-1 text-xs">
+                  <span className={p.met ? "text-green-600" : "text-amber-500"}>
+                    {p.met ? "\u2713" : "\u26A0"}
+                  </span>
+                  {p.href && !p.met ? (
+                    <Link href={p.href} className="text-blue-600 underline hover:text-blue-800">
+                      {p.label}
+                    </Link>
+                  ) : (
+                    <span className={p.met ? "text-gray-600" : "text-amber-700"}>{p.label}</span>
+                  )}
+                </span>
+              ))}
+            </div>
+          )}
+
+          {nextStep && (
+            <div className="text-xs text-gray-500">
+              Next:{" "}
+              <Link href={nextStep.href} className="text-blue-600 underline hover:text-blue-800">
+                {nextStep.label} &rarr;
+              </Link>
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/apps/web/components/FloatingJobsWidget.tsx b/apps/web/components/FloatingJobsWidget.tsx
new file mode 100644
index 0000000..718e5ac
--- /dev/null
+++ b/apps/web/components/FloatingJobsWidget.tsx
@@ -0,0 +1,110 @@
+"use client";
+
+import { useEffect, useState, useCallback } from "react";
+import Link from "next/link";
+import { listJobs, type Job } from "@/lib/api";
+
+// Delay between two polls, in milliseconds.
+const POLL_INTERVAL = 10_000;
+
+/**
+ * Floating bottom-right widget listing the currently running jobs.
+ *
+ * Polls listJobs every POLL_INTERVAL ms while the document is visible, polls
+ * again as soon as the document becomes visible, and renders nothing while no
+ * running job is known.
+ */
+export function FloatingJobsWidget() {
+  const [jobs, setJobs] = useState<Job[]>([]);
+  const [expanded, setExpanded] = useState(false);
+
+  // Fetches up to five running jobs; skipped entirely while the tab is hidden.
+  const poll = useCallback(async () => {
+    if (document.visibilityState === "hidden") return;
+    try {
+      const running = await listJobs({ limit: 5, status: "running" });
+      setJobs(running);
+    } catch {
+      // ignore transient errors
+    }
+  }, []);
+
+  useEffect(() => {
+    poll();
+    const id = setInterval(poll, POLL_INTERVAL);
+    const onVisibility = () => {
+      if (document.visibilityState === "visible") poll();
+    };
+    document.addEventListener("visibilitychange", onVisibility);
+    return () => {
+      clearInterval(id);
+      document.removeEventListener("visibilitychange", onVisibility);
+    };
+  }, [poll]);
+
+  if (jobs.length === 0) return null;
+
+  return (
+    <div className="fixed bottom-4 right-4 z-50">
+      {expanded && (
+        <div className="mb-2 w-72 rounded-lg border bg-white shadow-xl overflow-hidden">
+          <div className="bg-gray-50 px-3 py-2 text-xs font-semibold text-gray-600 uppercase tracking-wide flex items-center justify-between">
+            <span>Running Jobs</span>
+            <button type="button" aria-label="Close" onClick={() => setExpanded(false)} className="text-gray-400 hover:text-gray-600 text-sm">
+              &times;
+            </button>
+          </div>
+          <div className="divide-y max-h-60 overflow-y-auto">
+            {jobs.map((job) => {
+              // A known total with no progress yet is 0%, not "unknown"; the
+              // result is clamped so the bar never leaves the 0-100% range.
+              const pct = job.progress_total
+                ? Math.min(100, Math.max(0, Math.round(((job.progress_current ?? 0) / job.progress_total) * 100)))
+                : null;
+              return (
+                <Link
+                  key={job.id}
+                  href={`/jobs/${job.id}`}
+                  className="block px-3 py-2.5 hover:bg-gray-50 transition-colors"
+                >
+                  <div className="flex items-center justify-between">
+                    <span className="text-xs font-medium text-gray-800 truncate">
+                      {job.operation}
+                    </span>
+                    {pct != null && (
+                      <span className="text-[10px] text-gray-400 tabular-nums ml-2">{pct}%</span>
+                    )}
+                  </div>
+                  {pct != null && (
+                    <div className="mt-1 h-1 rounded-full bg-gray-100 overflow-hidden">
+                      <div
+                        className="h-full rounded-full bg-blue-500 transition-all"
+                        style={{ width: `${pct}%` }}
+                      />
+                    </div>
+                  )}
+                  <div className="text-[10px] text-gray-400 mt-0.5 font-mono truncate">
+                    {job.id.slice(0, 8)}...
+                  </div>
+                </Link>
+              );
+            })}
+          </div>
+        </div>
+      )}
+
+      <button
+        type="button"
+        aria-expanded={expanded}
+        onClick={() => setExpanded((v) => !v)}
+        className="flex items-center gap-1.5 rounded-full bg-blue-600 text-white px-3.5 py-2 text-sm font-medium shadow-lg hover:bg-blue-700 transition-colors"
+      >
+        <span className="relative flex h-2 w-2">
+          <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-blue-300 opacity-75" />
+          <span className="relative inline-flex rounded-full h-2 w-2 bg-white" />
+        </span>
+        {jobs.length} running
+      </button>
+    </div>
+  );
+}
diff --git a/apps/web/components/LanguageSwitcher.tsx b/apps/web/components/LanguageSwitcher.tsx
index ae175d1..a3b6761 100644
--- a/apps/web/components/LanguageSwitcher.tsx
+++ b/apps/web/components/LanguageSwitcher.tsx
@@ -1,4 +1,5 @@
 "use client";
+import { useState, useRef, useEffect } from "react";
 import { useLocale } from "next-intl";
 import { useRouter, usePathname } from "next/navigation";
 import { routing } from "@/i18n/routing";
@@ -15,29 +16,55 @@ export function LanguageSwitcher() {
   const locale = useLocale();
   const router = useRouter();
   const pathname = usePathname();
+  const [open, setOpen] = useState(false);
+  const ref = useRef<HTMLDivElement>(null);
+
+  // While the menu is open, close it on any mousedown outside the component.
+  useEffect(() => {
+    if (!open) return;
+    function handleClickOutside(e: MouseEvent) {
+      if (ref.current && !ref.current.contains(e.target as Node)) {
+        setOpen(false);
+      }
+    }
+    document.addEventListener("mousedown", handleClickOutside);
+    return () => document.removeEventListener("mousedown", handleClickOutside);
+  }, [open]);
 
   function switchLocale(newLocale: string) {
     // Replace the locale segment in the pathname
     const segments = pathname.split("/");
     segments[1] = newLocale;
     router.push(segments.join("/"));
+    setOpen(false);
   }
 
+  const otherLocales = routing.locales.filter((l) => l !== locale);
+
   return (
-    <div className="flex items-center gap-1">
-      {routing.locales.map((l) => (
-        <button
-          key={l}
-          onClick={() => switchLocale(l)}
-          className={`px-1.5 py-0.5 text-xs rounded transition-colors ${
-            l === locale
-              ? "bg-blue-100 text-blue-700 font-semibold"
-              : "text-gray-500 hover:text-gray-700 hover:bg-gray-100"
-          }`}
-        >
-          {LOCALE_LABELS[l]}
-        </button>
-      ))}
+    <div className="relative" ref={ref}>
+      <button
+        type="button"
+        aria-expanded={open}
+        onClick={() => setOpen((v) => !v)}
+        className="px-2 py-1.5 text-xs rounded transition-colors min-h-[36px] min-w-[36px] flex items-center justify-center bg-blue-100 text-blue-700 font-semibold"
+      >
+        {LOCALE_LABELS[locale]}
+      </button>
+      {open && (
+        <div className="absolute right-0 mt-1 flex flex-col gap-0.5 bg-white border border-gray-200 rounded shadow-lg p-1 z-50">
+          {otherLocales.map((l) => (
+            <button
+              key={l}
+              type="button"
+              onClick={() => switchLocale(l)}
+              className="px-3 py-1.5 text-xs rounded transition-colors min-h-[36px] min-w-[36px] flex items-center justify-center text-gray-500 hover:text-gray-700 hover:bg-gray-100 whitespace-nowrap"
+            >
+              {LOCALE_LABELS[l]}
+            </button>
+          ))}
+        </div>
+      )}
     </div>
   );
 }
diff --git a/apps/web/components/NavLinks.tsx b/apps/web/components/NavLinks.tsx
index 792e31c..c883d0d 100644
--- a/apps/web/components/NavLinks.tsx
+++ b/apps/web/components/NavLinks.tsx
@@ -2,34 +2,119 @@
 
 import Link from "next/link";
 import { usePathname } from "next/navigation";
-import { useState, useEffect } from "react";
+import { useState, useEffect, useRef } from "react";
 import { DocLinks } from "./DocLinks";
 import { useTranslations } from "next-intl";
 
-export function NavLinks() {
+// A single navigation link, and a titled group of links (one dropdown each).
+type NavItem = { href: string; label: string };
+type NavGroup = { title: string; items: NavItem[] };
+
+/**
+ * Desktop dropdown for one navigation group.
+ *
+ * The trigger is highlighted when any item of the group matches the current
+ * pathname (compared after dropping a leading two-letter locale segment).
+ * The menu closes on item click or on a mousedown outside the component.
+ */
+function DropdownGroup({ group, pathname }: { group: NavGroup; pathname: string }) {
+  const [open, setOpen] = useState(false);
+  const ref = useRef<HTMLDivElement>(null);
+  const stripped = pathname.replace(/^\/[a-z]{2}(?=\/|$)/, "") || "/";
+  const groupActive = group.items.some(
+    ({ href }) => stripped === href || stripped.startsWith(href + "/")
+  );
+
+  // Close on click outside
+  useEffect(() => {
+    if (!open) return;
+    const handler = (e: MouseEvent) => {
+      if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false);
+    };
+    document.addEventListener("mousedown", handler);
+    return () => document.removeEventListener("mousedown", handler);
+  }, [open]);
+
+  return (
+    <div ref={ref} className="relative">
+      <button
+        type="button"
+        aria-expanded={open}
+        onClick={() => setOpen((v) => !v)}
+        className={`flex items-center gap-1 px-2 py-1 rounded transition-colors text-sm ${
+          groupActive ? "font-semibold text-blue-600" : "text-gray-500 hover:text-gray-900"
+        }`}
+      >
+        {group.title}
+        <svg aria-hidden="true" className={`w-3 h-3 transition-transform ${open ? "rotate-180" : ""}`} fill="none" viewBox="0 0 12 12" stroke="currentColor" strokeWidth="2">
+          <path d="M3 4.5l3 3 3-3" />
+        </svg>
+      </button>
+      {open && (
+        <div className="absolute top-full left-0 mt-1 py-1 bg-white rounded-lg border shadow-lg z-50 min-w-[180px]">
+          {group.items.map(({ href, label }) => {
+            const active = stripped === href || stripped.startsWith(href + "/");
+            return (
+              <Link
+                key={href}
+                href={href}
+                onClick={() => setOpen(false)}
+                className={`block px-4 py-2 text-sm transition-colors ${
+                  active
+                    ? "font-semibold text-blue-600 bg-blue-50"
+                    : "text-gray-600 hover:bg-gray-50 hover:text-gray-900"
+                }`}
+              >
+                {label}
+              </Link>
+            );
+          })}
+        </div>
+      )}
+    </div>
+  );
+}
+
+export function NavLinks({ mobileExtras }: { mobileExtras?: React.ReactNode }) {
   const t = useTranslations("nav");
   const pathname = usePathname();
   const [open, setOpen] = useState(false);
 
-  const NAV_GROUPS = [
-    [
-      { href: "/proteins", label: t("proteins") },
-      { href: "/annotations", label: t("annotations") },
-      { href: "/query-sets", label: t("querySets") },
-    ],
-    [
-      { href: "/embeddings", label: t("embeddings") },
-      { href: "/functional-annotation", label: t("functionalAnnotation") },
-      { href: "/scoring", label: t("scoring") },
-      { href: "/evaluation", label: t("evaluation") },
-    ],
-    [
-      { href: "/jobs", label: t("jobs") },
-      { href: "/maintenance", label: t("maintenance") },
-    ],
+  const NAV_GROUPS: NavGroup[] = [
+    {
+      title: t("data" as any),
+      items: [
+        { href: "/proteins", label: t("proteins") },
+        { href: "/annotations", label: t("annotations") },
+        { href: "/query-sets", label: t("querySets") },
+      ],
+    },
+    {
+      title: t("pipelineGroup" as any),
+      items: [
+        { href: "/embeddings", label: t("embeddings") },
+        { href: "/functional-annotation", label: t("functionalAnnotation") },
+        { href: "/reranker", label: t("reranker") },
+      ],
+    },
+    {
+      title: t("results" as any),
+      items: [
+        { href: "/evaluation", label: t("evaluation") },
+        { href: "/scoring", label: t("scoring") },
+      ],
+    },
+    {
+      title: t("system" as any),
+      items: [
+        { href: "/jobs", label: t("jobs") },
+        { href: "/maintenance", label: t("maintenance") },
+      ],
+    },
   ];
 
-  const ALL_LINKS = NAV_GROUPS.flat();
+  // NOTE(review): no remaining use of ALL_LINKS is visible in this change — remove it if it is now unused.
+  const ALL_LINKS = NAV_GROUPS.flatMap((g) => g.items);
 
   // Close menu on route change
   useEffect(() => { setOpen(false); }, [pathname]);
@@ -39,33 +113,18 @@ export function NavLinks() {
       {/* Desktop nav */}
       <nav className="hidden lg:flex items-center gap-1 text-sm">
         {NAV_GROUPS.map((group, gi) => (
-          <span key={gi} className="flex items-center gap-1">
-            {gi > 0 && <span className="mx-2 text-gray-200">|</span>}
-            {group.map(({ href, label }) => {
-              const active = pathname === href || pathname.startsWith(href + "/");
-              return (
-                <Link
-                  key={href}
-                  href={href}
-                  className={`px-2 py-1 rounded transition-colors ${
-                    active
-                      ? "font-semibold text-blue-600"
-                      : "text-gray-500 hover:text-gray-900"
-                  }`}
-                >
-                  {label}
-                </Link>
-              );
-            })}
+          <span key={gi} className="flex items-center">
+            {gi > 0 && <span className="mx-1.5 text-gray-200">|</span>}
+            <DropdownGroup group={group} pathname={pathname} />
           </span>
         ))}
-        <span className="mx-2 text-gray-200">|</span>
+        <span className="mx-1.5 text-gray-200">|</span>
         <DocLinks />
       </nav>
 
       {/* Mobile hamburger */}
       <button
-        className="lg:hidden flex flex-col justify-center items-center w-8 h-8 gap-1.5 rounded text-gray-600 hover:bg-gray-100 transition-colors"
+        className="lg:hidden flex flex-col justify-center items-center w-10 h-10 gap-1.5 rounded text-gray-600 hover:bg-gray-100 transition-colors"
         onClick={() => setOpen((v) => !v)}
         aria-label="Toggle menu"
       >
@@ -77,26 +136,40 @@ export function NavLinks() {
       {/* Mobile dropdown */}
       {open && (
         <div className="lg:hidden absolute left-0 right-0 top-full z-50 border-b bg-white shadow-lg">
-          <nav className="px-4 py-3 flex flex-col gap-1">
-            {ALL_LINKS.map(({ href, label }) => {
-              const active = pathname === href || pathname.startsWith(href + "/");
-              return (
-                <Link
-                  key={href}
-                  href={href}
-                  className={`px-3 py-2.5 rounded-md text-sm transition-colors ${
-                    active
-                      ? "font-semibold text-blue-600 bg-blue-50"
-                      : "text-gray-600 hover:bg-gray-50 hover:text-gray-900"
-                  }`}
-                >
-                  {label}
-                </Link>
-              );
-            })}
+          <nav className="px-4 py-3 flex flex-col gap-0.5">
+            {NAV_GROUPS.map((group, gi) => (
+              <div key={gi}>
+                {gi > 0 && <div className="border-t my-1" />}
+                <div className="px-3 py-1.5 text-xs font-semibold text-gray-400 uppercase tracking-wider">
+                  {group.title}
+                </div>
+                {group.items.map(({ href, label }) => {
+                  const stripped = pathname.replace(/^\/[a-z]{2}(?=\/|$)/, "") || "/";
+                  const active = stripped === href || stripped.startsWith(href + "/");
+                  return (
+                    <Link
+                      key={href}
+                      href={href}
+                      className={`block px-3 py-2 rounded-md text-sm transition-colors ${
+                        active
+                          ? "font-semibold text-blue-600 bg-blue-50"
+                          : "text-gray-600 hover:bg-gray-50 hover:text-gray-900"
+                      }`}
+                    >
+                      {label}
+                    </Link>
+                  );
+                })}
+              </div>
+            ))}
             <div className="mt-1 pt-2 border-t flex gap-4 px-3 text-sm text-gray-500">
               <DocLinks />
             </div>
+            {mobileExtras && (
+              <div className="mt-1 pt-2 border-t px-3 pb-1 flex items-center justify-between gap-3">
+                {mobileExtras}
+              </div>
+            )}
           </nav>
         </div>
       )}
diff --git a/apps/web/components/SupportButton.tsx b/apps/web/components/SupportButton.tsx
index 7f6a5cb..dad8a28 100644
--- a/apps/web/components/SupportButton.tsx
+++ b/apps/web/components/SupportButton.tsx
@@ -62,7 +62,7 @@ export function SupportButton() {
       <div className="group relative inline-block">
         <button
           onClick={() => { setOpen((v) => !v); setSubmitted(false); }}
-          className="flex items-center gap-1.5 rounded-full border border-gray-200 bg-white px-2.5 sm:px-3 py-1.5 text-sm text-gray-600 hover:border-blue-300 hover:text-blue-600 transition-colors shadow-sm"
+          className="flex items-center justify-center gap-1.5 rounded-full border border-gray-200 bg-white px-2.5 sm:px-3 py-1.5 text-sm text-gray-600 hover:border-blue-300 hover:text-blue-600 transition-colors shadow-sm min-h-[40px] min-w-[40px]"
         >
           <span className="text-base leading-none">👍</span>
           <span className="font-medium hidden sm:inline">{t("support")}</span>
diff --git a/apps/web/components/Tooltip.tsx b/apps/web/components/Tooltip.tsx
new file mode 100644
index 0000000..c36fcd1
--- /dev/null
+++ b/apps/web/components/Tooltip.tsx
@@ -0,0 +1,27 @@
+"use client";
+
+/**
+ * Wraps `children` and shows `text` in a floating box above them while the
+ * wrapper is hovered or contains keyboard focus.
+ */
+export function Tooltip({ text, children }: { text: string; children: React.ReactNode }) {
+  return (
+    <span className="relative inline-block group">
+      {children}
+      <span role="tooltip" className="pointer-events-none absolute bottom-full left-1/2 -translate-x-1/2 mb-1.5 z-20 hidden group-hover:block group-focus-within:block w-56 rounded-md border border-gray-200 bg-white px-3 py-2 text-xs text-gray-600 shadow-lg leading-relaxed">
+        {text}
+      </span>
+    </span>
+  );
+}
+
+/** Small focusable "?" badge that reveals `text` in a Tooltip. */
+export function HelpDot({ text }: { text: string }) {
+  return (
+    <Tooltip text={text}>
+      <span tabIndex={0} className="inline-flex items-center justify-center w-4 h-4 rounded-full bg-gray-200 text-gray-500 text-[10px] font-bold cursor-help select-none ml-1 align-middle">
+        ?
+      </span>
+    </Tooltip>
+  );
+}
diff --git a/apps/web/e2e/screenshots/mobile-annotations.png b/apps/web/e2e/screenshots/mobile-annotations.png
index 587a19a..373f539 100644
Binary files a/apps/web/e2e/screenshots/mobile-annotations.png and b/apps/web/e2e/screenshots/mobile-annotations.png differ
diff --git a/apps/web/e2e/screenshots/mobile-embeddings.png b/apps/web/e2e/screenshots/mobile-embeddings.png
index 5c54dd3..d5d77dc 100644
Binary files a/apps/web/e2e/screenshots/mobile-embeddings.png and b/apps/web/e2e/screenshots/mobile-embeddings.png differ
diff --git a/apps/web/e2e/screenshots/mobile-functional-annotation.png b/apps/web/e2e/screenshots/mobile-functional-annotation.png
index 10d57f4..460478b 100644
Binary files a/apps/web/e2e/screenshots/mobile-functional-annotation.png and b/apps/web/e2e/screenshots/mobile-functional-annotation.png differ
diff --git a/apps/web/e2e/screenshots/mobile-jobs.png b/apps/web/e2e/screenshots/mobile-jobs.png
index 5f14b57..d547aeb 100644
Binary files a/apps/web/e2e/screenshots/mobile-jobs.png and b/apps/web/e2e/screenshots/mobile-jobs.png differ
diff --git a/apps/web/e2e/screenshots/mobile-proteins.png b/apps/web/e2e/screenshots/mobile-proteins.png
index c2fe1ae..6cfc2ec 100644
Binary files a/apps/web/e2e/screenshots/mobile-proteins.png and b/apps/web/e2e/screenshots/mobile-proteins.png differ
diff --git a/apps/web/e2e/screenshots/mobile-query-sets.png b/apps/web/e2e/screenshots/mobile-query-sets.png
index 4ee4300..e649a17 100644
Binary files a/apps/web/e2e/screenshots/mobile-query-sets.png and b/apps/web/e2e/screenshots/mobile-query-sets.png differ
diff --git a/apps/web/e2e/screenshots/tablet-annotations.png b/apps/web/e2e/screenshots/tablet-annotations.png
index 8b2e952..cdc610e 100644
Binary files a/apps/web/e2e/screenshots/tablet-annotations.png and b/apps/web/e2e/screenshots/tablet-annotations.png differ
diff --git a/apps/web/e2e/screenshots/tablet-embeddings.png b/apps/web/e2e/screenshots/tablet-embeddings.png
index 5f6a207..b836950 100644
Binary files a/apps/web/e2e/screenshots/tablet-embeddings.png and b/apps/web/e2e/screenshots/tablet-embeddings.png differ
diff --git a/apps/web/e2e/screenshots/tablet-functional-annotation.png b/apps/web/e2e/screenshots/tablet-functional-annotation.png
index c65ab78..fb37fcc 100644
Binary files a/apps/web/e2e/screenshots/tablet-functional-annotation.png and b/apps/web/e2e/screenshots/tablet-functional-annotation.png differ
diff --git a/apps/web/e2e/screenshots/tablet-jobs.png b/apps/web/e2e/screenshots/tablet-jobs.png
index b773f72..f4a4147 100644
Binary files a/apps/web/e2e/screenshots/tablet-jobs.png and b/apps/web/e2e/screenshots/tablet-jobs.png differ
diff --git a/apps/web/e2e/screenshots/tablet-proteins.png b/apps/web/e2e/screenshots/tablet-proteins.png
index 40c6710..f7d8744 100644
Binary files a/apps/web/e2e/screenshots/tablet-proteins.png and b/apps/web/e2e/screenshots/tablet-proteins.png differ
diff --git a/apps/web/e2e/screenshots/tablet-query-sets.png b/apps/web/e2e/screenshots/tablet-query-sets.png
index c421f8b..41dcbb9 100644
Binary files a/apps/web/e2e/screenshots/tablet-query-sets.png and b/apps/web/e2e/screenshots/tablet-query-sets.png differ
diff --git a/apps/web/lib/api.ts b/apps/web/lib/api.ts
index a3a638d..cab4fe0 100644
--- a/apps/web/lib/api.ts
+++ b/apps/web/lib/api.ts
@@ -181,7 +181,7 @@ export type ProteinStats = {
 };
 
 export function getProteinStats() {
-  return http<ProteinStats>(`/proteins/stats`);
+  return http<ProteinStats>(`/proteins/stats/`); // NOTE(review): the other endpoints visible in this change have no trailing slash — confirm this route needs one
 }
 
 export function listProteins(params?: {
@@ -306,6 +306,12 @@ export type Prediction = {
   taxonomic_distance: number | null;
   taxonomic_common_ancestors: number | null;
   taxonomic_relation: string | null;
+  // Re-ranker features
+  vote_count: number | null;
+  k_position: number | null;
+  go_term_frequency: number | null;
+  ref_annotation_density: number | null;
+  neighbor_distance_std: number | null;
 };
 
 export function getProteinPredictions(setId: string, accession: string) {
@@ -493,6 +499,168 @@ export function getScoredTsvUrl(
   return `${baseUrl()}/scoring/prediction-sets/${setId}/score.tsv?${q.toString()}`;
 }
 
+// ---------------------------------------------------------------------------
+// Re-ranker
+// ---------------------------------------------------------------------------
+
+/** Re-ranker model record exchanged with the `/scoring/rerankers` endpoints. */
+export type RerankerModel = {
+  id: string;
+  name: string;
+  prediction_set_id: string | null;
+  evaluation_set_id: string | null;
+  category: string;
+  aspect: string | null;
+  metrics: Record<string, any>;
+  feature_importance: Record<string, number>;
+  created_at: string;
+};
+
+/** Request the list of re-ranker models from `/scoring/rerankers`. */
+export function listRerankers() {
+  return http<RerankerModel[]>(`/scoring/rerankers`);
+}
+
+/** Request a single re-ranker model by its id. */
+export function getReranker(id: string) {
+  return http<RerankerModel>(`/scoring/rerankers/${id}`);
+}
+
+/**
+ * POST a training request to `/scoring/rerankers/train`.
+ *
+ * The request is aborted client-side after 5 minutes; the abort timer is
+ * cleared once the request settles, whether it succeeded or failed.
+ */
+export function trainReranker(body: {
+  name: string;
+  prediction_set_id: string;
+  evaluation_set_id: string;
+  category?: string;
+  aspect?: string | null;
+  neg_pos_ratio?: number | null;
+  extra_pairs?: { prediction_set_id: string; evaluation_set_id: string }[];
+}) {
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), 5 * 60_000); // 5 min
+  return http<RerankerModel>(`/scoring/rerankers/train`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+    signal: controller.signal,
+  }).finally(() => clearTimeout(timer));
+}
+
+/**
+ * Delete a re-ranker model by id.
+ *
+ * @throws Error carrying the response body text when the response is not OK.
+ */
+export async function deleteReranker(id: string) {
+  const res = await fetch(`${baseUrl()}/scoring/rerankers/${id}`, {
+    cache: "no-store",
+    method: "DELETE",
+  });
+  if (!res.ok) throw new Error(await res.text());
+}
+
+/**
+ * Build the URL of the `rerank.tsv` download for a prediction set.
+ * `minScore` is sent as `min_score` only when it is defined.
+ */
+export function getRerankedTsvUrl(
+  setId: string,
+  rerankerId: string,
+  params?: { minScore?: number },
+): string {
+  const q = new URLSearchParams();
+  q.set("reranker_id", rerankerId);
+  if (params?.minScore !== undefined) q.set("min_score", String(params.minScore));
+  return `${baseUrl()}/scoring/prediction-sets/${setId}/rerank.tsv?${q.toString()}`;
+}
+
+/**
+ * Request the `reranker-metrics` resource of a prediction set for the given
+ * re-ranker and evaluation set. `category` defaults to `"nk"`.
+ */
+export function getRerankerMetrics(
+  setId: string,
+  rerankerId: string,
+  evaluationSetId: string,
+  category: string = "nk",
+) {
+  const q = new URLSearchParams();
+  q.set("reranker_id", rerankerId);
+  q.set("evaluation_set_id", evaluationSetId);
+  q.set("category", category);
+  return http<Record<string, any>>(`/scoring/prediction-sets/${setId}/reranker-metrics?${q.toString()}`);
+}
+
+/**
+ * Build the URL of the `training-data.tsv` download for a prediction set and
+ * evaluation set. `category` defaults to `"nk"`.
+ */
+export function getTrainingDataTsvUrl(
+  setId: string,
+  evaluationSetId: string,
+  category: string = "nk",
+): string {
+  const q = new URLSearchParams();
+  q.set("evaluation_set_id", evaluationSetId);
+  q.set("category", category);
+  return `${baseUrl()}/scoring/prediction-sets/${setId}/training-data.tsv?${q.toString()}`;
+}
+
+// ---------------------------------------------------------------------------
+// Annotate (one-click pipeline)
+// ---------------------------------------------------------------------------
+
+/** Shape of the JSON body that `annotateProteins` returns from `/annotate`. */
+export type AnnotateResult = {
+  query_set_id: string;
+  embedding_config_id: string;
+  annotation_set_id: string;
+  ontology_snapshot_id: string;
+  embedding_job_id: string;
+  predict_payload: Record<string, any>;
+  reranker_id: string | null;
+  sequence_count: number;
+};
+
+/**
+ * Upload sequences to `/annotate` as multipart form data.
+ *
+ * Sequences may be given as a file, as FASTA text, or both; `name` falls back
+ * to "Quick annotation". Rejects without contacting the server when neither a
+ * file nor non-empty FASTA text is supplied, since such a request carries no
+ * sequences at all.
+ *
+ * @throws Error when no input is supplied, or carrying the response body text
+ *   when the response is not OK.
+ */
+export async function annotateProteins(
+  input: { file?: File; fastaText?: string; name?: string },
+): Promise<AnnotateResult> {
+  if (!input.file && !input.fastaText) {
+    throw new Error("annotateProteins requires a FASTA file or FASTA text");
+  }
+  const form = new FormData();
+  if (input.file) form.append("file", input.file);
+  if (input.fastaText) form.append("fasta_text", input.fastaText);
+  form.append("name", input.name ?? "Quick annotation");
+  const res = await fetch(`${baseUrl()}/annotate`, {
+    cache: "no-store",
+    method: "POST",
+    body: form,
+  });
+  if (!res.ok) throw new Error(await res.text());
+  return res.json();
+}
+
+// ---------------------------------------------------------------------------
+// Query sets (upload)
+// ---------------------------------------------------------------------------
+
 export async function createQuerySet(file: File, name: string, description?: str
   const form = new FormData();
   form.append("file", file);
@@ -502,3 +629,60 @@ export async function createQuerySet(file: File, name: string, description?: str
   if (!res.ok) throw new Error(await res.text());
   return res.json();
 }
+
+// ---------------------------------------------------------------------------
+// Showcase
+// ---------------------------------------------------------------------------
+
+/** F-max value holder used per column of a method entry; `fmax` may be null. */
+type ShowcaseFmaxCell = { fmax: number | null };
+
+/** Leaf entry of `ShowcaseData.best_fmax`. */
+export type ShowcaseAspectFmax = {
+  fmax: number;
+  method: string;
+  method_label: string;
+  evaluation_result_id: string;
+};
+
+/** One element of the arrays in `ShowcaseData.method_comparison`. */
+export type ShowcaseMethodEntry = {
+  method: string;
+  label: string;
+  BPO: ShowcaseFmaxCell;
+  MFO: ShowcaseFmaxCell;
+  CCO: ShowcaseFmaxCell;
+};
+
+/** One element of `ShowcaseData.pipeline_stages`. */
+export type ShowcasePipelineStage = {
+  name: string;
+  count: number;
+  href: string;
+};
+
+/** Numeric fields of `ShowcaseData.counts`. */
+type ShowcaseCounts = {
+  proteins: number;
+  sequences: number;
+  embeddings: number;
+  prediction_sets: number;
+  predictions: number;
+  reranker_models: number;
+  evaluations: number;
+};
+
+/** Response type requested from the `/showcase/` endpoint. */
+export type ShowcaseData = {
+  protein_stats: { total: number; canonical: number };
+  best_fmax: Record<string, Record<string, ShowcaseAspectFmax>>;
+  method_comparison: Record<string, ShowcaseMethodEntry[]>;
+  counts: ShowcaseCounts;
+  pipeline_stages: ShowcasePipelineStage[];
+};
+
+/** Request the showcase data from `/showcase/`. */
+export function getShowcase() {
+  const endpoint = "/showcase/";
+  return http<ShowcaseData>(endpoint);
+}
diff --git a/apps/web/messages/de.json b/apps/web/messages/de.json
index 909550d..4d66adb 100644
--- a/apps/web/messages/de.json
+++ b/apps/web/messages/de.json
@@ -1,16 +1,17 @@
 {
   "layout": { "title": "PROTEA", "description": "Proteindatenplattform — Job-Warteschlange und Pipeline-Verwaltung" },
-  "nav": { "proteins": "Proteine", "annotations": "Annotationen", "querySets": "Query-Sets", "embeddings": "Embeddings", "functionalAnnotation": "Funktionale Annotation", "scoring": "Bewertung", "evaluation": "Evaluierung", "jobs": "Jobs", "maintenance": "Wartung" },
-  "jobs": { "title": "Jobs", "allStatuses": "Alle Status", "queued": "In der Warteschlange", "running": "Laufend", "succeeded": "Erfolgreich", "failed": "Fehlgeschlagen", "cancelled": "Abgebrochen", "autoRefresh": "Auto-Aktualisierung", "refresh": "Aktualisieren", "noJobsFound": "Keine Jobs gefunden.", "status": "Status", "operation": "Operation", "jobId": "Job-ID", "created": "Erstellt", "activeJobs": "{count} aktiv", "jobDetail": { "title": "Job-Details", "backToJobs": "← Jobs", "live": "Live", "cancel": "Abbrechen", "delete": "Löschen", "deleteConfirm": "Diesen Job löschen?", "queue": "Warteschlange:", "created": "Erstellt:", "started": "Gestartet:", "finished": "Beendet:", "progress": "{current} / {total} {unit} ({percent}%)", "payloadLabel": "Nutzlast", "childJobsTitle": "Untergeordnete Jobs", "childJobsCount": "({count})", "eventsTitle": "Ereignisse", "eventsCount": "({count})" } },
-  "proteins": { "title": "Proteine", "tabs": { "browse": "Durchsuchen", "stats": "Statistiken", "insert": "Proteine einfügen", "metadata": "Metadaten abrufen" }, "browseTab": { "searchPlaceholder": "Akzession, Gen, Organismus…", "search": "Suchen", "clear": "Zurücksetzen", "allProteins": "Alle Proteine", "swissProt": "Nur Swiss-Prot", "trembl": "Nur TrEMBL", "canonicalOnly": "Nur kanonische", "totalProteins": "{count} Proteine", "tableHeaders": { "accession": "Akzession", "entryName": "Eintragsname", "gene": "Gen", "organism": "Organismus", "length": "Länge", "source": "Quelle" }, "noProteinsCta": "Keine Proteine gefunden. Verwenden Sie den Tab 'Proteine einfügen', um aus UniProt zu importieren.", "pagination": { "page": "Seite {current} von {total}", "previous": "Zurück", "next": "Weiter" } }, "statsTab": { "refresh": "Aktualisieren", "loading": "Lädt…", "overview": "Übersicht", "coverage": "Abdeckung", "totalProteins": "Proteine gesamt", "canonical": "Kanonische", "isoforms": "{count} Isoformen", "reviewed": "Swiss-Prot", "reviewedSub": "geprüft", "unreviewed": "TrEMBL", "unreviewedSub": "ungeprüft", "withMetadata": "Mit Metadaten", "metadataSub": "{percent}% der kanonischen", "withEmbeddings": "Mit Embeddings", "embeddingsSub": "{percent}% der Gesamtzahl", "withGoAnnotations": "Mit GO-Annotationen", "goAnnotationsSub": "{percent}% der Gesamtzahl" }, "insertTab": { "title": "Proteine aus UniProt einfügen", "description": "Lädt FASTA-Sequenzen herunter und fügt Protein- und Sequenz-Einträge ein.", "searchCriteriaLabel": "Suchkriterien", "searchCriteriaHelper": "UniProt-Abfrage — reviewed:true = nur Swiss-Prot", "pageSizeLabel": "Seitengröße", "totalLimitLabel": "Gesamtlimit", "totalLimitOptional": "(optional)", "includeIsoforms": "Isoformen einschließen", "jobQueuedPrefix": "Job in Warteschlange: ", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "metadataTab": { "title": "UniProt-Metadaten abrufen", "description": "Lädt TSV-Annotationen herunter 
und fügt ProteinUniProtMetadata-Einträge ein.", "searchCriteriaLabel": "Suchkriterien", "searchCriteriaHelper": "UniProt-Abfrage — reviewed:true = nur Swiss-Prot", "pageSizeLabel": "Seitengröße", "totalLimitLabel": "Gesamtlimit", "totalLimitOptional": "(optional)", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "sourceSwissProt": "Swiss-Prot", "sourceTrembl": "TrEMBL" },
-  "proteinDetail": { "backToProteins": "← Proteine", "tabs": { "overview": "Übersicht", "annotations": "GO-Annotationen" }, "overviewTab": { "identity": "Identität", "gene": "Gen", "organism": "Organismus", "taxonId": "Taxon-ID", "length": "Länge", "aa": "AS", "sequenceId": "Sequenz-ID", "canonical": "Kanonisch", "coverage": "Abdeckung", "embeddings": "Embeddings", "goAnnotations": "GO-Annotationen", "metadata": "Metadaten", "yes": "ja", "none": "keine", "isoforms": "Isoformen", "function": "Funktion", "biochemistry": "Biochemie", "ecNumber": "EC-Nummer", "catalyticActivity": "Katalytische Aktivität", "cofactor": "Kofaktor", "activityRegulation": "Aktivitätsregulation", "pathway": "Signalweg", "absorption": "Absorption", "kinetics": "Kinetik", "phDependence": "pH-Abhängigkeit", "redoxPotential": "Redoxpotenzial", "temperatureDependence": "Temperaturabhängigkeit", "rheaId": "Rhea-ID", "keywords": "Schlüsselwörter", "noFunctionalMetadata": "Keine funktionalen Metadaten verfügbar. Verwenden Sie den Tab 'Metadaten abrufen', um aus UniProt zu importieren.", "showGoGraph": "GO-Graph anzeigen", "hideGoGraph": "GO-Graph ausblenden", "loadingGraph": "Graph wird geladen…", "noGoAnnotations": "Keine GO-Annotationen für dieses Protein gefunden.", "molecularFunction": "Molekulare Funktion", "biologicalProcess": "Biologischer Prozess", "cellularComponent": "Zelluläre Komponente", "annotations": "Annotationen", "goTableHeaders": { "goId": "GO-ID", "name": "Name", "evidence": "Evidenz", "qualifier": "Qualifikator", "source": "Quelle" } } },
-  "annotations": { "title": "Annotationen", "tabs": { "sets": "Annotationssets", "snapshots": "Ontologie-Snapshots", "loadSnapshot": "Snapshot laden", "loadGoa": "GOA laden", "loadQuickgo": "QuickGO laden" }, "setsTab": { "annotationSets": "{count} Annotationssets", "refresh": "Aktualisieren", "noSetsFound": "Noch keine Annotationssets. Laden Sie GO-Annotationen aus dem Tab 'GOA laden' oder 'QuickGO laden'.", "tableHeaders": { "id": "ID", "source": "Quelle", "version": "Version", "annotations": "Annotationen", "meta": "Meta", "created": "Erstellt" }, "delete": "Löschen", "deleteConfirm": "Dieses Annotationsset und seine {count} GO-Annotationen löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoAnnotations": "Dieses Annotationsset löschen?" }, "snapshotsTab": { "snapshots": "{count} Snapshots", "refresh": "Aktualisieren", "noSnapshotsFound": "Noch keine Ontologie-Snapshots. Verwenden Sie den Tab 'Snapshot laden'.", "tableHeaders": { "id": "ID", "version": "Version", "goTerms": "GO-Terme", "iaUrl": "IA-URL", "loaded": "Geladen" }, "notSet": "nicht gesetzt", "save": "Speichern", "cancel": "Abbrechen", "editTooltip": "Tippen zum Bearbeiten der IA-URL" }, "loadSnapshotTab": { "title": "Ontologie-Snapshot laden", "description": "Lädt eine GO OBO-Datei herunter und füllt GOTerm-Einträge.", "oboUrlLabel": "OBO-URL", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "loadGoaTab": { "title": "GOA-Annotationen laden", "description": "Lädt GO-Annotationen aus einer GAF-Datei in großen Mengen.", "snapshotLabel": "Ontologie-Snapshot", "selectSnapshot": "— Snapshot auswählen —", "noSnapshots": "Keine Snapshots — zuerst Snapshot laden ausführen.", "gafUrlLabel": "GAF-URL", "gafUrlPlaceholder": "https://current.geneontology.org/annotations/goa_human.gaf.gz", "sourceVersionLabel": "Quellversion", "sourceVersionPlaceholder": "2025-03", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "loadQuickgoTab": { "title": "QuickGO-Annotationen 
laden", "description": "Streamt GO-Annotationen aus der QuickGO-Massen-Download-API.", "snapshotLabel": "Ontologie-Snapshot", "selectSnapshot": "— Snapshot auswählen —", "noSnapshots": "Keine Snapshots — zuerst Snapshot laden ausführen.", "sourceVersionLabel": "Quellversion", "sourceVersionPlaceholder": "2025-03", "launchJob": "Job starten", "launching": "Wird gestartet…" } },
-  "embeddings": { "title": "Embeddings", "tabs": { "configs": "Konfigurationen", "compute": "Berechnen" }, "configsTab": { "configs": "{count} Konfigurationen", "newConfig": "+ Neue Konfiguration", "cancel": "Abbrechen", "newConfigForm": { "title": "Neue Embedding-Konfiguration", "layerIndexingWarning": "Schicht-Indizierung — umgekehrte Konvention: 0 = letzte (semantischste) Schicht, 1 = vorletzte, usw.", "modelBackendLabel": "Modell-Backend", "modelBackendEsm": "esm — HuggingFace EsmModel (ESM-2)", "modelBackendEsm3c": "esm3c — ESM SDK ESMC (ESM3c) · FP16 auf GPU", "modelBackendT5": "t5 — HuggingFace T5EncoderModel (ProstT5…)", "modelBackendAuto": "auto — Rückfall auf esm", "modelLabel": "Modell", "customModelPlaceholder": "z.B. facebook/esm2_t33_650M_UR50D", "layerIndicesLabel": "Schichtindizes", "layerIndicesHelper": "(0 = letzte, 1 = vorletzte…)", "layerIndicesPlaceholder": "0  oder  0,1,2", "layerAggLabel": "Schicht-Aggregation", "layerAggMean": "mean — elementweiser Durchschnitt", "layerAggLast": "last — nur die letzte ausgewählte Schicht", "layerAggConcat": "concat — alle verketten (dim × n_layers)", "poolingLabel": "Sequenz-Pooling", "poolingMean": "mean — Mittelwert über Reste", "poolingMax": "max — Maximum über Reste", "poolingMeanMax": "mean_max — concat(mean, max) · dim × 2", "poolingCls": "cls — CLS/BOS-Token an Position 0", "maxLengthLabel": "Maximale Länge (Token)", "descriptionLabel": "Beschreibung (optional)", "normalizeResidues": "Reste normalisieren (L2 pro Rest vor Pooling)", "normalizeFinal": "Finales Embedding normalisieren (L2 nach Pooling)", "enableChunking": "Chunking aktivieren (lange Sequenzen → mehrere Embeddings pro Sequenz)", "chunkSizeLabel": "Chunk-Größe (Reste)", "chunkOverlapLabel": "Chunk-Überlappung (Reste)", "createConfig": "Konfiguration erstellen", "creating": "Wird erstellt…" }, "tableHeaders": { "description": "Beschreibung", "model": "Modell", "backend": "Backend", "layers": "Schichten", "agg": "Agg", "pool": "Pool", 
"norm": "Norm", "created": "Erstellt" }, "noConfigs": "Noch keine Embedding-Konfigurationen. Erstellen Sie eine", "deleteConfirm": "Diese Embedding-Konfiguration und ihre {count} gespeicherten Embeddings löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoEmbeddings": "Diese Embedding-Konfiguration löschen?" }, "computeTab": { "title": "Embeddings berechnen", "loading": "Lädt…", "configLabel": "Embedding-Konfiguration", "noConfigs": "— keine Konfigurationen verfügbar —", "querySetLabel": "Query-Set", "querySetHelper": "(optional — leer lassen für alle berechnen)", "allSequences": "— alle Sequenzen —", "queueBatchSizeLabel": "Warteschlangen-Batchgröße", "queueBatchSizeHelper": "(Seq/Job)", "modelBatchSizeLabel": "Modell-Batchgröße", "modelBatchSizeHelper": "(Seq/Vorwärtsdurchlauf)", "deviceLabel": "Gerät", "deviceCpu": "cpu — CPU (FP32)", "deviceCuda": "cuda — Standard-GPU (FP16 für ESM3c/T5)", "deviceCuda0": "cuda:0 — GPU 0", "deviceCuda1": "cuda:1 — GPU 1", "deviceCustom": "benutzerdefiniert…", "skipExisting": "Vorhandene Embeddings überspringen", "launchComputeJob": "Berechnungs-Job starten", "launching": "Wird gestartet…" } },
-  "functionalAnnotation": { "title": "Funktionale Annotation", "tabs": { "predict": "Annotation ausführen", "results": "Ergebnisse" }, "predictTab": { "title": "GO-Term-Annotation durch Embedding-Ähnlichkeit", "loading": "Lädt…", "configLabel": "Embedding-Konfiguration", "noConfigs": "— keine Konfigurationen verfügbar —", "querySetLabel": "Query-Set", "querySetHelper": "(optional — leer lassen für alle annotieren)", "allSequences": "— alle Sequenzen —", "annotationSetLabel": "Annotationsset", "noAnnotationSets": "— keine Annotationssets verfügbar —", "snapshotLabel": "Ontologie-Snapshot", "noSnapshots": "— keine Snapshots verfügbar —", "limitPerEntryLabel": "Limit pro Eintrag", "batchSizeLabel": "Batchgröße", "distanceThresholdLabel": "Distanzschwelle", "distanceThresholdHelper": "(optional)", "knnStrategy": "KNN-Strategie", "aspectSeparatedKnn": "Aspekt-getrennte KNN-Indizes", "aspectSeparatedKnnHelper": "Separate BPO / MFO / CCO Referenzindizes — verbessert die Trefferquote für jeden Aspekt unabhängig", "featureEngineering": "Feature Engineering", "featureEngineeringHelper": "(opt-in — erhöht Rechenzeit)", "sequenceAlignments": "Sequenzalignments", "sequenceAlignmentsHelper": "NW (global) + SW (lokal) via parasail/BLOSUM62", "taxonomicDistance": "Taxonomische Distanz", "taxonomicDistanceHelper": "LCA, Distanz und Relation via NCBI-Taxonomie", "searchBackend": "Such-Backend", "searchBackendLabel": "Backend", "numpyBackend": "numpy — exakt", "faissBackend": "faiss — indiziert", "metricLabel": "Metrik", "cosineSimilarity": "Kosinus", "euclideanDistance": "L2 (Euklidisch²)", "indexTypeLabel": "Indextyp", "flatIndex": "Flat — exakt", "ivfflatIndex": "IVFFlat — näherungsweise (>100K Referenzen)", "hnswIndex": "HNSW — näherungsweise, graphbasiert", "nlistLabel": "nlist", "nprobeLabel": "nprobe", "mLabel": "M", "efSearchLabel": "efSearch", "launchAnnotationJob": "Annotations-Job starten", "launching": "Wird gestartet…" }, "resultsTab": { "title": "Ergebnisse", 
"refresh": "Aktualisieren", "noResults": "Noch keine Annotationsergebnisse. Führen Sie einen Annotations-Job im Tab 'Annotation ausführen' durch.", "tableHeaders": { "id": "ID", "config": "Konfiguration", "annotationSet": "Annotationsset", "snapshot": "Snapshot", "goTerms": "GO-Terme", "distanceThreshold": "Dist.-Schwelle", "created": "Erstellt" }, "delete": "Löschen", "deleteConfirm": "Dieses Annotationsset und seine {count} GO-Term-Zuweisungen löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoAssignments": "Dieses Annotationsset löschen?" } },
-  "evaluation": { "title": "CAFA-Evaluierung", "generateSection": { "heading": "Neues Evaluierungsset", "description": "Berechnet das Delta zwischen zwei GOA-Releases.", "oldSetLabel": "Altes GOA-Set (Referenz)", "selectSet": "— auswählen —", "newSetLabel": "Neues GOA-Set (Grundwahrheit)", "errorSameSets": "Altes und neues Set müssen unterschiedlich sein.", "generateEvaluationSet": "Evaluierungsset generieren", "generating": "Wird in Warteschlange eingereiht…" }, "evaluationSetsSection": { "heading": "Evaluierungssets" }, "evaluationSetCard": { "deleteConfirm": "Dieses Evaluierungsset und alle seine Ergebnisse löschen?", "delete": "Löschen", "deltaProteins": "Delta-Proteine", "nkProteins": "NK-Proteine", "lkProteins": "LK-Proteine", "pkProteins": "PK-Proteine", "groundTruthFiles": "Grundwahrheitsdateien", "downloadNK": "NK", "downloadLK": "LK", "downloadPK": "PK", "downloadKnownTerms": "Bekannte Terme", "deltaProteinSequences": "Delta-Proteinsequenzen (FASTA)", "allDelta": "Alle Delta (NK+LK+PK)", "nkOnly": "Nur NK", "lkOnly": "Nur LK", "pkOnly": "Nur PK", "runCafaEvaluator": "CAFA-Evaluator ausführen", "predictionSetLabel": "Vorhersageset", "scoringConfigLabel": "Bewertungskonfiguration (optional)", "fallbackFormula": "— Rückfall (1−d/2) —", "maxDistanceLabel": "Maximale Distanz (optional)", "jobQueued": "Job in Warteschlange.", "viewJob": "Job anzeigen →", "runEvaluation": "Evaluierung ausführen (NK + LK + PK)", "resultsHeading": "Ergebnisse", "pollingResults": "● Abrufen", "refreshResults": "↻ Aktualisieren", "refreshing": "Wird aktualisiert…", "noEvaluations": "Noch keine Evaluierungen durchgeführt.", "predictionSet": "Vorhersageset:", "scoring": "Bewertung:", "artifactsDownload": "↓ Artefakte (.zip)", "resultDelete": "Löschen", "deleteResultConfirm": "Dieses Evaluierungsergebnis löschen?" 
}, "resultMetrics": { "nk": "NK", "lk": "LK", "pk": "PK", "biologicalProcess": "Biologischer Prozess", "molecularFunction": "Molekulare Funktion", "cellularComponent": "Zelluläre Komponente", "fmax": "Fmax", "precision": "Präzision", "recall": "Trefferquote", "coverage": "Abdeckung", "tau": "τ" }, "manualEvaluatorSection": { "heading": "Manueller Evaluator-Befehl" } },
-  "scoring": { "title": "Bewertungskonfigurationen", "description": "Eine ScoringConfig definiert, wie rohe Vorhersagesignale zu einem einzigen [0, 1]-Konfidenzwert kombiniert werden.", "loadPresets": "Voreinstellungen laden", "presetsLoading": "Lädt…", "availableSignals": "Verfügbare Signale", "signals": { "embeddingSimilarity": "Embedding-Ähnlichkeit", "embeddingSimilarityHint": "1 − Kosinus-Distanz / 2 — immer verfügbar.", "identityNw": "Identität NW", "identityNwHint": "Needleman-Wunsch globale Sequenzidentität [0, 1].", "identitySw": "Identität SW", "identitySwHint": "Smith-Waterman lokale Sequenzidentität [0, 1].", "evidenceWeight": "Evidenzgewicht", "evidenceWeightHint": "Qualität des GO-Evidenzcodes der Referenzannotation.", "taxonomicProximity": "Taxonomische Nähe", "taxonomicProximityHint": "1 / (1 + taxonomische_Distanz)" }, "configCard": { "deleteConfirm": "Bewertungskonfiguration \"{name}\" löschen?", "delete": "Löschen", "customEvidenceWeights": "benutzerdefinierte Evidenzgewichte", "evidenceCodeWeights": "Evidenzcode-Gewichte", "expand": "▶", "collapse": "▲", "systemDefaults": "(Systemstandards)", "custom": "(benutzerdefiniert)" }, "newConfigForm": { "newConfig": "+ Neue Bewertungskonfiguration", "formTitle": "Neue Konfiguration", "close": "×", "nameLabel": "Name", "namePlaceholder": "meine_konfiguration", "formulaLabel": "Formel", "linear": "linear", "evidenceWeighted": "evidence_weighted", "descriptionLabel": "Beschreibung", "descriptionHelper": "(optional)", "descriptionPlaceholder": "Wofür diese Konfiguration ist…", "signalWeights": "Signalgewichte", "evidenceCodeWeights": "Evidenzcode-Gewichte", "overrideCheckbox": "Pro-Evidenzcode-Qualitätsgewichte überschreiben", "systemDefaultsNote": "Systemstandards werden verwendet — EXP/IDA → 1,0 · ISS/IBA → 0,7 · IEA → 0,3 · ND → 0,1", "experimental": "Experimentell", "experimentalDescription": "Annotationen, die durch direkte experimentelle Evidenz gestützt werden. 
Höchste Vertrauensstufe.", "computational": "Rechnerisch / Phylogenetisch", "computationalDescription": "Annotationen abgeleitet aus Sequenzähnlichkeit, Orthologie oder phylogenetischer Inferenz.", "electronic": "Elektronisch", "electronicDescription": "Automatisierte Annotationen (IEA) oder nicht rückverfolgbare Autorenaussagen (NAS). Geringere Konfidenz.", "noData": "Keine Daten", "noDataDescription": "Platzhaltercode, der angibt, dass keine biologischen Daten verfügbar sind.", "off": "Aus", "max": "Max", "groupShortcut": "0,5", "resetEvidenceWeights": "Alle auf Systemstandards zurücksetzen", "saveConfig": "Konfiguration speichern", "saving": "Wird gespeichert…", "cancel": "Abbrechen" }, "noConfigs": "Noch keine Konfigurationen. Voreinstellungen laden oder oben eine erstellen." },
+  "nav": { "proteins": "Proteine", "annotations": "Annotationen", "querySets": "Query-Sets", "embeddings": "Embeddings", "functionalAnnotation": "Funktionale Annotation", "scoring": "Scoring", "evaluation": "Evaluierung", "reranker": "Re-Ranker", "jobs": "Jobs", "maintenance": "Wartung", "home": "Startseite", "data": "Daten", "pipelineGroup": "Pipeline", "results": "Ergebnisse", "system": "System" },
+  "home": { "title": "Pipeline zur Vorhersage von Proteinfunktionen", "subtitle": "Von der Sequenz zur funktionellen Annotation durch Embedding-Ähnlichkeit, Re-Ranking und LLM-Kuration", "bestResults": "Beste Ergebnisse", "fmax": "Fmax", "methodComparison": "Methodenvergleich", "method": "Methode", "delta": "vs Baseline", "pipeline": "Pipeline", "stats": "Plattformstatistiken", "proteins": "Proteine", "sequences": "Sequenzen", "embeddings": "Embeddings", "predictions": "Vorhersagen", "predictionSets": "Vorhersagesets", "rerankerModels": "Re-Ranker-Modelle", "evaluations": "Evaluierungen", "exploreResults": "Ergebnisse erkunden", "annotateProteins": "Meine Proteine annotieren", "knnBaseline": "KNN (Embedding-Distanz)", "knnScored": "KNN + Scoring", "knnReranker": "KNN + Re-Ranker", "noDataYet": "Noch keine Evaluierungsdaten verfügbar. Führen Sie die Pipeline aus, um hier Ergebnisse zu sehen.", "getStarted": "Starten", "stageSequences": "Sequenzen", "stageEmbeddings": "Embeddings", "stageKnn": "KNN-Suche", "stageReranker": "Re-Ranker", "stageLlm": "LLM-Kuration", "stageAnnotation": "Annotation", "stageEvaluation": "Evaluierung", "nkCategory": "NK-Kategorie (No Knowledge) — anspruchsvollste Evaluierungseinstellung", "annotateTitle": "Ihre Proteine annotieren", "annotateDescription": "Proteinsequenzen im FASTA-Format einfügen und automatisch funktionelle Annotationen mit der besten verfügbaren Methode erhalten.", "annotatePlaceholder": ">sp|P04637|P53_HUMAN Cellular tumor antigen p53\nMEEPQSDPSVEPPLSQETFSDLWKLL...", "annotateTryExample": "Beispiel testen", "annotateUploadFile": "Datei hochladen", "annotateButton": "Annotieren", "annotateUploading": "Hochladen...", "annotateEmbedding": "Embeddings berechnen...", "annotatePredicting": "GO-Terme vorhersagen...", "annotateDone": "Fertig! Weiterleitung zu Ergebnissen...", "annotateStepUpload": "Hochladen", "annotateStepEmbed": "Embeddings", "annotateStepPredict": "Vorhersage" },
+  "jobs": { "title": "Jobs", "allStatuses": "Alle Status", "queued": "Wartend", "running": "Laufend", "succeeded": "Erfolgreich", "failed": "Fehlgeschlagen", "cancelled": "Abgebrochen", "autoRefresh": "Auto-Aktualisierung", "refresh": "Aktualisieren", "noJobsFound": "Keine Jobs gefunden.", "status": "Status", "operation": "Operation", "jobId": "Job-ID", "created": "Erstellt", "activeJobs": "{count} aktiv", "jobDetail": { "title": "Job-Details", "backToJobs": "← Jobs", "live": "Live", "cancel": "Abbrechen", "delete": "Löschen", "deleteConfirm": "Diesen Job löschen?", "queue": "Warteschlange:", "created": "Erstellt:", "started": "Gestartet:", "finished": "Beendet:", "progress": "{current} / {total} {unit} ({percent}%)", "payloadLabel": "Payload", "childJobsTitle": "Kind-Jobs", "childJobsCount": "({count})", "eventsTitle": "Ereignisse", "eventsCount": "({count})" } },
+  "proteins": { "title": "Proteine", "tabs": { "browse": "Durchsuchen", "stats": "Statistiken", "insert": "Proteine einfügen", "metadata": "Metadaten abrufen" }, "browseTab": { "searchPlaceholder": "Akzession, Gen, Organismus…", "search": "Suchen", "clear": "Zurücksetzen", "allProteins": "Alle Proteine", "swissProt": "Nur Swiss-Prot", "trembl": "Nur TrEMBL", "canonicalOnly": "Nur kanonische", "totalProteins": "{count} Proteine", "tableHeaders": { "accession": "Akzession", "entryName": "Eintragsname", "gene": "Gen", "organism": "Organismus", "length": "Länge", "source": "Quelle" }, "noProteinsCta": "Keine Proteine gefunden. Verwenden Sie den Tab „Proteine einfügen“, um aus UniProt zu importieren.", "pagination": { "page": "Seite {current} von {total}", "previous": "Zurück", "next": "Weiter" } }, "statsTab": { "refresh": "Aktualisieren", "loading": "Lädt…", "overview": "Übersicht", "coverage": "Abdeckung", "totalProteins": "Proteine gesamt", "canonical": "Kanonische", "isoforms": "{count} Isoformen", "reviewed": "Swiss-Prot", "reviewedSub": "geprüft", "unreviewed": "TrEMBL", "unreviewedSub": "ungeprüft", "withMetadata": "Mit Metadaten", "metadataSub": "{percent}% der kanonischen", "withEmbeddings": "Mit Embeddings", "embeddingsSub": "{percent}% gesamt", "withGoAnnotations": "Mit GO-Annotationen", "goAnnotationsSub": "{percent}% gesamt" }, "insertTab": { "title": "Proteine aus UniProt einfügen", "description": "Lädt FASTA-Sequenzen herunter und fügt Protein- und Sequenz-Einträge ein.", "searchCriteriaLabel": "Suchkriterien", "searchCriteriaHelper": "UniProt-Abfrage — reviewed:true = nur Swiss-Prot", "pageSizeLabel": "Seitengröße", "totalLimitLabel": "Gesamtlimit", "totalLimitOptional": "(optional)", "includeIsoforms": "Isoformen einschließen", "jobQueuedPrefix": "Job in Warteschlange: ", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "metadataTab": { "title": "UniProt-Metadaten abrufen", "description": "Lädt TSV-Annotationen herunter und fügt 
ProteinUniProtMetadata-Einträge ein.", "searchCriteriaLabel": "Suchkriterien", "searchCriteriaHelper": "UniProt-Abfrage — reviewed:true = nur Swiss-Prot", "pageSizeLabel": "Seitengröße", "totalLimitLabel": "Gesamtlimit", "totalLimitOptional": "(optional)", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "sourceSwissProt": "Swiss-Prot", "sourceTrembl": "TrEMBL" },
+  "proteinDetail": { "backToProteins": "← Proteine", "tabs": { "overview": "Übersicht", "annotations": "GO-Annotationen" }, "overviewTab": { "identity": "Identität", "gene": "Gen", "organism": "Organismus", "taxonId": "Taxon-ID", "length": "Länge", "aa": "AS", "sequenceId": "Sequenz-ID", "canonical": "Kanonisch", "coverage": "Abdeckung", "embeddings": "Embeddings", "goAnnotations": "GO-Annotationen", "metadata": "Metadaten", "yes": "ja", "none": "keine", "isoforms": "Isoformen", "function": "Funktion", "biochemistry": "Biochemie", "ecNumber": "EC-Nummer", "catalyticActivity": "Katalytische Aktivität", "cofactor": "Kofaktor", "activityRegulation": "Aktivitätsregulation", "pathway": "Signalweg", "absorption": "Absorption", "kinetics": "Kinetik", "phDependence": "pH-Abhängigkeit", "redoxPotential": "Redoxpotenzial", "temperatureDependence": "Temperaturabhängigkeit", "rheaId": "Rhea-ID", "keywords": "Schlüsselwörter", "noFunctionalMetadata": "Keine funktionalen Metadaten verfügbar. Verwenden Sie den Tab „Metadaten abrufen“, um aus UniProt zu importieren.", "showGoGraph": "GO-Graph anzeigen", "hideGoGraph": "GO-Graph ausblenden", "loadingGraph": "Graph wird geladen…", "noGoAnnotations": "Keine GO-Annotationen für dieses Protein gefunden.", "molecularFunction": "Molekulare Funktion", "biologicalProcess": "Biologischer Prozess", "cellularComponent": "Zelluläre Komponente", "annotations": "Annotationen", "goTableHeaders": { "goId": "GO-ID", "name": "Name", "evidence": "Evidenz", "qualifier": "Qualifikator", "source": "Quelle" } } },
+  "annotations": { "title": "Annotationen", "tabs": { "sets": "Annotationssets", "snapshots": "Ontologie-Snapshots", "loadSnapshot": "Snapshot laden", "loadGoa": "GOA laden", "loadQuickgo": "QuickGO laden" }, "setsTab": { "annotationSets": "{count} Annotationssets", "refresh": "Aktualisieren", "noSetsFound": "Noch keine Annotationssets. Laden Sie GO-Annotationen im Tab „GOA laden“ oder „QuickGO laden“.", "tableHeaders": { "id": "ID", "source": "Quelle", "version": "Version", "annotations": "Annotationen", "meta": "Meta", "created": "Erstellt" }, "delete": "Löschen", "deleteConfirm": "Dieses Annotationsset und seine {count} GO-Annotationen löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoAnnotations": "Dieses Annotationsset löschen?" }, "snapshotsTab": { "snapshots": "{count} Snapshots", "refresh": "Aktualisieren", "noSnapshotsFound": "Noch keine Ontologie-Snapshots. Verwenden Sie den Tab „Snapshot laden“.", "tableHeaders": { "id": "ID", "version": "Version", "goTerms": "GO-Terme", "iaUrl": "IA-URL", "loaded": "Geladen" }, "notSet": "nicht gesetzt", "save": "Speichern", "cancel": "Abbrechen", "editTooltip": "Tippen zum Bearbeiten der IA-URL" }, "loadSnapshotTab": { "title": "Ontologie-Snapshot laden", "description": "Lädt eine GO-OBO-Datei herunter und füllt GOTerm-Einträge.", "oboUrlLabel": "OBO-URL", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "loadGoaTab": { "title": "GOA-Annotationen laden", "description": "Lädt GO-Annotationen aus einer GAF-Datei in großen Mengen.", "snapshotLabel": "Ontologie-Snapshot", "selectSnapshot": "— Snapshot auswählen —", "noSnapshots": "Keine Snapshots — zuerst „Snapshot laden“ ausführen.", "gafUrlLabel": "GAF-URL", "gafUrlPlaceholder": "https://current.geneontology.org/annotations/goa_human.gaf.gz", "sourceVersionLabel": "Quellversion", "sourceVersionPlaceholder": "2025-03", "launchJob": "Job starten", "launching": "Wird gestartet…" }, "loadQuickgoTab": { "title": "QuickGO-Annotationen laden", 
"description": "Streamt GO-Annotationen aus der QuickGO-Massen-Download-API.", "snapshotLabel": "Ontologie-Snapshot", "selectSnapshot": "— Snapshot auswählen —", "noSnapshots": "Keine Snapshots — zuerst „Snapshot laden“ ausführen.", "sourceVersionLabel": "Quellversion", "sourceVersionPlaceholder": "2025-03", "launchJob": "Job starten", "launching": "Wird gestartet…" } },
+  "embeddings": { "title": "Embeddings", "tabs": { "configs": "Konfigurationen", "compute": "Berechnen" }, "configsTab": { "configs": "{count} Konfigurationen", "newConfig": "+ Neue Konfiguration", "cancel": "Abbrechen", "newConfigForm": { "title": "Neue Embedding-Konfiguration", "layerIndexingWarning": "Schicht-Indizierung — umgekehrte Konvention: 0 = letzte (semantischste) Schicht, 1 = vorletzte, usw.", "modelBackendLabel": "Modell-Backend", "modelBackendEsm": "esm — HuggingFace EsmModel (ESM-2)", "modelBackendEsm3c": "esm3c — ESM SDK ESMC (ESM3c) · FP16 auf GPU", "modelBackendT5": "t5 — HuggingFace T5EncoderModel (ProstT5…)", "modelBackendAuto": "auto — Rückfall auf esm", "modelLabel": "Modell", "customModelPlaceholder": "z.B. facebook/esm2_t33_650M_UR50D", "layerIndicesLabel": "Schichtindizes", "layerIndicesHelper": "(0 = letzte, 1 = vorletzte…)", "layerIndicesPlaceholder": "0  oder  0,1,2", "layerAggLabel": "Schicht-Aggregation", "layerAggMean": "mean — elementweiser Durchschnitt", "layerAggLast": "last — nur die letzte ausgewählte Schicht", "layerAggConcat": "concat — alle verketten (dim × n_layers)", "poolingLabel": "Sequenz-Pooling", "poolingMean": "mean — Mittelwert über Residuen", "poolingMax": "max — Maximum über Residuen", "poolingMeanMax": "mean_max — concat(mean, max) · dim × 2", "poolingCls": "cls — CLS/BOS-Token an Position 0", "maxLengthLabel": "Max. 
Länge (Token)", "descriptionLabel": "Beschreibung (optional)", "normalizeResidues": "Residuen normalisieren (L2 pro Residuum vor Pooling)", "normalizeFinal": "Finales Embedding normalisieren (L2 nach Pooling)", "enableChunking": "Chunking aktivieren (lange Sequenzen → mehrere Embeddings pro Sequenz)", "chunkSizeLabel": "Chunk-Größe (Residuen)", "chunkOverlapLabel": "Chunk-Überlappung (Residuen)", "createConfig": "Konfiguration erstellen", "creating": "Wird erstellt…" }, "tableHeaders": { "description": "Beschreibung", "model": "Modell", "backend": "Backend", "layers": "Schichten", "agg": "Agg", "pool": "Pool", "norm": "Norm", "created": "Erstellt" }, "noConfigs": "Noch keine Embedding-Konfigurationen. Erstellen Sie eine", "deleteConfirm": "Diese Embedding-Konfiguration und ihre {count} gespeicherten Embeddings löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoEmbeddings": "Diese Embedding-Konfiguration löschen?" }, "computeTab": { "title": "Embeddings berechnen", "loading": "Lädt…", "configLabel": "Embedding-Konfiguration", "noConfigs": "— keine Konfigurationen verfügbar —", "querySetLabel": "Query-Set", "querySetHelper": "(optional — leer lassen um alle zu berechnen)", "allSequences": "— alle Sequenzen —", "queueBatchSizeLabel": "Queue-Batchgröße", "queueBatchSizeHelper": "(Seq/Job)", "modelBatchSizeLabel": "Modell-Batchgröße", "modelBatchSizeHelper": "(Seq/Forward)", "deviceLabel": "Gerät", "deviceCpu": "cpu — CPU (FP32)", "deviceCuda": "cuda — Standard-GPU (FP16 für ESM3c/T5)", "deviceCuda0": "cuda:0 — GPU 0", "deviceCuda1": "cuda:1 — GPU 1", "deviceCustom": "benutzerdefiniert…", "skipExisting": "Vorhandene Embeddings überspringen", "launchComputeJob": "Berechnungs-Job starten", "launching": "Wird gestartet…" } },
+  "functionalAnnotation": { "title": "Funktionale Annotation", "tabs": { "predict": "Annotation starten", "results": "Ergebnisse" }, "predictTab": { "title": "GO-Term-Annotation durch Embedding-Ähnlichkeit", "loading": "Lädt…", "configLabel": "Embedding-Konfiguration", "noConfigs": "— keine Konfigurationen verfügbar —", "querySetLabel": "Query-Set", "querySetHelper": "(optional — leer lassen um alle zu annotieren)", "allSequences": "— alle Sequenzen —", "annotationSetLabel": "Annotationsset", "noAnnotationSets": "— keine Annotationssets verfügbar —", "snapshotLabel": "Ontologie-Snapshot", "noSnapshots": "— keine Snapshots verfügbar —", "limitPerEntryLabel": "Limit pro Eintrag", "batchSizeLabel": "Batchgröße", "distanceThresholdLabel": "Distanzschwelle", "distanceThresholdHelper": "(optional)", "knnStrategy": "KNN-Strategie", "aspectSeparatedKnn": "KNN-Indizes pro Aspekt", "aspectSeparatedKnnHelper": "Separate BPO / MFO / CCO Referenzindizes — verbessert den Recall pro Aspekt", "featureEngineering": "Feature Engineering", "featureEngineeringHelper": "(opt-in — erhöht Rechenzeit)", "sequenceAlignments": "Sequenzalignments", "sequenceAlignmentsHelper": "NW (global) + SW (lokal) via parasail/BLOSUM62", "taxonomicDistance": "Taxonomische Distanz", "taxonomicDistanceHelper": "LCA, Distanz und Relation via NCBI-Taxonomie", "searchBackend": "Such-Backend", "searchBackendLabel": "Backend", "numpyBackend": "numpy — exakt", "faissBackend": "faiss — indiziert", "metricLabel": "Metrik", "cosineSimilarity": "Kosinus", "euclideanDistance": "L2 (Euklidisch²)", "indexTypeLabel": "Indextyp", "flatIndex": "Flat — exakt", "ivfflatIndex": "IVFFlat — approximativ (>100K Refs)", "hnswIndex": "HNSW — approximativ, graphbasiert", "nlistLabel": "nlist", "nprobeLabel": "nprobe", "mLabel": "M", "efSearchLabel": "efSearch", "launchAnnotationJob": "Annotations-Job starten", "launching": "Wird gestartet…" }, "resultsTab": { "title": "Ergebnisse", "refresh": "Aktualisieren", "noResults": "Noch 
keine Annotationsergebnisse. Starten Sie einen Annotations-Job im Tab „Annotation starten“.", "tableHeaders": { "id": "ID", "config": "Konfiguration", "annotationSet": "Annotationsset", "snapshot": "Snapshot", "goTerms": "GO-Terme", "distanceThreshold": "Dist.-Schwelle", "k": "k", "created": "Erstellt" }, "delete": "Löschen", "deleteConfirm": "Dieses Annotationsset und seine {count} GO-Term-Zuweisungen löschen? Dies kann nicht rückgängig gemacht werden.", "deleteConfirmNoAssignments": "Dieses Annotationsset löschen?" } },
+  "evaluation": { "title": "CAFA-Evaluierung", "generateSection": { "heading": "Neues Evaluierungsset", "description": "Berechnet das Delta zwischen zwei GOA-Releases. Wendet experimentelle Evidenzfilterung und NOT-Qualifier-Propagation durch den GO-DAG an.", "oldSetLabel": "Altes GOA-Set (Referenz)", "selectSet": "— auswählen —", "newSetLabel": "Neues GOA-Set (Ground Truth)", "errorSameSets": "Altes und neues Set müssen unterschiedlich sein.", "generateEvaluationSet": "Evaluierungsset generieren", "generating": "Wird eingereiht…" }, "evaluationSetsSection": { "heading": "Evaluierungssets" }, "evaluationSetCard": { "deleteConfirm": "Dieses Evaluierungsset und alle Ergebnisse löschen?", "delete": "Löschen", "deltaProteins": "Delta-Proteine", "nkProteins": "NK-Proteine", "lkProteins": "LK-Proteine", "pkProteins": "PK-Proteine", "groundTruthFiles": "Ground-Truth-Dateien", "downloadNK": "NK", "downloadLK": "LK", "downloadPK": "PK", "downloadKnownTerms": "Bekannte Terme", "deltaProteinSequences": "Delta-Proteinsequenzen (FASTA)", "allDelta": "Alle Delta (NK+LK+PK)", "nkOnly": "Nur NK", "lkOnly": "Nur LK", "pkOnly": "Nur PK", "runCafaEvaluator": "CAFA-Evaluator starten", "predictionSetLabel": "Prediction-Set", "scoringConfigLabel": "Scoring-Konfiguration (optional)", "fallbackFormula": "— Fallback (1−d/2) —", "maxDistanceLabel": "Max. Distanz (optional)", "jobQueued": "Job eingereiht.", "viewJob": "Job anzeigen →", "runEvaluation": "Evaluierung starten (NK + LK + PK)", "resultsHeading": "Ergebnisse", "pollingResults": "● Abfrage", "refreshResults": "↻ Aktualisieren", "refreshing": "Wird aktualisiert…", "noEvaluations": "Noch keine Evaluierungen durchgeführt.", "predictionSet": "Prediction-Set:", "scoring": "Scoring:", "artifactsDownload": "↓ Artefakte (.zip)", "resultDelete": "Löschen", "deleteResultConfirm": "Dieses Evaluierungsergebnis löschen?" 
}, "resultMetrics": { "nk": "NK", "lk": "LK", "pk": "PK", "biologicalProcess": "Biologischer Prozess", "molecularFunction": "Molekulare Funktion", "cellularComponent": "Zelluläre Komponente", "fmax": "Fmax", "precision": "Präzision", "recall": "Recall", "coverage": "Abdeckung", "tau": "τ" }, "manualEvaluatorSection": { "heading": "Manueller Evaluator-Befehl" } },
+  "scoring": { "title": "Scoring", "description": "Ein ScoringConfig definiert, wie Vorhersagesignale zu einem [0, 1]-Konfidenzwert kombiniert werden — ohne die KNN-Pipeline erneut auszuführen.", "loadPresets": "Voreinstellungen laden", "presetsLoading": "Lädt…", "availableSignals": "Verfügbare Signale", "signals": { "embeddingSimilarity": "Embedding-Ähnlichkeit", "embeddingSimilarityHint": "1 − Kosinus-Distanz / 2 — immer verfügbar.", "identityNw": "Identität NW", "identityNwHint": "Needleman-Wunsch globale Sequenzidentität [0, 1].", "identitySw": "Identität SW", "identitySwHint": "Smith-Waterman lokale Sequenzidentität [0, 1].", "evidenceWeight": "Evidenzgewicht", "evidenceWeightHint": "Qualität des GO-Evidenzcodes der Referenzannotation.", "taxonomicProximity": "Taxonomische Nähe", "taxonomicProximityHint": "1 / (1 + taxonomische_Distanz)" }, "configCard": { "deleteConfirm": "Scoring-Konfiguration \"{name}\" löschen?", "delete": "Löschen", "customEvidenceWeights": "benutzerdefinierte Evidenzgewichte", "evidenceCodeWeights": "Evidenzcode-Gewichte", "expand": "▶", "collapse": "▲", "systemDefaults": "(Systemstandard)", "custom": "(benutzerdefiniert)" }, "newConfigForm": { "newConfig": "+ Neues Scoring-Profil", "formTitle": "Neue Konfiguration", "close": "×", "nameLabel": "Name", "namePlaceholder": "mein_profil", "formulaLabel": "Formel", "linear": "linear", "evidenceWeighted": "evidence_weighted", "descriptionLabel": "Beschreibung", "descriptionHelper": "(optional)", "descriptionPlaceholder": "Zweck dieser Konfiguration…", "signalWeights": "Signalgewichte", "evidenceCodeWeights": "Evidenzcode-Gewichte", "overrideCheckbox": "Evidenzcode-Qualitätsgewichte überschreiben", "systemDefaultsNote": "Systemstandard — EXP/IDA → 1,0 · ISS/IBA → 0,7 · IEA → 0,3 · ND → 0,1", "experimental": "Experimentell", "experimentalDescription": "Annotationen mit direkter experimenteller Evidenz. 
Höchste Vertrauensstufe.", "computational": "Komputativ / Phylogenetisch", "computationalDescription": "Annotationen aus Sequenzähnlichkeit, Orthologie oder phylogenetischer Inferenz.", "electronic": "Elektronisch", "electronicDescription": "Automatisierte Annotationen (IEA) oder nicht rückverfolgbare Autorenaussagen (NAS). Geringere Konfidenz.", "noData": "Keine Daten", "noDataDescription": "Platzhaltercode — keine biologischen Daten verfügbar.", "off": "Aus", "max": "Max", "groupShortcut": "0,5", "resetEvidenceWeights": "Auf Systemstandard zurücksetzen", "saveConfig": "Konfiguration speichern", "saving": "Wird gespeichert…", "cancel": "Abbrechen" }, "noConfigs": "Noch keine Konfigurationen. Voreinstellungen laden oder oben eine erstellen." },
   "querySets": { "title": "Query-Sets", "uploadFasta": "+ FASTA hochladen", "noQuerySets": "Noch keine Query-Sets.", "uploadFastaCta": "Laden Sie eine FASTA-Datei hoch, um zu beginnen", "tableHeaders": { "name": "Name", "sequences": "Sequenzen", "created": "Erstellt" }, "delete": "Löschen", "deleteConfirm": "Query-Set \"{name}\" löschen? Dies kann nicht rückgängig gemacht werden.", "uploadModal": { "title": "FASTA hochladen", "close": "×", "nameLabel": "Name", "nameRequired": "*", "namePlaceholder": "z.B. humane_neue_proteine", "descriptionLabel": "Beschreibung (optional)", "descriptionPlaceholder": "Kurze Beschreibung", "fastaFileLabel": "FASTA-Datei", "fastaFileRequired": "*", "dragDrop": "FASTA hier ablegen oder durchsuchen", "supportedFormats": ".fasta · .fa · .faa · .txt", "fileSelected": "✓", "clickToChange": "zum Ändern klicken", "cancel": "Abbrechen", "upload": "Hochladen", "uploading": "Wird hochgeladen…" }, "expandedEntries": { "title": "Einträge", "count": "({count})" } },
-  "support": { "title": "Unterstützung", "hero": { "supportCount": "{count} Personen unterstützen dieses Projekt", "withComments": "{count} mit Kommentaren", "anonymous": "{count} anonym" }, "commentsSection": { "heading": "Was die Leute sagen", "noComments": "Noch keine Kommentare. Seien Sie der Erste!" } },
-  "maintenance": { "title": "Wartung", "description": "Verwaiste Daten bereinigen. Alle Operationen können sicher ausgeführt werden, während das System aktiv ist.", "orphanSequences": { "title": "Verwaiste Sequenzen", "description": "Sequenzen ohne Protein und ohne QuerySet-Eintrag, der auf sie verweist.", "orphanLabel": "Verwaiste Sequenzen", "totalSequences": "Sequenzen gesamt", "referencedSequences": "Referenzierte Sequenzen", "clean": "Bereinigen", "toClean": "{count} zu bereinigen", "refresh": "Aktualisieren", "vacuum": "Vacuum" }, "unindexedEmbeddings": { "title": "Nicht indizierte Embeddings", "description": "Embeddings für Sequenzen, die nicht in der Protein-Referenzdatenbank enthalten sind.", "orphanLabel": "Nicht indizierte Embeddings", "totalEmbeddings": "Embeddings gesamt", "indexedEmbeddings": "Indizierte Embeddings", "clean": "Bereinigen", "toClean": "{count} zu bereinigen", "refresh": "Aktualisieren", "vacuum": "Vacuum", "cleaning": "Wird bereinigt…" } },
-  "components": { "resetDbButton": { "button": "DB zurücksetzen", "resetOk": "✓ Zurücksetzen OK", "error": "✗ Fehler", "confirmTitle": "Datenbank zurücksetzen?", "confirmMessage": "Dadurch werden alle Daten dauerhaft gelöscht: Proteine, Annotationen, Embeddings, Vorhersagen und Jobs. Diese Aktion kann nicht rückgängig gemacht werden.", "cancel": "Abbrechen", "confirm": "Ja, zurücksetzen", "confirming": "Wird zurückgesetzt…" }, "supportButton": { "support": "Unterstützen", "tooltip": "Kommentare und Metriken sind öffentlich.", "projectSupport": "Unterstützen Sie das Projekt!", "commentPlaceholder": "Kommentar hinterlassen (optional)…", "publicNote": "Kommentare und Metriken sind öffentlich.", "sendThumbsUp": "👍 Daumen hoch senden", "sending": "Wird gesendet…", "thanks": "Danke für die Unterstützung! 🎉", "recentComments": "Aktuelle Kommentare", "viewAll": "Alle anzeigen →" }, "usagePolicyModal": { "title": "Willkommen bei PROTEA", "subtitle": "Persönliche Rechenressourcen, offen geteilt — bitte lesen Sie dies, bevor Sie fortfahren", "intro": "Diese Plattform läuft auf persönlicher Hardware, die frei für Forschungszwecke geteilt wird.", "rule1": "Dies sind persönliche Ressourcen, die freiwillig geteilt werden. Bitte beachten Sie die von Ihnen erzeugte Last.", "rule2": "Bevor Sie schwere Jobs starten, nehmen Sie bitte zuerst Kontakt auf.", "rule3": "Alle verarbeiteten Daten sind öffentlich und offen.", "rule4": "PROTEA ist frei und Open Source. Quellcode auf GitHub.", "rule5": "Dieser Dienst läuft auf Best-Effort-Basis und kann ohne Vorankündigung abgeschaltet werden.", "rule6": "Wenn etwas nicht funktioniert, melden Sie es bitte.", "dataPublicNote": "Alle hier verarbeiteten Daten sind öffentlich. Danke für die verantwortungsvolle Nutzung.", "accept": "Verstanden, los geht's" }, "statusBadge": { "queued": "QUEUED", "running": "RUNNING", "succeeded": "SUCCEEDED", "failed": "FAILED", "cancelled": "CANCELLED" }, "eventTimeline": { "noEvents": "Noch keine Ereignisse." 
}, "languageSwitcher": { "label": "Sprache" } }
-}
\ No newline at end of file
+  "support": { "title": "Unterstützung", "hero": { "supportCount": "{count} Personen unterstützen dieses Projekt", "withComments": "{count} mit Kommentaren", "anonymous": "{count} anonym" }, "commentsSection": { "heading": "Was andere sagen", "noComments": "Noch keine Kommentare. Seien Sie der Erste!" } },
+  "maintenance": { "title": "Wartung", "description": "Verwaiste Daten bereinigen. Alle Operationen können bei aktivem System sicher ausgeführt werden.", "orphanSequences": { "title": "Verwaiste Sequenzen", "description": "Sequenzen ohne Protein und ohne QuerySet-Eintrag.", "orphanLabel": "Verwaiste Sequenzen", "totalSequences": "Sequenzen gesamt", "referencedSequences": "Referenzierte Sequenzen", "clean": "Bereinigen", "toClean": "{count} zu bereinigen", "refresh": "Aktualisieren", "vacuum": "Vacuum" }, "unindexedEmbeddings": { "title": "Nicht indizierte Embeddings", "description": "Embeddings für Sequenzen außerhalb der Protein-Referenzdatenbank.", "orphanLabel": "Nicht indizierte Embeddings", "totalEmbeddings": "Embeddings gesamt", "indexedEmbeddings": "Indizierte Embeddings", "clean": "Bereinigen", "toClean": "{count} zu bereinigen", "refresh": "Aktualisieren", "vacuum": "Vacuum", "cleaning": "Wird bereinigt…" } },
+  "components": { "resetDbButton": { "button": "DB zurücksetzen", "resetOk": "✓ Zurückgesetzt", "error": "✗ Fehler", "confirmTitle": "Datenbank zurücksetzen?", "confirmMessage": "Dadurch werden alle Daten dauerhaft gelöscht: Proteine, Annotationen, Embeddings, Vorhersagen und Jobs. Dies kann nicht rückgängig gemacht werden.", "cancel": "Abbrechen", "confirm": "Ja, zurücksetzen", "confirming": "Wird zurückgesetzt…" }, "supportButton": { "support": "Unterstützen", "tooltip": "Kommentare und Metriken sind öffentlich und für alle sichtbar.", "projectSupport": "Unterstützen Sie das Projekt!", "commentPlaceholder": "Kommentar hinterlassen (optional)…", "publicNote": "Kommentare und Metriken sind öffentlich.", "sendThumbsUp": "👍 Daumen hoch", "sending": "Wird gesendet…", "thanks": "Danke für die Unterstützung! 🎉", "recentComments": "Aktuelle Kommentare", "viewAll": "Alle anzeigen →" }, "usagePolicyModal": { "title": "Willkommen bei PROTEA", "subtitle": "Persönliche Rechenressourcen, offen geteilt — bitte vor dem Fortfahren lesen", "intro": "Diese Plattform läuft auf persönlicher Hardware, die frei für Forschungszwecke geteilt wird. Keine Registrierung erforderlich. Bitte beachten Sie Folgendes:", "rule1": "Dies sind persönliche, freiwillig geteilte Ressourcen. Bitte achten Sie auf die von Ihnen erzeugte Last.", "rule2": "Bevor Sie rechenintensive oder langwierige Jobs starten, nehmen Sie bitte zuerst Kontakt auf — eine kurze Nachricht zu Ihrem Anwendungsfall wird sehr geschätzt.", "rule3": "Alle verarbeiteten Daten sind öffentlich und offen. Ergebnisse dürfen frei verwendet und geteilt werden.", "rule4": "PROTEA ist frei und Open Source. Jede Person oder Forschungseinrichtung kann eine eigene Instanz betreiben — Quellcode auf GitHub.", "rule5": "Dieser Dienst läuft auf Best-Effort-Basis. 
Das System kann jederzeit ohne Vorankündigung für Wartung oder persönliche Nutzung abgeschaltet werden.", "rule6": "Wenn etwas nicht funktioniert oder sich unerwartet verhält, melden Sie es bitte. Fehlerberichte und Feedback sind sehr willkommen.", "dataPublicNote": "Alle hier verarbeiteten Daten sind öffentlich. Danke für die verantwortungsvolle Nutzung.", "accept": "Verstanden, los geht's" }, "statusBadge": { "queued": "WARTEND", "running": "AKTIV", "succeeded": "FERTIG", "failed": "FEHLER", "cancelled": "ABBRUCH" }, "eventTimeline": { "noEvents": "Noch keine Ereignisse." }, "languageSwitcher": { "label": "Sprache" } }
+}
diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json
index 0621577..18d9b88 100644
--- a/apps/web/messages/en.json
+++ b/apps/web/messages/en.json
@@ -11,8 +11,60 @@
     "functionalAnnotation": "Functional Annotation",
     "scoring": "Scoring",
     "evaluation": "Evaluation",
+    "reranker": "Re-ranker",
     "jobs": "Jobs",
-    "maintenance": "Maintenance"
+    "maintenance": "Maintenance",
+    "home": "Home",
+    "data": "Data",
+    "pipelineGroup": "Pipeline",
+    "results": "Results",
+    "system": "System"
+  },
+  "home": {
+    "title": "Protein Function Prediction Pipeline",
+    "subtitle": "From sequence to functional annotation through embedding similarity, re-ranking, and LLM curation",
+    "bestResults": "Best Results",
+    "fmax": "Fmax",
+    "methodComparison": "Method Comparison",
+    "method": "Method",
+    "delta": "vs baseline",
+    "pipeline": "Pipeline",
+    "stats": "Platform Statistics",
+    "proteins": "Proteins",
+    "sequences": "Sequences",
+    "embeddings": "Embeddings",
+    "predictions": "Predictions",
+    "predictionSets": "Prediction Sets",
+    "rerankerModels": "Re-ranker Models",
+    "evaluations": "Evaluations",
+    "exploreResults": "Explore Results",
+    "annotateProteins": "Annotate My Proteins",
+    "knnBaseline": "KNN (embedding distance)",
+    "knnScored": "KNN + Scoring",
+    "knnReranker": "KNN + Re-ranker",
+    "noDataYet": "No evaluation data available yet. Run the pipeline to see results here.",
+    "getStarted": "Get Started",
+    "stageSequences": "Sequences",
+    "stageEmbeddings": "Embeddings",
+    "stageKnn": "KNN Search",
+    "stageReranker": "Re-ranker",
+    "stageLlm": "LLM Curation",
+    "stageAnnotation": "Annotation",
+    "stageEvaluation": "Evaluation",
+    "nkCategory": "NK category (No prior Knowledge) — most demanding evaluation setting",
+    "annotateTitle": "Annotate Your Proteins",
+    "annotateDescription": "Paste your protein sequences in FASTA format and get functional annotations automatically using the best available method.",
+    "annotatePlaceholder": ">sp|P04637|P53_HUMAN Cellular tumor antigen p53\nMEEPQSDPSVEPPLSQETFSDLWKLL...",
+    "annotateTryExample": "Try example",
+    "annotateUploadFile": "Upload file",
+    "annotateButton": "Annotate",
+    "annotateUploading": "Uploading...",
+    "annotateEmbedding": "Computing embeddings...",
+    "annotatePredicting": "Predicting GO terms...",
+    "annotateDone": "Done! Redirecting to results...",
+    "annotateStepUpload": "Upload",
+    "annotateStepEmbed": "Embeddings",
+    "annotateStepPredict": "Prediction"
   },
   "jobs": {
     "title": "Jobs",
@@ -393,6 +445,7 @@
         "snapshot": "Snapshot",
         "goTerms": "GO Terms",
         "distanceThreshold": "Dist. Threshold",
+        "k": "k",
         "created": "Created"
       },
       "delete": "Delete",
diff --git a/apps/web/messages/es.json b/apps/web/messages/es.json
index d34cc45..49ff5e7 100644
--- a/apps/web/messages/es.json
+++ b/apps/web/messages/es.json
@@ -8,11 +8,63 @@
     "annotations": "Anotaciones",
     "querySets": "Conjuntos de Consulta",
     "embeddings": "Embeddings",
-    "functionalAnnotation": "Anotación Funcional",
+    "functionalAnnotation": "Anotación",
     "scoring": "Scoring",
     "evaluation": "Evaluación",
+    "reranker": "Re-ranker",
     "jobs": "Trabajos",
-    "maintenance": "Mantenimiento"
+    "maintenance": "Mantenimiento",
+    "home": "Inicio",
+    "data": "Datos",
+    "pipelineGroup": "Pipeline",
+    "results": "Resultados",
+    "system": "Sistema"
+  },
+  "home": {
+    "title": "Pipeline de Predicción de Función Proteica",
+    "subtitle": "De secuencia a anotación funcional mediante similitud de embeddings, re-ranking y curación LLM",
+    "bestResults": "Mejores Resultados",
+    "fmax": "Fmax",
+    "methodComparison": "Comparación de Métodos",
+    "method": "Método",
+    "delta": "vs línea base",
+    "pipeline": "Pipeline",
+    "stats": "Estadísticas de la Plataforma",
+    "proteins": "Proteínas",
+    "sequences": "Secuencias",
+    "embeddings": "Embeddings",
+    "predictions": "Predicciones",
+    "predictionSets": "Conjuntos de predicción",
+    "rerankerModels": "Modelos Re-ranker",
+    "evaluations": "Evaluaciones",
+    "exploreResults": "Explorar Resultados",
+    "annotateProteins": "Anotar Mis Proteínas",
+    "knnBaseline": "KNN (distancia de embedding)",
+    "knnScored": "KNN + Scoring",
+    "knnReranker": "KNN + Re-ranker",
+    "noDataYet": "Aún no hay datos de evaluación. Ejecuta el pipeline para ver resultados aquí.",
+    "getStarted": "Comenzar",
+    "stageSequences": "Secuencias",
+    "stageEmbeddings": "Embeddings",
+    "stageKnn": "Búsqueda KNN",
+    "stageReranker": "Re-ranker",
+    "stageLlm": "Curación LLM",
+    "stageAnnotation": "Anotación",
+    "stageEvaluation": "Evaluación",
+    "nkCategory": "Categoría NK (No Knowledge) — configuración de evaluación más exigente",
+    "annotateTitle": "Anota tus proteínas",
+    "annotateDescription": "Pega tus secuencias de proteínas en formato FASTA y obtén anotaciones funcionales automáticamente con el mejor método disponible.",
+    "annotatePlaceholder": ">sp|P04637|P53_HUMAN Cellular tumor antigen p53\nMEEPQSDPSVEPPLSQETFSDLWKLL...",
+    "annotateTryExample": "Probar ejemplo",
+    "annotateUploadFile": "Subir archivo",
+    "annotateButton": "Anotar",
+    "annotateUploading": "Subiendo...",
+    "annotateEmbedding": "Calculando embeddings...",
+    "annotatePredicting": "Prediciendo términos GO...",
+    "annotateDone": "¡Listo! Redirigiendo a resultados...",
+    "annotateStepUpload": "Subida",
+    "annotateStepEmbed": "Embeddings",
+    "annotateStepPredict": "Predicción"
   },
   "jobs": {
     "title": "Trabajos",
@@ -392,7 +444,8 @@
         "annotationSet": "Conjunto de anotaciones",
         "snapshot": "Instantánea",
         "goTerms": "Términos GO",
-        "distanceThreshold": "Umbral de distancia",
+        "distanceThreshold": "Umbral dist.",
+        "k": "k",
         "created": "Creado"
       },
       "delete": "Eliminar",
@@ -621,7 +674,7 @@
       "projectSupport": "¡Apoya el proyecto!",
       "commentPlaceholder": "Deja un comentario (opcional)…",
       "publicNote": "Los comentarios y métricas son públicos.",
-      "sendThumbsUp": "👍 Enviar thumbs up",
+      "sendThumbsUp": "👍 Apoyar",
       "sending": "Enviando…",
       "thanks": "¡Gracias por el apoyo! 🎉",
       "recentComments": "Comentarios recientes",
diff --git a/apps/web/messages/pt.json b/apps/web/messages/pt.json
index 83b238e..2c9b404 100644
--- a/apps/web/messages/pt.json
+++ b/apps/web/messages/pt.json
@@ -1,16 +1,17 @@
 {
-  "layout": { "title": "PROTEA", "description": "Plataforma de dados de proteínas — fila de jobs e gerenciamento de pipelines" },
-  "nav": { "proteins": "Proteínas", "annotations": "Anotações", "querySets": "Query Sets", "embeddings": "Embeddings", "functionalAnnotation": "Anotação Funcional", "scoring": "Pontuação", "evaluation": "Avaliação", "jobs": "Jobs", "maintenance": "Manutenção" },
-  "jobs": { "title": "Jobs", "allStatuses": "Todos os status", "queued": "Na fila", "running": "Em execução", "succeeded": "Concluído", "failed": "Falhou", "cancelled": "Cancelado", "autoRefresh": "Atualização automática", "refresh": "Atualizar", "noJobsFound": "Nenhum job encontrado.", "status": "Status", "operation": "Operação", "jobId": "ID do Job", "created": "Criado", "activeJobs": "{count} ativo(s)", "jobDetail": { "title": "Detalhes do Job", "backToJobs": "← Jobs", "live": "Ao vivo", "cancel": "Cancelar", "delete": "Excluir", "deleteConfirm": "Excluir este job?", "queue": "Fila:", "created": "Criado:", "started": "Iniciado:", "finished": "Concluído:", "progress": "{current} / {total} {unit} ({percent}%)", "payloadLabel": "Payload", "childJobsTitle": "Jobs Filhos", "childJobsCount": "({count})", "eventsTitle": "Eventos", "eventsCount": "({count})" } },
-  "proteins": { "title": "Proteínas", "tabs": { "browse": "Explorar", "stats": "Estatísticas", "insert": "Inserir Proteínas", "metadata": "Buscar Metadados" }, "browseTab": { "searchPlaceholder": "acession, gene, organismo…", "search": "Buscar", "clear": "Limpar", "allProteins": "Todas as proteínas", "swissProt": "Apenas Swiss-Prot", "trembl": "Apenas TrEMBL", "canonicalOnly": "Apenas canônicas", "totalProteins": "{count} proteínas", "tableHeaders": { "accession": "Accession", "entryName": "Nome do Registro", "gene": "Gene", "organism": "Organismo", "length": "Comprimento", "source": "Fonte" }, "noProteinsCta": "Nenhuma proteína encontrada. Use a aba Inserir Proteínas para importar do UniProt.", "pagination": { "page": "Página {current} de {total}", "previous": "Anterior", "next": "Próxima" } }, "statsTab": { "refresh": "Atualizar", "loading": "Carregando…", "overview": "Visão Geral", "coverage": "Cobertura", "totalProteins": "Total de Proteínas", "canonical": "Canônicas", "isoforms": "{count} isoformas", "reviewed": "Swiss-Prot", "reviewedSub": "revisadas", "unreviewed": "TrEMBL", "unreviewedSub": "não revisadas", "withMetadata": "Com Metadados", "metadataSub": "{percent}% das canônicas", "withEmbeddings": "Com Embeddings", "embeddingsSub": "{percent}% do total", "withGoAnnotations": "Com Anotações GO", "goAnnotationsSub": "{percent}% do total" }, "insertTab": { "title": "Inserir Proteínas do UniProt", "description": "Baixa sequências FASTA e insere registros de Proteína + Sequência.", "searchCriteriaLabel": "Critérios de busca", "searchCriteriaHelper": "Consulta UniProt — reviewed:true = apenas Swiss-Prot", "pageSizeLabel": "Tamanho da página", "totalLimitLabel": "Limite total", "totalLimitOptional": "(opcional)", "includeIsoforms": "Incluir isoformas", "jobQueuedPrefix": "Job na fila: ", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "metadataTab": { "title": "Buscar Metadados do UniProt", "description": "Baixa anotações TSV e insere registros 
ProteinUniProtMetadata.", "searchCriteriaLabel": "Critérios de busca", "searchCriteriaHelper": "Consulta UniProt — reviewed:true = apenas Swiss-Prot", "pageSizeLabel": "Tamanho da página", "totalLimitLabel": "Limite total", "totalLimitOptional": "(opcional)", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "sourceSwissProt": "Swiss-Prot", "sourceTrembl": "TrEMBL" },
-  "proteinDetail": { "backToProteins": "← Proteínas", "tabs": { "overview": "Visão Geral", "annotations": "Anotações GO" }, "overviewTab": { "identity": "Identidade", "gene": "Gene", "organism": "Organismo", "taxonId": "ID do Táxon", "length": "Comprimento", "aa": "aa", "sequenceId": "ID da Sequência", "canonical": "Canônica", "coverage": "Cobertura", "embeddings": "Embeddings", "goAnnotations": "Anotações GO", "metadata": "Metadados", "yes": "sim", "none": "nenhum", "isoforms": "Isoformas", "function": "Função", "biochemistry": "Bioquímica", "ecNumber": "Número EC", "catalyticActivity": "Atividade Catalítica", "cofactor": "Cofator", "activityRegulation": "Regulação da Atividade", "pathway": "Via Metabólica", "absorption": "Absorção", "kinetics": "Cinética", "phDependence": "Dependência de pH", "redoxPotential": "Potencial Redox", "temperatureDependence": "Dependência de Temperatura", "rheaId": "ID Rhea", "keywords": "Palavras-chave", "noFunctionalMetadata": "Nenhum metadado funcional disponível. Use a aba Buscar Metadados para importar do UniProt.", "showGoGraph": "Mostrar Grafo GO", "hideGoGraph": "Ocultar Grafo GO", "loadingGraph": "Carregando grafo…", "noGoAnnotations": "Nenhuma anotação GO encontrada para esta proteína.", "molecularFunction": "Função Molecular", "biologicalProcess": "Processo Biológico", "cellularComponent": "Componente Celular", "annotations": "anotações", "goTableHeaders": { "goId": "ID GO", "name": "Nome", "evidence": "Evidência", "qualifier": "Qualificador", "source": "Fonte" } } },
-  "annotations": { "title": "Anotações", "tabs": { "sets": "Conjuntos de Anotações", "snapshots": "Snapshots de Ontologia", "loadSnapshot": "Carregar Snapshot", "loadGoa": "Carregar GOA", "loadQuickgo": "Carregar QuickGO" }, "setsTab": { "annotationSets": "{count} conjuntos de anotações", "refresh": "Atualizar", "noSetsFound": "Nenhum conjunto de anotações ainda. Carregue anotações GO pelas abas Carregar GOA ou Carregar QuickGO.", "tableHeaders": { "id": "ID", "source": "Fonte", "version": "Versão", "annotations": "Anotações", "meta": "Meta", "created": "Criado" }, "delete": "Excluir", "deleteConfirm": "Excluir este conjunto de anotações e suas {count} anotações GO? Esta ação não pode ser desfeita.", "deleteConfirmNoAnnotations": "Excluir este conjunto de anotações?" }, "snapshotsTab": { "snapshots": "{count} snapshots", "refresh": "Atualizar", "noSnapshotsFound": "Nenhum snapshot de ontologia ainda. Use a aba Carregar Snapshot.", "tableHeaders": { "id": "ID", "version": "Versão", "goTerms": "Termos GO", "iaUrl": "URL IA", "loaded": "Carregado" }, "notSet": "não definido", "save": "Salvar", "cancel": "Cancelar", "editTooltip": "Toque para editar a URL IA" }, "loadSnapshotTab": { "title": "Carregar Snapshot de Ontologia", "description": "Baixa um arquivo GO OBO e preenche registros GOTerm.", "oboUrlLabel": "URL OBO", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "loadGoaTab": { "title": "Carregar Anotações GOA", "description": "Carrega em massa anotações GO de um arquivo GAF.", "snapshotLabel": "Snapshot de Ontologia", "selectSnapshot": "— selecionar snapshot —", "noSnapshots": "Nenhum snapshot — execute Carregar Snapshot primeiro.", "gafUrlLabel": "URL GAF", "gafUrlPlaceholder": "https://current.geneontology.org/annotations/goa_human.gaf.gz", "sourceVersionLabel": "Versão da fonte", "sourceVersionPlaceholder": "2025-03", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "loadQuickgoTab": { "title": "Carregar Anotações QuickGO", "description": 
"Transmite anotações GO da API de download em massa do QuickGO.", "snapshotLabel": "Snapshot de Ontologia", "selectSnapshot": "— selecionar snapshot —", "noSnapshots": "Nenhum snapshot — execute Carregar Snapshot primeiro.", "sourceVersionLabel": "Versão da fonte", "sourceVersionPlaceholder": "2025-03", "launchJob": "Iniciar Job", "launching": "Iniciando…" } },
-  "embeddings": { "title": "Embeddings", "tabs": { "configs": "Configurações", "compute": "Computar" }, "configsTab": { "configs": "{count} configurações", "newConfig": "+ Nova Configuração", "cancel": "Cancelar", "newConfigForm": { "title": "Nova Configuração de Embedding", "layerIndexingWarning": "Indexação de camadas — convenção reversa: 0 = última (camada mais semântica), 1 = penúltima, etc.", "modelBackendLabel": "Backend do Modelo", "modelBackendEsm": "esm — HuggingFace EsmModel (ESM-2)", "modelBackendEsm3c": "esm3c — ESM SDK ESMC (ESM3c) · FP16 na GPU", "modelBackendT5": "t5 — HuggingFace T5EncoderModel (ProstT5…)", "modelBackendAuto": "auto — usa esm como fallback", "modelLabel": "Modelo", "customModelPlaceholder": "ex.: facebook/esm2_t33_650M_UR50D", "layerIndicesLabel": "Índices de Camadas", "layerIndicesHelper": "(0 = última, 1 = penúltima…)", "layerIndicesPlaceholder": "0  ou  0,1,2", "layerAggLabel": "Agregação de Camadas", "layerAggMean": "mean — média elemento a elemento", "layerAggLast": "last — apenas a última camada selecionada", "layerAggConcat": "concat — concatenar todas (dim × n_layers)", "poolingLabel": "Pooling de Sequência", "poolingMean": "mean — média sobre resíduos", "poolingMax": "max — máximo sobre resíduos", "poolingMeanMax": "mean_max — concat(mean, max) · dim × 2", "poolingCls": "cls — token CLS/BOS na posição 0", "maxLengthLabel": "Comprimento Máximo (tokens)", "descriptionLabel": "Descrição (opcional)", "normalizeResidues": "Normalizar resíduos (L2 por resíduo antes do pooling)", "normalizeFinal": "Normalizar embedding final (L2 após pooling)", "enableChunking": "Habilitar chunking (sequências longas → múltiplos embeddings por sequência)", "chunkSizeLabel": "Tamanho do Chunk (resíduos)", "chunkOverlapLabel": "Sobreposição do Chunk (resíduos)", "createConfig": "Criar Configuração", "creating": "Criando…" }, "tableHeaders": { "description": "Descrição", "model": "Modelo", "backend": "Backend", "layers": "Camadas", "agg": "Agg", 
"pool": "Pool", "norm": "Norm", "created": "Criado" }, "noConfigs": "Nenhuma configuração de embedding ainda. Crie uma", "deleteConfirm": "Excluir esta configuração de embedding e seus {count} embeddings armazenados? Esta ação não pode ser desfeita.", "deleteConfirmNoEmbeddings": "Excluir esta configuração de embedding?" }, "computeTab": { "title": "Computar Embeddings", "loading": "Carregando…", "configLabel": "Configuração de Embedding", "noConfigs": "— nenhuma configuração disponível —", "querySetLabel": "Query Set", "querySetHelper": "(opcional — deixe vazio para computar todos)", "allSequences": "— todas as sequências —", "queueBatchSizeLabel": "Tamanho do Lote na Fila", "queueBatchSizeHelper": "(seqs/job)", "modelBatchSizeLabel": "Tamanho do Lote do Modelo", "modelBatchSizeHelper": "(seqs/passagem)", "deviceLabel": "Dispositivo", "deviceCpu": "cpu — CPU (FP32)", "deviceCuda": "cuda — GPU padrão (FP16 para ESM3c/T5)", "deviceCuda0": "cuda:0 — GPU 0", "deviceCuda1": "cuda:1 — GPU 1", "deviceCustom": "personalizado…", "skipExisting": "Ignorar embeddings existentes", "launchComputeJob": "Iniciar Job de Computação", "launching": "Iniciando…" } },
-  "functionalAnnotation": { "title": "Anotação Funcional", "tabs": { "predict": "Executar Anotação", "results": "Resultados" }, "predictTab": { "title": "Anotação de Termos GO por Similaridade de Embedding", "loading": "Carregando…", "configLabel": "Configuração de Embedding", "noConfigs": "— nenhuma configuração disponível —", "querySetLabel": "Query Set", "querySetHelper": "(opcional — deixe vazio para anotar todos)", "allSequences": "— todas as sequências —", "annotationSetLabel": "Conjunto de Anotações", "noAnnotationSets": "— nenhum conjunto de anotações disponível —", "snapshotLabel": "Snapshot de Ontologia", "noSnapshots": "— nenhum snapshot disponível —", "limitPerEntryLabel": "Limite por Entrada", "batchSizeLabel": "Tamanho do Lote", "distanceThresholdLabel": "Limiar de Distância", "distanceThresholdHelper": "(opcional)", "knnStrategy": "Estratégia KNN", "aspectSeparatedKnn": "Índices KNN por aspecto", "aspectSeparatedKnnHelper": "Índices de referência separados BPO / MFO / CCO — melhora o recall para cada aspecto independentemente", "featureEngineering": "Feature Engineering", "featureEngineeringHelper": "(opt-in — aumenta o tempo de computação)", "sequenceAlignments": "Alinhamentos de sequência", "sequenceAlignmentsHelper": "NW (global) + SW (local) via parasail/BLOSUM62", "taxonomicDistance": "Distância taxonômica", "taxonomicDistanceHelper": "LCA, distância e relação via taxonomia NCBI", "searchBackend": "Backend de Busca", "searchBackendLabel": "Backend", "numpyBackend": "numpy — exato", "faissBackend": "faiss — indexado", "metricLabel": "Métrica", "cosineSimilarity": "cosseno", "euclideanDistance": "L2 (Euclidiano²)", "indexTypeLabel": "Tipo de Índice", "flatIndex": "Flat — exato", "ivfflatIndex": "IVFFlat — aproximado (>100K refs)", "hnswIndex": "HNSW — aproximado, baseado em grafo", "nlistLabel": "nlist", "nprobeLabel": "nprobe", "mLabel": "M", "efSearchLabel": "efSearch", "launchAnnotationJob": "Iniciar Job de Anotação", "launching": "Iniciando…" 
}, "resultsTab": { "title": "Resultados", "refresh": "Atualizar", "noResults": "Nenhum resultado de anotação ainda. Execute um job de anotação pela aba Executar Anotação.", "tableHeaders": { "id": "ID", "config": "Configuração", "annotationSet": "Conjunto de Anotações", "snapshot": "Snapshot", "goTerms": "Termos GO", "distanceThreshold": "Limiar de Dist.", "created": "Criado" }, "delete": "Excluir", "deleteConfirm": "Excluir este conjunto de anotações e suas {count} atribuições de termos GO? Esta ação não pode ser desfeita.", "deleteConfirmNoAssignments": "Excluir este conjunto de anotações?" } },
-  "evaluation": { "title": "Avaliação CAFA", "generateSection": { "heading": "Novo Conjunto de Avaliação", "description": "Computa o delta entre duas releases GOA.", "oldSetLabel": "Conjunto GOA antigo (referência)", "selectSet": "— selecionar —", "newSetLabel": "Conjunto GOA novo (verdade de campo)", "errorSameSets": "Os conjuntos antigo e novo devem ser diferentes.", "generateEvaluationSet": "Gerar Conjunto de Avaliação", "generating": "Enfileirando…" }, "evaluationSetsSection": { "heading": "Conjuntos de Avaliação" }, "evaluationSetCard": { "deleteConfirm": "Excluir este conjunto de avaliação e todos os seus resultados?", "delete": "Excluir", "deltaProteins": "Proteínas delta", "nkProteins": "Proteínas NK", "lkProteins": "Proteínas LK", "pkProteins": "Proteínas PK", "groundTruthFiles": "Arquivos de verdade de campo", "downloadNK": "NK", "downloadLK": "LK", "downloadPK": "PK", "downloadKnownTerms": "Termos conhecidos", "deltaProteinSequences": "Sequências de proteínas delta (FASTA)", "allDelta": "Todo delta (NK+LK+PK)", "nkOnly": "Apenas NK", "lkOnly": "Apenas LK", "pkOnly": "Apenas PK", "runCafaEvaluator": "Executar avaliador CAFA", "predictionSetLabel": "Conjunto de predições", "scoringConfigLabel": "Configuração de pontuação (opcional)", "fallbackFormula": "— fallback (1−d/2) —", "maxDistanceLabel": "Distância máxima (opcional)", "jobQueued": "Job na fila.", "viewJob": "Ver job →", "runEvaluation": "Executar Avaliação (NK + LK + PK)", "resultsHeading": "Resultados", "pollingResults": "● consultando", "refreshResults": "↻ Atualizar", "refreshing": "Atualizando…", "noEvaluations": "Nenhuma avaliação executada ainda.", "predictionSet": "Conjunto de predições:", "scoring": "Pontuação:", "artifactsDownload": "↓ Artefatos (.zip)", "resultDelete": "Excluir", "deleteResultConfirm": "Excluir este resultado de avaliação?" 
}, "resultMetrics": { "nk": "NK", "lk": "LK", "pk": "PK", "biologicalProcess": "Processo Biológico", "molecularFunction": "Função Molecular", "cellularComponent": "Componente Celular", "fmax": "Fmax", "precision": "Precisão", "recall": "Recall", "coverage": "Cobertura", "tau": "τ" }, "manualEvaluatorSection": { "heading": "Comando do avaliador manual" } },
-  "scoring": { "title": "Configurações de Pontuação", "description": "Uma ScoringConfig define como os sinais brutos de predição são combinados em uma única pontuação de confiança [0, 1].", "loadPresets": "Carregar predefinições", "presetsLoading": "Carregando…", "availableSignals": "Sinais disponíveis", "signals": { "embeddingSimilarity": "Similaridade de embedding", "embeddingSimilarityHint": "1 − distância cosseno / 2 — sempre disponível.", "identityNw": "Identidade NW", "identityNwHint": "Identidade de sequência global Needleman-Wunsch [0, 1].", "identitySw": "Identidade SW", "identitySwHint": "Identidade de sequência local Smith-Waterman [0, 1].", "evidenceWeight": "Peso de evidência", "evidenceWeightHint": "Qualidade do código de evidência GO da anotação de referência.", "taxonomicProximity": "Proximidade taxonômica", "taxonomicProximityHint": "1 / (1 + distância_taxonômica)" }, "configCard": { "deleteConfirm": "Excluir configuração de pontuação \"{name}\"?", "delete": "Excluir", "customEvidenceWeights": "pesos de evidência personalizados", "evidenceCodeWeights": "Pesos de código de evidência", "expand": "▶", "collapse": "▲", "systemDefaults": "(padrões do sistema)", "custom": "(personalizado)" }, "newConfigForm": { "newConfig": "+ Nova configuração de pontuação", "formTitle": "Nova configuração", "close": "×", "nameLabel": "Nome", "namePlaceholder": "minha_configuracao", "formulaLabel": "Fórmula", "linear": "linear", "evidenceWeighted": "evidence_weighted", "descriptionLabel": "Descrição", "descriptionHelper": "(opcional)", "descriptionPlaceholder": "Para que serve esta configuração…", "signalWeights": "Pesos dos sinais", "evidenceCodeWeights": "Pesos de código de evidência", "overrideCheckbox": "Substituir pesos de qualidade por código de evidência", "systemDefaultsNote": "Usando padrões do sistema — EXP/IDA → 1,0 · ISS/IBA → 0,7 · IEA → 0,3 · ND → 0,1", "experimental": "Experimental", "experimentalDescription": "Anotações respaldadas por evidência 
experimental direta. Maior nível de confiança.", "computational": "Computacional / Filogenético", "computationalDescription": "Anotações derivadas de similaridade de sequência, ortologia ou inferência filogenética.", "electronic": "Eletrônico", "electronicDescription": "Anotações automatizadas (IEA) ou declarações de autores não rastreáveis (NAS). Menor confiança.", "noData": "Sem dados", "noDataDescription": "Código de marcador indicando que nenhum dado biológico está disponível.", "off": "Desligado", "max": "Máx", "groupShortcut": "0,5", "resetEvidenceWeights": "Redefinir todos para padrões do sistema", "saveConfig": "Salvar configuração", "saving": "Salvando…", "cancel": "Cancelar" }, "noConfigs": "Nenhuma configuração ainda. Carregue as predefinições ou crie uma acima." },
+  "layout": { "title": "PROTEA", "description": "Plataforma de dados proteicos — fila de jobs e gestão de pipelines" },
+  "nav": { "proteins": "Proteínas", "annotations": "Anotações", "querySets": "Query Sets", "embeddings": "Embeddings", "functionalAnnotation": "Anotação Funcional", "scoring": "Scoring", "evaluation": "Avaliação", "reranker": "Re-Ranker", "jobs": "Jobs", "maintenance": "Manutenção", "home": "Início", "data": "Dados", "pipelineGroup": "Pipeline", "results": "Resultados", "system": "Sistema" },
+  "home": { "title": "Pipeline de Predição de Função Proteica", "subtitle": "De sequência a anotação funcional por similaridade de embeddings, re-ranking e curação LLM", "bestResults": "Melhores Resultados", "fmax": "Fmax", "methodComparison": "Comparação de Métodos", "method": "Método", "delta": "vs linha base", "pipeline": "Pipeline", "stats": "Estatísticas da Plataforma", "proteins": "Proteínas", "sequences": "Sequências", "embeddings": "Embeddings", "predictions": "Predições", "predictionSets": "Conjuntos de predição", "rerankerModels": "Modelos Re-ranker", "evaluations": "Avaliações", "exploreResults": "Explorar Resultados", "annotateProteins": "Anotar Minhas Proteínas", "knnBaseline": "KNN (distância de embedding)", "knnScored": "KNN + Scoring", "knnReranker": "KNN + Re-ranker", "noDataYet": "Ainda não há dados de avaliação. Execute o pipeline para ver resultados aqui.", "getStarted": "Começar", "stageSequences": "Sequências", "stageEmbeddings": "Embeddings", "stageKnn": "Busca KNN", "stageReranker": "Re-ranker", "stageLlm": "Curação LLM", "stageAnnotation": "Anotação", "stageEvaluation": "Avaliação", "nkCategory": "Categoria NK (No Knowledge) — configuração de avaliação mais exigente", "annotateTitle": "Anote suas proteínas", "annotateDescription": "Cole suas sequências de proteínas em formato FASTA e obtenha anotações funcionais automaticamente com o melhor método disponível.", "annotatePlaceholder": ">sp|P04637|P53_HUMAN Cellular tumor antigen p53\nMEEPQSDPSVEPPLSQETFSDLWKLL...", "annotateTryExample": "Testar exemplo", "annotateUploadFile": "Enviar arquivo", "annotateButton": "Anotar", "annotateUploading": "Enviando...", "annotateEmbedding": "Calculando embeddings...", "annotatePredicting": "Prevendo termos GO...", "annotateDone": "Pronto! Redirecionando para resultados...", "annotateStepUpload": "Envio", "annotateStepEmbed": "Embeddings", "annotateStepPredict": "Predição" },
+  "jobs": { "title": "Jobs", "allStatuses": "Todos os status", "queued": "Na fila", "running": "Em execução", "succeeded": "Concluído", "failed": "Falhou", "cancelled": "Cancelado", "autoRefresh": "Atualização automática", "refresh": "Atualizar", "noJobsFound": "Nenhum job encontrado.", "status": "Status", "operation": "Operação", "jobId": "ID do Job", "created": "Criado", "activeJobs": "{count} ativo(s)", "jobDetail": { "title": "Detalhes do Job", "backToJobs": "← Jobs", "live": "Ao vivo", "cancel": "Cancelar", "delete": "Excluir", "deleteConfirm": "Excluir este job?", "queue": "Fila:", "created": "Criado:", "started": "Iniciado:", "finished": "Concluído:", "progress": "{current} / {total} {unit} ({percent}%)", "payloadLabel": "Payload", "childJobsTitle": "Jobs filhos", "childJobsCount": "({count})", "eventsTitle": "Eventos", "eventsCount": "({count})" } },
+  "proteins": { "title": "Proteínas", "tabs": { "browse": "Explorar", "stats": "Estatísticas", "insert": "Inserir Proteínas", "metadata": "Buscar Metadados" }, "browseTab": { "searchPlaceholder": "accession, gene, organismo…", "search": "Buscar", "clear": "Limpar", "allProteins": "Todas as proteínas", "swissProt": "Apenas Swiss-Prot", "trembl": "Apenas TrEMBL", "canonicalOnly": "Apenas canônicas", "totalProteins": "{count} proteínas", "tableHeaders": { "accession": "Accession", "entryName": "Nome do Registro", "gene": "Gene", "organism": "Organismo", "length": "Comprimento", "source": "Fonte" }, "noProteinsCta": "Nenhuma proteína encontrada. Use a aba Inserir Proteínas para importar do UniProt.", "pagination": { "page": "Página {current} de {total}", "previous": "Anterior", "next": "Próxima" } }, "statsTab": { "refresh": "Atualizar", "loading": "Carregando…", "overview": "Visão geral", "coverage": "Cobertura", "totalProteins": "Total de proteínas", "canonical": "Canônicas", "isoforms": "{count} isoformas", "reviewed": "Swiss-Prot", "reviewedSub": "revisadas", "unreviewed": "TrEMBL", "unreviewedSub": "não revisadas", "withMetadata": "Com metadados", "metadataSub": "{percent}% das canônicas", "withEmbeddings": "Com embeddings", "embeddingsSub": "{percent}% do total", "withGoAnnotations": "Com anotações GO", "goAnnotationsSub": "{percent}% do total" }, "insertTab": { "title": "Inserir proteínas do UniProt", "description": "Baixa sequências FASTA e insere registros Protein + Sequence.", "searchCriteriaLabel": "Critérios de busca", "searchCriteriaHelper": "Consulta UniProt — reviewed:true = apenas Swiss-Prot", "pageSizeLabel": "Tamanho da página", "totalLimitLabel": "Limite total", "totalLimitOptional": "(opcional)", "includeIsoforms": "Incluir isoformas", "jobQueuedPrefix": "Job na fila: ", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "metadataTab": { "title": "Buscar metadados do UniProt", "description": "Baixa anotações TSV e insere registros 
ProteinUniProtMetadata.", "searchCriteriaLabel": "Critérios de busca", "searchCriteriaHelper": "Consulta UniProt — reviewed:true = apenas Swiss-Prot", "pageSizeLabel": "Tamanho da página", "totalLimitLabel": "Limite total", "totalLimitOptional": "(opcional)", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "sourceSwissProt": "Swiss-Prot", "sourceTrembl": "TrEMBL" },
+  "proteinDetail": { "backToProteins": "← Proteínas", "tabs": { "overview": "Visão geral", "annotations": "Anotações GO" }, "overviewTab": { "identity": "Identidade", "gene": "Gene", "organism": "Organismo", "taxonId": "ID do Táxon", "length": "Comprimento", "aa": "aa", "sequenceId": "ID da Sequência", "canonical": "Canônica", "coverage": "Cobertura", "embeddings": "Embeddings", "goAnnotations": "Anotações GO", "metadata": "Metadados", "yes": "sim", "none": "nenhum", "isoforms": "Isoformas", "function": "Função", "biochemistry": "Bioquímica", "ecNumber": "Número EC", "catalyticActivity": "Atividade catalítica", "cofactor": "Cofator", "activityRegulation": "Regulação da atividade", "pathway": "Via metabólica", "absorption": "Absorção", "kinetics": "Cinética", "phDependence": "Dependência de pH", "redoxPotential": "Potencial redox", "temperatureDependence": "Dependência de temperatura", "rheaId": "ID Rhea", "keywords": "Palavras-chave", "noFunctionalMetadata": "Nenhum metadado funcional disponível. Use a aba Buscar Metadados para importar do UniProt.", "showGoGraph": "Mostrar grafo GO", "hideGoGraph": "Ocultar grafo GO", "loadingGraph": "Carregando grafo…", "noGoAnnotations": "Nenhuma anotação GO encontrada para esta proteína.", "molecularFunction": "Função Molecular", "biologicalProcess": "Processo Biológico", "cellularComponent": "Componente Celular", "annotations": "anotações", "goTableHeaders": { "goId": "ID GO", "name": "Nome", "evidence": "Evidência", "qualifier": "Qualificador", "source": "Fonte" } } },
+  "annotations": { "title": "Anotações", "tabs": { "sets": "Conjuntos", "snapshots": "Snapshots", "loadSnapshot": "Carregar Snapshot", "loadGoa": "Carregar GOA", "loadQuickgo": "Carregar QuickGO" }, "setsTab": { "annotationSets": "{count} conjuntos de anotações", "refresh": "Atualizar", "noSetsFound": "Nenhum conjunto de anotações ainda. Carregue anotações GO pelas abas Carregar GOA ou Carregar QuickGO.", "tableHeaders": { "id": "ID", "source": "Fonte", "version": "Versão", "annotations": "Anotações", "meta": "Meta", "created": "Criado" }, "delete": "Excluir", "deleteConfirm": "Excluir este conjunto e suas {count} anotações GO? Esta ação não pode ser desfeita.", "deleteConfirmNoAnnotations": "Excluir este conjunto de anotações?" }, "snapshotsTab": { "snapshots": "{count} snapshots", "refresh": "Atualizar", "noSnapshotsFound": "Nenhum snapshot de ontologia ainda. Use a aba Carregar Snapshot.", "tableHeaders": { "id": "ID", "version": "Versão", "goTerms": "Termos GO", "iaUrl": "URL IA", "loaded": "Carregado" }, "notSet": "não definido", "save": "Salvar", "cancel": "Cancelar", "editTooltip": "Toque para editar a URL IA" }, "loadSnapshotTab": { "title": "Carregar snapshot de ontologia", "description": "Baixa um arquivo GO OBO e preenche registros GOTerm.", "oboUrlLabel": "URL OBO", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "loadGoaTab": { "title": "Carregar anotações GOA", "description": "Carrega em massa anotações GO de um arquivo GAF.", "snapshotLabel": "Snapshot de Ontologia", "selectSnapshot": "— selecionar snapshot —", "noSnapshots": "Nenhum snapshot — execute Carregar Snapshot primeiro.", "gafUrlLabel": "URL GAF", "gafUrlPlaceholder": "https://current.geneontology.org/annotations/goa_human.gaf.gz", "sourceVersionLabel": "Versão da fonte", "sourceVersionPlaceholder": "2025-03", "launchJob": "Iniciar Job", "launching": "Iniciando…" }, "loadQuickgoTab": { "title": "Carregar anotações QuickGO", "description": "Transmite anotações GO da API de download 
em massa do QuickGO.", "snapshotLabel": "Snapshot de Ontologia", "selectSnapshot": "— selecionar snapshot —", "noSnapshots": "Nenhum snapshot — execute Carregar Snapshot primeiro.", "sourceVersionLabel": "Versão da fonte", "sourceVersionPlaceholder": "2025-03", "launchJob": "Iniciar Job", "launching": "Iniciando…" } },
+  "embeddings": { "title": "Embeddings", "tabs": { "configs": "Configurações", "compute": "Computar" }, "configsTab": { "configs": "{count} configurações", "newConfig": "+ Nova configuração", "cancel": "Cancelar", "newConfigForm": { "title": "Nova configuração de embedding", "layerIndexingWarning": "Indexação de camadas — convenção reversa: 0 = última (mais semântica), 1 = penúltima, etc.", "modelBackendLabel": "Backend do modelo", "modelBackendEsm": "esm — HuggingFace EsmModel (ESM-2)", "modelBackendEsm3c": "esm3c — ESM SDK ESMC (ESM3c) · FP16 na GPU", "modelBackendT5": "t5 — HuggingFace T5EncoderModel (ProstT5…)", "modelBackendAuto": "auto — fallback para esm", "modelLabel": "Modelo", "customModelPlaceholder": "ex.: facebook/esm2_t33_650M_UR50D", "layerIndicesLabel": "Índices de camadas", "layerIndicesHelper": "(0 = última, 1 = penúltima…)", "layerIndicesPlaceholder": "0  ou  0,1,2", "layerAggLabel": "Agregação de camadas", "layerAggMean": "mean — média elemento a elemento", "layerAggLast": "last — apenas a última camada", "layerAggConcat": "concat — concatenar todas (dim × n_layers)", "poolingLabel": "Pooling de sequência", "poolingMean": "mean — média sobre resíduos", "poolingMax": "max — máximo sobre resíduos", "poolingMeanMax": "mean_max — concat(mean, max) · dim × 2", "poolingCls": "cls — token CLS/BOS na posição 0", "maxLengthLabel": "Comprimento máx. 
(tokens)", "descriptionLabel": "Descrição (opcional)", "normalizeResidues": "Normalizar resíduos (L2 por resíduo antes do pooling)", "normalizeFinal": "Normalizar embedding final (L2 após pooling)", "enableChunking": "Habilitar chunking (sequências longas → múltiplos embeddings)", "chunkSizeLabel": "Tamanho do chunk (resíduos)", "chunkOverlapLabel": "Sobreposição do chunk (resíduos)", "createConfig": "Criar configuração", "creating": "Criando…" }, "tableHeaders": { "description": "Descrição", "model": "Modelo", "backend": "Backend", "layers": "Camadas", "agg": "Agg", "pool": "Pool", "norm": "Norm", "created": "Criado" }, "noConfigs": "Nenhuma configuração de embedding ainda. Crie uma", "deleteConfirm": "Excluir esta configuração e seus {count} embeddings? Esta ação não pode ser desfeita.", "deleteConfirmNoEmbeddings": "Excluir esta configuração de embedding?" }, "computeTab": { "title": "Computar embeddings", "loading": "Carregando…", "configLabel": "Configuração de embedding", "noConfigs": "— nenhuma configuração disponível —", "querySetLabel": "Query Set", "querySetHelper": "(opcional — deixe vazio para computar todos)", "allSequences": "— todas as sequências —", "queueBatchSizeLabel": "Batch na fila", "queueBatchSizeHelper": "(seqs/job)", "modelBatchSizeLabel": "Batch do modelo", "modelBatchSizeHelper": "(seqs/forward)", "deviceLabel": "Dispositivo", "deviceCpu": "cpu — CPU (FP32)", "deviceCuda": "cuda — GPU padrão (FP16 para ESM3c/T5)", "deviceCuda0": "cuda:0 — GPU 0", "deviceCuda1": "cuda:1 — GPU 1", "deviceCustom": "personalizado…", "skipExisting": "Ignorar embeddings existentes", "launchComputeJob": "Iniciar job de computação", "launching": "Iniciando…" } },
+  "functionalAnnotation": { "title": "Anotação Funcional", "tabs": { "predict": "Executar Anotação", "results": "Resultados" }, "predictTab": { "title": "Anotação de termos GO por similaridade de embedding", "loading": "Carregando…", "configLabel": "Configuração de embedding", "noConfigs": "— nenhuma configuração disponível —", "querySetLabel": "Query Set", "querySetHelper": "(opcional — deixe vazio para anotar todos)", "allSequences": "— todas as sequências —", "annotationSetLabel": "Conjunto de anotações", "noAnnotationSets": "— nenhum conjunto disponível —", "snapshotLabel": "Snapshot de ontologia", "noSnapshots": "— nenhum snapshot disponível —", "limitPerEntryLabel": "Limite por entrada", "batchSizeLabel": "Tamanho do batch", "distanceThresholdLabel": "Limiar de distância", "distanceThresholdHelper": "(opcional)", "knnStrategy": "Estratégia KNN", "aspectSeparatedKnn": "Índices KNN por aspecto", "aspectSeparatedKnnHelper": "Índices de referência separados BPO / MFO / CCO — melhora o recall por aspecto", "featureEngineering": "Feature Engineering", "featureEngineeringHelper": "(opt-in — aumenta o tempo de computação)", "sequenceAlignments": "Alinhamentos de sequência", "sequenceAlignmentsHelper": "NW (global) + SW (local) via parasail/BLOSUM62", "taxonomicDistance": "Distância taxonômica", "taxonomicDistanceHelper": "LCA, distância e relação via taxonomia NCBI", "searchBackend": "Backend de busca", "searchBackendLabel": "Backend", "numpyBackend": "numpy — exato", "faissBackend": "faiss — indexado", "metricLabel": "Métrica", "cosineSimilarity": "cosseno", "euclideanDistance": "L2 (Euclidiano²)", "indexTypeLabel": "Tipo de índice", "flatIndex": "Flat — exato", "ivfflatIndex": "IVFFlat — aproximado (>100K refs)", "hnswIndex": "HNSW — aproximado, baseado em grafo", "nlistLabel": "nlist", "nprobeLabel": "nprobe", "mLabel": "M", "efSearchLabel": "efSearch", "launchAnnotationJob": "Iniciar job de anotação", "launching": "Iniciando…" }, "resultsTab": { "title": 
"Resultados", "refresh": "Atualizar", "noResults": "Nenhum resultado de anotação ainda. Execute um job na aba Executar Anotação.", "tableHeaders": { "id": "ID", "config": "Configuração", "annotationSet": "Conjunto", "snapshot": "Snapshot", "goTerms": "Termos GO", "distanceThreshold": "Limiar dist.", "k": "k", "created": "Criado" }, "delete": "Excluir", "deleteConfirm": "Excluir este conjunto e suas {count} atribuições de termos GO? Esta ação não pode ser desfeita.", "deleteConfirmNoAssignments": "Excluir este conjunto de anotações?" } },
+  "evaluation": { "title": "Avaliação CAFA", "generateSection": { "heading": "Novo conjunto de avaliação", "description": "Computa o delta entre duas releases GOA. Aplica filtragem de evidência experimental e propagação do qualificador NOT pelo DAG GO.", "oldSetLabel": "Conjunto GOA antigo (referência)", "selectSet": "— selecionar —", "newSetLabel": "Conjunto GOA novo (ground truth)", "errorSameSets": "Os conjuntos antigo e novo devem ser diferentes.", "generateEvaluationSet": "Gerar conjunto de avaliação", "generating": "Enfileirando…" }, "evaluationSetsSection": { "heading": "Conjuntos de avaliação" }, "evaluationSetCard": { "deleteConfirm": "Excluir este conjunto de avaliação e todos os resultados?", "delete": "Excluir", "deltaProteins": "Proteínas delta", "nkProteins": "Proteínas NK", "lkProteins": "Proteínas LK", "pkProteins": "Proteínas PK", "groundTruthFiles": "Arquivos de ground truth", "downloadNK": "NK", "downloadLK": "LK", "downloadPK": "PK", "downloadKnownTerms": "Termos conhecidos", "deltaProteinSequences": "Sequências delta (FASTA)", "allDelta": "Todo delta (NK+LK+PK)", "nkOnly": "Apenas NK", "lkOnly": "Apenas LK", "pkOnly": "Apenas PK", "runCafaEvaluator": "Executar avaliador CAFA", "predictionSetLabel": "Prediction set", "scoringConfigLabel": "Configuração de scoring (opcional)", "fallbackFormula": "— fallback (1−d/2) —", "maxDistanceLabel": "Distância máxima (opcional)", "jobQueued": "Job na fila.", "viewJob": "Ver job →", "runEvaluation": "Executar avaliação (NK + LK + PK)", "resultsHeading": "Resultados", "pollingResults": "● consultando", "refreshResults": "↻ Atualizar", "refreshing": "Atualizando…", "noEvaluations": "Nenhuma avaliação executada ainda.", "predictionSet": "Prediction set:", "scoring": "Scoring:", "artifactsDownload": "↓ Artefatos (.zip)", "resultDelete": "Excluir", "deleteResultConfirm": "Excluir este resultado de avaliação?" 
}, "resultMetrics": { "nk": "NK", "lk": "LK", "pk": "PK", "biologicalProcess": "Processo Biológico", "molecularFunction": "Função Molecular", "cellularComponent": "Componente Celular", "fmax": "Fmax", "precision": "Precisão", "recall": "Recall", "coverage": "Cobertura", "tau": "τ" }, "manualEvaluatorSection": { "heading": "Comando do avaliador manual" } },
+  "scoring": { "title": "Scoring", "description": "Um ScoringConfig define como os sinais de predição são combinados em uma pontuação de confiança [0, 1] — sem re-executar o pipeline KNN.", "loadPresets": "Carregar predefinições", "presetsLoading": "Carregando…", "availableSignals": "Sinais disponíveis", "signals": { "embeddingSimilarity": "Similaridade de embedding", "embeddingSimilarityHint": "1 − distância cosseno / 2 — sempre disponível.", "identityNw": "Identidade NW", "identityNwHint": "Identidade de sequência global Needleman-Wunsch [0, 1].", "identitySw": "Identidade SW", "identitySwHint": "Identidade de sequência local Smith-Waterman [0, 1].", "evidenceWeight": "Peso de evidência", "evidenceWeightHint": "Qualidade do código de evidência GO da anotação de referência.", "taxonomicProximity": "Proximidade taxonômica", "taxonomicProximityHint": "1 / (1 + distância_taxonômica)" }, "configCard": { "deleteConfirm": "Excluir scoring \"{name}\"?", "delete": "Excluir", "customEvidenceWeights": "pesos de evidência personalizados", "evidenceCodeWeights": "Pesos de código de evidência", "expand": "▶", "collapse": "▲", "systemDefaults": "(padrão do sistema)", "custom": "(personalizado)" }, "newConfigForm": { "newConfig": "+ Novo perfil de scoring", "formTitle": "Nova configuração", "close": "×", "nameLabel": "Nome", "namePlaceholder": "meu_perfil", "formulaLabel": "Fórmula", "linear": "linear", "evidenceWeighted": "evidence_weighted", "descriptionLabel": "Descrição", "descriptionHelper": "(opcional)", "descriptionPlaceholder": "Finalidade desta configuração…", "signalWeights": "Pesos dos sinais", "evidenceCodeWeights": "Pesos de código de evidência", "overrideCheckbox": "Substituir pesos de qualidade por código de evidência", "systemDefaultsNote": "Padrão do sistema — EXP/IDA → 1,0 · ISS/IBA → 0,7 · IEA → 0,3 · ND → 0,1", "experimental": "Experimental", "experimentalDescription": "Anotações com evidência experimental direta. 
Maior nível de confiança.", "computational": "Computacional / Filogenético", "computationalDescription": "Anotações derivadas de similaridade de sequência, ortologia ou inferência filogenética.", "electronic": "Eletrônico", "electronicDescription": "Anotações automatizadas (IEA) ou declarações de autores não rastreáveis (NAS). Menor confiança.", "noData": "Sem dados", "noDataDescription": "Código de marcador indicando que nenhum dado biológico está disponível.", "off": "Desligado", "max": "Máx", "groupShortcut": "0,5", "resetEvidenceWeights": "Redefinir para padrão do sistema", "saveConfig": "Salvar configuração", "saving": "Salvando…", "cancel": "Cancelar" }, "noConfigs": "Nenhuma configuração ainda. Carregue predefinições ou crie uma acima." },
   "querySets": { "title": "Query Sets", "uploadFasta": "+ Enviar FASTA", "noQuerySets": "Nenhum query set ainda.", "uploadFastaCta": "Envie um arquivo FASTA para começar", "tableHeaders": { "name": "Nome", "sequences": "Sequências", "created": "Criado" }, "delete": "Excluir", "deleteConfirm": "Excluir query set \"{name}\"? Esta ação não pode ser desfeita.", "uploadModal": { "title": "Enviar FASTA", "close": "×", "nameLabel": "Nome", "nameRequired": "*", "namePlaceholder": "ex.: proteinas_humanas_novas", "descriptionLabel": "Descrição (opcional)", "descriptionPlaceholder": "Descrição breve", "fastaFileLabel": "Arquivo FASTA", "fastaFileRequired": "*", "dragDrop": "Solte o FASTA aqui ou procure", "supportedFormats": ".fasta · .fa · .faa · .txt", "fileSelected": "✓", "clickToChange": "clique para alterar", "cancel": "Cancelar", "upload": "Enviar", "uploading": "Enviando…" }, "expandedEntries": { "title": "Entradas", "count": "({count})" } },
-  "support": { "title": "Suporte", "hero": { "supportCount": "{count} pessoas apoiam este projeto", "withComments": "{count} com comentários", "anonymous": "{count} anônimos" }, "commentsSection": { "heading": "O que as pessoas estão dizendo", "noComments": "Nenhum comentário ainda. Seja o primeiro!" } },
-  "maintenance": { "title": "Manutenção", "description": "Limpar dados órfãos. Todas as operações são seguras para executar enquanto o sistema está ativo.", "orphanSequences": { "title": "Sequências Órfãs", "description": "Sequências sem Proteína e sem entrada QuerySet apontando para elas.", "orphanLabel": "Sequências órfãs", "totalSequences": "Total de sequências", "referencedSequences": "Sequências referenciadas", "clean": "Limpar", "toClean": "{count} para limpar", "refresh": "Atualizar", "vacuum": "Vacuum" }, "unindexedEmbeddings": { "title": "Embeddings Não Indexados", "description": "Embeddings para sequências que não estão no banco de dados de referência de proteínas.", "orphanLabel": "Embeddings não indexados", "totalEmbeddings": "Total de embeddings", "indexedEmbeddings": "Embeddings indexados", "clean": "Limpar", "toClean": "{count} para limpar", "refresh": "Atualizar", "vacuum": "Vacuum", "cleaning": "Limpando…" } },
-  "components": { "resetDbButton": { "button": "Resetar BD", "resetOk": "✓ Reset OK", "error": "✗ Erro", "confirmTitle": "Resetar banco de dados?", "confirmMessage": "Isso excluirá permanentemente todos os dados: proteínas, anotações, embeddings, predições e jobs. Esta ação não pode ser desfeita.", "cancel": "Cancelar", "confirm": "Sim, resetar", "confirming": "Resetando…" }, "supportButton": { "support": "Apoiar", "tooltip": "Comentários e métricas são públicos.", "projectSupport": "Apoie o projeto!", "commentPlaceholder": "Deixe um comentário (opcional)…", "publicNote": "Comentários e métricas são públicos.", "sendThumbsUp": "👍 Enviar curtida", "sending": "Enviando…", "thanks": "Obrigado pelo apoio! 🎉", "recentComments": "Comentários recentes", "viewAll": "Ver todos →" }, "usagePolicyModal": { "title": "Bem-vindo ao PROTEA", "subtitle": "Recursos computacionais pessoais, compartilhados abertamente — leia antes de continuar", "intro": "Esta plataforma funciona em hardware pessoal compartilhado livremente para fins de pesquisa.", "rule1": "Estes são recursos pessoais compartilhados voluntariamente. Por favor, seja consciente da carga que você gera.", "rule2": "Antes de iniciar jobs pesados, entre em contato primeiro.", "rule3": "Todos os dados processados são públicos e abertos.", "rule4": "PROTEA é gratuito e de código aberto. Código fonte no GitHub.", "rule5": "Este serviço opera em base de melhor esforço e pode ser encerrado sem aviso prévio.", "rule6": "Se algo quebrar, por favor reporte.", "dataPublicNote": "Todos os dados processados aqui são públicos. Obrigado por usar com responsabilidade.", "accept": "Entendido, vamos lá" }, "statusBadge": { "queued": "QUEUED", "running": "RUNNING", "succeeded": "SUCCEEDED", "failed": "FAILED", "cancelled": "CANCELLED" }, "eventTimeline": { "noEvents": "Nenhum evento ainda." }, "languageSwitcher": { "label": "Idioma" } }
-}
\ No newline at end of file
+  "support": { "title": "Suporte", "hero": { "supportCount": "{count} pessoas apoiam este projeto", "withComments": "{count} com comentários", "anonymous": "{count} anônimos" }, "commentsSection": { "heading": "O que as pessoas dizem", "noComments": "Nenhum comentário ainda. Seja o primeiro!" } },
+  "maintenance": { "title": "Manutenção", "description": "Limpar dados órfãos acumulados. Todas as operações são seguras com o sistema ativo.", "orphanSequences": { "title": "Sequências órfãs", "description": "Sequências sem proteína e sem entrada QuerySet associada.", "orphanLabel": "Sequências órfãs", "totalSequences": "Total de sequências", "referencedSequences": "Sequências referenciadas", "clean": "Limpar", "toClean": "{count} para limpar", "refresh": "Atualizar", "vacuum": "Vacuum" }, "unindexedEmbeddings": { "title": "Embeddings não indexados", "description": "Embeddings de sequências fora do banco de referência de proteínas.", "orphanLabel": "Embeddings não indexados", "totalEmbeddings": "Total de embeddings", "indexedEmbeddings": "Embeddings indexados", "clean": "Limpar", "toClean": "{count} para limpar", "refresh": "Atualizar", "vacuum": "Vacuum", "cleaning": "Limpando…" } },
+  "components": { "resetDbButton": { "button": "Resetar BD", "resetOk": "✓ Reset OK", "error": "✗ Erro", "confirmTitle": "Resetar banco de dados?", "confirmMessage": "Isso excluirá permanentemente todos os dados: proteínas, anotações, embeddings, predições e jobs. Esta ação não pode ser desfeita.", "cancel": "Cancelar", "confirm": "Sim, resetar", "confirming": "Resetando…" }, "supportButton": { "support": "Apoiar", "tooltip": "Comentários e métricas são públicos e visíveis para todos.", "projectSupport": "Apoie o projeto!", "commentPlaceholder": "Deixe um comentário (opcional)…", "publicNote": "Comentários e métricas são públicos.", "sendThumbsUp": "👍 Apoiar", "sending": "Enviando…", "thanks": "Obrigado pelo apoio! 🎉", "recentComments": "Comentários recentes", "viewAll": "Ver todos →" }, "usagePolicyModal": { "title": "Bem-vindo ao PROTEA", "subtitle": "Recursos computacionais pessoais, compartilhados abertamente — leia antes de continuar", "intro": "Esta plataforma funciona em hardware pessoal compartilhado livremente para fins de pesquisa. Não é necessário registro. Por favor, tenha em mente:", "rule1": "Estes são recursos pessoais compartilhados voluntariamente. Por favor, seja consciente da carga que você gera.", "rule2": "Antes de iniciar jobs pesados ou de longa duração, entre em contato primeiro — uma mensagem breve sobre seu caso de uso é muito apreciada.", "rule3": "Todos os dados processados são públicos e abertos. Fique à vontade para usar e compartilhar os resultados.", "rule4": "PROTEA é gratuito e de código aberto. Qualquer pessoa ou instituição de pesquisa pode implantar sua própria instância — código fonte no GitHub.", "rule5": "Este serviço opera em base de melhor esforço. O sistema pode ser desligado a qualquer momento para manutenção ou uso pessoal, sem aviso prévio.", "rule6": "Se algo quebrar ou se comportar de forma inesperada, por favor reporte. 
Relatórios de bugs e feedback são muito apreciados.", "dataPublicNote": "Todos os dados processados aqui são públicos. Obrigado por usar com responsabilidade.", "accept": "Entendido, vamos lá" }, "statusBadge": { "queued": "NA FILA", "running": "ATIVO", "succeeded": "CONCLUÍDO", "failed": "FALHOU", "cancelled": "CANCELADO" }, "eventTimeline": { "noEvents": "Nenhum evento ainda." }, "languageSwitcher": { "label": "Idioma" } }
+}
diff --git a/apps/web/messages/zh.json b/apps/web/messages/zh.json
index d837bc6..fab9c73 100644
--- a/apps/web/messages/zh.json
+++ b/apps/web/messages/zh.json
@@ -1,6 +1,7 @@
 {
   "layout": { "title": "PROTEA", "description": "蛋白质数据平台 — 任务队列与流水线管理" },
-  "nav": { "proteins": "蛋白质", "annotations": "注释", "querySets": "查询集", "embeddings": "嵌入向量", "functionalAnnotation": "功能注释", "scoring": "评分", "evaluation": "评估", "jobs": "任务", "maintenance": "维护" },
+  "nav": { "proteins": "蛋白质", "annotations": "注释", "querySets": "查询集", "embeddings": "嵌入向量", "functionalAnnotation": "功能注释", "scoring": "评分", "evaluation": "评估", "reranker": "重排序器", "jobs": "任务", "maintenance": "维护", "home": "首页", "data": "数据", "pipelineGroup": "流水线", "results": "结果", "system": "系统" },
+  "home": { "title": "蛋白质功能预测流水线", "subtitle": "通过嵌入向量相似性、重排序和LLM筛选，从序列到功能注释", "bestResults": "最佳结果", "fmax": "Fmax", "methodComparison": "方法比较", "method": "方法", "delta": "vs 基线", "pipeline": "流水线", "stats": "平台统计", "proteins": "蛋白质", "sequences": "序列", "embeddings": "嵌入向量", "predictions": "预测", "predictionSets": "预测集", "rerankerModels": "重排序模型", "evaluations": "评估", "exploreResults": "探索结果", "annotateProteins": "注释我的蛋白质", "knnBaseline": "KNN（嵌入距离）", "knnScored": "KNN + 评分", "knnReranker": "KNN + 重排序器", "noDataYet": "暂无评估数据。运行流水线后可在此查看结果。", "getStarted": "开始使用", "stageSequences": "序列", "stageEmbeddings": "嵌入向量", "stageKnn": "KNN搜索", "stageReranker": "重排序器", "stageLlm": "LLM筛选", "stageAnnotation": "注释", "stageEvaluation": "评估", "nkCategory": "NK类别（No Knowledge）— 最严格的评估设置", "annotateTitle": "注释您的蛋白质", "annotateDescription": "粘贴FASTA格式的蛋白质序列，使用最佳可用方法自动获取功能注释。", "annotatePlaceholder": ">sp|P04637|P53_HUMAN Cellular tumor antigen p53\nMEEPQSDPSVEPPLSQETFSDLWKLL...", "annotateTryExample": "试用示例", "annotateUploadFile": "上传文件", "annotateButton": "注释", "annotateUploading": "上传中...", "annotateEmbedding": "计算嵌入向量中...", "annotatePredicting": "预测GO术语中...", "annotateDone": "完成！正在跳转到结果页面...", "annotateStepUpload": "上传", "annotateStepEmbed": "嵌入向量", "annotateStepPredict": "预测" },
   "jobs": { "title": "任务", "allStatuses": "所有状态", "queued": "已排队", "running": "运行中", "succeeded": "已成功", "failed": "已失败", "cancelled": "已取消", "autoRefresh": "自动刷新", "refresh": "刷新", "noJobsFound": "未找到任务。", "status": "状态", "operation": "操作", "jobId": "任务 ID", "created": "创建时间", "activeJobs": "{count} 个活跃", "jobDetail": { "title": "任务详情", "backToJobs": "← 任务", "live": "实时", "cancel": "取消", "delete": "删除", "deleteConfirm": "删除此任务？", "queue": "队列：", "created": "创建：", "started": "开始：", "finished": "完成：", "progress": "{current} / {total} {unit} ({percent}%)", "payloadLabel": "载荷", "childJobsTitle": "子任务", "childJobsCount": "({count})", "eventsTitle": "事件", "eventsCount": "({count})" } },
   "proteins": { "title": "蛋白质", "tabs": { "browse": "浏览", "stats": "统计", "insert": "导入蛋白质", "metadata": "获取元数据" }, "browseTab": { "searchPlaceholder": "登录号、基因、生物体…", "search": "搜索", "clear": "清除", "allProteins": "所有蛋白质", "swissProt": "仅 Swiss-Prot", "trembl": "仅 TrEMBL", "canonicalOnly": "仅规范序列", "totalProteins": "{count} 条蛋白质", "tableHeaders": { "accession": "登录号", "entryName": "条目名称", "gene": "基因", "organism": "生物体", "length": "长度", "source": "来源" }, "noProteinsCta": "未找到蛋白质。请使用\"导入蛋白质\"标签页从 UniProt 导入。", "pagination": { "page": "第 {current} 页，共 {total} 页", "previous": "上一页", "next": "下一页" } }, "statsTab": { "refresh": "刷新", "loading": "加载中…", "overview": "概览", "coverage": "覆盖率", "totalProteins": "蛋白质总数", "canonical": "规范序列", "isoforms": "{count} 个亚型", "reviewed": "Swiss-Prot", "reviewedSub": "已审核", "unreviewed": "TrEMBL", "unreviewedSub": "未审核", "withMetadata": "含元数据", "metadataSub": "占规范序列的 {percent}%", "withEmbeddings": "含嵌入向量", "embeddingsSub": "占总数的 {percent}%", "withGoAnnotations": "含 GO 注释", "goAnnotationsSub": "占总数的 {percent}%" }, "insertTab": { "title": "从 UniProt 导入蛋白质", "description": "下载 FASTA 序列并插入蛋白质及序列记录。", "searchCriteriaLabel": "搜索条件", "searchCriteriaHelper": "UniProt 查询 — reviewed:true = 仅 Swiss-Prot", "pageSizeLabel": "每页条数", "totalLimitLabel": "总限制", "totalLimitOptional": "（可选）", "includeIsoforms": "包含亚型", "jobQueuedPrefix": "任务已排队：", "launchJob": "启动任务", "launching": "启动中…" }, "metadataTab": { "title": "获取 UniProt 元数据", "description": "下载 TSV 注释并插入 ProteinUniProtMetadata 记录。", "searchCriteriaLabel": "搜索条件", "searchCriteriaHelper": "UniProt 查询 — reviewed:true = 仅 Swiss-Prot", "pageSizeLabel": "每页条数", "totalLimitLabel": "总限制", "totalLimitOptional": "（可选）", "launchJob": "启动任务", "launching": "启动中…" }, "sourceSwissProt": "Swiss-Prot", "sourceTrembl": "TrEMBL" },
   "proteinDetail": { "backToProteins": "← 蛋白质", "tabs": { "overview": "概览", "annotations": "GO 注释" }, "overviewTab": { "identity": "标识", "gene": "基因", "organism": "生物体", "taxonId": "分类单元 ID", "length": "长度", "aa": "aa", "sequenceId": "序列 ID", "canonical": "规范序列", "coverage": "覆盖率", "embeddings": "嵌入向量", "goAnnotations": "GO 注释", "metadata": "元数据", "yes": "是", "none": "无", "isoforms": "亚型", "function": "功能", "biochemistry": "生物化学", "ecNumber": "EC 编号", "catalyticActivity": "催化活性", "cofactor": "辅因子", "activityRegulation": "活性调节", "pathway": "通路", "absorption": "吸收", "kinetics": "动力学", "phDependence": "pH 依赖性", "redoxPotential": "氧化还原电位", "temperatureDependence": "温度依赖性", "rheaId": "Rhea ID", "keywords": "关键词", "noFunctionalMetadata": "无可用功能元数据。请使用\"获取元数据\"标签页从 UniProt 导入。", "showGoGraph": "显示 GO 图", "hideGoGraph": "隐藏 GO 图", "loadingGraph": "加载图中…", "noGoAnnotations": "未找到该蛋白质的 GO 注释。", "molecularFunction": "分子功能", "biologicalProcess": "生物过程", "cellularComponent": "细胞组分", "annotations": "注释", "goTableHeaders": { "goId": "GO ID", "name": "名称", "evidence": "证据", "qualifier": "限定词", "source": "来源" } } },
diff --git a/apps/web/public/thesis.pdf b/apps/web/public/thesis.pdf
new file mode 100644
index 0000000..d993b81
Binary files /dev/null and b/apps/web/public/thesis.pdf differ
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index e5f9db4..49d97c0 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -1,7 +1,7 @@
 # Production overrides: pull pre-built images from ghcr.io instead of building locally.
 # Use with: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
 #
-# The worker-embeddings service gets GPU access via the NVIDIA container runtime.
+# The worker-embeddings-batch service gets GPU access via the NVIDIA container runtime.
 
 services:
   migrate:
@@ -15,6 +15,9 @@ services:
 
   worker-embeddings:
     image: ghcr.io/frapercan/protea:latest
+
+  worker-embeddings-batch:
+    image: ghcr.io/frapercan/protea:latest
     deploy:
       resources:
         reservations:
@@ -22,8 +25,19 @@ services:
             - driver: nvidia
               count: all
               capabilities: [gpu]
+        limits:
+          memory: 8G
+
+  worker-embeddings-write:
+    image: ghcr.io/frapercan/protea:latest
+
+  worker-predictions-batch:
+    image: ghcr.io/frapercan/protea:latest
+
+  worker-predictions-write:
+    image: ghcr.io/frapercan/protea:latest
 
-  worker-predictions:
+  worker-reaper:
     image: ghcr.io/frapercan/protea:latest
 
   frontend:
diff --git a/docker-compose.yml b/docker-compose.yml
index cb925b8..1e74b6c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,6 +16,10 @@ services:
       interval: 10s
       timeout: 5s
       retries: 5
+    deploy:
+      resources:
+        limits:
+          memory: 2G
 
   rabbitmq:
     image: rabbitmq:3-management
@@ -30,6 +34,10 @@ services:
       interval: 10s
       timeout: 5s
       retries: 5
+    deploy:
+      resources:
+        limits:
+          memory: 512M
 
   migrate:
     build: .
@@ -55,6 +63,10 @@ services:
         condition: service_healthy
       migrate:
         condition: service_completed_successfully
+    deploy:
+      resources:
+        limits:
+          memory: 1G
 
   worker-jobs:
     build: .
@@ -67,6 +79,10 @@ services:
         condition: service_completed_successfully
       rabbitmq:
         condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 2G
 
   worker-embeddings:
     build: .
@@ -79,8 +95,44 @@ services:
         condition: service_completed_successfully
       rabbitmq:
         condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 2G
 
-  worker-predictions:
+  worker-embeddings-batch:
+    build: .
+    environment:
+      PROTEA_DB_URL: postgresql+psycopg://protea:protea@postgres/protea
+      PROTEA_AMQP_URL: amqp://guest:guest@rabbitmq/
+    command: python scripts/worker.py --queue protea.embeddings.batch
+    depends_on:
+      migrate:
+        condition: service_completed_successfully
+      rabbitmq:
+        condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+
+  worker-embeddings-write:
+    build: .
+    environment:
+      PROTEA_DB_URL: postgresql+psycopg://protea:protea@postgres/protea
+      PROTEA_AMQP_URL: amqp://guest:guest@rabbitmq/
+    command: python scripts/worker.py --queue protea.embeddings.write
+    depends_on:
+      migrate:
+        condition: service_completed_successfully
+      rabbitmq:
+        condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 1G
+
+  worker-predictions-batch:
     build: .
     environment:
       PROTEA_DB_URL: postgresql+psycopg://protea:protea@postgres/protea
@@ -91,6 +143,40 @@ services:
         condition: service_completed_successfully
       rabbitmq:
         condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+
+  worker-predictions-write:
+    build: .
+    environment:
+      PROTEA_DB_URL: postgresql+psycopg://protea:protea@postgres/protea
+      PROTEA_AMQP_URL: amqp://guest:guest@rabbitmq/
+    command: python scripts/worker.py --queue protea.predictions.write
+    depends_on:
+      migrate:
+        condition: service_completed_successfully
+      rabbitmq:
+        condition: service_healthy
+    deploy:
+      resources:
+        limits:
+          memory: 1G
+
+  worker-reaper:
+    build: .
+    environment:
+      PROTEA_DB_URL: postgresql+psycopg://protea:protea@postgres/protea
+      PROTEA_AMQP_URL: amqp://guest:guest@rabbitmq/
+    command: python scripts/worker.py --queue reaper
+    depends_on:
+      migrate:
+        condition: service_completed_successfully
+    deploy:
+      resources:
+        limits:
+          memory: 256M
 
   frontend:
     build:
@@ -102,6 +188,10 @@ services:
       - "3000:3000"
     depends_on:
       - api
+    deploy:
+      resources:
+        limits:
+          memory: 512M
 
 volumes:
   postgres_data:
diff --git a/docs/source/abstract.rst b/docs/source/abstract.rst
index 13e2f5d..c077578 100644
--- a/docs/source/abstract.rst
+++ b/docs/source/abstract.rst
@@ -15,6 +15,13 @@ decouples HTTP ingestion from computation, and a *two-session worker pattern* en
 robust, auditable state transitions. A React/Next.js frontend provides real-time visibility
 into job progress through structured event logs.
 
+The platform implements the full protein functional annotation pipeline: UniProt sequence
+ingestion, GO ontology and annotation loading, GPU-accelerated embedding computation
+(ESM-2, ESM3c, T5), KNN-based GO term prediction with optional pairwise alignment and
+taxonomic features, CAFA-style temporal holdout evaluation (NK/LK/PK), and LightGBM
+re-ranking. A scoring engine and one-click annotation endpoint make the system accessible
+to researchers without machine-learning infrastructure expertise.
+
 The platform is designed to accommodate continuous extension — new operations, new data
 sources, new models — without architectural regression. Computational efficiency is preserved
 at each migration step, with sequence deduplication by MD5 hash, cursor-based pagination,
diff --git a/docs/source/adr/001-knn-without-pgvector.rst b/docs/source/adr/001-knn-without-pgvector.rst
new file mode 100644
index 0000000..f0f521d
--- /dev/null
+++ b/docs/source/adr/001-knn-without-pgvector.rst
@@ -0,0 +1,51 @@
+ADR-001: KNN on CPU, not pgvector or GPU
+========================================
+
+:Date: 2025-12-15
+:Author: frapercan
+
+The problem
+-----------
+
+GO term prediction requires K-nearest-neighbor search over 500K+ embeddings
+of 1280 dimensions.  The natural options were ``pgvector`` (we already store
+vectors there) or PyTorch on GPU (we already have the GPU for inference).
+Both failed:
+
+- **pgvector** with an IVFFlat index on 527K vectors: index build took
+  >20 minutes, and each individual query cost 100-500ms.  For a job with
+  thousands of queries, unacceptable.
+- **PyTorch on GPU**: the GPU is busy with ESM-2/ESM3c/T5 inference.
+  Loading the distance matrix competes with model forward passes and
+  causes CUDA OOM.
+
+What we do
+----------
+
+KNN runs **on CPU**, entirely in Python:
+
+- **NumPy** (brute-force via matrix multiplication) for small datasets
+  (<100K).
+- **FAISS** (Flat, IVFFlat, HNSW) for large datasets.  Uses SIMD and
+  multithreading on CPU without touching the GPU.
+
+Reference embeddings are loaded once from PostgreSQL into a process-level
+cache (``_REF_CACHE``, float16, ~4 GB for 500K vectors).  ``pgvector``
+remains as storage only — the ``VECTOR`` type is there, but we never
+search with ``<=>``.
+
+Trade-offs
+----------
+
+- The cache consumes worker RAM (~4 GB).  If the worker restarts, the
+  first prediction takes ~15s extra to reload from DB.
+- KNN and inference run in parallel without contention: CPU computes
+  distances while GPU computes embeddings.
+
+Rejected
+--------
+
+- **Dedicated vector database** (Milvus, Qdrant): one more infra
+  dependency for something NumPy/FAISS solves in-process.
+- **Persistent FAISS index on disk**: IVFFlat training takes a few
+  seconds; not worth the complexity of serialising/deserialising for now.
diff --git a/docs/source/adr/002-two-session-worker-pattern.rst b/docs/source/adr/002-two-session-worker-pattern.rst
new file mode 100644
index 0000000..14c2b51
--- /dev/null
+++ b/docs/source/adr/002-two-session-worker-pattern.rst
@@ -0,0 +1,45 @@
+ADR-002: Two-session worker pattern
+====================================
+
+:Date: 2025-12-20
+:Author: frapercan
+
+The problem
+-----------
+
+A worker executes operations that can run for hours (``compute_embeddings``,
+``load_goa_annotations``).  If the operation fails mid-way, the committed
+``QUEUED -> RUNNING`` claim must survive so monitoring can detect the failure.
+
+With a single database session, a rollback on error also reverts the
+``QUEUED -> RUNNING`` transition.  The job silently goes back to ``QUEUED``
+and nobody notices the failure until the reaper catches it an hour later.
+
+What we do
+----------
+
+``BaseWorker.handle_job(job_id)`` opens **two independent sessions**:
+
+1. **Claim session** — changes the job to ``RUNNING``, records
+   ``started_at`` and the ``job.started`` event, and **commits immediately**.
+   From this point the job is visible as running.
+
+2. **Execute session** — runs the operation.  On success: ``SUCCEEDED``.
+   On failure: ``FAILED`` with ``error_code`` and ``error_message``.
+   A rollback here does not affect the claim.
+
+Trade-offs
+----------
+
+- Two round-trips to DB per job — irrelevant when the operation takes
+  minutes.
+- RabbitMQ delivers each message to exactly one consumer, and ``prefetch=1``
+  keeps one unacknowledged message per worker, so workers do not race for a job.
+
+Rejected
+--------
+
+- **Savepoints** inside a long transaction: hold locks and bloat the
+  PostgreSQL WAL.
+- **Optimistic locking** with a version column: does not solve the
+  requirement that the claim must be visible before execution starts.
diff --git a/docs/source/adr/003-queue-consumer-vs-operation-consumer.rst b/docs/source/adr/003-queue-consumer-vs-operation-consumer.rst
new file mode 100644
index 0000000..8819873
--- /dev/null
+++ b/docs/source/adr/003-queue-consumer-vs-operation-consumer.rst
@@ -0,0 +1,57 @@
+ADR-003: Two types of consumer
+===============================
+
+:Date: 2026-01-10
+:Author: frapercan
+
+The problem
+-----------
+
+Distributed pipelines (``compute_embeddings``, ``predict_go_terms``) split
+work into hundreds of batches.  If each batch had its own ``Job`` row in
+the DB:
+
+- The ``jobs`` table fills with thousands of rows per prediction run,
+  making it impossible to see real user-facing jobs.
+- Each batch pays the cost of the two-session pattern (2 round-trips),
+  which for 2-8s batches is more overhead than useful work.
+
+What we do
+----------
+
+Two consumers coexist:
+
+**QueueConsumer** — for user-facing jobs with full lifecycle tracking:
+
+- Receives ``{"job_id": "<uuid>"}`` and delegates to
+  ``BaseWorker.handle_job()``.
+- Used by: ``protea.ping``, ``protea.jobs``, ``protea.embeddings``.
+
+**OperationConsumer** — for ephemeral batches with no individual DB row:
+
+- Receives ``{"operation": "...", "job_id": "<parent>", "payload": {...}}``.
+- Executes the operation in a single session, ack/nack, done.
+- Progress is reported by incrementing ``progress_current`` on the
+  **parent job**.
+- Events are written to the parent's log with the ``child.`` prefix.
+- Used by: ``protea.embeddings.batch``, ``protea.embeddings.write``,
+  ``protea.predictions.batch``, ``protea.predictions.write``.
+
+From the outside, the user sees a single job (the coordinator) with a
+progress bar that advances.  Batches are invisible.
+
+Trade-offs
+----------
+
+- Two code paths for consuming messages, but both are short (~100 lines)
+  and share infrastructure (DLQ, registry, emit).
+- If a batch fails and goes to the DLQ, there is no individual retry
+  counter — just the dead message for inspection.
+
+Rejected
+--------
+
+- **Job with** ``is_batch=True`` **flag**: still creates thousands of DB
+  rows.
+- **Fire-and-forget** without tracking: operators lose visibility into
+  progress and failures.
diff --git a/docs/source/adr/004-dead-letter-queue-and-retry-strategy.rst b/docs/source/adr/004-dead-letter-queue-and-retry-strategy.rst
new file mode 100644
index 0000000..d151592
--- /dev/null
+++ b/docs/source/adr/004-dead-letter-queue-and-retry-strategy.rst
@@ -0,0 +1,51 @@
+ADR-004: Dead letter queue and retries
+======================================
+
+:Date: 2026-03-18
+:Author: frapercan
+
+The problem
+-----------
+
+Two related messaging problems:
+
+1. **Lost messages**: when a message failed permanently (invalid JSON,
+   unknown operation), it was discarded with ``basic_nack``.  The payload
+   disappeared and there was no way to perform a post-mortem.
+
+2. **Aggressive retries**: transient failures (broker down, GPU busy)
+   were retried immediately, amplifying load on the service that was
+   already struggling.
+
+What we do
+----------
+
+**Dead letter queue** — all queues are declared with
+``x-dead-letter-exchange: protea.dlx``.  Rejected messages
+(``nack`` without ``requeue``) end up in ``protea.dead-letter``, a durable
+queue where they can be inspected, fixed, and republished.
+
+**Publisher retries** — exponential backoff: 5 attempts with delays of
+1, 2, 4, 8, 16s (capped at 30s).  If the connection is broken, it is
+discarded and a new one is created.
+
+**Worker retries** — operations can raise
+``RetryLaterError("GPU busy", delay_seconds=60)``.  The worker calculates
+adaptive backoff based on how many previous retries have occurred:
+``delay = min(base * 2^retries, 600s)``.  The job goes back to ``QUEUED``
+and is republished after the wait.
+
+Trade-offs
+----------
+
+- The DLQ grows if nobody inspects it — it must be monitored (see runbook).
+- Adaptive backoff makes one DB query per retry to count previous
+  ``job.retry_later`` events.  Negligible cost.
+
+Rejected
+--------
+
+- **TTL + delay queue in RabbitMQ**: more complex to set up and debug than
+  an application-level ``sleep()``.
+- **Celery retries**: PROTEA does not use Celery; reimplementing its
+  countdown over raw pika adds no value.
diff --git a/docs/source/adr/005-thread-local-rabbitmq-connections.rst b/docs/source/adr/005-thread-local-rabbitmq-connections.rst
new file mode 100644
index 0000000..a5732e1
--- /dev/null
+++ b/docs/source/adr/005-thread-local-rabbitmq-connections.rst
@@ -0,0 +1,46 @@
+ADR-005: Reusable RabbitMQ connections
+======================================
+
+:Date: 2026-03-18
+:Author: frapercan
+
+The problem
+-----------
+
+When a coordinator (``compute_embeddings``) dispatches 500 batches, the
+publisher opened and closed a TCP connection for each ``publish_operation()``
+call.  This caused:
+
+- 500 TCP+AMQP handshakes in a burst.
+- ``EMFILE`` (too many open files) errors on the worker.
+- Broker-side resource exhaustion (each connection costs RabbitMQ memory).
+
+What we do
+----------
+
+Each thread keeps **a single connection** stored in ``threading.local()``.
+``_get_connection()`` returns the existing connection if it is open, or
+creates a new one.  If a publish fails, ``_close_cached_connection()``
+discards the broken connection so the next attempt reconnects.
+
+Result: from O(messages) connections down to O(threads) — in practice,
+1-4 connections total.
+
+Trade-offs
+----------
+
+- ``pika.BlockingConnection`` is not thread-safe, which is why
+  ``threading.local()`` isolation is mandatory.
+- Connections are never proactively closed — they live until the thread
+  dies or a publish fails.  If RabbitMQ restarts, the first publish after
+  restart always fails once (and reconnects automatically).
+
+Rejected
+--------
+
+- **Connection pool** (``pika_pool``): external dependency for something
+  ``threading.local()`` solves in 15 lines.
+- **Global connection with a lock**: serialises all publishes, creating a
+  bottleneck when dispatching hundreds of messages.
+- **Async publisher** (``aio-pika``): workers are synchronous; adding an
+  event loop just for the publisher is disproportionate.
diff --git a/docs/source/adr/006-sequence-deduplication-by-md5.rst b/docs/source/adr/006-sequence-deduplication-by-md5.rst
new file mode 100644
index 0000000..cd62a79
--- /dev/null
+++ b/docs/source/adr/006-sequence-deduplication-by-md5.rst
@@ -0,0 +1,47 @@
+ADR-006: Sequence deduplication by MD5
+======================================
+
+:Date: 2025-12-10
+:Author: frapercan
+
+The problem
+-----------
+
+UniProt has ~570K accessions in Swiss-Prot, but only ~540K unique sequences.
+The remaining 30K are isoforms or cross-references sharing the same amino
+acid chain.
+
+Computing the embedding for a sequence costs ~0.5s on GPU.  Processing 30K
+duplicates wastes **4+ hours** per full run.
+
+What we do
+----------
+
+When inserting proteins, we compute the MD5 hash of the amino acid string.
+The ``Sequence`` table has a **unique constraint** on ``sequence_hash``:
+
+1. If the hash already exists -> reuse the existing ``Sequence.id``.
+2. If it does not exist -> insert a new row.
+
+Multiple ``Protein`` rows (one per UniProt accession) point to the same
+``Sequence``.  The FK ``Protein.sequence_id`` is intentionally non-unique.
+
+When the embedding pipeline runs, it only processes ``Sequence`` rows
+without an embedding — duplicates are skipped automatically.
+
+Trade-offs
+----------
+
+- MD5 is not cryptographically secure, but that does not matter here:
+  there is no adversarial input, only biological sequences.
+- Sequences with a single mutation produce different hashes and are stored
+  separately.  This is correct — a mutation changes the embedding.
+
+Rejected
+--------
+
+- **SHA-256**: digest twice as long, zero practical benefit.
+- **UNIQUE on the sequence text column**: indexing multi-kilobyte text
+  columns is expensive; the 32-char hex digest is far more efficient.
+- **CD-HIT clustering** (90-95% identity): useful for reducing redundancy
+  in evolutionary analysis, but here we need exact deduplication (100%).
diff --git a/docs/source/adr/index.rst b/docs/source/adr/index.rst
new file mode 100644
index 0000000..b4f5046
--- /dev/null
+++ b/docs/source/adr/index.rst
@@ -0,0 +1,45 @@
+Architecture Decision Records
+=============================
+
+Design decisions that are not obvious from reading the code.  Each ADR
+documents **why** a decision was made, not just what — the code already
+shows the what.
+
+Decisions are grouped by system layer:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 10 50 40
+
+   * - ADR
+     - Decision
+     - Problem it solves
+   * - 001
+     - :doc:`KNN on CPU, not pgvector or GPU <001-knn-without-pgvector>`
+     - pgvector does not scale to 500K+ vectors; GPU must be reserved for inference
+   * - 006
+     - :doc:`Sequence deduplication by MD5 <006-sequence-deduplication-by-md5>`
+     - 30K duplicate sequences in Swiss-Prot waste hours of GPU time
+   * - 002
+     - :doc:`Two-session worker pattern <002-two-session-worker-pattern>`
+     - A mid-operation crash left the job invisible to monitoring
+   * - 003
+     - :doc:`Two types of consumer <003-queue-consumer-vs-operation-consumer>`
+     - Thousands of batch jobs per pipeline flooded the jobs table
+   * - 004
+     - :doc:`Dead letter queue and retries <004-dead-letter-queue-and-retry-strategy>`
+     - Failed messages were lost; retries without backoff amplified failures
+   * - 005
+     - :doc:`Reusable RabbitMQ connections <005-thread-local-rabbitmq-connections>`
+     - A coordinator dispatching 500 batches opened 500 TCP connections
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   001-knn-without-pgvector
+   002-two-session-worker-pattern
+   003-queue-consumer-vs-operation-consumer
+   004-dead-letter-queue-and-retry-strategy
+   005-thread-local-rabbitmq-connections
+   006-sequence-deduplication-by-md5
diff --git a/docs/source/appendix/configuration.rst b/docs/source/appendix/configuration.rst
index 6fbe82c..acab5d6 100644
--- a/docs/source/appendix/configuration.rst
+++ b/docs/source/appendix/configuration.rst
@@ -82,19 +82,38 @@ RabbitMQ management
 -------------------
 
 The RabbitMQ management UI is available at http://localhost:15672 (default
-credentials ``guest`` / ``guest``). The two PROTEA queues are:
+credentials ``guest`` / ``guest``). The seven PROTEA queues are:
 
 .. list-table::
    :header-rows: 1
 
    * - Queue
-     - Durability
+     - Consumer
      - Operations
    * - ``protea.ping``
-     - durable
+     - QueueConsumer
      - ``ping``
    * - ``protea.jobs``
-     - durable
-     - ``insert_proteins``, ``fetch_uniprot_metadata``
+     - QueueConsumer
+     - ``insert_proteins``, ``fetch_uniprot_metadata``, ``load_ontology_snapshot``,
+       ``load_goa_annotations``, ``load_quickgo_annotations``,
+       ``compute_embeddings`` (coordinator), ``predict_go_terms`` (coordinator),
+       ``generate_evaluation_set``, ``run_cafa_evaluation``,
+       ``train_reranker``, ``train_reranker_auto``
+   * - ``protea.embeddings``
+     - QueueConsumer
+     - ``compute_embeddings`` coordinator (serialised, one at a time)
+   * - ``protea.embeddings.batch``
+     - OperationConsumer
+     - ``compute_embeddings_batch`` — GPU inference (ephemeral)
+   * - ``protea.embeddings.write``
+     - OperationConsumer
+     - ``store_embeddings`` — bulk pgvector insert (ephemeral)
+   * - ``protea.predictions.batch``
+     - OperationConsumer
+     - ``predict_go_terms_batch`` — KNN + GO transfer (ephemeral)
+   * - ``protea.predictions.write``
+     - OperationConsumer
+     - ``store_predictions`` — bulk GOPrediction insert (ephemeral)
 
 Queues are declared at worker startup and survive broker restarts.
diff --git a/docs/source/appendix/howto_guides.rst b/docs/source/appendix/howto_guides.rst
index 0d910b4..90331ee 100644
--- a/docs/source/appendix/howto_guides.rst
+++ b/docs/source/appendix/howto_guides.rst
@@ -81,18 +81,14 @@ Jobs in terminal states (``SUCCEEDED``, ``FAILED``) are unaffected —
 the endpoint is a no-op. Cancelling a ``RUNNING`` job marks the DB row as
 ``CANCELLED`` but does not interrupt the worker process (soft cancel).
 
-Run a job manually without RabbitMQ
--------------------------------------
+Run a single worker manually
+-----------------------------
 
-Useful for debugging a specific job without the full message-broker pipeline:
+Useful for debugging a specific queue without the full ``manage.sh`` stack:
 
 .. code-block:: bash
 
-   poetry run python scripts/run_one_job.py <job-id-uuid>
-
-The script loads the job from the DB and runs it through ``BaseWorker``
-directly. The job must already exist in QUEUED status (created via the API
-before calling this script, for example).
+   poetry run python scripts/worker.py --queue protea.jobs
 
 Add a new operation
 --------------------
@@ -277,10 +273,112 @@ the DB before submitting.
          "distance_threshold": 0.3,
          "search_backend": "numpy",
          "compute_alignments": true,
-         "compute_taxonomy": false
+         "compute_taxonomy": false,
+         "compute_reranker_features": true
        }
      }'
 
+Generate an evaluation set (temporal holdout)
+----------------------------------------------
+
+Create a CAFA-style evaluation delta between an old and new annotation set.
+Both must share the same ``ontology_snapshot_id``.
+
+.. code-block:: bash
+
+   curl -s -X POST http://127.0.0.1:8000/annotations/evaluation-sets/generate \
+     -H "Content-Type: application/json" \
+     -d '{
+       "old_annotation_set_id": "<old-uuid>",
+       "new_annotation_set_id": "<new-uuid>"
+     }'
+
+The job classifies proteins into NK (no-knowledge), LK (limited-knowledge),
+and PK (partial-knowledge) categories per namespace. Download ground-truth
+files via ``GET /annotations/evaluation-sets/{id}/ground-truth-{NK|LK|PK}.tsv``.
+
+Run a CAFA evaluation
+----------------------
+
+Evaluate a prediction set against an evaluation set using the ``cafaeval``
+evaluator:
+
+.. code-block:: bash
+
+   curl -s -X POST http://127.0.0.1:8000/annotations/evaluation-sets/<eval-id>/run \
+     -H "Content-Type: application/json" \
+     -d '{
+       "prediction_set_id": "<prediction-set-uuid>"
+     }'
+
+Results include per-namespace Fmax, precision, recall, and coverage for
+NK, LK, and PK settings. Download metrics via
+``GET /annotations/evaluation-sets/{id}/results/{rid}/metrics.tsv``.
+
+Train a re-ranker
+------------------
+
+Train a LightGBM binary classifier to re-score GO predictions using
+temporal holdout labels:
+
+.. code-block:: bash
+
+   curl -s -X POST http://127.0.0.1:8000/jobs \
+     -H "Content-Type: application/json" \
+     -d '{
+       "operation": "train_reranker",
+       "queue_name": "protea.jobs",
+       "payload": {
+         "prediction_set_id": "<prediction-set-uuid>",
+         "evaluation_set_id": "<eval-set-uuid>"
+       }
+     }'
+
+The prediction set must have been generated with
+``compute_alignments=true``, ``compute_taxonomy=true``, and
+``compute_reranker_features=true`` to provide the full feature set.
+
+Apply a trained re-ranker to new predictions via
+``GET /scoring/prediction-sets/{id}/rerank.tsv?reranker_id=<uuid>``.
+
+Use one-click annotation
+-------------------------
+
+The ``/annotate`` endpoint accepts a FASTA file and automatically selects
+the best available embedding config, annotation set, and ontology snapshot:
+
+.. code-block:: bash
+
+   curl -s -X POST http://127.0.0.1:8000/annotate \
+     -F "file=@my_proteins.fasta" \
+     -F "name=Quick annotation" | python -m json.tool
+
+The response includes all IDs needed to monitor the embedding job and chain
+the prediction step. The frontend uses this endpoint to power the one-click
+annotation wizard.
+
+Score predictions with a ScoringConfig
+----------------------------------------
+
+Create a scoring config and apply it to a prediction set:
+
+.. code-block:: bash
+
+   # Create scoring config
+   curl -s -X POST http://127.0.0.1:8000/scoring/configs \
+     -H "Content-Type: application/json" \
+     -d '{
+       "name": "distance-only",
+       "weights": {"distance": -1.0}
+     }' | python -m json.tool
+
+   # Download scored predictions
+   curl -s "http://127.0.0.1:8000/scoring/prediction-sets/<id>/score.tsv?scoring_config_id=<config-id>" \
+     -o scored.tsv
+
+   # Compute CAFA metrics for scored predictions
+   curl -s "http://127.0.0.1:8000/scoring/prediction-sets/<id>/metrics?scoring_config_id=<config-id>&evaluation_set_id=<eval-id>"
+
 Scale batch workers
 --------------------
 
diff --git a/docs/source/appendix/index.rst b/docs/source/appendix/index.rst
index 1384103..dbeb4d2 100644
--- a/docs/source/appendix/index.rst
+++ b/docs/source/appendix/index.rst
@@ -7,3 +7,4 @@ Appendix
    installation_and_quickstart
    configuration
    howto_guides
+   runbook
diff --git a/docs/source/appendix/runbook.rst b/docs/source/appendix/runbook.rst
new file mode 100644
index 0000000..263be81
--- /dev/null
+++ b/docs/source/appendix/runbook.rst
@@ -0,0 +1,214 @@
+Operational Runbook
+===================
+
+Practical guide for operating PROTEA: starting the system, diagnosing
+problems, and maintaining infrastructure.
+
+.. contents:: Contents
+   :local:
+   :depth: 2
+
+
+Day-to-day operations
+---------------------
+
+Starting and stopping
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+   # Prerequisite: PostgreSQL and RabbitMQ must be running
+   docker start pgvectorsql rabbitmq
+
+   # Start everything (API + workers + frontend)
+   bash scripts/manage.sh start
+
+   # Start with 3 batch workers per GPU pipeline
+   bash scripts/manage.sh start 3
+
+   # Check what is running
+   bash scripts/manage.sh status
+
+   # Stop everything
+   bash scripts/manage.sh stop
+
+Checking that everything works
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+   # Liveness: is the API process alive?
+   curl http://127.0.0.1:8000/health
+   # -> {"status": "ok"}
+
+   # Readiness: can it connect to DB and RabbitMQ?
+   curl http://127.0.0.1:8000/health/ready
+   # -> {"status": "ready"}  or  503 if something is down
+
+If ``/health/ready`` returns 503, check that Docker containers are running
+and that the URLs in ``protea/config/system.yaml`` are correct.
+
+Scaling workers
+~~~~~~~~~~~~~~~
+
+Batch workers are stateless — they can be added on the fly:
+
+.. code-block:: bash
+
+   bash scripts/manage.sh scale protea.predictions.batch 2
+   bash scripts/manage.sh scale protea.embeddings.batch 3
+
+Scaling is linear for batch queues.
+
+.. warning::
+
+   The ``protea.embeddings`` queue must have **exactly one** consumer.
+   The coordinator serialises GPU access; multiple coordinators step on
+   each other and cause ``RetryLaterError`` storms.
+
+Remote access
+~~~~~~~~~~~~~
+
+For demos or access from outside the local network:
+
+.. code-block:: bash
+
+   bash scripts/expose.sh
+
+Opens an ngrok tunnel to the frontend (port 3000) with a static domain
+(``protea.ngrok.app``).  API calls are proxied through Next.js rewrites,
+so only one tunnel is needed.  Requires ngrok to be installed and
+authenticated.  Press Ctrl+C to close the tunnel.
+
+
+Troubleshooting
+---------------
+
+Jobs stuck in RUNNING
+~~~~~~~~~~~~~~~~~~~~~
+
+A job in ``RUNNING`` that is not progressing usually means the worker died.
+
+**Automatic detection**: the ``worker-reaper`` process checks every 60 s
+and marks as ``FAILED`` (error code ``JobTimeout``) any job that has been
+in ``RUNNING`` for more than 6 hours (21 600 s).
+
+**Manual intervention**:
+
+.. code-block:: bash
+
+   # Check job status and events
+   curl -s http://127.0.0.1:8000/jobs/<job-id> | python -m json.tool
+   curl -s http://127.0.0.1:8000/jobs/<job-id>/events | python -m json.tool
+
+   # Cancel (also cancels child sub-jobs)
+   curl -s -X POST http://127.0.0.1:8000/jobs/<job-id>/cancel
+
+   # Delete a terminal job
+   curl -s -X DELETE http://127.0.0.1:8000/jobs/<job-id>
+
+To re-run, create a new job with the same operation and payload.
+There is no "retry" button — jobs are immutable once finished.
+
+Batch failures
+~~~~~~~~~~~~~~
+
+Batches (``compute_embeddings_batch``, ``predict_go_terms_batch``) do not
+have their own row in ``jobs``.  To diagnose:
+
+1. **Parent job events** — failures are recorded as ``child.failed``:
+
+   .. code-block:: bash
+
+      curl -s "http://127.0.0.1:8000/jobs/<parent-id>/events?limit=50" | python -m json.tool
+
+2. **Worker logs** — each worker writes structured JSON:
+
+   .. code-block:: bash
+
+      bash scripts/manage.sh logs embeddings-batch
+
+      # Filter errors only with jq
+      cat logs/worker-embeddings-batch-1.log | jq 'select(.level == "ERROR")'
+
+      # Search for a specific job
+      cat logs/worker-jobs.log | jq 'select(.message | contains("<job-id>"))'
+
+3. **Dead letter queue** — permanently failed messages:
+
+   .. code-block:: bash
+
+      # Check how many dead messages there are
+      rabbitmqctl list_queues name messages | grep dead-letter
+
+   Also accessible from the RabbitMQ UI: http://localhost:15672
+   (guest/guest) -> Queues -> ``protea.dead-letter`` -> Get Message(s).
+
+   To republish a corrected message, use "Move" in the UI.
+
+CUDA out of memory
+~~~~~~~~~~~~~~~~~~
+
+When a batch worker runs out of GPU memory:
+
+1. The worker automatically calls ``torch.cuda.empty_cache()`` and
+   requeues the message for retry.
+2. If it keeps failing, reduce ``batch_size`` in the job payload.
+3. Check that no other process is using the GPU:
+
+   .. code-block:: bash
+
+      nvidia-smi
+
+4. If another embedding job is using the GPU, the coordinator detects
+   contention via ``RetryLaterError`` and waits with exponential backoff
+   (up to 10 minutes between retries).
+
+
+Maintenance
+-----------
+
+Database
+~~~~~~~~
+
+.. code-block:: bash
+
+   # Total DB size
+   psql postgresql://protea:protea@localhost:5432/protea \
+     -c "SELECT pg_size_pretty(pg_database_size('protea'));"
+
+   # Top 10 tables by size
+   psql postgresql://protea:protea@localhost:5432/protea \
+     -c "SELECT relname, pg_size_pretty(pg_total_relation_size(oid))
+         FROM pg_class WHERE relkind='r'
+         ORDER BY pg_total_relation_size(oid) DESC LIMIT 10;"
+
+   # Clean up jobs and events older than 30 days
+   psql postgresql://protea:protea@localhost:5432/protea \
+     -c "DELETE FROM job_events WHERE ts < now() - interval '30 days';"
+   psql postgresql://protea:protea@localhost:5432/protea \
+     -c "DELETE FROM jobs WHERE finished_at < now() - interval '30 days'
+         AND status IN ('succeeded', 'failed', 'cancelled');"
+
+   # Full reset (destructive — deletes EVERYTHING)
+   curl -s -X POST http://127.0.0.1:8000/admin/reset-db
+
+Dead letter queue
+~~~~~~~~~~~~~~~~~
+
+Messages in ``protea.dead-letter`` accumulate and are not purged
+automatically.  Review periodically:
+
+.. code-block:: bash
+
+   # Purge the DLQ when messages are no longer needed
+   rabbitmqctl purge_queue protea.dead-letter
+
+Logs
+~~~~
+
+Logs grow without limit.  To truncate without restarting workers:
+
+.. code-block:: bash
+
+   for f in logs/*.log; do : > "$f"; done
diff --git a/docs/source/architecture/data_model.rst b/docs/source/architecture/data_model.rst
index fc0297a..2aaaf18 100644
--- a/docs/source/architecture/data_model.rst
+++ b/docs/source/architecture/data_model.rst
@@ -2,7 +2,7 @@ Data Model
 ==========
 
 All models use SQLAlchemy 2.x declarative style with ``Mapped[]`` type annotations.
-The schema is managed by Alembic (8 migrations to date).
+The schema is managed by Alembic (22 migrations to date).
 
 Protein and sequence deduplication
 ------------------------------------
@@ -213,7 +213,66 @@ Predictions
 **GOPrediction**
    One row per (query protein, GO term, reference protein) triple. The alignment and
    taxonomy columns are ``NULL`` unless ``compute_alignments=true`` and/or
-   ``compute_taxonomy=true`` were set in the prediction payload.
+   ``compute_taxonomy=true`` were set in the prediction payload. Five additional
+   re-ranker features (``vote_count``, ``k_position``, ``go_term_frequency``,
+   ``ref_annotation_density``, ``neighbor_distance_std``) are populated when
+   ``compute_reranker_features=true``.
+
+**RerankerModel**
+   Stores a trained LightGBM binary classifier. References the ``PredictionSet``
+   and ``EvaluationSet`` used for training. Contains the serialized model string,
+   validation metrics (JSONB), and feature importance (JSONB).
+
+**ScoringConfig**
+   Defines a named scoring recipe: a set of feature weights and parameters
+   that can be applied to any prediction set. Immutable once created.
+
+Evaluation
+----------
+
+.. code-block:: text
+
+   ┌──────────────────────────────┐     1→N    ┌───────────────────────────────────┐
+   │       EvaluationSet          │──────────▶│        EvaluationResult           │
+   │──────────────────────────────│           │───────────────────────────────────│
+   │ id (UUID, PK)                │           │ id (UUID, PK)                     │
+   │ old_annotation_set_id (FK)   │           │ evaluation_set_id (FK)            │
+   │ new_annotation_set_id (FK)   │           │ prediction_set_id (FK)            │
+   │ ontology_snapshot_id (FK)    │           │ scoring_config_id (FK, nullable)  │
+   │ stats (JSONB)                │           │ reranker_model_id (FK, nullable)  │
+   │ job_id (FK)                  │           │ results (JSONB)                   │
+   │ created_at                   │           │ max_distance (Float, nullable)    │
+   └──────────────────────────────┘           │ job_id (FK)                       │
+                                              │ created_at                        │
+                                              └───────────────────────────────────┘
+
+**EvaluationSet**
+   Stores the CAFA-style temporal holdout delta between two annotation sets
+   (old → new). The ``stats`` JSONB column contains NK/LK/PK protein and
+   annotation counts, delta protein count, and per-namespace breakdowns.
+
+**EvaluationResult**
+   Stores the output of running ``cafaeval`` against a prediction set for
+   a given evaluation set. The ``results`` JSONB column contains per-category
+   (NK/LK/PK) per-namespace (BPO/MFO/CCO) Fmax, precision, recall, τ, and
+   coverage. Optionally references a ``ScoringConfig`` or ``RerankerModel``
+   when predictions were scored or re-ranked before evaluation.
+
+Support
+-------
+
+.. code-block:: text
+
+   ┌──────────────────────────┐
+   │      SupportEntry        │
+   │──────────────────────────│
+   │ id (UUID, PK)            │
+   │ comment (Text, nullable) │
+   │ created_at               │
+   └──────────────────────────┘
+
+**SupportEntry**
+   Community feedback: a thumbs-up with an optional comment (max 500 chars).
 
 Job queue
 ---------
diff --git a/docs/source/architecture/evaluation.rst b/docs/source/architecture/evaluation.rst
index 0b99954..47a7164 100644
--- a/docs/source/architecture/evaluation.rst
+++ b/docs/source/architecture/evaluation.rst
@@ -128,6 +128,143 @@ Data model
 
 See :doc:`../reference/infrastructure` for the full ORM schema.
 
+Benchmark: PROTEA vs external tools
+-------------------------------------
+
+PROTEA was benchmarked against three widely used function annotation tools
+using the temporal holdout GOA 220 → GOA 229 (NK: 2831, LK: 3410,
+PK: 15313 proteins). All evaluations use ``cafaeval`` with Information
+Accretion (IA) weighting from the CAFA6 benchmark.
+
+.. list-table:: Fmax (IA-weighted) — GOA 220 → 229
+   :header-rows: 1
+   :widths: 20 9 9 9 9 9 9 9 9 9
+
+   * - Method
+     - NK-BPO
+     - NK-MFO
+     - NK-CCO
+     - LK-BPO
+     - LK-MFO
+     - LK-CCO
+     - PK-BPO
+     - PK-MFO
+     - PK-CCO
+   * - Pannzer2 :sup:`†`
+     - 0.656
+     - 0.717
+     - 0.791
+     - 0.681
+     - 0.729
+     - 0.813
+     - 0.391
+     - 0.574
+     - 0.618
+   * - **PROTEA (re-ranker v3)**
+     - **0.431**
+     - **0.620**
+     - **0.692**
+     - **0.478**
+     - **0.607**
+     - **0.697**
+     - **0.201**
+     - **0.297**
+     - **0.339**
+   * - InterProScan 6 :sup:`†`
+     - 0.312
+     - 0.551
+     - 0.476
+     - 0.479
+     - 0.488
+     - 0.491
+     - 0.208
+     - 0.269
+     - 0.250
+   * - eggNOG-mapper 2.1.13 :sup:`†`
+     - 0.247
+     - 0.359
+     - 0.386
+     - 0.382
+     - 0.334
+     - 0.450
+     - 0.190
+     - 0.199
+     - 0.325
+
+:sup:`†` Subject to temporal data leakage — see below.
+
+Temporal data leakage
+~~~~~~~~~~~~~~~~~~~~~~
+
+Both Pannzer2 and eggNOG-mapper were executed in March 2026 against their
+**current** reference databases, which contain annotations published well
+after GOA 220 (the t0 snapshot). This means they have access to functional
+knowledge that is part of the ground truth.
+
+To quantify this leakage, we measured exact (protein, GO term) matches
+between each tool's predictions and the ground truth:
+
+.. list-table:: Exact match with ground truth
+   :header-rows: 1
+   :widths: 15 12 20 20
+
+   * - Category
+     - GT pairs
+     - Pannzer2 match
+     - eggNOG match
+   * - NK
+     - 6,953
+     - 4,339 (62.4%)
+     - 1,025 (14.7%)
+   * - LK
+     - 5,520
+     - 3,624 (65.7%)
+     - 1,087 (19.7%)
+   * - PK
+     - 27,541
+     - 12,410 (45.1%)
+     - 8,196 (29.8%)
+   * - **Total**
+     - **40,014**
+     - **20,373 (50.9%)**
+     - **10,308 (25.8%)**
+
+Pannzer2 exactly matches 62.4% of NK annotations — proteins that by
+definition had **no** experimental annotations at t0. This confirms that
+its reference database already contains the experimental evidence that
+appeared between GOA 220 and GOA 229.
+
+PROTEA is the only tool in this benchmark that enforces temporal integrity
+by design: the reference set is frozen at t0, the ground truth is computed
+as the delta, and all versions are tracked in the database. Pannzer2 and
+eggNOG-mapper numbers should be interpreted as an **optimistic upper
+bound** under data leakage, not as a fair comparison.
+
+.. note::
+   Running Pannzer2 or eggNOG-mapper against a frozen historical database
+   is not possible: the Pannzer2 web server does not offer version
+   selection, and eggNOG does not publish historical orthology snapshots.
+
+Evaluating external tools
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+External tools can be evaluated against the same ground truth using
+``scripts/evaluate_external_tool.py``:
+
+.. code-block:: bash
+
+   poetry run python scripts/evaluate_external_tool.py \
+       --evaluation-set-id <uuid> \
+       --tool emapper \
+       --input /path/to/annotations.emapper.annotations
+
+   poetry run python scripts/evaluate_external_tool.py \
+       --evaluation-set-id <uuid> \
+       --tool pannzer2 \
+       --input /path/to/anno.out
+
+Supported formats: ``emapper``, ``pannzer2``, ``interproscan``, ``blast``.
+
 Implementation reference
 -------------------------
 
diff --git a/docs/source/architecture/index.rst b/docs/source/architecture/index.rst
index 548db7a..9110d40 100644
--- a/docs/source/architecture/index.rst
+++ b/docs/source/architecture/index.rst
@@ -12,3 +12,4 @@ job lifecycle, and extension points.
    data_model
    operations
    evaluation
+   /adr/index
diff --git a/docs/source/architecture/operations.rst b/docs/source/architecture/operations.rst
index bef7075..48db9c2 100644
--- a/docs/source/architecture/operations.rst
+++ b/docs/source/architecture/operations.rst
@@ -37,9 +37,11 @@ The Operation protocol
    in real time, visible on the frontend timeline.
 
 ``OperationResult``
-   Frozen dataclass with three fields: ``result`` (stored in ``Job.meta``),
-   and optional ``progress_current`` / ``progress_total`` written back to
-   the ``Job`` row for the progress bar.
+   Frozen dataclass with four fields: ``result`` (stored in ``Job.meta``),
+   optional ``progress_current`` / ``progress_total`` written back to
+   the ``Job`` row for the progress bar, and ``deferred`` (bool) which tells
+   ``BaseWorker`` that job completion will be signalled by child workers
+   rather than immediately.
 
 Payload validation
 ------------------
@@ -568,6 +570,11 @@ Payload fields
    * - ``compute_taxonomy``
      - ``false``
      - Compute taxonomic distance (ete3 NCBITaxa) for each prediction.
+   * - ``compute_reranker_features``
+     - ``false``
+     - Compute 5 aggregate re-ranker features per prediction: ``vote_count``,
+       ``k_position``, ``go_term_frequency``, ``ref_annotation_density``, and
+       ``neighbor_distance_std``.
 
 Reference cache
 ~~~~~~~~~~~~~~~
@@ -731,6 +738,43 @@ SIGTERM handling
 ``SIGINT`` handlers to defaults so that forked pool workers can be terminated
 cleanly. The original handlers are restored afterwards.
 
+train_reranker
+--------------
+
+**Operation name:** ``train_reranker`` — queue: ``protea.jobs``
+
+Trains a LightGBM binary classifier that re-scores GO term predictions.
+Requires a ``PredictionSet`` (with feature engineering columns populated) and
+an ``EvaluationSet`` (temporal holdout delta) to derive binary labels.
+
+The training pipeline:
+
+1. Loads predictions and joins with ground-truth labels from the evaluation set.
+2. Prepares a feature matrix with 19 numeric and 3 categorical features.
+3. Trains with stratified train/validation split and ``is_unbalance=True``.
+4. Stores the serialized model, validation metrics (AUC, logloss, precision,
+   recall, F1), and feature importance in a ``RerankerModel`` row.
+
+Payload fields
+~~~~~~~~~~~~~~
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 10 60
+
+   * - Field
+     - Default
+     - Description
+   * - ``prediction_set_id``
+     - *(required)*
+     - UUID of the ``PredictionSet`` to use as training data.
+   * - ``evaluation_set_id``
+     - *(required)*
+     - UUID of the ``EvaluationSet`` providing ground-truth labels.
+
+**train_reranker_auto** is a convenience variant that auto-selects the most
+recent prediction set and evaluation set for training.
+
 Registering a new operation
 ----------------------------
 
diff --git a/docs/source/architecture/system_overview.rst b/docs/source/architecture/system_overview.rst
index 029b3a7..b7af516 100644
--- a/docs/source/architecture/system_overview.rst
+++ b/docs/source/architecture/system_overview.rst
@@ -92,7 +92,9 @@ Services and data stores
         - QueueConsumer
         - ``insert_proteins``, ``fetch_uniprot_metadata``, ``load_ontology_snapshot``,
           ``load_goa_annotations``, ``load_quickgo_annotations``,
-          ``compute_embeddings`` (coordinator), ``predict_go_terms`` (coordinator)
+          ``compute_embeddings`` (coordinator), ``predict_go_terms`` (coordinator),
+          ``generate_evaluation_set``, ``run_cafa_evaluation``,
+          ``train_reranker``, ``train_reranker_auto``
       * - ``protea.embeddings``
         - QueueConsumer
         - ``compute_embeddings`` coordinator (serialised: one at a time, 60 s retry delay if GPU busy)
@@ -168,28 +170,35 @@ Code layout
    protea/
      api/                 FastAPI application and routers
        routers/           jobs, proteins, annotations, embeddings,
-                          query_sets, maintenance, admin
+                          query_sets, maintenance, admin, scoring,
+                          annotate, showcase, support
      core/
        contracts/         Operation protocol, ProteaPayload, OperationResult
-       operations/        Domain logic (all 9 operations)
+       operations/        Domain logic (11 operation modules, 16 registered instances)
        knn_search.py      KNN backends: numpy brute-force and FAISS (Flat/IVFFlat/HNSW)
        feature_engineering.py  Alignment (parasail NW/SW) and taxonomy (ete3 NCBITaxa)
+       scoring.py         Scoring engine (weighted formulas, composite scores)
+       metrics.py         CAFA-style Fmax, precision, recall, coverage
+       evidence_codes.py  ECO→GO evidence code mapping
+       evaluation.py      CAFA5 evaluation protocol (NK/LK/PK delta)
+       reranker.py        LightGBM binary classifier for re-ranking predictions
        utils.py           UniProtHttpMixin, chunks(), utcnow()
      infrastructure/
        orm/models/        SQLAlchemy 2.x ORM models (protein, sequence, annotation,
-                          embedding, prediction, query, job)
+                          embedding, prediction, query, job, evaluation, scoring, support)
        queue/             RabbitMQ consumer (QueueConsumer, OperationConsumer) and publisher
+       logging.py         Structured JSON logging
        session.py         session_scope context manager
        settings.py        YAML + env-var config loader
      workers/
        base_worker.py     Two-session job lifecycle orchestrator
+       stale_job_reaper.py  Periodic cleanup of stuck RUNNING jobs
    apps/
      web/                 Next.js frontend
    scripts/
      manage.sh            Unified stack manager (start/stop/status/logs/scale)
-     worker.py            Worker entry point (registers all operations)
+     worker.py            Worker entry point (registers all 16 operations)
      init_db.py           Schema initialisation
-     run_one_job.py       Manual job runner (debugging)
 
 Technology stack
 ----------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2abff73..ee5d458 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -41,9 +41,18 @@ metadata enrichment, and job orchestration.
       :shadow: md
       :text-align: left
 
-      Built-in operations: insert_proteins, fetch_uniprot_metadata, ping. :bdg-success:`UniProt`
+      16 registered operations: ingestion, embedding, prediction, evaluation, re-ranking. :bdg-success:`UniProt`
       See :doc:`architecture/operations`.
 
+   .. grid-item-card:: Results
+      :link: results
+      :link-type: doc
+      :shadow: md
+      :text-align: left
+
+      Benchmark, ablation studies, and data leakage analysis. :bdg-danger:`Evaluation`
+      See :doc:`results`.
+
    .. grid-item-card:: API Reference
       :link: reference/index
       :link-type: doc
@@ -78,9 +87,10 @@ metadata enrichment, and job orchestration.
 .. admonition:: What is PROTEA?
    :class: tip
 
-   A job-orchestration platform for protein data pipelines: clean separation of
-   infrastructure, execution flow, and domain logic — designed for incremental
-   migration and horizontal scalability.
+   A platform for protein functional annotation: from sequence ingestion through
+   GPU embedding computation (ESM-2, ESM3c, T5) and KNN-based GO term prediction
+   to CAFA evaluation and LightGBM re-ranking — with clean separation of
+   infrastructure, execution flow, and domain logic.
 
 .. toctree::
    :caption: Documentation
@@ -89,6 +99,7 @@ metadata enrichment, and job orchestration.
    abstract
    introduction
    architecture/index
+   results
    appendix/index
 
 .. toctree::
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
index e8c03ea..4680240 100644
--- a/docs/source/introduction.rst
+++ b/docs/source/introduction.rst
@@ -43,6 +43,26 @@ The goal of PROTEA is not a complete rewrite. PIS tables (``protein``, ``sequenc
 into this architecture as new capabilities are added. Each migration step must preserve or
 improve computational efficiency and must not introduce regressions in the data model.
 
+Current capabilities
+---------------------
+
+PROTEA currently provides sixteen registered operations spanning the full protein
+functional annotation pipeline:
+
+- **Data ingestion** — ``insert_proteins``, ``fetch_uniprot_metadata``,
+  ``load_ontology_snapshot``, ``load_goa_annotations``, ``load_quickgo_annotations``
+- **Embedding computation** — ``compute_embeddings`` (coordinator),
+  ``compute_embeddings_batch``, ``store_embeddings``
+- **GO term prediction** — ``predict_go_terms`` (coordinator),
+  ``predict_go_terms_batch``, ``store_predictions``
+- **Evaluation** — ``generate_evaluation_set``, ``run_cafa_evaluation``
+- **Re-ranking** — ``train_reranker``, ``train_reranker_auto``
+- **Diagnostics** — ``ping``
+
+A scoring engine applies weighted formulas or trained LightGBM re-rankers to
+prediction sets. A one-click ``/annotate`` endpoint automates the entire workflow
+from FASTA upload to GO term prediction.
+
 .. admonition:: Design principle
    :class: note
 
diff --git a/docs/source/reference/api.rst b/docs/source/reference/api.rst
index 8400643..4b1235e 100644
--- a/docs/source/reference/api.rst
+++ b/docs/source/reference/api.rst
@@ -1,7 +1,7 @@
 HTTP API
 ========
 
-The PROTEA HTTP API is a FastAPI application that exposes six routers.
+The PROTEA HTTP API is a FastAPI application that exposes eleven routers.
 All state mutations flow through this layer: it writes ``Job`` rows to
 PostgreSQL and publishes messages to RabbitMQ. The API is stateless between
 requests — the session factory and AMQP URL are injected via ``app.state``
@@ -87,14 +87,43 @@ new UUID, preserving reproducibility.
 Prediction sets are created by submitting a ``predict_go_terms`` job and
 are queryable once the job completes. The
 ``GET /embeddings/prediction-sets/{id}/predictions.tsv`` endpoint streams
-prediction results as a tab-separated file using ``StreamingResponse`` with
-``yield_per(1000)``, avoiding loading the full result set into memory.
+prediction results as a tab-separated file (32 columns including re-ranker
+features) using ``StreamingResponse`` with ``yield_per(1000)``, avoiding
+loading the full result set into memory.
 
 .. automodule:: protea.api.routers.embeddings
    :members:
    :undoc-members:
    :show-inheritance:
 
+Scoring router
+--------------
+
+The ``/scoring`` router provides endpoints for training and applying LightGBM
+re-ranker models. The re-ranker is a binary classifier trained on temporal
+holdout data: predictions made with annotations at time t0 are labeled against
+ground truth derived from t1 annotations.
+
+Key endpoints:
+
+- ``GET /scoring/prediction-sets/{id}/training-data.tsv`` — generates a
+  31-column TSV with binary labels from temporal ground truth, suitable for
+  LightGBM training.
+- ``POST /scoring/rerankers/train`` — trains a LightGBM model from a
+  PredictionSet + EvaluationSet pair and stores it in the DB.
+- ``GET /scoring/rerankers`` / ``GET`` and ``DELETE /scoring/rerankers/{id}`` —
+  list, retrieve, and delete trained re-ranker models.
+- ``GET /scoring/prediction-sets/{id}/rerank.tsv`` — applies a trained
+  re-ranker to a prediction set, streaming re-scored predictions.
+- ``GET /scoring/prediction-sets/{id}/reranker-metrics`` — computes CAFA-style
+  Fmax and AUC-PR using re-ranker probability scores.
+
+.. automodule:: protea.api.routers.scoring
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :no-index:
+
 Query sets router
 -----------------
 
@@ -110,6 +139,72 @@ in ``compute_embeddings`` and ``predict_go_terms`` job payloads.
    :undoc-members:
    :show-inheritance:
 
+Annotate router
+---------------
+
+The ``/annotate`` router provides a one-click annotation endpoint. It accepts
+a FASTA file (or raw text), auto-selects the best available embedding config,
+annotation set, and ontology snapshot, creates a ``QuerySet``, and queues a
+``compute_embeddings`` job. It returns all the IDs the frontend needs to chain
+``predict_go_terms`` once embeddings finish.
+
+.. automodule:: protea.api.routers.annotate
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Maintenance router
+------------------
+
+The ``/maintenance`` router provides housekeeping endpoints for identifying
+and removing orphaned data. Two pairs of preview/execute endpoints handle
+orphan sequences (not referenced by any ``Protein`` or ``QuerySetEntry``) and
+unindexed embeddings (for sequences not referenced by any ``Protein``).
+Preview endpoints are read-only; execute endpoints perform the actual deletion.
+
+.. automodule:: protea.api.routers.maintenance
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Admin router
+------------
+
+The ``/admin`` router exposes destructive administrative operations.
+It currently provides ``POST /admin/reset-db``, which drops and recreates
+the public schema and re-applies all Alembic migrations. The endpoint is
+protected by a bearer token (``PROTEA_ADMIN_TOKEN`` environment variable).
+
+.. automodule:: protea.api.routers.admin
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Showcase router
+---------------
+
+The ``/showcase`` router aggregates platform statistics and best evaluation
+results for the landing page. Returns protein counts, embedding counts,
+prediction counts, best Fmax per aspect per evaluation category (NK/LK/PK),
+and a method comparison table — all in a single JSON response.
+
+.. automodule:: protea.api.routers.showcase
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Support router
+--------------
+
+The ``/support`` router handles community feedback. ``GET /support`` returns
+the total thumbs-up count and recent comments. ``POST /support`` submits a
+new thumbs-up with an optional comment (max 500 characters).
+
+.. automodule:: protea.api.routers.support
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Endpoints summary
 -----------------
 
@@ -120,6 +215,20 @@ Endpoints summary
    * - Method
      - Path
      - Description
+
+   * -
+     - **Health**
+     -
+   * - ``GET``
+     - ``/health``
+     - Liveness probe — returns 200 if the API process is up.
+   * - ``GET``
+     - ``/health/ready``
+     - Readiness probe — verifies database and RabbitMQ connections.
+
+   * -
+     - **Jobs**
+     -
    * - ``POST``
      - ``/jobs``
      - Create a job and publish its UUID to RabbitMQ.
@@ -138,18 +247,38 @@ Endpoints summary
    * - ``DELETE``
      - ``/jobs/{id}``
      - Delete a job that is not in ``RUNNING`` status.
+
+   * -
+     - **Proteins**
+     -
+   * - ``GET``
+     - ``/proteins/stats``
+     - Aggregate protein statistics (total, canonical, reviewed, organisms).
    * - ``GET``
      - ``/proteins``
      - List proteins with pagination; filter by ``organism`` / ``reviewed``.
    * - ``GET``
      - ``/proteins/{accession}``
      - Retrieve a single protein with its UniProt metadata.
+   * - ``GET``
+     - ``/proteins/{accession}/annotations``
+     - List GO annotations for a protein across all annotation sets.
+
+   * -
+     - **Annotations**
+     -
    * - ``GET``
      - ``/annotations/snapshots``
      - List ontology snapshots with GO term counts per aspect.
    * - ``GET``
      - ``/annotations/snapshots/{id}``
      - Retrieve a snapshot with its full list of GO terms.
+   * - ``PATCH``
+     - ``/annotations/snapshots/{id}/ia-url``
+     - Set the Information Accretion (IA) file URL on an ontology snapshot.
+   * - ``POST``
+     - ``/annotations/snapshots/load``
+     - Queue a ``load_ontology_snapshot`` job.
    * - ``GET``
      - ``/annotations/snapshots/{id}/subgraph``
      - BFS ancestor subgraph for a given set of GO term IDs.
@@ -159,6 +288,61 @@ Endpoints summary
    * - ``GET``
      - ``/annotations/sets/{id}``
      - Retrieve a single annotation set with summary statistics.
+   * - ``DELETE``
+     - ``/annotations/sets/{id}``
+     - Delete an annotation set and all its annotations.
+   * - ``POST``
+     - ``/annotations/sets/load-goa``
+     - Queue a ``load_goa_annotations`` job.
+   * - ``POST``
+     - ``/annotations/sets/load-quickgo``
+     - Queue a ``load_quickgo_annotations`` job.
+   * - ``POST``
+     - ``/annotations/evaluation-sets/generate``
+     - Queue a ``generate_evaluation_set`` job.
+   * - ``GET``
+     - ``/annotations/evaluation-sets``
+     - List evaluation sets with summary statistics.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}``
+     - Get evaluation set details.
+   * - ``DELETE``
+     - ``/annotations/evaluation-sets/{id}``
+     - Delete an evaluation set.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/ground-truth-NK.tsv``
+     - Download NK ground truth in CAFA format.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/ground-truth-LK.tsv``
+     - Download LK ground truth in CAFA format.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/ground-truth-PK.tsv``
+     - Download PK ground truth in CAFA format.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/known-terms.tsv``
+     - Download known terms from old annotation set (for PK evaluation).
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/delta-proteins.fasta``
+     - Download delta proteins as FASTA.
+   * - ``POST``
+     - ``/annotations/evaluation-sets/{id}/run``
+     - Queue a ``run_cafa_evaluation`` job.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/results``
+     - List evaluation results for an evaluation set.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/results/{rid}/metrics.tsv``
+     - Download evaluation metrics as TSV.
+   * - ``GET``
+     - ``/annotations/evaluation-sets/{id}/results/{rid}/artifacts.zip``
+     - Download all cafaeval artifacts as a zip.
+   * - ``DELETE``
+     - ``/annotations/evaluation-sets/{id}/results/{rid}``
+     - Delete an evaluation result.
+
+   * -
+     - **Embeddings**
+     -
    * - ``GET``
      - ``/embeddings/configs``
      - List all embedding configurations.
@@ -168,6 +352,12 @@ Endpoints summary
    * - ``GET``
      - ``/embeddings/configs/{id}``
      - Retrieve an embedding configuration by UUID.
+   * - ``DELETE``
+     - ``/embeddings/configs/{id}``
+     - Delete an embedding configuration.
+   * - ``POST``
+     - ``/embeddings/predict``
+     - Queue a ``predict_go_terms`` job.
    * - ``GET``
      - ``/embeddings/prediction-sets``
      - List prediction sets with entry counts.
@@ -175,11 +365,73 @@ Endpoints summary
      - ``/embeddings/prediction-sets/{id}``
      - Retrieve a prediction set with summary statistics.
    * - ``GET``
-     - ``/embeddings/prediction-sets/{id}/predictions``
-     - List GO predictions for a set (paginated JSON).
+     - ``/embeddings/prediction-sets/{id}/proteins``
+     - List proteins in a prediction set.
+   * - ``GET``
+     - ``/embeddings/prediction-sets/{id}/proteins/{accession}``
+     - Get predictions for one protein.
+   * - ``GET``
+     - ``/embeddings/prediction-sets/{id}/go-terms``
+     - GO term distribution in a prediction set.
    * - ``GET``
      - ``/embeddings/prediction-sets/{id}/predictions.tsv``
-     - Stream all predictions as a TSV file (27 columns, filtered by accession / aspect / distance).
+     - Stream all predictions as TSV (filtered by accession / aspect / distance).
+   * - ``GET``
+     - ``/embeddings/prediction-sets/{id}/predictions-cafa.tsv``
+     - Download predictions in CAFA submission format.
+   * - ``DELETE``
+     - ``/embeddings/prediction-sets/{id}``
+     - Delete a prediction set.
+
+   * -
+     - **Scoring**
+     -
+   * - ``GET``
+     - ``/scoring/configs``
+     - List scoring configurations.
+   * - ``POST``
+     - ``/scoring/configs``
+     - Create a scoring configuration.
+   * - ``POST``
+     - ``/scoring/configs/presets``
+     - Create preset scoring configurations.
+   * - ``GET``
+     - ``/scoring/configs/{id}``
+     - Retrieve a scoring configuration.
+   * - ``DELETE``
+     - ``/scoring/configs/{id}``
+     - Delete a scoring configuration.
+   * - ``GET``
+     - ``/scoring/prediction-sets/{id}/score.tsv``
+     - Stream scored predictions as TSV.
+   * - ``GET``
+     - ``/scoring/prediction-sets/{id}/metrics``
+     - Compute CAFA-style metrics for scored predictions.
+   * - ``GET``
+     - ``/scoring/prediction-sets/{id}/training-data.tsv``
+     - Export labeled training data for the re-ranker.
+   * - ``POST``
+     - ``/scoring/rerankers/train``
+     - Train a LightGBM re-ranker from a PredictionSet + EvaluationSet.
+   * - ``GET``
+     - ``/scoring/rerankers``
+     - List all trained re-ranker models.
+   * - ``GET``
+     - ``/scoring/rerankers/{id}``
+     - Retrieve a re-ranker model's metadata, metrics, and feature importance.
+   * - ``DELETE``
+     - ``/scoring/rerankers/{id}``
+     - Delete a trained re-ranker model.
+   * - ``GET``
+     - ``/scoring/prediction-sets/{id}/rerank.tsv``
+     - Apply a re-ranker to a prediction set and stream re-scored TSV.
+   * - ``GET``
+     - ``/scoring/prediction-sets/{id}/reranker-metrics``
+     - Compute CAFA Fmax and AUC-PR using re-ranker scores.
+
+   * -
+     - **Query Sets**
+     -
    * - ``POST``
      - ``/query-sets``
      - Upload a FASTA file and create a ``QuerySet``.
@@ -193,6 +445,53 @@ Endpoints summary
      - ``/query-sets/{id}``
      - Delete a query set and all its entries.
 
+   * -
+     - **Annotate**
+     -
+   * - ``POST``
+     - ``/annotate``
+     - One-click annotation: upload FASTA, auto-run the full pipeline.
+
+   * -
+     - **Maintenance**
+     -
+   * - ``GET``
+     - ``/maintenance/vacuum-sequences/preview``
+     - Count orphan sequences (preview).
+   * - ``POST``
+     - ``/maintenance/vacuum-sequences``
+     - Delete orphan sequences.
+   * - ``GET``
+     - ``/maintenance/vacuum-embeddings/preview``
+     - Count unindexed embeddings (preview).
+   * - ``POST``
+     - ``/maintenance/vacuum-embeddings``
+     - Delete unindexed embeddings.
+
+   * -
+     - **Admin**
+     -
+   * - ``POST``
+     - ``/admin/reset-db``
+     - Drop and recreate the public schema (requires admin token).
+
+   * -
+     - **Showcase**
+     -
+   * - ``GET``
+     - ``/showcase``
+     - Platform statistics and best evaluation results.
+
+   * -
+     - **Support**
+     -
+   * - ``GET``
+     - ``/support``
+     - Total thumbs-up count and recent comments.
+   * - ``POST``
+     - ``/support``
+     - Submit a thumbs-up with optional comment.
+
 Request body for ``POST /jobs``
 --------------------------------
 
@@ -234,6 +533,7 @@ Common payload examples by operation:
        "query_set_id": "<uuid>",
        "k": 5,
        "compute_alignments": false,
-       "compute_taxonomy": false
+       "compute_taxonomy": false,
+       "compute_reranker_features": false
      }
    }
diff --git a/docs/source/reference/core.rst b/docs/source/reference/core.rst
index 799ed5d..13f7316 100644
--- a/docs/source/reference/core.rst
+++ b/docs/source/reference/core.rst
@@ -120,6 +120,73 @@ traversals across a batch.
    :undoc-members:
    :show-inheritance:
 
+Re-ranker
+---------
+
+``protea.core.reranker`` implements a LightGBM binary classifier that
+re-scores GO term predictions using 19 numeric features (embedding distance,
+NW/SW alignment metrics, sequence lengths, taxonomic distance, and 5
+aggregate re-ranker signals) plus 3 categorical features (qualifier,
+evidence code, taxonomic relation).
+
+The module provides:
+
+- ``prepare_dataset(df)`` — extracts and coerces feature columns.
+- ``train(df)`` — stratified train/val split with ``is_unbalance=True``,
+  returns a ``TrainResult`` with the model, validation metrics (AUC,
+  logloss, precision, recall, F1), and feature importance.
+- ``predict(model, df)`` — returns probability scores in [0, 1].
+- ``model_to_string()`` / ``model_from_string()`` — serialization for DB
+  storage in the ``RerankerModel`` table.
+- ``load_training_tsv()`` — parses a training data TSV as produced by the
+  ``/scoring/prediction-sets/{id}/training-data.tsv`` endpoint.
+
+.. automodule:: protea.core.reranker
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Scoring
+-------
+
+``protea.core.scoring`` implements the scoring engine that applies weighted
+formulas to GO predictions. A ``ScoringConfig`` defines a set of weights for
+each feature column (embedding distance, alignment metrics, taxonomy, re-ranker
+features). The engine computes a composite score per prediction row and can
+stream scored results as TSV or compute CAFA-style metrics (Fmax, AUC-PR)
+against an evaluation set.
+
+.. automodule:: protea.core.scoring
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Metrics
+-------
+
+``protea.core.metrics`` implements CAFA-style precision-recall evaluation.
+Provides functions for computing Fmax (maximum F-measure over all thresholds),
+weighted precision/recall, and coverage for a set of predictions against
+ground-truth annotations.
+
+.. automodule:: protea.core.metrics
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Evidence codes
+--------------
+
+``protea.core.evidence_codes`` provides mappings between ECO (Evidence and
+Conclusion Ontology) identifiers and GO evidence codes used in GAF files.
+Used by the QuickGO annotation loader to resolve ECO IDs to standard
+three-letter evidence codes.
+
+.. automodule:: protea.core.evidence_codes
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Evaluation
 ----------
 
@@ -159,7 +226,7 @@ different namespaces simultaneously (e.g., LK in CCO and PK in BPO).
 Operations
 ----------
 
-PROTEA ships ten operations, all registered at worker startup in
+PROTEA ships sixteen operation instances, all registered at worker startup in
 ``scripts/worker.py``. Each operation is a class that implements the
 ``Operation`` protocol: a ``name`` string and an ``execute`` method.
 Operations are stateless with respect to infrastructure — they receive a
@@ -284,3 +351,16 @@ transactions.
    :members:
    :undoc-members:
    :show-inheritance:
+
+**train_reranker**
+   Trains a LightGBM binary classifier re-ranker from a PredictionSet +
+   EvaluationSet pair. Uses temporal holdout labels and 22 features (embedding
+   distance, alignment metrics, taxonomy, aggregate signals). Stores the
+   serialized model, validation metrics, and feature importance in a
+   ``RerankerModel`` row. ``TrainRerankerAutoOperation`` is a convenience
+   variant that auto-selects the most recent prediction set and evaluation set.
+
+.. automodule:: protea.core.operations.train_reranker
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
index e30c118..315010b 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@@ -9,21 +9,22 @@ The reference is organised into four pages:
 
 :doc:`core`
    The domain layer: the ``Operation`` protocol and ``OperationRegistry``,
-   shared HTTP utilities, KNN search backends, feature engineering functions,
-   and all eight registered operations.
+   shared HTTP utilities, KNN search backends, feature engineering, scoring,
+   metrics, evaluation, re-ranker, and all registered operations.
 
 :doc:`infrastructure`
-   The persistence and messaging layer: SQLAlchemy ORM models, session
-   management, RabbitMQ publisher and consumer, and the configuration loader.
+   The persistence and messaging layer: SQLAlchemy ORM models (including
+   evaluation sets, scoring configs, and support entries), session management,
+   logging, RabbitMQ publisher and consumer, and the configuration loader.
 
 :doc:`api`
-   The HTTP API: FastAPI application factory, all six routers, and a
-   summary table of all 21 public endpoints.
+   The HTTP API: FastAPI application factory, all eleven routers, and a
+   complete endpoint summary table.
 
 :doc:`workers`
    The execution layer: ``BaseWorker`` (two-session job lifecycle),
-   worker entry points, and the ``QueueConsumer`` / ``OperationConsumer``
-   distinction.
+   ``StaleJobReaper``, worker entry points, and the ``QueueConsumer`` /
+   ``OperationConsumer`` distinction.
 
 .. toctree::
    :maxdepth: 2
diff --git a/docs/source/reference/infrastructure.rst b/docs/source/reference/infrastructure.rst
index a03c854..0f4d0fc 100644
--- a/docs/source/reference/infrastructure.rst
+++ b/docs/source/reference/infrastructure.rst
@@ -140,6 +140,24 @@ annotation date.
    :undoc-members:
    :show-inheritance:
 
+**Evaluation Sets**
+
+``EvaluationSet`` stores the CAFA-style temporal holdout delta between two
+annotation sets (old → new). Contains summary statistics (NK/LK/PK protein and
+annotation counts) in a JSONB ``stats`` column. ``EvaluationResult`` stores the
+output of running ``cafaeval`` against a prediction set: per-namespace Fmax,
+precision, recall, τ, and coverage for NK, LK, and PK settings.
+
+.. automodule:: protea.infrastructure.orm.models.annotation.evaluation_set
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: protea.infrastructure.orm.models.annotation.evaluation_result
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 **Embeddings**
 
 ``EmbeddingConfig`` defines a reproducible embedding recipe: model identifier,
@@ -171,7 +189,9 @@ neighbour queries are performed in Python via ``protea.core.knn_search``.
 annotation set, and ontology snapshot used, making every prediction set
 fully reproducible. ``GOPrediction`` stores one row per (query protein,
 GO term, reference protein) triple. The 14 optional feature-engineering
-columns (alignment statistics and taxonomy fields) are ``NULL`` unless the
+columns (alignment statistics and taxonomy fields) and 5 re-ranker aggregate
+features (``vote_count``, ``k_position``, ``go_term_frequency``,
+``ref_annotation_density``, ``neighbor_distance_std``) are ``NULL`` unless the
 corresponding flags were set in the prediction payload.
 
 .. automodule:: protea.infrastructure.orm.models.embedding.prediction_set
@@ -184,6 +204,39 @@ corresponding flags were set in the prediction payload.
    :undoc-members:
    :show-inheritance:
 
+**Re-ranker Models**
+
+``RerankerModel`` stores a trained LightGBM binary classifier for re-scoring
+GO term predictions. Each row contains the serialized model string, validation
+metrics (AUC, logloss, precision, recall, F1), feature importance, and
+references to the ``PredictionSet`` and ``EvaluationSet`` used for training.
+
+.. automodule:: protea.infrastructure.orm.models.embedding.reranker_model
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+**Scoring Configurations**
+
+``ScoringConfig`` defines a set of feature weights and parameters for scoring
+GO predictions. Each config is a named, immutable recipe that can be applied
+to any prediction set to produce a composite score per prediction row.
+
+.. automodule:: protea.infrastructure.orm.models.embedding.scoring_config
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+**Support Entries**
+
+``SupportEntry`` stores community feedback: a thumbs-up with an optional
+comment. Used by the ``/support`` router.
+
+.. automodule:: protea.infrastructure.orm.models.support_entry
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 **Query Sets**
 
 ``QuerySet`` represents a user-uploaded FASTA dataset. ``QuerySetEntry``
@@ -197,6 +250,19 @@ avoiding redundant embedding computation.
    :undoc-members:
    :show-inheritance:
 
+Logging
+-------
+
+``protea.infrastructure.logging`` provides structured JSON logging via a
+custom ``JSONFormatter``. The ``configure_logging()`` function sets up the
+root logger with either JSON or plain text output, used by worker processes
+and the API server.
+
+.. automodule:: protea.infrastructure.logging
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Queue
 -----
 
diff --git a/docs/source/reference/workers.rst b/docs/source/reference/workers.rst
index 1d3d69c..8abbed3 100644
--- a/docs/source/reference/workers.rst
+++ b/docs/source/reference/workers.rst
@@ -63,14 +63,10 @@ operation routed to its queue.
    bash scripts/manage.sh start [N]
 
    # Start a single worker manually (for debugging)
-   poetry run python scripts/worker.py protea.jobs
+   poetry run python scripts/worker.py --queue protea.jobs
 
-   # Run a single queued job by UUID (bypasses RabbitMQ entirely)
-   poetry run python scripts/run_one_job.py <job-id>
-
-The ``run_one_job.py`` script loads the job from the database, executes it
-through ``BaseWorker``, and exits. No RabbitMQ connection is required. This
-is the recommended way to debug a failing job without re-queuing it.
+   # Start the stale job reaper (periodic cleanup process)
+   poetry run python scripts/worker.py --queue reaper
 
 QueueConsumer vs OperationConsumer
 -----------------------------------
@@ -100,3 +96,17 @@ selected by the queue configuration in ``scripts/worker.py``:
    - ``protea.embeddings.write`` — bulk pgvector insert
    - ``protea.predictions.batch`` — KNN search + GO transfer
    - ``protea.predictions.write`` — bulk GOPrediction insert
+
+Stale job reaper
+----------------
+
+The ``StaleJobReaper`` is a periodic background process that scans for jobs
+stuck in ``RUNNING`` status beyond a configurable timeout (default: 21 600
+seconds = 6 hours). It marks them as ``FAILED`` with error code
+``JobTimeout``. The reaper runs every 60 seconds and is started via
+``scripts/worker.py --queue reaper``.
+
+.. automodule:: protea.workers.stale_job_reaper
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/results.rst b/docs/source/results.rst
new file mode 100644
index 0000000..960bc4f
--- /dev/null
+++ b/docs/source/results.rst
@@ -0,0 +1,412 @@
+Results
+=======
+
+This chapter presents the experimental evaluation of PROTEA's GO term prediction
+pipeline. All experiments use the same temporal holdout (GOA 220 → GOA 229) and
+are scored with ``cafaeval`` using Information Accretion (IA) weighting from the
+CAFA6 benchmark.
+
+Experimental setup
+------------------
+
+**Temporal holdout.** GOA release 220 serves as the reference snapshot (t0) and
+GOA release 229 as the ground truth (t1). Proteins that gained new experimental
+GO annotations between t0 and t1 form the test set:
+
+- **NK** (No-Knowledge): 2 831 proteins — no experimental annotations at t0
+- **LK** (Limited-Knowledge): 3 410 proteins — annotations in some namespaces at t0, new in others
+- **PK** (Partial-Knowledge): 15 313 proteins — additional annotations in an already-annotated namespace
+
+See :doc:`architecture/evaluation` for the full protocol and NK/LK/PK
+classification rules.
+
+**Embeddings.** 527 000 ESM-C 300M embeddings (dimension 960) computed over the
+reference protein set frozen at GOA 220.
+
+**Evaluator.** ``cafaeval`` with IA weighting, ``prop=max``, ``norm=cafa``.
+Metrics are reported as Fmax per (category, namespace) — 9 cells:
+NK/LK/PK × BPO/MFO/CCO.
+
+Ablation studies
+----------------
+
+Effect of k (number of neighbours)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Scoring: baseline (``1 − distance/2``), aspect-separated KNN index.
+
+.. list-table:: Fmax vs. k
+   :header-rows: 1
+   :widths: 8 10 10 10 10 10 10 10 10 10
+
+   * - k
+     - NK-BPO
+     - NK-MFO
+     - NK-CCO
+     - LK-BPO
+     - LK-MFO
+     - LK-CCO
+     - PK-BPO
+     - PK-MFO
+     - PK-CCO
+   * - **5**
+     - **0.412**
+     - **0.590**
+     - **0.668**
+     - **0.467**
+     - **0.558**
+     - **0.676**
+     - **0.187**
+     - **0.278**
+     - **0.325**
+   * - 10
+     - 0.400
+     - 0.574
+     - 0.656
+     - 0.458
+     - 0.537
+     - 0.663
+     - 0.177
+     - 0.272
+     - 0.317
+   * - 20
+     - 0.396
+     - 0.564
+     - 0.649
+     - 0.454
+     - 0.528
+     - 0.654
+     - 0.173
+     - 0.269
+     - 0.313
+   * - 50
+     - 0.396
+     - 0.555
+     - 0.646
+     - 0.452
+     - 0.523
+     - 0.651
+     - 0.173
+     - 0.269
+     - 0.312
+
+Performance degrades monotonically with k. k = 5 is optimal across all
+categories — additional neighbours introduce noise without improving recall.
+
+Scoring configurations
+~~~~~~~~~~~~~~~~~~~~~~~
+
+With k = 5 fixed, five scoring strategies were evaluated. All use the same
+prediction set; only the post-hoc score computation differs.
+
+.. list-table:: Fmax by scoring configuration
+   :header-rows: 1
+   :widths: 20 9 9 9 9 9 9 9 9 9
+
+   * - Config
+     - NK-BPO
+     - NK-MFO
+     - NK-CCO
+     - LK-BPO
+     - LK-MFO
+     - LK-CCO
+     - PK-BPO
+     - PK-MFO
+     - PK-CCO
+   * - embedding_only
+     - 0.412
+     - 0.590
+     - 0.668
+     - 0.467
+     - 0.558
+     - 0.675
+     - 0.187
+     - 0.278
+     - 0.325
+   * - **alignment_weighted**
+     - **0.428**
+     - **0.611**
+     - **0.683**
+     - **0.500**
+     - **0.598**
+     - **0.699**
+     - **0.201**
+     - **0.285**
+     - **0.337**
+   * - evidence_primary
+     - 0.362
+     - 0.558
+     - 0.638
+     - 0.412
+     - 0.540
+     - 0.642
+     - 0.165
+     - 0.268
+     - 0.308
+   * - embedding_plus_evidence
+     - 0.352
+     - 0.531
+     - 0.618
+     - 0.387
+     - 0.517
+     - 0.626
+     - 0.162
+     - 0.250
+     - 0.300
+   * - composite
+     - 0.364
+     - 0.560
+     - 0.639
+     - 0.412
+     - 0.542
+     - 0.642
+     - 0.167
+     - 0.267
+     - 0.307
+
+The ``alignment_weighted`` configuration (embedding 0.5, NW 0.3, SW 0.2)
+beats the embedding-only baseline in every cell, by 0.007–0.040 Fmax (absolute).
+Configurations that incorporate evidence-code weighting consistently
+underperform the baseline — the evidence signal hurts ranking under
+IA-weighted ``cafaeval`` scoring.
+
+Re-ranker progression
+~~~~~~~~~~~~~~~~~~~~~~
+
+PROTEA includes a LightGBM-based re-ranker trained on temporal splits of GOA
+releases (GOA 160 through 220, 13 splits). Each split provides ground truth for
+supervised training. The re-ranker was developed iteratively:
+
+**v1** — 9 models (one per category × namespace). Class imbalance caused 6 of 9
+models to early-stop at iteration 1. Balancing with ``neg_pos_ratio=10`` fixed
+training but NK-MFO degraded (0.577 vs 0.611 heuristic).
+
+**v2** — 3 models (one per category: NK, LK, PK). IA values used as sample
+weights during training. Learning rate reduced to 0.01, rounds increased to
+1 000. NK-MFO stabilised (0.607) but did not surpass the heuristic globally.
+
+**v3** — Same architecture as v2 but with full alignment (NW/SW) and taxonomy
+features computed during training data generation (previously hardcoded to NULL).
+22 input features total.
+
+.. list-table:: Re-ranker progression — Fmax
+   :header-rows: 1
+   :widths: 22 9 9 9 9 9 9 9 9 9
+
+   * - Method
+     - NK-BPO
+     - NK-MFO
+     - NK-CCO
+     - LK-BPO
+     - LK-MFO
+     - LK-CCO
+     - PK-BPO
+     - PK-MFO
+     - PK-CCO
+   * - baseline (emb only)
+     - 0.412
+     - 0.590
+     - 0.668
+     - 0.467
+     - 0.558
+     - 0.675
+     - 0.187
+     - 0.278
+     - 0.325
+   * - alignment_weighted
+     - 0.428
+     - 0.611
+     - 0.683
+     - 0.500
+     - 0.598
+     - 0.699
+     - 0.201
+     - 0.285
+     - 0.337
+   * - re-ranker v1 (balanced)
+     - 0.408
+     - 0.577
+     - 0.687
+     - 0.478
+     - 0.506
+     - 0.711
+     - 0.201
+     - 0.298
+     - 0.332
+   * - re-ranker v2 (13 splits)
+     - 0.425
+     - 0.607
+     - 0.689
+     - 0.486
+     - 0.575
+     - 0.707
+     - 0.199
+     - 0.297
+     - 0.335
+   * - **re-ranker v3 (full features)**
+     - **0.431**
+     - **0.620**
+     - **0.692**
+     - 0.478
+     - **0.607**
+     - 0.697
+     - **0.201**
+     - 0.297
+     - **0.339**
+
+The v3 re-ranker surpasses the ``alignment_weighted`` heuristic in 6 of 9 cells
+and ties it in PK-BPO (0.201). The largest gains are in MFO (+0.012 PK, +0.009
+NK, +0.009 LK) and NK-CCO (+0.009). It loses only in LK-BPO (0.478 vs 0.500)
+and LK-CCO (0.697 vs 0.699). The key insight is that alignment features were
+critical — v2 had access to the same model architecture but trained without them.
+
+Benchmark against external tools
+---------------------------------
+
+PROTEA (re-ranker v3) was benchmarked against three widely used GO annotation
+tools using the same temporal holdout (GOA 220 → 229). All evaluations use
+``cafaeval`` with IA weighting.
+
+.. list-table:: Fmax (IA-weighted) — GOA 220 → 229
+   :header-rows: 1
+   :widths: 22 9 9 9 9 9 9 9 9 9
+
+   * - Method
+     - NK-BPO
+     - NK-MFO
+     - NK-CCO
+     - LK-BPO
+     - LK-MFO
+     - LK-CCO
+     - PK-BPO
+     - PK-MFO
+     - PK-CCO
+   * - Pannzer2 :sup:`†`
+     - 0.656
+     - 0.717
+     - 0.791
+     - 0.681
+     - 0.729
+     - 0.813
+     - 0.391
+     - 0.574
+     - 0.618
+   * - **PROTEA (re-ranker v3)**
+     - **0.431**
+     - **0.620**
+     - **0.692**
+     - **0.478**
+     - **0.607**
+     - **0.697**
+     - **0.201**
+     - **0.297**
+     - **0.339**
+   * - InterProScan 6 :sup:`†`
+     - 0.312
+     - 0.551
+     - 0.476
+     - 0.479
+     - 0.488
+     - 0.491
+     - 0.208
+     - 0.269
+     - 0.250
+   * - eggNOG-mapper 2.1.13 :sup:`†`
+     - 0.247
+     - 0.359
+     - 0.386
+     - 0.382
+     - 0.334
+     - 0.450
+     - 0.190
+     - 0.199
+     - 0.325
+
+:sup:`†` Subject to temporal data leakage — see below.
+
+**Tool details:**
+
+- **Pannzer2**: Helsinki web server (March 2026), ARGOT method, PPV-calibrated scores. Coverage: 98.4 % of delta proteins.
+- **InterProScan 6**: Nextflow pipeline (v6.0.0, Docker profile), March 2026. Binary predictions (score = 1.0).
+- **eggNOG-mapper 2.1.13**: Diamond mode, eggNOG v5.0.2. Coverage: 85.5 %. Binary predictions.
+- **PROTEA**: ESM-C embeddings frozen at GOA 220, LightGBM re-ranker v3, k = 5. Coverage: 100 %.
+
+Temporal data leakage
+~~~~~~~~~~~~~~~~~~~~~~
+
+Pannzer2, InterProScan, and eggNOG-mapper were executed in March 2026 against
+their **current** reference databases, which contain annotations published well
+after GOA 220 (the t0 snapshot). This means they have access to functional
+knowledge that is part of the ground truth.
+
+To quantify this leakage, exact (protein, GO term) matches between each tool's
+predictions and the ground truth were measured:
+
+.. list-table:: Exact match with ground truth
+   :header-rows: 1
+   :widths: 15 12 20 20
+
+   * - Category
+     - GT pairs
+     - Pannzer2 match
+     - eggNOG match
+   * - NK
+     - 6 953
+     - 4 339 (62.4 %)
+     - 1 025 (14.7 %)
+   * - LK
+     - 5 520
+     - 3 624 (65.7 %)
+     - 1 087 (19.7 %)
+   * - PK
+     - 27 541
+     - 12 410 (45.1 %)
+     - 8 196 (29.8 %)
+   * - **Total**
+     - **40 014**
+     - **20 373 (50.9 %)**
+     - **10 308 (25.8 %)**
+
+Pannzer2 exactly matches 62.4 % of NK annotations — proteins that by definition
+had **no** experimental annotations at t0. This confirms that its reference
+database already contains the experimental evidence that appeared between GOA 220
+and GOA 229.
+
+PROTEA is the only tool in this benchmark that enforces temporal integrity by
+design: the reference set is frozen at t0, the ground truth is computed as the
+delta, and all versions are tracked in the database. Results for Pannzer2,
+InterProScan, and eggNOG-mapper should be interpreted as an **optimistic upper
+bound** under data leakage, not as a fair comparison.
+
+.. note::
+   Running Pannzer2 or eggNOG-mapper against a frozen historical database is not
+   possible: the Pannzer2 web server does not offer version selection, and eggNOG
+   does not publish historical orthology snapshots. InterProScan similarly uses
+   the latest InterPro release at execution time.
+
+Discussion
+----------
+
+**PROTEA is competitive even though it is the only leakage-free method.** All
+three external tools ran against post-t0 databases, yet PROTEA (re-ranker v3)
+beats eggNOG-mapper in all 9 cells and InterProScan in 7 of 9 (all but LK-BPO
+and PK-BPO). Only Pannzer2 scores higher; no leak-free external run was possible.
+
+**Alignment features are the key enabler for the re-ranker.** The progression
+from v1 to v3 shows that the model architecture (LightGBM, per-category, IA
+sample weights) was necessary but not sufficient. The decisive improvement came
+from computing Needleman-Wunsch and Smith-Waterman alignment features during
+training — without them, the re-ranker could not consistently outperform the
+hand-tuned heuristic.
+
+**Temporal integrity matters.** The data leakage analysis reveals that Pannzer2's
+apparent advantage (0.717 NK-MFO vs PROTEA's 0.620) is largely explained by
+access to post-t0 annotations: it exactly matches 62.4 % of NK ground truth
+pairs. This finding underscores the importance of reproducible, versioned
+evaluation pipelines — a core design goal of PROTEA.
+
+**Limitations.** The current evaluation uses a single temporal holdout
+(GOA 220 → 229). Multiple holdouts across different time windows would
+strengthen the generalisability claims. The re-ranker's training data is also
+limited to the GOA snapshots available in PROTEA's database (releases 160–220);
+expanding this range may further improve performance.
diff --git a/poetry.lock b/poetry.lock
index 2a1f2e4..53de19c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1852,6 +1852,32 @@ files = [
     {file = "librt-0.8.1.tar.gz", hash = "sha256:be46a14693955b3bd96014ccbdb8339ee8c9346fbe11c1b78901b55125f14c73"},
 ]
 
+[[package]]
+name = "lightgbm"
+version = "4.6.0"
+description = "LightGBM Python-package"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "lightgbm-4.6.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:b7a393de8a334d5c8e490df91270f0763f83f959574d504c7ccb9eee4aef70ed"},
+    {file = "lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:2dafd98d4e02b844ceb0b61450a660681076b1ea6c7adb8c566dfd66832aafad"},
+    {file = "lightgbm-4.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4d68712bbd2b57a0b14390cbf9376c1d5ed773fa2e71e099cac588703b590336"},
+    {file = "lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:cb19b5afea55b5b61cbb2131095f50538bd608a00655f23ad5d25ae3e3bf1c8d"},
+    {file = "lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b"},
+    {file = "lightgbm-4.6.0.tar.gz", hash = "sha256:cb1c59720eb569389c0ba74d14f52351b573af489f230032a1c9f314f8bab7fe"},
+]
+
+[package.dependencies]
+numpy = ">=1.17.0"
+scipy = "*"
+
+[package.extras]
+arrow = ["cffi (>=1.15.1)", "pyarrow (>=6.0.1)"]
+dask = ["dask[array,dataframe,distributed] (>=2.0.0)", "pandas (>=0.24.0)"]
+pandas = ["pandas (>=0.24.0)"]
+scikit-learn = ["scikit-learn (>=0.24.2)"]
+
 [[package]]
 name = "mako"
 version = "1.3.10"
@@ -3340,6 +3366,66 @@ files = [
 [package.extras]
 tests = ["pytest"]
 
+[[package]]
+name = "pyarrow"
+version = "23.0.1"
+description = "Python library for Apache Arrow"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+files = [
+    {file = "pyarrow-23.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:3fab8f82571844eb3c460f90a75583801d14ca0cc32b1acc8c361650e006fd56"},
+    {file = "pyarrow-23.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3f91c038b95f71ddfc865f11d5876c42f343b4495535bd262c7b321b0b94507c"},
+    {file = "pyarrow-23.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d0744403adabef53c985a7f8a082b502a368510c40d184df349a0a8754533258"},
+    {file = "pyarrow-23.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c33b5bf406284fd0bba436ed6f6c3ebe8e311722b441d89397c54f871c6863a2"},
+    {file = "pyarrow-23.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ddf743e82f69dcd6dbbcb63628895d7161e04e56794ef80550ac6f3315eeb1d5"},
+    {file = "pyarrow-23.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e052a211c5ac9848ae15d5ec875ed0943c0221e2fcfe69eee80b604b4e703222"},
+    {file = "pyarrow-23.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5abde149bb3ce524782d838eb67ac095cd3fd6090eba051130589793f1a7f76d"},
+    {file = "pyarrow-23.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6f0147ee9e0386f519c952cc670eb4a8b05caa594eeffe01af0e25f699e4e9bb"},
+    {file = "pyarrow-23.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:0ae6e17c828455b6265d590100c295193f93cc5675eb0af59e49dbd00d2de350"},
+    {file = "pyarrow-23.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd"},
+    {file = "pyarrow-23.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:26d50dee49d741ac0e82185033488d28d35be4d763ae6f321f97d1140eb7a0e9"},
+    {file = "pyarrow-23.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c30143b17161310f151f4a2bcfe41b5ff744238c1039338779424e38579d701"},
+    {file = "pyarrow-23.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db2190fa79c80a23fdd29fef4b8992893f024ae7c17d2f5f4db7171fa30c2c78"},
+    {file = "pyarrow-23.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:f00f993a8179e0e1c9713bcc0baf6d6c01326a406a9c23495ec1ba9c9ebf2919"},
+    {file = "pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f"},
+    {file = "pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7"},
+    {file = "pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9"},
+    {file = "pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05"},
+    {file = "pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67"},
+    {file = "pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730"},
+    {file = "pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0"},
+    {file = "pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8"},
+    {file = "pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f"},
+    {file = "pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677"},
+    {file = "pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2"},
+    {file = "pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37"},
+    {file = "pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2"},
+    {file = "pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125"},
+    {file = "pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8"},
+    {file = "pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca"},
+    {file = "pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1"},
+    {file = "pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb"},
+    {file = "pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1"},
+    {file = "pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886"},
+    {file = "pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f"},
+    {file = "pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690"},
+    {file = "pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce"},
+    {file = "pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019"},
+]
+
 [[package]]
 name = "pycodestyle"
 version = "2.14.0"
@@ -5448,4 +5534,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.12,<4.0"
-content-hash = "f7f543945514fd156dd191f84b67f856aa5cce6adb22e2b717f8008fe4a4666f"
+content-hash = "d640923dbd204299fc10398e1ec5cc607a8deeec7d4e49051da26fa7b40e2712"
diff --git a/protea/api/app.py b/protea/api/app.py
index 2d075bb..28c0756 100644
--- a/protea/api/app.py
+++ b/protea/api/app.py
@@ -3,11 +3,12 @@
 
 from pathlib import Path
 
-from fastapi import FastAPI
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 
 from protea.api.routers import admin as admin_router
+from protea.api.routers import annotate as annotate_router
 from protea.api.routers import annotations as annotations_router
 from protea.api.routers import embeddings as embeddings_router
 from protea.api.routers import jobs as jobs_router
@@ -15,6 +16,7 @@
 from protea.api.routers import proteins as proteins_router
 from protea.api.routers import query_sets as query_sets_router
 from protea.api.routers import scoring as scoring_router
+from protea.api.routers import showcase as showcase_router
 from protea.api.routers import support as support_router
 from protea.infrastructure.session import build_session_factory
 from protea.infrastructure.settings import load_settings
@@ -69,20 +71,56 @@ def create_app(project_root: Path | None = None) -> FastAPI:
                 "description": "Scoring configs, scored prediction export, and CAFA metrics.",
             },
             {"name": "support", "description": "Community thumbs-up and comments."},
+            {
+                "name": "annotate",
+                "description": "One-click protein annotation — upload FASTA, auto-run the full pipeline.",
+            },
         ],
     )
     app.state.session_factory = factory
     app.state.amqp_url = settings.amqp_url
     app.state.artifacts_dir = settings.artifacts_dir
 
+    allowed_origins = [
+        "http://localhost:3000",
+        "http://127.0.0.1:3000",
+        "https://protea.ngrok.app",
+    ]
     app.add_middleware(
         CORSMiddleware,
-        allow_origins=["*"],
+        allow_origins=allowed_origins,
         allow_credentials=True,
         allow_methods=["*"],
         allow_headers=["*"],
     )
 
+    @app.get("/health", tags=["health"])
+    def health_check() -> dict[str, str]:
+        """Liveness probe — returns 200 if the API process is up (no dependency checks)."""
+        return {"status": "ok"}
+
+    @app.get("/health/ready", tags=["health"])
+    def readiness_check() -> dict[str, str]:
+        """Readiness probe — 200 if the database and RabbitMQ both respond, else 503."""
+        import pika
+        from sqlalchemy import text
+
+        from protea.infrastructure.session import session_scope
+
+        try:  # a failed DB round-trip must yield 503 (not ready), not an unhandled 500
+            with session_scope(factory) as session:
+                session.execute(text("SELECT 1"))
+        except Exception as exc:  # expose only the type; driver messages may name hosts
+            detail = f"Database unreachable: {type(exc).__name__}"
+            raise HTTPException(status_code=503, detail=detail) from exc
+        try:  # RabbitMQ is reachable if a throwaway connection opens and closes
+            conn = pika.BlockingConnection(pika.URLParameters(settings.amqp_url))
+            conn.close()
+        except Exception as exc:
+            raise HTTPException(status_code=503, detail=f"RabbitMQ unreachable: {exc}") from exc
+        return {"status": "ready"}
+
+    app.include_router(annotate_router.router)
     app.include_router(jobs_router.router)
     app.include_router(proteins_router.router)
     app.include_router(annotations_router.router)
@@ -91,6 +129,7 @@ def create_app(project_root: Path | None = None) -> FastAPI:
     app.include_router(maintenance_router.router)
     app.include_router(admin_router.router)
     app.include_router(scoring_router.router)
+    app.include_router(showcase_router.router)
     app.include_router(support_router.router)
 
     sphinx_build = project_root / "docs" / "build" / "html"
diff --git a/protea/api/deps.py b/protea/api/deps.py
new file mode 100644
index 0000000..7c2dcbb
--- /dev/null
+++ b/protea/api/deps.py
@@ -0,0 +1,28 @@
+"""Shared FastAPI dependency functions for all routers."""
+from __future__ import annotations
+
+from pathlib import Path
+
+from sqlalchemy.orm import Session, sessionmaker
+from starlette.requests import Request
+
+
+def get_session_factory(request: Request) -> sessionmaker[Session]:
+    """Return ``app.state.session_factory``; raise ``RuntimeError`` if it is missing or None."""
+    if (session_factory := getattr(request.app.state, "session_factory", None)) is None:
+        raise RuntimeError("app.state.session_factory is not set")
+    return session_factory  # type: ignore[no-any-return]
+
+
+def get_amqp_url(request: Request) -> str:
+    """Return ``app.state.amqp_url``; raise ``RuntimeError`` if it is missing or None."""
+    if (amqp_url := getattr(request.app.state, "amqp_url", None)) is None:
+        raise RuntimeError("app.state.amqp_url is not set")
+    return amqp_url  # type: ignore[no-any-return]
+
+
+def get_artifacts_dir(request: Request) -> Path:
+    """Return ``app.state.artifacts_dir``; raise ``RuntimeError`` if it is missing or None."""
+    if (artifacts_dir := getattr(request.app.state, "artifacts_dir", None)) is None:
+        raise RuntimeError("app.state.artifacts_dir is not set")
+    return artifacts_dir  # type: ignore[no-any-return]
diff --git a/protea/api/routers/admin.py b/protea/api/routers/admin.py
index 3a3940b..9cd03eb 100644
--- a/protea/api/routers/admin.py
+++ b/protea/api/routers/admin.py
@@ -1,9 +1,10 @@
 from __future__ import annotations
 
+import os
 import subprocess
 from pathlib import Path
 
-from fastapi import APIRouter
+from fastapi import APIRouter, Header, HTTPException
 from starlette.requests import Request
 
 from protea.infrastructure.session import build_session_factory
@@ -13,10 +14,26 @@
 
 _PROJECT_ROOT = Path(__file__).resolve().parents[3]
 
+_ADMIN_TOKEN = os.getenv("PROTEA_ADMIN_TOKEN", "")  # read once, at import time
+
+
+def _require_admin_token(authorization: str | None) -> None:
+    """Raise HTTPException unless ``authorization`` is ``Bearer <PROTEA_ADMIN_TOKEN>``."""
+    import hmac  # local import; compare_digest is a timing-safe equality check
+
+    if not _ADMIN_TOKEN:  # token unset/empty: admin endpoints are switched off (403)
+        detail = "Admin operations are disabled — set PROTEA_ADMIN_TOKEN env var to enable."
+        raise HTTPException(status_code=403, detail=detail)
+    if not authorization or not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Missing Bearer token.")
+    if not hmac.compare_digest(authorization[7:].encode(), _ADMIN_TOKEN.encode()):
+        raise HTTPException(status_code=403, detail="Invalid admin token.")
+
 
 @router.post("/reset-db")
-def reset_db(request: Request) -> dict:
+def reset_db(request: Request, authorization: str | None = Header(default=None)) -> dict:
     """Drop and recreate the public schema, then re-apply all Alembic migrations."""
+    _require_admin_token(authorization)
     settings = load_settings(_PROJECT_ROOT)
 
     # 1. Drop + recreate schema using a raw connection (outside SQLAlchemy pool)
diff --git a/protea/api/routers/annotate.py b/protea/api/routers/annotate.py
new file mode 100644
index 0000000..c85d09a
--- /dev/null
+++ b/protea/api/routers/annotate.py
@@ -0,0 +1,243 @@
+"""One-click protein annotation endpoint.
+
+Accepts a FASTA file (or raw text), auto-selects the best available
+embedding config, annotation set, and ontology snapshot, creates a
+QuerySet, and kicks off ``compute_embeddings``.  Returns all the IDs the
+frontend needs to chain ``predict_go_terms`` once embeddings finish.
+"""
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter, Depends, Form, HTTPException, UploadFile
+from sqlalchemy import func
+from sqlalchemy.orm import Session, sessionmaker
+
+from protea.api.deps import get_amqp_url, get_session_factory
+from protea.api.routers.query_sets import _parse_fasta
+from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
+from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
+from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
+from protea.infrastructure.orm.models.embedding.sequence_embedding import SequenceEmbedding
+from protea.infrastructure.orm.models.job import Job, JobEvent
+from protea.infrastructure.orm.models.query.query_set import QuerySet, QuerySetEntry
+from protea.infrastructure.orm.models.sequence.sequence import Sequence
+from protea.infrastructure.queue.publisher import publish_job
+from protea.infrastructure.session import session_scope
+
+router = APIRouter(prefix="/annotate", tags=["annotate"])
+
+# Default embedding recipe (ESM-2 650M, last layer, mean pooling); used to create a config when none exists.
+_DEFAULT_CONFIG = {
+    "model_name": "facebook/esm2_t33_650M_UR50D",
+    "model_backend": "esm",
+    "layer_indices": [0],  # NOTE(review): header says "last layer" — presumably 0 counts from the end; confirm
+    "layer_agg": "mean",
+    "pooling": "mean",
+    "normalize_residues": False,
+    "normalize": True,
+    "max_length": 1022,
+    "use_chunking": False,
+    "chunk_size": 512,  # presumably ignored while use_chunking is False — confirm
+    "chunk_overlap": 0,
+}
+
+
+def _best_embedding_config(session: Session) -> EmbeddingConfig | None:
+    """Return the EmbeddingConfig with the most stored embeddings, or None if none exist.
+
+    The outer join keeps configs that have no SequenceEmbedding rows (count 0), so a
+    config is returned whenever the table is non-empty.  No model family is preferred;
+    ties are resolved by whatever order the database returns.
+    """
+    embedding_count = func.count(SequenceEmbedding.id)
+    # Ordered by count descending, the first row is already the answer: it has
+    # embeddings whenever any config does, so only that one row is fetched.
+    top = (
+        session.query(EmbeddingConfig, embedding_count.label("cnt"))
+        .outerjoin(SequenceEmbedding, SequenceEmbedding.embedding_config_id == EmbeddingConfig.id)
+        .group_by(EmbeddingConfig.id)
+        .order_by(embedding_count.desc())
+        .first()
+    )
+    # ``top`` is an (EmbeddingConfig, cnt) row, or None when no configs exist.
+    return None if top is None else top[0]
+
+
+def _newest_annotation_set(session: Session) -> AnnotationSet | None:
+    """Fetch the AnnotationSet with the latest ``created_at`` (None if there are none)."""
+    # Descending sort puts the most recently created row first.
+    by_recency = AnnotationSet.created_at.desc()
+    candidates = session.query(AnnotationSet).order_by(by_recency)
+    return candidates.first()
+
+
+def _newest_ontology_snapshot(session: Session) -> OntologySnapshot | None:
+    """Fetch the OntologySnapshot with the latest ``loaded_at`` (None if there are none)."""
+    # Descending sort puts the most recently loaded row first.
+    by_recency = OntologySnapshot.loaded_at.desc()
+    candidates = session.query(OntologySnapshot).order_by(by_recency)
+    return candidates.first()
+
+
+@router.post("", summary="Annotate proteins from FASTA")
+async def annotate(
+    file: UploadFile | None = None,
+    fasta_text: str | None = Form(None),
+    name: str = Form("Quick annotation"),
+    factory: sessionmaker[Session] = Depends(get_session_factory),
+    amqp_url: str = Depends(get_amqp_url),
+) -> dict[str, Any]:
+    """One-click annotation: upload FASTA, auto-select best method, run pipeline.
+
+    Accepts an uploaded FASTA ``file`` **or** raw ``fasta_text`` (``file`` wins if both
+    are sent).  Creates a QuerySet, picks the best embedding config (or creates the
+    default ESM-2 650M config), and queues a ``compute_embeddings`` job.
+
+    Returns the IDs the frontend needs to monitor progress and chain
+    ``predict_go_terms`` once embeddings are ready.
+
+    Raises HTTPException: 413 (input over 50 MB), 422 (missing, non-UTF-8, empty or
+    duplicate-accession FASTA), 409 (no annotation set or ontology snapshot loaded).
+    """
+    # ── Parse FASTA ──────────────────────────────────────────────────
+    _MAX_FASTA_BYTES = 50 * 1024 * 1024  # 50 MB
+    if file is not None:
+        # Cap the read at one byte past the limit so an oversized upload is never fully buffered.
+        raw = await file.read(_MAX_FASTA_BYTES + 1)
+        if len(raw) > _MAX_FASTA_BYTES:
+            raise HTTPException(status_code=413, detail="FASTA file exceeds 50 MB limit")
+        try:
+            content = raw.decode("utf-8")
+        except UnicodeDecodeError:
+            raise HTTPException(status_code=422, detail="FASTA file must be UTF-8 encoded") from None
+    elif fasta_text:
+        if len(fasta_text.encode("utf-8")) > _MAX_FASTA_BYTES:
+            raise HTTPException(status_code=413, detail="FASTA text exceeds 50 MB limit")
+        content = fasta_text
+    else:
+        raise HTTPException(status_code=422, detail="Provide a FASTA file or fasta_text")
+
+    records = _parse_fasta(content)
+    if not records:
+        raise HTTPException(status_code=422, detail="No valid sequences found in the FASTA input")
+
+    seen: set[str] = set()
+    for acc, _ in records:
+        if acc in seen:
+            raise HTTPException(status_code=422, detail=f"Duplicate accession: '{acc}'")
+        seen.add(acc)
+
+    # ── Check prerequisites, then create QuerySet + upsert sequences ─
+    with session_scope(factory) as session:
+        # Resolve required resources before writing anything, so a 409 costs no inserts.
+        ann = _newest_annotation_set(session)
+        if ann is None:
+            raise HTTPException(
+                status_code=409,
+                detail="No annotation sets available. Load GO annotations first.",
+            )
+        annotation_set_id = ann.id
+
+        snap = _newest_ontology_snapshot(session)
+        if snap is None:
+            raise HTTPException(
+                status_code=409,
+                detail="No ontology snapshots available. Load a GO ontology first.",
+            )
+        ontology_snapshot_id = snap.id
+
+        # Upsert sequences: reuse rows whose hash is already stored.
+        hashes = [Sequence.compute_hash(seq) for _, seq in records]
+        existing = (
+            session.query(Sequence.sequence_hash, Sequence.id)
+            .filter(Sequence.sequence_hash.in_(hashes))
+            .all()
+        )
+        hash_to_seq_id: dict[str, int] = {h: sid for h, sid in existing}
+        # Stage one new row per unseen hash and flush once, not once per sequence.
+        pending: dict[str, Sequence] = {}
+        for (_, seq), h in zip(records, hashes, strict=True):
+            if h not in hash_to_seq_id and h not in pending:
+                pending[h] = Sequence(sequence=seq, sequence_hash=h)
+        session.add_all(pending.values())
+        session.flush()
+        hash_to_seq_id.update((h, row.id) for h, row in pending.items())
+
+        qs = QuerySet(name=name, description="Created via quick annotation")
+        session.add(qs)
+        session.flush()
+        entries = [
+            QuerySetEntry(
+                query_set_id=qs.id,
+                sequence_id=hash_to_seq_id[h],
+                accession=acc,
+            )
+            for (acc, _), h in zip(records, hashes, strict=True)
+        ]
+        session.add_all(entries)
+        session.flush()
+        query_set_id = qs.id
+
+        # ── Auto-select best embedding config ────────────────────────
+        config = _best_embedding_config(session)
+        if config is None:
+            config = EmbeddingConfig(**_DEFAULT_CONFIG)
+            session.add(config)
+            session.flush()
+        config_id = config.id
+
+        # ── Check for trained reranker (most recently created) ───────
+        best_reranker = (
+            session.query(RerankerModel)
+            .order_by(RerankerModel.created_at.desc())
+            .first()
+        )
+        reranker_id = best_reranker.id if best_reranker else None
+
+        # ── Create compute_embeddings job ────────────────────────────
+        embed_payload = {
+            "embedding_config_id": str(config_id),
+            "query_set_id": str(query_set_id),
+            "device": "cuda",
+            "skip_existing": True,
+            "batch_size": 8,
+            "sequences_per_job": 64,
+        }
+        job = Job(
+            operation="compute_embeddings",
+            queue_name="protea.embeddings",
+            payload=embed_payload,
+        )
+        session.add(job)
+        session.flush()
+        embed_job_id = job.id
+        created_fields = {"operation": "compute_embeddings", "source": "annotate"}
+        session.add(JobEvent(job_id=embed_job_id, event="job.created", fields=created_fields))
+
+    publish_job(amqp_url, "protea.embeddings", embed_job_id)
+
+    # Build the predict payload the frontend will POST when embeddings finish.
+    predict_payload: dict[str, Any] = {
+        "embedding_config_id": str(config_id),
+        "annotation_set_id": str(annotation_set_id),
+        "ontology_snapshot_id": str(ontology_snapshot_id),
+        "query_set_id": str(query_set_id),
+        "search_backend": "numpy",
+        "aspect_separated_knn": True,
+        "compute_alignments": True,
+        "compute_taxonomy": True,
+        "compute_reranker_features": True,
+    }
+
+    return {
+        "query_set_id": str(query_set_id),
+        "embedding_config_id": str(config_id),
+        "annotation_set_id": str(annotation_set_id),
+        "ontology_snapshot_id": str(ontology_snapshot_id),
+        "embedding_job_id": str(embed_job_id),
+        "predict_payload": predict_payload,
+        "reranker_id": str(reranker_id) if reranker_id else None,
+        "sequence_count": len(records),
+    }
diff --git a/protea/api/routers/annotations.py b/protea/api/routers/annotations.py
index d4f50a3..cf26501 100644
--- a/protea/api/routers/annotations.py
+++ b/protea/api/routers/annotations.py
@@ -13,8 +13,8 @@
 from sqlalchemy import func
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_amqp_url, get_artifacts_dir, get_session_factory
 from protea.core.evaluation import compute_evaluation_data
 from protea.core.operations.generate_evaluation_set import GenerateEvaluationSetPayload
 from protea.core.operations.load_goa_annotations import LoadGOAAnnotationsPayload
@@ -39,27 +39,6 @@
 _JOBS_QUEUE = "protea.jobs"
 
 
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory  # type: ignore[no-any-return]
-
-
-def get_amqp_url(request: Request) -> str:
-    url = getattr(request.app.state, "amqp_url", None)
-    if url is None:
-        raise RuntimeError("app.state.amqp_url is not set")
-    return url  # type: ignore[no-any-return]
-
-
-def get_artifacts_dir(request: Request) -> Path:
-    d = getattr(request.app.state, "artifacts_dir", None)
-    if d is None:
-        raise RuntimeError("app.state.artifacts_dir is not set")
-    return d  # type: ignore[no-any-return]
-
-
 # ── Ontology Snapshots ────────────────────────────────────────────────────────
 
 
@@ -812,6 +791,8 @@ def list_evaluation_results(
                 "evaluation_set_id": str(r.evaluation_set_id),
                 "prediction_set_id": str(r.prediction_set_id),
                 "scoring_config_id": str(r.scoring_config_id) if r.scoring_config_id else None,
+                "reranker_model_id": str(r.reranker_model_id) if r.reranker_model_id else None,
+                "reranker_config": r.reranker_config,
                 "job_id": str(r.job_id) if r.job_id else None,
                 "created_at": r.created_at.isoformat(),
                 "results": r.results,
diff --git a/protea/api/routers/embeddings.py b/protea/api/routers/embeddings.py
index 6d6ddec..693e34f 100644
--- a/protea/api/routers/embeddings.py
+++ b/protea/api/routers/embeddings.py
@@ -9,8 +9,8 @@
 from fastapi.responses import StreamingResponse
 from sqlalchemy import func
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_amqp_url, get_session_factory
 from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
 from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
 from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
@@ -30,20 +30,6 @@
 _VALID_POOLING = {"mean", "max", "cls", "mean_max"}
 
 
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory  # type: ignore[no-any-return]
-
-
-def get_amqp_url(request: Request) -> str:
-    url = getattr(request.app.state, "amqp_url", None)
-    if url is None:
-        raise RuntimeError("app.state.amqp_url is not set")
-    return url  # type: ignore[no-any-return]
-
-
 def _validate_embedding_config_body(body: dict[str, Any]) -> dict[str, Any]:
     errors: list[str] = []
 
@@ -331,8 +317,17 @@ def list_prediction_sets(
 ) -> list[dict[str, Any]]:
     """List the 100 most recent prediction sets with their GO prediction counts."""
     with session_scope(factory) as session:
+        # Single query with a correlated subquery for counts (avoids N+1).
+        count_subq = (
+            session.query(func.count(GOPrediction.id))
+            .filter(GOPrediction.prediction_set_id == PredictionSet.id)
+            .correlate(PredictionSet)
+            .scalar_subquery()
+        )
         rows = (
-            session.query(PredictionSet, EmbeddingConfig, AnnotationSet, OntologySnapshot)
+            session.query(
+                PredictionSet, EmbeddingConfig, AnnotationSet, OntologySnapshot, count_subq
+            )
             .join(EmbeddingConfig, PredictionSet.embedding_config_id == EmbeddingConfig.id)
             .join(AnnotationSet, PredictionSet.annotation_set_id == AnnotationSet.id)
             .join(OntologySnapshot, PredictionSet.ontology_snapshot_id == OntologySnapshot.id)
@@ -341,12 +336,7 @@ def list_prediction_sets(
             .all()
         )
         result = []
-        for ps, ec, ann, snap in rows:
-            prediction_count = (
-                session.query(func.count(GOPrediction.id))
-                .filter(GOPrediction.prediction_set_id == ps.id)
-                .scalar()
-            )
+        for ps, ec, ann, snap, prediction_count in rows:
             result.append(
                 {
                     "id": str(ps.id),
@@ -362,7 +352,7 @@ def list_prediction_sets(
                     "limit_per_entry": ps.limit_per_entry,
                     "distance_threshold": ps.distance_threshold,
                     "created_at": ps.created_at.isoformat(),
-                    "prediction_count": prediction_count,
+                    "prediction_count": prediction_count or 0,
                 }
             )
         return result
@@ -561,6 +551,12 @@ def get_protein_predictions(
                 "taxonomic_distance": pred.taxonomic_distance,
                 "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
                 "taxonomic_relation": pred.taxonomic_relation,
+                # Re-ranker features
+                "vote_count": pred.vote_count,
+                "k_position": pred.k_position,
+                "go_term_frequency": pred.go_term_frequency,
+                "ref_annotation_density": pred.ref_annotation_density,
+                "neighbor_distance_std": pred.neighbor_distance_std,
             }
             for pred, gt in rows
         ]
@@ -652,6 +648,12 @@ def get_go_term_distribution(
     "taxonomic_distance",
     "taxonomic_common_ancestors",
     "taxonomic_relation",
+    # Re-ranker features
+    "vote_count",
+    "k_position",
+    "go_term_frequency",
+    "ref_annotation_density",
+    "neighbor_distance_std",
 ]
 
 
@@ -741,6 +743,13 @@ def _generate():
                         if pred.taxonomic_common_ancestors is not None
                         else "",
                         pred.taxonomic_relation or "",
+                        pred.vote_count if pred.vote_count is not None else "",
+                        pred.k_position if pred.k_position is not None else "",
+                        pred.go_term_frequency if pred.go_term_frequency is not None else "",
+                        pred.ref_annotation_density
+                        if pred.ref_annotation_density is not None
+                        else "",
+                        _fmt(pred.neighbor_distance_std),
                     ]
                 )
                 yield buf.getvalue()
@@ -811,28 +820,39 @@ def download_predictions_cafa(
 
     def _generate():
         with session_scope(factory) as session:
-            q = (
-                session.query(GOPrediction, GOTerm)
-                .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+            # Deduplicate at the DB level: keep the lowest distance per
+            # (protein_accession, go_id) pair so we never need an unbounded
+            # `seen` set in Python — this preserves true streaming.
+            from sqlalchemy import func as sa_func
+
+            min_dist = (
+                session.query(
+                    GOPrediction.protein_accession,
+                    GOPrediction.go_term_id,
+                    sa_func.min(GOPrediction.distance).label("min_distance"),
+                )
                 .filter(GOPrediction.prediction_set_id == set_id)
             )
+            if max_distance is not None:
+                min_dist = min_dist.filter(GOPrediction.distance <= max_distance)
+            min_dist = min_dist.group_by(
+                GOPrediction.protein_accession, GOPrediction.go_term_id
+            ).subquery()
+
+            q = (
+                session.query(min_dist.c.protein_accession, GOTerm.go_id, min_dist.c.min_distance)
+                .join(GOTerm, min_dist.c.go_term_id == GOTerm.id)
+            )
             if aspect:
                 q = q.filter(GOTerm.aspect == aspect.upper())
-            if max_distance is not None:
-                q = q.filter(GOPrediction.distance <= max_distance)
             if delta_proteins is not None:
-                q = q.filter(GOPrediction.protein_accession.in_(delta_proteins))
+                q = q.filter(min_dist.c.protein_accession.in_(delta_proteins))
 
-            q = q.order_by(GOPrediction.protein_accession, GOTerm.go_id, GOPrediction.distance)
+            q = q.order_by(min_dist.c.protein_accession, GOTerm.go_id)
 
-            seen: set[tuple[str, str]] = set()
-            for pred, gt in q.yield_per(1000):
-                key = (pred.protein_accession, gt.go_id)
-                if key in seen:
-                    continue
-                seen.add(key)
-                score = max(0.0, 1.0 - pred.distance)
-                yield f"{pred.protein_accession}\t{gt.go_id}\t{score:.4f}\n"
+            for acc, go_id, dist in q.yield_per(1000):
+                score = max(0.0, 1.0 - dist)
+                yield f"{acc}\t{go_id}\t{score:.4f}\n"
 
     filename = f"predictions_cafa_{set_id}.tsv"
     return StreamingResponse(
diff --git a/protea/api/routers/jobs.py b/protea/api/routers/jobs.py
index c3d554c..b2982bb 100644
--- a/protea/api/routers/jobs.py
+++ b/protea/api/routers/jobs.py
@@ -7,8 +7,9 @@
 from fastapi import APIRouter, Depends, HTTPException, Query
 from pydantic import BaseModel, Field, field_validator
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_amqp_url, get_session_factory
+from protea.core.utils import utcnow
 from protea.infrastructure.orm.models.job import Job, JobEvent, JobStatus
 from protea.infrastructure.queue.publisher import publish_job
 from protea.infrastructure.session import session_scope
@@ -38,23 +39,6 @@ def strip_and_require(cls, v: str) -> str:
         return v.strip()
 
 
-# --- Dependency hook (wire this in your app factory) ---
-
-
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory  # type: ignore[no-any-return]
-
-
-def get_amqp_url(request: Request) -> str:
-    url = getattr(request.app.state, "amqp_url", None)
-    if url is None:
-        raise RuntimeError("app.state.amqp_url is not set")
-    return url  # type: ignore[no-any-return]
-
-
 @router.post("", summary="Create and enqueue a job")
 def create_job(
     body: CreateJobRequest,
@@ -228,10 +212,12 @@ def cancel_job(
     job_id: UUID,
     factory: sessionmaker[Session] = Depends(get_session_factory),
 ) -> dict[str, Any]:
-    """Mark a job (and any queued child jobs) as CANCELLED.
+    """Mark a job (and any non-terminal child jobs) as CANCELLED.
 
     Already-finished jobs (SUCCEEDED/FAILED) are returned as-is with no state change.
-    Note: workers processing a batch mid-flight will complete their current message before stopping.
+    Children in QUEUED are cancelled immediately.  Children in RUNNING are also
+    marked CANCELLED — the worker's parent-check in BaseWorker.handle_job() will
+    detect the cancelled parent on the next iteration and stop gracefully.
     """
     with session_scope(factory) as session:
         j = session.get(Job, job_id)
@@ -242,16 +228,21 @@ def cancel_job(
             return {"id": str(j.id), "status": j.status.value}
 
         j.status = JobStatus.CANCELLED
+        j.finished_at = utcnow()
         session.add(JobEvent(job_id=job_id, event="job.cancelled", fields={}))
 
-        # Cancel any queued children so they are not picked up by a worker.
+        # Cancel all non-terminal children (QUEUED and RUNNING).
         children = (
             session.query(Job)
-            .filter(Job.parent_job_id == job_id, Job.status == JobStatus.QUEUED)
+            .filter(
+                Job.parent_job_id == job_id,
+                Job.status.in_((JobStatus.QUEUED, JobStatus.RUNNING)),
+            )
             .all()
         )
         for child in children:
             child.status = JobStatus.CANCELLED
+            child.finished_at = utcnow()
             session.add(
                 JobEvent(
                     job_id=child.id,
diff --git a/protea/api/routers/maintenance.py b/protea/api/routers/maintenance.py
index 3574c5f..65e79f0 100644
--- a/protea/api/routers/maintenance.py
+++ b/protea/api/routers/maintenance.py
@@ -5,8 +5,8 @@
 from fastapi import APIRouter, Depends
 from sqlalchemy import text
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_session_factory
 from protea.infrastructure.orm.models.embedding.sequence_embedding import SequenceEmbedding
 from protea.infrastructure.orm.models.sequence.sequence import Sequence
 from protea.infrastructure.session import session_scope
@@ -14,13 +14,6 @@
 router = APIRouter(prefix="/maintenance", tags=["maintenance"])
 
 
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory  # type: ignore[no-any-return]
-
-
 @router.get("/vacuum-sequences/preview")
 def preview_orphan_sequences(
     factory: sessionmaker[Session] = Depends(get_session_factory),
diff --git a/protea/api/routers/proteins.py b/protea/api/routers/proteins.py
index ab7060e..2c027be 100644
--- a/protea/api/routers/proteins.py
+++ b/protea/api/routers/proteins.py
@@ -6,8 +6,8 @@
 from fastapi import APIRouter, Depends, HTTPException, Query
 from sqlalchemy import distinct, func
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_session_factory
 from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
 from protea.infrastructure.orm.models.annotation.go_term import GOTerm
 from protea.infrastructure.orm.models.annotation.protein_go_annotation import ProteinGOAnnotation
@@ -19,13 +19,6 @@
 router = APIRouter(prefix="/proteins", tags=["proteins"])
 
 
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory
-
-
 # ── Stats ─────────────────────────────────────────────────────────────────────
 
 
@@ -101,7 +94,9 @@ def list_proteins(
         if reviewed is not None:
             q = q.filter(Protein.reviewed == reviewed)
         if search:
-            like = f"%{search}%"
+            # Limit search length to prevent abuse; escape LIKE special chars.
+            term = search[:100].replace("%", r"\%").replace("_", r"\_")
+            like = f"%{term}%"
             q = q.filter(
                 Protein.accession.ilike(like)
                 | Protein.entry_name.ilike(like)
diff --git a/protea/api/routers/query_sets.py b/protea/api/routers/query_sets.py
index dafe7e0..55d6a74 100644
--- a/protea/api/routers/query_sets.py
+++ b/protea/api/routers/query_sets.py
@@ -6,8 +6,8 @@
 from fastapi import APIRouter, Depends, Form, HTTPException, UploadFile
 from sqlalchemy import func
 from sqlalchemy.orm import Session, sessionmaker
-from starlette.requests import Request
 
+from protea.api.deps import get_session_factory
 from protea.infrastructure.orm.models.query.query_set import QuerySet, QuerySetEntry
 from protea.infrastructure.orm.models.sequence.sequence import Sequence
 from protea.infrastructure.session import session_scope
@@ -15,13 +15,6 @@
 router = APIRouter(prefix="/query-sets", tags=["query-sets"])
 
 
-def get_session_factory(request: Request) -> sessionmaker[Session]:
-    factory = getattr(request.app.state, "session_factory", None)
-    if factory is None:
-        raise RuntimeError("app.state.session_factory is not set")
-    return factory  # type: ignore[no-any-return]
-
-
 def _parse_fasta(content: str) -> list[tuple[str, str]]:
     """Return list of (accession, sequence) from FASTA text.
 
@@ -80,7 +73,10 @@ async def create_query_set(
     preserving the original FASTA accession. Duplicate accessions within the
     same upload are rejected with 422.
     """
+    _MAX_FASTA_BYTES = 50 * 1024 * 1024  # 50 MB
     raw = await file.read()
+    if len(raw) > _MAX_FASTA_BYTES:
+        raise HTTPException(status_code=413, detail="FASTA file exceeds 50 MB limit")
     try:
         content = raw.decode("utf-8")
     except UnicodeDecodeError:
diff --git a/protea/api/routers/scoring.py b/protea/api/routers/scoring.py
index acce2df..71d0dd7 100644
--- a/protea/api/routers/scoring.py
+++ b/protea/api/routers/scoring.py
@@ -34,16 +34,28 @@
 from typing import Any
 
 from fastapi import APIRouter, Depends, HTTPException, Query
-from fastapi.requests import Request
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field, field_validator
 
+from protea.api.deps import get_session_factory
 from protea.core.evaluation import compute_evaluation_data
 from protea.core.metrics import compute_cafa_metrics
+from protea.core.reranker import (
+    model_from_string,
+    model_to_string,
+)
+from protea.core.reranker import (
+    predict as reranker_predict,
+)
+from protea.core.reranker import (
+    train as reranker_train,
+)
 from protea.core.scoring import compute_score
+from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
 from protea.infrastructure.orm.models.annotation.go_term import GOTerm
 from protea.infrastructure.orm.models.embedding.go_prediction import GOPrediction
 from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
 from protea.infrastructure.orm.models.embedding.scoring_config import (
     DEFAULT_EVIDENCE_WEIGHTS,
     DEFAULT_WEIGHTS,
@@ -145,15 +157,6 @@
 ]
 
 
-# ---------------------------------------------------------------------------
-# FastAPI dependency
-# ---------------------------------------------------------------------------
-
-
-def get_session_factory(request: Request):
-    return request.app.state.session_factory
-
-
 # ---------------------------------------------------------------------------
 # Request / response models
 # ---------------------------------------------------------------------------
@@ -554,3 +557,621 @@ def compute_metrics(
             for p in metrics.curve
         ],
     }
+
+
+# ---------------------------------------------------------------------------
+# Training data endpoint (re-ranker)
+# ---------------------------------------------------------------------------
+
+_TRAINING_COLUMNS = [
+    "protein_accession",
+    "go_id",
+    "aspect",
+    "label",
+    "distance",
+    "ref_protein_accession",
+    "qualifier",
+    "evidence_code",
+    # NW alignment
+    "identity_nw",
+    "similarity_nw",
+    "alignment_score_nw",
+    "gaps_pct_nw",
+    "alignment_length_nw",
+    # SW alignment
+    "identity_sw",
+    "similarity_sw",
+    "alignment_score_sw",
+    "gaps_pct_sw",
+    "alignment_length_sw",
+    # Lengths
+    "length_query",
+    "length_ref",
+    # Taxonomy
+    "query_taxonomy_id",
+    "ref_taxonomy_id",
+    "taxonomic_lca",
+    "taxonomic_distance",
+    "taxonomic_common_ancestors",
+    "taxonomic_relation",
+    # Re-ranker features
+    "vote_count",
+    "k_position",
+    "go_term_frequency",
+    "ref_annotation_density",
+    "neighbor_distance_std",
+]
+
+
+@router.get(
+    "/prediction-sets/{set_id}/training-data.tsv",
+    summary="Export labeled training data for the re-ranker",
+    response_class=StreamingResponse,
+)
+def download_training_data(
+    set_id: uuid.UUID,
+    evaluation_set_id: uuid.UUID = Query(..., description="EvaluationSet to derive ground-truth labels from"),
+    category: str = Query("nk", pattern="^(nk|lk|pk)$", description="Ground-truth category: nk, lk, or pk"),
+    factory=Depends(get_session_factory),
+) -> StreamingResponse:
+    """Stream labeled training data for the re-ranker model.
+
+    Joins all GOPrediction feature columns with a binary ``label``: **1** if
+    the (protein_accession, go_id) pair appears in the selected category's
+    temporal ground truth of the given EvaluationSet, **0** otherwise.
+    Responds 404 if the PredictionSet or the EvaluationSet does not exist.
+
+    Parameters
+    ----------
+    set_id:
+        UUID of the PredictionSet whose predictions are exported.
+    evaluation_set_id:
+        UUID of the EvaluationSet (old → new annotation sets).
+    category:
+        ``"nk"`` (no-knowledge), ``"lk"`` (limited-knowledge), or ``"pk"`` (partial-knowledge).
+    """
+    with session_scope(factory) as session:
+        ps = session.get(PredictionSet, set_id)
+        if ps is None:
+            raise HTTPException(status_code=404, detail="PredictionSet not found")
+
+        es = session.get(EvaluationSet, evaluation_set_id)
+        if es is None:
+            raise HTTPException(status_code=404, detail="EvaluationSet not found")
+
+        ontology_snapshot_id = ps.ontology_snapshot_id
+
+        eval_data = compute_evaluation_data(
+            session,
+            old_annotation_set_id=es.old_annotation_set_id,
+            new_annotation_set_id=es.new_annotation_set_id,
+            ontology_snapshot_id=ontology_snapshot_id,
+        )
+
+    ground_truth: dict[str, set[str]] = getattr(eval_data, category)
+    gt_pairs: set[tuple[str, str]] = set()  # flattened (protein, go_id) pairs for fast label lookup
+    for protein, go_ids in ground_truth.items():
+        for go_id in go_ids:
+            gt_pairs.add((protein, go_id))
+
+    # Cell formatter, defined once here instead of being re-created for every streamed row.
+    def _v(val: object) -> str:
+        return "" if val is None else str(val)
+
+    def _generate() -> Iterator[bytes]:
+        yield ("\t".join(_TRAINING_COLUMNS) + "\n").encode()
+
+        with session_scope(factory) as session:  # own session: the one above is closed before streaming starts
+            q = (
+                session.query(GOPrediction, GOTerm.go_id, GOTerm.aspect)
+                .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+                .filter(GOPrediction.prediction_set_id == set_id)
+            )
+
+            for pred, go_id, aspect in q.yield_per(1000):
+                label = 1 if (pred.protein_accession, go_id) in gt_pairs else 0
+                row = "\t".join([  # cell order must match _TRAINING_COLUMNS
+                    pred.protein_accession,
+                    go_id,
+                    aspect or "",
+                    str(label),
+                    _v(pred.distance),
+                    pred.ref_protein_accession or "",
+                    pred.qualifier or "",
+                    pred.evidence_code or "",
+                    _v(pred.identity_nw),
+                    _v(pred.similarity_nw),
+                    _v(pred.alignment_score_nw),
+                    _v(pred.gaps_pct_nw),
+                    _v(pred.alignment_length_nw),
+                    _v(pred.identity_sw),
+                    _v(pred.similarity_sw),
+                    _v(pred.alignment_score_sw),
+                    _v(pred.gaps_pct_sw),
+                    _v(pred.alignment_length_sw),
+                    _v(pred.length_query),
+                    _v(pred.length_ref),
+                    _v(pred.query_taxonomy_id),
+                    _v(pred.ref_taxonomy_id),
+                    _v(pred.taxonomic_lca),
+                    _v(pred.taxonomic_distance),
+                    _v(pred.taxonomic_common_ancestors),
+                    pred.taxonomic_relation or "",
+                    _v(pred.vote_count),
+                    _v(pred.k_position),
+                    _v(pred.go_term_frequency),
+                    _v(pred.ref_annotation_density),
+                    _v(pred.neighbor_distance_std),
+                ]) + "\n"
+                yield row.encode()
+
+    filename = f"training_data_{set_id}_{category}.tsv"
+    return StreamingResponse(
+        _generate(),
+        media_type="text/tab-separated-values",
+        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
+    )
+
+
+# ---------------------------------------------------------------------------
+# Re-ranker model CRUD + train + apply
+# ---------------------------------------------------------------------------
+
+
+_ASPECT_MAP = {"bpo": "P", "mfo": "F", "cco": "C"}
+
+
+class _TrainingPair(BaseModel):  # one additional (PredictionSet, EvaluationSet) pair of training data
+    prediction_set_id: uuid.UUID
+    evaluation_set_id: uuid.UUID
+
+
+class RerankerTrainRequest(BaseModel):
+    """Request body for POST /scoring/rerankers/train."""
+
+    name: str = Field(..., min_length=1, max_length=255)  # train_reranker responds 409 if the name is already used
+    prediction_set_id: uuid.UUID  # primary pair; the only pair recorded on the stored RerankerModel
+    evaluation_set_id: uuid.UUID
+    category: str = Field("nk", pattern="^(nk|lk|pk)$")  # which ground-truth category supplies the labels
+    aspect: str | None = Field(
+        default=None,
+        pattern="^(bpo|mfo|cco)$",
+        description="Train only on predictions for this GO aspect. None trains on all aspects.",
+    )
+    neg_pos_ratio: float | None = Field(
+        default=None,
+        ge=1.0,
+        description="Subsample negatives to this ratio vs positives (e.g. 1.0 for 1:1, 10.0 for 10:1). None keeps all.",
+    )
+    extra_pairs: list[_TrainingPair] | None = Field(
+        default=None,
+        description="Additional (prediction_set, evaluation_set) pairs to include in training data. "
+        "Data from all pairs is concatenated before training a single model.",
+    )
+
+    model_config = {"extra": "forbid"}  # reject request bodies that contain unknown fields
+
+
+class RerankerResponse(BaseModel):
+    """Serialised representation of a stored RerankerModel (the ``model_data`` payload is not included)."""
+
+    id: uuid.UUID
+    name: str
+    prediction_set_id: uuid.UUID | None  # primary training pair, copied from the model row
+    evaluation_set_id: uuid.UUID | None
+    category: str
+    aspect: str | None  # None when the model was trained on all aspects
+    metrics: dict[str, Any]
+    feature_importance: dict[str, Any]
+    created_at: Any
+
+
+def _reranker_to_response(m: RerankerModel) -> RerankerResponse:
+    """Convert a RerankerModel ORM row into its API response schema.
+
+    Only the attributes listed below are copied onto the response.
+    """
+    exposed = (
+        "id", "name", "prediction_set_id", "evaluation_set_id",
+        "category", "aspect", "metrics", "feature_importance", "created_at",
+    )
+    # Read each attribute off the row in the order above and pass them to
+    # the schema as keyword arguments.
+    return RerankerResponse(**{field: getattr(m, field) for field in exposed})
+
+
+def _collect_training_records(
+    session: Any,
+    prediction_set_id: uuid.UUID,
+    evaluation_set_id: uuid.UUID,
+    category: str,
+    aspect_filter_char: str | None,  # GOTerm.aspect value to keep; None keeps every aspect
+) -> list[dict[str, Any]]:
+    """Build labeled training records from a (PredictionSet, EvaluationSet) pair."""
+    ps = session.get(PredictionSet, prediction_set_id)
+    if ps is None:
+        raise HTTPException(status_code=404, detail=f"PredictionSet {prediction_set_id} not found")
+    es = session.get(EvaluationSet, evaluation_set_id)
+    if es is None:
+        raise HTTPException(status_code=404, detail=f"EvaluationSet {evaluation_set_id} not found")
+
+    eval_data = compute_evaluation_data(
+        session,
+        old_annotation_set_id=es.old_annotation_set_id,
+        new_annotation_set_id=es.new_annotation_set_id,
+        ontology_snapshot_id=ps.ontology_snapshot_id,
+    )
+
+    ground_truth: dict[str, set[str]] = getattr(eval_data, category)  # category names the eval_data attribute to read
+    gt_pairs: set[tuple[str, str]] = set()  # flattened (protein, go_id) pairs for fast label lookup
+    for protein, go_ids in ground_truth.items():
+        for go_id in go_ids:
+            gt_pairs.add((protein, go_id))
+
+    records: list[dict[str, Any]] = []
+    q_preds = (
+        session.query(GOPrediction, GOTerm.go_id, GOTerm.aspect)
+        .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+        .filter(GOPrediction.prediction_set_id == prediction_set_id)
+    )
+    if aspect_filter_char:
+        q_preds = q_preds.filter(GOTerm.aspect == aspect_filter_char)
+    for pred, go_id, aspect in q_preds.yield_per(5000):
+        label = 1 if (pred.protein_accession, go_id) in gt_pairs else 0  # 1 = pair is in the ground truth
+        records.append({
+            "protein_accession": pred.protein_accession,
+            "go_id": go_id,
+            "aspect": aspect or "",
+            "label": label,
+            "distance": pred.distance,
+            "ref_protein_accession": pred.ref_protein_accession or "",
+            "qualifier": pred.qualifier or "",
+            "evidence_code": pred.evidence_code or "",
+            "identity_nw": pred.identity_nw,
+            "similarity_nw": pred.similarity_nw,
+            "alignment_score_nw": pred.alignment_score_nw,
+            "gaps_pct_nw": pred.gaps_pct_nw,
+            "alignment_length_nw": pred.alignment_length_nw,
+            "identity_sw": pred.identity_sw,
+            "similarity_sw": pred.similarity_sw,
+            "alignment_score_sw": pred.alignment_score_sw,
+            "gaps_pct_sw": pred.gaps_pct_sw,
+            "alignment_length_sw": pred.alignment_length_sw,
+            "length_query": pred.length_query,
+            "length_ref": pred.length_ref,
+            "query_taxonomy_id": pred.query_taxonomy_id,
+            "ref_taxonomy_id": pred.ref_taxonomy_id,
+            "taxonomic_lca": pred.taxonomic_lca,
+            "taxonomic_distance": pred.taxonomic_distance,
+            "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
+            "taxonomic_relation": pred.taxonomic_relation or "",
+            "vote_count": pred.vote_count,
+            "k_position": pred.k_position,
+            "go_term_frequency": pred.go_term_frequency,
+            "ref_annotation_density": pred.ref_annotation_density,
+            "neighbor_distance_std": pred.neighbor_distance_std,
+        })
+    return records  # one dict per prediction row; the whole list is held in memory
+
+
+@router.post("/rerankers/train", response_model=RerankerResponse, status_code=201)
+def train_reranker(
+    body: RerankerTrainRequest,
+    factory=Depends(get_session_factory),
+):
+    """Fit a LightGBM re-ranker on one or more (PredictionSet, EvaluationSet) pairs.
+
+    Records from the primary pair and from every entry of ``extra_pairs`` are
+    concatenated and one model is trained on the result — useful for
+    multi-temporal holdout training where each pair is a different GOA time
+    split.  Responds 409 if ``name`` is already taken and 422 if no
+    predictions were found.  Only the primary pair is stored on the model row.
+    """
+    import pandas as pd
+
+    aspect_char = _ASPECT_MAP.get(body.aspect) if body.aspect else None
+
+    # Primary pair first, then the optional extras, in request order.
+    pairs = [(body.prediction_set_id, body.evaluation_set_id)]
+    pairs += [(p.prediction_set_id, p.evaluation_set_id) for p in body.extra_pairs or []]
+
+    records: list[dict[str, Any]] = []
+    with session_scope(factory) as session:
+        # Refuse to train when the requested name is already in use.
+        name_query = session.query(RerankerModel).filter(RerankerModel.name == body.name)
+        if name_query.first() is not None:
+            raise HTTPException(status_code=409, detail=f"Reranker with name '{body.name}' already exists")
+
+        for ps_id, es_id in pairs:
+            records.extend(
+                _collect_training_records(session, ps_id, es_id, body.category, aspect_char)
+            )
+
+    if not records:
+        raise HTTPException(status_code=422, detail="No predictions found across all pairs")
+
+    # One DataFrame over all pairs; training runs outside any DB session.
+    frame = pd.DataFrame(records)
+    trained = reranker_train(frame, neg_pos_ratio=body.neg_pos_ratio)
+
+    # Persist the fitted model together with its metrics.
+    with session_scope(factory) as session:
+        stored = RerankerModel(
+            name=body.name,
+            prediction_set_id=body.prediction_set_id,
+            evaluation_set_id=body.evaluation_set_id,
+            category=body.category,
+            aspect=body.aspect,
+            model_data=model_to_string(trained.model),
+            metrics=trained.metrics,
+            feature_importance=trained.feature_importance,
+        )
+        session.add(stored)
+        # Flush before building the response (presumably to populate id/created_at — confirm).
+        session.flush()
+        return _reranker_to_response(stored)
+
+
+@router.get("/rerankers", response_model=list[RerankerResponse])
+def list_rerankers(factory=Depends(get_session_factory)):
+    """List every stored re-ranker model, ordered by ascending ``created_at``."""
+    with session_scope(factory) as session:
+        rows = session.query(RerankerModel).order_by(RerankerModel.created_at)
+        return [_reranker_to_response(row) for row in rows]
+
+
+@router.get("/rerankers/{reranker_id}", response_model=RerankerResponse)
+def get_reranker(reranker_id: uuid.UUID, factory=Depends(get_session_factory)):
+    """Retrieve a single re-ranker model by UUID; responds 404 if it does not exist."""
+    with session_scope(factory) as session:
+        model = session.get(RerankerModel, reranker_id)  # primary-key lookup; None when absent
+        if model is None:
+            raise HTTPException(status_code=404, detail="RerankerModel not found")
+        return _reranker_to_response(model)
+
+
+@router.delete("/rerankers/{reranker_id}", status_code=204)
+def delete_reranker(reranker_id: uuid.UUID, factory=Depends(get_session_factory)):
+    """Delete a re-ranker model by UUID; responds 404 if it does not exist."""
+    with session_scope(factory) as session:
+        model = session.get(RerankerModel, reranker_id)
+        if model is None:
+            raise HTTPException(status_code=404, detail="RerankerModel not found")
+        session.delete(model)  # NOTE(review): presumably committed when session_scope exits — confirm
+
+
+@router.get(
+    "/prediction-sets/{set_id}/rerank.tsv",
+    summary="Apply a trained re-ranker to predictions",
+    response_class=StreamingResponse,
+)
+def download_reranked_predictions(
+    set_id: uuid.UUID,
+    reranker_id: uuid.UUID = Query(..., description="UUID of the trained RerankerModel to apply"),
+    min_score: float | None = Query(None, ge=0.0, le=1.0, description="Minimum re-ranker score threshold"),
+    factory=Depends(get_session_factory),
+) -> StreamingResponse:
+    """Stream predictions re-scored by a trained LightGBM model.
+
+    Each row includes the original prediction data plus a ``reranker_score``
+    column (probability 0–1, higher = more likely correct).  Rows are sorted
+    by descending score within each protein; rows scoring below ``min_score``
+    are dropped.  An empty prediction set yields the same header and no rows.
+    """
+    import pandas as pd
+
+    # Single source of truth for the TSV layout, shared by the empty and non-empty paths.
+    _RERANK_COLUMNS = [
+        "protein_accession", "go_id", "aspect", "reranker_score", "distance",
+        "ref_protein_accession", "evidence_code", "qualifier",
+    ]
+    header = ("\t".join(_RERANK_COLUMNS) + "\n").encode()
+    headers = {"Content-Disposition": f'attachment; filename="reranked_{set_id}.tsv"'}
+
+    with session_scope(factory) as session:
+        ps = session.get(PredictionSet, set_id)
+        if ps is None:
+            raise HTTPException(status_code=404, detail="PredictionSet not found")
+        rm = session.get(RerankerModel, reranker_id)
+        if rm is None:
+            raise HTTPException(status_code=404, detail="RerankerModel not found")
+        model_str = rm.model_data
+
+        records: list[dict[str, Any]] = []
+        for pred, go_id, aspect in (
+            session.query(GOPrediction, GOTerm.go_id, GOTerm.aspect)
+            .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+            .filter(GOPrediction.prediction_set_id == set_id)
+            .yield_per(5000)
+        ):
+            records.append({
+                "protein_accession": pred.protein_accession,
+                "go_id": go_id,
+                "aspect": aspect or "",
+                "distance": pred.distance,
+                "ref_protein_accession": pred.ref_protein_accession or "",
+                "qualifier": pred.qualifier or "",
+                "evidence_code": pred.evidence_code or "",
+                "identity_nw": pred.identity_nw,
+                "similarity_nw": pred.similarity_nw,
+                "alignment_score_nw": pred.alignment_score_nw,
+                "gaps_pct_nw": pred.gaps_pct_nw,
+                "alignment_length_nw": pred.alignment_length_nw,
+                "identity_sw": pred.identity_sw,
+                "similarity_sw": pred.similarity_sw,
+                "alignment_score_sw": pred.alignment_score_sw,
+                "gaps_pct_sw": pred.gaps_pct_sw,
+                "alignment_length_sw": pred.alignment_length_sw,
+                "length_query": pred.length_query,
+                "length_ref": pred.length_ref,
+                "query_taxonomy_id": pred.query_taxonomy_id,
+                "ref_taxonomy_id": pred.ref_taxonomy_id,
+                "taxonomic_lca": pred.taxonomic_lca,
+                "taxonomic_distance": pred.taxonomic_distance,
+                "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
+                "taxonomic_relation": pred.taxonomic_relation or "",
+                "vote_count": pred.vote_count,
+                "k_position": pred.k_position,
+                "go_term_frequency": pred.go_term_frequency,
+                "ref_annotation_density": pred.ref_annotation_density,
+                "neighbor_distance_std": pred.neighbor_distance_std,
+                "label": 0,
+            })
+
+    if not records:
+        # No predictions: still return a well-formed TSV carrying the full header.
+        return StreamingResponse(
+            iter([header]), media_type="text/tab-separated-values", headers=headers
+        )
+
+    df = pd.DataFrame(records)
+    model = model_from_string(model_str)
+    scores = reranker_predict(model, df)
+    df["reranker_score"] = scores
+
+    # Sort by protein then descending score
+    df = df.sort_values(["protein_accession", "reranker_score"], ascending=[True, False])
+
+    def _generate() -> Iterator[bytes]:
+        yield header
+        for _, row in df.iterrows():
+            if min_score is not None and row["reranker_score"] < min_score:
+                continue
+            line = "\t".join([
+                str(row["protein_accession"]),
+                str(row["go_id"]),
+                str(row["aspect"]),
+                f"{row['reranker_score']:.6f}",
+                str(row["distance"]) if pd.notna(row["distance"]) else "",
+                str(row["ref_protein_accession"]),
+                str(row["evidence_code"]),
+                str(row["qualifier"]),
+            ]) + "\n"
+            yield line.encode()
+
+    return StreamingResponse(
+        _generate(),
+        media_type="text/tab-separated-values",
+        headers=headers,
+    )
+
+
+@router.get("/prediction-sets/{set_id}/reranker-metrics")
+def compute_reranker_metrics(
+    set_id: uuid.UUID,
+    reranker_id: uuid.UUID = Query(..., description="UUID of the trained RerankerModel"),
+    evaluation_set_id: uuid.UUID = Query(..., description="UUID of the EvaluationSet"),
+    category: str = Query("nk", pattern="^(nk|lk|pk)$"),
+    factory=Depends(get_session_factory),
+):
+    """Compute CAFA Fmax and AUC-PR using re-ranker scores instead of ScoringConfig.
+
+    Applies the trained LightGBM model to all predictions in the PredictionSet,
+    then evaluates against the temporal ground truth of the EvaluationSet.
+
+    This closes the full re-ranker loop: train → apply → evaluate.
+    """
+    import pandas as pd
+
+    with session_scope(factory) as session:
+        ps = session.get(PredictionSet, set_id)
+        if ps is None:
+            raise HTTPException(status_code=404, detail="PredictionSet not found")
+        rm = session.get(RerankerModel, reranker_id)
+        if rm is None:
+            raise HTTPException(status_code=404, detail="RerankerModel not found")
+        es = session.get(EvaluationSet, evaluation_set_id)
+        if es is None:
+            raise HTTPException(status_code=404, detail="EvaluationSet not found")
+
+        model_str = rm.model_data  # copied out so it can be used after the session block ends
+        reranker_name = rm.name
+
+        eval_data = compute_evaluation_data(
+            session,
+            old_annotation_set_id=es.old_annotation_set_id,
+            new_annotation_set_id=es.new_annotation_set_id,
+            ontology_snapshot_id=ps.ontology_snapshot_id,
+        )
+
+        records: list[dict[str, Any]] = []  # one feature dict per prediction row, all held in memory
+        for pred, go_id in (
+            session.query(GOPrediction, GOTerm.go_id)
+            .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+            .filter(GOPrediction.prediction_set_id == set_id)
+            .yield_per(5000)
+        ):
+            records.append({
+                "protein_accession": pred.protein_accession,
+                "go_id": go_id,
+                "distance": pred.distance,
+                "qualifier": pred.qualifier or "",
+                "evidence_code": pred.evidence_code or "",
+                "identity_nw": pred.identity_nw,
+                "similarity_nw": pred.similarity_nw,
+                "alignment_score_nw": pred.alignment_score_nw,
+                "gaps_pct_nw": pred.gaps_pct_nw,
+                "alignment_length_nw": pred.alignment_length_nw,
+                "identity_sw": pred.identity_sw,
+                "similarity_sw": pred.similarity_sw,
+                "alignment_score_sw": pred.alignment_score_sw,
+                "gaps_pct_sw": pred.gaps_pct_sw,
+                "alignment_length_sw": pred.alignment_length_sw,
+                "length_query": pred.length_query,
+                "length_ref": pred.length_ref,
+                "query_taxonomy_id": pred.query_taxonomy_id,
+                "ref_taxonomy_id": pred.ref_taxonomy_id,
+                "taxonomic_lca": pred.taxonomic_lca,
+                "taxonomic_distance": pred.taxonomic_distance,
+                "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
+                "taxonomic_relation": pred.taxonomic_relation or "",
+                "vote_count": pred.vote_count,
+                "k_position": pred.k_position,
+                "go_term_frequency": pred.go_term_frequency,
+                "ref_annotation_density": pred.ref_annotation_density,
+                "neighbor_distance_std": pred.neighbor_distance_std,
+                "label": 0,  # constant placeholder; NOTE(review): presumably expected by reranker_predict — confirm
+            })
+
+    if not records:  # nothing to score: return zeroed metrics without loading the model
+        return {
+            "prediction_set_id": str(set_id),
+            "reranker_id": str(reranker_id),
+            "reranker_name": reranker_name,
+            "category": category,
+            "fmax": 0.0,
+            "auc_pr": 0.0,
+            "n_predictions": 0,
+            "curve": [],
+        }
+
+    df = pd.DataFrame(records)
+    model = model_from_string(model_str)
+    scores = reranker_predict(model, df)  # assumes one score per row of df, in row order — TODO confirm
+
+    scored: list[dict[str, Any]] = [
+        {
+            "protein_accession": records[i]["protein_accession"],
+            "go_id": records[i]["go_id"],
+            "score": float(scores[i]),  # scores[i] is paired with records[i] by position
+        }
+        for i in range(len(records))
+    ]
+
+    metrics = compute_cafa_metrics(scored, eval_data, category=category)
+
+    return {
+        "prediction_set_id": str(set_id),
+        "reranker_id": str(reranker_id),
+        "reranker_name": reranker_name,
+        **metrics.summary(),
+        "curve": [
+            {
+                "threshold": p.threshold,
+                "precision": p.precision,
+                "recall": p.recall,
+                "f1": p.f1,
+            }
+            for p in metrics.curve
+        ],
+    }
diff --git a/protea/api/routers/showcase.py b/protea/api/routers/showcase.py
new file mode 100644
index 0000000..6bc415a
--- /dev/null
+++ b/protea/api/routers/showcase.py
@@ -0,0 +1,160 @@
+"""Showcase endpoint — aggregates platform stats and best evaluation results."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter, Depends
+from sqlalchemy import func
+from sqlalchemy.orm import Session, sessionmaker
+
+from protea.api.deps import get_session_factory
+from protea.infrastructure.orm.models.annotation.evaluation_result import EvaluationResult
+from protea.infrastructure.orm.models.embedding.go_prediction import GOPrediction
+from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
+from protea.infrastructure.orm.models.embedding.sequence_embedding import SequenceEmbedding
+from protea.infrastructure.orm.models.protein.protein import Protein
+from protea.infrastructure.orm.models.sequence.sequence import Sequence
+from protea.infrastructure.session import session_scope
+
+router = APIRouter(prefix="/showcase", tags=["showcase"])
+
+
+def _derive_method(
+    scoring_config_id: Any, reranker_model_id: Any
+) -> tuple[str, str]:
+    """Map the nullable FK columns to (method_key, human_label); a re-ranker wins over a scoring config."""
+    if reranker_model_id is None and scoring_config_id is None:
+        return "knn_baseline", "KNN (embedding distance)"
+    if reranker_model_id is None:
+        return "knn_scored", "KNN + Scoring"
+    return "knn_reranker", "KNN + Re-ranker"
+
+
+# Method display order
+_METHOD_ORDER = ["knn_baseline", "knn_scored", "knn_reranker"]
+_ASPECTS = ["BPO", "MFO", "CCO"]
+
+
+@router.get("", summary="Platform showcase data")
+def get_showcase(
+    factory: sessionmaker[Session] = Depends(get_session_factory),
+) -> dict[str, Any]:
+    """Aggregate stats, best evaluation metrics, and method comparison for the
+    landing page.  Returns a single JSON object so the frontend needs only one
+    fetch on mount."""
+
+    with session_scope(factory) as session:
+        # ── Protein stats (mirrors /proteins/stats but lighter) ──────────
+        total_proteins = session.query(func.count(Protein.accession)).scalar() or 0
+        canonical_proteins = (
+            session.query(func.count(Protein.accession))
+            .filter(Protein.is_canonical.is_(True))
+            .scalar()
+            or 0
+        )
+
+        # ── Counts ───────────────────────────────────────────────────────
+        total_sequences = session.query(func.count(Sequence.id)).scalar() or 0
+        total_embeddings = session.query(func.count(SequenceEmbedding.id)).scalar() or 0
+        total_prediction_sets = session.query(func.count(PredictionSet.id)).scalar() or 0
+        total_predictions = session.query(func.count(GOPrediction.id)).scalar() or 0
+        total_rerankers = session.query(func.count(RerankerModel.id)).scalar() or 0
+
+        # ── Evaluation results ───────────────────────────────────────────
+        eval_rows = session.query(EvaluationResult).all()
+        total_evaluations = len(eval_rows)
+
+        # Group by category → method, track best fmax per aspect
+        _CATEGORIES = ["NK", "LK", "PK"]
+        # best_fmax[category][aspect] = {fmax, method, ...}
+        best_fmax: dict[str, dict[str, dict[str, Any]]] = {}
+        # best_raw[(category, aspect)] = unrounded winning fmax (best_fmax only keeps it rounded)
+        best_raw: dict[tuple[str, str], float] = {}
+        # method_best[category][method_key] = {label, BPO: {fmax}, ...}
+        method_best: dict[str, dict[str, dict[str, Any]]] = {}
+
+        for er in eval_rows:
+            method_key, method_label = _derive_method(er.scoring_config_id, er.reranker_model_id)
+            results = er.results or {}
+
+            for cat in _CATEGORIES:
+                cat_data = results.get(cat, {})
+                if not cat_data:
+                    continue
+
+                if cat not in method_best:
+                    method_best[cat] = {}
+                if method_key not in method_best[cat]:
+                    method_best[cat][method_key] = {
+                        "label": method_label,
+                        **{a: {"fmax": None} for a in _ASPECTS},
+                    }
+
+                for aspect in _ASPECTS:
+                    aspect_data = cat_data.get(aspect, {})
+                    fmax = aspect_data.get("fmax")
+                    if fmax is None:
+                        continue
+
+                    # Update method-level best for this category
+                    cur = method_best[cat][method_key][aspect].get("fmax")
+                    if cur is None or fmax > cur:
+                        method_best[cat][method_key][aspect] = {"fmax": round(fmax, 4)}
+
+                    # Update global best for this category (compare unrounded values)
+                    prev = best_raw.get((cat, aspect))
+                    if prev is None or fmax > prev:
+                        best_raw[(cat, aspect)] = fmax
+                        best_fmax.setdefault(cat, {})[aspect] = {
+                            "fmax": round(fmax, 4),
+                            "method": method_key,
+                            "method_label": method_label,
+                            "evaluation_result_id": str(er.id),
+                        }
+
+        # Build ordered method_comparison per category
+        method_comparison: dict[str, list[dict[str, Any]]] = {}
+        for cat in _CATEGORIES:
+            cat_methods = method_best.get(cat, {})
+            cat_list: list[dict[str, Any]] = []
+            for mk in _METHOD_ORDER:
+                if mk in cat_methods:
+                    entry: dict[str, Any] = {
+                        "method": mk,
+                        "label": cat_methods[mk]["label"],
+                    }
+                    for aspect in _ASPECTS:
+                        entry[aspect] = cat_methods[mk][aspect]
+                    cat_list.append(entry)
+            if cat_list:
+                method_comparison[cat] = cat_list
+
+        # Pipeline stages
+        pipeline_stages = [
+            {"name": "sequences", "count": total_sequences, "href": "/proteins"},
+            {"name": "embeddings", "count": total_embeddings, "href": "/embeddings"},
+            {"name": "predictions", "count": total_predictions, "href": "/functional-annotation"},
+            {"name": "reranker_models", "count": total_rerankers, "href": "/reranker"},
+            {"name": "evaluations", "count": total_evaluations, "href": "/evaluation"},
+        ]
+
+        return {
+            "protein_stats": {
+                "total": total_proteins,
+                "canonical": canonical_proteins,
+            },
+            "best_fmax": best_fmax if best_fmax else {},
+            "method_comparison": method_comparison,
+            "counts": {
+                "proteins": total_proteins,
+                "sequences": total_sequences,
+                "embeddings": total_embeddings,
+                "prediction_sets": total_prediction_sets,
+                "predictions": total_predictions,
+                "reranker_models": total_rerankers,
+                "evaluations": total_evaluations,
+            },
+            "pipeline_stages": pipeline_stages,
+        }
diff --git a/protea/api/routers/support.py b/protea/api/routers/support.py
index 79f3dbe..65228da 100644
--- a/protea/api/routers/support.py
+++ b/protea/api/routers/support.py
@@ -4,8 +4,8 @@
 
 from fastapi import APIRouter, Depends, Query
 from pydantic import BaseModel, Field
-from starlette.requests import Request
 
+from protea.api.deps import get_session_factory
 from protea.infrastructure.orm.models.support_entry import SupportEntry
 from protea.infrastructure.session import session_scope
 
@@ -16,10 +16,6 @@
 _PAGE_LIMIT = 100
 
 
-def get_session_factory(request: Request):
-    return request.app.state.session_factory
-
-
 class SupportCreate(BaseModel):
     comment: str | None = Field(default=None, max_length=_MAX_COMMENT_LENGTH)
 
diff --git a/protea/config/system.yaml b/protea/config/system.yaml
index 5cd11ca..11af208 100644
--- a/protea/config/system.yaml
+++ b/protea/config/system.yaml
@@ -6,3 +6,6 @@ queue:
 
 storage:
   artifacts_dir: storage/evaluation_artifacts
+
+admin:
+  token: protea-admin  # NOTE(review): default secret committed to VCS — confirm deployments override it
diff --git a/protea/core/feature_engineering.py b/protea/core/feature_engineering.py
index 056a563..12760ba 100644
--- a/protea/core/feature_engineering.py
+++ b/protea/core/feature_engineering.py
@@ -129,6 +129,22 @@ def _get_ncbi() -> NCBITaxa:
     return _ncbi
 
 
+def warmup_taxonomy_db() -> None:
+    """Eagerly initialise the NCBITaxa database.
+
+    Meant to run at worker startup so that the first-run download (~100 MB)
+    finishes before any batch is processed rather than mid-flight.
+    Does nothing when ``_ETE3_AVAILABLE`` is false.
+    """
+    import logging
+
+    if _ETE3_AVAILABLE:
+        logger = logging.getLogger(__name__)
+        logger.info("Warming up NCBI taxonomy database...")
+        _get_ncbi()
+        logger.info("NCBI taxonomy database ready.")
+
+
 @lru_cache(maxsize=100_000)
 def _cached_lineage(tid: int) -> list[int]:
     return _get_ncbi().get_lineage(tid)  # type: ignore[return-value]
diff --git a/protea/core/metrics.py b/protea/core/metrics.py
index ba50be9..8577a89 100644
--- a/protea/core/metrics.py
+++ b/protea/core/metrics.py
@@ -5,7 +5,7 @@
 
 CAFA protocol summary
 ---------------------
-- Evaluate only on proteins present in the ground truth (NK or LK).
+- Evaluate only on proteins present in the ground truth (NK, LK, or PK).
 - At each score threshold t:
     precision(t) = mean over proteins-with-predictions of |pred ∩ true| / |pred|
     recall(t)    = mean over ALL ground-truth proteins of |pred ∩ true| / |true|
@@ -41,7 +41,7 @@ class PRPoint:
 class CAFAMetrics:
     """CAFA evaluation results for one (PredictionSet, ScoringConfig, category) triple."""
 
-    category: str  # "nk" or "lk"
+    category: str  # "nk", "lk", or "pk"
     fmax: float
     threshold_at_fmax: float
     auc_pr: float
@@ -79,18 +79,16 @@ def compute_cafa_metrics(
     evaluation_data:
         Ground truth from ``compute_evaluation_data()``.
     category:
-        ``"nk"`` (no-knowledge) or ``"lk"`` (limited-knowledge).
+        ``"nk"`` (no-knowledge), ``"lk"`` (limited-knowledge), or ``"pk"`` (prior-knowledge).
 
     Returns
     -------
     CAFAMetrics
     """
-    if category not in ("nk", "lk"):
-        raise ValueError(f"category must be 'nk' or 'lk', got {category!r}")
+    if category not in ("nk", "lk", "pk"):
+        raise ValueError(f"category must be 'nk', 'lk', or 'pk', got {category!r}")
 
-    ground_truth: dict[str, set[str]] = (
-        evaluation_data.nk if category == "nk" else evaluation_data.lk
-    )
+    ground_truth: dict[str, set[str]] = getattr(evaluation_data, category)
 
     # Group predictions by protein, keep only proteins in ground truth
     preds_by_protein: dict[str, list[tuple[float, str]]] = defaultdict(list)
diff --git a/protea/core/operations/compute_embeddings.py b/protea/core/operations/compute_embeddings.py
index 633a737..9abc1f0 100644
--- a/protea/core/operations/compute_embeddings.py
+++ b/protea/core/operations/compute_embeddings.py
@@ -519,7 +519,7 @@ def _update_parent_progress(self, session: Session, parent_job_id: UUID, emit: E
             .returning(Job.progress_current, Job.progress_total)
         ).fetchone()
 
-        if row is None or row.progress_current < row.progress_total:
+        if row is None or row.progress_current != row.progress_total:
             return
 
         closed = session.execute(
@@ -552,13 +552,18 @@ def _update_parent_progress(self, session: Session, parent_job_id: UUID, emit: E
 
 # Keyed by (model_name, model_backend, device) — one entry per worker process.
 # Workers are long-lived processes, so the model is loaded once and reused for
-# all subsequent batch messages with the same config.
+# all subsequent batch messages with the same config.  Max 1 entry to avoid
+# accumulating multi-GB models in GPU memory when configs change.
 _MODEL_CACHE: dict[tuple[str, str, str], tuple[Any, Any]] = {}
+_MODEL_CACHE_MAX = 1
 
 
 def _get_or_load_model(config: EmbeddingConfig, device: str, emit: EmitFn) -> tuple[Any, Any]:
     key = (config.model_name, config.model_backend, device)
     if key not in _MODEL_CACHE:
+        if len(_MODEL_CACHE) >= _MODEL_CACHE_MAX:  # cache full: make room before loading
+            evict_key = next(iter(_MODEL_CACHE))  # dicts iterate in insertion order → oldest entry
+            del _MODEL_CACHE[evict_key]  # drops only the cache's own reference to that entry
         _MODEL_CACHE[key] = _load_model(config, device, emit)
     return _MODEL_CACHE[key]
 
@@ -873,24 +878,28 @@ def _aggregate_residue_layers(layer_tensors: list[Any], layer_agg: str) -> Any:
     """Combine [L, D] tensors from multiple layers into one [L, D] tensor."""
     import torch
 
-    if layer_agg == "mean":
+    if layer_agg == "last":
+        return layer_tensors[-1]
+    elif layer_agg == "mean":
         return torch.stack(layer_tensors, dim=0).mean(dim=0)
     elif layer_agg == "concat":
         return torch.cat(layer_tensors, dim=-1)
     else:
-        raise ValueError(f"Unknown layer_agg: {layer_agg!r}. Choose: mean, concat")
+        raise ValueError(f"Unknown layer_agg: {layer_agg!r}. Choose: last, mean, concat")
 
 
 def _aggregate_1d(layer_tensors: list[Any], layer_agg: str) -> Any:
     """Combine [D] tensors from multiple layers into one [D] tensor (CLS path)."""
     import torch
 
-    if layer_agg == "mean":
+    if layer_agg == "last":
+        return layer_tensors[-1]
+    elif layer_agg == "mean":
         return torch.stack(layer_tensors, dim=0).mean(dim=0)
     elif layer_agg == "concat":
         return torch.cat(layer_tensors, dim=-1)
     else:
-        raise ValueError(f"Unknown layer_agg: {layer_agg!r}. Choose: mean, concat")
+        raise ValueError(f"Unknown layer_agg: {layer_agg!r}. Choose: last, mean, concat")
 
 
 def _chunk_and_pool(residues: Any, config: EmbeddingConfig) -> list[ChunkEmbedding]:
diff --git a/protea/core/operations/fetch_uniprot_metadata.py b/protea/core/operations/fetch_uniprot_metadata.py
index 552a8e5..5d92980 100644
--- a/protea/core/operations/fetch_uniprot_metadata.py
+++ b/protea/core/operations/fetch_uniprot_metadata.py
@@ -87,6 +87,10 @@ def __init__(self) -> None:
     def execute(
         self, session: Session, payload: dict[str, Any], *, emit: EmitFn
     ) -> OperationResult:
+        self._http_requests = 0
+        self._http_retries = 0
+        self._total_results = None
+
         p = FetchUniProtMetadataPayload.model_validate(payload)
 
         t0 = time.perf_counter()
diff --git a/protea/core/operations/insert_proteins.py b/protea/core/operations/insert_proteins.py
index 0bf3963..a12aad6 100644
--- a/protea/core/operations/insert_proteins.py
+++ b/protea/core/operations/insert_proteins.py
@@ -69,6 +69,10 @@ def __init__(self) -> None:
     def execute(
         self, session: Session, payload: dict[str, Any], *, emit: EmitFn
     ) -> OperationResult:
+        self._http_requests = 0
+        self._http_retries = 0
+        self._total_results = None
+
         p = InsertProteinsPayload.model_validate(payload)
 
         t0 = time.perf_counter()
diff --git a/protea/core/operations/predict_go_terms.py b/protea/core/operations/predict_go_terms.py
index 9e01430..7e54d36 100644
--- a/protea/core/operations/predict_go_terms.py
+++ b/protea/core/operations/predict_go_terms.py
@@ -53,7 +53,7 @@
 # at KNN time with negligible accuracy loss for cosine similarity.
 # Limited to 1 entry — evicts previous reference on config change.
 # ---------------------------------------------------------------------------
-_REF_CACHE: dict[tuple[str, str], dict[str, Any]] = {}
+_REF_CACHE: dict[tuple[str, str, bool], dict[str, Any]] = {}
 _REF_CACHE_MAX = 1
 
 # ---------------------------------------------------------------------------
@@ -258,6 +258,7 @@ class PredictGOTermsPayload(ProteaPayload, frozen=True):
     # Feature engineering (opt-in)
     compute_alignments: bool = False
     compute_taxonomy: bool = False
+    compute_reranker_features: bool = False
 
     # Per-aspect KNN indices (opt-in)
     # When True, three separate KNN indices are built — one per GO aspect (P/F/C).
@@ -299,6 +300,7 @@ class PredictGOTermsBatchPayload(ProteaPayload, frozen=True):
     faiss_hnsw_ef_search: int = 64
     compute_alignments: bool = False
     compute_taxonomy: bool = False
+    compute_reranker_features: bool = False
     aspect_separated_knn: bool = True
 
 
@@ -421,6 +423,7 @@ def execute(
                             "faiss_hnsw_ef_search": p.faiss_hnsw_ef_search,
                             "compute_alignments": p.compute_alignments,
                             "compute_taxonomy": p.compute_taxonomy,
+                            "compute_reranker_features": p.compute_reranker_features,
                             "aspect_separated_knn": p.aspect_separated_knn,
                         },
                     },
@@ -994,6 +997,30 @@ def _run_aspect_separated_knn(
         seen_per_query: dict[str, set[int]] = {acc: set() for acc in valid_accessions}
         pair_features: dict[tuple[str, str], dict[str, Any]] = {}
 
+        compute_rr = p.compute_reranker_features
+
+        # Pre-compute per-query reranker stats across all aspects
+        rr_distance_std_per_query: dict[str, float] = {}
+        rr_vote_count_per_query: dict[str, dict[int, int]] = {}
+        rr_k_position_per_query: dict[str, dict[int, int]] = {}
+        # go_term_frequency and ref_annotation_density are computed per-aspect below
+        all_go_term_freq: dict[int, int] = {}
+        all_ref_ann_density: dict[str, int] = {}
+
+        if compute_rr:
+            for q_idx, q_acc in enumerate(valid_accessions):
+                rr_vote_count_per_query[q_acc] = {}
+                rr_k_position_per_query[q_acc] = {}
+                all_distances = []
+                for aspect in _ASPECTS:
+                    aspect_neighbors = neighbors_by_aspect[aspect]
+                    if q_idx < len(aspect_neighbors):
+                        for _, d in aspect_neighbors[q_idx]:
+                            all_distances.append(d)
+                rr_distance_std_per_query[q_acc] = (
+                    float(np.std(all_distances)) if len(all_distances) > 1 else 0.0
+                )
+
         for aspect in _ASPECTS:
             unique_neighbors_aspect: set[str] = set()
             for top_refs in neighbors_by_aspect[aspect]:
@@ -1016,6 +1043,29 @@ def _run_aspect_separated_knn(
                     session, annotation_set_id, unique_neighbors_aspect, aspect=aspect
                 )
 
+            # Pre-compute reranker aggregates for this aspect's go_map
+            if compute_rr:
+                for acc, anns in go_map.items():
+                    if acc not in all_ref_ann_density:
+                        all_ref_ann_density[acc] = 0
+                    all_ref_ann_density[acc] += len(anns)
+                    for ann in anns:
+                        gtid = ann["go_term_id"]
+                        all_go_term_freq[gtid] = all_go_term_freq.get(gtid, 0) + 1
+
+                # vote_count and k_position per query per aspect
+                for q_idx, q_acc in enumerate(valid_accessions):
+                    vc = rr_vote_count_per_query.setdefault(q_acc, {})
+                    kp = rr_k_position_per_query.setdefault(q_acc, {})
+                    aspect_neighbors = neighbors_by_aspect[aspect]
+                    if q_idx < len(aspect_neighbors):
+                        for k_pos, (ref_acc, _) in enumerate(aspect_neighbors[q_idx], 1):
+                            for ann in go_map.get(ref_acc, []):
+                                gtid = ann["go_term_id"]
+                                vc[gtid] = vc.get(gtid, 0) + 1
+                                if gtid not in kp:
+                                    kp[gtid] = k_pos
+
             for q_acc, top_refs in zip(valid_accessions, neighbors_by_aspect[aspect], strict=False):
                 seen_terms = seen_per_query[q_acc]
 
@@ -1054,6 +1104,12 @@ def _run_aspect_separated_knn(
                             pred["qualifier"] = ann["qualifier"]
                         if ann.get("evidence_code"):
                             pred["evidence_code"] = ann["evidence_code"]
+                        if compute_rr:
+                            pred["vote_count"] = rr_vote_count_per_query.get(q_acc, {}).get(go_term_id, 1)
+                            pred["k_position"] = rr_k_position_per_query.get(q_acc, {}).get(go_term_id, 1)
+                            pred["go_term_frequency"] = all_go_term_freq.get(go_term_id, 0)
+                            pred["ref_annotation_density"] = all_ref_ann_density.get(ref_acc, 0)
+                            pred["neighbor_distance_std"] = rr_distance_std_per_query.get(q_acc, 0.0)
                         for key in (
                             "identity_nw",
                             "similarity_nw",
@@ -1236,10 +1292,34 @@ def _predict_batch(
         go_map = ref_data["go_map"]
         predictions: list[dict[str, Any]] = []
 
+        # Pre-compute reranker aggregates if requested
+        compute_rr = p.compute_reranker_features
+        go_term_freq: dict[int, int] = {}
+        ref_ann_density: dict[str, int] = {}
+        if compute_rr:
+            for acc, anns in go_map.items():
+                ref_ann_density[acc] = len(anns)
+                for ann in anns:
+                    gtid = ann["go_term_id"]
+                    go_term_freq[gtid] = go_term_freq.get(gtid, 0) + 1
+
         for q_acc, top_refs in zip(query_accessions, neighbors, strict=False):
             seen_terms: set[int] = set()
             pair_features: dict[str, dict[str, Any]] = {}
 
+            # Reranker: pre-compute per-query stats
+            rr_distance_std: float | None = None
+            rr_vote_count: dict[int, int] = {}
+            rr_k_position: dict[int, int] = {}
+            if compute_rr and top_refs:
+                rr_distance_std = float(np.std([d for _, d in top_refs])) if len(top_refs) > 1 else 0.0
+                for k_pos, (ref_acc, _) in enumerate(top_refs, 1):
+                    for ann in go_map.get(ref_acc, []):
+                        gtid = ann["go_term_id"]
+                        rr_vote_count[gtid] = rr_vote_count.get(gtid, 0) + 1
+                        if gtid not in rr_k_position:
+                            rr_k_position[gtid] = k_pos
+
             for ref_acc, distance in top_refs:
                 if ref_acc not in pair_features:
                     features: dict[str, Any] = {}
@@ -1279,6 +1359,12 @@ def _predict_batch(
                         pred["qualifier"] = ann["qualifier"]
                     if ann.get("evidence_code"):
                         pred["evidence_code"] = ann["evidence_code"]
+                    if compute_rr:
+                        pred["vote_count"] = rr_vote_count.get(go_term_id, 1)
+                        pred["k_position"] = rr_k_position.get(go_term_id, 1)
+                        pred["go_term_frequency"] = go_term_freq.get(go_term_id, 0)
+                        pred["ref_annotation_density"] = ref_ann_density.get(ref_acc, 0)
+                        pred["neighbor_distance_std"] = rr_distance_std
                     for key in (
                         "identity_nw",
                         "similarity_nw",
@@ -1439,6 +1525,11 @@ def execute(
                         "taxonomic_distance": pred.get("taxonomic_distance"),
                         "taxonomic_common_ancestors": pred.get("taxonomic_common_ancestors"),
                         "taxonomic_relation": pred.get("taxonomic_relation"),
+                        "vote_count": pred.get("vote_count"),
+                        "k_position": pred.get("k_position"),
+                        "go_term_frequency": pred.get("go_term_frequency"),
+                        "ref_annotation_density": pred.get("ref_annotation_density"),
+                        "neighbor_distance_std": pred.get("neighbor_distance_std"),
                     }
                     for pred in p.predictions
                 ],
@@ -1466,7 +1557,7 @@ def _update_parent_progress(self, session: Session, parent_job_id: UUID, emit: E
             .returning(Job.progress_current, Job.progress_total)
         ).fetchone()
 
-        if row is None or row.progress_current < row.progress_total:
+        if row is None or row.progress_current != row.progress_total:
             return
 
         closed = session.execute(
diff --git a/protea/core/operations/run_cafa_evaluation.py b/protea/core/operations/run_cafa_evaluation.py
index ddfd1b6..a266a38 100644
--- a/protea/core/operations/run_cafa_evaluation.py
+++ b/protea/core/operations/run_cafa_evaluation.py
@@ -21,6 +21,9 @@
 from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
 from protea.infrastructure.orm.models.embedding.go_prediction import GOPrediction
 from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+from protea.infrastructure.orm.models.embedding.reranker_model import (
+    RerankerModel as RerankerModelORM,
+)
 from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
 
 # Namespace labels used by cafaeval OBO parser
@@ -38,6 +41,17 @@ class RunCafaEvaluationPayload(ProteaPayload, frozen=True):
     max_distance: float | None = Field(default=None, ge=0.0, le=2.0)
     artifacts_dir: str | None = Field(default=None)
     scoring_config_id: str | None = Field(default=None)
+    reranker_id_nk: str | None = Field(default=None)
+    reranker_id_lk: str | None = Field(default=None)
+    reranker_id_pk: str | None = Field(default=None)
+    rerankers: dict[str, dict[str, str]] | None = Field(
+        default=None,
+        description=(
+            "Nested mapping of category → aspect → reranker_model_id. "
+            "E.g. {\"nk\": {\"bpo\": \"uuid\", \"mfo\": \"uuid\"}, \"lk\": {...}}. "
+            "Overrides the flat reranker_id_* fields when present."
+        ),
+    )
     ia_file: str | None = Field(
         default=None,
         description=(
@@ -148,6 +162,46 @@ def execute(
                 weights=dict(sc.weights),
             )
 
+        # Load per-category (and optionally per-aspect) reranker models before session commit.
+        # reranker_models: setting → aspect → model_data  (aspect="" means single model for all aspects)
+        reranker_models: dict[str, dict[str, str]] = {}
+        reranker_config_snapshot: dict[str, dict[str, str]] | None = None  # for persisting in EvaluationResult
+
+        if p.rerankers:
+            # New nested mapping: {"nk": {"bpo": "uuid", "mfo": "uuid", ...}, ...}
+            reranker_config_snapshot = {}
+            _aspect_map = {"bpo": "P", "mfo": "F", "cco": "C"}
+            for cat_key, aspect_map in p.rerankers.items():
+                setting = cat_key.upper()
+                reranker_models[setting] = {}
+                reranker_config_snapshot[cat_key] = {}
+                for aspect_key, rid_str in aspect_map.items():
+                    rid = uuid.UUID(rid_str)
+                    rm = session.get(RerankerModelORM, rid)
+                    if rm is None:
+                        raise ValueError(f"RerankerModel {rid_str} not found")
+                    aspect_char = _aspect_map.get(aspect_key, aspect_key)
+                    reranker_models[setting][aspect_char] = rm.model_data
+                    reranker_config_snapshot[cat_key][aspect_key] = rid_str
+                    emit("run_cafa_evaluation.reranker_loaded", None, {
+                        "setting": setting, "aspect": aspect_key,
+                        "reranker_id": str(rid), "name": rm.name,
+                    }, "info")
+        else:
+            # Legacy flat fields: one model per category (all aspects)
+            for setting, field in [("NK", p.reranker_id_nk), ("LK", p.reranker_id_lk), ("PK", p.reranker_id_pk)]:
+                if field:
+                    rid = uuid.UUID(field)
+                    rm = session.get(RerankerModelORM, rid)
+                    if rm is None:
+                        raise ValueError(f"RerankerModel {field} not found")
+                    reranker_models[setting] = {"": rm.model_data}  # "" = all aspects
+                    emit(
+                        "run_cafa_evaluation.reranker_loaded", None,
+                        {"setting": setting, "reranker_id": str(rid), "name": rm.name},
+                        "info",
+                    )
+
         # Pre-generate result_id so the artifact directory name matches the DB row.
         result_id = uuid.uuid4()
 
@@ -201,27 +255,25 @@ def execute(
             self._write_gt(data.known, known_path)
             self._write_gt(data.pk_known, pk_known_path)
 
-            # Write predictions (CAFA format) filtered to delta proteins
-            pred_dir = os.path.join(gt_dir, "predictions")
-            os.makedirs(pred_dir, exist_ok=True)
-            pred_path = os.path.join(pred_dir, "predictions.tsv")
             delta_proteins = set(data.nk) | set(data.lk) | set(data.pk)
             emit(
                 "run_cafa_evaluation.writing_predictions",
                 None,
-                {
-                    "delta_proteins": len(delta_proteins),
-                },
+                {"delta_proteins": len(delta_proteins)},
                 "info",
             )
-            self._write_predictions(
-                session,
-                pred_set_id,
-                delta_proteins,
-                p.max_distance,
-                pred_path,
-                scoring_config_snapshot,
-            )
+
+            # If any reranker is set, write per-setting prediction files;
+            # otherwise write a single shared file.
+            has_rerankers = bool(reranker_models)
+            if not has_rerankers:
+                pred_dir = os.path.join(gt_dir, "predictions")
+                os.makedirs(pred_dir, exist_ok=True)
+                pred_path = os.path.join(pred_dir, "predictions.tsv")
+                self._write_predictions(
+                    session, pred_set_id, delta_proteins, p.max_distance,
+                    pred_path, scoring_config_snapshot,
+                )
 
             # No-op commit: releases the DB connection back to the pool before
             # cafaeval forks worker processes via multiprocessing.Pool.  Forked
@@ -237,6 +289,25 @@ def execute(
                 ("LK", lk_path, None),
                 ("PK", pk_path, pk_known_path),
             ]:
+                # Write per-setting predictions if this setting has a reranker
+                if has_rerankers:
+                    pred_dir = os.path.join(gt_dir, f"predictions_{setting}")
+                    os.makedirs(pred_dir, exist_ok=True)
+                    pred_path = os.path.join(pred_dir, "predictions.tsv")
+                    rr_aspect_map = reranker_models.get(setting, {})
+                    if "" in rr_aspect_map:
+                        # Single model for all aspects (legacy flat field)
+                        self._write_predictions(
+                            session, pred_set_id, delta_proteins, p.max_distance,
+                            pred_path, scoring_config_snapshot,
+                            reranker_model_str=rr_aspect_map[""],
+                        )
+                    else:
+                        # Per-aspect models
+                        self._write_predictions_per_aspect(
+                            session, pred_set_id, delta_proteins, p.max_distance,
+                            pred_path, rr_aspect_map,
+                        )
                 emit("run_cafa_evaluation.evaluating", None, {"setting": setting}, "info")
                 try:
                     # Reset SIGTERM/SIGINT to defaults before cafaeval forks pool
@@ -292,11 +363,31 @@ def execute(
                     results[setting] = {}
 
         # ── 3. Persist EvaluationResult ───────────────────────────────────────
+        # For backwards compat, pick a single representative reranker_model_id
+        first_reranker_id: uuid.UUID | None = None
+        if reranker_config_snapshot:
+            for _cat_map in reranker_config_snapshot.values():
+                for _rid_str in _cat_map.values():
+                    first_reranker_id = uuid.UUID(_rid_str)
+                    break
+                if first_reranker_id:
+                    break
+        elif reranker_models:
+            # Flat per-category fields: build config snapshot and pick first ID
+            reranker_config_snapshot = {}
+            for setting, field in [("nk", p.reranker_id_nk), ("lk", p.reranker_id_lk), ("pk", p.reranker_id_pk)]:
+                if field:
+                    reranker_config_snapshot[setting] = {"all": field}
+                    if first_reranker_id is None:
+                        first_reranker_id = uuid.UUID(field)
+
         eval_result = EvaluationResult(
             id=result_id,
             evaluation_set_id=eval_set_id,
             prediction_set_id=pred_set_id,
             scoring_config_id=uuid.UUID(p.scoring_config_id) if p.scoring_config_id else None,
+            reranker_model_id=first_reranker_id,
+            reranker_config=reranker_config_snapshot,
             results=results,
         )
         session.add(eval_result)
@@ -387,13 +478,22 @@ def _write_predictions(
         max_distance: float | None,
         path: str,
         scoring_config: ScoringConfig | None = None,
+        reranker_model_str: str | None = None,
     ) -> None:
         """Write CAFA-format predictions (protein\\tgo_id\\tscore) for delta proteins.
 
-        If a ScoringConfig is provided, scores are computed via compute_score()
-        using all available signals (embedding similarity, evidence, alignment,
-        taxonomy).  Otherwise falls back to ``1 - cosine_distance / 2``.
+        Scoring priority:
+          1. If ``reranker_model_str`` is provided, apply the LightGBM model to
+             all predictions and use re-ranker probabilities as scores.
+          2. If a ``ScoringConfig`` is provided, compute scores via ``compute_score()``.
+          3. Otherwise fall back to ``1 - cosine_distance / 2``.
         """
+        if reranker_model_str is not None:
+            self._write_predictions_reranked(
+                session, pred_set_id, delta_proteins, max_distance, path, reranker_model_str,
+            )
+            return
+
         q = (
             session.query(GOPrediction, GOTerm)
             .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
@@ -424,6 +524,177 @@ def _write_predictions(
                     score = max(0.0, 1.0 - (pred.distance or 0.0) / 2.0)
                 f.write(f"{pred.protein_accession}\t{gt.go_id}\t{score:.4f}\n")
 
+    def _write_predictions_reranked(
+        self,
+        session: Session,
+        pred_set_id: uuid.UUID,
+        delta_proteins: set[str],
+        max_distance: float | None,
+        path: str,
+        reranker_model_str: str,
+    ) -> None:
+        """Write CAFA-format predictions using LightGBM re-ranker scores.
+
+        Writes an empty file when no prediction passes the filters.
+        """
+        import pandas as pd
+
+        from protea.core.reranker import model_from_string
+        from protea.core.reranker import predict as reranker_predict
+
+        q = (
+            session.query(GOPrediction, GOTerm.go_id)
+            .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+            .filter(GOPrediction.prediction_set_id == pred_set_id)
+            .filter(GOPrediction.protein_accession.in_(delta_proteins))
+        )
+        if max_distance is not None:
+            q = q.filter(GOPrediction.distance <= max_distance)
+
+        records: list[dict[str, Any]] = []
+        for pred, go_id in q.yield_per(5000):
+            records.append({
+                "protein_accession": pred.protein_accession,
+                "go_id": go_id,
+                "distance": pred.distance,
+                "qualifier": pred.qualifier or "",
+                "evidence_code": pred.evidence_code or "",
+                "identity_nw": pred.identity_nw,
+                "similarity_nw": pred.similarity_nw,
+                "alignment_score_nw": pred.alignment_score_nw,
+                "gaps_pct_nw": pred.gaps_pct_nw,
+                "alignment_length_nw": pred.alignment_length_nw,
+                "identity_sw": pred.identity_sw,
+                "similarity_sw": pred.similarity_sw,
+                "alignment_score_sw": pred.alignment_score_sw,
+                "gaps_pct_sw": pred.gaps_pct_sw,
+                "alignment_length_sw": pred.alignment_length_sw,
+                "length_query": pred.length_query,
+                "length_ref": pred.length_ref,
+                "query_taxonomy_id": pred.query_taxonomy_id,
+                "ref_taxonomy_id": pred.ref_taxonomy_id,
+                "taxonomic_lca": pred.taxonomic_lca,
+                "taxonomic_distance": pred.taxonomic_distance,
+                "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
+                "taxonomic_relation": pred.taxonomic_relation or "",
+                "vote_count": pred.vote_count,
+                "k_position": pred.k_position,
+                "go_term_frequency": pred.go_term_frequency,
+                "ref_annotation_density": pred.ref_annotation_density,
+                "neighbor_distance_std": pred.neighbor_distance_std,
+            })
+
+        if not records:
+            with open(path, "w"):
+                pass  # nothing to score: leave an empty predictions file
+            return
+
+        df = pd.DataFrame(records)
+        df["score"] = reranker_predict(model_from_string(reranker_model_str), df)
+        # Deduplicate: keep highest score per (protein, go_id)
+        df = df.sort_values("score", ascending=False).drop_duplicates(
+            subset=["protein_accession", "go_id"], keep="first",
+        )
+
+        with open(path, "w") as f:  # iterate columns directly; iterrows() builds a Series per row
+            for acc, term, score in zip(df["protein_accession"], df["go_id"], df["score"], strict=True):
+                f.write(f"{acc}\t{term}\t{score:.4f}\n")
+
+    def _write_predictions_per_aspect(
+        self,
+        session: Session,
+        pred_set_id: uuid.UUID,
+        delta_proteins: set[str],
+        max_distance: float | None,
+        path: str,
+        aspect_models: dict[str, str],
+    ) -> None:
+        """Write CAFA-format predictions applying per-aspect LightGBM models.
+
+        ``aspect_models`` maps GO aspect char (P/F/C) to model_data strings.
+        Predictions whose aspect has no model fall back to ``1 - distance/2``
+        (a missing distance counts as 0).  Only the highest score per
+        (protein, go_id) is written; no matching rows produce an empty file.
+        """
+        import pandas as pd
+
+        from protea.core.reranker import model_from_string
+        from protea.core.reranker import predict as reranker_predict
+
+        q = (
+            session.query(GOPrediction, GOTerm.go_id, GOTerm.aspect)
+            .join(GOTerm, GOPrediction.go_term_id == GOTerm.id)
+            .filter(GOPrediction.prediction_set_id == pred_set_id)
+            .filter(GOPrediction.protein_accession.in_(delta_proteins))
+        )
+        if max_distance is not None:
+            q = q.filter(GOPrediction.distance <= max_distance)
+
+        records: list[dict[str, Any]] = []
+        for pred, go_id, aspect in q.yield_per(5000):
+            records.append({
+                "protein_accession": pred.protein_accession,
+                "go_id": go_id,
+                "aspect": aspect or "",
+                "distance": pred.distance,
+                "qualifier": pred.qualifier or "",
+                "evidence_code": pred.evidence_code or "",
+                "identity_nw": pred.identity_nw,
+                "similarity_nw": pred.similarity_nw,
+                "alignment_score_nw": pred.alignment_score_nw,
+                "gaps_pct_nw": pred.gaps_pct_nw,
+                "alignment_length_nw": pred.alignment_length_nw,
+                "identity_sw": pred.identity_sw,
+                "similarity_sw": pred.similarity_sw,
+                "alignment_score_sw": pred.alignment_score_sw,
+                "gaps_pct_sw": pred.gaps_pct_sw,
+                "alignment_length_sw": pred.alignment_length_sw,
+                "length_query": pred.length_query,
+                "length_ref": pred.length_ref,
+                "query_taxonomy_id": pred.query_taxonomy_id,
+                "ref_taxonomy_id": pred.ref_taxonomy_id,
+                "taxonomic_lca": pred.taxonomic_lca,
+                "taxonomic_distance": pred.taxonomic_distance,
+                "taxonomic_common_ancestors": pred.taxonomic_common_ancestors,
+                "taxonomic_relation": pred.taxonomic_relation or "",
+                "vote_count": pred.vote_count,
+                "k_position": pred.k_position,
+                "go_term_frequency": pred.go_term_frequency,
+                "ref_annotation_density": pred.ref_annotation_density,
+                "neighbor_distance_std": pred.neighbor_distance_std,
+            })
+
+        if not records:
+            with open(path, "w"):
+                pass  # nothing to score: leave an empty predictions file
+            return
+
+        df = pd.DataFrame(records)
+
+        # Score each aspect group with its corresponding model
+        df["score"] = 0.0
+        for aspect_char, model_str in aspect_models.items():
+            mask = df["aspect"] == aspect_char
+            if mask.any():  # only deserialize a model when it has rows to score
+                df.loc[mask, "score"] = reranker_predict(model_from_string(model_str), df.loc[mask])
+
+        # Fallback for aspects without a model: 1 - distance/2, clipped at 0.  A missing
+        # distance is NaN in the frame, so fill it with 0.0 explicitly: ``NaN or 0.0``
+        # stays NaN and would yield a score of 0.0 instead of the intended 1.0.
+        fallback_mask = ~df["aspect"].isin(set(aspect_models))
+        if fallback_mask.any():
+            fallback_distance = df.loc[fallback_mask, "distance"].astype(float).fillna(0.0)
+            df.loc[fallback_mask, "score"] = (1.0 - fallback_distance / 2.0).clip(lower=0.0)
+
+        # Deduplicate: keep highest score per (protein, go_id)
+        df = df.sort_values("score", ascending=False).drop_duplicates(
+            subset=["protein_accession", "go_id"], keep="first",
+        )
+
+        with open(path, "w") as f:  # iterate columns directly; iterrows() builds a Series per row
+            for acc, term, score in zip(df["protein_accession"], df["go_id"], df["score"], strict=True):
+                f.write(f"{acc}\t{term}\t{score:.4f}\n")
+
     def _parse_results(self, dfs_best: dict) -> dict[str, Any]:
         """Extract per-namespace Fmax metrics from cafaeval dfs_best."""
         ns_results: dict[str, Any] = {}
diff --git a/protea/core/operations/train_reranker.py b/protea/core/operations/train_reranker.py
new file mode 100644
index 0000000..8bd552f
--- /dev/null
+++ b/protea/core/operations/train_reranker.py
@@ -0,0 +1,1487 @@
+"""Train LightGBM re-rankers from temporal holdout pairs.
+
+Provides two operations:
+
+* ``train_reranker`` — single pair (old → new annotation set).
+* ``train_reranker_auto`` — automated multi-split training: generates
+  consecutive pairs from a list of GOA version numbers, concatenates all
+  labeled data, trains one combined model, and evaluates on a held-out
+  test split.
+
+Both operations run entirely in-process (no RabbitMQ coordination).
+"""
+
+from __future__ import annotations
+
+import gc
+import shutil
+import tempfile
+import time
+import uuid
+from pathlib import Path
+from typing import Annotated, Any
+
+import numpy as np
+import pandas as pd
+from pydantic import Field, field_validator
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from protea.core.contracts.operation import EmitFn, OperationResult, ProteaPayload
+from protea.core.evaluation import compute_evaluation_data
+from protea.core.feature_engineering import compute_alignment, compute_taxonomy
+from protea.core.knn_search import search_knn
+from protea.core.metrics import compute_cafa_metrics
+from protea.core.reranker import (
+    ALL_FEATURES,
+    LABEL_COLUMN,
+    model_to_string,
+)
+from protea.core.reranker import (
+    predict as reranker_predict,
+)
+from protea.core.reranker import (
+    train as reranker_train,
+)
+from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
+from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
+from protea.infrastructure.orm.models.embedding.sequence_embedding import (
+    SequenceEmbedding,
+)
+from protea.infrastructure.orm.models.protein.protein import Protein
+from protea.infrastructure.orm.models.sequence.sequence import Sequence
+
+PositiveInt = Annotated[int, Field(gt=0)]  # int that must be strictly greater than 0
+
+_ASPECTS = ("P", "F", "C")  # aspect codes iterated when building per-aspect data
+_ANNOTATION_CHUNK_SIZE = 10_000  # accessions per IN (...) query batch
+_STREAM_CHUNK_SIZE = 2_000  # rows per yield_per() batch when reading embeddings
+
+
+# ---------------------------------------------------------------------------
+# Payload
+# ---------------------------------------------------------------------------
+
+
+class TrainRerankerPayload(ProteaPayload, frozen=True):
+    """Payload for the train_reranker operation."""
+
+    name: str
+    old_annotation_set_id: str
+    new_annotation_set_id: str
+    embedding_config_id: str
+    ontology_snapshot_id: str
+
+    # Evaluation category
+    category: str = "nk"
+
+    # KNN parameters
+    limit_per_entry: PositiveInt = 5
+    distance_threshold: float | None = None
+    search_backend: str = "numpy"
+    metric: str = "cosine"
+    faiss_index_type: str = "Flat"
+    faiss_nlist: int = 100
+    faiss_nprobe: int = 10
+
+    # LightGBM parameters
+    num_boost_round: int = 1000
+    early_stopping_rounds: int = 50
+    val_fraction: float = 0.2
+    neg_pos_ratio: float | None = None
+
+    # Feature computation
+    compute_alignments: bool = False
+    compute_taxonomy: bool = False
+
+    # Per-aspect model (None = all aspects)
+    aspect: str | None = None
+
+    @field_validator(
+        "old_annotation_set_id",
+        "new_annotation_set_id",
+        "embedding_config_id",
+        "ontology_snapshot_id",
+        "name",
+        mode="before",
+    )
+    @classmethod
+    def must_be_non_empty(cls, v: str) -> str:
+        if not isinstance(v, str) or not v.strip():
+            raise ValueError("must be a non-empty string")
+        return v.strip()
+
+    @field_validator("category", mode="before")
+    @classmethod
+    def valid_category(cls, v: str) -> str:
+        if v not in ("nk", "lk", "pk"):
+            raise ValueError("category must be nk, lk, or pk")
+        return v
+
+
+# ---------------------------------------------------------------------------
+# Operation
+# ---------------------------------------------------------------------------
+
+_ASPECT_MAP = {"bpo": "P", "mfo": "F", "cco": "C"}  # payload aspect name -> value matched against the "aspect" column
+
+
+class TrainRerankerOperation:
+    """Trains a LightGBM re-ranker from a single temporal holdout pair.
+
+    Pipeline (all in-process, no RabbitMQ coordination):
+    1. Validate inputs.
+    2. Compute evaluation delta (old → new annotation set).
+    3. Load GO term id/aspect mappings for the ontology snapshot.
+    4. Load reference embeddings (proteins annotated in old set), per aspect.
+    5. Load query embeddings (delta proteins with embeddings).
+    6. Run per-aspect KNN + GO term transfer and label predictions against delta.
+    7. Train LightGBM.
+    8. Compute baseline Fmax (distance-based) and re-ranker Fmax.
+    9. Store RerankerModel in DB.
+    """
+
+    name = "train_reranker"
+
+    def execute(
+        self, session: Session, payload: dict[str, Any], *, emit: EmitFn
+    ) -> OperationResult:  # full pipeline: validate → delta → KNN/label → train → evaluate → store
+        p = TrainRerankerPayload.model_validate(payload)  # typed view of the raw payload dict
+        t0 = time.perf_counter()  # start of the timer behind "elapsed_seconds"
+
+        old_set_id = uuid.UUID(p.old_annotation_set_id)  # uuid.UUID raises ValueError on malformed ids
+        new_set_id = uuid.UUID(p.new_annotation_set_id)
+        emb_config_id = uuid.UUID(p.embedding_config_id)
+        ontology_snapshot_id = uuid.UUID(p.ontology_snapshot_id)
+
+        # ── 1. Validate ──────────────────────────────────────────────────
+        self._validate(session, p, old_set_id, new_set_id, emb_config_id, ontology_snapshot_id)
+
+        emit(
+            "train_reranker.start",
+            None,
+            {
+                "name": p.name,
+                "old_annotation_set_id": p.old_annotation_set_id,
+                "new_annotation_set_id": p.new_annotation_set_id,
+                "category": p.category,
+                "limit_per_entry": p.limit_per_entry,
+            },
+            "info",
+        )
+
+        # ── 2. Evaluation delta ──────────────────────────────────────────
+        emit("train_reranker.computing_delta", None, {}, "info")
+        eval_data = compute_evaluation_data(
+            session, old_set_id, new_set_id, ontology_snapshot_id
+        )
+        ground_truth: dict[str, set[str]] = getattr(eval_data, p.category)  # attribute named "nk", "lk" or "pk"
+        gt_pairs: set[tuple[str, str]] = set()  # flattened (protein, go_id) pairs used for labelling
+        for protein, go_ids in ground_truth.items():
+            for go_id in go_ids:
+                gt_pairs.add((protein, go_id))
+
+        emit(
+            "train_reranker.delta_computed",
+            None,
+            {
+                **eval_data.stats(),
+                "gt_pairs": len(gt_pairs),
+            },
+            "info",
+        )
+
+        if not gt_pairs:  # nothing to learn from: abort before any embeddings are loaded
+            raise ValueError(
+                f"No ground truth found for category '{p.category}' "
+                f"between annotation sets {old_set_id} and {new_set_id}"
+            )
+
+        # ── 3. GO term mappings ──────────────────────────────────────────
+        go_id_map, aspect_map = self._load_go_maps(session, ontology_snapshot_id)
+
+        # ── 4. Load reference embeddings per aspect ──────────────────────
+        emit("train_reranker.loading_references", None, {}, "info")
+        ref_by_aspect = self._load_reference_per_aspect(
+            session, emb_config_id, old_set_id, emit
+        )
+
+        # ── 5. Load query embeddings ─────────────────────────────────────
+        query_accessions = list(ground_truth.keys())  # every protein present in the chosen delta category
+        emit(
+            "train_reranker.loading_queries",
+            None,
+            {"delta_proteins": len(query_accessions)},
+            "info",
+        )
+        query_emb, valid_queries = self._load_query_embeddings(
+            session, query_accessions, emb_config_id
+        )
+        emit(
+            "train_reranker.queries_loaded",
+            None,
+            {"with_embeddings": len(valid_queries)},
+            "info",
+        )
+
+        if not valid_queries:
+            raise ValueError("No delta proteins have embeddings")
+
+        # ── 6. KNN + GO transfer + label ─────────────────────────────────
+        # Load sequences / taxonomy ids up front, before the CPU-heavy phase below
+        qs: dict[str, str] | None = None  # these four stay None when the matching feature is disabled
+        rs: dict[str, str] | None = None
+        qt: dict[str, int | None] | None = None
+        rt: dict[str, int | None] | None = None
+        if p.compute_alignments or p.compute_taxonomy:
+            all_ref_accs: set[str] = set()  # union of reference accessions over the three aspects
+            for asp in _ASPECTS:
+                all_ref_accs.update(ref_by_aspect[asp]["accessions"])
+            query_set = set(valid_queries)
+            if p.compute_alignments:
+                emit("train_reranker.loading_sequences", None, {}, "info")
+                qs = self._load_sequences(session, query_set)
+                rs = self._load_sequences(session, all_ref_accs)
+            if p.compute_taxonomy:
+                emit("train_reranker.loading_taxonomy", None, {}, "info")
+                qt = self._load_taxonomy_ids(session, query_set)
+                rt = self._load_taxonomy_ids(session, all_ref_accs)
+
+        # Intended to free DB resources before the CPU-heavy phase
+        session.expire_all()  # NOTE(review): only expires ORM state; the connection is not released — confirm intent
+
+        emit("train_reranker.running_knn", None, {}, "info")
+        labeled_preds = self._knn_transfer_and_label(
+            session,
+            valid_queries,
+            query_emb,
+            ref_by_aspect,
+            go_id_map,
+            aspect_map,
+            gt_pairs,
+            p,
+            query_sequences=qs,
+            ref_sequences=rs,
+            query_tax_ids=qt,
+            ref_tax_ids=rt,
+        )
+
+        emit(
+            "train_reranker.knn_done",
+            None,
+            {
+                "total_predictions": len(labeled_preds),
+                "positives": sum(1 for r in labeled_preds if r["label"] == 1),  # assumes LABEL_COLUMN == "label" — TODO confirm
+                "negatives": sum(1 for r in labeled_preds if r["label"] == 0),
+            },
+            "info",
+        )
+
+        if not labeled_preds:
+            raise ValueError("KNN produced no predictions for delta proteins")
+
+        # ── 7. Train LightGBM ────────────────────────────────────────────
+        emit("train_reranker.training", None, {}, "info")
+        df = pd.DataFrame(labeled_preds)  # one row per labeled prediction record
+
+        # Aspect filter if requested
+        aspect_filter = _ASPECT_MAP.get(p.aspect) if p.aspect else None  # an aspect missing from _ASPECT_MAP gives None → no filtering
+        if aspect_filter:
+            df = df[df["aspect"] == aspect_filter]
+
+        train_result = reranker_train(
+            df,
+            num_boost_round=p.num_boost_round,
+            early_stopping_rounds=p.early_stopping_rounds,
+            val_fraction=p.val_fraction,
+            neg_pos_ratio=p.neg_pos_ratio,
+        )
+
+        emit(
+            "train_reranker.trained",
+            None,
+            train_result.metrics,
+            "info",
+        )
+
+        # ── 8. Compute baseline vs re-ranker Fmax ────────────────────────
+        emit("train_reranker.evaluating", None, {}, "info")
+        metrics_result = self._compute_comparison_metrics(
+            df, train_result, eval_data, p.category
+        )
+
+        emit(
+            "train_reranker.evaluated",
+            None,
+            {
+                "baseline_fmax": metrics_result["baseline_fmax"],
+                "reranker_fmax": metrics_result["reranker_fmax"],
+                "fmax_improvement": metrics_result["fmax_improvement"],
+            },
+            "info",
+        )
+
+        # ── 9. Store RerankerModel ────────────────────────────────────────
+        full_metrics = {  # on a key clash, later entries override earlier ones
+            **train_result.metrics,
+            **metrics_result,
+            "category": p.category,
+            "old_annotation_set_id": str(old_set_id),
+            "new_annotation_set_id": str(new_set_id),
+            "embedding_config_id": str(emb_config_id),
+            "limit_per_entry": p.limit_per_entry,
+            "search_backend": p.search_backend,
+            "n_query_proteins": len(valid_queries),
+            "n_predictions": len(labeled_preds),  # counted before the aspect filter above
+            "elapsed_seconds": round(time.perf_counter() - t0, 1),
+        }
+
+        model = RerankerModel(
+            name=p.name,
+            prediction_set_id=None,
+            evaluation_set_id=None,
+            category=p.category,
+            aspect=p.aspect,
+            model_data=model_to_string(train_result.model),
+            metrics=full_metrics,
+            feature_importance=train_result.feature_importance,
+        )
+        session.add(model)
+        session.flush()  # model.id is read just below; no commit is issued in this method
+
+        result = {
+            "reranker_model_id": str(model.id),
+            "name": p.name,
+            **full_metrics,
+        }
+        emit("train_reranker.done", None, result, "info")
+        return OperationResult(result=result)
+
+    # ── validation ────────────────────────────────────────────────────────
+
+    def _validate(
+        self,
+        session: Session,
+        p: TrainRerankerPayload,
+        old_set_id: uuid.UUID,
+        new_set_id: uuid.UUID,
+        emb_config_id: uuid.UUID,
+        ontology_snapshot_id: uuid.UUID,
+    ) -> None:
+        if session.get(AnnotationSet, old_set_id) is None:
+            raise ValueError(f"AnnotationSet {old_set_id} not found")
+        if session.get(AnnotationSet, new_set_id) is None:
+            raise ValueError(f"AnnotationSet {new_set_id} not found")
+        if session.get(EmbeddingConfig, emb_config_id) is None:
+            raise ValueError(f"EmbeddingConfig {emb_config_id} not found")
+        existing = (
+            session.query(RerankerModel)
+            .filter(RerankerModel.name == p.name)
+            .first()
+        )
+        if existing is not None:
+            raise ValueError(f"RerankerModel with name '{p.name}' already exists")
+
+    # ── GO term mappings ──────────────────────────────────────────────────
+
+    def _load_go_maps(
+        self, session: Session, snapshot_id: uuid.UUID
+    ) -> tuple[dict[int, str], dict[int, str]]:
+        """Load {go_term.id: go_id} and {go_term.id: aspect} for the snapshot."""
+        rows = session.execute(
+            text("SELECT id, go_id, aspect FROM go_term WHERE ontology_snapshot_id = :snap_id"),
+            {"snap_id": snapshot_id},
+        ).fetchall()
+        id_map = {db_id: go_id for db_id, go_id, _ in rows}
+        aspect_map = {db_id: aspect for db_id, _, aspect in rows if aspect}
+        return id_map, aspect_map
+
+    # ── bulk embedding preload (used by train_reranker_auto) ─────────────
+
+    def _preload_all_embeddings(
+        self,
+        session: Session,
+        emb_config_id: uuid.UUID,
+        emit: EmitFn,
+    ) -> tuple[np.ndarray, list[str], dict[str, int]]:
+        """Load ALL embeddings once into memory.
+
+        Returns (embeddings_f16, accessions, acc_to_idx).
+        This avoids reloading 527K vectors from PostgreSQL on every split.
+        """
+        conn = session.connection()
+
+        count_row = conn.execute(text(
+            "SELECT COUNT(*), "
+            "       (SELECT vector_dims(se2.embedding) "
+            "          FROM sequence_embedding se2 "
+            "         WHERE se2.embedding_config_id = :ecid LIMIT 1) "
+            "  FROM protein p "
+            "  JOIN sequence_embedding se "
+            "    ON se.sequence_id = p.sequence_id "
+            "   AND se.embedding_config_id = :ecid"
+        ), {"ecid": emb_config_id}).one()
+        total, dim = int(count_row[0]), int(count_row[1]) if count_row[1] else 960
+
+        emit(
+            "train_reranker_auto.preloading_embeddings",
+            None,
+            {"total": total, "dim": dim},
+            "info",
+        )
+
+        embeddings = np.empty((total, dim), dtype=np.float16)
+        accessions: list[str] = []
+        result_proxy = conn.execute(text(
+            "SELECT p.accession, se.embedding "
+            "  FROM protein p "
+            "  JOIN sequence_embedding se "
+            "    ON se.sequence_id = p.sequence_id "
+            "   AND se.embedding_config_id = :ecid"
+        ), {"ecid": emb_config_id}).yield_per(_STREAM_CHUNK_SIZE)
+
+        for i, (acc, emb_str) in enumerate(result_proxy):
+            if isinstance(emb_str, str):
+                emb_arr = np.fromstring(emb_str.strip("[]"), sep=",", dtype=np.float16)
+            else:
+                emb_arr = np.array(emb_str, dtype=np.float16)
+            embeddings[i] = emb_arr
+            accessions.append(acc)
+
+        acc_to_idx = {acc: i for i, acc in enumerate(accessions)}
+
+        emit(
+            "train_reranker_auto.embeddings_preloaded",
+            None,
+            {"total": len(accessions), "dim": dim, "memory_mb": round(embeddings.nbytes / 1024 / 1024, 1)},
+            "info",
+        )
+
+        return embeddings, accessions, acc_to_idx
+
+    def _build_reference_from_cache(
+        self,
+        session: Session,
+        annotation_set_id: uuid.UUID,
+        all_embeddings: np.ndarray,
+        all_accessions: list[str],
+        acc_to_idx: dict[str, int],
+        emit: EmitFn,
+    ) -> dict[str, dict[str, Any]]:
+        """Build per-aspect reference data using preloaded embeddings.
+
+        Only loads annotations from the DB (fast, small rows), then filters
+        the preloaded embedding matrix in memory.
+        """
+        conn = session.connection()
+        dim = all_embeddings.shape[1] if all_embeddings.ndim == 2 else 960
+
+        ann_rows = conn.execute(text(
+            "SELECT pga.protein_accession, gt.aspect, pga.go_term_id, "
+            "       pga.qualifier, pga.evidence_code "
+            "  FROM protein_go_annotation pga "
+            "  JOIN go_term gt ON gt.id = pga.go_term_id "
+            " WHERE pga.annotation_set_id = :asid "
+            "   AND gt.aspect IN ('P', 'F', 'C') "
+            "   AND (pga.qualifier IS NULL OR pga.qualifier NOT LIKE '%%NOT%%')"
+        ), {"asid": annotation_set_id}).yield_per(50_000)
+
+        aspect_accs: dict[str, set[str]] = {a: set() for a in _ASPECTS}
+        aspect_go_map: dict[str, dict[str, list[dict[str, Any]]]] = {a: {} for a in _ASPECTS}
+        for acc, asp, go_term_id, qualifier, evidence_code in ann_rows:
+            if asp in aspect_accs and acc in acc_to_idx:
+                aspect_accs[asp].add(acc)
+                aspect_go_map[asp].setdefault(acc, []).append({
+                    "go_term_id": go_term_id,
+                    "qualifier": qualifier,
+                    "evidence_code": evidence_code,
+                })
+
+        result: dict[str, dict[str, Any]] = {}
+        for asp in _ASPECTS:
+            indices = np.array(
+                [acc_to_idx[a] for a in aspect_accs[asp]],
+                dtype=np.int32,
+            )
+            asp_accessions = [all_accessions[i] for i in indices]
+            asp_embeddings = all_embeddings[indices] if len(indices) > 0 else np.empty((0, dim), dtype=np.float16)
+            result[asp] = {
+                "accessions": asp_accessions,
+                "embeddings": asp_embeddings,
+                "go_map": aspect_go_map[asp],
+            }
+            emit(
+                "train_reranker.aspect_loaded",
+                None,
+                {"aspect": asp, "references": len(indices)},
+                "info",
+            )
+
+        return result
+
+    # ── reference embeddings per aspect ───────────────────────────────────
+
+    def _load_reference_per_aspect(
+        self,
+        session: Session,
+        emb_config_id: uuid.UUID,
+        annotation_set_id: uuid.UUID,
+        emit: EmitFn,
+    ) -> dict[str, dict[str, Any]]:
+        """Load per-aspect reference data: accessions, embeddings, annotations.
+
+        Returns {aspect: {accessions, embeddings (float16), go_map}}.
+
+        Uses raw SQL + server-side cursor to avoid SQLAlchemy identity map
+        overhead (540k ORM rows would consume ~20GB of Python objects).
+        """
+        conn = session.connection()
+
+        # Step 1: count + dimension
+        count_row = conn.execute(text(
+            "SELECT COUNT(*), "
+            "       (SELECT vector_dims(se2.embedding) "
+            "          FROM sequence_embedding se2 "
+            "         WHERE se2.embedding_config_id = :ecid LIMIT 1) "
+            "  FROM protein p "
+            "  JOIN sequence_embedding se "
+            "    ON se.sequence_id = p.sequence_id "
+            "   AND se.embedding_config_id = :ecid "
+            " WHERE p.accession IN ("
+            "   SELECT DISTINCT protein_accession "
+            "     FROM protein_go_annotation "
+            "    WHERE annotation_set_id = :asid"
+            " )"
+        ), {"ecid": emb_config_id, "asid": annotation_set_id}).one()
+        total, dim = int(count_row[0]), int(count_row[1]) if count_row[1] else 960
+
+        if total == 0:
+            return {asp: {"accessions": [], "embeddings": np.empty((0,), dtype=np.float16), "go_map": {}} for asp in _ASPECTS}
+
+        # Step 2: stream embeddings via raw SQL — no ORM objects kept
+        embeddings = np.empty((total, dim), dtype=np.float16)
+        accessions: list[str] = []
+        result_proxy = conn.execute(text(
+            "SELECT p.accession, se.embedding "
+            "  FROM protein p "
+            "  JOIN sequence_embedding se "
+            "    ON se.sequence_id = p.sequence_id "
+            "   AND se.embedding_config_id = :ecid "
+            " WHERE p.accession IN ("
+            "   SELECT DISTINCT protein_accession "
+            "     FROM protein_go_annotation "
+            "    WHERE annotation_set_id = :asid"
+            " )"
+        ), {"ecid": emb_config_id, "asid": annotation_set_id}).yield_per(_STREAM_CHUNK_SIZE)
+
+        for i, (acc, emb_str) in enumerate(result_proxy):
+            # pgvector returns text like '[0.1,0.2,...]'; parse to numpy
+            if isinstance(emb_str, str):
+                emb_arr = np.fromstring(emb_str.strip("[]"), sep=",", dtype=np.float16)
+            else:
+                emb_arr = np.array(emb_str, dtype=np.float16)
+            embeddings[i] = emb_arr
+            accessions.append(acc)
+
+        acc_to_idx = {acc: i for i, acc in enumerate(accessions)}
+
+        emit(
+            "train_reranker.references_loaded",
+            None,
+            {"total_references": len(accessions), "dim": dim},
+            "info",
+        )
+
+        # Step 3: load annotations per aspect (also raw SQL)
+        ann_rows = conn.execute(text(
+            "SELECT pga.protein_accession, gt.aspect, pga.go_term_id, "
+            "       pga.qualifier, pga.evidence_code "
+            "  FROM protein_go_annotation pga "
+            "  JOIN go_term gt ON gt.id = pga.go_term_id "
+            " WHERE pga.annotation_set_id = :asid "
+            "   AND gt.aspect IN ('P', 'F', 'C') "
+            "   AND (pga.qualifier IS NULL OR pga.qualifier NOT LIKE '%%NOT%%')"
+        ), {"asid": annotation_set_id}).yield_per(50_000)
+
+        aspect_accs: dict[str, set[str]] = {a: set() for a in _ASPECTS}
+        aspect_go_map: dict[str, dict[str, list[dict[str, Any]]]] = {a: {} for a in _ASPECTS}
+        for acc, asp, go_term_id, qualifier, evidence_code in ann_rows:
+            if asp in aspect_accs:
+                aspect_accs[asp].add(acc)
+                aspect_go_map[asp].setdefault(acc, []).append({
+                    "go_term_id": go_term_id,
+                    "qualifier": qualifier,
+                    "evidence_code": evidence_code,
+                })
+
+        # Step 4: build per-aspect views
+        result: dict[str, dict[str, Any]] = {}
+        for asp in _ASPECTS:
+            indices = np.array(
+                [acc_to_idx[a] for a in aspect_accs[asp] if a in acc_to_idx],
+                dtype=np.int32,
+            )
+            asp_accessions = [accessions[i] for i in indices]
+            asp_embeddings = embeddings[indices] if len(indices) > 0 else np.empty((0, dim), dtype=np.float16)
+            result[asp] = {
+                "accessions": asp_accessions,
+                "embeddings": asp_embeddings,
+                "go_map": aspect_go_map[asp],
+            }
+            emit(
+                "train_reranker.aspect_loaded",
+                None,
+                {"aspect": asp, "references": len(indices)},
+                "info",
+            )
+
+        return result
+
+    # ── query embeddings ──────────────────────────────────────────────────
+
+    def _load_query_embeddings(
+        self,
+        session: Session,
+        accessions: list[str],
+        emb_config_id: uuid.UUID,
+    ) -> tuple[np.ndarray, list[str]]:
+        """Load embeddings for delta proteins. Returns (embeddings_f32, valid_accessions)."""
+        all_valid: list[str] = []
+        all_emb: list[list[float]] = []
+        for i in range(0, len(accessions), _ANNOTATION_CHUNK_SIZE):
+            chunk = accessions[i : i + _ANNOTATION_CHUNK_SIZE]
+            rows = (
+                session.query(Protein.accession, SequenceEmbedding.embedding)
+                .join(
+                    SequenceEmbedding,
+                    (SequenceEmbedding.sequence_id == Protein.sequence_id)
+                    & (SequenceEmbedding.embedding_config_id == emb_config_id),
+                )
+                .filter(Protein.accession.in_(chunk))
+                .all()
+            )
+            for acc, emb in rows:
+                all_valid.append(acc)
+                all_emb.append(list(emb))
+
+        if not all_valid:
+            return np.empty((0,)), []
+        return np.array(all_emb, dtype=np.float32), all_valid
+
+    # ── Sequence / taxonomy loaders ───────────────────────────────────────
+
+    def _load_sequences(
+        self, session: Session, accessions: set[str],
+    ) -> dict[str, str]:
+        result: dict[str, str] = {}
+        acc_list = list(accessions)
+        for i in range(0, len(acc_list), _ANNOTATION_CHUNK_SIZE):
+            chunk = acc_list[i : i + _ANNOTATION_CHUNK_SIZE]
+            rows = (
+                session.query(Protein.accession, Sequence.sequence)
+                .join(Protein.sequence)
+                .filter(Protein.accession.in_(chunk))
+                .all()
+            )
+            for acc, seq in rows:
+                result[acc] = seq
+        return result
+
+    def _load_taxonomy_ids(
+        self, session: Session, accessions: set[str],
+    ) -> dict[str, int | None]:
+        result: dict[str, int | None] = {}
+        acc_list = list(accessions)
+        for i in range(0, len(acc_list), _ANNOTATION_CHUNK_SIZE):
+            chunk = acc_list[i : i + _ANNOTATION_CHUNK_SIZE]
+            rows = (
+                session.query(Protein.accession, Protein.taxonomy_id)
+                .filter(Protein.accession.in_(chunk))
+                .all()
+            )
+            for acc, tid in rows:
+                result[acc] = int(tid) if tid else None
+        return result
+
+    # ── KNN + transfer + label ────────────────────────────────────────────
+
+    def _knn_transfer_and_label(
+        self,
+        session: Session,
+        valid_queries: list[str],
+        query_emb: np.ndarray,
+        ref_by_aspect: dict[str, dict[str, Any]],
+        go_id_map: dict[int, str],
+        aspect_map: dict[int, str],
+        gt_pairs: set[tuple[str, str]],
+        p: TrainRerankerPayload | TrainRerankerAutoPayload,
+        *,
+        query_sequences: dict[str, str] | None = None,
+        ref_sequences: dict[str, str] | None = None,
+        query_tax_ids: dict[str, int | None] | None = None,
+        ref_tax_ids: dict[str, int | None] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Run per-aspect KNN, transfer GO terms, label, compute features."""
+        # Collect neighbors per aspect
+        neighbors_by_aspect: dict[str, list[list[tuple[str, float]]]] = {}
+        for aspect in _ASPECTS:
+            ref = ref_by_aspect[aspect]
+            if not ref["accessions"]:
+                neighbors_by_aspect[aspect] = [[] for _ in valid_queries]
+                continue
+            ref_f32 = ref["embeddings"].astype(np.float32)
+            neighbors_by_aspect[aspect] = search_knn(
+                query_emb,
+                ref_f32,
+                ref["accessions"],
+                k=p.limit_per_entry,
+                distance_threshold=p.distance_threshold,
+                backend=p.search_backend,
+                metric=p.metric,
+                faiss_index_type=p.faiss_index_type,
+                faiss_nlist=p.faiss_nlist,
+                faiss_nprobe=p.faiss_nprobe,
+            )
+            del ref_f32
+
+        # Pre-compute reranker features
+        rr_distance_std: dict[str, float] = {}
+        rr_vote_count: dict[str, dict[int, int]] = {}
+        rr_k_position: dict[str, dict[int, int]] = {}
+        go_term_freq: dict[int, int] = {}
+        ref_ann_density: dict[str, int] = {}
+
+        for q_idx, q_acc in enumerate(valid_queries):
+            all_dists: list[float] = []
+            rr_vote_count[q_acc] = {}
+            rr_k_position[q_acc] = {}
+            for aspect in _ASPECTS:
+                nbs = neighbors_by_aspect[aspect]
+                if q_idx < len(nbs):
+                    for _, d in nbs[q_idx]:
+                        all_dists.append(d)
+            rr_distance_std[q_acc] = float(np.std(all_dists)) if len(all_dists) > 1 else 0.0
+
+        for aspect in _ASPECTS:
+            go_map = ref_by_aspect[aspect]["go_map"]
+            # Ref annotation density
+            for acc, anns in go_map.items():
+                if acc not in ref_ann_density:
+                    ref_ann_density[acc] = 0
+                ref_ann_density[acc] += len(anns)
+                for ann in anns:
+                    gtid = ann["go_term_id"]
+                    go_term_freq[gtid] = go_term_freq.get(gtid, 0) + 1
+
+            # Vote count and k_position per query
+            for q_idx, q_acc in enumerate(valid_queries):
+                vc = rr_vote_count[q_acc]
+                kp = rr_k_position[q_acc]
+                nbs = neighbors_by_aspect[aspect]
+                if q_idx < len(nbs):
+                    for k_pos, (ref_acc, _) in enumerate(nbs[q_idx], 1):
+                        for ann in go_map.get(ref_acc, []):
+                            gtid = ann["go_term_id"]
+                            vc[gtid] = vc.get(gtid, 0) + 1
+                            if gtid not in kp:
+                                kp[gtid] = k_pos
+
+        # Pre-compute alignment and taxonomy features per unique (query, ref) pair
+        pair_features: dict[tuple[str, str], dict[str, Any]] = {}
+        do_alignments = p.compute_alignments and query_sequences is not None and ref_sequences is not None
+        do_taxonomy = p.compute_taxonomy and query_tax_ids is not None and ref_tax_ids is not None
+
+        if do_alignments or do_taxonomy:
+            for aspect in _ASPECTS:
+                nbs = neighbors_by_aspect[aspect]
+                for q_idx, q_acc in enumerate(valid_queries):
+                    if q_idx >= len(nbs):
+                        continue
+                    for ref_acc, _ in nbs[q_idx]:
+                        pair_key = (q_acc, ref_acc)
+                        if pair_key in pair_features:
+                            continue
+                        feats: dict[str, Any] = {}
+                        if do_alignments:
+                            q_seq = query_sequences.get(q_acc, "")
+                            r_seq = ref_sequences.get(ref_acc, "")
+                            if q_seq and r_seq:
+                                feats.update(compute_alignment(q_seq, r_seq))
+                        if do_taxonomy:
+                            q_tid = query_tax_ids.get(q_acc)
+                            r_tid = ref_tax_ids.get(ref_acc)
+                            feats.update(compute_taxonomy(q_tid, r_tid))
+                            feats["query_taxonomy_id"] = q_tid
+                            feats["ref_taxonomy_id"] = r_tid
+                        pair_features[pair_key] = feats
+
+        # Build labeled predictions
+        records: list[dict[str, Any]] = []
+        for aspect in _ASPECTS:
+            go_map = ref_by_aspect[aspect]["go_map"]
+            for q_idx, q_acc in enumerate(valid_queries):
+                nbs = neighbors_by_aspect[aspect]
+                if q_idx >= len(nbs):
+                    continue
+                seen_terms: set[int] = set()
+                for ref_acc, distance in nbs[q_idx]:
+                    for ann in go_map.get(ref_acc, []):
+                        go_term_id = ann["go_term_id"]
+                        if go_term_id in seen_terms:
+                            continue
+                        seen_terms.add(go_term_id)
+
+                        go_id = go_id_map.get(go_term_id)
+                        if not go_id:
+                            continue
+                        term_aspect = aspect_map.get(go_term_id, "")
+                        label = 1 if (q_acc, go_id) in gt_pairs else 0
+
+                        pf = pair_features.get((q_acc, ref_acc), {})
+                        records.append({
+                            "protein_accession": q_acc,
+                            "go_id": go_id,
+                            "aspect": term_aspect,
+                            LABEL_COLUMN: label,
+                            "distance": distance,
+                            "ref_protein_accession": ref_acc,
+                            "qualifier": ann.get("qualifier") or "",
+                            "evidence_code": ann.get("evidence_code") or "",
+                            # Alignment features
+                            "identity_nw": pf.get("identity_nw"),
+                            "similarity_nw": pf.get("similarity_nw"),
+                            "alignment_score_nw": pf.get("alignment_score_nw"),
+                            "gaps_pct_nw": pf.get("gaps_pct_nw"),
+                            "alignment_length_nw": pf.get("alignment_length_nw"),
+                            "identity_sw": pf.get("identity_sw"),
+                            "similarity_sw": pf.get("similarity_sw"),
+                            "alignment_score_sw": pf.get("alignment_score_sw"),
+                            "gaps_pct_sw": pf.get("gaps_pct_sw"),
+                            "alignment_length_sw": pf.get("alignment_length_sw"),
+                            "length_query": pf.get("length_query"),
+                            "length_ref": pf.get("length_ref"),
+                            # Taxonomy features
+                            "taxonomic_distance": pf.get("taxonomic_distance"),
+                            "taxonomic_common_ancestors": pf.get("taxonomic_common_ancestors"),
+                            "taxonomic_relation": pf.get("taxonomic_relation", ""),
+                            # Reranker features
+                            "vote_count": rr_vote_count.get(q_acc, {}).get(go_term_id, 1),
+                            "k_position": rr_k_position.get(q_acc, {}).get(go_term_id, 1),
+                            "go_term_frequency": go_term_freq.get(go_term_id, 0),
+                            "ref_annotation_density": ref_ann_density.get(ref_acc, 0),
+                            "neighbor_distance_std": rr_distance_std.get(q_acc, 0.0),
+                        })
+
+        return records
+
+    # ── metrics comparison ────────────────────────────────────────────────
+
+    def _compute_comparison_metrics(
+        self,
+        df: pd.DataFrame,
+        train_result: Any,
+        eval_data: Any,
+        category: str,
+    ) -> dict[str, Any]:
+        """Compare distance-based baseline scores with re-ranker scores.
+
+        Both score lists are built over the same rows of ``df`` and passed to
+        ``compute_cafa_metrics`` with the same ``eval_data`` and ``category``.
+
+        Args:
+            df: Prediction rows.  ``protein_accession`` and ``go_id`` are read
+                here, ``distance`` feeds the baseline when the column exists,
+                and the whole frame is handed to ``reranker_predict``.
+            train_result: Training result; only ``train_result.model`` is used.
+            eval_data: Forwarded unchanged to ``compute_cafa_metrics``.
+            category: Forwarded as the ``category`` keyword argument.
+
+        Returns:
+            Dict with baseline and re-ranker ``fmax``, ``auc_pr`` and
+            threshold, both improvements rounded to 4 decimals, and
+            ``n_ground_truth_proteins`` taken from the baseline metrics.
+        """
+        # Read each column once instead of materialising a Series per row
+        # (``iterrows`` / ``df.iloc[i]``), which dominates on large frames.
+        proteins = df["protein_accession"].tolist()
+        go_ids = df["go_id"].tolist()
+        if "distance" in df.columns:
+            distances = df["distance"].tolist()
+        else:
+            # A missing column is treated like missing values: score 0.0.
+            distances = [None] * len(df)
+
+        # Baseline: score = 1 - distance (simple cosine similarity), floored
+        # at 0; rows without a distance get 0.0.
+        baseline_scored = [
+            {
+                "protein_accession": acc,
+                "go_id": go_id,
+                "score": max(0.0, 1.0 - float(dist)) if pd.notna(dist) else 0.0,
+            }
+            for acc, go_id, dist in zip(proteins, go_ids, distances, strict=True)
+        ]
+        baseline_metrics = compute_cafa_metrics(baseline_scored, eval_data, category=category)
+
+        # Re-ranker: scores are matched to rows by position.
+        reranker_scores = reranker_predict(train_result.model, df)
+        reranker_scored = [
+            {
+                "protein_accession": acc,
+                "go_id": go_id,
+                "score": float(reranker_scores[i]),
+            }
+            for i, (acc, go_id) in enumerate(zip(proteins, go_ids, strict=True))
+        ]
+        reranker_metrics = compute_cafa_metrics(reranker_scored, eval_data, category=category)
+
+        return {
+            "baseline_fmax": baseline_metrics.fmax,
+            "baseline_auc_pr": baseline_metrics.auc_pr,
+            "baseline_threshold": baseline_metrics.threshold_at_fmax,
+            "reranker_fmax": reranker_metrics.fmax,
+            "reranker_auc_pr": reranker_metrics.auc_pr,
+            "reranker_threshold": reranker_metrics.threshold_at_fmax,
+            "fmax_improvement": round(reranker_metrics.fmax - baseline_metrics.fmax, 4),
+            "auc_pr_improvement": round(reranker_metrics.auc_pr - baseline_metrics.auc_pr, 4),
+            "n_ground_truth_proteins": baseline_metrics.n_ground_truth_proteins,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Auto payload
+# ---------------------------------------------------------------------------
+
+
+class TrainRerankerAutoPayload(ProteaPayload, frozen=True):
+    """Payload for the train_reranker_auto operation.
+
+    Generates consecutive temporal pairs from ``train_versions``, runs KNN
+    once per pair, then trains 3 per-category LightGBM models (NK, LK, PK)
+    and evaluates each on the held-out test split.
+
+    ``train_versions`` and ``test_versions`` are stored sorted ascending.
+    """
+
+    name: str
+    embedding_config_id: str
+    ontology_snapshot_id: str
+
+    # GOA source_version numbers for training pairs (e.g. [160,165,...,220])
+    train_versions: list[int]
+    # GOA source_version numbers for test evaluation (e.g. [225] or [225,229])
+    # NOTE: TrainRerankerAutoOperation.execute currently evaluates only
+    # against the first (lowest) entry.
+    test_versions: list[int]
+
+    # Annotation source in annotation_set (default "goa")
+    annotation_source: str = "goa"
+
+    # KNN parameters
+    limit_per_entry: PositiveInt = 5
+    distance_threshold: float | None = None
+    search_backend: str = "numpy"
+    metric: str = "cosine"
+    faiss_index_type: str = "Flat"
+    faiss_nlist: int = 100
+    faiss_nprobe: int = 10
+
+    # LightGBM parameters
+    num_boost_round: int = 1000
+    early_stopping_rounds: int = 50
+    val_fraction: float = 0.2
+    neg_pos_ratio: float | None = None
+
+    # Feature computation
+    compute_alignments: bool = False
+    compute_taxonomy: bool = False
+
+    # IA weighting: path to IA TSV file (go_id\tia_value, no header).
+    # When set, sample_weight = IA(go_term) during training so the model
+    # focuses on informative (rare, specific) GO terms — aligned with
+    # CAFA evaluation which uses IA weighting.
+    ia_file: str | None = None
+
+    @field_validator("embedding_config_id", "ontology_snapshot_id", "name", mode="before")
+    @classmethod
+    def must_be_non_empty(cls, v: str) -> str:
+        """Reject non-string or blank values; return the stripped string."""
+        if not isinstance(v, str) or not v.strip():
+            raise ValueError("must be a non-empty string")
+        return v.strip()
+
+    # The list validators run *after* type validation, so ``v`` is already a
+    # ``list[int]``.  In "before" mode ``len``/``sorted`` saw the raw input:
+    # ``None`` or mixed types raised a bare TypeError, and numeric strings
+    # were ordered lexicographically before being coerced to int.
+    @field_validator("train_versions", mode="after")
+    @classmethod
+    def at_least_two_train(cls, v: list[int]) -> list[int]:
+        """Require at least two versions and return them sorted ascending."""
+        if len(v) < 2:
+            raise ValueError("train_versions must have at least 2 entries to form pairs")
+        return sorted(v)
+
+    @field_validator("test_versions", mode="after")
+    @classmethod
+    def at_least_one_test(cls, v: list[int]) -> list[int]:
+        """Require at least one version and return them sorted ascending."""
+        if not v:
+            raise ValueError("test_versions must have at least 1 entry")
+        return sorted(v)
+
+
+# Category keys.  Each one is read as an attribute of the evaluation data
+# (``getattr(eval_data, cat)``) and used as the ``{name}-{cat}`` model suffix.
+_CATEGORIES = ("nk", "lk", "pk")
+# Maps the single-letter codes P/F/C to bpo/mfo/cco.
+# NOTE(review): presumably GO aspect codes -> ontology short names; the use
+# sites are not in this section — confirm.
+_ASPECT_NAMES = {"P": "bpo", "F": "mfo", "C": "cco"}
+
+
+# ---------------------------------------------------------------------------
+# Auto operation
+# ---------------------------------------------------------------------------
+
+
+class TrainRerankerAutoOperation:
+    """Automated multi-split temporal holdout re-ranker training.
+
+    Trains **3 per-category models** (NK, LK, PK) in a single execution.
+    Each model trains on all aspects combined, giving it ~3× more data
+    than per-aspect models and better convergence.
+
+    Pipeline:
+    1. Resolve annotation_set IDs from version numbers.
+    2. Load GO maps once.  Optionally load IA weights for sample weighting.
+    3. For each consecutive pair in train_versions:
+       a. Compute evaluation delta (all 3 categories at once).
+       b. Load references + query embeddings, run KNN + GO transfer.
+       c. Label predictions against each category's ground truth.
+    4. For each category (NK, LK, PK):
+       a. Concatenate labeled data from all splits (all aspects).
+       b. Train one LightGBM model with optional IA sample weights.
+       c. Evaluate on test split.
+       d. Store RerankerModel as ``{name}-{category}``.
+
+    The test split is the pair (last train version, first test version).
+    Per-split data is staged as parquet files in a temporary directory that
+    is removed when ``execute`` returns or raises.
+    """
+
+    # Operation identifier; also the prefix of every event emitted below.
+    name = "train_reranker_auto"
+
+    # Shared instance whose private helpers (``_load_go_maps``,
+    # ``_preload_all_embeddings``, ``_build_reference_from_cache``,
+    # ``_knn_transfer_and_label``, ...) are reused by ``execute``.
+    _single = TrainRerankerOperation()
+
+    def execute(
+        self, session: Session, payload: dict[str, Any], *, emit: EmitFn
+    ) -> OperationResult:
+        """Run the full auto-training pipeline and store the trained models.
+
+        Args:
+            session: Database session used for all lookups and for adding the
+                new ``RerankerModel`` rows (flushed here, not committed).
+            payload: Raw payload dict, validated as ``TrainRerankerAutoPayload``.
+            emit: Event callback, called as ``emit(event, None, data, level)``.
+
+        Returns:
+            ``OperationResult`` whose ``result`` holds ``n_models``, ``models``
+            (one summary dict per stored model) and ``elapsed_seconds``.
+
+        Raises:
+            ValueError: If an annotation set or the embedding config is not
+                found, a ``{name}-{category}`` model already exists, or no
+                split produced training data.
+        """
+        p = TrainRerankerAutoPayload.model_validate(payload)
+        t0 = time.perf_counter()
+
+        emb_config_id = uuid.UUID(p.embedding_config_id)
+        ontology_snapshot_id = uuid.UUID(p.ontology_snapshot_id)
+
+        # ── 1. Resolve annotation set IDs ────────────────────────────────
+        # Train and test versions are resolved together so a missing set
+        # fails before any expensive work starts.
+        all_versions = sorted(set(p.train_versions + p.test_versions))
+        version_to_set: dict[int, uuid.UUID] = {}
+        for v in all_versions:
+            aset = (
+                session.query(AnnotationSet)
+                .filter(
+                    AnnotationSet.source == p.annotation_source,
+                    AnnotationSet.source_version == str(v),
+                )
+                .first()
+            )
+            if aset is None:
+                raise ValueError(
+                    f"AnnotationSet not found for source='{p.annotation_source}', "
+                    f"source_version='{v}'"
+                )
+            version_to_set[v] = aset.id
+
+        if session.get(EmbeddingConfig, emb_config_id) is None:
+            raise ValueError(f"EmbeddingConfig {emb_config_id} not found")
+
+        # Check no name collisions for any of the 3 per-category models
+        for cat in _CATEGORIES:
+            model_name = f"{p.name}-{cat}"
+            existing = (
+                session.query(RerankerModel)
+                .filter(RerankerModel.name == model_name)
+                .first()
+            )
+            if existing is not None:
+                raise ValueError(f"RerankerModel '{model_name}' already exists")
+
+        # Load IA weights for sample weighting (optional)
+        # Each non-blank line is split on tabs: column 0 is the key, column 1
+        # is parsed as float; lines with fewer than 2 columns are ignored.
+        ia_weights: dict[str, float] | None = None
+        if p.ia_file:
+            ia_weights = {}
+            with open(p.ia_file) as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    parts = line.split("\t")
+                    if len(parts) >= 2:
+                        ia_weights[parts[0]] = float(parts[1])
+            emit(
+                "train_reranker_auto.ia_loaded",
+                None,
+                {"ia_file": p.ia_file, "n_terms": len(ia_weights)},
+                "info",
+            )
+
+        emit(
+            "train_reranker_auto.start",
+            None,
+            {
+                "name": p.name,
+                "train_versions": p.train_versions,
+                "test_versions": p.test_versions,
+                "n_pairs": len(p.train_versions) - 1,
+                "n_models": 3,
+                "ia_weighted": ia_weights is not None,
+            },
+            "info",
+        )
+
+        # ── 2. Load GO maps ──────────────────────────────────────────────
+        go_id_map, aspect_map = self._single._load_go_maps(session, ontology_snapshot_id)
+
+        # ── 2b. Preload ALL embeddings once ─────────────────────────────
+        all_embeddings, all_accessions, acc_to_idx = self._single._preload_all_embeddings(
+            session, emb_config_id, emit
+        )
+
+        # ── 3. Generate training data from consecutive pairs ─────────────
+        # Memory-optimised: each split writes to parquet on disk, then all
+        # RAM is freed before the next split.  Training reads from disk.
+        _KEEP_COLS = ["protein_accession", "go_id", "aspect"] + ALL_FEATURES + [LABEL_COLUMN]
+        tmp_dir = Path(tempfile.mkdtemp(prefix="protea_reranker_"))
+        per_split_stats: list[dict[str, Any]] = []
+        split_files: dict[str, list[Path]] = {c: [] for c in _CATEGORIES}
+
+        try:
+            for i in range(len(p.train_versions) - 1):
+                v_old = p.train_versions[i]
+                v_new = p.train_versions[i + 1]
+                old_set_id = version_to_set[v_old]
+                new_set_id = version_to_set[v_new]
+
+                emit(
+                    "train_reranker_auto.split_start",
+                    None,
+                    {"split": i + 1, "v_old": v_old, "v_new": v_new},
+                    "info",
+                )
+
+                # 3a. Compute delta — get all 3 categories at once
+                eval_data = compute_evaluation_data(
+                    session, old_set_id, new_set_id, ontology_snapshot_id
+                )
+
+                # Build gt_pairs for each category; collect union of query proteins
+                cat_gt_pairs: dict[str, set[tuple[str, str]]] = {}
+                all_query_accessions: set[str] = set()
+                for cat in _CATEGORIES:
+                    gt: dict[str, set[str]] = getattr(eval_data, cat)
+                    pairs: set[tuple[str, str]] = set()
+                    for protein, go_ids in gt.items():
+                        for go_id in go_ids:
+                            pairs.add((protein, go_id))
+                    cat_gt_pairs[cat] = pairs
+                    all_query_accessions.update(gt.keys())
+
+                if not all_query_accessions:
+                    emit(
+                        "train_reranker_auto.split_skipped",
+                        None,
+                        {"split": i + 1, "reason": "no ground truth in any category"},
+                        "warning",
+                    )
+                    per_split_stats.append({
+                        "v_old": v_old, "v_new": v_new, "skipped": True,
+                        "reason": "no ground truth",
+                    })
+                    continue
+
+                # 3b. Build references from preloaded embeddings (only loads annotations)
+                ref_by_aspect = self._single._build_reference_from_cache(
+                    session, old_set_id, all_embeddings, all_accessions, acc_to_idx, emit
+                )
+
+                # 3c. Load query embeddings from preloaded cache
+                # Query proteins without a preloaded embedding are dropped.
+                query_accs = [a for a in all_query_accessions if a in acc_to_idx]
+                query_indices = np.array([acc_to_idx[a] for a in query_accs], dtype=np.int32)
+                query_emb = (
+                    all_embeddings[query_indices].astype(np.float32)
+                    if len(query_indices) > 0
+                    else np.empty((0, all_embeddings.shape[1]), dtype=np.float32)
+                )
+                valid_queries = query_accs
+
+                if not valid_queries:
+                    emit(
+                        "train_reranker_auto.split_skipped",
+                        None,
+                        {"split": i + 1, "reason": "no query embeddings"},
+                        "warning",
+                    )
+                    per_split_stats.append({
+                        "v_old": v_old, "v_new": v_new, "skipped": True,
+                        "reason": "no query embeddings",
+                    })
+                    del ref_by_aspect, query_emb, valid_queries
+                    gc.collect()
+                    continue
+
+                # 3d. Load sequences / taxonomy if requested
+                qs: dict[str, str] | None = None
+                rs: dict[str, str] | None = None
+                qt: dict[str, int | None] | None = None
+                rt: dict[str, int | None] | None = None
+                if p.compute_alignments or p.compute_taxonomy:
+                    all_ref_accs: set[str] = set()
+                    for asp in _ASPECTS:
+                        all_ref_accs.update(ref_by_aspect[asp]["accessions"])
+                    query_set = set(valid_queries)
+                    if p.compute_alignments:
+                        qs = self._single._load_sequences(session, query_set)
+                        rs = self._single._load_sequences(session, all_ref_accs)
+                    if p.compute_taxonomy:
+                        qt = self._single._load_taxonomy_ids(session, query_set)
+                        rt = self._single._load_taxonomy_ids(session, all_ref_accs)
+
+                # 3e. KNN + GO transfer (once, no labeling yet)
+                # NOTE(review): expire_all() presumably lets ORM objects loaded
+                # so far be released before the memory-heavy step — confirm.
+                session.expire_all()
+                unlabeled_preds = self._single._knn_transfer_and_label(
+                    session, valid_queries, query_emb, ref_by_aspect,
+                    go_id_map, aspect_map,
+                    set(),  # empty gt → all label=0
+                    p,
+                    query_sequences=qs,
+                    ref_sequences=rs,
+                    query_tax_ids=qt,
+                    ref_tax_ids=rt,
+                )
+
+                # Free large objects immediately
+                del ref_by_aspect, query_emb, valid_queries, qs, rs, qt, rt
+                gc.collect()
+
+                split_stats: dict[str, Any] = {
+                    "v_old": v_old, "v_new": v_new, "skipped": False,
+                    "total_unlabeled": len(unlabeled_preds),
+                }
+
+                # 3f. Build DataFrame, label per category, write to parquet.
+                # Only the _KEEP_COLS columns are retained from the records.
+                base_df = pd.DataFrame(unlabeled_preds, columns=_KEEP_COLS)
+                del unlabeled_preds
+                gc.collect()
+
+                # The same frame is reused for every category: the label
+                # column is overwritten, then the frame is written out.
+                for cat in _CATEGORIES:
+                    gt_p = cat_gt_pairs[cat]
+                    labels = np.array([
+                        1 if (acc, go_id) in gt_p else 0
+                        for acc, go_id in zip(base_df["protein_accession"], base_df["go_id"], strict=False)
+                    ], dtype=np.int8)
+                    base_df[LABEL_COLUMN] = labels
+                    n_pos = int(labels.sum())
+                    split_stats[f"{cat}_positives"] = n_pos
+                    split_stats[f"{cat}_negatives"] = len(base_df) - n_pos
+
+                    pq_path = tmp_dir / f"train_{cat}_split{i}.parquet"
+                    base_df.to_parquet(pq_path, index=False)
+                    split_files[cat].append(pq_path)
+
+                del base_df
+                gc.collect()
+
+                per_split_stats.append(split_stats)
+                emit("train_reranker_auto.split_done", None, split_stats, "info")
+
+            # Check we have data
+            if not any(split_files[c] for c in _CATEGORIES):
+                raise ValueError("No training data produced from any split")
+
+            # ── 4. Test split: KNN once, label per category ──────────────
+            # The test pair is (last train version, first test version); any
+            # further entries of test_versions are not used here.
+            test_old_v = p.train_versions[-1]
+            test_new_v = p.test_versions[0]
+            test_old_set_id = version_to_set[test_old_v]
+            test_new_set_id = version_to_set[test_new_v]
+
+            emit(
+                "train_reranker_auto.test_knn",
+                None,
+                {"test_old": test_old_v, "test_new": test_new_v},
+                "info",
+            )
+
+            test_eval_data = compute_evaluation_data(
+                session, test_old_set_id, test_new_set_id, ontology_snapshot_id
+            )
+
+            # Write test data to parquet too
+            # A category's entry stays None when no test file gets written.
+            test_files: dict[str, Path | None] = {c: None for c in _CATEGORIES}
+            test_all_queries: set[str] = set()
+            test_cat_gt: dict[str, set[tuple[str, str]]] = {}
+            for cat in _CATEGORIES:
+                gt: dict[str, set[str]] = getattr(test_eval_data, cat)
+                pairs: set[tuple[str, str]] = set()
+                for protein, go_ids in gt.items():
+                    for go_id in go_ids:
+                        pairs.add((protein, go_id))
+                test_cat_gt[cat] = pairs
+                test_all_queries.update(gt.keys())
+
+            if test_all_queries:
+                test_ref = self._single._build_reference_from_cache(
+                    session, test_old_set_id, all_embeddings, all_accessions, acc_to_idx, emit
+                )
+                test_accs = [a for a in test_all_queries if a in acc_to_idx]
+                test_indices = np.array([acc_to_idx[a] for a in test_accs], dtype=np.int32)
+                test_emb = (
+                    all_embeddings[test_indices].astype(np.float32)
+                    if len(test_indices) > 0
+                    else np.empty((0, all_embeddings.shape[1]), dtype=np.float32)
+                )
+                test_valid = test_accs
+                if test_valid:
+                    # Load sequences / taxonomy for test split
+                    test_qs: dict[str, str] | None = None
+                    test_rs: dict[str, str] | None = None
+                    test_qt: dict[str, int | None] | None = None
+                    test_rt: dict[str, int | None] | None = None
+                    if p.compute_alignments or p.compute_taxonomy:
+                        test_ref_accs: set[str] = set()
+                        for asp in _ASPECTS:
+                            test_ref_accs.update(test_ref[asp]["accessions"])
+                        test_query_set = set(test_valid)
+                        if p.compute_alignments:
+                            test_qs = self._single._load_sequences(session, test_query_set)
+                            test_rs = self._single._load_sequences(session, test_ref_accs)
+                        if p.compute_taxonomy:
+                            test_qt = self._single._load_taxonomy_ids(session, test_query_set)
+                            test_rt = self._single._load_taxonomy_ids(session, test_ref_accs)
+
+                    session.expire_all()
+                    test_unlabeled = self._single._knn_transfer_and_label(
+                        session, test_valid, test_emb, test_ref,
+                        go_id_map, aspect_map, set(), p,
+                        query_sequences=test_qs,
+                        ref_sequences=test_rs,
+                        query_tax_ids=test_qt,
+                        ref_tax_ids=test_rt,
+                    )
+                    del test_ref, test_emb, test_valid, test_qs, test_rs, test_qt, test_rt
+                    gc.collect()
+
+                    test_base_df = pd.DataFrame(test_unlabeled, columns=_KEEP_COLS)
+                    del test_unlabeled
+                    gc.collect()
+
+                    # Same relabel-and-write pattern as the training splits.
+                    for cat in _CATEGORIES:
+                        gt_p = test_cat_gt[cat]
+                        labels = np.array([
+                            1 if (acc, go_id) in gt_p else 0
+                            for acc, go_id in zip(test_base_df["protein_accession"], test_base_df["go_id"], strict=False)
+                        ], dtype=np.int8)
+                        test_base_df[LABEL_COLUMN] = labels
+                        pq_path = tmp_dir / f"test_{cat}.parquet"
+                        test_base_df.to_parquet(pq_path, index=False)
+                        test_files[cat] = pq_path
+
+                    del test_base_df
+                    gc.collect()
+                else:
+                    del test_ref, test_emb, test_valid
+                    gc.collect()
+
+            # ── 5. Train 3 per-category models — read from parquet ────────
+            models_created: list[dict[str, Any]] = []
+
+            for cat in _CATEGORIES:
+                if not split_files[cat]:
+                    continue
+                model_name = f"{p.name}-{cat}"
+                combined_df = pd.concat(
+                    [pd.read_parquet(f) for f in split_files[cat]],
+                    ignore_index=True,
+                )
+
+                # A category with no rows or no positive labels is skipped.
+                if len(combined_df) == 0 or int(combined_df[LABEL_COLUMN].sum()) == 0:
+                    emit(
+                        "train_reranker_auto.model_skipped",
+                        None,
+                        {"model": model_name, "reason": "no data or no positives"},
+                        "warning",
+                    )
+                    del combined_df
+                    gc.collect()
+                    continue
+
+                # Load test data for this category
+                test_df: pd.DataFrame | None = None
+                if test_files.get(cat) is not None:
+                    test_df = pd.read_parquet(test_files[cat])
+
+                # Build sample weights from IA values (if available)
+                # go_ids absent from the IA table get weight 1.0.
+                sw: np.ndarray | None = None
+                if ia_weights is not None:
+                    sw = combined_df["go_id"].map(
+                        lambda gid: ia_weights.get(gid, 1.0)
+                    ).values.astype(np.float64)
+
+                emit(
+                    "train_reranker_auto.training_model",
+                    None,
+                    {
+                        "model": model_name,
+                        "samples": len(combined_df),
+                        "positives": int(combined_df[LABEL_COLUMN].sum()),
+                        "ia_weighted": sw is not None,
+                    },
+                    "info",
+                )
+
+                train_result = reranker_train(
+                    combined_df,
+                    num_boost_round=p.num_boost_round,
+                    early_stopping_rounds=p.early_stopping_rounds,
+                    val_fraction=p.val_fraction,
+                    neg_pos_ratio=p.neg_pos_ratio,
+                    sample_weight=sw,
+                )
+
+                # Evaluate on test split (all aspects combined)
+                # Skipped (metrics stay empty) when the test frame is missing,
+                # empty, or has no positive labels.
+                test_metrics: dict[str, Any] = {}
+                if test_df is not None:
+                    if len(test_df) > 0 and int(test_df[LABEL_COLUMN].sum()) > 0:
+                        test_metrics = self._single._compute_comparison_metrics(
+                            test_df, train_result, test_eval_data, cat
+                        )
+
+                full_metrics: dict[str, Any] = {
+                    **train_result.metrics,
+                    "category": cat,
+                    "aspect": None,
+                    "train_versions": p.train_versions,
+                    "test_versions": p.test_versions,
+                    "annotation_source": p.annotation_source,
+                    "embedding_config_id": str(emb_config_id),
+                    "limit_per_entry": p.limit_per_entry,
+                    "search_backend": p.search_backend,
+                    "n_splits": len(split_files[cat]),
+                    "n_predictions": len(combined_df),
+                    "per_split_stats": per_split_stats,
+                    "ia_weighted": ia_weights is not None,
+                }
+                if test_metrics:
+                    full_metrics["test_evaluation"] = {
+                        "v_old": test_old_v,
+                        "v_new": test_new_v,
+                        **test_metrics,
+                    }
+
+                model = RerankerModel(
+                    name=model_name,
+                    prediction_set_id=None,
+                    evaluation_set_id=None,
+                    category=cat,
+                    aspect=None,
+                    model_data=model_to_string(train_result.model),
+                    metrics=full_metrics,
+                    feature_importance=train_result.feature_importance,
+                )
+                session.add(model)
+                # Flushed before ``model.id`` is read for the summary below;
+                # no commit is issued in this method.
+                session.flush()
+
+                model_summary = {
+                    "reranker_model_id": str(model.id),
+                    "name": model_name,
+                    "category": cat,
+                    "aspect": None,
+                    "n_predictions": len(combined_df),
+                    "positives": int(combined_df[LABEL_COLUMN].sum()),
+                    **{f"test_{k}": v for k, v in test_metrics.items()},
+                }
+                models_created.append(model_summary)
+
+                emit(
+                    "train_reranker_auto.model_done",
+                    None,
+                    model_summary,
+                    "info",
+                )
+
+                del combined_df, test_df, sw
+                gc.collect()
+
+        finally:
+            # Always remove the staged parquet files, on success or failure.
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+        elapsed = round(time.perf_counter() - t0, 1)
+        result: dict[str, Any] = {
+            "n_models": len(models_created),
+            "models": models_created,
+            "elapsed_seconds": elapsed,
+        }
+        emit("train_reranker_auto.done", None, result, "info")
+        return OperationResult(result=result)
diff --git a/protea/core/reranker.py b/protea/core/reranker.py
new file mode 100644
index 0000000..a58b797
--- /dev/null
+++ b/protea/core/reranker.py
@@ -0,0 +1,301 @@
+"""LightGBM re-ranker for GO term predictions.
+
+Trains a binary classifier on labeled prediction data (from temporal holdout)
+and produces calibrated probability scores that replace or supplement the
+original distance-based ranking.
+
+Feature columns are the numeric signals stored in ``GOPrediction``.  Categorical
+features (``qualifier``, ``evidence_code``, ``taxonomic_relation``) are cast to
+pandas ``category`` dtype.  Missing values stay NaN — LightGBM handles them natively.
+"""
+
+from __future__ import annotations
+
+import io
+from dataclasses import dataclass
+from typing import Any
+
+import lightgbm as lgb
+import numpy as np
+import pandas as pd
+
+# ---------------------------------------------------------------------------
+# Feature definitions
+# ---------------------------------------------------------------------------
+
+NUMERIC_FEATURES: list[str] = [
+    "distance",
+    # NW alignment
+    "identity_nw",
+    "similarity_nw",
+    "alignment_score_nw",
+    "gaps_pct_nw",
+    "alignment_length_nw",
+    # SW alignment
+    "identity_sw",
+    "similarity_sw",
+    "alignment_score_sw",
+    "gaps_pct_sw",
+    "alignment_length_sw",
+    # Lengths
+    "length_query",
+    "length_ref",
+    # Taxonomy
+    "taxonomic_distance",
+    "taxonomic_common_ancestors",
+    # Re-ranker features (the GOPrediction columns of the same names)
+    "vote_count",
+    "k_position",
+    "go_term_frequency",
+    "ref_annotation_density",
+    "neighbor_distance_std",
+]
+
+CATEGORICAL_FEATURES: list[str] = [
+    "qualifier",
+    "evidence_code",
+    "taxonomic_relation",
+]
+
+ALL_FEATURES: list[str] = NUMERIC_FEATURES + CATEGORICAL_FEATURES  # column order of X in prepare_dataset
+
+LABEL_COLUMN = "label"  # target column; prepare_dataset casts it to int
+
+
+# ---------------------------------------------------------------------------
+# Data preparation
+# ---------------------------------------------------------------------------
+
+
+def prepare_dataset(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
+    """Split a training DataFrame into a feature matrix and an integer label vector.
+
+    Numeric features are coerced with ``pd.to_numeric`` (bad values become NaN);
+    categorical features have ``""`` mapped to missing and are cast to pandas
+    ``category`` dtype.  Returns ``(X, y)``: the ``ALL_FEATURES`` columns and ``label``.
+    """
+    features = df[ALL_FEATURES].copy()
+    known = set(features.columns)
+    for name in (c for c in NUMERIC_FEATURES if c in known):
+        features[name] = pd.to_numeric(features[name], errors="coerce")
+    for name in (c for c in CATEGORICAL_FEATURES if c in known):
+        blanked = features[name].replace("", pd.NA)
+        features[name] = blanked.astype("category")
+
+    labels = df[LABEL_COLUMN].astype(int)
+    return features, labels
+
+
+# ---------------------------------------------------------------------------
+# Training
+# ---------------------------------------------------------------------------
+
+_DEFAULT_PARAMS: dict[str, Any] = {
+    "objective": "binary",
+    "metric": ["binary_logloss", "auc"],  # both are read back from best_score in train()
+    "boosting_type": "gbdt",
+    "num_leaves": 31,
+    "learning_rate": 0.01,
+    "feature_fraction": 0.8,
+    "bagging_fraction": 0.8,
+    "bagging_freq": 5,
+    "verbose": -1,
+    "seed": 42,  # train() also seeds its train/val shuffle from this value
+}
+
+
+@dataclass
+class TrainResult:
+    """Trained booster plus its validation metrics and gain-based feature importance."""
+
+    model: lgb.Booster
+    metrics: dict[str, Any]
+    feature_importance: dict[str, float]  # gain importances are floats, not split counts
+
+
+def train(
+    df: pd.DataFrame,
+    *,
+    params: dict[str, Any] | None = None,
+    num_boost_round: int = 1000,
+    early_stopping_rounds: int = 50,
+    val_fraction: float = 0.2,
+    neg_pos_ratio: float | None = None,
+    sample_weight: np.ndarray | None = None,
+) -> TrainResult:
+    """Train a LightGBM binary classifier on labeled prediction data.
+
+    Parameters
+    ----------
+    df:
+        DataFrame with feature columns + ``label`` column (0/1).
+    params:
+        LightGBM parameters, merged on top of ``_DEFAULT_PARAMS``; ``seed`` also drives the split shuffle.
+    num_boost_round:
+        Maximum number of boosting rounds.
+    early_stopping_rounds:
+        Stop if validation metric doesn't improve for this many rounds.
+    val_fraction:
+        Fraction of each class held out for validation; at least one row per class is taken when available.
+    neg_pos_ratio:
+        If set, subsample negatives so that the ratio of negatives to
+        positives is at most this value (e.g. 1.0 for 1:1, 10.0 for 10:1).
+        Applied independently to train and val splits.  When ``None``
+        (default), all negatives are kept.
+    sample_weight:
+        Per-sample weights (e.g. Information Accretion of each GO term),
+        indexed positionally like the rows of ``df``, so it must have the
+        same length; this is not validated here.  When provided, weights
+        are passed to both the training and validation ``lgb.Dataset``.
+
+    Returns
+    -------
+    TrainResult with the trained Booster, validation metrics, and gain-based feature importance.
+    """
+    X, y = prepare_dataset(df)
+
+    merged_params = {**_DEFAULT_PARAMS, **(params or {})}
+
+    # Stratified split: shuffle each class's row positions, then take the first val_fraction as validation.
+    rng = np.random.RandomState(merged_params.get("seed", 42))
+    pos_idx = np.where(y == 1)[0]
+    neg_idx = np.where(y == 0)[0]
+    rng.shuffle(pos_idx)
+    rng.shuffle(neg_idx)
+
+    n_pos_val = max(1, int(len(pos_idx) * val_fraction))
+    n_neg_val = max(1, int(len(neg_idx) * val_fraction))
+
+    val_pos = pos_idx[:n_pos_val]
+    val_neg = neg_idx[:n_neg_val]
+    train_pos = pos_idx[n_pos_val:]
+    train_neg = neg_idx[n_neg_val:]
+
+    # Cap negatives per split; the indices are already shuffled, so keeping a prefix is a random subsample.
+    if neg_pos_ratio is not None:
+        max_train_neg = max(1, int(len(train_pos) * neg_pos_ratio))
+        if len(train_neg) > max_train_neg:
+            train_neg = train_neg[:max_train_neg]
+        max_val_neg = max(1, int(len(val_pos) * neg_pos_ratio))
+        if len(val_neg) > max_val_neg:
+            val_neg = val_neg[:max_val_neg]
+
+    val_idx = np.concatenate([val_pos, val_neg])
+    train_idx = np.concatenate([train_pos, train_neg])
+
+    cat_cols = [c for c in CATEGORICAL_FEATURES if c in X.columns]
+
+    train_w = sample_weight[train_idx] if sample_weight is not None else None
+    val_w = sample_weight[val_idx] if sample_weight is not None else None
+
+    train_ds = lgb.Dataset(
+        X.iloc[train_idx],
+        label=y.iloc[train_idx],
+        weight=train_w,
+        categorical_feature=cat_cols,
+        free_raw_data=False,
+    )
+    val_ds = lgb.Dataset(
+        X.iloc[val_idx],
+        label=y.iloc[val_idx],
+        weight=val_w,
+        categorical_feature=cat_cols,
+        reference=train_ds,
+        free_raw_data=False,
+    )
+
+    callbacks = [
+        lgb.early_stopping(early_stopping_rounds, verbose=False),
+        lgb.log_evaluation(period=0),
+    ]
+
+    booster = lgb.train(
+        merged_params,
+        train_ds,
+        num_boost_round=num_boost_round,
+        valid_sets=[val_ds],
+        valid_names=["val"],
+        callbacks=callbacks,  # type: ignore[arg-type]
+    )
+
+    # Precision/recall/F1 on the validation rows at a fixed 0.5 score threshold (unweighted counts).
+    val_preds = np.asarray(booster.predict(X.iloc[val_idx]))
+    val_labels = y.iloc[val_idx].values
+
+    tp = np.sum((val_preds >= 0.5) & (val_labels == 1))
+    fp = np.sum((val_preds >= 0.5) & (val_labels == 0))
+    fn = np.sum((val_preds < 0.5) & (val_labels == 1))
+    precision = float(tp / (tp + fp)) if (tp + fp) > 0 else 0.0
+    recall = float(tp / (tp + fn)) if (tp + fn) > 0 else 0.0
+    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
+
+    metrics = {
+        "best_iteration": booster.best_iteration,
+        "val_auc": float(booster.best_score.get("val", {}).get("auc", 0.0)),
+        "val_logloss": float(booster.best_score.get("val", {}).get("binary_logloss", 0.0)),
+        "val_precision": round(precision, 4),
+        "val_recall": round(recall, 4),
+        "val_f1": round(f1, 4),
+        "train_samples": len(train_idx),
+        "val_samples": len(val_idx),
+        "positive_rate": round(float(y.mean()), 4),  # over all of df, before any negative subsampling
+    }
+
+    importance = dict(
+        zip(booster.feature_name(), booster.feature_importance(importance_type="gain").tolist(), strict=False)
+    )
+
+    return TrainResult(model=booster, metrics=metrics, feature_importance=importance)
+
+
+# ---------------------------------------------------------------------------
+# Inference
+# ---------------------------------------------------------------------------
+
+
+def predict(model: lgb.Booster, df: pd.DataFrame) -> np.ndarray:
+    """Score the rows of ``df`` with a trained re-ranker booster.
+
+    Returns ``model.predict`` output as an array; a ``label`` column is optional.
+    """
+    if LABEL_COLUMN in df.columns:
+        # Labeled frames go through the training-time preparation; y is unused.
+        features = prepare_dataset(df)[0]
+        return np.asarray(model.predict(features))
+
+    features = df[ALL_FEATURES].copy()
+    known = set(features.columns)
+    for name in (c for c in NUMERIC_FEATURES if c in known):
+        features[name] = pd.to_numeric(features[name], errors="coerce")
+    for name in (c for c in CATEGORICAL_FEATURES if c in known):
+        features[name] = features[name].replace("", pd.NA).astype("category")
+    return np.asarray(model.predict(features))
+
+
+# ---------------------------------------------------------------------------
+# Serialization
+# ---------------------------------------------------------------------------
+
+
+def model_to_string(model: lgb.Booster) -> str:
+    """Return ``model.model_to_string()``, the text form kept in ``RerankerModel.model_data``."""
+    return model.model_to_string()
+
+
+def model_from_string(model_str: str) -> lgb.Booster:
+    """Rebuild a booster from text produced by :func:`model_to_string`."""
+    return lgb.Booster(model_str=model_str)
+
+
+def load_training_tsv(tsv_content: str | bytes) -> pd.DataFrame:
+    """Parse a training-data TSV (as produced by the training-data.tsv endpoint)."""
+    text = tsv_content.decode("utf-8") if isinstance(tsv_content, bytes) else tsv_content
+    frame = pd.read_csv(io.StringIO(text), sep="\t", dtype=str)
+    # Everything is read as str; known numeric features are coerced (bad cells -> NaN).
+    for name in [c for c in NUMERIC_FEATURES if c in frame.columns]:
+        frame[name] = pd.to_numeric(frame[name], errors="coerce")
+    if LABEL_COLUMN in frame.columns:
+        # Unparseable or missing labels become 0 before the integer cast.
+        parsed = pd.to_numeric(frame[LABEL_COLUMN], errors="coerce")
+        frame[LABEL_COLUMN] = parsed.fillna(0).astype(int)
+    return frame
diff --git a/protea/infrastructure/database/engine.py b/protea/infrastructure/database/engine.py
index dd02e91..7df6692 100644
--- a/protea/infrastructure/database/engine.py
+++ b/protea/infrastructure/database/engine.py
@@ -5,4 +5,11 @@
 
 
 def build_engine(db_url: str) -> Engine:
-    return create_engine(db_url, future=True, pool_pre_ping=True)
+    # Connection-pool settings, applied to every engine this function builds.
+    pool_options = {
+        "pool_size": 20,
+        "max_overflow": 40,
+        "pool_recycle": 3600,
+    }
+    # pool_pre_ping tests a pooled connection for liveness when it is checked out.
+    return create_engine(db_url, future=True, pool_pre_ping=True, **pool_options)
diff --git a/protea/infrastructure/logging.py b/protea/infrastructure/logging.py
new file mode 100644
index 0000000..7ecc15d
--- /dev/null
+++ b/protea/infrastructure/logging.py
@@ -0,0 +1,104 @@
+# protea/infrastructure/logging.py
+"""Structured logging configuration for PROTEA.
+
+Provides a JSON formatter using only the Python standard library and a
+``configure_logging()`` helper that workers and the API can call at startup.
+"""
+from __future__ import annotations
+
+import json
+import logging
+from datetime import UTC, datetime
+from typing import Any
+
+
+class JSONFormatter(logging.Formatter):
+    """Formats log records as single-line JSON objects.
+
+    Each line contains at least ``timestamp``, ``level``, ``logger``, and
+    ``message``.  Any *extra* fields attached to the record are merged into
+    the top-level JSON object, making it easy to add structured context
+    (e.g. ``logger.info("started", extra={"queue": "protea.jobs"})``).
+    """
+
+    # Standard LogRecord attributes that must not be forwarded as extras.
+    # ``asctime`` is cached on the record by time-based text formatters.
+    _BUILTIN_ATTRS: frozenset[str] = frozenset(
+        {
+            "args",
+            "asctime",
+            "created",
+            "exc_info",
+            "exc_text",
+            "filename",
+            "funcName",
+            "levelname",
+            "levelno",
+            "lineno",
+            "message",
+            "module",
+            "msecs",
+            "msg",
+            "name",
+            "pathname",
+            "process",
+            "processName",
+            "relativeCreated",
+            "stack_info",
+            "taskName",
+            "thread",
+            "threadName",
+        }
+    )
+
+    def format(self, record: logging.LogRecord) -> str:  # noqa: D401
+        log_entry: dict[str, Any] = {
+            "timestamp": datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": record.getMessage(),
+        }
+
+        # Merge any extra fields the caller attached to the record.
+        for key, value in record.__dict__.items():
+            if key not in self._BUILTIN_ATTRS:
+                log_entry[key] = value
+
+        # Append exception and stack information when present.
+        if record.exc_info and record.exc_info[1] is not None:
+            log_entry["exception"] = self.formatException(record.exc_info)
+        if record.stack_info:
+            log_entry["stack_info"] = record.stack_info
+
+        return json.dumps(log_entry, default=str)
+
+
+_TEXT_FORMAT = "%(asctime)s %(levelname)s %(message)s"
+
+
+def configure_logging(*, json: bool = True, level: str = "INFO") -> None:
+    """Configure the root logger for the process.
+
+    Parameters
+    ----------
+    json:
+        When *True* (the default), use :class:`JSONFormatter` so that every
+        log line is a valid JSON object.  When *False*, fall back to the
+        plain-text format used during local development.
+    level:
+        Root log level name (e.g. ``"INFO"``, ``"DEBUG"``); unknown names mean INFO.
+    """
+    root = logging.getLogger()
+    # Accept only integer level constants; other ``logging`` attributes fall back too.
+    resolved = getattr(logging, level.upper(), None)
+    root.setLevel(resolved if isinstance(resolved, int) else logging.INFO)
+
+    # Drop handlers added by earlier basicConfig calls or library imports.
+    root.handlers.clear()
+
+    handler = logging.StreamHandler()
+    if json:
+        handler.setFormatter(JSONFormatter())
+    else:
+        handler.setFormatter(logging.Formatter(_TEXT_FORMAT))
+    root.addHandler(handler)
diff --git a/protea/infrastructure/orm/models/__init__.py b/protea/infrastructure/orm/models/__init__.py
index 2dd358e..f3a41a8 100644
--- a/protea/infrastructure/orm/models/__init__.py
+++ b/protea/infrastructure/orm/models/__init__.py
@@ -8,6 +8,7 @@
 from .embedding.embedding_config import EmbeddingConfig  # noqa: F401
 from .embedding.go_prediction import GOPrediction  # noqa: F401
 from .embedding.prediction_set import PredictionSet  # noqa: F401
+from .embedding.reranker_model import RerankerModel  # noqa: F401
 from .embedding.scoring_config import ScoringConfig  # noqa: F401
 from .embedding.sequence_embedding import SequenceEmbedding  # noqa: F401
 from .job import Job, JobEvent  # noqa: F401
diff --git a/protea/infrastructure/orm/models/annotation/evaluation_result.py b/protea/infrastructure/orm/models/annotation/evaluation_result.py
index 187eb5d..0c6c4dc 100644
--- a/protea/infrastructure/orm/models/annotation/evaluation_result.py
+++ b/protea/infrastructure/orm/models/annotation/evaluation_result.py
@@ -13,6 +13,7 @@
 if TYPE_CHECKING:
     from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
     from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+    from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
     from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
     from protea.infrastructure.orm.models.job import Job
 
@@ -59,6 +60,13 @@ class EvaluationResult(Base):
         nullable=True,
         index=True,
     )
+    reranker_model_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("reranker_model.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    reranker_config: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True)
     job_id: Mapped[uuid.UUID | None] = mapped_column(
         UUID(as_uuid=True),
         ForeignKey("job.id", ondelete="SET NULL"),
@@ -73,4 +81,5 @@ class EvaluationResult(Base):
     evaluation_set: Mapped[EvaluationSet] = relationship("EvaluationSet")
     prediction_set: Mapped[PredictionSet] = relationship("PredictionSet")
     scoring_config: Mapped[ScoringConfig | None] = relationship("ScoringConfig")
+    reranker_model: Mapped[RerankerModel | None] = relationship("RerankerModel")
     job: Mapped[Job | None] = relationship("Job")
diff --git a/protea/infrastructure/orm/models/annotation/protein_go_annotation.py b/protea/infrastructure/orm/models/annotation/protein_go_annotation.py
index 6804860..934917d 100644
--- a/protea/infrastructure/orm/models/annotation/protein_go_annotation.py
+++ b/protea/infrastructure/orm/models/annotation/protein_go_annotation.py
@@ -3,7 +3,7 @@
 import uuid
 from typing import TYPE_CHECKING
 
-from sqlalchemy import BigInteger, ForeignKey, String, UniqueConstraint
+from sqlalchemy import BigInteger, ForeignKey, Index, String, UniqueConstraint
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
@@ -37,6 +37,7 @@ class ProteinGOAnnotation(Base):
             "evidence_code",
             name="uq_pga_set_protein_term_evidence",
         ),
+        Index("ix_pga_set_accession", "annotation_set_id", "protein_accession"),
     )
 
     id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
diff --git a/protea/infrastructure/orm/models/embedding/embedding_config.py b/protea/infrastructure/orm/models/embedding/embedding_config.py
index 5e59d0d..6633149 100644
--- a/protea/infrastructure/orm/models/embedding/embedding_config.py
+++ b/protea/infrastructure/orm/models/embedding/embedding_config.py
@@ -10,7 +10,7 @@
 
 from protea.infrastructure.orm.base import Base
 
-_VALID_LAYER_AGG = {"mean", "concat"}
+_VALID_LAYER_AGG = {"mean", "last", "concat"}
 _VALID_POOLING = {"mean", "max", "cls", "mean_max"}
 _VALID_BACKENDS = {"esm", "esm3c", "t5", "auto"}
 
diff --git a/protea/infrastructure/orm/models/embedding/go_prediction.py b/protea/infrastructure/orm/models/embedding/go_prediction.py
index ef58a93..84ca1fa 100644
--- a/protea/infrastructure/orm/models/embedding/go_prediction.py
+++ b/protea/infrastructure/orm/models/embedding/go_prediction.py
@@ -3,7 +3,7 @@
 import uuid
 from typing import TYPE_CHECKING
 
-from sqlalchemy import BigInteger, Float, ForeignKey, Integer, String, UniqueConstraint
+from sqlalchemy import BigInteger, Float, ForeignKey, Index, Integer, String, UniqueConstraint
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
@@ -31,6 +31,7 @@ class GOPrediction(Base):
             "go_term_id",
             name="uq_go_prediction_set_protein_term",
         ),
+        Index("ix_go_prediction_set_accession", "prediction_set_id", "protein_accession"),
     )
 
     id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
@@ -74,6 +75,13 @@ class GOPrediction(Base):
     length_query: Mapped[int | None] = mapped_column(Integer, nullable=True)
     length_ref: Mapped[int | None] = mapped_column(Integer, nullable=True)
 
+    # --- Re-ranker features ---
+    vote_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    k_position: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    go_term_frequency: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    ref_annotation_density: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    neighbor_distance_std: Mapped[float | None] = mapped_column(Float, nullable=True)
+
     # --- Taxonomy features ---
     query_taxonomy_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
     ref_taxonomy_id: Mapped[int | None] = mapped_column(Integer, nullable=True)
diff --git a/protea/infrastructure/orm/models/embedding/reranker_model.py b/protea/infrastructure/orm/models/embedding/reranker_model.py
new file mode 100644
index 0000000..106659b
--- /dev/null
+++ b/protea/infrastructure/orm/models/embedding/reranker_model.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Any
+
+from sqlalchemy import DateTime, ForeignKey, String, Text, func
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column
+
+from protea.infrastructure.orm.base import Base
+
+
+class RerankerModel(Base):
+    """A trained LightGBM re-ranker model stored in the database.
+
+    ``model_data`` holds the serialized LightGBM model string; ``metrics`` and
+    ``feature_importance`` are JSONB.  Both set foreign keys are nullable with
+    ``ON DELETE SET NULL``, so their Python types are ``uuid.UUID | None``.
+    """
+
+    __tablename__ = "reranker_model"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    name: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
+    prediction_set_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("prediction_set.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    evaluation_set_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("evaluation_set.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    category: Mapped[str] = mapped_column(String(10), nullable=False)
+    aspect: Mapped[str | None] = mapped_column(String(3), nullable=True)
+    model_data: Mapped[str] = mapped_column(Text, nullable=False)
+    metrics: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)
+    feature_importance: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), nullable=False, server_default=func.now()
+    )
diff --git a/protea/infrastructure/orm/models/job.py b/protea/infrastructure/orm/models/job.py
index 9b5be20..45ad944 100644
--- a/protea/infrastructure/orm/models/job.py
+++ b/protea/infrastructure/orm/models/job.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import enum
-from datetime import UTC, datetime
+from datetime import datetime
 from typing import Any
 from uuid import UUID, uuid4
 
@@ -10,13 +10,10 @@
 from sqlalchemy.dialects.postgresql import UUID as PG_UUID
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
+from protea.core.utils import utcnow
 from protea.infrastructure.orm.base import Base
 
 
-def utcnow() -> datetime:
-    return datetime.now(UTC)
-
-
 class JobStatus(enum.StrEnum):
     QUEUED = "queued"
     RUNNING = "running"
diff --git a/protea/infrastructure/queue/consumer.py b/protea/infrastructure/queue/consumer.py
index 8c3310a..876b1cf 100644
--- a/protea/infrastructure/queue/consumer.py
+++ b/protea/infrastructure/queue/consumer.py
@@ -13,11 +13,22 @@
 
 from protea.core.contracts.operation import RetryLaterError
 from protea.core.contracts.registry import OperationRegistry
+from protea.infrastructure.orm.models.job import JobEvent
 from protea.infrastructure.queue.publisher import publish_operation
 from protea.workers.base_worker import BaseWorker
 
 logger = logging.getLogger(__name__)
 
+_DLX_NAME = "protea.dlx"
+_DLQ_NAME = "protea.dead-letter"
+
+
+def _setup_dead_letter(channel: BlockingChannel) -> None:
+    """Declare the durable fanout DLX and its durable queue, then bind them (idempotent)."""
+    channel.exchange_declare(exchange=_DLX_NAME, exchange_type="fanout", durable=True)
+    channel.queue_declare(queue=_DLQ_NAME, durable=True)
+    channel.queue_bind(queue=_DLQ_NAME, exchange=_DLX_NAME)
+
 
 class QueueConsumer:
     """
@@ -63,7 +74,12 @@ def run(self) -> None:
         connection = pika.BlockingConnection(params)
         channel = connection.channel()
 
-        channel.queue_declare(queue=self._queue_name, durable=True)
+        _setup_dead_letter(channel)
+        channel.queue_declare(
+            queue=self._queue_name,
+            durable=True,
+            arguments={"x-dead-letter-exchange": _DLX_NAME},
+        )
         channel.basic_qos(prefetch_count=self._prefetch_count)
         channel.basic_consume(
             queue=self._queue_name,
@@ -182,7 +198,12 @@ def run(self) -> None:
         connection = pika.BlockingConnection(params)
         channel = connection.channel()
 
-        channel.queue_declare(queue=self._queue_name, durable=True)
+        _setup_dead_letter(channel)
+        channel.queue_declare(
+            queue=self._queue_name,
+            durable=True,
+            arguments={"x-dead-letter-exchange": _DLX_NAME},
+        )
         channel.basic_qos(prefetch_count=self._prefetch_count)
         channel.basic_consume(
             queue=self._queue_name,
@@ -234,6 +255,14 @@ def _on_message(
             "Dispatching operation. operation=%s queue=%s", operation_name, self._queue_name
         )
 
+        parent_job_id: UUID | None = None
+        raw_job_id = data.get("job_id")
+        if raw_job_id:
+            try:
+                parent_job_id = UUID(raw_job_id)
+            except (ValueError, TypeError):
+                pass
+
         op = self._registry.get(operation_name)
         session = self._factory()
         try:
@@ -245,6 +274,32 @@ def emit(
                 level: str = "info",
             ) -> None:
                 logger.info("operation.%s fields=%s", event, fields or {})
+                if parent_job_id is not None:
+                    event_session = self._factory()
+                    try:
+                        event_session.add(
+                            JobEvent(
+                                job_id=parent_job_id,
+                                event=f"child.{event}",
+                                message=message,
+                                fields=fields or {},
+                                level=level,
+                            )
+                        )
+                        event_session.commit()
+                    except Exception as emit_exc:
+                        logger.warning(
+                            "Failed to write child event to parent job. "
+                            "parent_job_id=%s error=%s",
+                            parent_job_id,
+                            emit_exc,
+                        )
+                        try:
+                            event_session.rollback()
+                        except Exception:
+                            pass
+                    finally:
+                        event_session.close()
 
             result = op.execute(session, payload, emit=emit)
             session.commit()
@@ -270,6 +325,30 @@ def emit(
                 )
             else:
                 logger.error("Operation failed. operation=%s error=%s", operation_name, exc)
+                # Record failure event on parent job so it's visible in the UI.
+                if parent_job_id is not None:
+                    err_session = self._factory()
+                    try:
+                        err_session.add(
+                            JobEvent(
+                                job_id=parent_job_id,
+                                event="child.failed",
+                                message=str(exc)[:2000],
+                                fields={
+                                    "operation": operation_name,
+                                    "error_code": exc.__class__.__name__,
+                                },
+                                level="error",
+                            )
+                        )
+                        err_session.commit()
+                    except Exception:
+                        try:
+                            err_session.rollback()
+                        except Exception:
+                            pass
+                    finally:
+                        err_session.close()
             try:
                 session.rollback()
             except Exception:
diff --git a/protea/infrastructure/queue/publisher.py b/protea/infrastructure/queue/publisher.py
index 29a3985..8417251 100644
--- a/protea/infrastructure/queue/publisher.py
+++ b/protea/infrastructure/queue/publisher.py
@@ -2,6 +2,7 @@
 
 import json
 import logging
+import threading
 import time
 from typing import Any
 from uuid import UUID
@@ -10,37 +11,64 @@
 
 logger = logging.getLogger(__name__)
 
-_RETRY_DELAYS = (1, 3, 10)  # seconds between attempts (3 attempts total)
+_MAX_ATTEMPTS = 5
+_BASE_DELAY = 1  # seconds; exponential backoff: 1, 2, 4, 8, 16 (capped at 30)
+
+# Thread-local persistent connection to avoid opening/closing per publish.
+_local = threading.local()
+
+
+def _get_connection(amqp_url: str) -> pika.BlockingConnection:
+    """Return this thread's open cached connection, else open one (cache is not keyed by URL)."""
+    cached: pika.BlockingConnection | None = getattr(_local, "connection", None)
+    if cached is None or not cached.is_open:
+        cached = pika.BlockingConnection(pika.URLParameters(amqp_url))
+        _local.connection = cached
+    return cached
+
+
+def _close_cached_connection() -> None:
+    conn: pika.BlockingConnection | None = getattr(_local, "connection", None)
+    if conn is not None and conn.is_open:
+        try:
+            conn.close()
+        except Exception:
+            pass  # best-effort: a failed close must not mask the publish error being handled
+    _local.connection = None  # always clear the slot so the next _get_connection reconnects
 
 
 def _publish(amqp_url: str, queue_name: str, body: bytes) -> None:
-    """Core publish logic with retries. Used by both publish_job and publish_operation."""
+    """Core publish logic with retries and connection reuse."""
     last_exc: Exception | None = None
 
-    for attempt, delay in enumerate((*_RETRY_DELAYS, None), start=1):
+    for attempt in range(1, _MAX_ATTEMPTS + 1):
         try:
-            connection = pika.BlockingConnection(pika.URLParameters(amqp_url))
-            try:
-                channel = connection.channel()
-                channel.queue_declare(queue=queue_name, durable=True)
-                channel.basic_publish(
-                    exchange="",
-                    routing_key=queue_name,
-                    body=body,
-                    properties=pika.BasicProperties(
-                        delivery_mode=pika.DeliveryMode.Persistent,
-                    ),
-                )
-                return
-            finally:
-                if connection.is_open:
-                    connection.close()
+            connection = _get_connection(amqp_url)
+            channel = connection.channel()
+            channel.queue_declare(
+                queue=queue_name,
+                durable=True,
+                arguments={"x-dead-letter-exchange": "protea.dlx"},
+            )
+            channel.basic_publish(
+                exchange="",
+                routing_key=queue_name,
+                body=body,
+                properties=pika.BasicProperties(
+                    delivery_mode=pika.DeliveryMode.Persistent,
+                ),
+            )
+            return
         except Exception as exc:
             last_exc = exc
-            if delay is not None:
+            # Connection is stale — discard it so next attempt creates a fresh one.
+            _close_cached_connection()
+            if attempt < _MAX_ATTEMPTS:
+                delay = min(_BASE_DELAY * (2 ** (attempt - 1)), 30)
                 logger.warning(
-                    "publish failed (attempt %d), retrying in %ds. queue=%s error=%s",
+                    "publish failed (attempt %d/%d), retrying in %ds. queue=%s error=%s",
                     attempt,
+                    _MAX_ATTEMPTS,
                     delay,
                     queue_name,
                     exc,
@@ -49,13 +77,13 @@ def _publish(amqp_url: str, queue_name: str, body: bytes) -> None:
             else:
                 logger.error(
                     "publish failed after %d attempts. queue=%s error=%s",
-                    attempt,
+                    _MAX_ATTEMPTS,
                     queue_name,
                     exc,
                 )
 
     raise RuntimeError(
-        f"Failed to publish to queue {queue_name!r} after {len(_RETRY_DELAYS) + 1} attempts"
+        f"Failed to publish to queue {queue_name!r} after {_MAX_ATTEMPTS} attempts"
     ) from last_exc
 
 
diff --git a/protea/infrastructure/settings.py b/protea/infrastructure/settings.py
index 2c4ce9a..ada09a1 100644
--- a/protea/infrastructure/settings.py
+++ b/protea/infrastructure/settings.py
@@ -13,6 +13,7 @@ class Settings:
     db_url: str
     amqp_url: str
     artifacts_dir: Path
+    admin_token: str
 
 
 def _load_yaml(path: Path) -> dict[str, Any]:
@@ -56,4 +57,10 @@ def load_settings(project_root: Path, *, env_prefix: str = "PROTEA_") -> Setting
     if not artifacts_dir.is_absolute():
         artifacts_dir = project_root / artifacts_dir
 
-    return Settings(db_url=db_url, amqp_url=amqp_url, artifacts_dir=artifacts_dir)
+    admin_token = (
+        os.getenv(f"{env_prefix}ADMIN_TOKEN")
+        or system.get("admin", {}).get("token")
+        or ""
+    )
+
+    return Settings(db_url=db_url, amqp_url=amqp_url, artifacts_dir=artifacts_dir, admin_token=admin_token)
diff --git a/protea/workers/base_worker.py b/protea/workers/base_worker.py
index b5d7323..7381b00 100644
--- a/protea/workers/base_worker.py
+++ b/protea/workers/base_worker.py
@@ -177,6 +177,16 @@ def emit(
 
             except RetryLaterError as e:
                 # Resource busy — reset to QUEUED so the consumer can re-publish.
+                # Adaptive backoff: count previous retries and increase delay.
+                retry_count = (
+                    session.query(func.count(JobEvent.id))
+                    .filter(JobEvent.job_id == job_id, JobEvent.event == "job.retry_later")
+                    .scalar()
+                    or 0
+                )
+                base_delay = e.delay_seconds
+                delay = min(base_delay * (2 ** retry_count), 600)  # cap at 10 min
+
                 job.status = JobStatus.QUEUED
                 job.started_at = None
                 self._emit(
@@ -184,10 +194,12 @@ def emit(
                     job_id,
                     "job.retry_later",
                     str(e),
-                    {"delay_seconds": e.delay_seconds},
+                    {"delay_seconds": delay, "retry_count": retry_count + 1},
                     level="info",
                 )
                 session.commit()
+                # Propagate adaptive delay to the consumer.
+                e.delay_seconds = delay
                 raise  # consumer handles re-publish
 
             except Exception as e:
diff --git a/protea/workers/stale_job_reaper.py b/protea/workers/stale_job_reaper.py
new file mode 100644
index 0000000..6af5556
--- /dev/null
+++ b/protea/workers/stale_job_reaper.py
@@ -0,0 +1,105 @@
+# protea/workers/stale_job_reaper.py
+"""Periodic reaper that marks long-running jobs as FAILED.
+
+Workers are single-threaded and cannot be interrupted mid-operation without
+risking data corruption.  Instead, this lightweight reaper runs on a timer
+and transitions any job that has been in RUNNING status for longer than
+``timeout_seconds`` to FAILED with error_code ``JobTimeout``.
+
+Usage::
+
+    reaper = StaleJobReaper(session_factory, timeout_seconds=3600)
+    reaper.run(interval_seconds=60)  # checks every minute
+"""
+from __future__ import annotations
+
+import logging
+import signal
+import time
+from datetime import timedelta
+
+from sqlalchemy.orm import Session, sessionmaker
+
+from protea.core.utils import utcnow
+from protea.infrastructure.orm.models.job import Job, JobEvent, JobStatus
+
+logger = logging.getLogger(__name__)
+
+
+class StaleJobReaper:
+    """Marks jobs stuck in RUNNING longer than ``timeout_seconds`` as FAILED."""
+
+    def __init__(
+        self,
+        session_factory: sessionmaker[Session],
+        timeout_seconds: int = 3600,
+    ) -> None:
+        self._factory = session_factory
+        self._timeout = timedelta(seconds=timeout_seconds)
+        self._stop = False
+
+    def run(self, interval_seconds: int = 60) -> None:
+        """Run a reap cycle every ``interval_seconds`` until SIGINT/SIGTERM."""
+        signal.signal(signal.SIGINT, self._handle_stop)
+        signal.signal(signal.SIGTERM, self._handle_stop)
+
+        logger.info(
+            "StaleJobReaper started. timeout=%ss interval=%ss",
+            self._timeout.total_seconds(),
+            interval_seconds,
+        )
+        while not self._stop:
+            try:
+                reaped = self._reap()
+                if reaped:
+                    logger.info("Reaped %d stale job(s).", reaped)
+            except Exception as exc:
+                # exception() keeps the traceback; one bad cycle must not end the loop.
+                logger.exception("Reaper cycle failed: %s", exc)
+            # time.sleep() resumes after a handled signal, so wait in short
+            # slices and re-check the stop flag to shut down promptly.
+            deadline = time.monotonic() + interval_seconds
+            while not self._stop and time.monotonic() < deadline:
+                time.sleep(min(1.0, max(0.0, deadline - time.monotonic())))
+
+        logger.info("StaleJobReaper stopped.")
+
+    def _handle_stop(self, *_: object) -> None:
+        """Signal handler: ask the run loop to exit."""
+        self._stop = True
+
+    def _reap(self) -> int:
+        """Fail every RUNNING job started before the cutoff; return how many."""
+        cutoff = utcnow() - self._timeout
+        timeout_s = self._timeout.total_seconds()
+        session = self._factory()
+        try:
+            stale_jobs = (
+                session.query(Job)
+                .filter(Job.status == JobStatus.RUNNING, Job.started_at < cutoff)
+                .all()
+            )
+            for job in stale_jobs:
+                job.status = JobStatus.FAILED
+                job.finished_at = utcnow()
+                job.error_code = "JobTimeout"
+                job.error_message = f"Job exceeded timeout of {timeout_s:.0f}s"
+                session.add(JobEvent(
+                    job_id=job.id, event="job.timeout", message=job.error_message,
+                    fields={"timeout_seconds": timeout_s}, level="error",
+                ))
+                logger.warning(
+                    "Marking stale job FAILED. job_id=%s operation=%s started_at=%s",
+                    job.id, job.operation, job.started_at,
+                )
+            session.commit()
+            return len(stale_jobs)
+        except Exception:
+            # Best-effort rollback; the original error is re-raised either way.
+            try:
+                session.rollback()
+            except Exception:
+                pass
+            raise
+        finally:
+            session.close()
diff --git a/pyproject.toml b/pyproject.toml
index e7438be..12d6125 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "protea"
-version = "0.2.0"
+version = "0.3.0"
 description = ""
 authors = [
     {name = "frapercan",email = "frapercan1@alum.us.es"}
@@ -24,6 +24,8 @@ dependencies = [
     "parasail (>=1.3.4)",
     "ete3 (>=3.1.3)",
     "cafaeval @ git+https://github.com/claradepaolis/CAFA-evaluator-PK.git",
+    "lightgbm (>=4.6.0,<5.0.0)",
+    "pyarrow (>=23.0.1,<24.0.0)",
 ]
 
 
diff --git a/scripts/evaluate_external_tool.py b/scripts/evaluate_external_tool.py
new file mode 100644
index 0000000..275c0d5
--- /dev/null
+++ b/scripts/evaluate_external_tool.py
@@ -0,0 +1,413 @@
+#!/usr/bin/env python3
+"""Evaluate an external tool's GO predictions using the same CAFA protocol as PROTEA.
+
+This script:
+  1. Connects to PROTEA's DB to compute the NK/LK/PK ground truth for a given
+     EvaluationSet (same logic as run_cafa_evaluation).
+  2. Parses an external tool's output (eggNOG-mapper, InterProScan, BLAST) and
+     converts it to CAFA-format predictions (protein  go_id  score).
+  3. Runs cafaeval for NK, LK, PK and prints the Fmax table.
+
+Usage:
+  poetry run python scripts/evaluate_external_tool.py \
+      --evaluation-set-id 42b34e79-6fe9-4fa0-b718-02f43a1e3192 \
+      --tool emapper \
+      --input /path/to/test_proteins.emapper.annotations \
+      [--ia-file /path/to/IA_cafa6.tsv]
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import signal
+import sys
+import tempfile
+import uuid
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from sqlalchemy.orm import Session
+
+from protea.core.evaluation import compute_evaluation_data
+from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
+from protea.infrastructure.session import build_session_factory, session_scope
+from protea.infrastructure.settings import load_settings
+
+# ---------------------------------------------------------------------------
+# Parsers for external tools
+# ---------------------------------------------------------------------------
+
+def parse_emapper(path: str) -> dict[str, set[str]]:
+    """Read an eggNOG-mapper ``.annotations`` TSV into {query: {GO ids}}.
+
+    Column 0 is the query protein, column 9 the comma-separated GO list.
+    Comment lines, short rows and rows without any GO term are dropped.
+    """
+    result: dict[str, set[str]] = {}
+    with open(path) as handle:
+        for raw in handle:
+            fields = raw.rstrip("\n").split("\t")
+            if raw.startswith("#") or len(fields) < 10:
+                continue
+            go_field = fields[9]
+            if go_field != "-" and go_field.strip():
+                terms = {t.strip() for t in go_field.split(",") if t.strip().startswith("GO:")}
+                if terms:
+                    result[fields[0]] = terms
+    return result
+
+
+def parse_interproscan(path: str) -> dict[str, set[str]]:
+    """Read InterProScan TSV output into {protein: {GO ids}}.
+
+    Columns used (0-indexed): 0 is the protein accession, 13 holds the
+    pipe-separated GO annotations.  A protein may span several rows; the GO
+    terms of all its rows are merged.
+    """
+    merged: dict[str, set[str]] = {}
+    with open(path) as handle:
+        for raw in handle:
+            if raw.startswith("#"):
+                continue
+            fields = raw.rstrip("\n").split("\t")
+            if len(fields) < 14:
+                continue
+            accession = fields[0]
+            go_field = fields[13]
+            if go_field in ("", "-"):
+                continue
+            terms: set[str] = set()
+            for piece in go_field.split("|"):
+                token = piece.strip()
+                if not token.startswith("GO:"):
+                    continue
+                # Drop a source suffix: "GO:0016020(InterPro)" -> "GO:0016020".
+                terms.add(token.partition("(")[0])
+            if terms:
+                merged.setdefault(accession, set()).update(terms)
+    return merged
+
+
+def parse_blast_go(path: str) -> dict[str, set[str]]:
+    """Read a generic CAFA-like TSV (protein, go_id, optional score).
+
+    Such files can be derived from BLAST results by custom post-processing.
+    The score column, if any, is ignored; rows whose second column is not a
+    GO id are skipped.
+    """
+    by_protein: dict[str, set[str]] = {}
+    with open(path) as handle:
+        for raw in handle:
+            if raw.startswith("#"):
+                continue
+            fields = raw.rstrip("\n").split("\t")
+            # Rows need at least the protein and GO id columns.
+            if len(fields) >= 2 and fields[1].startswith("GO:"):
+                accession = fields[0]
+                term = fields[1]
+                by_protein.setdefault(accession, set()).add(term)
+    return by_protein
+
+
+def parse_pannzer2(path: str) -> dict[str, dict[str, float]]:
+    """Read a PANNZER2 ``anno.out`` file into {protein: {GO id: PPV}}.
+
+    Tab-separated columns:
+      0: qpid (query protein ID)
+      1: type (e.g. MF_ARGOT, BP_ARGOT, CC_ARGOT)
+      2: score (raw ARGOT score)
+      3: PPV (positive predictive value, 0-1 calibrated confidence)
+      4: id (GO ID, e.g. GO:0005524)
+      5: desc (GO term description)
+
+    Only ARGOT rows are kept and PPV is the confidence score.  An unparsable
+    PPV counts as 1.0; the highest PPV wins for a repeated (protein, GO) pair.
+    """
+    scored: dict[str, dict[str, float]] = {}
+    with open(path) as handle:
+        for raw in handle:
+            if raw.startswith(("#", "qpid")):
+                continue
+            fields = raw.rstrip("\n").split("\t")
+            if len(fields) < 5:
+                continue
+            accession = fields[0]
+            kind = fields[1]
+            term = fields[4]
+            if not term.startswith("GO:"):
+                continue
+            # Keep rows produced by ARGOT (best PANNZER2 method) only.
+            if "ARGOT" not in kind:
+                continue
+            try:
+                ppv = float(fields[3])
+            except (ValueError, IndexError):
+                ppv = 1.0
+            per_protein = scored.setdefault(accession, {})
+            # Keep the highest PPV seen for this (protein, GO) pair.
+            if term not in per_protein or ppv > per_protein[term]:
+                per_protein[term] = ppv
+    return scored
+
+
+# Type alias: a parser yields {protein: {go_ids}} (binary) or {protein: {go_id: score}}.
+Predictions = dict[str, set[str]] | dict[str, dict[str, float]]
+
+PARSERS: dict[str, callable] = {  # --tool name -> parser; NOTE(review): `callable` is the builtin, not a type
+    "emapper": parse_emapper,  # binary GO sets
+    "interproscan": parse_interproscan,  # binary GO sets
+    "blast": parse_blast_go,  # binary GO sets (score column ignored)
+    "pannzer2": parse_pannzer2,  # scored: GO id -> PPV
+}
+
+
+# ---------------------------------------------------------------------------
+# Ground truth + evaluation
+# ---------------------------------------------------------------------------
+
+def write_gt(annotations: dict[str, set[str]], path: str) -> None:
+    """Write sorted ``protein<TAB>go_id`` lines, one per annotation, to ``path``."""
+    pairs = ((prot, term) for prot in sorted(annotations) for term in sorted(annotations[prot]))
+    with open(path, "w") as out:
+        out.writelines(f"{prot}\t{term}\n" for prot, term in pairs)
+
+
+def write_cafa_predictions(
+    predictions: Predictions,
+    delta_proteins: set[str],
+    path: str,
+) -> int:
+    """Dump predictions for the delta proteins as ``protein<TAB>go<TAB>score``.
+
+    ``predictions`` maps each protein either to a set of GO ids (binary
+    calls, written with score 1.0) or to a {go_id: score} dict.  Proteins
+    outside ``delta_proteins`` are skipped; output is sorted by protein
+    and then by GO id.
+
+    Returns: number of (protein, GO) pairs written.
+    """
+    written = 0
+    with open(path, "w") as out:
+        for accession in sorted(predictions):
+            if accession not in delta_proteins:
+                continue
+            terms = predictions[accession]
+            # A dict carries per-term scores; anything else is a plain set of GO ids.
+            scored = isinstance(terms, dict)
+            for term in sorted(terms):
+                # Binary predictions have no score of their own: emit 1.0.
+                score = terms[term] if scored else 1.0
+                out.write(f"{accession}\t{term}\t{score:.4f}\n")
+                written += 1
+    return written
+
+
+def download_file(url: str, dest: str) -> None:
+    """Fetch ``url`` (path, file:// or HTTP) into ``dest`` as bytes, gunzipping ``.gz``."""
+    import gzip
+    import shutil
+
+    import requests
+
+    if url.startswith(("/", "file://")):
+        local = url.removeprefix("file://")
+        if url.endswith(".gz"):
+            with gzip.open(local, "rb") as src, open(dest, "wb") as f:
+                shutil.copyfileobj(src, f)
+        else:
+            shutil.copy2(local, dest)
+        return
+
+    # The context manager releases the connection even when the status check raises.
+    with requests.get(url, timeout=300) as resp:
+        resp.raise_for_status()
+        payload = resp.content
+    # Write raw bytes: resp.text + text mode would re-decode/re-encode the payload.
+    with open(dest, "wb") as f:
+        f.write(gzip.decompress(payload) if url.endswith(".gz") else payload)
+
+
+NS_LABELS = {  # value of a result row's "ns" field -> suffix used in the "<setting>-<ns>" result keys
+    "biological_process": "BPO",
+    "molecular_function": "MFO",
+    "cellular_component": "CCO",
+}
+
+
+def run_evaluation(
+    session: Session,
+    eval_set_id: uuid.UUID,
+    predictions: Predictions,
+    ia_file: str | None = None,
+    artifacts_dir: str | None = None,
+) -> dict[str, dict[str, float]]:
+    """Score external predictions against an EvaluationSet with cafaeval.
+
+    Ground truth and predictions are written under ``artifacts_dir`` (or a
+    temporary directory); a setting whose evaluation raises is only reported.
+    Returns ``{"<NK|LK|PK>-<BPO|MFO|CCO>": {fmax, precision, recall, coverage}}``.
+    Raises ``ValueError`` if the evaluation set or a row it references is missing.
+    """
+    from cafaeval.evaluation import cafa_eval
+
+    eval_set = session.get(EvaluationSet, eval_set_id)
+    if eval_set is None:
+        raise ValueError(f"EvaluationSet {eval_set_id} not found")
+
+    ann_old = session.get(AnnotationSet, eval_set.old_annotation_set_id)
+    if ann_old is None:
+        raise ValueError(f"AnnotationSet {eval_set.old_annotation_set_id} not found")
+    snapshot = session.get(OntologySnapshot, ann_old.ontology_snapshot_id)
+    if snapshot is None:
+        raise ValueError(f"OntologySnapshot {ann_old.ontology_snapshot_id} not found")
+
+    print("Computing ground truth delta...")
+    data = compute_evaluation_data(
+        session,
+        eval_set.old_annotation_set_id,
+        eval_set.new_annotation_set_id,
+        ann_old.ontology_snapshot_id,
+    )
+    print(f"  NK: {data.nk_proteins} proteins, {data.nk_annotations} annotations")
+    print(f"  LK: {data.lk_proteins} proteins, {data.lk_annotations} annotations")
+    print(f"  PK: {data.pk_proteins} proteins, {data.pk_annotations} annotations")
+
+    delta_proteins = set(data.nk) | set(data.lk) | set(data.pk)
+    covered = delta_proteins & set(predictions)
+    # Guard the percentage: an empty delta would raise ZeroDivisionError.
+    pct = 100 * len(covered) / len(delta_proteins) if delta_proteins else 0.0
+    print(f"\nExternal tool covers {len(covered)}/{len(delta_proteins)} delta proteins "
+          f"({pct:.1f}%)")
+
+    # Release DB connection before cafaeval forks
+    session.commit()
+
+    results: dict[str, dict[str, float]] = {}
+
+    with tempfile.TemporaryDirectory(prefix="protea_ext_eval_") as tmpdir:
+        # Download OBO
+        print(f"Downloading OBO from {snapshot.obo_url}...")
+        obo_path = os.path.join(tmpdir, "go.obo")
+        download_file(snapshot.obo_url, obo_path)
+
+        # Resolve IA file
+        ia_path = ia_file
+        if ia_path is None and snapshot.ia_url:
+            ia_path = os.path.join(tmpdir, "ia.tsv")
+            print(f"Downloading IA from {snapshot.ia_url}...")
+            download_file(snapshot.ia_url, ia_path)
+        print(f"Using IA file: {ia_path}" if ia_path else "WARNING: No IA file — using uniform IC=1")
+
+        # Write ground truth
+        gt_dir = artifacts_dir or tmpdir
+        os.makedirs(gt_dir, exist_ok=True)
+
+        nk_path = os.path.join(gt_dir, "gt_NK.tsv")
+        lk_path = os.path.join(gt_dir, "gt_LK.tsv")
+        pk_path = os.path.join(gt_dir, "gt_PK.tsv")
+        pk_known_path = os.path.join(gt_dir, "pk_known_terms.tsv")
+
+        write_gt(data.nk, nk_path)
+        write_gt(data.lk, lk_path)
+        write_gt(data.pk, pk_path)
+        write_gt(data.pk_known, pk_known_path)
+
+        # Write predictions
+        pred_dir = os.path.join(gt_dir, "predictions")
+        os.makedirs(pred_dir, exist_ok=True)
+        pred_path = os.path.join(pred_dir, "predictions.tsv")
+        n_written = write_cafa_predictions(predictions, delta_proteins, pred_path)
+        print(f"Wrote {n_written} prediction pairs for {len(covered)} proteins")
+
+        # Run cafaeval per setting
+        runs = (("NK", nk_path, None), ("LK", lk_path, None), ("PK", pk_path, pk_known_path))
+        for setting, gt_file, known_file in runs:
+            print(f"\nEvaluating {setting}...")
+            try:
+                # Run cafaeval under the default signal handlers, then restore ours.
+                old_sigterm = signal.signal(signal.SIGTERM, signal.SIG_DFL)
+                old_sigint = signal.signal(signal.SIGINT, signal.SIG_DFL)
+                try:
+                    _, dfs_best = cafa_eval(
+                        obo_path, pred_dir, gt_file,
+                        ia=ia_path, exclude=known_file, prop="max", norm="cafa", n_cpu=1,
+                    )
+                finally:
+                    signal.signal(signal.SIGTERM, old_sigterm)
+                    signal.signal(signal.SIGINT, old_sigint)
+
+                df_f = dfs_best.get("f")
+                if df_f is not None and not df_f.empty:
+                    df_f = df_f.reset_index()
+                    for _, row in df_f.iterrows():
+                        ns = NS_LABELS.get(str(row.get("ns", "")))
+                        if ns:
+                            key = f"{setting}-{ns}"
+                            m = results[key] = {
+                                "fmax": round(float(row.get("f", 0)), 4),
+                                "precision": round(float(row.get("pr", 0)), 4),
+                                "recall": round(float(row.get("rc", 0)), 4),
+                                "coverage": round(float(row.get("cov_max", row.get("cov", 0))), 4),
+                            }
+                            print(f"  {key}: Fmax={m['fmax']:.3f}  P={m['precision']:.3f}  "
+                                  f"R={m['recall']:.3f}  Cov={m['coverage']:.3f}")
+
+            except Exception as exc:
+                print(f"  {setting} FAILED: {exc}")
+
+    return results
+
+
+def main():
+    """CLI entry point.
+
+    Parses the tool output, runs the evaluation and prints the Fmax table.
+    """
+    cli = argparse.ArgumentParser(description="Evaluate external tool predictions with CAFA protocol")
+    cli.add_argument("--evaluation-set-id", required=True, help="EvaluationSet UUID")
+    cli.add_argument("--tool", required=True, choices=list(PARSERS.keys()), help="External tool format")
+    cli.add_argument("--input", required=True, help="Path to tool output file")
+    cli.add_argument("--ia-file", default=None, help="Path to IA TSV file (optional)")
+    cli.add_argument("--artifacts-dir", default=None, help="Directory to save evaluation artifacts")
+    args = cli.parse_args()
+
+    project_root = Path(__file__).resolve().parent.parent
+    factory = build_session_factory(load_settings(project_root).db_url)
+
+    print(f"Parsing {args.tool} output from {args.input}...")
+    predictions = PARSERS[args.tool](args.input)
+    print(f"Parsed {len(predictions)} proteins with GO predictions")
+
+    eval_set_id = uuid.UUID(args.evaluation_set_id)
+
+    with session_scope(factory) as session:
+        results = run_evaluation(
+            session,
+            eval_set_id,
+            predictions,
+            ia_file=args.ia_file,
+            artifacts_dir=args.artifacts_dir,
+        )
+
+    # Print summary table
+    columns = [f"{setting}-{ns}" for setting in ("NK", "LK", "PK") for ns in ("BPO", "MFO", "CCO")]
+    rule = "=" * 80
+    print("\n" + rule)
+    print("SUMMARY — Fmax (IA-weighted)")
+    print(rule)
+    header = f"{'Method':<20}" + "".join(f" {col:>8}" for col in columns)
+    print(header)
+    print("-" * len(header))
+
+    cells = (results.get(col, {}).get("fmax", 0.0) for col in columns)
+    print(f"{args.tool:<20}" + "".join(f" {value:>8.3f}" for value in cells))
+    print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/manage.sh b/scripts/manage.sh
index 020a8e3..fe49d1d 100755
--- a/scripts/manage.sh
+++ b/scripts/manage.sh
@@ -103,8 +103,12 @@ cmd_start() {
     done
     _start_bg worker-predictions-write poetry run python scripts/worker.py --queue protea.predictions.write
 
+    # Stale job reaper
+    printf "\n${BOLD}[6] Stale job reaper${RESET}\n"
+    _start_bg worker-reaper poetry run python scripts/worker.py --queue reaper
+
     # Frontend
-    printf "\n${BOLD}[6] Frontend${RESET}\n"
+    printf "\n${BOLD}[7] Frontend${RESET}\n"
     cd "$ROOT/apps/web"
     _start_bg frontend npm run dev
     sleep 6
diff --git a/scripts/query_eval_results.py b/scripts/query_eval_results.py
new file mode 100644
index 0000000..5cdf167
--- /dev/null
+++ b/scripts/query_eval_results.py
@@ -0,0 +1,111 @@
+"""Query all EvaluationResult rows and compare PK metrics across configurations."""
+
+from pathlib import Path
+
+from sqlalchemy.orm import joinedload
+
+from protea.infrastructure.orm.models.annotation.evaluation_result import EvaluationResult
+from protea.infrastructure.session import build_session_factory, session_scope
+from protea.infrastructure.settings import load_settings
+
+PROJECT_ROOT = Path(__file__).resolve().parent.parent
+settings = load_settings(PROJECT_ROOT)
+factory = build_session_factory(settings.db_url)
+
+# (category, namespace) pairs, in the order their Fmax columns are printed.
+_COLUMNS = [(cat, ns) for cat in ("PK", "NK", "LK") for ns in ("BPO", "MFO", "CCO")]
+
+
+def _fmax_cell(metrics: dict, cat: str, ns: str) -> str:
+    """Return ``metrics[cat][ns]["fmax"]`` as a 4-decimal string.
+
+    A missing category, namespace or value is rendered as "-".
+    """
+    value = metrics.get(cat, {}).get(ns, {}).get("fmax")
+    return "-" if value is None else f"{value:.4f}"
+
+
+with session_scope(factory) as session:
+    # Load every result, oldest first, together with its related rows.
+    results = (
+        session.query(EvaluationResult)
+        .options(
+            joinedload(EvaluationResult.prediction_set),
+            joinedload(EvaluationResult.scoring_config),
+            joinedload(EvaluationResult.reranker_model),
+        )
+        .order_by(EvaluationResult.created_at)
+        .all()
+    )
+
+    if not results:
+        print("No EvaluationResult rows found.")
+        raise SystemExit(0)
+
+    # Header
+    fixed_titles = (
+        ("eval_id", 8),
+        ("pred_set_id", 11),
+        ("K", 5),
+        ("scoring_config", 40),
+        ("reranker", 30),
+    )
+    header = "  ".join(
+        [title.rjust(width) for title, width in fixed_titles]
+        + [f"{cat}/{ns} Fmax".rjust(11) for cat, ns in _COLUMNS]
+    )
+    print(header)
+    print("-" * len(header))
+
+    for er in results:
+        ps = er.prediction_set
+        config = er.scoring_config
+        reranker = er.reranker_model
+
+        # Missing relations are shown as placeholders instead of failing.
+        if ps:
+            k_val, pred_id = str(ps.limit_per_entry), str(ps.id)[:8]
+        else:
+            k_val, pred_id = "?", "?"
+        if config:
+            sc_label = f"{config.name} [{config.formula}]" if config.formula else config.name
+        else:
+            sc_label = "(none)"
+        rr_name = reranker.name if reranker else "(none)"
+        eval_id = str(er.id)[:8]
+        metrics = er.results or {}
+
+        cells = [
+            f"{eval_id:>8s}",
+            f"{pred_id:>11s}",
+            f"{k_val:>5s}",
+            f"{sc_label:>40s}",
+            f"{rr_name:>30s}",
+        ]
+        cells += [f"{_fmax_cell(metrics, cat, ns):>11s}" for cat, ns in _COLUMNS]
+        print("  ".join(cells))
+
+    # Summary: group by K and show PK averages
+    print("\n\n=== PK Fmax Summary by K value ===\n")
+    from collections import defaultdict
+    by_k: dict[int, list] = defaultdict(list)
+    for er in results:
+        ps = er.prediction_set
+        if not ps:
+            continue
+        pk_metrics = (er.results or {}).get("PK", {})
+        by_k[ps.limit_per_entry].append(pk_metrics)
+
+    # One line per (K, namespace): count, mean, best and worst PK Fmax.
+    for k in sorted(by_k):
+        for ns in ("BPO", "MFO", "CCO"):
+            vals = [e.get(ns, {}).get("fmax") for e in by_k[k]]
+            vals = [v for v in vals if v is not None]
+            if vals:
+                avg = sum(vals) / len(vals)
+                print(
+                    f"  K={k:>3d}  {ns}  n={len(vals):>3d}  avg={avg:.4f}  "
+                    f"best={max(vals):.4f}  worst={min(vals):.4f}"
+                )
+            else:
+                print(f"  K={k:>3d}  {ns}  n=  0  (no data)")
diff --git a/scripts/queue_evals_when_ready.py b/scripts/queue_evals_when_ready.py
new file mode 100644
index 0000000..41bd485
--- /dev/null
+++ b/scripts/queue_evals_when_ready.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""Wait for predict jobs to finish, then queue CAFA evaluations.
+
+Usage:
+    python scripts/queue_evals_when_ready.py
+"""
+import time
+
+import requests
+
+API = "http://localhost:8000"  # base URL for the /jobs and /annotations endpoints called below
+EVAL_SET = "42b34e79-6fe9-4fa0-b718-02f43a1e3192"  # evaluation set whose /run endpoint is POSTed to
+
+# Jobs to watch, one tuple each: (predict_job_id, prediction_set_id, k)
+PENDING = [
+    ("20b56bb7-8f3c-4278-89e7-715a1792c3c4", "a4442444-a7c7-4568-8432-eb1efecf1e24", 20),
+    ("5a21422b-c4ae-4bde-979b-e5a357c8cb80", "d41b8d05-e591-4153-85bb-04d22413d1e7", 50),
+]
+
+POLL = 30  # seconds to sleep between polling rounds
+
+
+def main():
+    """Poll each pending predict job and queue its CAFA evaluation once it succeeds."""
+    remaining = list(PENDING)
+    while remaining:
+        still_waiting = []
+        for job_id, ps_id, k in remaining:
+            r = requests.get(f"{API}/jobs/{job_id}", timeout=10)
+            r.raise_for_status()  # surface HTTP errors instead of a KeyError below
+            job = r.json()  # parse the body once
+            status = job["status"]
+            progress, total = job.get("progress_current", "?"), job.get("progress_total", "?")
+            print(f"  k={k}  job={job_id[:8]}  status={status}  {progress}/{total}")
+
+            if status == "succeeded":
+                # Queue CAFA eval
+                resp = requests.post(
+                    f"{API}/annotations/evaluation-sets/{EVAL_SET}/run",
+                    json={"prediction_set_id": ps_id}, timeout=10,
+                )
+                resp.raise_for_status()
+                print(f"  → Queued CAFA eval for k={k}: {resp.json()['id']}")
+            elif status in ("failed", "cancelled"):
+                print(f"  → SKIP k={k}: job {status}")
+            else:
+                still_waiting.append((job_id, ps_id, k))
+
+        remaining = still_waiting
+        if remaining:
+            print(f"\n  Waiting {POLL}s for {len(remaining)} job(s)...\n")
+            time.sleep(POLL)
+
+    print("\nDone — all CAFA evals queued.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py
new file mode 100644
index 0000000..37ac63e
--- /dev/null
+++ b/scripts/run_experiments.py
@@ -0,0 +1,616 @@
+#!/usr/bin/env python
+"""Experiment battery: train on GOA-N, evaluate against GOA-M.
+
+K (limit_per_entry) is fixed via --limit.  The battery sweeps all other
+axes: scoring formula, feature engineering, and distance threshold.
+
+Submits prediction jobs through the PROTEA API, polls until completion,
+computes CAFA metrics for every (PredictionSet × ScoringConfig × category)
+triple and writes the results to a TSV file.
+
+Usage
+-----
+    python scripts/run_experiments.py \\
+        --goa-train  <annotation_set_id>   \\
+        --goa-test   <annotation_set_id>   \\
+        --emb-config <embedding_config_id> \\
+        --ontology   <ontology_snapshot_id> \\
+        [--limit     10]                   \\
+        [--api-url   http://localhost:8000] \\
+        [--output    results/goa200_vs_goa229.tsv] \\
+        [--groups    A,C,D,E]
+
+Groups
+------
+  A  Scoring-config sweep         (no features, threshold=None)
+  C  Feature-engineering sweep    (all base scorings)
+  D  Full-composite scoring       (alignment_weighted / composite)
+  E  Distance-threshold sweep     (no features, all base scorings)
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import requests
+
+# ---------------------------------------------------------------------------
+# Typing helpers
+# ---------------------------------------------------------------------------
+
+JsonDict = dict[str, Any]
+
+# ---------------------------------------------------------------------------
+# Experiment matrix definitions
+# ---------------------------------------------------------------------------
+
+# Each PredictionSpec describes a single prediction job to submit.
+# Several scoring configs are then evaluated on the resulting PredictionSet.
+
+
+@dataclass
+class PredictionSpec:
+    """Configuration for one prediction job."""
+
+    # Experiment name; appears in log lines, result rows and the default job meta.
+    label: str
+    # K from the module docstring; sent as "limit_per_entry" in the job payload.
+    limit_per_entry: int = 10
+    # Sent as "distance_threshold" only when it is not None.
+    distance_threshold: float | None = None
+    # The four fields below are forwarded to the job payload unchanged.
+    compute_alignments: bool = False
+    compute_taxonomy: bool = False
+    aspect_separated_knn: bool = True
+    search_backend: str = "numpy"
+    # which scoring-config names to evaluate on this spec
+    # (not part of the job payload; consumed by run() when computing metrics)
+    scoring_configs: list[str] = field(default_factory=list)
+
+
+# Scoring configs that require alignment or taxonomy signals
+# NOTE(review): neither set is referenced anywhere else in this script.
+_NEEDS_ALIGN = {"alignment_weighted", "composite"}
+_NEEDS_TAXONOMY = {"composite"}
+
+# Preset names that the runner will seed into the DB via POST /scoring/configs/presets
+# NOTE(review): the seeding request sends an empty body and this list is not
+# referenced elsewhere in this script, so it is presumably informational —
+# confirm it matches the presets the server actually creates.
+ALL_PRESET_NAMES = [
+    "embedding_only",
+    "embedding_plus_evidence",
+    "evidence_primary",
+    "alignment_weighted",
+    "composite",
+    "iea_dominant",
+    "iea_equalised",
+    "embedding_dominant",
+]
+
+# Base scoring configs (no feature engineering required)
+# Used by build_experiment_matrix() as the scoring list for every group.
+BASE_SCORINGS = [
+    "embedding_only",
+    "embedding_plus_evidence",
+    "evidence_primary",
+    "iea_dominant",
+    "iea_equalised",
+    "embedding_dominant",
+]
+
+
+def build_experiment_matrix(groups: set[str], limit: int) -> list[PredictionSpec]:
+    """Build the experiment matrix with a fixed K (limit_per_entry).
+
+    Groups
+    ------
+    A  Scoring sweep — one PredictionSet (no features, no threshold).
+       Evaluates all base scoring configs on the same raw distances.
+    C  Feature-engineering sweep — two additional PredictionSets:
+         C1: +alignments (NW+SW)
+         C2: +alignments +taxonomy
+       Each evaluated with all applicable scoring configs.
+    D  Full-composite standalone entry (only if C is absent, to avoid
+       running the same KNN twice).
+    E  Distance-threshold sweep — three PredictionSets (no features):
+         threshold ∈ {0.2, 0.3, 0.5}
+       Evaluated with all base scoring configs.
+
+    Args:
+        groups: Upper-case group letters to include; letters other than
+            A, C, D and E contribute nothing.
+        limit: Value assigned to ``limit_per_entry`` on every spec and embedded
+            in each label as ``k<limit>``.
+
+    Returns:
+        The specs in the order A, C1, C2, D, E.  Each spec owns its own
+        ``scoring_configs`` list, so mutating one spec never affects another
+        spec or the module-level ``BASE_SCORINGS``.
+    """
+    specs: list[PredictionSpec] = []
+
+    if "A" in groups:
+        # Single PredictionSet, no features, no threshold.
+        # All base scoring configs are applied post-hoc → one job only.
+        specs.append(
+            PredictionSpec(
+                label=f"A_base_k{limit}",
+                limit_per_entry=limit,
+                # Copy: never hand out the shared module-level list itself.
+                scoring_configs=list(BASE_SCORINGS),
+            )
+        )
+
+    if "C" in groups:
+        specs.append(
+            PredictionSpec(
+                label=f"C1_align_k{limit}",
+                limit_per_entry=limit,
+                compute_alignments=True,
+                compute_taxonomy=False,
+                scoring_configs=BASE_SCORINGS + ["alignment_weighted"],
+            )
+        )
+        specs.append(
+            PredictionSpec(
+                label=f"C2_full_k{limit}",
+                limit_per_entry=limit,
+                compute_alignments=True,
+                compute_taxonomy=True,
+                scoring_configs=BASE_SCORINGS + ["alignment_weighted", "composite"],
+            )
+        )
+
+    if "D" in groups and "C" not in groups:
+        # C2 already covers this; only add when C is skipped.
+        specs.append(
+            PredictionSpec(
+                label=f"D_composite_k{limit}",
+                limit_per_entry=limit,
+                compute_alignments=True,
+                compute_taxonomy=True,
+                scoring_configs=["alignment_weighted", "composite"] + BASE_SCORINGS,
+            )
+        )
+
+    if "E" in groups:
+        for thresh in [0.2, 0.3, 0.5]:
+            # round() rather than int(): thresh * 100 is a float product and
+            # truncation can land one below the intended percentage.
+            label = f"E_thresh{round(thresh * 100):02d}_k{limit}"
+            specs.append(
+                PredictionSpec(
+                    label=label,
+                    limit_per_entry=limit,
+                    distance_threshold=thresh,
+                    scoring_configs=list(BASE_SCORINGS),
+                )
+            )
+
+    return specs
+
+
+# ---------------------------------------------------------------------------
+# API helpers
+# ---------------------------------------------------------------------------
+
+
+class ProteaClient:
+    """Thin ``requests`` wrapper around the PROTEA HTTP endpoints this script uses.
+
+    Every request goes through ``raise_for_status``, so a 4xx/5xx response
+    raises ``requests.HTTPError``; otherwise the decoded JSON body is returned.
+    """
+
+    # Job states after which polling stops (compared in upper case).
+    _TERMINAL_STATUSES = frozenset({"SUCCEEDED", "FAILED", "CANCELLED"})
+
+    def __init__(self, base_url: str, timeout: int = 30):
+        # Strip trailing slashes so joining with a "/"-prefixed path does not
+        # produce a double slash.
+        self.base = base_url.rstrip("/")
+        # Per-request timeout handed to requests.
+        self.timeout = timeout
+
+    def _get(self, path: str, **params) -> Any:
+        """GET ``path`` with ``params`` as query string; return the decoded JSON.
+
+        The result is a dict for single resources and a list for collection
+        endpoints such as ``/scoring/configs`` and ``/jobs/<id>/events``.
+        """
+        r = requests.get(f"{self.base}{path}", params=params, timeout=self.timeout)
+        r.raise_for_status()
+        return r.json()
+
+    def _post(self, path: str, body: JsonDict) -> Any:
+        """POST ``body`` as JSON to ``path``; return the decoded JSON response."""
+        r = requests.post(
+            f"{self.base}{path}", json=body, timeout=self.timeout
+        )
+        r.raise_for_status()
+        return r.json()
+
+    # ── Scoring configs ────────────────────────────────────────────────────
+
+    def seed_preset_scoring_configs(self) -> list[str]:
+        """POST an empty body to ``/scoring/configs/presets``.
+
+        Returns the response's ``created`` list, or ``[]`` when that key is absent.
+        """
+        result = self._post("/scoring/configs/presets", {})
+        return result.get("created", [])
+
+    def list_scoring_configs(self) -> dict[str, str]:
+        """Returns {name: id}."""
+        configs = self._get("/scoring/configs")
+        return {c["name"]: c["id"] for c in configs}
+
+    # ── Jobs ───────────────────────────────────────────────────────────────
+
+    def submit_predict_job(
+        self,
+        embedding_config_id: str,
+        annotation_set_id: str,
+        ontology_snapshot_id: str,
+        spec: PredictionSpec,
+        meta: JsonDict | None = None,
+    ) -> str:
+        """Submit a ``predict_go_terms`` job built from ``spec``; return its job ID.
+
+        ``distance_threshold`` is only included in the payload when the spec
+        sets one.  When ``meta`` is empty or ``None`` the job meta defaults to
+        ``{"experiment_label": spec.label}``.
+        """
+        payload: JsonDict = {
+            "embedding_config_id": embedding_config_id,
+            "annotation_set_id": annotation_set_id,
+            "ontology_snapshot_id": ontology_snapshot_id,
+            "limit_per_entry": spec.limit_per_entry,
+            "compute_alignments": spec.compute_alignments,
+            "compute_taxonomy": spec.compute_taxonomy,
+            "aspect_separated_knn": spec.aspect_separated_knn,
+            "search_backend": spec.search_backend,
+        }
+        if spec.distance_threshold is not None:
+            payload["distance_threshold"] = spec.distance_threshold
+
+        body: JsonDict = {
+            "operation": "predict_go_terms",
+            "queue_name": "protea.jobs",
+            "payload": payload,
+            "meta": meta or {"experiment_label": spec.label},
+        }
+        resp = self._post("/jobs", body)
+        return resp["id"]
+
+    def wait_for_job(
+        self,
+        job_id: str,
+        poll_interval: float = 5.0,
+        timeout: float = 3600.0,
+    ) -> JsonDict:
+        """Block until the job reaches a terminal state, then return the job dict.
+
+        The status is matched case-insensitively against SUCCEEDED, FAILED and
+        CANCELLED.  The terminal check runs before the deadline check, so a
+        job that has finished is returned even if the deadline has passed.
+
+        Raises:
+            TimeoutError: if the job is still not terminal once ``timeout``
+                seconds have elapsed.
+        """
+        deadline = time.monotonic() + timeout
+        while True:
+            job = self._get(f"/jobs/{job_id}")
+            status = job["status"]
+            # Normalise case so a lower-case status ends the loop instead of
+            # polling until the timeout.
+            if str(status).upper() in self._TERMINAL_STATUSES:
+                return job
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Job {job_id} did not finish within {timeout}s (last status: {status})"
+                )
+            time.sleep(poll_interval)
+
+    def find_prediction_set_for_job(self, job_id: str) -> str | None:
+        """Look up the PredictionSet created by a completed predict job.
+
+        Scans the job's events from last to first and returns the first
+        ``prediction_set_id`` found in an event payload, or ``None`` when no
+        event carries one.  Only events are inspected here; the job meta is
+        checked separately by ``resolve_prediction_set``.
+        """
+        events = self._get(f"/jobs/{job_id}/events")
+        for ev in reversed(events):
+            payload = ev.get("payload") or {}
+            if "prediction_set_id" in payload:
+                return payload["prediction_set_id"]
+        return None
+
+    # ── Metrics ───────────────────────────────────────────────────────────
+
+    def compute_metrics(
+        self,
+        prediction_set_id: str,
+        scoring_config_id: str,
+        old_annotation_set_id: str,
+        new_annotation_set_id: str,
+        ontology_snapshot_id: str,
+        category: str = "nk",
+    ) -> JsonDict:
+        """GET ``/scoring/prediction-sets/<id>/metrics`` and return the decoded body.
+
+        All arguments other than ``prediction_set_id`` are sent as query
+        parameters under their own names.
+        """
+        return self._get(
+            f"/scoring/prediction-sets/{prediction_set_id}/metrics",
+            scoring_config_id=scoring_config_id,
+            old_annotation_set_id=old_annotation_set_id,
+            new_annotation_set_id=new_annotation_set_id,
+            ontology_snapshot_id=ontology_snapshot_id,
+            category=category,
+        )
+
+
+# ---------------------------------------------------------------------------
+# PredictionSet discovery
+# ---------------------------------------------------------------------------
+
+
+def resolve_prediction_set(
+    client: ProteaClient, job_id: str, job_result: JsonDict
+) -> str | None:
+    """Return the PredictionSet ID for a completed job, or ``None`` if unknown.
+
+    The job's events are consulted first through the client; when they yield
+    nothing, the ``meta`` mapping of ``job_result`` is used instead.
+    """
+    from_events = client.find_prediction_set_for_job(job_id)
+    if from_events:
+        return from_events
+
+    # A missing or null "meta" is treated as an empty mapping.
+    job_meta = job_result.get("meta") or {}
+    return job_meta["prediction_set_id"] if "prediction_set_id" in job_meta else None
+
+
+# ---------------------------------------------------------------------------
+# Result row
+# ---------------------------------------------------------------------------
+
+# Column order of the TSV written by run().  The list is passed to
+# csv.DictWriter as fieldnames, so every row dict must use exactly these keys.
+RESULT_COLUMNS = [
+    "experiment_label",
+    "prediction_set_id",
+    "scoring_config",
+    "category",
+    "fmax",
+    "auc_pr",
+    "best_threshold",
+    "limit_per_entry",
+    "distance_threshold",
+    "compute_alignments",
+    "compute_taxonomy",
+    "job_id",
+    "job_status",
+]
+
+
+# ---------------------------------------------------------------------------
+# Main runner
+# ---------------------------------------------------------------------------
+
+
+def _format_metric(value: Any, fmt: str) -> str:
+    """Render a metric for console output; non-numeric values become ``n/a``.
+
+    Metrics are read with ``.get(..., "")``, so a missing or null field must
+    never reach a float format spec, which would raise and abort the run.
+    """
+    if isinstance(value, (int, float)) and not isinstance(value, bool):
+        return format(value, fmt)
+    return "n/a"
+
+
+def _result_row(
+    spec: PredictionSpec,
+    job_id: str,
+    ps_id: str,
+    scoring_name: str,
+    category: str,
+    fmax: Any,
+    auc_pr: Any,
+    best_threshold: Any,
+) -> dict[str, Any]:
+    """Build one output row whose keys are exactly RESULT_COLUMNS."""
+    return {
+        "experiment_label": spec.label,
+        "prediction_set_id": ps_id,
+        "scoring_config": scoring_name,
+        "category": category,
+        "fmax": fmax,
+        "auc_pr": auc_pr,
+        "best_threshold": best_threshold,
+        "limit_per_entry": spec.limit_per_entry,
+        # An unset threshold is written as an empty cell.
+        "distance_threshold": spec.distance_threshold
+        if spec.distance_threshold is not None
+        else "",
+        "compute_alignments": spec.compute_alignments,
+        "compute_taxonomy": spec.compute_taxonomy,
+        "job_id": job_id,
+        "job_status": "SUCCEEDED",
+    }
+
+
+def run(args: argparse.Namespace) -> int:
+    """Run the experiment battery end to end and write the TSV/JSON results.
+
+    Steps: seed and list scoring configs, build the spec matrix, submit one
+    prediction job per spec, wait for each job, resolve its PredictionSet,
+    fetch metrics for every (spec, scoring config, category in nk/lk), then
+    write ``args.output`` as TSV plus a sibling ``.json`` summary.
+
+    Args:
+        args: Namespace produced by ``parse_args``.
+
+    Returns:
+        0 once results are written; 1 when aborting early (a submission,
+        timeout or resolution failure without ``--skip-errors``, or no job /
+        PredictionSet left to process).
+    """
+    client = ProteaClient(args.api_url)
+
+    # ── 1. Seed preset scoring configs ────────────────────────────────────
+    print("[setup] Seeding preset scoring configs …", flush=True)
+    created = client.seed_preset_scoring_configs()
+    if created:
+        print(f"        Created: {created}", flush=True)
+    else:
+        print("        All presets already present.", flush=True)
+
+    scoring_map = client.list_scoring_configs()
+    print(f"        Available configs: {list(scoring_map.keys())}", flush=True)
+
+    # ── 2. Build experiment matrix ─────────────────────────────────────────
+    # Tolerate whitespace and empty items, e.g. "A, C" or "A,,C".
+    groups = {g.strip() for g in args.groups.upper().split(",") if g.strip()}
+    unknown = groups - {"A", "C", "D", "E"}
+    if unknown:
+        print(f"[matrix] WARNING: ignoring unknown group(s): {sorted(unknown)}", flush=True)
+    specs = build_experiment_matrix(groups, limit=args.limit)
+    print(
+        f"\n[matrix] {len(specs)} PredictionSet(s) to run across groups {groups} "
+        f"with K={args.limit}."
+    )
+
+    # ── 3. Submit all prediction jobs ─────────────────────────────────────
+    submitted: list[tuple[PredictionSpec, str]] = []  # (spec, job_id)
+
+    for spec in specs:
+        print(f"\n[submit] {spec.label} …", end=" ", flush=True)
+        try:
+            job_id = client.submit_predict_job(
+                embedding_config_id=args.emb_config,
+                annotation_set_id=args.goa_train,
+                ontology_snapshot_id=args.ontology,
+                spec=spec,
+            )
+            submitted.append((spec, job_id))
+            print(f"job_id={job_id}", flush=True)
+        except requests.HTTPError as exc:
+            print(f"ERROR: {exc.response.status_code} {exc.response.text}", flush=True)
+            if not args.skip_errors:
+                return 1
+
+    if not submitted:
+        print("\n[error] No jobs submitted.", flush=True)
+        return 1
+
+    # ── 4. Poll until all jobs complete ───────────────────────────────────
+    print(f"\n[poll] Waiting for {len(submitted)} job(s) to finish …", flush=True)
+    completed: list[tuple[PredictionSpec, str, JsonDict]] = []
+
+    for spec, job_id in submitted:
+        print(f"       Polling {spec.label} (job {job_id}) …", end=" ", flush=True)
+        try:
+            job_result = client.wait_for_job(
+                job_id,
+                poll_interval=args.poll_interval,
+                timeout=args.job_timeout,
+            )
+            status = job_result["status"]
+            print(status, flush=True)
+            completed.append((spec, job_id, job_result))
+        except TimeoutError as exc:
+            print(f"TIMEOUT: {exc}", flush=True)
+            if not args.skip_errors:
+                return 1
+
+    # ── 5. Resolve PredictionSet IDs ──────────────────────────────────────
+    print("\n[resolve] Looking up PredictionSet IDs …", flush=True)
+    resolved: list[tuple[PredictionSpec, str, str]] = []  # (spec, job_id, ps_id)
+
+    for spec, job_id, job_result in completed:
+        # Case-insensitive so a lower-case status is not mistaken for a failure.
+        if str(job_result["status"]).upper() != "SUCCEEDED":
+            print(f"  SKIP {spec.label}: job {job_id} ended with {job_result['status']}")
+            continue
+        ps_id = resolve_prediction_set(client, job_id, job_result)
+        if ps_id:
+            print(f"  {spec.label}: prediction_set_id={ps_id}")
+            resolved.append((spec, job_id, ps_id))
+        else:
+            print(f"  WARNING: could not find PredictionSet for job {job_id} ({spec.label})")
+            if not args.skip_errors:
+                return 1
+
+    if not resolved:
+        print("[error] No PredictionSets resolved.", flush=True)
+        return 1
+
+    # ── 6. Compute metrics for every (PredictionSet × scoring × category) ─
+    print(f"\n[metrics] Computing metrics for {len(resolved)} prediction set(s) …", flush=True)
+    rows: list[dict[str, Any]] = []
+
+    for spec, job_id, ps_id in resolved:
+        for scoring_name in spec.scoring_configs:
+            scoring_id = scoring_map.get(scoring_name)
+            if scoring_id is None:
+                print(f"  WARNING: scoring config '{scoring_name}' not in DB, skipping.")
+                continue
+
+            for category in ["nk", "lk"]:
+                tag = f"{spec.label}/{scoring_name}/{category}"
+                print(f"  {tag} … ", end="", flush=True)
+                try:
+                    result = client.compute_metrics(
+                        prediction_set_id=ps_id,
+                        scoring_config_id=scoring_id,
+                        old_annotation_set_id=args.goa_train,
+                        new_annotation_set_id=args.goa_test,
+                        ontology_snapshot_id=args.ontology,
+                        category=category,
+                    )
+                except requests.RequestException as exc:
+                    # Record the failure and carry on: one bad request (HTTP
+                    # error, timeout, dropped connection) must not discard
+                    # the rows collected so far.
+                    response = getattr(exc, "response", None)
+                    if response is not None:
+                        print(f"ERROR {response.status_code}: {response.text}", flush=True)
+                    else:
+                        print(f"ERROR {type(exc).__name__}: {exc}", flush=True)
+                    rows.append(
+                        _result_row(
+                            spec, job_id, ps_id, scoring_name, category,
+                            "ERROR", "ERROR", "ERROR",
+                        )
+                    )
+                    continue
+
+                fmax = result.get("fmax", "")
+                auc = result.get("auc_pr", "")
+                best_t = result.get("best_threshold", "")
+                print(
+                    f"Fmax={_format_metric(fmax, '.4f')}  "
+                    f"AUC-PR={_format_metric(auc, '.4f')}  "
+                    f"@t={_format_metric(best_t, '.3f')}",
+                    flush=True,
+                )
+                rows.append(
+                    _result_row(spec, job_id, ps_id, scoring_name, category, fmax, auc, best_t)
+                )
+
+    # ── 7. Write results ──────────────────────────────────────────────────
+    output = Path(args.output)
+    output.parent.mkdir(parents=True, exist_ok=True)
+
+    with output.open("w", newline="", encoding="utf-8") as fh:
+        writer = csv.DictWriter(fh, fieldnames=RESULT_COLUMNS, delimiter="\t")
+        writer.writeheader()
+        writer.writerows(rows)
+
+    print(f"\n[done] {len(rows)} result rows written to {output}", flush=True)
+
+    # Also dump a JSON summary for easier programmatic consumption
+    json_output = output.with_suffix(".json")
+    with json_output.open("w", encoding="utf-8") as fh:
+        json.dump(
+            {
+                "goa_train": args.goa_train,
+                "goa_test": args.goa_test,
+                "embedding_config": args.emb_config,
+                "ontology_snapshot": args.ontology,
+                "limit_per_entry": args.limit,
+                "groups": args.groups,
+                "results": rows,
+            },
+            fh,
+            indent=2,
+        )
+    print(f"       JSON summary written to {json_output}", flush=True)
+
+    return 0
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    """Parse the command line (``argv``, or ``sys.argv[1:]`` when ``None``).
+
+    The four UUID options are required; every other option has a default.
+    """
+    # The required options differ only in flag name and help text.
+    required_uuid_options = (
+        ("--goa-train", "AnnotationSet ID to use as KNN reference (e.g. GOA200)."),
+        ("--goa-test", "AnnotationSet ID to use as ground truth (e.g. GOA229)."),
+        (
+            "--emb-config",
+            "EmbeddingConfig UUID (must match the stored SequenceEmbeddings).",
+        ),
+        ("--ontology", "OntologySnapshot UUID to use for GO DAG evaluation."),
+    )
+
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    for flag, help_text in required_uuid_options:
+        parser.add_argument(flag, required=True, metavar="UUID", help=help_text)
+
+    parser.add_argument(
+        "--api-url",
+        default="http://localhost:8000",
+        metavar="URL",
+        help="PROTEA API base URL (default: http://localhost:8000).",
+    )
+    parser.add_argument(
+        "--output",
+        default="results/experiments.tsv",
+        metavar="PATH",
+        help="Output TSV path (default: results/experiments.tsv).",
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=10,
+        metavar="K",
+        help="Fixed limit_per_entry (K) for all prediction jobs (default: 10).",
+    )
+    groups_help = (
+        "Comma-separated list of experiment groups to run "
+        "(A=scoring, C=features, D=composite, E=threshold). "
+        "Default: A,C,D,E."
+    )
+    parser.add_argument("--groups", default="A,C,D,E", metavar="GROUPS", help=groups_help)
+    parser.add_argument(
+        "--poll-interval",
+        type=float,
+        default=10.0,
+        metavar="SECONDS",
+        help="Seconds between job-status polls (default: 10).",
+    )
+    parser.add_argument(
+        "--job-timeout",
+        type=float,
+        default=7200.0,
+        metavar="SECONDS",
+        help="Maximum seconds to wait for a single job (default: 7200).",
+    )
+    parser.add_argument(
+        "--skip-errors",
+        action="store_true",
+        help="Continue past HTTP errors and failed jobs instead of aborting.",
+    )
+    return parser.parse_args(argv)
+
+
+if __name__ == "__main__":
+    sys.exit(run(parse_args()))
diff --git a/scripts/worker.py b/scripts/worker.py
index de129da..4ccb709 100644
--- a/scripts/worker.py
+++ b/scripts/worker.py
@@ -32,22 +32,32 @@
     StorePredictionsOperation,
 )
 from protea.core.operations.run_cafa_evaluation import RunCafaEvaluationOperation
+from protea.core.operations.train_reranker import TrainRerankerAutoOperation, TrainRerankerOperation
 from protea.infrastructure.queue.consumer import OperationConsumer, QueueConsumer
 from protea.infrastructure.session import build_session_factory
 from protea.infrastructure.settings import load_settings
 from protea.workers.base_worker import BaseWorker, WorkerConfig
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
-# Suppress pika's verbose connection lifecycle messages
-logging.getLogger("pika").setLevel(logging.WARNING)
+from protea.workers.stale_job_reaper import StaleJobReaper
 
 
 def main() -> None:
     parser = argparse.ArgumentParser(description="PROTEA queue worker")
     parser.add_argument("--queue", default="protea.jobs", help="Queue name to consume")
     parser.add_argument("--requeue-on-failure", action="store_true")
+    parser.add_argument(
+        "--log-format",
+        choices=["json", "text"],
+        default="json",
+        help="Log output format (default: json)",
+    )
     args = parser.parse_args()
 
+    from protea.infrastructure.logging import configure_logging
+
+    configure_logging(json=(args.log_format == "json"))
+    # Suppress pika's verbose connection lifecycle messages
+    logging.getLogger("pika").setLevel(logging.WARNING)
+
     project_root = Path(__file__).resolve().parents[1]
     settings = load_settings(project_root)
 
@@ -68,6 +78,8 @@ def main() -> None:
     registry.register(PredictGOTermsOperation())
     registry.register(PredictGOTermsBatchOperation())
     registry.register(StorePredictionsOperation())
+    registry.register(TrainRerankerOperation())
+    registry.register(TrainRerankerAutoOperation())
 
     # Queues that carry ephemeral operation messages (no DB Job row per message)
     # use OperationConsumer.  All other queues use the standard QueueConsumer.
@@ -78,6 +90,13 @@ def main() -> None:
         "protea.predictions.write",
     }
 
+    # Special mode: stale job reaper (no queue, just periodic DB check).
+    if args.queue == "reaper":
+        reaper = StaleJobReaper(factory, timeout_seconds=21600)
+        logging.info("Stale job reaper started. timeout=21600s interval=60s")
+        reaper.run(interval_seconds=60)
+        return
+
     if args.queue in _OPERATION_QUEUES:
         consumer: QueueConsumer | OperationConsumer = OperationConsumer(
             amqp_url=settings.amqp_url,
@@ -95,6 +114,15 @@ def main() -> None:
             requeue_on_failure=args.requeue_on_failure,
         )
 
+    # Pre-warm taxonomy DB for prediction workers that may need it.
+    if args.queue in ("protea.predictions.batch", "protea.jobs"):
+        try:
+            from protea.core.feature_engineering import warmup_taxonomy_db
+
+            warmup_taxonomy_db()
+        except Exception as exc:
+            logging.warning("Taxonomy DB warmup skipped: %s", exc)
+
     logging.info("Worker started. queue=%s", args.queue)
     while True:
         try:
diff --git a/tests/conftest.py b/tests/conftest.py
index 6c7b9f2..96d8143 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,6 +38,12 @@ def _wait_ready(container: str, user: str, db: str, timeout_s: int = 60) -> None
         time.sleep(1)
 
 
+@pytest.fixture()
+def noop_emit():
+    """Provide an emit callback that accepts any arguments and does nothing."""
+
+    def _discard(*_args, **_kwargs):
+        return None
+
+    return _discard
+
+
 def pytest_addoption(parser: pytest.Parser) -> None:
     parser.addoption(
         "--with-postgres",
diff --git a/tests/test_admin_router.py b/tests/test_admin_router.py
new file mode 100644
index 0000000..8ff0382
--- /dev/null
+++ b/tests/test_admin_router.py
@@ -0,0 +1,145 @@
+"""Unit tests for the /admin router.
+
+Database and subprocess calls are fully mocked -- no real infrastructure required.
+"""
+from __future__ import annotations
+
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from protea.api.routers.admin import router
+
+_TEST_TOKEN = "test-admin-secret"
+_AUTH_HEADER = {"Authorization": f"Bearer {_TEST_TOKEN}"}
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_app():
+    """Build a bare FastAPI app with the admin router and a mocked session factory."""
+    application = FastAPI()
+    application.include_router(router)
+    application.state.session_factory = MagicMock()
+    return application
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def mock_psycopg():
+    """Install a MagicMock as ``psycopg`` in ``sys.modules`` while the test runs.
+
+    Yields ``(module_mock, connection)``, where ``connection`` is the object
+    bound by ``with psycopg.connect(...) as conn``.
+    """
+    fake_module = MagicMock()
+    connection = MagicMock()
+    context_manager = fake_module.connect.return_value
+    context_manager.__enter__ = MagicMock(return_value=connection)
+    # A falsy __exit__ result lets exceptions raised inside the block propagate.
+    context_manager.__exit__ = MagicMock(return_value=False)
+    with patch.dict(sys.modules, {"psycopg": fake_module}):
+        yield fake_module, connection
+
+
+@pytest.fixture()
+def client(mock_psycopg):
+    """Yield ``(test_client, app, mock_psycopg)`` with the admin token patched to the test value."""
+    app = _make_app()
+    with TestClient(app) as test_client:
+        with patch("protea.api.routers.admin._ADMIN_TOKEN", _TEST_TOKEN):
+            yield test_client, app, mock_psycopg
+
+
+# ---------------------------------------------------------------------------
+# POST /admin/reset-db
+# ---------------------------------------------------------------------------
+
+class TestResetDBAuth:
+    """Authentication outcomes of POST /admin/reset-db."""
+
+    def test_no_token_configured_returns_403(self, mock_psycopg):
+        # An empty configured token is expected to disable the endpoint.
+        with TestClient(_make_app()) as http:
+            with patch("protea.api.routers.admin._ADMIN_TOKEN", ""):
+                response = http.post("/admin/reset-db", headers=_AUTH_HEADER)
+                assert response.status_code == 403
+                assert "disabled" in response.json()["detail"]
+
+    def test_missing_header_returns_401(self, client):
+        http = client[0]
+        response = http.post("/admin/reset-db")
+        assert response.status_code == 401
+
+    def test_wrong_token_returns_403(self, client):
+        http = client[0]
+        bad_header = {"Authorization": "Bearer wrong"}
+        response = http.post("/admin/reset-db", headers=bad_header)
+        assert response.status_code == 403
+        assert "Invalid" in response.json()["detail"]
+
+
+class TestResetDB:
+    """POST /admin/reset-db with a valid token; settings, subprocess and DB are mocked."""
+
+    _DB_URL = "postgresql+psycopg://u:p@localhost/db"
+
+    @classmethod
+    def _stub_settings(cls, load_settings_mock):
+        """Make the patched ``load_settings`` return settings carrying ``_DB_URL``."""
+        stub = MagicMock()
+        stub.db_url = cls._DB_URL
+        load_settings_mock.return_value = stub
+
+    @patch("protea.api.routers.admin.build_session_factory")
+    @patch("protea.api.routers.admin.subprocess.run")
+    @patch("protea.api.routers.admin.load_settings")
+    def test_reset_db_success(self, mock_settings, mock_run, mock_build, client):
+        http, _app, _psycopg = client
+        self._stub_settings(mock_settings)
+        mock_run.return_value = MagicMock(returncode=0)
+        mock_build.return_value = MagicMock()
+
+        response = http.post("/admin/reset-db", headers=_AUTH_HEADER)
+
+        assert response.status_code == 200
+        assert response.json()["ok"] is True
+        mock_build.assert_called_once()
+
+    @patch("protea.api.routers.admin.build_session_factory")
+    @patch("protea.api.routers.admin.subprocess.run")
+    @patch("protea.api.routers.admin.load_settings")
+    def test_reset_db_migration_failure(self, mock_settings, mock_run, mock_build, client):
+        http, _app, _psycopg = client
+        self._stub_settings(mock_settings)
+        mock_run.return_value = MagicMock(returncode=1, stderr="migration error")
+
+        response = http.post("/admin/reset-db", headers=_AUTH_HEADER)
+
+        assert response.status_code == 200
+        body = response.json()
+        assert body["ok"] is False
+        assert "migration error" in body["error"]
+        mock_build.assert_not_called()
+
+    @patch("protea.api.routers.admin.build_session_factory")
+    @patch("protea.api.routers.admin.subprocess.run")
+    @patch("protea.api.routers.admin.load_settings")
+    def test_reset_db_drops_and_recreates_schema(self, mock_settings, mock_run, mock_build, client):
+        http, _app, (_psycopg_module, connection) = client
+        self._stub_settings(mock_settings)
+        mock_run.return_value = MagicMock(returncode=0)
+
+        response = http.post("/admin/reset-db", headers=_AUTH_HEADER)
+
+        assert response.status_code == 200
+        for statement in ("DROP SCHEMA public CASCADE", "CREATE SCHEMA public"):
+            connection.execute.assert_any_call(statement)
+
+    @patch("protea.api.routers.admin.build_session_factory")
+    @patch("protea.api.routers.admin.subprocess.run")
+    @patch("protea.api.routers.admin.load_settings")
+    def test_reset_db_replaces_psycopg_in_url(self, mock_settings, mock_run, mock_build, client):
+        http, _app, (psycopg_module, _connection) = client
+        self._stub_settings(mock_settings)
+        mock_run.return_value = MagicMock(returncode=0)
+
+        response = http.post("/admin/reset-db", headers=_AUTH_HEADER)
+
+        assert response.status_code == 200
+        # The "+psycopg" driver suffix must be gone from the URL given to connect().
+        psycopg_module.connect.assert_called_once_with(
+            "postgresql://u:p@localhost/db", autocommit=True
+        )
diff --git a/tests/test_annotate_router.py b/tests/test_annotate_router.py
new file mode 100644
index 0000000..4255fe4
--- /dev/null
+++ b/tests/test_annotate_router.py
@@ -0,0 +1,374 @@
+"""Unit tests for the /annotate router.
+
+Database and queue are fully mocked — no real infrastructure required.
+"""
+from __future__ import annotations
+
+from contextlib import contextmanager
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from protea.api.routers.annotate import router
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_app(session_factory, amqp_url="amqp://guest:guest@localhost:5672/"):
+    app = FastAPI()
+    app.state.session_factory = session_factory
+    app.state.amqp_url = amqp_url
+    app.include_router(router)
+    return app
+
+
+@contextmanager
+def _mock_scope(session):
+    yield session  # hands the given session straight to the ``with`` body; nothing runs on exit
+
+
+def _fasta_content(records: list[tuple[str, str]]) -> str:
+    lines = []
+    for acc, seq in records:
+        lines.append(f">{acc}")
+        lines.append(seq)
+    return "\n".join(lines)
+
+
+def _mock_embedding_config(session, has_embeddings=True):
+    config = MagicMock()
+    config.id = uuid4()
+    if has_embeddings:
+        row = (config, 100)
+    else:
+        row = (config, 0)
+    q = session.query.return_value
+    q.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [row]
+    return config
+
+
+def _mock_annotation_set(session):
+    ann = MagicMock()
+    ann.id = uuid4()
+    return ann
+
+
+def _mock_ontology_snapshot(session):
+    snap = MagicMock()
+    snap.id = uuid4()
+    return snap
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def session():
+    return MagicMock()  # the object the ``client`` fixture's patched session_scope hands out
+
+
+@pytest.fixture()
+def factory(session):
+    return MagicMock()  # passed to _make_app as the session factory; ``session`` is requested but unused
+
+
+@pytest.fixture()
+def client(session, factory):
+    app = _make_app(factory)
+    with patch(
+        "protea.api.routers.annotate.session_scope",
+        side_effect=lambda _: _mock_scope(session),
+    ), patch(
+        "protea.api.routers.annotate.publish_job",
+    ) as mock_publish:
+        with TestClient(app) as c:
+            yield c, session, mock_publish
+
+
+# ---------------------------------------------------------------------------
+# POST /annotate — input validation
+# ---------------------------------------------------------------------------
+
+class TestAnnotateInputValidation:
+    def test_no_input_returns_422(self, client):
+        c, session, _ = client
+        resp = c.post("/annotate")
+        assert resp.status_code == 422
+
+    def test_empty_fasta_text_returns_422(self, client):
+        c, session, _ = client
+        resp = c.post("/annotate", data={"fasta_text": ""})
+        assert resp.status_code == 422
+
+    def test_invalid_fasta_returns_422(self, client):
+        c, session, _ = client
+        resp = c.post("/annotate", data={"fasta_text": "not a fasta"})
+        assert resp.status_code == 422
+
+    def test_duplicate_accession_returns_422(self, client):
+        c, session, _ = client
+        fasta = _fasta_content([("P12345", "MKVL"), ("P12345", "MKVL")])
+        resp = c.post("/annotate", data={"fasta_text": fasta})
+        assert resp.status_code == 422
+        assert "Duplicate" in resp.json()["detail"]
+
+    def test_file_upload_non_utf8_returns_422(self, client):
+        c, session, _ = client
+        resp = c.post(
+            "/annotate",
+            files={"file": ("test.fasta", b"\x80\x81\x82\x83", "text/plain")},
+        )
+        assert resp.status_code == 422
+        assert "UTF-8" in resp.json()["detail"]
+
+
+# ---------------------------------------------------------------------------
+# POST /annotate — missing prerequisites
+# ---------------------------------------------------------------------------
+
+class TestAnnotatePrerequisites:
+    def _setup_session(self, session, has_config=True, has_ann=True, has_snap=True):
+        """Configure mock session for the annotate flow."""
+        # Sequence upsert: no existing sequences
+        query_mock = MagicMock()
+        session.query.return_value = query_mock
+        query_mock.filter.return_value.all.return_value = []
+        query_mock.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = []
+
+        # Sequence hash computation
+        sequence_mock = MagicMock()
+        sequence_mock.id = 1
+
+        # Make session.add assign an id to new objects
+        def add_side_effect(obj):
+            if not hasattr(obj, "id") or obj.id is None:
+                obj.id = uuid4()
+        session.add.side_effect = add_side_effect
+        session.flush.return_value = None
+
+        # Config
+        if has_config:
+            config = MagicMock()
+            config.id = uuid4()
+            query_mock.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [(config, 10)]
+        else:
+            query_mock.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = []
+
+        # Annotation set
+        if has_ann:
+            ann = MagicMock()
+            ann.id = uuid4()
+            query_mock.order_by.return_value.first.return_value = ann
+        else:
+            query_mock.order_by.return_value.first.return_value = None
+
+        # Ontology snapshot — separate query
+        if has_snap:
+            snap = MagicMock()
+            snap.id = uuid4()
+        else:
+            snap = None
+
+        return query_mock
+
+    def test_no_annotation_set_returns_409(self, client):
+        c, session, _ = client
+        fasta = _fasta_content([("P12345", "MKVLWAGS")])
+
+        # Setup: config exists, but no annotation set
+        query_mock = MagicMock()
+        session.query.return_value = query_mock
+        query_mock.filter.return_value.all.return_value = []
+
+        # Sequence mock
+        def add_side_effect(obj):
+            if not hasattr(obj, "id") or obj.id is None:
+                obj.id = uuid4()
+        session.add.side_effect = add_side_effect
+
+        config = MagicMock()
+        config.id = uuid4()
+        query_mock.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [(config, 10)]
+        # No annotation set
+        query_mock.order_by.return_value.first.return_value = None
+
+        resp = c.post("/annotate", data={"fasta_text": fasta})
+        assert resp.status_code == 409
+        assert "annotation" in resp.json()["detail"].lower()
+
+
+# ---------------------------------------------------------------------------
+# POST /annotate — successful flow
+# ---------------------------------------------------------------------------
+
+class TestAnnotateSuccess:
+    def test_fasta_text_happy_path(self, client):
+        c, session, mock_publish = client
+        fasta = _fasta_content([("P12345", "MKVLWAGS"), ("Q99999", "ACDEF")])
+
+        config = MagicMock()
+        config.id = uuid4()
+        ann = MagicMock()
+        ann.id = uuid4()
+        snap = MagicMock()
+        snap.id = uuid4()
+        reranker = MagicMock()
+        reranker.id = uuid4()
+
+        first_results = iter([ann, snap, reranker])
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.filter.return_value.all.return_value = []
+            q.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [(config, 10)]
+            q.order_by.return_value.first.side_effect = lambda: next(first_results)
+            return q
+
+        session.query.side_effect = query_side_effect
+
+        def add_side_effect(obj):
+            if not hasattr(obj, "id") or obj.id is None:
+                obj.id = uuid4()
+        session.add.side_effect = add_side_effect
+        session.flush.return_value = None
+
+        resp = c.post("/annotate", data={"fasta_text": fasta, "name": "Test annotation"})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "query_set_id" in data
+        assert "embedding_config_id" in data
+        assert "annotation_set_id" in data
+        assert "embedding_job_id" in data
+        assert "predict_payload" in data
+        assert data["sequence_count"] == 2
+        mock_publish.assert_called_once()
+
+    def test_file_upload_happy_path(self, client):
+        c, session, mock_publish = client
+        fasta = _fasta_content([("P12345", "MKVLWAGS")])
+
+        config = MagicMock()
+        config.id = uuid4()
+        ann = MagicMock()
+        ann.id = uuid4()
+        snap = MagicMock()
+        snap.id = uuid4()
+
+        first_results = iter([ann, snap, None])
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.filter.return_value.all.return_value = []
+            q.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [(config, 10)]
+            q.order_by.return_value.first.side_effect = lambda: next(first_results)
+            return q
+
+        session.query.side_effect = query_side_effect
+
+        def add_side_effect(obj):
+            if not hasattr(obj, "id") or obj.id is None:
+                obj.id = uuid4()
+        session.add.side_effect = add_side_effect
+
+        resp = c.post(
+            "/annotate",
+            files={"file": ("test.fasta", fasta.encode(), "text/plain")},
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["sequence_count"] == 1
+        assert data["reranker_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# Helper functions
+# ---------------------------------------------------------------------------
+
+class TestBestEmbeddingConfig:
+    def test_returns_config_with_most_embeddings(self):
+        from protea.api.routers.annotate import _best_embedding_config
+
+        session = MagicMock()
+        config_a = MagicMock()
+        config_a.id = uuid4()
+        config_b = MagicMock()
+        config_b.id = uuid4()
+
+        session.query.return_value.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [
+            (config_a, 100),
+            (config_b, 50),
+        ]
+
+        result = _best_embedding_config(session)
+        assert result is config_a
+
+    def test_returns_none_when_no_configs(self):
+        from protea.api.routers.annotate import _best_embedding_config
+
+        session = MagicMock()
+        session.query.return_value.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = []
+
+        result = _best_embedding_config(session)
+        assert result is None
+
+    def test_returns_config_with_zero_embeddings_if_only_option(self):
+        from protea.api.routers.annotate import _best_embedding_config
+
+        session = MagicMock()
+        config = MagicMock()
+        config.id = uuid4()
+        session.query.return_value.outerjoin.return_value.group_by.return_value.order_by.return_value.all.return_value = [
+            (config, 0),
+        ]
+
+        result = _best_embedding_config(session)
+        assert result is config
+
+
+class TestNewestAnnotationSet:
+    def test_returns_newest(self):
+        from protea.api.routers.annotate import _newest_annotation_set
+
+        session = MagicMock()
+        ann = MagicMock()
+        session.query.return_value.order_by.return_value.first.return_value = ann
+        assert _newest_annotation_set(session) is ann
+
+    def test_returns_none_when_empty(self):
+        from protea.api.routers.annotate import _newest_annotation_set
+
+        session = MagicMock()
+        session.query.return_value.order_by.return_value.first.return_value = None
+        assert _newest_annotation_set(session) is None
+
+
+class TestNewestOntologySnapshot:
+    def test_returns_newest(self):
+        from protea.api.routers.annotate import _newest_ontology_snapshot
+
+        session = MagicMock()
+        snap = MagicMock()
+        session.query.return_value.order_by.return_value.first.return_value = snap
+        assert _newest_ontology_snapshot(session) is snap
+
+    def test_returns_none_when_empty(self):
+        from protea.api.routers.annotate import _newest_ontology_snapshot
+
+        session = MagicMock()
+        session.query.return_value.order_by.return_value.first.return_value = None
+        assert _newest_ontology_snapshot(session) is None
+
+
+class TestDeriveMethod:
+    def test_derive_method_used_in_showcase(self):
+        from protea.api.routers.showcase import _derive_method
+
+        assert _derive_method(None, None) == ("knn_baseline", "KNN (embedding distance)")
+        assert _derive_method(uuid4(), None) == ("knn_scored", "KNN + Scoring")
+        assert _derive_method(None, uuid4()) == ("knn_reranker", "KNN + Re-ranker")
+        assert _derive_method(uuid4(), uuid4()) == ("knn_reranker", "KNN + Re-ranker")
diff --git a/tests/test_annotations_router.py b/tests/test_annotations_router.py
new file mode 100644
index 0000000..ccaac33
--- /dev/null
+++ b/tests/test_annotations_router.py
@@ -0,0 +1,1249 @@
+"""Unit tests for the /annotations router.
+
+Database and queue are fully mocked -- no real infrastructure required.
+"""
+from __future__ import annotations
+
+from contextlib import contextmanager
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from sqlalchemy.exc import IntegrityError
+
+from protea.api.routers.annotations import router
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_app(session_factory, amqp_url="amqp://guest:guest@localhost", artifacts_dir=None):
+    app = FastAPI()
+    app.state.session_factory = session_factory
+    app.state.amqp_url = amqp_url
+    app.state.artifacts_dir = artifacts_dir or Path("/tmp/protea-test-artifacts")
+    app.include_router(router)
+    return app
+
+
+@contextmanager
+def _mock_scope(session):
+    yield session  # hands the given session straight to the ``with`` body; nothing runs on exit
+
+
+def _make_snapshot(snap_id=None, obo_url="http://obo", obo_version="2024-01-01", ia_url=None):
+    s = MagicMock()
+    s.id = snap_id or uuid4()
+    s.obo_url = obo_url
+    s.obo_version = obo_version
+    s.ia_url = ia_url
+    s.loaded_at = MagicMock()
+    s.loaded_at.isoformat.return_value = "2024-01-01T00:00:00"
+    return s
+
+
+def _make_annotation_set(set_id=None, source="goa", source_version="2024-01", snap_id=None, job_id=None):
+    a = MagicMock()
+    a.id = set_id or uuid4()
+    a.source = source
+    a.source_version = source_version
+    a.ontology_snapshot_id = snap_id or uuid4()
+    a.job_id = job_id
+    a.created_at = MagicMock()
+    a.created_at.isoformat.return_value = "2024-01-01T00:00:00"
+    a.meta = {"key": "value"}
+    return a
+
+
+def _make_evaluation_set(eval_id=None, old_id=None, new_id=None, job_id=None, stats=None):
+    e = MagicMock()
+    e.id = eval_id or uuid4()
+    e.old_annotation_set_id = old_id or uuid4()
+    e.new_annotation_set_id = new_id or uuid4()
+    e.job_id = job_id
+    e.created_at = MagicMock()
+    e.created_at.isoformat.return_value = "2024-06-01T00:00:00"
+    e.stats = stats or {"nk": 10, "lk": 5}
+    return e
+
+
+def _make_evaluation_result(result_id=None, eval_set_id=None, pred_set_id=None, scoring_id=None, job_id=None, results=None):
+    r = MagicMock()
+    r.id = result_id or uuid4()
+    r.evaluation_set_id = eval_set_id or uuid4()
+    r.prediction_set_id = pred_set_id or uuid4()
+    r.scoring_config_id = scoring_id
+    r.job_id = job_id
+    r.created_at = MagicMock()
+    r.created_at.isoformat.return_value = "2024-07-01T00:00:00"
+    r.results = results or {}
+    return r
+
+
+@pytest.fixture()
+def session():
+    return MagicMock()  # the object the client fixtures' patched session_scope hands out
+
+
+@pytest.fixture()
+def factory(session):
+    return MagicMock()  # passed to _make_app as the session factory; ``session`` is requested but unused
+
+
+@pytest.fixture()
+def client(session, factory):
+    app = _make_app(factory)
+    with patch("protea.api.routers.annotations.session_scope", side_effect=lambda _: _mock_scope(session)):
+        with TestClient(app) as c:
+            yield c, session
+
+
+@pytest.fixture()
+def client_with_artifacts(session, factory, tmp_path):
+    app = _make_app(factory, artifacts_dir=tmp_path)
+    with patch("protea.api.routers.annotations.session_scope", side_effect=lambda _: _mock_scope(session)):
+        with TestClient(app) as c:
+            yield c, session, tmp_path
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/snapshots (lines 71-86)
+# ---------------------------------------------------------------------------
+
+
+class TestListSnapshots:
+    def test_returns_list(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        # Simulate the subquery join: session.query(...).outerjoin(...).order_by(...).all()
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        session.query.return_value.outerjoin.return_value.order_by.return_value.all.return_value = [
+            (snap, 42)
+        ]
+
+        resp = c.get("/annotations/snapshots")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data) == 1
+        assert data[0]["obo_version"] == "2024-01-01"
+        assert data[0]["go_term_count"] == 42
+
+    def test_empty_list(self, client):
+        c, session = client
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        session.query.return_value.outerjoin.return_value.order_by.return_value.all.return_value = []
+
+        resp = c.get("/annotations/snapshots")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+    def test_null_count_defaults_to_zero(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        session.query.return_value.outerjoin.return_value.order_by.return_value.all.return_value = [
+            (snap, None)
+        ]
+
+        resp = c.get("/annotations/snapshots")
+        assert resp.status_code == 200
+        assert resp.json()[0]["go_term_count"] == 0
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/snapshots/{snapshot_id} (lines 105-116)
+# ---------------------------------------------------------------------------
+
+
+class TestGetSnapshot:
+    def test_returns_snapshot(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        session.get.return_value = snap
+        session.query.return_value.filter.return_value.scalar.return_value = 99
+
+        resp = c.get(f"/annotations/snapshots/{snap.id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["obo_version"] == "2024-01-01"
+        assert data["go_term_count"] == 99
+
+    def test_not_found(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.get(f"/annotations/snapshots/{uuid4()}")
+        assert resp.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# POST /annotations/snapshots/load (lines 176-195)
+# ---------------------------------------------------------------------------
+
+
+class TestLoadOntologySnapshot:
+    def test_success(self, client):
+        c, session = client
+
+        def add_side(obj):
+            from protea.infrastructure.orm.models.job import Job
+            if isinstance(obj, Job):
+                obj.id = uuid4()
+        session.add.side_effect = add_side
+
+        with patch("protea.api.routers.annotations.publish_job"):
+            resp = c.post(
+                "/annotations/snapshots/load",
+                json={"obo_url": "http://example.com/go.obo"},
+            )
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "queued"
+
+    def test_invalid_payload(self, client):
+        c, session = client
+        resp = c.post("/annotations/snapshots/load", json={})
+        assert resp.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/sets (lines 207-222)
+# ---------------------------------------------------------------------------
+
+
+class TestListAnnotationSets:
+    def test_returns_list(self, client):
+        c, session = client
+        aset = _make_annotation_set()
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        q_mock = session.query.return_value.outerjoin.return_value
+        q_mock.filter.return_value.order_by.return_value.all.return_value = [(aset, 10)]
+        q_mock.order_by.return_value.all.return_value = [(aset, 10)]
+
+        resp = c.get("/annotations/sets")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data) == 1
+        assert data[0]["source"] == "goa"
+
+    def test_filter_by_source(self, client):
+        c, session = client
+        aset = _make_annotation_set(source="quickgo")
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        q_mock = session.query.return_value.outerjoin.return_value
+        q_mock.filter.return_value.order_by.return_value.all.return_value = [(aset, 5)]
+
+        resp = c.get("/annotations/sets?source=quickgo")
+        assert resp.status_code == 200
+
+    def test_empty(self, client):
+        c, session = client
+        session.query.return_value.group_by.return_value.subquery.return_value = MagicMock()
+        q_mock = session.query.return_value.outerjoin.return_value
+        q_mock.order_by.return_value.all.return_value = []
+
+        resp = c.get("/annotations/sets")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/sets/{set_id} (lines 243-254)
+# ---------------------------------------------------------------------------
+
+
+class TestGetAnnotationSet:
+    def test_returns_set(self, client):
+        c, session = client
+        aset = _make_annotation_set(job_id=uuid4())
+        session.get.return_value = aset
+        session.query.return_value.filter.return_value.scalar.return_value = 100
+
+        resp = c.get(f"/annotations/sets/{aset.id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["annotation_count"] == 100
+        assert data["job_id"] is not None
+
+    def test_not_found(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.get(f"/annotations/sets/{uuid4()}")
+        assert resp.status_code == 404
+
+    def test_no_job_id(self, client):
+        c, session = client
+        aset = _make_annotation_set(job_id=None)
+        session.get.return_value = aset
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+
+        resp = c.get(f"/annotations/sets/{aset.id}")
+        assert resp.status_code == 200
+        assert resp.json()["job_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# POST /annotations/sets/load-goa (lines 300-319)
+# ---------------------------------------------------------------------------
+
+
+class TestLoadGOAAnnotations:
+    def test_success(self, client):
+        c, session = client
+
+        def add_side(obj):
+            from protea.infrastructure.orm.models.job import Job
+            if isinstance(obj, Job):
+                obj.id = uuid4()
+        session.add.side_effect = add_side
+
+        with patch("protea.api.routers.annotations.publish_job"):
+            resp = c.post(
+                "/annotations/sets/load-goa",
+                json={
+                    "ontology_snapshot_id": str(uuid4()),
+                    "gaf_url": "http://example.com/goa.gaf.gz",
+                    "source_version": "2024-01",
+                },
+            )
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "queued"
+
+    def test_invalid_payload(self, client):
+        c, session = client
+        resp = c.post("/annotations/sets/load-goa", json={})
+        assert resp.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# POST /annotations/sets/load-quickgo (lines 330-349)
+# ---------------------------------------------------------------------------
+
+
+class TestLoadQuickGOAnnotations:
+    def test_success(self, client):
+        c, session = client
+
+        def add_side(obj):
+            from protea.infrastructure.orm.models.job import Job
+            if isinstance(obj, Job):
+                obj.id = uuid4()
+        session.add.side_effect = add_side
+
+        with patch("protea.api.routers.annotations.publish_job"):
+            resp = c.post(
+                "/annotations/sets/load-quickgo",
+                json={
+                    "ontology_snapshot_id": str(uuid4()),
+                    "source_version": "2024-01",
+                },
+            )
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "queued"
+
+    def test_invalid_payload(self, client):
+        c, session = client
+        resp = c.post("/annotations/sets/load-quickgo", json={})
+        assert resp.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# Dependency edge cases (lines 45, 52, 57-60)
+# ---------------------------------------------------------------------------
+
+
+class TestDependencyGuards:
+    def test_missing_session_factory_raises(self):
+        app = FastAPI()
+        app.include_router(router)
+        with TestClient(app, raise_server_exceptions=False) as c:
+            resp = c.get("/annotations/snapshots")
+        assert resp.status_code == 500
+
+    def test_missing_amqp_url_raises(self, session):
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        # no amqp_url set
+        app.include_router(router)
+        with patch("protea.api.routers.annotations.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app, raise_server_exceptions=False) as c:
+                resp = c.post("/annotations/snapshots/load", json={"obo_url": "http://example.com/go.obo"})
+        assert resp.status_code == 500
+
+    def test_missing_artifacts_dir_raises(self, session):
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        # no artifacts_dir set
+        app.include_router(router)
+        eval_id = uuid4()
+        with patch("protea.api.routers.annotations.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app, raise_server_exceptions=False) as c:
+                resp = c.delete(f"/annotations/evaluation-sets/{eval_id}")
+        assert resp.status_code == 500
+
+
+# ---------------------------------------------------------------------------
+# PATCH /annotations/snapshots/{snapshot_id}/ia-url (lines 146-158)
+# ---------------------------------------------------------------------------
+
+
+class TestSetSnapshotIaUrl:
+    def test_set_ia_url_success(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        session.get.return_value = snap
+
+        resp = c.patch(
+            f"/annotations/snapshots/{snap.id}/ia-url",
+            json={"ia_url": "http://example.com/ia.tsv"},
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["id"] == str(snap.id)
+        assert data["obo_version"] == snap.obo_version
+
+    def test_set_ia_url_null_clears(self, client):
+        c, session = client
+        snap = _make_snapshot(ia_url="http://old.com/ia.tsv")
+        session.get.return_value = snap
+
+        resp = c.patch(
+            f"/annotations/snapshots/{snap.id}/ia-url",
+            json={"ia_url": None},
+        )
+        assert resp.status_code == 200
+
+    def test_missing_ia_url_key_returns_422(self, client):
+        c, session = client
+        snap = _make_snapshot()
+
+        resp = c.patch(
+            f"/annotations/snapshots/{snap.id}/ia-url",
+            json={"wrong_key": "value"},
+        )
+        assert resp.status_code == 422
+
+    def test_snapshot_not_found_returns_404(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.patch(
+            f"/annotations/snapshots/{uuid4()}/ia-url",
+            json={"ia_url": "http://example.com/ia.tsv"},
+        )
+        assert resp.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# DELETE /annotations/sets/{set_id} (lines 272-289)
+# ---------------------------------------------------------------------------
+
+
+class TestDeleteAnnotationSet:
+    def test_delete_success(self, client):
+        c, session = client
+        aset = _make_annotation_set()
+        session.get.return_value = aset
+        session.query.return_value.filter.return_value.scalar.return_value = 42
+
+        resp = c.delete(f"/annotations/sets/{aset.id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["deleted"] == str(aset.id)
+        assert data["annotations_deleted"] == 42
+        session.delete.assert_called_once_with(aset)
+
+    def test_delete_not_found(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.delete(f"/annotations/sets/{uuid4()}")
+        assert resp.status_code == 404
+
+    def test_delete_integrity_error_returns_409(self, client):
+        c, session = client
+        aset = _make_annotation_set()
+        session.get.return_value = aset
+        session.query.return_value.filter.return_value.scalar.return_value = 10
+        session.flush.side_effect = IntegrityError("stmt", "params", Exception("fk"))
+
+        resp = c.delete(f"/annotations/sets/{aset.id}")
+        assert resp.status_code == 409
+        assert "referenced" in resp.json()["detail"].lower()
+
+
+# ---------------------------------------------------------------------------
+# POST /annotations/evaluation-sets/generate (lines 367-386)
+# ---------------------------------------------------------------------------
+
+
+class TestGenerateEvaluationSet:
+    def test_success(self, client):
+        c, session = client
+        old_id, new_id = str(uuid4()), str(uuid4())
+
+        # Mock Job creation
+        def add_side(obj):
+            from protea.infrastructure.orm.models.job import Job
+            if isinstance(obj, Job):
+                obj.id = uuid4()
+        session.add.side_effect = add_side
+
+        with patch("protea.api.routers.annotations.publish_job"):
+            resp = c.post(
+                "/annotations/evaluation-sets/generate",
+                json={"old_annotation_set_id": old_id, "new_annotation_set_id": new_id},
+            )
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "queued"
+
+    def test_invalid_payload_returns_422(self, client):
+        c, session = client
+        resp = c.post("/annotations/evaluation-sets/generate", json={})
+        assert resp.status_code == 422
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/evaluation-sets (lines 394-396)
+# ---------------------------------------------------------------------------
+
+
+class TestListEvaluationSets:
+    def test_returns_list(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set()
+        db.query.return_value.order_by.return_value.all.return_value = [eval_set]
+
+        response = api.get("/annotations/evaluation-sets")
+        assert response.status_code == 200
+        items = response.json()
+        assert len(items) == 1
+        only = items[0]
+        assert (only["id"], only["stats"]) == (str(eval_set.id), eval_set.stats)
+
+    def test_empty_list(self, client):
+        api, db = client
+        # The mocked query chain yields no rows.
+        db.query.return_value.order_by.return_value.all.return_value = []
+        response = api.get("/annotations/evaluation-sets")
+        assert response.status_code == 200
+        assert response.json() == []
+
+
+# ---------------------------------------------------------------------------
+# DELETE /annotations/evaluation-sets/{eval_id} (lines 416-434)
+# ---------------------------------------------------------------------------
+
+
+class TestDeleteEvaluationSet:
+    def test_delete_success(self, client_with_artifacts):
+        c, session, tmp_path = client_with_artifacts
+        ev = _make_evaluation_set()
+        session.get.side_effect = lambda model, id_: ev if id_ == ev.id else None
+
+        # Create a fake result with an artifact directory
+        result_mock = MagicMock()
+        result_mock.id = uuid4()
+        result_dir = tmp_path / str(result_mock.id)
+        result_dir.mkdir()
+        (result_dir / "output.tsv").write_text("test")
+
+        session.query.return_value.filter.return_value.all.return_value = [result_mock]
+
+        resp = c.delete(f"/annotations/evaluation-sets/{ev.id}")
+        assert resp.status_code == 204
+        session.delete.assert_called_once_with(ev)
+        # Artifact directory should be removed
+        assert not result_dir.exists()
+
+    def test_delete_not_found(self, client_with_artifacts):
+        c, session, _ = client_with_artifacts
+        session.get.return_value = None
+        resp = c.delete(f"/annotations/evaluation-sets/{uuid4()}")
+        assert resp.status_code == 404
+        session.delete.assert_not_called()  # a 404 must not delete anything
+
+    def test_delete_no_artifact_dir(self, client_with_artifacts):
+        c, session, _ = client_with_artifacts
+        ev = _make_evaluation_set()
+        session.get.side_effect = lambda model, id_: ev if id_ == ev.id else None
+        session.query.return_value.filter.return_value.all.return_value = []
+        resp = c.delete(f"/annotations/evaluation-sets/{ev.id}")
+        assert resp.status_code == 204
+        session.delete.assert_called_once_with(ev)  # the set is deleted even with no results
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/evaluation-sets/{eval_id} (lines 442-446)
+# ---------------------------------------------------------------------------
+
+
+class TestGetEvaluationSet:
+    def test_success(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set(job_id=uuid4())
+        db.get.return_value = eval_set
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_set.id}")
+        assert response.status_code == 200
+        body = response.json()
+        # UUID fields are compared against their string form in the JSON body.
+        assert (body["id"], body["job_id"]) == (str(eval_set.id), str(eval_set.job_id))
+
+    def test_not_found(self, client):
+        api, db = client
+        # The session lookup finds nothing for the requested id.
+        db.get.return_value = None
+        response = api.get(f"/annotations/evaluation-sets/{uuid4()}")
+        assert response.status_code == 404
+
+    def test_no_job_id(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set(job_id=None)
+        db.get.return_value = eval_set
+        # A missing job_id is expected as JSON null, not as an error.
+        response = api.get(f"/annotations/evaluation-sets/{eval_set.id}")
+        assert response.status_code == 200
+        assert response.json()["job_id"] is None
+
+
+# ---------------------------------------------------------------------------
+# _eval_set_or_404 helper (lines 457-460) -- tested indirectly via the ground-truth endpoints below
+# ---------------------------------------------------------------------------
+
+
+# ---------------------------------------------------------------------------
+# Ground-truth TSV downloads (lines 475-591)
+# ---------------------------------------------------------------------------
+
+
+class _EvalData:
+    """Stand-in for the value returned by compute_evaluation_data."""
+    def __init__(self, nk=None, lk=None, pk=None, known=None):
+        fields = {"nk": nk, "lk": lk, "pk": pk, "known": known}
+        for attr, value in fields.items():
+            # Falsy arguments (None or empty) become a fresh empty dict.
+            setattr(self, attr, value or {})
+
+
+class TestDownloadGroundTruthNK:
+    def test_success(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set()
+        old_set = _make_annotation_set(snap_id=uuid4())
+
+        def lookup(model, id_):
+            from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+            from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+            # Answer by model class; the requested id is ignored.
+            if model is EvaluationSet:
+                return eval_set
+            # Every other model is reported as missing.
+            return old_set if model is AnnotationSet else None
+        db.get.side_effect = lookup
+
+        delta = _EvalData(nk={"P12345": {"GO:0003674", "GO:0008150"}})
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=delta):
+            response = api.get(f"/annotations/evaluation-sets/{eval_set.id}/ground-truth-NK.tsv")
+        assert response.status_code == 200
+        assert "text/tab-separated-values" in response.headers["content-type"]
+        rows = response.text.strip().split("\n")
+        # Two GO terms on the single NK protein are expected as two rows.
+        assert len(rows) == 2 and "P12345" in rows[0]
+
+    def test_not_found(self, client):
+        api, db = client
+        db.get.return_value = None
+
+        response = api.get(f"/annotations/evaluation-sets/{uuid4()}/ground-truth-NK.tsv")
+        assert response.status_code == 404
+
+
+class TestDownloadGroundTruthLK:
+    def test_success(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set()
+        old_set = _make_annotation_set(snap_id=uuid4())
+
+        def lookup(model, id_):
+            from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+            from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+            # Answer by model class; the requested id is ignored.
+            if model is EvaluationSet:
+                return eval_set
+            # Every other model is reported as missing.
+            return old_set if model is AnnotationSet else None
+        db.get.side_effect = lookup
+
+        delta = _EvalData(lk={"Q99999": {"GO:0005575"}})
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=delta):
+            response = api.get(f"/annotations/evaluation-sets/{eval_set.id}/ground-truth-LK.tsv")
+        assert response.status_code == 200
+        rows = response.text.strip().split("\n")
+        # One protein with one term is expected as exactly one row.
+        assert len(rows) == 1 and "Q99999\tGO:0005575" in rows[0]
+
+
+class TestDownloadGroundTruthPK:
+    def test_success(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set()
+        old_set = _make_annotation_set(snap_id=uuid4())
+
+        def lookup(model, id_):
+            from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+            from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+            # Answer by model class; the requested id is ignored.
+            if model is EvaluationSet:
+                return eval_set
+            # Every other model is reported as missing.
+            return old_set if model is AnnotationSet else None
+        db.get.side_effect = lookup
+
+        delta = _EvalData(pk={"A00001": {"GO:0003674"}})
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=delta):
+            response = api.get(f"/annotations/evaluation-sets/{eval_set.id}/ground-truth-PK.tsv")
+        assert response.status_code == 200
+        assert "A00001\tGO:0003674" in response.text
+
+
+class TestDownloadKnownTerms:
+    def test_success(self, client):
+        api, db = client
+        eval_set = _make_evaluation_set()
+        old_set = _make_annotation_set(snap_id=uuid4())
+
+        def lookup(model, id_):
+            from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+            from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+            # Answer by model class; the requested id is ignored.
+            if model is EvaluationSet:
+                return eval_set
+            # Every other model is reported as missing.
+            return old_set if model is AnnotationSet else None
+        db.get.side_effect = lookup
+
+        delta = _EvalData(known={"P12345": {"GO:0003674"}, "Q99999": {"GO:0005575"}})
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=delta):
+            response = api.get(f"/annotations/evaluation-sets/{eval_set.id}/known-terms.tsv")
+        assert response.status_code == 200
+        rows = response.text.strip().split("\n")
+        assert len(rows) == 2
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/evaluation-sets/{eval_id}/delta-proteins.fasta (lines 615-672)
+# ---------------------------------------------------------------------------
+
+
+class TestDownloadDeltaFasta:
+    def _setup_session(self, session, ev, ann_old, fake_data, protein_rows=None):
+        """Point session.get (and optionally the protein query chain) at canned rows; fake_data is not read here."""
+        def get_side(model, id_):
+            from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+            from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+            if model is EvaluationSet:
+                return ev
+            return ann_old if model is AnnotationSet else None
+        session.get.side_effect = get_side
+
+        # Leave the query chain untouched unless rows were supplied.
+        if protein_rows is not None:
+            session.query.return_value.join.return_value.filter.return_value.order_by.return_value.all.return_value = protein_rows
+
+    def test_all_category(self, client):
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        protein = MagicMock()
+        protein.accession = "P12345"
+        protein.entry_name = "P12345_HUMAN"
+        protein.organism = "Homo sapiens"
+        protein.taxonomy_id = 9606
+        seq = MagicMock()
+        seq.sequence = "ACDEFGHIKLMNPQRST"
+
+        fake_data = _EvalData(nk={"P12345": {"GO:0003674"}}, lk={})
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[(protein, seq)])
+
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta")
+        assert resp.status_code == 200
+        assert ">P12345" in resp.text
+        assert "ACDEFGHIKLMNPQRST" in resp.text
+        assert "(NK)" in resp.text
+
+    def test_nk_category_filter(self, client):
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        protein = MagicMock()
+        protein.accession = "P12345"
+        protein.entry_name = None
+        protein.organism = None
+        protein.taxonomy_id = None
+        seq = MagicMock()
+        seq.sequence = "ACDEF"
+
+        fake_data = _EvalData(nk={"P12345": {"GO:0003674"}}, lk={"Q99999": {"GO:0005575"}})
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[(protein, seq)])
+        # NOTE(review): only the accession is asserted below; the category label is not checked.
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta?category=nk")
+        assert resp.status_code == 200
+        assert ">P12345" in resp.text
+
+    def test_empty_delta_returns_empty_fasta(self, client):
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        fake_data = _EvalData()
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[])
+
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta")
+        assert resp.status_code == 200
+        assert resp.text == ""
+
+    def test_long_sequence_wraps_at_60(self, client):
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        protein = MagicMock()
+        protein.accession = "P12345"
+        protein.entry_name = None
+        protein.organism = None
+        protein.taxonomy_id = None
+        seq = MagicMock()
+        seq.sequence = "A" * 120  # should wrap to two lines of 60
+
+        fake_data = _EvalData(nk={"P12345": {"GO:0003674"}})
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[(protein, seq)])
+
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta")
+        # Check the status first so an error response fails here, not on the line count.
+        assert resp.status_code == 200
+        lines = resp.text.strip().split("\n")
+        assert len(lines) == 3  # header + 2 sequence lines
+        assert [len(line) for line in lines[1:]] == [60, 60]
+
+    def test_pk_category(self, client):
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        protein = MagicMock()
+        protein.accession = "X00001"
+        protein.entry_name = "X_MOUSE"
+        protein.organism = "Mus musculus"
+        protein.taxonomy_id = 10090
+        seq = MagicMock()
+        seq.sequence = "MMLLL"
+
+        fake_data = _EvalData(pk={"X00001": {"GO:0005575"}})
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[(protein, seq)])
+
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta?category=pk")
+        assert resp.status_code == 200
+        assert "(PK)" in resp.text
+
+    def test_all_category_includes_lk(self, client):
+        """Ensure LK proteins are included when category=all (covers line 632)."""
+        c, session = client
+        ev = _make_evaluation_set()
+        ann_old = _make_annotation_set(snap_id=uuid4())
+
+        protein = MagicMock()
+        protein.accession = "Q99999"
+        protein.entry_name = None
+        protein.organism = None
+        protein.taxonomy_id = None
+        seq = MagicMock()
+        seq.sequence = "MMMM"
+
+        fake_data = _EvalData(nk={}, lk={"Q99999": {"GO:0005575"}})
+        self._setup_session(session, ev, ann_old, fake_data, protein_rows=[(protein, seq)])
+
+        with patch("protea.api.routers.annotations.compute_evaluation_data", return_value=fake_data):
+            resp = c.get(f"/annotations/evaluation-sets/{ev.id}/delta-proteins.fasta?category=all")
+        assert resp.status_code == 200
+        assert "(LK)" in resp.text
+
+
+# ---------------------------------------------------------------------------
+# POST /annotations/evaluation-sets/{eval_id}/run (lines 698-720)
+# ---------------------------------------------------------------------------
+
+
+class TestRunCafaEvaluation:
+    def test_success(self, client):
+        api, db = client
+        eval_id = uuid4()
+        body = {"prediction_set_id": str(uuid4())}
+        db.get.return_value = _make_evaluation_set(eval_id=eval_id)
+
+        # Assign an id to any Job passed to session.add().
+        def assign_job_id(obj):
+            from protea.infrastructure.orm.models.job import Job
+            if isinstance(obj, Job):
+                obj.id = uuid4()
+        db.add.side_effect = assign_job_id
+        # publish_job is replaced with a mock while the request runs.
+        with patch("protea.api.routers.annotations.publish_job"):
+            response = api.post(
+                f"/annotations/evaluation-sets/{eval_id}/run",
+                json=body,
+            )
+        assert response.status_code == 200
+        assert response.json()["status"] == "queued"
+
+    def test_invalid_payload_returns_422(self, client):
+        api, db = client
+        url = f"/annotations/evaluation-sets/{uuid4()}/run"
+        # An empty JSON body is expected to be rejected with 422.
+        response = api.post(url, json={})
+        assert response.status_code == 422
+
+    def test_evaluation_set_not_found(self, client):
+        api, db = client
+        url = f"/annotations/evaluation-sets/{uuid4()}/run"
+        body = {"prediction_set_id": str(uuid4())}
+        db.get.return_value = None
+
+        with patch("protea.api.routers.annotations.publish_job"):
+            response = api.post(
+                url,
+                json=body,
+            )
+        assert response.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# GET .../results/{result_id}/metrics.tsv (lines 732-751)
+# ---------------------------------------------------------------------------
+
+
+class TestDownloadEvaluationMetrics:
+    def test_success_with_results(self, client):
+        api, db = client
+        eval_id = uuid4()
+        # Two populated NK namespaces plus an LK setting with no metrics.
+        nk_metrics = {
+            "BPO": {"fmax": 0.42, "precision": 0.5, "recall": 0.35, "tau": 0.3, "coverage": 0.8, "n_proteins": 100},
+            "MFO": {"fmax": 0.55, "precision": 0.6, "recall": 0.5, "tau": 0.4, "coverage": 0.9, "n_proteins": 80},
+        }
+        stored = _make_evaluation_result(
+            eval_set_id=eval_id,
+            results={"NK": nk_metrics, "LK": {}},
+        )
+        db.get.return_value = stored
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results/{stored.id}/metrics.tsv")
+        assert response.status_code == 200
+        assert "text/tab-separated-values" in response.headers["content-type"]
+
+        header, *data_rows = response.text.strip().split("\n")
+        # header + 2 data lines (NK/BPO and NK/MFO)
+        assert len(data_rows) == 2
+        assert header.startswith("setting")
+        assert "NK\tBPO" in data_rows[0]
+
+    def test_result_not_found(self, client):
+        api, db = client
+        db.get.return_value = None
+        url = f"/annotations/evaluation-sets/{uuid4()}/results/{uuid4()}/metrics.tsv"
+
+        response = api.get(url)
+        assert response.status_code == 404
+
+    def test_result_wrong_eval_set(self, client):
+        api, db = client
+        requested_set = uuid4()
+        # The stored result is created with a different, freshly generated eval set id.
+        foreign = _make_evaluation_result(eval_set_id=uuid4())
+        db.get.return_value = foreign
+        response = api.get(f"/annotations/evaluation-sets/{requested_set}/results/{foreign.id}/metrics.tsv")
+        assert response.status_code == 404
+
+    def test_empty_results(self, client):
+        api, db = client
+        eval_id = uuid4()
+        stored = _make_evaluation_result(eval_set_id=eval_id, results={})
+        db.get.return_value = stored
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results/{stored.id}/metrics.tsv")
+        assert response.status_code == 200
+        # With no metrics stored, only the header line is expected.
+        assert len(response.text.strip().split("\n")) == 1
+
+
+# ---------------------------------------------------------------------------
+# GET .../results/{result_id}/artifacts.zip (lines 768-785)
+# ---------------------------------------------------------------------------
+
+
+class TestDownloadEvaluationArtifacts:
+    def test_success(self, client_with_artifacts):
+        api, db, artifacts_root = client_with_artifacts
+        eval_id = uuid4()
+        stored = _make_evaluation_result(eval_set_id=eval_id)
+        db.get.return_value = stored
+        # Populate <artifacts_root>/<result id>/ with two small files.
+        artifact_dir = artifacts_root / str(stored.id)
+        artifact_dir.mkdir()
+        files = {
+            "pr_curve.tsv": "threshold\tprecision\trecall\n0.5\t0.8\t0.6",
+            "metrics.json": '{"fmax": 0.42}',
+        }
+        for filename, text in files.items():
+            (artifact_dir / filename).write_text(text)
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results/{stored.id}/artifacts.zip")
+        assert response.status_code == 200
+        assert "application/zip" in response.headers["content-type"]
+        assert len(response.content) > 0
+        # The payload must open as a zip archive that lists both files.
+        import io
+        import zipfile
+        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
+            assert {"pr_curve.tsv", "metrics.json"}.issubset(archive.namelist())
+
+    def test_result_not_found(self, client_with_artifacts):
+        api, db, _ = client_with_artifacts
+        db.get.return_value = None
+        missing = f"/annotations/evaluation-sets/{uuid4()}/results/{uuid4()}/artifacts.zip"
+
+        response = api.get(missing)
+        assert response.status_code == 404
+
+    def test_no_artifacts_directory(self, client_with_artifacts):
+        api, db, _ = client_with_artifacts
+        eval_id = uuid4()
+        stored = _make_evaluation_result(eval_set_id=eval_id)
+        db.get.return_value = stored
+
+        # The result row exists, but no directory was created for it on disk.
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results/{stored.id}/artifacts.zip")
+        assert response.status_code == 404
+        assert "No artifacts found" in response.json()["detail"]
+
+
+# ---------------------------------------------------------------------------
+# GET .../results (lines 800-809)
+# ---------------------------------------------------------------------------
+
+
+class TestListEvaluationResults:
+    def test_success(self, client):
+        api, db = client
+        eval_id = uuid4()
+        eval_set = _make_evaluation_set(eval_id=eval_id)
+        row = _make_evaluation_result(eval_set_id=eval_id, scoring_id=uuid4(), job_id=uuid4())
+
+        # session.get serves the evaluation set; the filtered, ordered query serves its results.
+        db.get.return_value = eval_set
+        db.query.return_value.filter.return_value.order_by.return_value.all.return_value = [row]
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results")
+        assert response.status_code == 200
+        listed = response.json()
+        # Exactly the one stored result is expected, identified by its stringified id.
+        assert [item["id"] for item in listed] == [str(row.id)]
+
+    def test_eval_set_not_found(self, client):
+        api, db = client
+        db.get.return_value = None
+
+        response = api.get(f"/annotations/evaluation-sets/{uuid4()}/results")
+        assert response.status_code == 404
+
+    def test_empty_results(self, client):
+        api, db = client
+        eval_id = uuid4()
+        db.get.return_value = _make_evaluation_set(eval_id=eval_id)
+        # The set exists, but the results query yields no rows.
+        db.query.return_value.filter.return_value.order_by.return_value.all.return_value = []
+
+        response = api.get(f"/annotations/evaluation-sets/{eval_id}/results")
+        assert response.status_code == 200
+        assert response.json() == []
+
+
+# ---------------------------------------------------------------------------
+# DELETE .../results/{result_id} (lines 834-845)
+# ---------------------------------------------------------------------------
+
+
+class TestDeleteEvaluationResult:
+    def test_success(self, client_with_artifacts):
+        c, session, tmp_path = client_with_artifacts
+        eval_id = uuid4()
+        result = _make_evaluation_result(eval_set_id=eval_id)
+        session.get.return_value = result
+
+        # Create artifact dir
+        result_dir = tmp_path / str(result.id)
+        result_dir.mkdir()
+        (result_dir / "output.tsv").write_text("data")
+
+        resp = c.delete(f"/annotations/evaluation-sets/{eval_id}/results/{result.id}")
+        assert resp.status_code == 204
+        session.delete.assert_called_once_with(result)
+        assert not result_dir.exists()
+
+    def test_not_found(self, client_with_artifacts):
+        c, session, _ = client_with_artifacts
+        eval_id = uuid4()
+        session.get.return_value = None
+        resp = c.delete(f"/annotations/evaluation-sets/{eval_id}/results/{uuid4()}")
+        assert resp.status_code == 404
+        session.delete.assert_not_called()  # a 404 must not delete anything
+
+    def test_wrong_eval_set(self, client_with_artifacts):
+        c, session, _ = client_with_artifacts
+        eval_id = uuid4()
+        result = _make_evaluation_result(eval_set_id=uuid4())  # belongs to a different eval set
+        session.get.return_value = result
+        resp = c.delete(f"/annotations/evaluation-sets/{eval_id}/results/{result.id}")
+        assert resp.status_code == 404
+        session.delete.assert_not_called()  # a result of another eval set must survive
+
+    def test_no_artifact_dir(self, client_with_artifacts):
+        c, session, _ = client_with_artifacts
+        eval_id = uuid4()
+        result = _make_evaluation_result(eval_set_id=eval_id)
+        session.get.return_value = result
+        resp = c.delete(f"/annotations/evaluation-sets/{eval_id}/results/{result.id}")
+        assert resp.status_code == 204
+        session.delete.assert_called_once_with(result)  # deleted even without a directory on disk
+
+
+# ---------------------------------------------------------------------------
+# GET /annotations/snapshots/{snapshot_id}/subgraph (lines 859-927)
+# ---------------------------------------------------------------------------
+
+
+class TestGetGoSubgraph:
+    def _make_go_term(self, db_id, go_id, name="term", aspect="F"):
+        """Build a MagicMock standing in for a GO term row, with no ontology snapshot set."""
+        t = MagicMock()
+        t.id = db_id
+        t.go_id = go_id
+        t.name, t.aspect = name, aspect
+        t.ontology_snapshot_id = None
+        return t
+
+    def _make_rel(self, child_id, parent_id, relation_type="is_a"):
+        """Build a MagicMock standing in for a child -> parent GO term relationship row."""
+        r = MagicMock()
+        r.child_go_term_id, r.parent_go_term_id = child_id, parent_id
+        r.relation_type = relation_type
+        r.ontology_snapshot_id = None
+        return r
+
+    def test_basic_subgraph(self, client):
+        c, session = client
+        snap_id = uuid4()
+        snap = _make_snapshot(snap_id=snap_id)
+
+        seed = self._make_go_term(1, "GO:0003674", "molecular_function")
+        parent = self._make_go_term(2, "GO:0005488", "binding")
+        rel = self._make_rel(1, 2, "is_a")
+
+        # session.get for snapshot
+        session.get.return_value = snap
+        # session.query(GOTerm).filter(...).all() for seed terms
+        # session.query(GOTermRelationship).filter(...).all() for rels
+        # session.query(GOTerm).filter(...).all() for parents
+        query_mock = session.query.return_value
+        filter_mock = query_mock.filter.return_value
+        filter_mock.all.side_effect = [
+            [seed],   # seed terms query
+            [rel],    # first BFS level relationships
+            [parent], # parent terms fetch
+            [],       # second BFS level relationships (no more)
+        ]
+
+        resp = c.get(f"/annotations/snapshots/{snap_id}/subgraph?go_ids=GO:0003674")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data["nodes"]) == 2
+        assert len(data["edges"]) == 1
+        # Check that only the seed term is marked as is_query
+        seed_node = next(n for n in data["nodes"] if n["go_id"] == "GO:0003674")
+        assert seed_node["is_query"] is True
+        parent_node = next(n for n in data["nodes"] if n["go_id"] == "GO:0005488")
+        assert parent_node["is_query"] is False
+
+    def test_snapshot_not_found(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.get(f"/annotations/snapshots/{uuid4()}/subgraph?go_ids=GO:0003674")
+        assert resp.status_code == 404
+
+    def test_no_matching_terms_returns_empty(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        session.get.return_value = snap
+        session.query.return_value.filter.return_value.all.return_value = []
+
+        resp = c.get(f"/annotations/snapshots/{snap.id}/subgraph?go_ids=GO:9999999")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data == {"nodes": [], "edges": []}
+
+    def test_multiple_go_ids(self, client):
+        c, session = client
+        snap = _make_snapshot()
+        session.get.return_value = snap
+
+        t1 = self._make_go_term(1, "GO:0003674")
+        t2 = self._make_go_term(2, "GO:0008150")
+
+        query_mock = session.query.return_value
+        filter_mock = query_mock.filter.return_value
+        filter_mock.all.side_effect = [
+            [t1, t2],  # seed terms
+            [],         # no relationships
+        ]
+
+        resp = c.get(f"/annotations/snapshots/{snap.id}/subgraph?go_ids=GO:0003674,GO:0008150")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data["nodes"]) == 2
+        assert data["edges"] == []
+
+    def test_bfs_stops_when_frontier_empty(self, client):
+        """One BFS level adds a parent; the next level finds no relationships, so the frontier empties before depth=5 (line 887)."""
+        c, session = client
+        snap = _make_snapshot()
+        session.get.return_value = snap
+
+        seed = self._make_go_term(1, "GO:0003674")
+        parent = self._make_go_term(2, "GO:0005488")
+        rel1 = self._make_rel(1, 2, "is_a")
+
+        query_mock = session.query.return_value
+        filter_mock = query_mock.filter.return_value
+        filter_mock.all.side_effect = [
+            [seed],    # seed terms
+            [rel1],    # first BFS: rel from 1->2
+            [parent],  # fetch parent 2
+            [],        # second BFS: no rels from frontier {2}
+        ]
+
+        resp = c.get(f"/annotations/snapshots/{snap.id}/subgraph?go_ids=GO:0003674&depth=5")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data["nodes"]) == 2
diff --git a/tests/test_api_query_sets.py b/tests/test_api_query_sets.py
index dc63124..8fead82 100644
--- a/tests/test_api_query_sets.py
+++ b/tests/test_api_query_sets.py
@@ -13,8 +13,7 @@
 from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
-from protea.api.routers.query_sets import router
-
+from protea.api.routers.query_sets import _parse_fasta, router
 
 # ---------------------------------------------------------------------------
 # Fixtures
@@ -90,7 +89,6 @@ def flush_side():
 
         # Intercept QuerySet add to set its id
         added_qs = None
-        original_add = session.add.side_effect
 
         def add_side(obj):
             nonlocal added_qs
@@ -244,9 +242,6 @@ def test_not_found_returns_404(self, client) -> None:
 # FASTA parser unit tests
 # ---------------------------------------------------------------------------
 
-from protea.api.routers.query_sets import _parse_fasta
-
-
 class TestParseFasta:
     def test_parses_two_records(self) -> None:
         fasta = ">P12345\nACDEF\n>Q67890\nGHIKL\n"
diff --git a/tests/test_base_worker.py b/tests/test_base_worker.py
index 15925aa..b0ea184 100644
--- a/tests/test_base_worker.py
+++ b/tests/test_base_worker.py
@@ -1,18 +1,21 @@
 """
-Unit tests for BaseWorker.
+Unit tests for BaseWorker and StaleJobReaper.
 Uses a mocked session factory and a fake Operation — no real DB needed.
 """
 from __future__ import annotations
 
-from unittest.mock import MagicMock
+import signal
+from datetime import UTC, datetime, timedelta
+from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
 import pytest
 
-from protea.core.contracts.operation import OperationResult
+from protea.core.contracts.operation import OperationResult, RetryLaterError
 from protea.core.contracts.registry import OperationRegistry
 from protea.infrastructure.orm.models.job import Job, JobStatus
 from protea.workers.base_worker import BaseWorker, WorkerConfig
+from protea.workers.stale_job_reaper import StaleJobReaper
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -145,3 +148,652 @@ def test_progress_fields_are_set(self):
 
         assert job.progress_current == 5
         assert job.progress_total == 10
+
+    def test_retry_later_uses_adaptive_backoff(self):
+        """RetryLaterError delay should increase based on previous retry count."""
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        # Simulate 2 previous retries
+        session.query.return_value.filter.return_value.scalar.return_value = 2
+        factory = MagicMock(return_value=session)
+
+        registry, _ = _make_registry(raises=RetryLaterError("GPU busy", delay_seconds=30))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        with pytest.raises(RetryLaterError) as exc_info:
+            worker.handle_job(job.id)
+
+        # 30 * 2^2 = 120 seconds
+        assert exc_info.value.delay_seconds == 120
+        assert job.status == JobStatus.QUEUED
+
+    def test_retry_backoff_capped_at_600(self):
+        """Adaptive backoff should be capped at 600 seconds."""
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        # Simulate 10 previous retries → 60 * 2^10 = 61440, capped to 600
+        session.query.return_value.filter.return_value.scalar.return_value = 10
+        factory = MagicMock(return_value=session)
+
+        registry, _ = _make_registry(raises=RetryLaterError("GPU busy", delay_seconds=60))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        with pytest.raises(RetryLaterError) as exc_info:
+            worker.handle_job(job.id)
+
+        assert exc_info.value.delay_seconds == 600
+
+
+# ---------------------------------------------------------------------------
+# StaleJobReaper
+# ---------------------------------------------------------------------------
+
+class TestStaleJobReaper:
+    def test_reaps_stale_running_jobs(self):
+        """Jobs in RUNNING for longer than timeout should be marked FAILED."""
+        stale_job = MagicMock(spec=Job)
+        stale_job.id = uuid4()
+        stale_job.status = JobStatus.RUNNING
+        stale_job.operation = "compute_embeddings"
+        stale_job.started_at = datetime.now(UTC) - timedelta(hours=2)
+
+        session = MagicMock()
+        session.query.return_value.filter.return_value.all.return_value = [stale_job]
+        factory = MagicMock(return_value=session)
+
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        count = reaper._reap()
+
+        assert count == 1
+        assert stale_job.status == JobStatus.FAILED
+        assert stale_job.error_code == "JobTimeout"
+        session.add.assert_called_once()  # JobEvent
+        session.commit.assert_called_once()
+
+    def test_no_stale_jobs_returns_zero(self):
+        """With no stale jobs, _reap() returns 0 but still commits exactly once."""
+        session = MagicMock()
+        session.query.return_value.filter.return_value.all.return_value = []
+        factory = MagicMock(return_value=session)
+
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        count = reaper._reap()
+
+        assert count == 0
+        session.commit.assert_called_once()
+
+    def test_reaper_handles_db_error_gracefully(self):
+        """If the DB query fails, reaper raises but does not crash permanently."""
+        session = MagicMock()
+        session.query.side_effect = RuntimeError("DB connection lost")
+        factory = MagicMock(return_value=session)
+
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        with pytest.raises(RuntimeError, match="DB connection lost"):
+            reaper._reap()
+        session.rollback.assert_called_once()
+
+    def test_reaper_rollback_also_fails(self):
+        """If rollback itself raises, the exception from _reap still propagates."""
+        session = MagicMock()
+        session.query.side_effect = RuntimeError("DB gone")
+        session.rollback.side_effect = RuntimeError("rollback failed too")
+        factory = MagicMock(return_value=session)
+
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        with pytest.raises(RuntimeError, match="DB gone"):
+            reaper._reap()
+        session.rollback.assert_called_once()
+        session.close.assert_called_once()
+
+    def test_run_registers_signal_handlers(self):
+        """run() should register SIGINT and SIGTERM handlers."""
+        factory = MagicMock()
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        # Stub _reap to raise the stop flag so the run() loop is expected to exit at once.
+        reaper._stop = False
+
+        def fake_reap():
+            reaper._stop = True
+            return 0
+
+        reaper._reap = fake_reap
+
+        with patch("protea.workers.stale_job_reaper.signal.signal") as mock_signal, \
+             patch("protea.workers.stale_job_reaper.time.sleep"):
+            reaper.run(interval_seconds=1)
+
+        # Each call_args_list entry is (args, kwargs); keep only the positional args.
+        calls = [c[0] for c in mock_signal.call_args_list]
+        assert (signal.SIGINT, reaper._handle_stop) in calls
+        assert (signal.SIGTERM, reaper._handle_stop) in calls
+
+    def test_run_loops_and_stops_on_flag(self):
+        """run() calls _reap repeatedly until _stop is set."""
+        factory = MagicMock()
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        reap_count = [0]
+
+        def fake_reap():
+            reap_count[0] += 1
+            if reap_count[0] >= 3:
+                reaper._stop = True
+            return 0
+
+        reaper._reap = fake_reap
+
+        with patch("protea.workers.stale_job_reaper.signal.signal"), \
+             patch("protea.workers.stale_job_reaper.time.sleep"):
+            reaper.run(interval_seconds=1)
+
+        assert reap_count[0] == 3
+
+    def test_run_logs_reaped_count(self):
+        """When _reap returns non-zero, run() logs it."""
+        factory = MagicMock()
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+
+        def fake_reap():
+            reaper._stop = True
+            return 5
+
+        reaper._reap = fake_reap
+
+        with patch("protea.workers.stale_job_reaper.signal.signal"), \
+             patch("protea.workers.stale_job_reaper.time.sleep"), \
+             patch("protea.workers.stale_job_reaper.logger") as mock_logger:
+            reaper.run(interval_seconds=1)
+
+        # Should have logged the reaped count
+        info_calls = [str(c) for c in mock_logger.info.call_args_list]
+        assert any("5" in c for c in info_calls)
+
+    def test_run_catches_reap_exception(self):
+        """If _reap raises, run() logs the error and continues."""
+        factory = MagicMock()
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        call_count = [0]
+
+        def failing_reap():
+            call_count[0] += 1
+            if call_count[0] == 1:
+                raise RuntimeError("transient DB error")
+            reaper._stop = True
+            return 0
+
+        reaper._reap = failing_reap
+
+        with patch("protea.workers.stale_job_reaper.signal.signal"), \
+             patch("protea.workers.stale_job_reaper.time.sleep"), \
+             patch("protea.workers.stale_job_reaper.logger") as mock_logger:
+            reaper.run(interval_seconds=1)
+
+        # Should have logged the error but continued
+        mock_logger.error.assert_called_once()
+        assert call_count[0] == 2
+
+    def test_handle_stop_sets_flag(self):
+        """_handle_stop sets the _stop flag."""
+        factory = MagicMock()
+        reaper = StaleJobReaper(factory, timeout_seconds=3600)
+        assert reaper._stop is False
+        reaper._handle_stop(signal.SIGINT, None)
+        assert reaper._stop is True
+
+
+# ---------------------------------------------------------------------------
+# Feature engineering warmup
+# ---------------------------------------------------------------------------
+
+class TestTaxonomyWarmup:
+    def test_warmup_calls_get_ncbi(self):
+        from protea.core.feature_engineering import warmup_taxonomy_db
+
+        with patch("protea.core.feature_engineering._get_ncbi") as mock_get, \
+             patch("protea.core.feature_engineering._ETE3_AVAILABLE", True):
+            warmup_taxonomy_db()
+        mock_get.assert_called_once()
+
+    def test_warmup_skips_when_ete3_unavailable(self):
+        from protea.core.feature_engineering import warmup_taxonomy_db
+
+        with patch("protea.core.feature_engineering._ETE3_AVAILABLE", False), \
+             patch("protea.core.feature_engineering._get_ncbi") as mock_get:
+            warmup_taxonomy_db()  # should not raise
+        mock_get.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# BaseWorker — extended coverage
+# ---------------------------------------------------------------------------
+
+class TestBaseWorkerParentCancelled:
+    """Cover parent_job_id cancellation detection (lines 93-106)."""
+
+    def test_cancelled_parent_cancels_child(self):
+        """If parent is CANCELLED during claim, child should be CANCELLED too."""
+        parent_id = uuid4()
+        child_job = _make_job(parent_job_id=parent_id)
+        parent_job = MagicMock(spec=Job)
+        parent_job.id = parent_id
+        parent_job.status = JobStatus.CANCELLED
+
+        session = MagicMock()
+        # session.get returns child_job by default, parent_job when queried by parent_id
+        def get_side_effect(model, id_val):
+            if id_val == parent_id:
+                return parent_job
+            return child_job
+        session.get.side_effect = get_side_effect
+
+        factory = MagicMock(return_value=session)
+        registry, op = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(child_job.id)
+
+        assert child_job.status == JobStatus.CANCELLED
+        assert child_job.finished_at is not None
+        op.execute.assert_not_called()
+
+    def test_active_parent_does_not_cancel_child(self):
+        """If parent is still RUNNING, child should execute normally."""
+        parent_id = uuid4()
+        child_job = _make_job(parent_job_id=parent_id)
+        parent_job = MagicMock(spec=Job)
+        parent_job.id = parent_id
+        parent_job.status = JobStatus.RUNNING
+
+        session = MagicMock()
+        def get_side_effect(model, id_val):
+            if id_val == parent_id:
+                return parent_job
+            return child_job
+        session.get.side_effect = get_side_effect
+
+        factory = MagicMock(return_value=session)
+        registry, op = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(child_job.id)
+
+        assert child_job.status == JobStatus.SUCCEEDED
+        op.execute.assert_called_once()
+
+
+class TestBaseWorkerUnknownOperation:
+    """Cover unknown operation name — registry.get raises KeyError."""
+
+    def test_unknown_operation_raises_key_error(self):
+        """KeyError from registry.get propagates without being caught by inner handler."""
+        job = _make_job(operation="nonexistent_op")
+        session = MagicMock()
+        session.get.return_value = job
+
+        factory = MagicMock(return_value=session)
+        # Real registry with no operations registered
+        registry = OperationRegistry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        with pytest.raises(KeyError, match="nonexistent_op"):
+            worker.handle_job(job.id)
+
+        # Session should still be closed (finally block)
+        session.close.assert_called()
+
+
+class TestBaseWorkerTwoSessionPattern:
+    """Verify the two-session pattern: claim session commits before execute session."""
+
+    def test_two_sessions_are_created(self):
+        job = _make_job()
+        sessions = []
+
+        def make_session():
+            s = MagicMock()
+            s.get.return_value = job
+            sessions.append(s)
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+        registry, op = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        # Two sessions: claim + execute
+        assert len(sessions) >= 2
+        # Both should have been committed
+        sessions[0].commit.assert_called()
+        sessions[1].commit.assert_called()
+        # Both should have been closed
+        sessions[0].close.assert_called_once()
+        sessions[1].close.assert_called_once()
+
+    def test_claim_session_sets_running_before_execute(self):
+        """First session transitions to RUNNING; second session runs the operation."""
+        job = _make_job()
+        status_log = []
+
+        call_count = [0]
+
+        def make_session():
+            s = MagicMock()
+            s.get.return_value = job
+            call_count[0] += 1
+            current_call = call_count[0]
+
+            def commit_side_effect():
+                status_log.append((current_call, job.status))
+            s.commit.side_effect = commit_side_effect
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+        registry, _ = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        # Session 1 should commit with RUNNING, session 2 with SUCCEEDED
+        assert status_log[0] == (1, JobStatus.RUNNING)
+        assert status_log[1] == (2, JobStatus.SUCCEEDED)
+
+
+class TestBaseWorkerJobNotFoundOnExecute:
+    """Cover line 88: job is None on the execute session."""
+
+    def test_job_disappears_between_sessions(self):
+        job = _make_job()
+        call_count = [0]
+
+        def make_session():
+            s = MagicMock()
+            call_count[0] += 1
+            if call_count[0] == 1:
+                # Claim session finds the job
+                s.get.return_value = job
+            else:
+                # Execute session: job is gone
+                s.get.return_value = None
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+        registry, op = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        op.execute.assert_not_called()
+
+
+class TestBaseWorkerProgressFromResult:
+    """Cover progress update from OperationResult (lines 139-142)."""
+
+    def test_progress_current_only(self):
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(
+            result={}, progress_current=42
+        ))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        assert job.progress_current == 42
+
+    def test_no_progress_fields_leaves_job_unchanged(self):
+        """A result without progress values must not overwrite the job's progress."""
+        job = _make_job()
+        job.progress_current = job.progress_total = None
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(result={}))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        assert job.status == JobStatus.SUCCEEDED
+        assert job.progress_current is None
+        assert job.progress_total is None
+
+
+class TestBaseWorkerDeferredResult:
+    """Cover deferred result handling (lines 144-153)."""
+
+    def test_deferred_result_does_not_set_succeeded(self):
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(
+            result={"dispatched": True}, deferred=True
+        ))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        # Deferred: should NOT transition to SUCCEEDED
+        assert job.status != JobStatus.SUCCEEDED
+        # Should remain RUNNING (set in claim phase)
+        assert job.status == JobStatus.RUNNING
+
+
+class TestBaseWorkerPublishAfterCommit:
+    """Cover publish_after_commit and publish_operations (lines 169-176)."""
+
+    def test_publish_after_commit_publishes_child_jobs(self):
+        child_id = uuid4()
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(
+            result={},
+            publish_after_commit=[("protea.jobs", child_id)],
+        ))
+
+        worker = BaseWorker(
+            factory, registry, WorkerConfig(worker_name="test"),
+            amqp_url="amqp://localhost/",
+        )
+
+        with patch("protea.workers.base_worker.publish_job") as mock_pub:
+            worker.handle_job(job.id)
+
+        mock_pub.assert_called_once_with("amqp://localhost/", "protea.jobs", child_id)
+
+    def test_publish_operations_publishes_ephemeral_messages(self):
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(
+            result={},
+            publish_operations=[
+                ("protea.embeddings.batch", {"batch_data": [1, 2]}),
+            ],
+        ))
+
+        worker = BaseWorker(
+            factory, registry, WorkerConfig(worker_name="test"),
+            amqp_url="amqp://localhost/",
+        )
+
+        with patch("protea.workers.base_worker.publish_operation") as mock_pub:
+            worker.handle_job(job.id)
+
+        mock_pub.assert_called_once_with(
+            "amqp://localhost/", "protea.embeddings.batch", {"batch_data": [1, 2]}
+        )
+
+    def test_no_amqp_url_skips_publish(self):
+        """Without amqp_url, publish_after_commit is silently skipped."""
+        child_id = uuid4()
+        job = _make_job()
+        session = MagicMock()
+        session.get.return_value = job
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(result=OperationResult(
+            result={},
+            publish_after_commit=[("protea.jobs", child_id)],
+        ))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+
+        with patch("protea.workers.base_worker.publish_job") as mock_pub:
+            worker.handle_job(job.id)
+
+        mock_pub.assert_not_called()
+
+
+class TestBaseWorkerEmitProgress:
+    """Cover emit callback writing _progress_current/_progress_total (lines 124-129)."""
+
+    def test_emit_with_progress_fields_updates_job(self):
+        job = _make_job()
+
+        sessions = []
+        def make_session():
+            s = MagicMock()
+            s.get.return_value = job
+            sessions.append(s)
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+
+        def _execute(sess, payload, *, emit):
+            emit("progress", "step done", {"_progress_current": 5, "_progress_total": 20}, "info")
+            return OperationResult()
+
+        op = MagicMock()
+        op.name = "ping"
+        op.execute.side_effect = _execute
+        registry = OperationRegistry()
+        registry.register(op)
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker.handle_job(job.id)
+
+        # Every mocked session returns the same job object, so a progress update made
+        # through any session (claim, execute or emit) is observable on `job` directly.
+        assert job.progress_current == 5
+        assert job.progress_total == 20
+
+
+class TestBaseWorkerForceFailJob:
+    """Cover _force_fail_job (lines 242-263)."""
+
+    def test_force_fail_on_commit_failure(self):
+        """When execute session commit fails, _force_fail_job is called."""
+        job = _make_job()
+        call_count = [0]
+
+        def make_session():
+            s = MagicMock()
+            s.get.return_value = job
+            call_count[0] += 1
+            current = call_count[0]
+            if current == 2:
+                # Execute session: only its first commit() raises; later commits succeed
+                commit_count = [0]
+                def commit_side():
+                    commit_count[0] += 1
+                    if commit_count[0] == 1:
+                        raise RuntimeError("DB connection dropped")
+                s.commit.side_effect = commit_side
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+        registry, _ = _make_registry(raises=ValueError("op failed"))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+
+        with pytest.raises(ValueError, match="op failed"):
+            worker.handle_job(job.id)
+
+        # The fallback session (3rd) should have been created
+        assert call_count[0] >= 3
+
+    def test_force_fail_direct_call(self):
+        """Direct test of _force_fail_job method."""
+        job_id = uuid4()
+        session = MagicMock()
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        worker._force_fail_job(job_id, ValueError("original"))
+
+        session.execute.assert_called_once()
+        session.commit.assert_called_once()
+        session.close.assert_called_once()
+
+    def test_force_fail_handles_fallback_failure(self):
+        """If the fallback session also fails, it logs but doesn't crash."""
+        job_id = uuid4()
+        session = MagicMock()
+        session.commit.side_effect = RuntimeError("still broken")
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry()
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        # Should not raise
+        worker._force_fail_job(job_id, ValueError("original"))
+
+        session.close.assert_called_once()
+
+
+class TestBaseWorkerMaybeFailParent:
+    """Cover _maybe_fail_parent (lines 267-302)."""
+
+    def test_all_children_failed_marks_parent_failed(self):
+        """When all children are terminal and none succeeded, parent fails."""
+        parent_id = uuid4()
+        job = _make_job(parent_job_id=parent_id)
+
+        session = MagicMock()
+        session.get.return_value = job
+        # First query: non_terminal count = 0
+        # Second query: succeeded count = 0
+        query_results = [0, 0]
+        call_count = [0]
+
+        def scalar_side():
+            idx = call_count[0]
+            call_count[0] += 1
+            return query_results[idx] if idx < len(query_results) else 0
+
+        session.query.return_value.filter.return_value.scalar.side_effect = scalar_side
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(raises=RuntimeError("child failed"))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        with pytest.raises(RuntimeError, match="child failed"):
+            worker.handle_job(job.id)
+
+        # session.execute should have been called for the sa_update on parent
+        session.execute.assert_called()
+
+    def test_children_still_running_does_not_fail_parent(self):
+        """If some children are still running, parent is not failed."""
+        parent_id = uuid4()
+        job = _make_job(parent_job_id=parent_id)
+
+        session = MagicMock()
+        session.get.return_value = job
+        # non_terminal count = 3 (children still running)
+        session.query.return_value.filter.return_value.scalar.return_value = 3
+        factory = MagicMock(return_value=session)
+        registry, _ = _make_registry(raises=RuntimeError("child failed"))
+
+        worker = BaseWorker(factory, registry, WorkerConfig(worker_name="test"))
+        with pytest.raises(RuntimeError, match="child failed"):
+            worker.handle_job(job.id)
+
+        # session.execute should NOT have been called for parent update
+        session.execute.assert_not_called()
diff --git a/tests/test_compute_embeddings.py b/tests/test_compute_embeddings.py
index 4c742e2..9348d01 100644
--- a/tests/test_compute_embeddings.py
+++ b/tests/test_compute_embeddings.py
@@ -11,12 +11,14 @@
     ComputeEmbeddingsBatchOperation,
     ComputeEmbeddingsOperation,
     ComputeEmbeddingsPayload,
+    StoreEmbeddingsOperation,
     _aggregate_1d,
     _aggregate_residue_layers,
     _chunk_and_pool,
     _compute_chunk_spans,
     _validate_layers,
 )
+from protea.infrastructure.orm.models.job import JobStatus
 
 _noop_emit = lambda *_: None  # noqa: E731
 
@@ -64,11 +66,11 @@ def test_minimal_valid(self) -> None:
         assert p.device == "cuda"
 
     def test_empty_embedding_config_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             ComputeEmbeddingsPayload.model_validate({"embedding_config_id": ""})
 
     def test_whitespace_embedding_config_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             ComputeEmbeddingsPayload.model_validate({"embedding_config_id": "   "})
 
     def test_optional_fields_override(self) -> None:
@@ -319,7 +321,9 @@ def test_esm_residue_count_matches_content_only(self) -> None:
     def test_esm3c_strips_bos_and_eos(self) -> None:
         """ESM3c [1:-1] slicing excludes BOS and EOS from residue pooling."""
         import sys
+
         import torch
+
         from protea.core.operations.compute_embeddings import _embed_esm3c
 
         dim = 8
@@ -358,6 +362,7 @@ def logits(self, tensor, lc):
     def test_t5_includes_eos_token(self) -> None:
         """T5 keeps EOS in the residue tensor (PIS convention)."""
         import torch
+
         from protea.core.operations.compute_embeddings import _embed_t5
 
         dim = 8
@@ -634,6 +639,7 @@ def _esm_cfg(self):
     def test_esm_batch_size_consistency(self):
         """ESM embeddings must be bit-exact for batch_size 1, 2, and 4."""
         from transformers import AutoTokenizer, EsmModel
+
         from protea.core.operations.compute_embeddings import _embed_esm
 
         cfg = self._esm_cfg()
@@ -651,7 +657,7 @@ def test_esm_batch_size_consistency(self):
                     _embed_esm(model, tokenizer, self.SEQUENCES[i:i + batch_size], cfg, "cpu")
                 )
 
-            for i, (got, expected) in enumerate(zip(batched, ref)):
+            for i, (got, expected) in enumerate(zip(batched, ref, strict=False)):
                 np.testing.assert_allclose(
                     got[0].vector, expected[0].vector, rtol=1e-5, atol=1e-6,
                     err_msg=f"ESM batch_size={batch_size}: mismatch at sequence {i}",
@@ -666,6 +672,7 @@ def test_t5_padding_does_not_affect_embeddings(self):
         positions would produce wrong values and fail the assertion.
         """
         import torch
+
         from protea.core.operations.compute_embeddings import _embed_t5
 
         cfg = _mock_config(
@@ -715,3 +722,184 @@ def batch_encode_plus(self, seqs, **kwargs):
                     results[i], ref[i], rtol=1e-5, atol=1e-6,
                     err_msg=f"T5 batch_size={batch_size}: mismatch at sequence {i}",
                 )
+
+
+# ---------------------------------------------------------------------------
+# StoreEmbeddingsOperation
+# ---------------------------------------------------------------------------
+
+class TestStoreEmbeddingsOperation:
+    def _op(self) -> StoreEmbeddingsOperation:
+        return StoreEmbeddingsOperation()
+
+    def _make_payload(self, n_sequences=2, skip_existing=True, **kw):
+        sequences = []
+        for i in range(n_sequences):
+            sequences.append({
+                "sequence_id": i + 1,
+                "chunks": [{
+                    "chunk_index_s": 0,
+                    "chunk_index_e": None,
+                    "vector": [0.1, 0.2, 0.3],
+                    "embedding_dim": 3,
+                }],
+            })
+        defaults = {
+            "parent_job_id": str(uuid.uuid4()),
+            "embedding_config_id": str(uuid.uuid4()),
+            "skip_existing": skip_existing,
+            "sequences": sequences,
+        }
+        defaults.update(kw)
+        return defaults
+
+    def test_stores_embeddings(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.RUNNING
+        session.get.return_value = parent
+        # No existing embeddings
+        session.query.return_value.filter_by.return_value.first.return_value = None
+        # Progress update
+        row = MagicMock()
+        row.progress_current = 1
+        row.progress_total = 5
+        session.execute.return_value.fetchone.return_value = row
+
+        result = op.execute(session, self._make_payload(), emit=_noop_emit)
+        assert result.result["embeddings_stored"] == 2
+        assert result.result["sequences_skipped"] == 0
+
+    def test_skips_existing_when_enabled(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.RUNNING
+        session.get.return_value = parent
+        # All existing
+        session.query.return_value.filter_by.return_value.first.return_value = MagicMock()
+        row = MagicMock()
+        row.progress_current = 1
+        row.progress_total = 5
+        session.execute.return_value.fetchone.return_value = row
+
+        result = op.execute(session, self._make_payload(skip_existing=True), emit=_noop_emit)
+        assert result.result["sequences_skipped"] == 2
+        assert result.result["embeddings_stored"] == 0
+
+    def test_deletes_existing_when_skip_disabled(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.RUNNING
+        session.get.return_value = parent
+        row = MagicMock()
+        row.progress_current = 1
+        row.progress_total = 5
+        session.execute.return_value.fetchone.return_value = row
+
+        result = op.execute(session, self._make_payload(skip_existing=False), emit=_noop_emit)
+        assert result.result["embeddings_stored"] == 2
+        # Should have called delete on existing rows
+        assert session.query.return_value.filter_by.return_value.delete.called
+
+    def test_skips_when_parent_cancelled(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.CANCELLED
+        session.get.return_value = parent
+
+        result = op.execute(session, self._make_payload(), emit=_noop_emit)
+        assert result.result["skipped"] is True
+
+    def test_skips_when_parent_failed(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.FAILED
+        session.get.return_value = parent
+
+        result = op.execute(session, self._make_payload(), emit=_noop_emit)
+        assert result.result["skipped"] is True
+
+    def test_last_batch_closes_parent(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.RUNNING
+        session.get.return_value = parent
+        session.query.return_value.filter_by.return_value.first.return_value = None
+
+        progress_row = MagicMock()
+        progress_row.progress_current = 3
+        progress_row.progress_total = 3
+        closed_row = MagicMock()
+        closed_row.id = uuid.uuid4()
+        session.execute.return_value.fetchone.side_effect = [progress_row, closed_row]
+
+        events = []
+        def capture_emit(event, msg, fields, level):
+            events.append(event)
+
+        op.execute(session, self._make_payload(n_sequences=1), emit=capture_emit)
+        assert "store_embeddings.parent_succeeded" in events
+
+    def test_multiple_chunks_per_sequence(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = JobStatus.RUNNING
+        session.get.return_value = parent
+        session.query.return_value.filter_by.return_value.first.return_value = None
+        row = MagicMock()
+        row.progress_current = 1
+        row.progress_total = 5
+        session.execute.return_value.fetchone.return_value = row
+
+        payload = self._make_payload(n_sequences=0)
+        payload["sequences"] = [{
+            "sequence_id": 1,
+            "chunks": [
+                {"chunk_index_s": 0, "chunk_index_e": 4, "vector": [0.1], "embedding_dim": 1},
+                {"chunk_index_s": 4, "chunk_index_e": 8, "vector": [0.2], "embedding_dim": 1},
+                {"chunk_index_s": 8, "chunk_index_e": 10, "vector": [0.3], "embedding_dim": 1},
+            ],
+        }]
+
+        result = op.execute(session, payload, emit=_noop_emit)
+        assert result.result["embeddings_stored"] == 3
+
+    def test_name(self) -> None:
+        assert StoreEmbeddingsOperation().name == "store_embeddings"
+
+
+# ---------------------------------------------------------------------------
+# Coordinator — GPU retry (RetryLaterError)
+# ---------------------------------------------------------------------------
+
+class TestComputeEmbeddingsRetryLogic:
+    def _op(self) -> ComputeEmbeddingsOperation:
+        return ComputeEmbeddingsOperation()
+
+    def test_gpu_busy_raises_retry_later(self) -> None:
+        from protea.core.contracts.operation import RetryLaterError
+
+        op = self._op()
+        cfg = _mock_config()
+        session = MagicMock()
+        session.get.return_value = cfg
+
+        # Simulate another running compute_embeddings job (GPU mutex)
+        other_job = MagicMock()
+        session.query.return_value.filter.return_value.first.return_value = other_job
+
+        payload = {
+            "embedding_config_id": str(cfg.id),
+            "_job_id": str(uuid.uuid4()),
+        }
+
+        with patch.object(op, "_load_sequence_ids", return_value=[1, 2, 3]):
+            with pytest.raises(RetryLaterError):
+                op.execute(session, payload, emit=_noop_emit)
diff --git a/tests/test_core.py b/tests/test_core.py
index b995518..6bbf11c 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -4,13 +4,20 @@
 """
 from __future__ import annotations
 
+import gzip
+from io import BytesIO
 from unittest.mock import MagicMock, patch
 
 import pytest
 import requests
 
-from protea.core.contracts.operation import OperationResult
+from protea.core.contracts.operation import OperationResult, RetryLaterError
 from protea.core.contracts.registry import OperationRegistry
+from protea.core.evidence_codes import ECO_TO_CODE, EXPERIMENTAL, is_experimental, normalize
+from protea.core.operations.fetch_uniprot_metadata import (
+    FetchUniProtMetadataOperation,
+    FetchUniProtMetadataPayload,
+)
 from protea.core.operations.ping import PingOperation
 from protea.core.utils import UniProtHttpMixin, chunks
 
@@ -113,7 +120,8 @@ def __init__(self):
         self._http = MagicMock()
 
 
-_noop_emit = lambda *_: None
+def _noop_emit(*_):
+    return None
 
 
 class TestUniProtHttpMixin:
@@ -130,9 +138,11 @@ def test_returns_response_on_200(self) -> None:
 
     def test_retries_on_429(self) -> None:
         obj = self._obj()
-        bad = MagicMock(); bad.status_code = 429
+        bad = MagicMock()
+        bad.status_code = 429
         bad.headers = {}
-        good = MagicMock(); good.status_code = 200
+        good = MagicMock()
+        good.status_code = 200
         obj._http.get.side_effect = [bad, good]
         with patch("protea.core.utils.time.sleep"):
             result = obj._get_with_retries("http://x", _make_payload(), _noop_emit)
@@ -141,9 +151,11 @@ def test_retries_on_429(self) -> None:
 
     def test_uses_retry_after_header(self) -> None:
         obj = self._obj()
-        bad = MagicMock(); bad.status_code = 429
+        bad = MagicMock()
+        bad.status_code = 429
         bad.headers = {"Retry-After": "5"}
-        good = MagicMock(); good.status_code = 200
+        good = MagicMock()
+        good.status_code = 200
         obj._http.get.side_effect = [bad, good]
         sleep_calls = []
         with patch("protea.core.utils.time.sleep", side_effect=sleep_calls.append):
@@ -153,7 +165,8 @@ def test_uses_retry_after_header(self) -> None:
 
     def test_raises_after_max_retries(self) -> None:
         obj = self._obj()
-        bad = MagicMock(); bad.status_code = 503
+        bad = MagicMock()
+        bad.status_code = 503
         bad.headers = {}
         bad.raise_for_status.side_effect = requests.HTTPError("503")
         obj._http.get.return_value = bad
@@ -163,7 +176,8 @@ def test_raises_after_max_retries(self) -> None:
 
     def test_retries_on_network_exception(self) -> None:
         obj = self._obj()
-        good = MagicMock(); good.status_code = 200
+        good = MagicMock()
+        good.status_code = 200
         obj._http.get.side_effect = [requests.ConnectionError("down"), good]
         with patch("protea.core.utils.time.sleep"):
             result = obj._get_with_retries("http://x", _make_payload(), _noop_emit)
@@ -188,7 +202,6 @@ def test_extract_next_cursor_no_cursor_param(self) -> None:
 # evidence_codes — normalize and is_experimental
 # ---------------------------------------------------------------------------
 
-from protea.core.evidence_codes import normalize, is_experimental, ECO_TO_CODE, EXPERIMENTAL
 
 
 class TestNormalize:
@@ -233,9 +246,6 @@ def test_unknown_code_not_experimental(self):
 # RetryLaterError
 # ---------------------------------------------------------------------------
 
-from protea.core.contracts.operation import RetryLaterError
-
-
 class TestRetryLaterError:
     def test_default_delay(self):
         err = RetryLaterError("GPU busy")
@@ -249,3 +259,319 @@ def test_custom_delay(self):
     def test_is_exception(self):
         with pytest.raises(RetryLaterError):
             raise RetryLaterError("test")
+
+
+# ---------------------------------------------------------------------------
+# FetchUniProtMetadataOperation
+# ---------------------------------------------------------------------------
+
+# The tests below reuse the module-level ``_noop_emit`` helper defined earlier in
+# this file (next to ``TestUniProtHttpMixin``) instead of redefining it here.
+
+
+def _make_tsv_content(rows: list[dict[str, str]], compressed: bool = True) -> bytes:
+    """Serialise ``rows`` as TSV bytes, gzipping them unless ``compressed`` is false.
+
+    Columns follow the first row's keys; an empty ``rows`` yields a header-only table.
+    """
+    if rows:
+        columns = list(rows[0].keys())
+        records = ["\t".join(record.get(col, "") for col in columns) for record in rows]
+        table = "\n".join(["\t".join(columns), *records]) + "\n"
+    else:
+        table = "Entry\tReviewed\tEntry Name\tOrganism\tGene Names\tLength" + "\n"
+
+    encoded = table.encode("utf-8")
+    if not compressed:
+        return encoded
+    sink = BytesIO()
+    with gzip.GzipFile(fileobj=sink, mode="wb") as gz:
+        gz.write(encoded)
+    return sink.getvalue()
+
+
+
+
+class TestFetchUniProtMetadataExecute:
+    def _make_op(self):
+        op = FetchUniProtMetadataOperation()
+        op._http = MagicMock()
+        return op
+
+    def test_execute_empty_page_continues(self):
+        """A page with no data rows is counted as a page but contributes no rows."""
+        operation = self._make_op()
+        seen = []
+
+        def emit(event, message, fields, level):
+            seen.append(event)
+
+        # Single 200 response whose gzipped body holds only the TSV header
+        page = MagicMock()
+        page.status_code = 200
+        page.headers = {"X-Total-Results": "0"}
+        page.content = _make_tsv_content([], compressed=True)
+        operation._http.get.return_value = page
+
+        db = MagicMock()
+        request = {"search_criteria": "organism_id:9606", "page_size": 10}
+
+        outcome = operation.execute(db, request, emit=emit)
+        assert outcome.result["rows"] == 0
+        assert outcome.result["pages"] == 1
+
+    def test_execute_total_limit_truncation(self):
+        """With ``total_limit`` below the page's row count, only that many rows are kept."""
+        operation = self._make_op()
+
+        # Five-row page: every FIELD_MAP column and the core columns are blank
+        blank_columns = [
+            *FetchUniProtMetadataOperation.FIELD_MAP.values(),
+            "Entry Name",
+            "Organism",
+            "Gene Names",
+            "Length",
+        ]
+        page_rows = []
+        for idx in range(5):
+            record = {"Entry": f"P0000{idx}", "Reviewed": "reviewed"}
+            record.update(dict.fromkeys(blank_columns, ""))
+            page_rows.append(record)
+
+        page = MagicMock()
+        page.status_code = 200
+        page.headers = {"X-Total-Results": "5"}
+        page.content = _make_tsv_content(page_rows, compressed=True)
+        operation._http.get.return_value = page
+
+        db = MagicMock()
+        db.query.return_value.filter.return_value.all.return_value = []
+
+        request = {
+            "search_criteria": "organism_id:9606",
+            "page_size": 10,
+            "total_limit": 3,
+        }
+        outcome = operation.execute(db, request, emit=_noop_emit)
+        # The page carries 5 rows, but the limit caps the processed count at 3
+        assert outcome.result["rows"] == 3
+
+    def test_execute_total_limit_zero_after_truncation(self):
+        """With ``total_limit=1`` and two one-row pages queued, one row is reported."""
+        operation = self._make_op()
+
+        def one_row_page(accession, headers):
+            """Build a 200 response whose gzipped TSV body holds a single row."""
+            record = {"Entry": accession}
+            record.update(dict.fromkeys(FetchUniProtMetadataOperation.FIELD_MAP.values(), ""))
+            record.update(
+                {"Reviewed": "", "Entry Name": "", "Organism": "", "Gene Names": "", "Length": ""}
+            )
+            page = MagicMock()
+            page.status_code = 200
+            page.headers = headers
+            page.content = _make_tsv_content([record], compressed=True)
+            return page
+
+        # Page 1 advertises a follow-up cursor; page 2 carries no "link" header
+        first = one_row_page(
+            "P00001",
+            {"X-Total-Results": "2", "link": '<http://next?cursor=ABC>; rel="next"'},
+        )
+        second = one_row_page("P00002", {"X-Total-Results": "2"})
+
+        operation._http.get.side_effect = [first, second]
+
+        db = MagicMock()
+        db.query.return_value.filter.return_value.all.return_value = []
+
+        request = {
+            "search_criteria": "organism_id:9606",
+            "page_size": 1,
+            "total_limit": 1,
+        }
+
+        outcome = operation.execute(db, request, emit=_noop_emit)
+        # total_limit=1 is already met by the first page's single row
+        assert outcome.result["rows"] == 1
+
+    def test_x_total_results_none_on_invalid_header(self):
+        """A non-numeric ``X-Total-Results`` header leaves ``_total_results`` as ``None``."""
+        operation = self._make_op()
+
+        page = MagicMock()
+        page.status_code = 200
+        page.headers = {"X-Total-Results": "not-a-number"}
+        page.content = _make_tsv_content([], compressed=True)
+        operation._http.get.return_value = page
+
+        db = MagicMock()
+        request = {"search_criteria": "test", "page_size": 10}
+
+        operation.execute(db, request, emit=_noop_emit)
+        assert operation._total_results is None
+
+    def test_decode_response_uncompressed(self):
+        """``_decode_response`` with ``compressed=False`` yields text containing the body."""
+        operation = self._make_op()
+        raw = MagicMock()
+        raw.content = b"Entry\tReviewed\nP00001\treviewed\n"
+        decoded = operation._decode_response(raw, compressed=False)
+        assert "P00001" in decoded
+
+    def test_store_rows_empty_accession_skipped(self):
+        """A row whose ``Entry`` is blank is neither touched nor upserted."""
+        operation = self._make_op()
+        db = MagicMock()
+        db.query.return_value.filter.return_value.all.return_value = []
+
+        options = FetchUniProtMetadataPayload(
+            search_criteria="test",
+            update_protein_core=False,
+        )
+
+        # Blank accession; FIELD_MAP columns not already present default to ""
+        record = {"Entry": "", "Absorption": "test"}
+        for column in FetchUniProtMetadataOperation.FIELD_MAP.values():
+            record.setdefault(column, "")
+
+        touched, upserted = operation._store_rows(db, [record], options, _noop_emit)
+        assert touched == 0
+        assert upserted == 0
+
+    def test_store_rows_update_protein_core_fields(self):
+        """``update_protein_core=True`` fills unset core fields on the matching Protein."""
+        operation = self._make_op()
+        db = MagicMock()
+
+        # Protein whose core fields are all unset
+        protein = MagicMock()
+        protein.accession = "P12345"
+        protein.reviewed = None
+        protein.entry_name = None
+        protein.organism = None
+        protein.gene_name = None
+        protein.length = None
+
+        # ``side_effect`` takes precedence over ``return_value``, so it alone
+        # decides what each ``query(...)`` call yields.
+        query_calls = [0]
+
+        def query_side_effect(*args):
+            query_calls[0] += 1
+            lookup = MagicMock()
+            # First call: metadata lookup (nothing stored yet);
+            # later calls: protein lookup.
+            found = [] if query_calls[0] <= 1 else [protein]
+            lookup.filter.return_value.all.return_value = found
+            return lookup
+
+        db.query.side_effect = query_side_effect
+
+        options = FetchUniProtMetadataPayload(
+            search_criteria="test",
+            update_protein_core=True,
+        )
+
+        record = {
+            "Entry": "P12345", "Reviewed": "reviewed", "Entry Name": "TEST_HUMAN",
+            "Organism": "Homo sapiens", "Gene Names": "TEST GENE2", "Length": "500",
+        }
+        for column in FetchUniProtMetadataOperation.FIELD_MAP.values():
+            record.setdefault(column, "")
+
+        touched, _ = operation._store_rows(db, [record], options, _noop_emit)
+        assert protein.reviewed is True
+        assert protein.entry_name == "TEST_HUMAN"
+        assert protein.organism == "Homo sapiens"
+        assert protein.gene_name == "TEST"
+        assert protein.length == 500
+        assert touched == 1
+
+    def test_store_rows_unreviewed_protein(self):
+        """A ``Reviewed`` value of ``"unreviewed"`` sets ``protein.reviewed`` to ``False``."""
+        operation = self._make_op()
+        db = MagicMock()
+
+        # Protein whose core fields are all unset
+        protein = MagicMock()
+        protein.accession = "Q99999"
+        protein.reviewed = None
+        protein.entry_name = None
+        protein.organism = None
+        protein.gene_name = None
+        protein.length = None
+
+        query_calls = [0]
+
+        def query_side_effect(*args):
+            query_calls[0] += 1
+            lookup = MagicMock()
+            found = [] if query_calls[0] <= 1 else [protein]
+            lookup.filter.return_value.all.return_value = found
+            return lookup
+
+        db.query.side_effect = query_side_effect
+
+        options = FetchUniProtMetadataPayload(
+            search_criteria="test",
+            update_protein_core=True,
+        )
+
+        # Unreviewed entry; columns not given here default to blank
+        record = {"Entry": "Q99999", "Reviewed": "unreviewed"}
+        for column in FetchUniProtMetadataOperation.FIELD_MAP.values():
+            record.setdefault(column, "")
+        for column in ("Entry Name", "Organism", "Gene Names", "Length"):
+            record.setdefault(column, "")
+
+        touched, _ = operation._store_rows(db, [record], options, _noop_emit)
+        assert protein.reviewed is False
+        assert touched == 1
+
+    def test_store_rows_protein_not_in_db(self):
+        """Metadata is still upserted when no Protein row matches the accession.
+
+        Both the existing-metadata lookup and the protein lookup return nothing,
+        so the row counts as upserted but not as touched.
+        """
+        operation = self._make_op()
+        db = MagicMock()
+
+        # Every ``query(...).filter(...).all()`` call comes back empty, so one
+        # ``return_value`` chain covers all lookups; no per-call branching needed.
+        db.query.return_value.filter.return_value.all.return_value = []
+
+        options = FetchUniProtMetadataPayload(
+            search_criteria="test",
+            update_protein_core=True,
+        )
+
+        # Row for an accession that has no Protein counterpart
+        record = {"Entry": "UNKNOWN1", "Reviewed": "reviewed"}
+        for column in FetchUniProtMetadataOperation.FIELD_MAP.values():
+            record.setdefault(column, "")
+        record.setdefault("Entry Name", "")
+        record.setdefault("Organism", "")
+        record.setdefault("Gene Names", "")
+        record.setdefault("Length", "")
+
+        touched, upserted = operation._store_rows(db, [record], options, _noop_emit)
+
+        # No Protein row exists, so nothing is touched ...
+        assert touched == 0
+        # ... but the metadata row itself is still upserted
+        assert upserted == 1
+
+    def test_load_existing_metadata_chunks(self):
+        """``_load_existing_metadata`` keys the rows it finds by ``canonical_accession``."""
+        operation = self._make_op()
+        db = MagicMock()
+
+        existing = MagicMock()
+        existing.canonical_accession = "P12345"
+        db.query.return_value.filter.return_value.all.return_value = [existing]
+
+        by_accession = operation._load_existing_metadata(db, ["P12345"], chunk_size=10)
+        assert "P12345" in by_accession
+        assert by_accession["P12345"] is existing
diff --git a/tests/test_embeddings_router.py b/tests/test_embeddings_router.py
index 89522e9..67f8130 100644
--- a/tests/test_embeddings_router.py
+++ b/tests/test_embeddings_router.py
@@ -5,7 +5,7 @@
 from __future__ import annotations
 
 from contextlib import contextmanager
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
@@ -37,7 +37,7 @@ def _make_config(config_id=None):
     c.chunk_size = 512
     c.chunk_overlap = 0
     c.description = None
-    c.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc)
+    c.created_at = datetime(2024, 1, 1, tzinfo=UTC)
     return c
 
 
@@ -95,7 +95,7 @@ def test_valid_body_returns_201_ish(self, client, session):
         def _fake_add(obj):
             # Copy needed attributes from the validated body into the mock
             obj.id = uuid4()
-            obj.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc)
+            obj.created_at = datetime(2024, 1, 1, tzinfo=UTC)
             added_objects.append(obj)
 
         session.add.side_effect = _fake_add
@@ -151,12 +151,12 @@ def test_valid_chunking_config_is_accepted(self, client, session):
         """chunk_overlap < chunk_size must be accepted."""
         body = {**_VALID_CONFIG_BODY, "use_chunking": True, "chunk_size": 512, "chunk_overlap": 64}
         # Just check it passes validation (not 422)
-        cfg = _make_config()
+        _make_config()
         added: list = []
 
         def _fake_add(obj):
             obj.id = uuid4()
-            obj.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc)
+            obj.created_at = datetime(2024, 1, 1, tzinfo=UTC)
             added.append(obj)
 
         session.add.side_effect = _fake_add
@@ -218,7 +218,7 @@ def _make_prediction_set(ps_id=None):
     ps.query_set_id = None
     ps.limit_per_entry = 5
     ps.distance_threshold = None
-    ps.created_at = datetime(2024, 1, 1, tzinfo=timezone.utc)
+    ps.created_at = datetime(2024, 1, 1, tzinfo=UTC)
     return ps
 
 
@@ -240,6 +240,12 @@ def _make_go_prediction(accession="P12345", distance=0.1):
                 "taxonomic_common_ancestors"):
         setattr(pred, col, None)
     pred.taxonomic_relation = None
+    # re-ranker features
+    pred.vote_count = None
+    pred.k_position = None
+    pred.go_term_frequency = None
+    pred.ref_annotation_density = None
+    pred.neighbor_distance_std = None
     return pred
 
 
@@ -342,3 +348,682 @@ def test_multiple_rows_all_included(self, client, session):
 
         lines = resp.text.splitlines()
         assert len(lines) == 6  # 1 header + 5 data
+
+    def test_filter_by_accession(self, client, session):
+        """A request carrying ``accession`` succeeds and returns the mocked row."""
+        prediction = _make_go_prediction("P99999")
+        term = _make_go_term()
+
+        resp = self._get(client, session, uuid4(), [(prediction, term)], accession="P99999")
+        assert resp.status_code == 200
+        tsv_lines = resp.text.splitlines()
+        assert len(tsv_lines) == 2
+        assert "P99999" in tsv_lines[1]
+
+    def test_filter_by_aspect(self, client, session):
+        """A request carrying ``aspect`` is accepted (only the 200 status is checked)."""
+        prediction = _make_go_prediction()
+        term = _make_go_term(aspect="P")
+        rows = [(prediction, term)]
+        resp = self._get(client, session, uuid4(), rows, aspect="P")
+        assert resp.status_code == 200
+
+    def test_filter_by_max_distance(self, client, session):
+        """A request carrying ``max_distance`` is accepted (only the 200 status is checked)."""
+        prediction = _make_go_prediction(distance=0.05)
+        term = _make_go_term()
+        rows = [(prediction, term)]
+        resp = self._get(client, session, uuid4(), rows, max_distance=0.5)
+        assert resp.status_code == 200
+
+    def test_alignment_fields_formatted(self, client, session):
+        """A non-null ``identity_nw`` of 0.95123456 is rendered as ``0.951235``."""
+        prediction = _make_go_prediction()
+        prediction.identity_nw = 0.95123456
+        prediction.similarity_nw = 0.88
+        term = _make_go_term()
+
+        resp = self._get(client, session, uuid4(), [(prediction, term)])
+        tsv_lines = resp.text.splitlines()
+        cells = tsv_lines[1].split("\t")
+        columns = tsv_lines[0].split("\t")
+
+        assert cells[columns.index("identity_nw")] == "0.951235"
+
+
+# ---------------------------------------------------------------------------
+# _fmt helper
+# ---------------------------------------------------------------------------
+
+class TestFmt:
+    def test_none_returns_empty(self):
+        from protea.api.routers.embeddings import _fmt
+        assert _fmt(None) == ""  # None renders as an empty string
+
+    def test_float_returns_formatted(self):
+        from protea.api.routers.embeddings import _fmt
+        assert _fmt(0.123456789) == "0.123457"  # rounded: six digits after the point here
+
+    def test_zero_returns_formatted(self):
+        from protea.api.routers.embeddings import _fmt
+        assert _fmt(0.0) == "0"  # zero renders without any decimals
+
+
+# ---------------------------------------------------------------------------
+# get_session_factory / get_amqp_url — RuntimeError when not set
+# ---------------------------------------------------------------------------
+
+class TestDependencyGuards:
+    def test_session_factory_missing_raises(self):
+        from protea.api.routers.embeddings import get_session_factory
+        req = MagicMock()
+        req.app.state = MagicMock(spec=[])  # empty spec: state has no session_factory attr
+        with pytest.raises(RuntimeError, match="session_factory"):
+            get_session_factory(req)
+
+    def test_amqp_url_missing_raises(self):
+        from protea.api.routers.embeddings import get_amqp_url
+        req = MagicMock()
+        req.app.state = MagicMock(spec=[])  # empty spec: state has no amqp_url attr
+        with pytest.raises(RuntimeError, match="amqp_url"):
+            get_amqp_url(req)
+
+
+# ---------------------------------------------------------------------------
+# Additional validation edge cases
+# ---------------------------------------------------------------------------
+
+class TestValidationEdgeCases:
+    def test_normalize_residues_non_bool_returns_422(self, client, session):
+        """A string ``normalize_residues`` is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "normalize_residues": "yes"})
+        assert resp.status_code == 422
+        assert any("normalize_residues" in str(err) for err in resp.json()["detail"])
+
+    def test_normalize_non_bool_returns_422(self, client, session):
+        """A string ``normalize`` is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "normalize": "yes"})
+        assert resp.status_code == 422
+        assert any("normalize" in str(err) for err in resp.json()["detail"])
+
+    def test_use_chunking_non_bool_returns_422(self, client, session):
+        """A string ``use_chunking`` is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "use_chunking": "yes"})
+        assert resp.status_code == 422
+        assert any("use_chunking" in str(err) for err in resp.json()["detail"])
+
+    def test_chunk_size_non_positive_returns_422(self, client, session):
+        """A ``chunk_size`` of -1 is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "chunk_size": -1})
+        assert resp.status_code == 422
+        assert any("chunk_size" in str(err) for err in resp.json()["detail"])
+
+    def test_chunk_overlap_negative_returns_422(self, client, session):
+        """A ``chunk_overlap`` of -1 is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "chunk_overlap": -1})
+        assert resp.status_code == 422
+        assert any("chunk_overlap" in str(err) for err in resp.json()["detail"])
+
+    def test_description_non_string_returns_422(self, client, session):
+        """An integer ``description`` is rejected and named in the error detail."""
+        resp = client.post("/embeddings/configs", json={**_VALID_CONFIG_BODY, "description": 42})
+        assert resp.status_code == 422
+        assert any("description" in str(err) for err in resp.json()["detail"])
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/configs/{config_id}
+# ---------------------------------------------------------------------------
+
+class TestGetEmbeddingConfig:
+    def test_returns_config(self, client, session):
+        """The stored config is returned together with its embedding count."""
+        config = _make_config()
+        session.get.return_value = config
+        # Mocked result of the embedding count query
+        session.query.return_value.filter.return_value.scalar.return_value = 42
+
+        resp = client.get(f"/embeddings/configs/{config.id}")
+        assert resp.status_code == 200
+        payload = resp.json()
+        assert payload["id"] == str(config.id)
+        assert payload["model_name"] == "facebook/esm2_t33_650M_UR50D"
+        assert payload["embedding_count"] == 42
+
+    def test_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/embeddings/configs/{uuid4()}")
+        assert resp.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# DELETE /embeddings/configs/{config_id} — with prediction sets
+# ---------------------------------------------------------------------------
+
+class TestDeleteEmbeddingConfigCascade:
+    def test_delete_with_prediction_sets(self, client, session):
+        """Deleting a config that has a prediction set succeeds and echoes the config id."""
+        config = _make_config()
+        session.get.return_value = config
+
+        filtered = session.query.return_value.filter.return_value
+        # query(PredictionSet.id).filter(...).all() yields one dependent set id
+        filtered.all.return_value = [(uuid4(),)]
+        # Bulk deletes return counts
+        filtered.delete.return_value = 10
+
+        resp = client.delete(f"/embeddings/configs/{config.id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["deleted"] == str(config.id)
+
+
+# ---------------------------------------------------------------------------
+# POST /embeddings/predict
+# ---------------------------------------------------------------------------
+
+class TestPredictGoTerms:
+    def _make_predict_app(self, session):
+        """Build the app around a mock session factory; ``session`` itself is unused."""
+        session_factory = MagicMock()
+        return _make_app(session_factory)
+
+    def test_predict_success(self, session):
+        """A request whose three referenced rows all exist is queued and published once."""
+        app = self._make_predict_app(session)
+
+        config_id = uuid4()
+        ann_id = uuid4()
+        onto_id = uuid4()
+        # ``session.get`` finds a row for every lookup
+        session.get.return_value = MagicMock()
+        added = []
+
+        def _fake_add(obj):
+            # Record everything the router adds (Job and JobEvent)
+            added.append(obj)
+            # An object exposing ``operation`` is taken to be the Job: give it an id
+            if hasattr(obj, "operation"):
+                obj.id = 42
+
+        session.add.side_effect = _fake_add
+        session.flush = MagicMock()
+
+        body = {
+            "embedding_config_id": str(config_id),
+            "annotation_set_id": str(ann_id),
+            "ontology_snapshot_id": str(onto_id),
+        }
+        with (
+            patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)),
+            patch("protea.api.routers.embeddings.publish_job") as mock_pub,
+        ):
+            client = TestClient(app, raise_server_exceptions=True)
+            resp = client.post("/embeddings/predict", json=body)
+
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "queued"
+        mock_pub.assert_called_once()
+
+    def test_predict_invalid_uuid_returns_422(self, session):
+        """A non-UUID ``embedding_config_id`` is rejected with 422."""
+        app = self._make_predict_app(session)
+        valid_ids = {"annotation_set_id": str(uuid4()), "ontology_snapshot_id": str(uuid4())}
+        body = {"embedding_config_id": "not-a-uuid", **valid_ids}
+
+        with patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)):
+            client = TestClient(app, raise_server_exceptions=True)
+            resp = client.post("/embeddings/predict", json=body)
+        assert resp.status_code == 422
+
+    def test_predict_config_not_found_returns_404(self, session):
+        """When ``session.get`` finds nothing, the endpoint answers 404."""
+        app = self._make_predict_app(session)
+        # ``session.get`` returns None, so the EmbeddingConfig lookup misses
+        session.get.return_value = None
+        id_fields = ("embedding_config_id", "annotation_set_id", "ontology_snapshot_id")
+        body = {field: str(uuid4()) for field in id_fields}
+
+        with patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)):
+            client = TestClient(app, raise_server_exceptions=True)
+            resp = client.post("/embeddings/predict", json=body)
+        assert resp.status_code == 404
+
+    def test_predict_annotation_set_not_found_returns_404(self, session):
+        """Only EmbeddingConfig lookups succeed; the endpoint answers 404."""
+        app = self._make_predict_app(session)
+
+        def _get_side(model_cls, id_val):
+            from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
+
+            # The config is found; any other model lookup returns None
+            return MagicMock() if model_cls is EmbeddingConfig else None
+
+        session.get.side_effect = _get_side
+        id_fields = ("embedding_config_id", "annotation_set_id", "ontology_snapshot_id")
+        body = {field: str(uuid4()) for field in id_fields}
+
+        with patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)):
+            client = TestClient(app, raise_server_exceptions=True)
+            resp = client.post("/embeddings/predict", json=body)
+        assert resp.status_code == 404
+
+    def test_predict_ontology_not_found_returns_404(self, session):
+        """A missing OntologySnapshot yields 404 even though the other lookups succeed."""
+        app = self._make_predict_app(session)
+
+        def _get_side(model_cls, id_val):
+            from protea.infrastructure.orm.models.annotation.ontology_snapshot import (
+                OntologySnapshot,
+            )
+            # Only the ontology snapshot lookup misses; every other model is found
+            if model_cls is OntologySnapshot:
+                return None
+            return MagicMock()
+
+        session.get.side_effect = _get_side
+        with patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)):
+            client = TestClient(app, raise_server_exceptions=True)
+            resp = client.post("/embeddings/predict", json={
+                "embedding_config_id": str(uuid4()),
+                "annotation_set_id": str(uuid4()),
+                "ontology_snapshot_id": str(uuid4()),
+            })
+        assert resp.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets
+# ---------------------------------------------------------------------------
+
+class TestListPredictionSets:
+    @staticmethod
+    def _wire_list_query(session, rows):
+        """Make the joined list query's final ``.all()`` return ``rows``."""
+        # query(PredictionSet, EmbeddingConfig, AnnotationSet, OntologySnapshot, count_subq)
+        #   .join(...).join(...).join(...).order_by(...).limit(...).all()
+        # The count subquery is built separately, but only this final result matters here.
+        joined = session.query.return_value.join.return_value.join.return_value.join.return_value
+        limited = joined.order_by.return_value.limit.return_value
+        limited.all.return_value = rows
+
+    def test_returns_list(self, client, session):
+        """One joined row becomes one list entry carrying labels, version and count."""
+        pred_set = _make_prediction_set()
+        config = _make_config()
+        annotations = MagicMock(source="goa", source_version="2024-01")
+        snapshot = MagicMock(obo_version="2024-01-01")
+
+        # One joined row: (set, config, annotation set, snapshot, prediction count)
+        self._wire_list_query(session, [(pred_set, config, annotations, snapshot, 100)])
+
+        resp = client.get("/embeddings/prediction-sets")
+        assert resp.status_code == 200
+        listing = resp.json()
+        assert isinstance(listing, list)
+        assert len(listing) == 1
+        entry = listing[0]
+        assert entry["id"] == str(pred_set.id)
+        assert entry["embedding_config_name"] == config.model_name
+        assert entry["annotation_set_label"] == "goa 2024-01"
+        assert entry["ontology_snapshot_version"] == "2024-01-01"
+        assert entry["prediction_count"] == 100
+
+    def test_annotation_set_without_version(self, client, session):
+        """Without a ``source_version`` the label is just the source name."""
+        pred_set = _make_prediction_set()
+        config = _make_config()
+        annotations = MagicMock(source="goa", source_version=None)
+        snapshot = MagicMock(obo_version="2024-01-01")
+
+        # Same joined-row shape as above, with a prediction count of 0
+        self._wire_list_query(session, [(pred_set, config, annotations, snapshot, 0)])
+
+        resp = client.get("/embeddings/prediction-sets")
+        assert resp.status_code == 200
+        first = resp.json()[0]
+        assert first["annotation_set_label"] == "goa"
+
+    def test_empty_list(self, client, session):
+        self._wire_list_query(session, [])
+        resp = client.get("/embeddings/prediction-sets")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets/{set_id}
+# ---------------------------------------------------------------------------
+
+class TestGetPredictionSet:
+    def test_returns_details(self, client, session):
+        ps = _make_prediction_set()
+        ps_id = ps.id
+        session.get.return_value = ps
+        session.query.return_value.filter.return_value.scalar.return_value = 50
+        session.query.return_value.filter.return_value.group_by.return_value.all.return_value = [
+            ("P12345", 30), ("Q67890", 20),
+        ]
+
+        resp = client.get(f"/embeddings/prediction-sets/{ps_id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["id"] == str(ps_id)
+        assert data["prediction_count"] == 50
+        assert data["per_protein_counts"]["P12345"] == 30
+        assert data["per_protein_counts"]["Q67890"] == 20
+
+    def test_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/embeddings/prediction-sets/{uuid4()}")
+        assert resp.status_code == 404
+
+    def test_with_query_set_id(self, client, session):
+        ps = _make_prediction_set()
+        ps.query_set_id = uuid4()
+        session.get.return_value = ps
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+        session.query.return_value.filter.return_value.group_by.return_value.all.return_value = []
+
+        resp = client.get(f"/embeddings/prediction-sets/{ps.id}")
+        assert resp.status_code == 200
+        assert resp.json()["query_set_id"] == str(ps.query_set_id)
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets/{set_id}/proteins
+# ---------------------------------------------------------------------------
+
+class TestListPredictionSetProteins:
+    def _setup_proteins_mocks(self, session, ps, rows_data):
+        """Set up the complex mock chain for the proteins endpoint."""
+        # We need to carefully control the mock chain.
+        # The endpoint does multiple session.query(...) calls with different args.
+        # Use a side_effect on session.query to return different mocks per call.
+        call_idx = [0]
+        main_q = MagicMock()
+        main_q.filter.return_value = main_q
+        main_q.group_by.return_value = main_q
+        main_q.count.return_value = len(rows_data)
+        main_q.order_by.return_value = main_q
+        main_q.offset.return_value = main_q
+        main_q.limit.return_value = main_q
+        main_q.all.return_value = rows_data
+
+        prot_q = MagicMock()
+        prot_mock = MagicMock()
+        prot_mock.accession = rows_data[0][0] if rows_data else "X"
+        prot_q.filter.return_value = prot_q
+        prot_q.all.return_value = [prot_mock] if rows_data else []
+
+        ann_q = MagicMock()
+        ann_q.filter.return_value = ann_q
+        ann_q.group_by.return_value = ann_q
+        ann_q.all.return_value = [(rows_data[0][0], 5)] if rows_data else []
+
+        match_q = MagicMock()
+        match_q.join.return_value = match_q
+        match_q.filter.return_value = match_q
+        match_q.group_by.return_value = match_q
+        match_q.all.return_value = [(rows_data[0][0], 3)] if rows_data else []
+
+        queries = [main_q, prot_q, ann_q, match_q]
+
+        def _query_side(*args, **kwargs):
+            idx = call_idx[0]
+            call_idx[0] += 1
+            if idx < len(queries):
+                return queries[idx]
+            return MagicMock()
+
+        session.query.side_effect = _query_side
+
+    def test_returns_paginated_proteins(self, client, session):
+        ps = _make_prediction_set()
+        ps_id = ps.id
+        session.get.return_value = ps
+        self._setup_proteins_mocks(session, ps, [("P12345", 10, 0.05)])
+
+        resp = client.get(f"/embeddings/prediction-sets/{ps_id}/proteins")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "total" in data
+        assert "items" in data
+        assert len(data["items"]) == 1
+        item = data["items"][0]
+        assert item["accession"] == "P12345"
+        assert item["go_count"] == 10
+        assert item["in_db"] is True
+
+    def test_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/embeddings/prediction-sets/{uuid4()}/proteins")
+        assert resp.status_code == 404
+
+    def test_search_filter(self, client, session):
+        ps = _make_prediction_set()
+        session.get.return_value = ps
+
+        call_idx = [0]
+        main_q = MagicMock()
+        main_q.filter.return_value = main_q
+        main_q.group_by.return_value = main_q
+        main_q.count.return_value = 0
+        main_q.order_by.return_value = main_q
+        main_q.offset.return_value = main_q
+        main_q.limit.return_value = main_q
+        main_q.all.return_value = []
+
+        prot_q = MagicMock()
+        prot_q.filter.return_value = prot_q
+        prot_q.all.return_value = []
+
+        queries = [main_q, prot_q]
+
+        def _query_side(*args, **kwargs):
+            idx = call_idx[0]
+            call_idx[0] += 1
+            if idx < len(queries):
+                return queries[idx]
+            return MagicMock()
+
+        session.query.side_effect = _query_side
+
+        resp = client.get(
+            f"/embeddings/prediction-sets/{ps.id}/proteins",
+            params={"search": "P123"},
+        )
+        assert resp.status_code == 200
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets/{set_id}/proteins/{accession}
+# ---------------------------------------------------------------------------
+
+class TestGetProteinPredictions:
+    def test_returns_predictions(self, client, session):
+        ps = _make_prediction_set()
+        ps_id = ps.id
+        session.get.return_value = ps
+
+        pred = _make_go_prediction("P12345", distance=0.1)
+        gt = _make_go_term("GO:0003824", "catalytic activity", "F")
+
+        session.query.return_value.join.return_value.filter.return_value \
+            .order_by.return_value.all.return_value = [(pred, gt)]
+
+        resp = client.get(f"/embeddings/prediction-sets/{ps_id}/proteins/P12345")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert isinstance(data, list)
+        assert len(data) == 1
+        assert data[0]["go_id"] == "GO:0003824"
+        assert data[0]["name"] == "catalytic activity"
+        assert data[0]["aspect"] == "F"
+        assert data[0]["distance"] == pytest.approx(0.1, abs=1e-4)
+        assert data[0]["ref_protein_accession"] == "QREF01"
+        # Alignment fields should be None
+        assert data[0]["identity_nw"] is None
+        assert data[0]["taxonomic_relation"] is None
+
+    def test_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/embeddings/prediction-sets/{uuid4()}/proteins/P12345")
+        assert resp.status_code == 404
+
+    def test_empty_predictions_returns_empty_list(self, client, session):
+        ps = _make_prediction_set()
+        session.get.return_value = ps
+        session.query.return_value.join.return_value.filter.return_value \
+            .order_by.return_value.all.return_value = []
+        resp = client.get(f"/embeddings/prediction-sets/{ps.id}/proteins/UNKNOWN")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets/{set_id}/go-terms
+# ---------------------------------------------------------------------------
+
+class TestGoTermDistribution:
+    def test_returns_distribution(self, client, session):
+        ps = _make_prediction_set()
+        ps_id = ps.id
+        session.get.return_value = ps
+
+        # Top terms query
+        session.query.return_value.join.return_value.filter.return_value \
+            .group_by.return_value.order_by.return_value.limit.return_value \
+            .all.return_value = [
+                ("GO:0003824", "catalytic activity", "F", 50),
+                ("GO:0005515", "protein binding", "F", 30),
+                ("GO:0008150", "biological_process", "P", 20),
+            ]
+
+        # Aspect counts query
+        session.query.return_value.join.return_value.filter.return_value \
+            .group_by.return_value.all.return_value = [
+                ("F", 80), ("P", 20),
+            ]
+
+        resp = client.get(f"/embeddings/prediction-sets/{ps_id}/go-terms")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "by_aspect" in data
+        assert "aspect_totals" in data
+        assert "top_terms" in data
+
+    def test_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/embeddings/prediction-sets/{uuid4()}/go-terms")
+        assert resp.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# GET /embeddings/prediction-sets/{set_id}/predictions-cafa.tsv
+# ---------------------------------------------------------------------------
+
+class TestDownloadPredictionsCafa:
+    def _get_cafa(self, client, session, set_id, rows, **params):
+        """Wire mocks for the CAFA download endpoint.
+
+        ``rows`` should be a list of (protein_accession, go_id, distance) tuples,
+        matching the subquery-based query output.
+        """
+        ps = _make_prediction_set(set_id)
+        session.get.return_value = ps
+
+        q = MagicMock()
+        q.join.return_value = q
+        q.filter.return_value = q
+        q.group_by.return_value = q
+        q.subquery.return_value = q
+        q.c = q  # subquery column access
+        q.order_by.return_value = q
+        q.yield_per.return_value = iter(rows)
+        session.query.return_value = q
+
+        return client.get(
+            f"/embeddings/prediction-sets/{set_id}/predictions-cafa.tsv",
+            params=params,
+        )
+
+    def test_returns_cafa_format(self, client, session):
+        set_id = uuid4()
+        # New format: (accession, go_id, distance) tuples
+        resp = self._get_cafa(client, session, set_id, [("P12345", "GO:0003824", 0.3)])
+        assert resp.status_code == 200
+        assert "tab-separated" in resp.headers["content-type"]
+        lines = resp.text.splitlines()
+        assert len(lines) == 1
+        parts = lines[0].split("\t")
+        assert parts[0] == "P12345"
+        assert parts[1] == "GO:0003824"
+        # score = max(0, 1 - 0.3) = 0.7
+        assert float(parts[2]) == pytest.approx(0.7, abs=1e-3)
+
+    def test_cafa_deduplicates_go_terms(self, client, session):
+        """Deduplication now happens at DB level via GROUP BY + MIN(distance).
+        The query returns already-unique rows, so a single row is expected."""
+        set_id = uuid4()
+        # DB-level dedup means only the best (min distance) row is returned
+        resp = self._get_cafa(client, session, set_id, [("P12345", "GO:0003824", 0.2)])
+        assert resp.status_code == 200
+        lines = resp.text.splitlines()
+        assert len(lines) == 1
+
+    def test_cafa_not_found_returns_404(self, client, session):
+        session.get.return_value = None
+        with patch("protea.api.routers.embeddings.session_scope", side_effect=lambda _: _mock_scope(session)):
+            resp = client.get(f"/embeddings/prediction-sets/{uuid4()}/predictions-cafa.tsv")
+        assert resp.status_code == 404
+
+    def test_cafa_content_disposition(self, client, session):
+        set_id = uuid4()
+        resp = self._get_cafa(client, session, set_id, [])
+        disposition = resp.headers.get("content-disposition", "")
+        assert "attachment" in disposition
+        assert "cafa" in disposition
+
+    def test_cafa_filter_by_aspect(self, client, session):
+        set_id = uuid4()
+        resp = self._get_cafa(client, session, set_id, [("P12345", "GO:0003824", 0.1)], aspect="F")
+        assert resp.status_code == 200
+
+    def test_cafa_filter_by_max_distance(self, client, session):
+        set_id = uuid4()
+        resp = self._get_cafa(client, session, set_id, [("P12345", "GO:0003824", 0.05)], max_distance=0.5)
+        assert resp.status_code == 200
+
+    def test_cafa_score_clamps_at_zero(self, client, session):
+        """When distance > 1.0 the score should be 0.0, not negative."""
+        set_id = uuid4()
+        resp = self._get_cafa(client, session, set_id, [("P12345", "GO:0003824", 2.5)])
+        lines = resp.text.splitlines()
+        assert len(lines) == 1
+        score = float(lines[0].split("\t")[2])
+        assert score == 0.0
+
+
+# ---------------------------------------------------------------------------
+# DELETE /embeddings/prediction-sets/{set_id}
+# ---------------------------------------------------------------------------
+
+class TestDeletePredictionSet:
+    def test_delete_existing_returns_200(self, client, session):
+        ps = _make_prediction_set()
+        ps_id = ps.id
+        session.get.return_value = ps
+        session.query.return_value.filter.return_value.delete.return_value = 25
+
+        resp = client.delete(f"/embeddings/prediction-sets/{ps_id}")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["deleted"] == str(ps_id)
+        assert data["predictions_deleted"] == 25
+        session.delete.assert_called_once_with(ps)
+
+    def test_delete_nonexistent_returns_404(self, client, session):
+        session.get.return_value = None
+        resp = client.delete(f"/embeddings/prediction-sets/{uuid4()}")
+        assert resp.status_code == 404
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
index f88a759..b8c5476 100644
--- a/tests/test_evaluation.py
+++ b/tests/test_evaluation.py
@@ -1,8 +1,16 @@
-"""Tests for protea.core.evaluation — pure-Python components."""
-import pytest
-
-from protea.core.evaluation import EvaluationData, _get_descendants
+"""Tests for protea.core.evaluation — pure-Python components + mocked DB tests."""
+import uuid
+from unittest.mock import MagicMock, patch
 
+from protea.core.evaluation import (
+    EvaluationData,
+    _build_negative_keys,
+    _get_descendants,
+    _load_children_map,
+    _load_experimental_annotations_by_ns,
+    _load_go_maps,
+    compute_evaluation_data,
+)
 
 # ---------------------------------------------------------------------------
 # EvaluationData — dataclass properties
@@ -141,3 +149,416 @@ def test_leaf_node(self):
         children_map = {1: {2}, 2: set()}
         result = _get_descendants(1, children_map)
         assert result == {2}
+
+
+# ---------------------------------------------------------------------------
+# _load_children_map — lines 124-137
+# ---------------------------------------------------------------------------
+
+class TestLoadChildrenMap:
+    def test_loads_and_groups_by_parent(self):
+        snap_id = uuid.uuid4()
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            (10, 20),
+            (10, 30),
+            (20, 40),
+        ]
+        result = _load_children_map(mock_session, snap_id)
+        assert result == {10: {20, 30}, 20: {40}}
+
+    def test_empty_result(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        result = _load_children_map(mock_session, uuid.uuid4())
+        assert result == {}
+
+    def test_passes_snapshot_id(self):
+        snap_id = uuid.uuid4()
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        _load_children_map(mock_session, snap_id)
+        call_args = mock_session.execute.call_args
+        assert call_args[0][1]["snap_id"] == snap_id
+
+    def test_single_relationship(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [(1, 2)]
+        result = _load_children_map(mock_session, uuid.uuid4())
+        assert result == {1: {2}}
+
+
+# ---------------------------------------------------------------------------
+# _load_go_maps — lines 161-169
+# ---------------------------------------------------------------------------
+
+class TestLoadGoMaps:
+    def test_basic_maps(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            (1, "GO:0001", "F"),
+            (2, "GO:0002", "P"),
+            (3, "GO:0003", "C"),
+        ]
+        id_map, aspect_map = _load_go_maps(mock_session, uuid.uuid4())
+        assert id_map == {1: "GO:0001", 2: "GO:0002", 3: "GO:0003"}
+        assert aspect_map == {1: "F", 2: "P", 3: "C"}
+
+    def test_null_aspect_excluded_from_aspect_map(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            (1, "GO:0001", "F"),
+            (2, "GO:0002", None),
+        ]
+        id_map, aspect_map = _load_go_maps(mock_session, uuid.uuid4())
+        assert id_map == {1: "GO:0001", 2: "GO:0002"}
+        assert 2 not in aspect_map
+        assert aspect_map == {1: "F"}
+
+    def test_empty(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        id_map, aspect_map = _load_go_maps(mock_session, uuid.uuid4())
+        assert id_map == {}
+        assert aspect_map == {}
+
+
+# ---------------------------------------------------------------------------
+# _build_negative_keys — lines 182-204
+# ---------------------------------------------------------------------------
+
+class TestBuildNegativeKeys:
+    def test_no_not_annotations(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        result = _build_negative_keys(mock_session, [uuid.uuid4()], {})
+        assert result == set()
+
+    def test_single_not_no_descendants(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [("P1", 100)]
+        result = _build_negative_keys(mock_session, [uuid.uuid4()], {})
+        assert result == {("P1", 100)}
+
+    def test_not_with_descendants(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [("P1", 100)]
+        children_map = {100: {200, 300}, 200: {400}}
+        result = _build_negative_keys(mock_session, [uuid.uuid4()], children_map)
+        assert result == {("P1", 100), ("P1", 200), ("P1", 300), ("P1", 400)}
+
+    def test_multiple_proteins(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            ("P1", 10),
+            ("P2", 20),
+        ]
+        children_map = {10: {11}}
+        result = _build_negative_keys(mock_session, [uuid.uuid4()], children_map)
+        assert ("P1", 10) in result
+        assert ("P1", 11) in result
+        assert ("P2", 20) in result
+
+    def test_duplicate_rows_deduplicated(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            ("P1", 10),
+            ("P1", 10),
+        ]
+        result = _build_negative_keys(mock_session, [uuid.uuid4()], {})
+        assert result == {("P1", 10)}
+
+    def test_passes_set_ids(self):
+        ids = [uuid.uuid4(), uuid.uuid4()]
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        _build_negative_keys(mock_session, ids, {})
+        call_args = mock_session.execute.call_args
+        assert call_args[0][1]["set_ids"] == ids
+
+
+# ---------------------------------------------------------------------------
+# _load_experimental_annotations_by_ns — lines 219-238
+# ---------------------------------------------------------------------------
+
+class TestLoadExperimentalAnnotationsByNs:
+    def _go_id_map(self):
+        return {100: "GO:0001", 200: "GO:0002", 300: "GO:0003", 400: "GO:0004"}
+
+    def _aspect_map(self):
+        return {100: "F", 200: "P", 300: "C", 400: "F"}
+
+    def test_groups_by_protein_and_namespace(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            ("P1", 100),
+            ("P1", 200),
+            ("P2", 300),
+        ]
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), set(), self._go_id_map(), self._aspect_map()
+        )
+        assert result["P1"]["F"] == {"GO:0001"}
+        assert result["P1"]["P"] == {"GO:0002"}
+        assert result["P2"]["C"] == {"GO:0003"}
+
+    def test_negative_keys_excluded(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            ("P1", 100),
+            ("P1", 200),
+        ]
+        negative_keys = {("P1", 100)}
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), negative_keys, self._go_id_map(), self._aspect_map()
+        )
+        assert "F" not in result.get("P1", {})
+        assert result["P1"]["P"] == {"GO:0002"}
+
+    def test_missing_go_id_skipped(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [("P1", 999)]
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), set(), self._go_id_map(), self._aspect_map()
+        )
+        assert result == {}
+
+    def test_missing_aspect_skipped(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [("P1", 100)]
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), set(), {100: "GO:0001"}, {}
+        )
+        assert result == {}
+
+    def test_empty_rows(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = []
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), set(), {}, {}
+        )
+        assert result == {}
+
+    def test_multiple_terms_same_namespace(self):
+        mock_session = MagicMock()
+        mock_session.execute.return_value.fetchall.return_value = [
+            ("P1", 100),
+            ("P1", 400),  # also F namespace
+        ]
+        result = _load_experimental_annotations_by_ns(
+            mock_session, uuid.uuid4(), set(), self._go_id_map(), self._aspect_map()
+        )
+        assert result["P1"]["F"] == {"GO:0001", "GO:0004"}
+
+
+# ---------------------------------------------------------------------------
+# compute_evaluation_data — lines 265-322
+# ---------------------------------------------------------------------------
+
+class TestComputeEvaluationData:
+    def _ids(self):
+        return uuid.uuid4(), uuid.uuid4(), uuid.uuid4()
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_nk_protein(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Protein with no old annotations -> NK."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {},  # old
+            {"P1": {"F": {"GO:0001", "GO:0002"}}},  # new
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {"P1": {"GO:0001", "GO:0002"}}
+        assert result.lk == {}
+        assert result.pk == {}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_lk_protein(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Protein had F at t0, gains P (no old P) -> LK in P."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}}},
+            {"P1": {"F": {"GO:0001"}, "P": {"GO:0002"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {}
+        assert result.lk == {"P1": {"GO:0002"}}
+        assert result.pk == {}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_pk_protein(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Protein had F at t0, gains new F -> PK in F."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}}},
+            {"P1": {"F": {"GO:0001", "GO:0002"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {}
+        assert result.lk == {}
+        assert result.pk == {"P1": {"GO:0002"}}
+        assert result.pk_known == {"P1": {"GO:0001"}}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_mixed_lk_and_pk(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Same protein: PK in F, LK in C."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}}},
+            {"P1": {"F": {"GO:0001", "GO:0002"}, "C": {"GO:0003"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.pk == {"P1": {"GO:0002"}}
+        assert result.lk == {"P1": {"GO:0003"}}
+        assert result.pk_known == {"P1": {"GO:0001"}}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_no_new_annotations(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Protein only in old -> skipped (no new_all)."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}}},
+            {},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {}
+        assert result.lk == {}
+        assert result.pk == {}
+        assert result.delta_proteins == 0
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_no_delta_same_terms(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Old and new identical -> no delta."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}}},
+            {"P1": {"F": {"GO:0001"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {}
+        assert result.lk == {}
+        assert result.pk == {}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_known_includes_all_old(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """known dict contains all old experimental annotations flattened."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:0001"}, "P": {"GO:0002"}}, "P2": {"C": {"GO:0003"}}},
+            {"P1": {"F": {"GO:0001"}, "P": {"GO:0002", "GO:0099"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.known == {"P1": {"GO:0001", "GO:0002"}, "P2": {"GO:0003"}}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_multiple_proteins(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Multiple proteins with different categories."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P_old": {"F": {"GO:0001"}}},
+            {"P_old": {"F": {"GO:0001", "GO:0002"}}, "P_nk": {"P": {"GO:0010"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {"P_nk": {"GO:0010"}}
+        assert result.pk == {"P_old": {"GO:0002"}}
+        assert result.pk_known == {"P_old": {"GO:0001"}}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_protein_with_empty_new_namespaces(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Protein key in new but no namespace data -> new_all empty -> skip."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {},
+            {"P1": {}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.nk == {}
+        assert result.lk == {}
+        assert result.pk == {}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_all_three_namespaces_pk(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """All three namespaces (F, P, C) gain new terms -> PK in all."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [
+            {"P1": {"F": {"GO:F1"}, "P": {"GO:P1"}, "C": {"GO:C1"}}},
+            {"P1": {"F": {"GO:F1", "GO:F2"}, "P": {"GO:P1", "GO:P2"}, "C": {"GO:C1", "GO:C2"}}},
+        ]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.pk == {"P1": {"GO:F2", "GO:P2", "GO:C2"}}
+        assert result.pk_known == {"P1": {"GO:F1", "GO:P1", "GO:C1"}}
+
+    @patch("protea.core.evaluation._load_experimental_annotations_by_ns")
+    @patch("protea.core.evaluation._build_negative_keys")
+    @patch("protea.core.evaluation._load_children_map")
+    @patch("protea.core.evaluation._load_go_maps")
+    def test_both_empty(self, mock_go_maps, mock_children, mock_neg, mock_annots):
+        """Both old and new empty -> empty result."""
+        old_id, new_id, snap_id = self._ids()
+        mock_go_maps.return_value = ({}, {})
+        mock_children.return_value = {}
+        mock_neg.return_value = set()
+        mock_annots.side_effect = [{}, {}]
+        result = compute_evaluation_data(MagicMock(), old_id, new_id, snap_id)
+        assert result.delta_proteins == 0
+        assert result.known == {}
diff --git a/tests/test_feature_engineering.py b/tests/test_feature_engineering.py
index b215a78..2826b44 100644
--- a/tests/test_feature_engineering.py
+++ b/tests/test_feature_engineering.py
@@ -18,7 +18,6 @@
     compute_taxonomy,
 )
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
diff --git a/tests/test_generate_evaluation_set.py b/tests/test_generate_evaluation_set.py
index 8b54033..fff662c 100644
--- a/tests/test_generate_evaluation_set.py
+++ b/tests/test_generate_evaluation_set.py
@@ -6,12 +6,11 @@
 
 import pytest
 
+from protea.core.evaluation import EvaluationData
 from protea.core.operations.generate_evaluation_set import (
     GenerateEvaluationSetOperation,
     GenerateEvaluationSetPayload,
 )
-from protea.core.evaluation import EvaluationData
-
 
 # ---------------------------------------------------------------------------
 # Payload validator
@@ -26,11 +25,11 @@ def test_valid_uuids(self):
         assert p.new_annotation_set_id == new
 
     def test_empty_old_raises(self):
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             GenerateEvaluationSetPayload(old_annotation_set_id="  ", new_annotation_set_id=str(uuid.uuid4()))
 
     def test_empty_new_raises(self):
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             GenerateEvaluationSetPayload(old_annotation_set_id=str(uuid.uuid4()), new_annotation_set_id="")
 
     def test_strips_whitespace(self):
@@ -129,7 +128,7 @@ def test_emits_start_event(self):
         new_set = _make_annotation_set(snap_id)
         session.get.side_effect = [old_set, new_set]
 
-        eval_set = MagicMock()
+        MagicMock()
 
         def add_side(obj):
             obj.id = uuid.uuid4()
diff --git a/tests/test_infrastructure.py b/tests/test_infrastructure.py
index 9cbd0a2..8aca557 100644
--- a/tests/test_infrastructure.py
+++ b/tests/test_infrastructure.py
@@ -85,7 +85,14 @@ def test_returns_engine(self):
         with patch("protea.infrastructure.database.engine.create_engine") as mock_create:
             mock_create.return_value = MagicMock()
             engine = build_engine("sqlite:///:memory:")
-        mock_create.assert_called_once_with("sqlite:///:memory:", future=True, pool_pre_ping=True)
+        mock_create.assert_called_once_with(
+            "sqlite:///:memory:",
+            future=True,
+            pool_pre_ping=True,
+            pool_size=20,
+            max_overflow=40,
+            pool_recycle=3600,
+        )
         assert engine is mock_create.return_value
 
 
@@ -132,3 +139,153 @@ def test_jobs_router_is_registered(self):
 
         routes = [r.path for r in app.routes]
         assert any("/jobs" in p for p in routes)
+
+    def test_health_endpoint_registered(self):  # /health and /health/ready must both be routed
+        from protea.api.app import create_app
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # Settings loading and the session factory are stubbed, so create_app runs on mocks only.
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=MagicMock()):
+            app = create_app(Path("/fake/root"))
+
+        routes = [r.path for r in app.routes]
+        assert "/health" in routes
+        assert "/health/ready" in routes
+
+    def test_health_endpoint_returns_ok(self):
+        """GET /health answers 200 with the JSON body {"status": "ok"}."""
+        from fastapi.testclient import TestClient
+
+        from protea.api.app import create_app
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # Settings loading and the session factory are stubbed, so create_app runs on mocks only.
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=MagicMock()):
+            app = create_app(Path("/fake/root"))
+
+        client = TestClient(app)
+        resp = client.get("/health")
+        assert resp.status_code == 200
+        assert resp.json() == {"status": "ok"}
+
+    def test_readiness_check_succeeds(self):
+        """GET /health/ready returns 200 when DB and RabbitMQ are reachable."""
+        from fastapi.testclient import TestClient
+
+        from protea.api.app import create_app
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # Session stub usable as a context manager; it is what the stubbed factory returns.
+        mock_factory = MagicMock()
+        mock_session = MagicMock()
+        mock_factory.return_value = mock_session
+        mock_session.__enter__ = lambda s: s
+        mock_session.__exit__ = MagicMock(return_value=False)
+
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=mock_factory):
+            app = create_app(Path("/fake/root"))
+        # NOTE(review): effective only if the handler resolves these names via these modules — confirm.
+        mock_conn = MagicMock()
+        with patch("protea.infrastructure.session.session_scope") as mock_scope, \
+             patch("pika.BlockingConnection", return_value=mock_conn):
+            mock_scope.return_value.__enter__ = lambda s: mock_session
+            mock_scope.return_value.__exit__ = MagicMock(return_value=False)
+            client = TestClient(app)
+            resp = client.get("/health/ready")
+
+        assert resp.status_code == 200
+        assert resp.json() == {"status": "ready"}
+
+    def test_readiness_check_fails_when_rabbitmq_down(self):
+        """GET /health/ready returns 503 when RabbitMQ is unreachable."""
+        from fastapi.testclient import TestClient
+
+        from protea.api.app import create_app
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # Session stub usable as a context manager; it is what the stubbed factory returns.
+        mock_factory = MagicMock()
+        mock_session = MagicMock()
+        mock_factory.return_value = mock_session
+        mock_session.__enter__ = lambda s: s
+        mock_session.__exit__ = MagicMock(return_value=False)
+
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=mock_factory):
+            app = create_app(Path("/fake/root"))
+        # The DB side stays mocked as healthy; only opening the pika connection raises.
+        with patch("protea.infrastructure.session.session_scope") as mock_scope, \
+             patch("pika.BlockingConnection", side_effect=Exception("Connection refused")):
+            mock_scope.return_value.__enter__ = lambda s: mock_session
+            mock_scope.return_value.__exit__ = MagicMock(return_value=False)
+            client = TestClient(app)
+            resp = client.get("/health/ready")
+
+        assert resp.status_code == 503
+        assert "RabbitMQ unreachable" in resp.json()["detail"]
+
+    def test_project_root_defaults_to_parents_2(self):
+        """With project_root omitted, create_app hands load_settings an absolute Path as its first argument."""
+        from protea.api.app import create_app
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+
+        with patch("protea.api.app.load_settings", return_value=mock_settings) as mock_load, \
+             patch("protea.api.app.build_session_factory", return_value=MagicMock()):
+            create_app()  # project_root=None
+
+        # NOTE(review): only type and absoluteness are checked; that the root is parents[2] is not asserted.
+        called_root = mock_load.call_args[0][0]
+        assert isinstance(called_root, Path)
+        assert called_root.is_absolute()
+
+    def test_sphinx_mount_when_directory_exists(self, tmp_path):
+        """When <project_root>/docs/build/html exists, a route containing /sphinx is registered."""
+        from protea.api.app import create_app
+
+        sphinx_dir = tmp_path / "docs" / "build" / "html"
+        sphinx_dir.mkdir(parents=True)
+        (sphinx_dir / "index.html").write_text("<html></html>")
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # tmp_path stands in for the project root, so the directory built above is the one inspected.
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=MagicMock()):
+            app = create_app(project_root=tmp_path)
+
+        route_paths = [r.path for r in app.routes]
+        assert any("/sphinx" in p for p in route_paths)
+
+    def test_static_mount_when_directory_exists(self, tmp_path):
+        """When <project_root>/static exists, a route containing /static is registered."""
+        from protea.api.app import create_app
+
+        static_dir = tmp_path / "static"
+        static_dir.mkdir()
+        (static_dir / "test.txt").write_text("hello")
+
+        mock_settings = MagicMock()
+        mock_settings.db_url = "sqlite:///:memory:"
+        mock_settings.amqp_url = "amqp://guest:guest@localhost/"
+        # tmp_path stands in for the project root, so the directory built above is the one inspected.
+        with patch("protea.api.app.load_settings", return_value=mock_settings), \
+             patch("protea.api.app.build_session_factory", return_value=MagicMock()):
+            app = create_app(project_root=tmp_path)
+
+        route_paths = [r.path for r in app.routes]
+        assert any("/static" in p for p in route_paths)
diff --git a/tests/test_insert_proteins.py b/tests/test_insert_proteins.py
index 9922641..5a4bd50 100644
--- a/tests/test_insert_proteins.py
+++ b/tests/test_insert_proteins.py
@@ -160,6 +160,231 @@ def test_sequence_hash_is_set(self):
         assert records[0]["sequence_hash"] is not None
         assert len(records[0]["sequence_hash"]) == 32  # MD5 hex
 
+    def test_empty_sequence_skipped(self):
+        """A header that is followed by no sequence lines yields no record."""
+        fasta = ">sp|P12345|TEST_HUMAN Test OS=Homo sapiens OX=9606\n\n"
+        records = self.op._parse_fasta(fasta)
+        assert records == []
+
+    def test_header_without_pipe_separators(self):
+        """A header without | separators uses its first word as accession and has no entry name."""
+        fasta = ">SIMPLE_ACC some description\nMKTAYIAK\n"
+        records = self.op._parse_fasta(fasta)
+        assert len(records) == 1
+        assert records[0]["accession"] == "SIMPLE_ACC"
+        assert records[0]["entry_name"] is None
+
+    def test_isoform_accession_parsed(self):  # "P12345-3" → isoform 3 of canonical P12345
+        fasta = (
+            ">sp|P12345-3|TEST_HUMAN Isoform 3 OS=Homo sapiens OX=9606 GN=TEST\n"
+            "MKTAYIAK\n"
+        )
+        records = self.op._parse_fasta(fasta)
+        r = records[0]
+        assert r["accession"] == "P12345-3"  # full accession keeps the isoform suffix
+        assert r["canonical_accession"] == "P12345"
+        assert r["is_canonical"] is False
+        assert r["isoform_index"] == 3
+
+    def test_canonical_accession_flagged(self):  # FASTA_ONE's record is canonical: no isoform index
+        records = self.op._parse_fasta(FASTA_ONE)
+        r = records[0]
+        assert r["canonical_accession"] == "P12345"
+        assert r["is_canonical"] is True
+        assert r["isoform_index"] is None
+
+    def test_reviewed_vs_unreviewed(self):  # reviewed flag differs between FASTA_TWO's two records
+        records = self.op._parse_fasta(FASTA_TWO)
+        assert records[0]["reviewed"] is True   # sp|
+        assert records[1]["reviewed"] is False   # tr|
+
+    def test_sequence_deduplication_by_hash(self):
+        """Two records with identical sequences are both parsed and share one sequence_hash."""
+        fasta = (
+            ">sp|P11111|A_HUMAN Prot A OS=Homo sapiens OX=9606\nMKTAYIAK\n"
+            ">sp|P22222|B_HUMAN Prot B OS=Homo sapiens OX=9606\nMKTAYIAK\n"
+        )
+        records = self.op._parse_fasta(fasta)
+        assert len(records) == 2  # parsing itself does not drop the duplicate
+        assert records[0]["sequence_hash"] == records[1]["sequence_hash"]
+
+    def test_multiline_sequence(self):  # sequence lines under one header are joined into one string
+        fasta = (
+            ">sp|P12345|TEST_HUMAN Test OS=Homo sapiens OX=9606\n"
+            "MKTAY\n"
+            "IAKQR\n"
+        )
+        records = self.op._parse_fasta(fasta)
+        assert records[0]["sequence"] == "MKTAYIAKQR"
+        assert records[0]["length"] == 10  # length of the joined sequence
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — _decode_response
+# ---------------------------------------------------------------------------
+
+class TestDecodeResponse:
+    """Unit tests for InsertProteinsOperation._decode_response."""
+
+    def setup_method(self):
+        # pytest calls this before every test method in the class.
+        self.op = InsertProteinsOperation()
+
+    def test_decode_uncompressed(self):
+        """With compressed=False the response bytes are returned as text."""
+        resp = MagicMock()
+        resp.content = b"hello world"
+        result = self.op._decode_response(resp, compressed=False)
+        assert result == "hello world"
+
+    def test_decode_compressed(self):
+        """With compressed=True the gzip payload is inflated before decoding."""
+        import gzip
+
+        resp = MagicMock()
+        # gzip.compress yields a complete gzip stream in one call; there is no
+        # need to drive a GzipFile over a BytesIO buffer by hand.
+        resp.content = gzip.compress(b"compressed content")
+        result = self.op._decode_response(resp, compressed=True)
+        assert result == "compressed content"
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — _store_records
+# ---------------------------------------------------------------------------
+
+class TestStoreRecords:  # unit tests for InsertProteinsOperation._store_records on a mocked session
+    def setup_method(self):
+        self.op = InsertProteinsOperation()
+
+    def test_empty_records_returns_zeros(self):
+        """An empty record list returns all-zero counts without querying the session."""
+        session = _make_mock_session()
+        result = self.op._store_records(session, [], _noop_emit)
+        assert result == (0, 0, 0, 0)
+        session.query.assert_not_called()
+
+    def test_updates_existing_protein(self):
+        """An existing protein with missing or stale fields is updated and the DB sequence is reused."""
+        from protea.infrastructure.orm.models.sequence.sequence import (
+            Sequence as SequenceModel,
+        )
+
+        seq_hash = SequenceModel.compute_hash("MKTAYIAK")
+        record = {
+            "accession": "P12345",
+            "entry_name": "TEST_HUMAN",
+            "canonical_accession": "P12345",
+            "is_canonical": True,
+            "isoform_index": None,
+            "organism": "Homo sapiens",
+            "taxonomy_id": "9606",
+            "gene_name": "TEST",
+            "reviewed": True,
+            "sequence": "MKTAYIAK",
+            "length": 8,
+            "sequence_hash": seq_hash,
+        }
+
+        # Existing protein with missing fields (triggers updates)
+        existing_prot = MagicMock()
+        existing_prot.accession = "P12345"
+        existing_prot.sequence_id = None  # will be updated
+        existing_prot.entry_name = None  # will be updated
+        existing_prot.canonical_accession = "OLD_ACC"  # will be updated
+        existing_prot.is_canonical = False  # will be updated
+        existing_prot.isoform_index = 2  # will be updated
+        existing_prot.reviewed = None  # will be updated
+        existing_prot.taxonomy_id = None  # will be updated
+        existing_prot.organism = None  # will be updated
+        existing_prot.gene_name = None  # will be updated
+        existing_prot.length = None  # will be updated
+
+        session = MagicMock(spec=Session)
+
+        # _load_existing_sequences returns the hash → id map
+        seq_query = MagicMock()
+        seq_query.filter.return_value.all.return_value = [(seq_hash, 42)]
+
+        # _load_existing_proteins returns the existing protein
+        prot_query = MagicMock()
+        prot_query.filter.return_value.all.return_value = [existing_prot]
+        # Dispatch by call order: the first session.query() gets seq_query, all later ones prot_query.
+        call_idx = {"n": 0}
+
+        def query_side_effect(*args):
+            call_idx["n"] += 1
+            if call_idx["n"] == 1:
+                return seq_query
+            return prot_query
+
+        session.query.side_effect = query_side_effect
+
+        ins_p, upd_p, ins_s, re_s = self.op._store_records(session, [record], _noop_emit)
+
+        assert ins_p == 0
+        assert upd_p == 1  # existing protein was updated
+        assert re_s == 1  # sequence was reused from DB
+        assert ins_s == 0
+        # Verify fields were updated (taxonomy_id, organism, gene_name and length are not asserted here)
+        assert existing_prot.sequence_id == 42
+        assert existing_prot.entry_name == "TEST_HUMAN"
+        assert existing_prot.canonical_accession == "P12345"
+        assert existing_prot.is_canonical is True
+        assert existing_prot.isoform_index is None
+        assert existing_prot.reviewed is True
+
+    def test_inserts_new_sequence_when_missing(self):
+        """With no matching sequence or protein returned by the session, one of each is inserted."""
+        from protea.infrastructure.orm.models.sequence.sequence import (
+            Sequence as SequenceModel,
+        )
+
+        seq_hash = SequenceModel.compute_hash("MKTAYIAK")
+        record = {
+            "accession": "P12345",
+            "entry_name": "TEST_HUMAN",
+            "canonical_accession": "P12345",
+            "is_canonical": True,
+            "isoform_index": None,
+            "organism": "Homo sapiens",
+            "taxonomy_id": "9606",
+            "gene_name": "TEST",
+            "reviewed": True,
+            "sequence": "MKTAYIAK",
+            "length": 8,
+            "sequence_hash": seq_hash,
+        }
+
+        session = MagicMock(spec=Session)
+
+        # No existing sequences
+        seq_query = MagicMock()
+        seq_query.filter.return_value.all.return_value = []
+
+        # No existing proteins
+        prot_query = MagicMock()
+        prot_query.filter.return_value.all.return_value = []
+        # Dispatch by call order: the first session.query() gets seq_query, all later ones prot_query.
+        call_idx = {"n": 0}
+
+        def query_side_effect(*args):
+            call_idx["n"] += 1
+            if call_idx["n"] == 1:
+                return seq_query
+            return prot_query
+
+        session.query.side_effect = query_side_effect
+
+        ins_p, upd_p, ins_s, re_s = self.op._store_records(session, [record], _noop_emit)
+
+        assert ins_p == 1
+        assert upd_p == 0
+        assert ins_s == 1
+        assert re_s == 0
+        # add_all called twice: once for sequences, once for proteins
+        assert session.add_all.call_count == 2
+
 
 # ---------------------------------------------------------------------------
 # Unit tests — execute() with mocked HTTP and session
@@ -242,6 +467,234 @@ def test_two_records_counts_correctly(self):
         assert result.result["retrieved_records"] == 2
         assert result.result["proteins_inserted"] == 2
 
+    def test_empty_page_continues(self):
+        """An empty FASTA page still counts as a page but contributes no records."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+        # First response is empty FASTA, no link header → single page with 0 records
+        empty_resp = _make_mock_response("")
+        with patch.object(self.op._http, "get", return_value=empty_resp):
+            result = self.op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+        assert result.result["retrieved_records"] == 0
+        assert result.result["pages"] == 1
+
+    def test_total_limit_trims_to_zero_breaks(self):
+        """Retrieval stops at total_limit even though a further page is advertised."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        # Two pages: first has 2 records (we set limit=2), second also has records
+        # but after retrieving 2 on page 1 we should stop
+        page1_resp = _make_mock_response(
+            FASTA_TWO,
+            link_header='<https://rest.uniprot.org/?cursor=abc>; rel="next"',
+        )
+        page2_resp = _make_mock_response(FASTA_ONE)
+
+        # Serve the prepared pages in request order. A list side_effect needs no
+        # hand-written call counter, and any request beyond the prepared pages
+        # raises instead of silently re-serving page 2.
+        responses = [page1_resp, page2_resp]
+
+        with patch.object(self.op._http, "get", side_effect=responses):
+            result = self.op.execute(
+                session,
+                {"search_criteria": "q", "total_limit": 2, "compressed": False},
+                emit=emit,
+            )
+
+        # NOTE(review): only the final count is pinned here; the test passes
+        # whether or not page 2 is actually requested, so the "break" path is
+        # not distinguished from simply never fetching the second page.
+        assert result.result["retrieved_records"] == 2
+
+    def test_compressed_param_appended(self):
+        """compressed=True puts compressed=true into the requested URL."""
+        import gzip
+
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        # The payload asks for a compressed download, so the body served back is
+        # real gzip data; gzip.compress builds the whole stream in one call.
+        compressed_content = gzip.compress(FASTA_ONE.encode("utf-8"))
+
+        # Hand-built response exposing only the attributes set below; the "link"
+        # header is empty (presumably read as "no next page" — confirm).
+        resp = MagicMock()
+        resp.status_code = 200
+        resp.content = compressed_content
+        resp.headers = {"link": ""}
+        resp.raise_for_status = MagicMock()
+
+        with patch.object(self.op._http, "get", return_value=resp) as mock_get:
+            self.op.execute(
+                session,
+                {"search_criteria": "q", "compressed": True},
+                emit=emit,
+            )
+
+        called_url = mock_get.call_args[0][0]
+        assert "compressed=true" in called_url
+
+    def test_total_results_from_header(self):
+        """A numeric X-Total-Results header is stored as an int on op._total_results."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        resp = _make_mock_response(FASTA_ONE)
+        resp.headers["X-Total-Results"] = "42"
+        # A dedicated operation instance is used because the test inspects its private state.
+        op = InsertProteinsOperation()
+        with patch.object(op._http, "get", return_value=resp):
+            op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+
+        assert op._total_results == 42
+
+    def test_total_results_invalid_header_ignored(self):
+        """A non-numeric X-Total-Results header does not crash and leaves op._total_results as None."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        resp = _make_mock_response(FASTA_ONE)
+        resp.headers["X-Total-Results"] = "not-a-number"
+        # A dedicated operation instance is used because the test inspects its private state.
+        op = InsertProteinsOperation()
+        with patch.object(op._http, "get", return_value=resp):
+            op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+
+        assert op._total_results is None
+
+    def test_cursor_pagination(self):
+        """Pagination follows the cursor URL advertised in the link header."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        page1_resp = _make_mock_response(
+            FASTA_ONE,
+            link_header='<https://rest.uniprot.org/?cursor=abc123>; rel="next"',
+        )
+        page2_resp = _make_mock_response(FASTA_ONE)  # no link header → last page
+
+        # Serve the two pages in request order. The mock records every call
+        # itself, so no hand-written counter or URL list is needed, and a
+        # third request raises instead of silently re-serving page 2.
+        op = InsertProteinsOperation()
+        with patch.object(
+            op._http, "get", side_effect=[page1_resp, page2_resp]
+        ) as mock_get:
+            result = op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+
+        assert result.result["pages"] == 2
+        assert result.result["retrieved_records"] == 2
+
+        # The URL is read as the first positional argument of get(), the same
+        # way the other URL assertions in this class read it.
+        # call_args_list[1] is the second request, which must carry the
+        # cursor taken from page 1's link header.
+        second_url = mock_get.call_args_list[1][0][0]
+        assert "cursor=abc123" in second_url
+
+    def test_network_failure_propagates(self):
+        """A requests.ConnectionError raised by the HTTP client propagates out of execute()."""
+        import requests as req
+
+        session = _make_mock_session()
+        op = InsertProteinsOperation()
+
+        with patch.object(
+            op._http,
+            "get",
+            side_effect=req.ConnectionError("network down"),
+        ):
+            with pytest.raises(req.ConnectionError):
+                op.execute(
+                    session,
+                    {
+                        "search_criteria": "q",
+                        "compressed": False,
+                        "max_retries": 1,  # retry/backoff knobs minimized, presumably to keep the test fast
+                        "backoff_base_seconds": 0.0,
+                        "backoff_max_seconds": 0.0,
+                        "jitter_seconds": 0.0,
+                    },
+                    emit=_noop_emit,
+                )
+
+    def test_isoform_records_counted(self):
+        """Of one canonical and one isoform entry, exactly the isoform is reported in isoform_records."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+        # Entry 1 is the plain accession P12345; entry 2 carries the "-2" isoform suffix.
+        fasta_with_isoform = (
+            ">sp|P12345|TEST_HUMAN Test OS=Homo sapiens OX=9606\nMKTAYIAK\n"
+            ">sp|P12345-2|TEST_HUMAN Isoform 2 OS=Homo sapiens OX=9606\nMKTAYIAKQR\n"
+        )
+        resp = _make_mock_response(fasta_with_isoform)
+        op = InsertProteinsOperation()
+        with patch.object(op._http, "get", return_value=resp):
+            result = op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+
+        assert result.result["isoform_records"] == 1
+
+    def test_progress_emission_with_total(self):
+        """The page_done event carries _progress_current and the header-derived _progress_total."""
+        session = _make_mock_session()
+        emit = _capturing_emit()
+
+        resp = _make_mock_response(FASTA_ONE)
+        resp.headers["X-Total-Results"] = "100"
+
+        op = InsertProteinsOperation()
+        with patch.object(op._http, "get", return_value=resp):
+            op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False},
+                emit=emit,
+            )
+        # Only the per-page completion events are of interest; one page was served.
+        page_done_events = [
+            c for c in emit.calls if c["event"] == "insert_proteins.page_done"
+        ]
+        assert len(page_done_events) == 1
+        fields = page_done_events[0]["fields"]
+        assert fields["_progress_current"] == 1
+        assert fields["_progress_total"] == 100  # matches the X-Total-Results value set above
+
+    def test_include_isoforms_false_omits_param(self):
+        """With include_isoforms=False the requested URL contains no includeIsoform parameter."""
+        session = _make_mock_session()
+        resp = _make_mock_response(FASTA_ONE)
+        op = InsertProteinsOperation()
+        with patch.object(op._http, "get", return_value=resp) as mock_get:
+            op.execute(
+                session,
+                {"search_criteria": "q", "compressed": False, "include_isoforms": False},
+                emit=_noop_emit,
+            )
+        called_url = mock_get.call_args[0][0]  # URL is the first positional argument of get()
+        assert "includeIsoform" not in called_url
+
 
 # ---------------------------------------------------------------------------
 # Integration test — full round-trip against real Postgres
diff --git a/tests/test_integration.py b/tests/test_integration.py
new file mode 100644
index 0000000..d576a4e
--- /dev/null
+++ b/tests/test_integration.py
@@ -0,0 +1,627 @@
+"""Integration tests against real PostgreSQL + pgvector.
+
+Run with: poetry run pytest --with-postgres -m integration
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+import protea.infrastructure.orm.models  # noqa: F401 — register all models
+from protea.infrastructure.orm.base import Base
+from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
+from protea.infrastructure.orm.models.annotation.go_term import GOTerm
+from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
+from protea.infrastructure.orm.models.annotation.protein_go_annotation import ProteinGOAnnotation
+from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
+from protea.infrastructure.orm.models.embedding.go_prediction import GOPrediction
+from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+from protea.infrastructure.orm.models.embedding.sequence_embedding import SequenceEmbedding
+from protea.infrastructure.orm.models.job import Job, JobEvent, JobStatus
+from protea.infrastructure.orm.models.protein.protein import Protein
+from protea.infrastructure.orm.models.query.query_set import QuerySet, QuerySetEntry
+from protea.infrastructure.orm.models.sequence.sequence import Sequence
+
+_noop_emit = lambda *_: None  # noqa: E731
+# Minimal OBO text for the loader tests: four [Term] stanzas plus one [Typedef] stanza.
+_OBO_SAMPLE = """\
+format-version: 1.2
+data-version: releases/2024-01-17
+
+[Term]
+id: GO:0003674
+name: molecular_function
+namespace: molecular_function
+
+[Term]
+id: GO:0008150
+name: biological_process
+namespace: biological_process
+
+[Term]
+id: GO:0005575
+name: cellular_component
+namespace: cellular_component
+
+[Term]
+id: GO:0003824
+name: catalytic activity
+namespace: molecular_function
+is_a: GO:0003674
+
+[Typedef]
+id: part_of
+"""
+
+
+@pytest.fixture()
+def db(postgres_url: str):
+    """Yield an engine whose schema was dropped and recreated for this test."""
+    pg_engine = create_engine(postgres_url, future=True)
+    for rebuild in (Base.metadata.drop_all, Base.metadata.create_all):
+        rebuild(pg_engine)  # drop first, then create: each test starts from empty tables
+    yield pg_engine
+    pg_engine.dispose()
+
+
+# ---------------------------------------------------------------------------
+# Load ontology snapshot — full round-trip
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_load_ontology_snapshot_roundtrip(db):
+    from protea.core.operations.load_ontology_snapshot import LoadOntologySnapshotOperation
+
+    expected_version = "releases/2024-01-17"
+    loader = LoadOntologySnapshotOperation()
+    payload = {"obo_url": "http://example.org/go.obo"}
+
+    # Feed the in-memory OBO sample to the loader in place of its _download().
+    with patch.object(loader, "_download", return_value=_OBO_SAMPLE):
+        with Session(db, future=True) as write_session:
+            outcome = loader.execute(write_session, payload, emit=_noop_emit)
+            write_session.commit()
+
+    summary = outcome.result
+    assert summary["terms_inserted"] == 4
+    assert summary["obo_version"] == expected_version
+
+    # Read back through a second session to inspect what was stored.
+    with Session(db, future=True) as read_session:
+        stored_snapshot = read_session.query(OntologySnapshot).one()
+        assert stored_snapshot.obo_version == expected_version
+
+        term_query = read_session.query(GOTerm)
+        stored_terms = term_query.filter_by(ontology_snapshot_id=stored_snapshot.id).all()
+        assert len(stored_terms) == 4
+
+        stored_go_ids = {term.go_id for term in stored_terms}
+        assert "GO:0003674" in stored_go_ids
+        assert "GO:0003824" in stored_go_ids
+
+        # The molecular_function term is expected to be stored with aspect "F".
+        assert term_query.filter_by(go_id="GO:0003674").one().aspect == "F"
+
+
+@pytest.mark.integration
+def test_load_ontology_snapshot_idempotent(db):
+    from protea.core.operations.load_ontology_snapshot import LoadOntologySnapshotOperation
+
+    outcomes = []
+
+    # Load the same OBO sample twice; every run gets its own operation instance,
+    # payload dict and session, and is committed before the next one starts.
+    for _attempt in range(2):
+        loader = LoadOntologySnapshotOperation()
+        payload = {"obo_url": "http://example.org/go.obo"}
+        with patch.object(loader, "_download", return_value=_OBO_SAMPLE):
+            with Session(db, future=True) as session:
+                outcomes.append(loader.execute(session, payload, emit=_noop_emit))
+                session.commit()
+
+    # The repeated load must report that it was skipped ...
+    second_outcome = outcomes[-1]
+    assert second_outcome.result["skipped"] is True
+
+    # ... and there must still be exactly one snapshot row.
+    with Session(db, future=True) as session:
+        assert session.query(OntologySnapshot).count() == 1
+
+
+# ---------------------------------------------------------------------------
+# Store embeddings — pgvector round-trip
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_store_embeddings_roundtrip(db):  # store one vector, read it back, then store again
+    from protea.core.operations.compute_embeddings import StoreEmbeddingsOperation
+
+    with Session(db, future=True) as session:
+        # Setup: create the EmbeddingConfig, Sequence and parent Job rows the payload refers to
+        config = EmbeddingConfig(
+            model_name="test/model",
+            model_backend="esm",
+            layer_indices=[0],
+            layer_agg="mean",
+            pooling="mean",
+            normalize_residues=False,
+            normalize=True,
+            max_length=1022,
+            use_chunking=False,
+            chunk_size=512,
+            chunk_overlap=0,
+        )
+        session.add(config)
+
+        seq = Sequence(sequence="MKVLWAGS", sequence_hash=Sequence.compute_hash("MKVLWAGS"))
+        session.add(seq)
+
+        parent = Job(operation="compute_embeddings", queue_name="protea.embeddings",
+                     status=JobStatus.RUNNING, progress_current=0, progress_total=1)
+        session.add(parent)
+        session.flush()  # presumably so the ids read below are already assigned
+
+        config_id = config.id  # ids are copied to locals for use after this session closes
+        seq_id = seq.id
+        parent_id = parent.id
+        session.commit()
+
+    # Execute store_embeddings
+    op = StoreEmbeddingsOperation()
+    vec = [0.1, 0.2, 0.3, 0.4]
+    payload = {
+        "parent_job_id": str(parent_id),  # the two id fields are sent as strings ...
+        "embedding_config_id": str(config_id),
+        "skip_existing": True,
+        "sequences": [{
+            "sequence_id": seq_id,  # ... while sequence_id is passed through unconverted
+            "chunks": [{
+                "chunk_index_s": 0,
+                "chunk_index_e": None,
+                "vector": vec,
+                "embedding_dim": 4,  # equals len(vec)
+            }],
+        }],
+    }
+
+    with Session(db, future=True) as session:
+        result = op.execute(session, payload, emit=_noop_emit)
+        session.commit()
+
+    assert result.result["embeddings_stored"] == 1
+
+    # Verify embedding in DB
+    with Session(db, future=True) as session:
+        emb = session.query(SequenceEmbedding).filter_by(sequence_id=seq_id).one()
+        assert emb.embedding_config_id == config_id
+        assert emb.embedding_dim == 4
+        stored_vec = list(emb.embedding)
+        np.testing.assert_allclose(stored_vec, vec, atol=1e-5)  # compare within a tolerance
+
+    # Second run with the identical payload — skip_existing should prevent re-insert
+    with Session(db, future=True) as session:
+        result2 = op.execute(session, payload, emit=_noop_emit)
+        session.commit()
+
+    assert result2.result["sequences_skipped"] == 1
+    assert result2.result["embeddings_stored"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Store predictions — round-trip with parent progress
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_store_predictions_roundtrip(db):  # store two predictions; the parent job must close
+    from protea.core.operations.predict_go_terms import StorePredictionsOperation
+
+    with Session(db, future=True) as session:
+        # Setup: EmbeddingConfig, AnnotationSet, OntologySnapshot, PredictionSet, GOTerm, Job
+        config = EmbeddingConfig(
+            model_name="test/model",
+            model_backend="esm",
+            layer_indices=[0],
+            layer_agg="mean",
+            pooling="mean",
+            normalize_residues=False,
+            normalize=True,
+            max_length=1022,
+            use_chunking=False,
+            chunk_size=512,
+            chunk_overlap=0,
+        )
+        session.add(config)
+
+        snap = OntologySnapshot(obo_version="test-v1", obo_url="http://test")
+        session.add(snap)
+        session.flush()  # snap.id is referenced by the rows built next
+
+        go_term1 = GOTerm(
+            go_id="GO:0003674",
+            name="molecular_function",
+            aspect="F",
+            ontology_snapshot_id=snap.id,
+        )
+        go_term2 = GOTerm(
+            go_id="GO:0008150",
+            name="biological_process",
+            aspect="P",
+            ontology_snapshot_id=snap.id,
+        )
+        session.add_all([go_term1, go_term2])
+
+        ann_set = AnnotationSet(
+            ontology_snapshot_id=snap.id,
+            source="test",
+            source_version="v1",
+        )
+        session.add(ann_set)
+        session.flush()  # config.id and ann_set.id are referenced by the PredictionSet below
+
+        pred_set = PredictionSet(
+            embedding_config_id=config.id,
+            annotation_set_id=ann_set.id,
+            ontology_snapshot_id=snap.id,
+            limit_per_entry=5,
+            meta={},
+        )
+        session.add(pred_set)
+
+        parent = Job(
+            operation="predict_go_terms",
+            queue_name="protea.jobs",
+            status=JobStatus.RUNNING,
+            progress_current=0,
+            progress_total=1,  # one batch in total; see the parent assertions further down
+        )
+        session.add(parent)
+        session.flush()  # presumably so the ids read below are already assigned
+
+        pred_set_id = pred_set.id  # ids are copied to locals for use after this session closes
+        parent_id = parent.id
+        go_term1_id = go_term1.id
+        go_term2_id = go_term2.id
+        session.commit()
+
+    # Execute store_predictions
+    op = StorePredictionsOperation()
+    payload = {
+        "parent_job_id": str(parent_id),
+        "prediction_set_id": str(pred_set_id),
+        "predictions": [
+            {
+                "protein_accession": "P12345",
+                "go_term_id": go_term1_id,
+                "ref_protein_accession": "Q99999",
+                "distance": 0.15,
+                "qualifier": "enables",
+                "evidence_code": "IDA",
+            },
+            {  # unlike the first entry, this one has no qualifier / evidence_code keys
+                "protein_accession": "P12345",
+                "go_term_id": go_term2_id,
+                "ref_protein_accession": "Q88888",
+                "distance": 0.25,
+            },
+        ],
+    }
+
+    events = []
+    def capture_emit(event, msg, fields, level):  # records only the event name
+        events.append(event)
+
+    with Session(db, future=True) as session:
+        result = op.execute(session, payload, emit=capture_emit)
+        session.commit()
+
+    assert result.result["predictions_inserted"] == 2
+
+    # Parent job should be closed (progress_total=1, this was the only batch)
+    assert "store_predictions.parent_succeeded" in events
+
+    # Verify predictions in DB
+    with Session(db, future=True) as session:
+        preds = session.query(GOPrediction).filter_by(prediction_set_id=pred_set_id).all()
+        assert len(preds) == 2
+        distances = sorted(p.distance for p in preds)  # sorted: row order is not assumed
+        np.testing.assert_allclose(distances, [0.15, 0.25], atol=1e-5)
+
+        # Parent job should be SUCCEEDED
+        parent = session.get(Job, parent_id)
+        assert parent.status == JobStatus.SUCCEEDED
+        assert parent.finished_at is not None
+
+
+# ---------------------------------------------------------------------------
+# Job lifecycle — parent-child with atomic progress
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_job_parent_child_progress(db):  # three increments; the last one closes the job
+    with Session(db, future=True) as session:
+        parent = Job(
+            operation="compute_embeddings",
+            queue_name="protea.embeddings",
+            status=JobStatus.RUNNING,
+            progress_current=0,
+            progress_total=3,
+        )
+        session.add(parent)
+        session.flush()  # presumably so parent.id is assigned before it is read
+        parent_id = parent.id
+
+        # Add the two events that exist before any batch has run
+        session.add(JobEvent(job_id=parent_id, event="job.created", fields={}))
+        session.add(JobEvent(job_id=parent_id, event="job.started", fields={}))
+        session.commit()
+
+    # Simulate 3 child batches incrementing progress (the test issues the UPDATEs itself)
+    from sqlalchemy import update as sa_update
+
+    from protea.core.utils import utcnow
+
+    for i in range(3):
+        with Session(db, future=True) as session:
+            row = session.execute(
+                sa_update(Job)
+                .where(Job.id == parent_id, Job.status == JobStatus.RUNNING)  # RUNNING jobs only
+                .values(progress_current=Job.progress_current + 1)  # increment computed in SQL
+                .returning(Job.progress_current, Job.progress_total)  # post-update counters
+            ).fetchone()
+            assert row is not None
+            assert row.progress_current == i + 1
+
+            if row.progress_current == row.progress_total:  # true only on the third pass here
+                session.execute(
+                    sa_update(Job)
+                    .where(Job.id == parent_id, Job.status == JobStatus.RUNNING)
+                    .values(status=JobStatus.SUCCEEDED, finished_at=utcnow())
+                )
+                session.add(JobEvent(
+                    job_id=parent_id,
+                    event="job.succeeded",
+                    fields={"via": "last_batch"},
+                    level="info",
+                ))
+            session.commit()
+
+    # Verify final state
+    with Session(db, future=True) as session:
+        job = session.get(Job, parent_id)
+        assert job.status == JobStatus.SUCCEEDED
+        assert job.progress_current == 3
+        assert job.progress_total == 3
+        assert job.finished_at is not None
+        assert len(job.events) == 3  # created, started, succeeded
+
+
+# ---------------------------------------------------------------------------
+# Load GOA annotations — round-trip
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_load_goa_annotations_roundtrip(db):  # ontology -> protein -> one GAF record -> DB
+    from protea.core.operations.load_goa_annotations import LoadGOAAnnotationsOperation
+    from protea.core.operations.load_ontology_snapshot import LoadOntologySnapshotOperation
+
+    # Step 1: Load ontology (from _OBO_SAMPLE, no download)
+    ont_op = LoadOntologySnapshotOperation()
+    with patch.object(ont_op, "_download", return_value=_OBO_SAMPLE):
+        with Session(db, future=True) as session:
+            ont_result = ont_op.execute(
+                session, {"obo_url": "http://example.org/go.obo"}, emit=_noop_emit,
+            )
+            session.commit()
+
+    snapshot_id = ont_result.result["ontology_snapshot_id"]  # forwarded as-is in step 4
+
+    # Step 2: Insert proteins so annotations can be filtered
+    with Session(db, future=True) as session:
+        seq = Sequence(sequence="MKVLWAGS", sequence_hash=Sequence.compute_hash("MKVLWAGS"))
+        session.add(seq)
+        session.flush()  # seq.id is referenced by the Protein below
+        protein = Protein(
+            accession="P12345",
+            canonical_accession="P12345",
+            is_canonical=True,
+            sequence_id=seq.id,
+        )
+        session.add(protein)
+        session.commit()
+
+    # Step 3: Build a GAF record (as _stream_gaf yields dicts)
+    gaf_records = [
+        {
+            "accession": "P12345",  # matches the protein inserted in step 2
+            "go_id": "GO:0003824",  # one of the terms in _OBO_SAMPLE
+            "qualifier": "enables",
+            "evidence_code": "IDA",
+            "db_reference": "PMID:123",
+            "with_from": "",
+            "assigned_by": "UniProt",
+            "annotation_date": "20240101",
+        },
+    ]
+
+    # Step 4: Load annotations
+    goa_op = LoadGOAAnnotationsOperation()
+    with patch.object(goa_op, "_stream_gaf", return_value=iter(gaf_records)):  # one-shot iterator
+        with Session(db, future=True) as session:
+            result = goa_op.execute(
+                session,
+                {
+                    "ontology_snapshot_id": snapshot_id,
+                    "gaf_url": "http://example.org/goa.gaf.gz",
+                    "source_version": "2024-03",
+                },
+                emit=_noop_emit,
+            )
+            session.commit()
+
+    assert result.result["annotations_inserted"] > 0  # NOTE(review): one record fed in; == 1 may be tighter
+
+    # Verify annotation in DB
+    with Session(db, future=True) as session:
+        ann_set = session.query(AnnotationSet).one()
+        assert ann_set.source == "goa"  # not in the payload above; presumably set by the operation
+
+        annotations = session.query(ProteinGOAnnotation).all()
+        assert len(annotations) >= 1
+        assert annotations[0].protein_accession == "P12345"
+        assert annotations[0].evidence_code == "IDA"
+
+
+# ---------------------------------------------------------------------------
+# Full pipeline: QuerySet → Embeddings → Predictions
+# ---------------------------------------------------------------------------
+
+@pytest.mark.integration
+def test_full_pipeline_queryset_to_predictions(db):
+    """End-to-end: create QuerySet, store embeddings, store predictions."""
+    from protea.core.operations.compute_embeddings import StoreEmbeddingsOperation
+    from protea.core.operations.predict_go_terms import StorePredictionsOperation
+
+    dim = 4  # length of the embedding vectors used in step 7
+
+    with Session(db, future=True) as session:
+        # 1. Create EmbeddingConfig
+        config = EmbeddingConfig(
+            model_name="test/model", model_backend="esm",
+            layer_indices=[0], layer_agg="mean", pooling="mean",
+            normalize_residues=False, normalize=True,
+            max_length=1022, use_chunking=False, chunk_size=512, chunk_overlap=0,
+        )
+        session.add(config)
+
+        # 2. Create Ontology + GOTerm
+        snap = OntologySnapshot(obo_version="pipeline-test", obo_url="http://test")
+        session.add(snap)
+        session.flush()  # snap.id is referenced by the GOTerm and AnnotationSet below
+
+        go_mf = GOTerm(go_id="GO:0003674", name="molecular_function", aspect="F",
+                       ontology_snapshot_id=snap.id)
+        session.add(go_mf)
+
+        # 3. Create AnnotationSet
+        ann_set = AnnotationSet(ontology_snapshot_id=snap.id, source="test", source_version="v1")
+        session.add(ann_set)
+
+        # 4. Create sequences + proteins
+        seq1 = Sequence(sequence="MKVLWAGS", sequence_hash=Sequence.compute_hash("MKVLWAGS"))
+        seq2 = Sequence(sequence="ACDEFGHI", sequence_hash=Sequence.compute_hash("ACDEFGHI"))
+        session.add_all([seq1, seq2])
+        session.flush()  # the sequence ids are referenced by the proteins below
+
+        p1 = Protein(accession="Q_QUERY", canonical_accession="Q_QUERY",
+                      is_canonical=True, sequence_id=seq1.id)
+        p2 = Protein(accession="R_REF", canonical_accession="R_REF",
+                      is_canonical=True, sequence_id=seq2.id)
+        session.add_all([p1, p2])
+
+        # 5. Create QuerySet
+        qs = QuerySet(name="pipeline-test", description="integration test")
+        session.add(qs)
+        session.flush()  # qs.id is referenced by the entry below
+
+        entry = QuerySetEntry(query_set_id=qs.id, sequence_id=seq1.id, accession="Q_QUERY")
+        session.add(entry)
+
+        # 6. Create embedding parent job
+        embed_job = Job(operation="compute_embeddings", queue_name="protea.embeddings",
+                        status=JobStatus.RUNNING, progress_current=0, progress_total=1)
+        session.add(embed_job)
+        session.flush()
+
+        ids = {  # collected while the session is open; reused by the later steps
+            "config_id": config.id, "snap_id": snap.id, "ann_set_id": ann_set.id,
+            "go_term_id": go_mf.id, "seq1_id": seq1.id, "seq2_id": seq2.id,
+            "qs_id": qs.id, "embed_job_id": embed_job.id,
+        }
+        session.commit()
+
+    # 7. Store embeddings for both sequences
+    store_emb = StoreEmbeddingsOperation()
+    emb_payload = {  # note: this payload carries no "skip_existing" key
+        "parent_job_id": str(ids["embed_job_id"]),
+        "embedding_config_id": str(ids["config_id"]),
+        "sequences": [
+            {"sequence_id": ids["seq1_id"], "chunks": [
+                {"chunk_index_s": 0, "chunk_index_e": None,
+                 "vector": [0.9, 0.1, 0.0, 0.0], "embedding_dim": dim}
+            ]},
+            {"sequence_id": ids["seq2_id"], "chunks": [
+                {"chunk_index_s": 0, "chunk_index_e": None,
+                 "vector": [0.0, 0.0, 0.1, 0.9], "embedding_dim": dim}
+            ]},
+        ],
+    }
+    with Session(db, future=True) as session:
+        emb_result = store_emb.execute(session, emb_payload, emit=_noop_emit)
+        session.commit()
+
+    assert emb_result.result["embeddings_stored"] == 2
+
+    # 8. Create prediction job + PredictionSet
+    with Session(db, future=True) as session:
+        pred_job = Job(operation="predict_go_terms", queue_name="protea.jobs",
+                       status=JobStatus.RUNNING, progress_current=0, progress_total=1)
+        session.add(pred_job)
+
+        pred_set = PredictionSet(
+            embedding_config_id=ids["config_id"],
+            annotation_set_id=ids["ann_set_id"],
+            ontology_snapshot_id=ids["snap_id"],
+            query_set_id=ids["qs_id"],
+            limit_per_entry=5, meta={},
+        )
+        session.add(pred_set)
+        session.flush()  # presumably so the two ids read below are already assigned
+        pred_job_id = pred_job.id
+        pred_set_id = pred_set.id
+        session.commit()
+
+    # 9. Store predictions
+    store_pred = StorePredictionsOperation()
+    pred_payload = {
+        "parent_job_id": str(pred_job_id),
+        "prediction_set_id": str(pred_set_id),
+        "predictions": [{  # written by hand; this test runs no nearest-neighbour search
+            "protein_accession": "Q_QUERY",
+            "go_term_id": ids["go_term_id"],
+            "ref_protein_accession": "R_REF",
+            "distance": 0.85,
+            "qualifier": "enables",
+            "evidence_code": "IDA",
+        }],
+    }
+    with Session(db, future=True) as session:
+        pred_result = store_pred.execute(session, pred_payload, emit=_noop_emit)
+        session.commit()
+
+    assert pred_result.result["predictions_inserted"] == 1
+
+    # 10. Verify full chain in DB
+    with Session(db, future=True) as session:
+        # QuerySet has entry
+        entries = session.query(QuerySetEntry).filter_by(query_set_id=ids["qs_id"]).all()
+        assert len(entries) == 1
+
+        # Embeddings exist
+        embs = session.query(SequenceEmbedding).filter_by(
+            embedding_config_id=ids["config_id"]
+        ).all()
+        assert len(embs) == 2
+
+        # Predictions exist
+        preds = session.query(GOPrediction).filter_by(prediction_set_id=pred_set_id).all()
+        assert len(preds) == 1
+        assert preds[0].protein_accession == "Q_QUERY"
+        assert preds[0].distance == pytest.approx(0.85, abs=1e-5)  # compared within a tolerance
+
+        # Predict job should be SUCCEEDED
+        pred_job = session.get(Job, pred_job_id)
+        assert pred_job.status == JobStatus.SUCCEEDED
diff --git a/tests/test_load_goa_annotations.py b/tests/test_load_goa_annotations.py
index a2bfde8..1fb880d 100644
--- a/tests/test_load_goa_annotations.py
+++ b/tests/test_load_goa_annotations.py
@@ -1,10 +1,16 @@
+"""
+Unit tests for LoadGOAAnnotationsOperation.
+No DB or network required — everything is mocked.
+"""
 from __future__ import annotations
 
+import io
 import uuid
 from unittest.mock import MagicMock, patch
 
 import pytest
 
+from protea.core.contracts.operation import OperationResult
 from protea.core.operations.load_goa_annotations import (
     LoadGOAAnnotationsOperation,
     LoadGOAAnnotationsPayload,
@@ -13,14 +19,49 @@
 _noop_emit = lambda *_: None  # noqa: E731
 
 _SNAPSHOT_ID = str(uuid.uuid4())
+_ANNOTATION_SET_ID = uuid.uuid4()
 
-_GAF_SAMPLE = """\
-!gaf-version: 2.2
-!Generated by GO
-UniProtKB\tP12345\tproteinA\tenables\tGO:0003824\tPMID:123\tIDA\t\t\t\t\tprotein\t\t20240101\tUniProt\t\t
-UniProtKB\tQ67890\tproteinB\tinvolved_in\tGO:0008150\tPMID:456\tIEA\t\t\t\t\tprotein\t\t20240101\tUniProt\t\t
-UniProtKB\tXXXXXX\tunknown\tenables\tGO:0003824\tPMID:789\tIDA\t\t\t\t\tprotein\t\t20240101\tUniProt\t\t
-"""
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_emit():
+    """Build an emit callback plus the list it appends each recorded call to."""
+    recorded = []
+
+    def emit(event, message, fields, level):
+        entry = {"event": event, "fields": fields, "level": level}  # message is not kept
+        recorded.append(entry)
+    return emit, recorded
+
+
+def _gaf_line(
+    accession="P12345",
+    go_id="GO:0003674",
+    qualifier="enables",
+    evidence="IDA",
+    db_ref="PMID:1234",
+    with_from="",
+    date="20240101",
+    assigned_by="UniProt",
+):
+    """Return one tab-separated, 15-field GAF row; fields not set here stay empty."""
+    row = [
+        "UniProtKB",  # field 0
+        accession,
+        "",  # field 2 is left empty
+        qualifier, go_id, db_ref, evidence, with_from,  # fields 3-7
+        *([""] * 5),  # fields 8-12 are left empty
+        date,
+        assigned_by,  # field 14
+    ]
+    return "\t".join(row)
+
+
+# ---------------------------------------------------------------------------
+# Payload validation
+# ---------------------------------------------------------------------------
 
 
 class TestLoadGOAAnnotationsPayload:
@@ -34,40 +75,105 @@ def test_valid(self) -> None:
         assert p.page_size == 10000
 
     def test_missing_required_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):  # presumably a pydantic ValidationError (ValueError subclass)
             LoadGOAAnnotationsPayload.model_validate({
                 "gaf_url": "https://example.org/goa.gaf.gz",
                 "source_version": "2024-03",
             })
 
     def test_empty_snapshot_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):  # whitespace-only id; presumably a pydantic ValidationError
             LoadGOAAnnotationsPayload.model_validate({
                 "ontology_snapshot_id": "  ",
                 "gaf_url": "https://example.org/goa.gaf.gz",
                 "source_version": "2024-03",
             })
 
+    def test_empty_gaf_url_raises(self) -> None:
+        """An empty gaf_url is rejected when the payload is constructed."""
+        blank_url = ""
+        with pytest.raises(ValueError):
+            LoadGOAAnnotationsPayload(
+                ontology_snapshot_id=_SNAPSHOT_ID, gaf_url=blank_url, source_version="v1"
+            )
+
+    def test_empty_source_version_raises(self) -> None:
+        """An empty source_version is rejected when the payload is constructed."""
+        fields = dict(ontology_snapshot_id=_SNAPSHOT_ID, source_version="")
+        fields["gaf_url"] = "https://example.com/goa.gaf.gz"
+
+        with pytest.raises(ValueError):
+            LoadGOAAnnotationsPayload(**fields)
+
+    def test_page_size_must_be_positive(self) -> None:
+        """A page_size of 0 is rejected when the payload is constructed."""
+        fields = dict(
+            ontology_snapshot_id=_SNAPSHOT_ID, source_version="v1", page_size=0,
+            gaf_url="https://example.com/goa.gaf.gz",
+        )
+        with pytest.raises(ValueError):
+            LoadGOAAnnotationsPayload(**fields)
+
+    def test_strings_are_stripped(self) -> None:
+        """Surrounding whitespace is removed from the three string fields."""
+        payload = LoadGOAAnnotationsPayload(
+            source_version="  v1  ",
+            gaf_url="  https://example.com/goa.gaf.gz  ",
+            ontology_snapshot_id=f"  {_SNAPSHOT_ID}  ",
+        )
+        stripped = (payload.ontology_snapshot_id, payload.gaf_url, payload.source_version)
+        assert stripped == (_SNAPSHOT_ID, "https://example.com/goa.gaf.gz", "v1")
+
+    def test_defaults(self) -> None:
+        """Optional fields take their defaults when only required ones are given."""
+        payload = LoadGOAAnnotationsPayload(
+            ontology_snapshot_id=_SNAPSHOT_ID, source_version="v1",
+            gaf_url="https://example.com/goa.gaf.gz",
+        )
+        assert payload.total_limit is None
+        assert payload.commit_every_page is True
+        assert payload.timeout_seconds == 300
+
+
+# ---------------------------------------------------------------------------
+# _store_buffer
+# ---------------------------------------------------------------------------
+
 
 class TestStoreBuffer:
     def _op(self) -> LoadGOAAnnotationsOperation:
         return LoadGOAAnnotationsOperation()
 
+    def _make_record(self, accession="P12345", go_id="GO:0003824", evidence="IDA"):
+        """Return one record dict shaped like the entries passed to _store_buffer."""
+        # Only accession, go_id and evidence_code vary; every other field is fixed.
+        record = {"accession": accession, "go_id": go_id}
+        record["qualifier"] = "enables"
+        record["evidence_code"] = evidence
+        record["db_reference"] = "PMID:1"
+        record["with_from"] = ""
+        record["assigned_by"] = "UniProt"
+        record["annotation_date"] = "20240101"
+        return record
+
     def test_skips_unknown_accession(self) -> None:
         op = self._op()
         session = MagicMock()
-        records = [
-            {
-                "accession": "UNKNOWN",
-                "go_id": "GO:0003824",
-                "qualifier": "enables",
-                "evidence_code": "IDA",
-                "db_reference": "PMID:1",
-                "with_from": "",
-                "assigned_by": "UniProt",
-                "annotation_date": "20240101",
-            }
-        ]
+        records = [self._make_record(accession="UNKNOWN")]
+        inserted, skipped = op._store_buffer(
+            session,
+            records,
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+        )
+        assert inserted == 0
+        assert skipped == 1
+
+    def test_skips_empty_accession(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        records = [self._make_record(accession="  ")]
         inserted, skipped = op._store_buffer(
             session,
             records,
@@ -77,23 +183,11 @@ def test_skips_unknown_accession(self) -> None:
         )
         assert inserted == 0
         assert skipped == 1
-        session.add_all.assert_not_called()
 
     def test_skips_unknown_go_term(self) -> None:
         op = self._op()
         session = MagicMock()
-        records = [
-            {
-                "accession": "P12345",
-                "go_id": "GO:9999999",
-                "qualifier": "enables",
-                "evidence_code": "IDA",
-                "db_reference": "PMID:1",
-                "with_from": "",
-                "assigned_by": "UniProt",
-                "annotation_date": "20240101",
-            }
-        ]
+        records = [self._make_record(go_id="GO:9999999")]
         inserted, skipped = op._store_buffer(
             session,
             records,
@@ -108,26 +202,8 @@ def test_inserts_valid_records(self) -> None:
         op = self._op()
         session = MagicMock()
         records = [
-            {
-                "accession": "P12345",
-                "go_id": "GO:0003824",
-                "qualifier": "enables",
-                "evidence_code": "IDA",
-                "db_reference": "PMID:123",
-                "with_from": "",
-                "assigned_by": "UniProt",
-                "annotation_date": "20240101",
-            },
-            {
-                "accession": "Q67890",
-                "go_id": "GO:0008150",
-                "qualifier": "involved_in",
-                "evidence_code": "IEA",
-                "db_reference": "PMID:456",
-                "with_from": "",
-                "assigned_by": "UniProt",
-                "annotation_date": "20240101",
-            },
+            self._make_record(accession="P12345", go_id="GO:0003824"),
+            self._make_record(accession="Q67890", go_id="GO:0008150", evidence="IEA"),
         ]
         inserted, skipped = op._store_buffer(
             session,
@@ -138,5 +214,573 @@ def test_inserts_valid_records(self) -> None:
         )
         assert inserted == 2
         assert skipped == 0
-        # Uses bulk pg_insert().on_conflict_do_nothing() instead of add_all
         session.execute.assert_called()
+
+    def test_deduplicates_within_buffer(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        rec = self._make_record()
+        records = [rec.copy(), rec.copy(), rec.copy()]  # three identical records in one buffer
+        inserted, skipped = op._store_buffer(
+            session,
+            records,
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+        )
+        assert inserted == 1  # one row for the three copies
+        assert skipped == 2  # the repeats are reported as skipped
+
+    def test_different_evidence_codes_not_deduplicated(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        records = [
+            self._make_record(evidence="IDA"),  # the two records differ
+            self._make_record(evidence="IEA"),  # only in their evidence code
+        ]
+        inserted, skipped = op._store_buffer(
+            session,
+            records,
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+        )
+        assert inserted == 2  # a differing evidence code must keep both records
+        assert skipped == 0
+
+    def test_mixed_valid_and_invalid(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        records = [
+            self._make_record(accession="P12345"),
+            self._make_record(accession="UNKNOWN"),  # not in valid_accessions below
+            self._make_record(accession="Q67890", go_id="GO:0008150"),
+            self._make_record(go_id="GO:INVALID"),  # not a key of go_term_map below
+        ]
+        inserted, skipped = op._store_buffer(
+            session,
+            records,
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345", "Q67890"},
+            go_term_map={"GO:0003824": 1, "GO:0008150": 2},
+        )
+        assert inserted == 2  # the two fully resolvable records
+        assert skipped == 2  # one unknown accession + one unknown GO id
+
+    def test_empty_buffer(self) -> None:
+        op = self._op()
+        session = MagicMock()
+        inserted, skipped = op._store_buffer(
+            session, [], uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"}, go_term_map={"GO:0003824": 1},
+        )
+        assert inserted == 0
+        assert skipped == 0
+        session.execute.assert_not_called()  # an empty buffer must not issue any statement
+
+    def test_empty_evidence_treated_as_none_for_dedup(self) -> None:
+        """Empty string evidence_code becomes None; two such records are duplicates."""
+        op = self._op()
+        session = MagicMock()
+        records = [
+            self._make_record(evidence=""),  # both records carry an empty
+            self._make_record(evidence=""),  # evidence code and nothing else differs
+        ]
+        inserted, skipped = op._store_buffer(
+            session,
+            records,
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+        )
+        assert inserted == 1  # the pair collapses to a single insert
+        assert skipped == 1
+
+
+# ---------------------------------------------------------------------------
+# _stream_gaf
+# ---------------------------------------------------------------------------
+
+
+class TestStreamGaf:  # _stream_gaf tests; requests.get is patched, so nothing touches the network
+    def setup_method(self):
+        self.op = LoadGOAAnnotationsOperation()
+
+    def _stream_from_text(self, text: str, url="https://example.com/goa.gaf"):
+        """Mock requests.get and stream GAF text through _stream_gaf."""
+        payload = LoadGOAAnnotationsPayload(
+            ontology_snapshot_id=_SNAPSHOT_ID,
+            gaf_url=url,
+            source_version="v1",
+        )
+        emit, _ = _make_emit()
+
+        raw = io.BytesIO(text.encode("utf-8"))
+        mock_resp = MagicMock()
+        mock_resp.raw = raw  # in-memory bytes stand in for the response's raw stream
+        mock_resp.raise_for_status = MagicMock()
+
+        with patch("protea.core.operations.load_goa_annotations.requests.get", return_value=mock_resp):
+            return list(self.op._stream_gaf(payload, emit))  # materialise everything _stream_gaf yields
+
+    def test_parses_valid_gaf_line(self):
+        line = _gaf_line(accession="P12345", go_id="GO:0003674", evidence="IDA")
+        records = self._stream_from_text(line + "\n")
+        assert len(records) == 1
+        assert records[0]["accession"] == "P12345"
+        assert records[0]["go_id"] == "GO:0003674"
+        assert records[0]["evidence_code"] == "IDA"
+
+    def test_skips_comment_lines(self):
+        text = "!this is a comment\n" + _gaf_line() + "\n"  # first line starts with "!"
+        records = self._stream_from_text(text)
+        assert len(records) == 1  # only the real GAF line is yielded
+
+    def test_skips_empty_lines(self):
+        text = "\n\n" + _gaf_line() + "\n\n"  # blank lines before and after the record
+        records = self._stream_from_text(text)
+        assert len(records) == 1
+
+    def test_skips_short_lines(self):
+        text = "col1\tcol2\tcol3\n" + _gaf_line() + "\n"  # first line has only three tab-separated columns
+        records = self._stream_from_text(text)
+        assert len(records) == 1
+
+    def test_multiple_records(self):
+        lines = [
+            _gaf_line(accession="A1"),
+            _gaf_line(accession="A2"),
+            _gaf_line(accession="A3"),
+        ]
+        records = self._stream_from_text("\n".join(lines) + "\n")
+        assert len(records) == 3
+        assert [r["accession"] for r in records] == ["A1", "A2", "A3"]  # file order is preserved
+
+    def test_extracts_all_fields(self):
+        line = _gaf_line(
+            accession="Q99999",
+            go_id="GO:0005575",
+            qualifier="located_in",
+            evidence="IEA",
+            db_ref="GO_REF:001",
+            with_from="InterPro:IPR000001",
+            date="20230615",
+            assigned_by="InterPro",
+        )
+        records = self._stream_from_text(line + "\n")
+        r = records[0]
+        assert r["accession"] == "Q99999"
+        assert r["go_id"] == "GO:0005575"
+        assert r["qualifier"] == "located_in"
+        assert r["evidence_code"] == "IEA"
+        assert r["db_reference"] == "GO_REF:001"  # helper kwarg db_ref -> record key db_reference
+        assert r["with_from"] == "InterPro:IPR000001"
+        assert r["annotation_date"] == "20230615"  # helper kwarg date -> record key annotation_date
+        assert r["assigned_by"] == "InterPro"
+
+    def test_gzip_url_uses_gzip_decompression(self):
+        import gzip as gzip_mod
+
+        line = _gaf_line() + "\n"
+        compressed = gzip_mod.compress(line.encode("utf-8"))  # response body is gzip-compressed bytes
+
+        payload = LoadGOAAnnotationsPayload(
+            ontology_snapshot_id=_SNAPSHOT_ID,
+            gaf_url="https://example.com/goa.gaf.gz",  # presumably the .gz suffix selects the gzip path; confirm
+            source_version="v1",
+        )
+        emit, _ = _make_emit()
+
+        raw = io.BytesIO(compressed)
+        mock_resp = MagicMock()
+        mock_resp.raw = raw
+        mock_resp.raise_for_status = MagicMock()
+
+        with patch("protea.core.operations.load_goa_annotations.requests.get", return_value=mock_resp):
+            records = list(self.op._stream_gaf(payload, emit))
+        assert len(records) == 1  # the single compressed line was decoded into one record
+
+    def test_empty_file_returns_no_records(self):
+        records = self._stream_from_text("")
+        assert records == []
+
+    def test_file_with_only_comments(self):
+        text = "!comment1\n!comment2\n"
+        records = self._stream_from_text(text)
+        assert records == []
+
+
+# ---------------------------------------------------------------------------
+# _load_accessions
+# ---------------------------------------------------------------------------
+
+
+class TestLoadAccessions:  # _load_accessions tests against a mocked session
+    def setup_method(self):
+        self.op = LoadGOAAnnotationsOperation()
+
+    def test_returns_set_of_accessions(self):
+        session = MagicMock()
+        session.scalars.return_value = iter(["P12345", "Q99999"])  # one-shot iterator as the query result
+        emit, events = _make_emit()
+
+        result = self.op._load_accessions(session, emit)
+        assert result == {"P12345", "Q99999"}
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.load_accessions_start" in event_names
+        assert "load_goa_annotations.load_accessions_done" in event_names
+
+    def test_returns_empty_set(self):
+        session = MagicMock()
+        session.scalars.return_value = iter([])
+        emit, _ = _make_emit()
+
+        result = self.op._load_accessions(session, emit)
+        assert result == set()
+
+    def test_emits_count_in_done_event(self):
+        session = MagicMock()
+        session.scalars.return_value = iter(["A", "B", "C"])
+        emit, events = _make_emit()
+
+        self.op._load_accessions(session, emit)
+        done = [e for e in events if e["event"] == "load_goa_annotations.load_accessions_done"]
+        assert len(done) == 1  # the done event is emitted exactly once
+        assert done[0]["fields"]["canonical_accessions"] == 3  # matches the three accessions above
+
+
+# ---------------------------------------------------------------------------
+# _load_go_term_map
+# ---------------------------------------------------------------------------
+
+
+class TestLoadGoTermMap:  # _load_go_term_map tests against a mocked session.query chain
+    def setup_method(self):
+        self.op = LoadGOAAnnotationsOperation()
+
+    def _mock_session(self, rows):
+        session = MagicMock()
+        query_mock = MagicMock()
+        session.query.return_value = query_mock
+        query_mock.filter.return_value = query_mock  # .filter() returns the same mock so the chain continues
+        query_mock.all.return_value = rows  # session.query(...).filter(...).all() -> rows
+        return session
+
+    def test_returns_mapping(self):
+        session = self._mock_session([("GO:0003674", 1), ("GO:0005575", 2)])  # (go_id, id) pairs
+        emit, events = _make_emit()
+
+        result = self.op._load_go_term_map(session, uuid.uuid4(), emit)  # any UUID works: the query is mocked
+        assert result == {"GO:0003674": 1, "GO:0005575": 2}
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.load_go_terms_start" in event_names
+        assert "load_goa_annotations.load_go_terms_done" in event_names
+
+    def test_empty_ontology(self):
+        session = self._mock_session([])
+        emit, _ = _make_emit()
+
+        result = self.op._load_go_term_map(session, uuid.uuid4(), emit)
+        assert result == {}
+
+    def test_emits_count_in_done_event(self):
+        session = self._mock_session([("GO:0003674", 1)])
+        emit, events = _make_emit()
+
+        self.op._load_go_term_map(session, uuid.uuid4(), emit)
+        done = [e for e in events if e["event"] == "load_goa_annotations.load_go_terms_done"]
+        assert len(done) == 1  # the done event is emitted exactly once
+        assert done[0]["fields"]["go_terms"] == 1  # matches the single row above
+
+
+# ---------------------------------------------------------------------------
+# execute (full integration of all pieces, mocked)
+# ---------------------------------------------------------------------------
+
+
+class TestExecute:  # end-to-end execute() tests; HTTP, AnnotationSet and _store_buffer are all patched
+    def setup_method(self):
+        self.op = LoadGOAAnnotationsOperation()
+        self.snapshot_id = uuid.uuid4()
+
+    def _make_session(self, accessions, go_terms):
+        session = MagicMock()
+        # session.get(OntologySnapshot, id) returns a truthy mock
+        session.get.return_value = MagicMock()
+        # _load_accessions uses session.scalars
+        session.scalars.return_value = iter(accessions)
+        # _load_go_term_map uses session.query
+        query_mock = MagicMock()
+        session.query.return_value = query_mock
+        query_mock.filter.return_value = query_mock
+        query_mock.all.return_value = list(go_terms.items())  # dict -> list of (go_id, id) pairs
+        return session
+
+    def _run(self, gaf_text, accessions, go_terms,
+             page_size=10000, total_limit=None, commit_every_page=True,
+             store_buffer_side_effect=None):
+        session = self._make_session(accessions, go_terms)
+        emit, events = _make_emit()
+
+        ann_set_mock = MagicMock()
+        ann_set_mock.id = _ANNOTATION_SET_ID  # tests below expect this id in the result and events
+
+        payload = {
+            "ontology_snapshot_id": str(self.snapshot_id),
+            "gaf_url": "https://example.com/goa.gaf",
+            "source_version": "v1",
+            "page_size": page_size,
+            "commit_every_page": commit_every_page,
+        }
+        if total_limit is not None:  # the key is left out unless a limit was requested
+            payload["total_limit"] = total_limit
+
+        raw = io.BytesIO(gaf_text.encode("utf-8"))
+        mock_resp = MagicMock()
+        mock_resp.raw = raw
+        mock_resp.raise_for_status = MagicMock()
+
+        # _store_buffer does a lazy import of pg_insert which needs a real
+        # SQLAlchemy Table object, so the whole method is replaced by a fake
+        # that re-implements its accession / GO-term filtering and per-buffer
+        # dedup against the accessions and go_terms passed to this helper.
+        real_valid = set(accessions)
+        real_go = dict(go_terms)
+
+        def fake_store_buffer(_session, records, _ann_set_id, _valid, _go_map):
+            inserted = 0
+            skipped = 0
+            seen = set()  # dedup keys seen within this one call (i.e. one buffer)
+            for rec in records:
+                acc = rec["accession"].strip()
+                if not acc or acc not in real_valid:  # blank or unknown accession
+                    skipped += 1
+                    continue
+                go_id = rec["go_id"].strip()
+                go_term_id = real_go.get(go_id)
+                if go_term_id is None:  # GO id absent from the supplied go_terms
+                    skipped += 1
+                    continue
+                ev = rec["evidence_code"] or None  # empty string is normalised to None
+                key = (_ann_set_id, acc, go_term_id, ev)
+                if key in seen:
+                    skipped += 1
+                    continue
+                seen.add(key)
+                inserted += 1
+            return inserted, skipped
+
+        if store_buffer_side_effect is not None:  # a caller-supplied callable replaces the default fake
+            fake_store_buffer = store_buffer_side_effect
+
+        with patch(
+            "protea.core.operations.load_goa_annotations.requests.get",
+            return_value=mock_resp,
+        ), patch(
+            "protea.core.operations.load_goa_annotations.AnnotationSet",
+            return_value=ann_set_mock,
+        ), patch.object(
+            self.op, "_store_buffer", side_effect=fake_store_buffer,
+        ):
+            result = self.op.execute(session, payload, emit=emit)
+
+        return result, events, session  # session is returned so tests can inspect commit/flush calls
+
+    def test_basic_execution(self):
+        gaf = _gaf_line(accession="P12345", go_id="GO:0003674") + "\n"
+        result, events, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert isinstance(result, OperationResult)
+        assert result.result["annotations_inserted"] == 1
+        assert result.result["annotations_skipped"] == 0
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.start" in event_names
+        assert "load_goa_annotations.done" in event_names
+
+    def test_snapshot_not_found_raises(self):
+        session = MagicMock()
+        session.get.return_value = None  # the snapshot lookup finds nothing
+        emit, _ = _make_emit()
+
+        payload = {
+            "ontology_snapshot_id": str(self.snapshot_id),
+            "gaf_url": "https://example.com/goa.gaf",
+            "source_version": "v1",
+        }
+        with pytest.raises(ValueError, match="not found"):
+            self.op.execute(session, payload, emit=emit)
+
+    def test_no_proteins_returns_zero(self):
+        gaf = _gaf_line() + "\n"
+        result, events, _ = self._run(
+            gaf, accessions=[], go_terms={"GO:0003674": 1},
+        )
+        assert result.result == {"annotations_inserted": 0}  # exact match: no other keys in this case
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.no_proteins" in event_names
+
+    def test_skips_unmatched_accessions(self):
+        gaf = _gaf_line(accession="UNKNOWN") + "\n"
+        result, _, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert result.result["annotations_inserted"] == 0
+        assert result.result["annotations_skipped"] == 1
+
+    def test_skips_unmatched_go_ids(self):
+        gaf = _gaf_line(accession="P12345", go_id="GO:UNKNOWN") + "\n"
+        result, _, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert result.result["annotations_inserted"] == 0
+        assert result.result["annotations_skipped"] == 1
+
+    def test_pagination_emits_page_done(self):
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(5)]  # distinct evidence codes so the fake never deduplicates them
+        gaf = "\n".join(lines) + "\n"
+        result, events, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=2,
+        )
+        page_events = [e for e in events if e["event"] == "load_goa_annotations.page_done"]
+        # 5 records, page_size=2 -> 2 full pages emitted (remainder flushed separately)
+        assert len(page_events) == 2
+        assert result.result["annotations_inserted"] == 5
+        assert result.result["pages"] == 3  # the partial final page is still counted
+
+    def test_commit_every_page(self):
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(4)]
+        gaf = "\n".join(lines) + "\n"
+        _, _, session = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=2, commit_every_page=True,
+        )
+        # 4 records, page_size=2 -> 2 full pages -> 2 commits
+        assert session.commit.call_count == 2
+
+    def test_no_commit_when_disabled(self):
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(4)]
+        gaf = "\n".join(lines) + "\n"
+        _, _, session = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=2, commit_every_page=False,
+        )
+        session.commit.assert_not_called()  # same input as above, but the flag is off
+
+    def test_total_limit_stops_early(self):
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(10)]
+        gaf = "\n".join(lines) + "\n"
+        result, events, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=3, total_limit=3,
+        )
+        assert result.result["annotations_inserted"] == 3  # 10 records available, only 3 stored
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.limit_reached" in event_names
+
+    def test_empty_file(self):
+        result, _, _ = self._run(
+            "", accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert result.result["annotations_inserted"] == 0
+        assert result.result["total_lines_read"] == 0
+        assert result.result["pages"] == 0
+
+    def test_result_contains_elapsed_seconds(self):
+        gaf = _gaf_line() + "\n"
+        result, _, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert "elapsed_seconds" in result.result
+        assert result.result["elapsed_seconds"] >= 0  # only sanity-checked; the value is timing dependent
+
+    def test_result_contains_annotation_set_id(self):
+        gaf = _gaf_line() + "\n"
+        result, _, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert result.result["annotation_set_id"] == str(_ANNOTATION_SET_ID)  # id of the patched AnnotationSet
+
+    def test_duplicate_annotations_in_file(self):
+        line = _gaf_line(accession="P12345", go_id="GO:0003674", evidence="IDA")
+        gaf = (line + "\n") * 5  # the same annotation line five times
+        result, _, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        assert result.result["annotations_inserted"] == 1
+        assert result.result["annotations_skipped"] == 4
+
+    def test_comments_and_short_lines_not_counted(self):
+        text = (
+            "!GAF header comment\n"
+            "!another comment\n"
+            "short\tline\n"
+            + _gaf_line(accession="P12345", go_id="GO:0003674") + "\n"
+        )
+        result, _, _ = self._run(
+            text, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        # Only valid GAF lines are counted as total_lines_read
+        assert result.result["total_lines_read"] == 1
+        assert result.result["annotations_inserted"] == 1
+
+    def test_annotation_set_created_event(self):
+        gaf = _gaf_line() + "\n"
+        _, events, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        event_names = [e["event"] for e in events]
+        assert "load_goa_annotations.annotation_set_created" in event_names
+        created = [e for e in events if e["event"] == "load_goa_annotations.annotation_set_created"]
+        assert created[0]["fields"]["annotation_set_id"] == str(_ANNOTATION_SET_ID)
+
+    def test_page_done_event_fields(self):
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(3)]
+        gaf = "\n".join(lines) + "\n"
+        _, events, _ = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=2,
+        )
+        page_events = [e for e in events if e["event"] == "load_goa_annotations.page_done"]
+        assert len(page_events) == 1  # 3 records, page_size=2 -> one full page emits page_done
+        fields = page_events[0]["fields"]
+        assert fields["page"] == 1  # the first page is reported as page 1
+        assert fields["total_lines"] == 2
+        assert fields["total_inserted"] == 2
+
+    def test_session_flush_called_after_annotation_set_add(self):
+        gaf = _gaf_line() + "\n"
+        _, _, session = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+        )
+        session.flush.assert_called()  # checks that flush happened, not its order relative to add
+
+    def test_multiple_pages_with_remainder(self):
+        """7 records with page_size=3 -> 2 full pages + 1 remainder = 3 pages total."""
+        lines = [_gaf_line(accession="P12345", go_id="GO:0003674", evidence=f"E{i}")
+                 for i in range(7)]
+        gaf = "\n".join(lines) + "\n"
+        result, events, session = self._run(
+            gaf, accessions=["P12345"], go_terms={"GO:0003674": 1},
+            page_size=3,
+        )
+        assert result.result["pages"] == 3
+        assert result.result["annotations_inserted"] == 7
+        page_events = [e for e in events if e["event"] == "load_goa_annotations.page_done"]
+        assert len(page_events) == 2  # only full pages emit page_done
+
+
+# ---------------------------------------------------------------------------
+# Operation name
+# ---------------------------------------------------------------------------
+
+
+class TestOperationName:  # pins the public operation name
+    def test_name(self):
+        assert LoadGOAAnnotationsOperation.name == "load_goa_annotations"  # also the prefix of the event names asserted above
diff --git a/tests/test_load_ontology_snapshot.py b/tests/test_load_ontology_snapshot.py
index 611713c..6d045fe 100644
--- a/tests/test_load_ontology_snapshot.py
+++ b/tests/test_load_ontology_snapshot.py
@@ -52,11 +52,11 @@ def test_valid(self) -> None:
         assert p.timeout_seconds == 120
 
     def test_empty_url_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):  # narrower than Exception, so unrelated errors fail the test
             LoadOntologySnapshotPayload.model_validate({"obo_url": "   "})
 
     def test_missing_url_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):  # a missing obo_url must surface as a ValueError as well
             LoadOntologySnapshotPayload.model_validate({})
 
 
@@ -103,6 +103,177 @@ def test_typedef_not_included(self) -> None:
         assert "part_of" not in go_ids
 
 
+_OBO_WITH_RELATIONSHIPS = """\
+format-version: 1.2
+data-version: releases/2024-06-01
+
+[Term]
+id: GO:0008150
+name: biological_process
+namespace: biological_process
+def: "Root biological process." [GOC:go_curators]
+
+[Term]
+id: GO:0009987
+name: cellular process
+namespace: biological_process
+def: "Any process that is carried out at the cellular level." [GOC:go_curators]
+is_a: GO:0008150 ! biological_process
+
+[Term]
+id: GO:0044237
+name: cellular metabolic process
+namespace: biological_process
+def: "The chemical reactions involving a cell." [GOC:go_curators]
+is_a: GO:0009987 ! cellular process
+relationship: part_of GO:0008150 ! biological_process
+"""  # fixture: a root term, an is_a child, and a grandchild with one is_a and one part_of edge
+
+
+class TestParseTermsRelationships:
+    """Tests for ``is_a:`` and ``relationship:`` tag parsing in ``_parse_terms``."""
+
+    def _op(self) -> LoadOntologySnapshotOperation:
+        return LoadOntologySnapshotOperation()
+
+    def test_is_a_relationship_parsed(self) -> None:
+        op = self._op()
+        terms = {t["go_id"]: t for t in op._parse_terms(_OBO_WITH_RELATIONSHIPS)}  # index parsed terms by GO id
+        cellular = terms["GO:0009987"]
+        assert ("is_a", "GO:0008150") in cellular["relationships"]  # stored as (type, target GO id) tuples
+
+    def test_part_of_relationship_parsed(self) -> None:
+        op = self._op()
+        terms = {t["go_id"]: t for t in op._parse_terms(_OBO_WITH_RELATIONSHIPS)}
+        metabolic = terms["GO:0044237"]
+        assert ("part_of", "GO:0008150") in metabolic["relationships"]
+
+    def test_multiple_relationships_on_single_term(self) -> None:
+        op = self._op()
+        terms = {t["go_id"]: t for t in op._parse_terms(_OBO_WITH_RELATIONSHIPS)}
+        metabolic = terms["GO:0044237"]
+        assert len(metabolic["relationships"]) == 2  # one is_a plus one part_of
+        assert ("is_a", "GO:0009987") in metabolic["relationships"]
+        assert ("part_of", "GO:0008150") in metabolic["relationships"]
+
+    def test_root_term_has_no_relationships(self) -> None:
+        op = self._op()
+        terms = {t["go_id"]: t for t in op._parse_terms(_OBO_WITH_RELATIONSHIPS)}
+        root = terms["GO:0008150"]
+        assert root["relationships"] == []  # the key exists and holds an empty list
+
+    def test_all_supported_relationship_types(self) -> None:
+        """Each of the 7 supported relationship types is captured."""
+        op = self._op()
+        for rt in [
+            "part_of", "regulates", "negatively_regulates",
+            "positively_regulates", "occurs_in", "capable_of",
+            "capable_of_part_of",
+        ]:
+            obo = (
+                "format-version: 1.2\ndata-version: releases/2024-01-01\n\n"
+                "[Term]\nid: GO:0000001\nname: child\nnamespace: biological_process\n"
+                f"relationship: {rt} GO:0000002 ! parent\n"
+            )
+            terms = op._parse_terms(obo)  # a fresh single-term document per relationship type
+            assert (rt, "GO:0000002") in terms[0]["relationships"], f"Failed for {rt}"
+
+    def test_unsupported_relationship_type_ignored(self) -> None:
+        op = self._op()
+        obo = (
+            "format-version: 1.2\ndata-version: releases/2024-01-01\n\n"
+            "[Term]\nid: GO:0000001\nname: child\nnamespace: biological_process\n"
+            "relationship: has_part GO:0000002 ! parent\n"  # has_part is not in the supported list above
+        )
+        terms = op._parse_terms(obo)
+        assert terms[0].get("relationships", []) == []  # passes whether the key is absent or empty
+
+    def test_relationship_line_with_no_go_prefix_ignored(self) -> None:
+        """relationship: part_of SOMETHING (not GO:) is skipped."""
+        op = self._op()
+        obo = (
+            "format-version: 1.2\ndata-version: releases/2024-01-01\n\n"
+            "[Term]\nid: GO:0000001\nname: child\nnamespace: biological_process\n"
+            "relationship: part_of CHEBI:12345 ! not a GO term\n"  # supported type, but a non-GO target
+        )
+        terms = op._parse_terms(obo)
+        assert terms[0].get("relationships", []) == []
+
+    def test_definition_without_quotes_gives_none(self) -> None:
+        """def: line that doesn't match the quoted pattern yields None."""
+        op = self._op()
+        obo = (
+            "format-version: 1.2\ndata-version: releases/2024-01-01\n\n"
+            "[Term]\nid: GO:0000001\nname: test\nnamespace: biological_process\n"
+            "def: no quotes here\n"
+        )
+        terms = op._parse_terms(obo)
+        assert terms[0]["definition"] is None  # the key is present; only its value is None
+
+
+class TestDownload:
+    """Tests for ``LoadOntologySnapshotOperation._download`` with ``requests.get`` patched."""
+
+    def test_download_success(self) -> None:
+        op = LoadOntologySnapshotOperation()
+        payload = LoadOntologySnapshotPayload.model_validate(
+            {"obo_url": "http://example.org/go.obo"}
+        )
+        emit = MagicMock()
+
+        mock_resp = MagicMock()
+        mock_resp.text = _OBO_SAMPLE  # the body _download is expected to return
+        mock_resp.raise_for_status = MagicMock()
+
+        with patch(
+            "protea.core.operations.load_ontology_snapshot.requests.get",
+            return_value=mock_resp,
+        ) as mock_get:
+            result = op._download(payload, emit)
+
+        assert result == _OBO_SAMPLE
+        mock_get.assert_called_once_with(
+            "http://example.org/go.obo", timeout=120, stream=True
+        )
+        # Should emit download_start and download_done
+        assert emit.call_count == 2
+        assert emit.call_args_list[0][0][0] == "load_ontology_snapshot.download_start"  # [call][args][index]
+        assert emit.call_args_list[1][0][0] == "load_ontology_snapshot.download_done"
+        assert emit.call_args_list[1][0][2]["bytes"] == len(_OBO_SAMPLE)  # third positional arg carries "bytes"
+
+    def test_download_http_error_propagates(self) -> None:
+        import requests as req
+
+        op = LoadOntologySnapshotOperation()
+        payload = LoadOntologySnapshotPayload.model_validate(
+            {"obo_url": "http://example.org/go.obo"}
+        )
+        mock_resp = MagicMock()
+        mock_resp.raise_for_status.side_effect = req.HTTPError("404 Not Found")  # the status check fails
+
+        with patch(
+            "protea.core.operations.load_ontology_snapshot.requests.get",
+            return_value=mock_resp,
+        ):
+            with pytest.raises(req.HTTPError):
+                op._download(payload, MagicMock())
+
+    def test_download_connection_error_propagates(self) -> None:
+        import requests as req
+
+        op = LoadOntologySnapshotOperation()
+        payload = LoadOntologySnapshotPayload.model_validate(
+            {"obo_url": "http://example.org/go.obo"}
+        )
+
+        with patch(
+            "protea.core.operations.load_ontology_snapshot.requests.get",
+            side_effect=req.ConnectionError("DNS failure"),  # requests.get itself raises
+        ):
+            with pytest.raises(req.ConnectionError):
+                op._download(payload, MagicMock())
+
+
 class TestLoadOntologySnapshotExecute:
     def _mock_session(self, existing_snapshot=None, rel_count=0):
         session = MagicMock()
@@ -161,3 +332,207 @@ def add_side_effect(obj):
         assert session.add_all.call_count == 2
         terms_call_args = session.add_all.call_args_list[0][0][0]
         assert len(terms_call_args) == 4
+
+    def test_new_snapshot_inserts_relationships(self) -> None:
+        """A brand-new snapshot gets GOTermRelationship rows for its parsed parent links."""
+        from protea.infrastructure.orm.models.annotation.go_term import GOTerm
+        from protea.infrastructure.orm.models.annotation.ontology_snapshot import (
+            OntologySnapshot,
+        )
+
+        session = self._mock_session(existing_snapshot=None)
+        next_id = 0
+
+        def fake_add(obj):
+            # Mimic the database assigning a primary key to the snapshot row.
+            if isinstance(obj, OntologySnapshot):
+                obj.id = "snap-id"
+
+        def fake_add_all(items):
+            """Simulate DB flush assigning IDs to GOTerm objects."""
+            nonlocal next_id
+            for item in items:
+                if isinstance(item, GOTerm) and item.id is None:
+                    next_id += 1
+                    item.id = next_id
+
+        session.add.side_effect = fake_add
+        session.add_all.side_effect = fake_add_all
+
+        with patch.object(
+            LoadOntologySnapshotOperation,
+            "_download",
+            return_value=_OBO_WITH_RELATIONSHIPS,
+        ):
+            outcome = LoadOntologySnapshotOperation().execute(
+                session,
+                {"obo_url": "http://example.org/go.obo"},
+                emit=_noop_emit,
+            )
+
+        # 3 terms, 3 relationships (1 is_a on GO:0009987, 1 is_a + 1 part_of on GO:0044237)
+        assert outcome.result["terms_inserted"] == 3
+        assert outcome.result["relationships_inserted"] == 3
+        # Second add_all call is the relationships
+        inserted_relationships = session.add_all.call_args_list[1][0][0]
+        assert len(inserted_relationships) == 3
+
+    def test_new_snapshot_skips_relationship_with_missing_parent(self) -> None:
+        """A parent GO ID absent from the parsed terms yields no relationship row."""
+        from protea.infrastructure.orm.models.annotation.go_term import GOTerm
+        from protea.infrastructure.orm.models.annotation.ontology_snapshot import (
+            OntologySnapshot,
+        )
+
+        obo = (
+            "format-version: 1.2\ndata-version: releases/2024-01-01\n\n"
+            "[Term]\nid: GO:0000001\nname: child\nnamespace: biological_process\n"
+            "is_a: GO:9999999 ! nonexistent parent\n"
+        )
+        session = self._mock_session(existing_snapshot=None)
+        next_id = 0
+
+        def fake_add(obj):
+            # Mimic the database assigning a primary key to the snapshot row.
+            if isinstance(obj, OntologySnapshot):
+                obj.id = "snap-id"
+
+        def fake_add_all(items):
+            # Mimic a flush handing out integer IDs to GOTerm rows that lack one.
+            nonlocal next_id
+            for item in items:
+                if isinstance(item, GOTerm) and item.id is None:
+                    next_id += 1
+                    item.id = next_id
+
+        session.add.side_effect = fake_add
+        session.add_all.side_effect = fake_add_all
+
+        # Serve the single-term OBO text instead of performing a download.
+        with patch.object(LoadOntologySnapshotOperation, "_download", return_value=obo):
+            outcome = LoadOntologySnapshotOperation().execute(
+                session,
+                {"obo_url": "http://example.org/go.obo"},
+                emit=_noop_emit,
+            )
+
+        # Parent GO:9999999 doesn't exist in terms, so relationship is skipped
+        assert outcome.result["relationships_inserted"] == 0
+
+    def test_emits_progress_events(self) -> None:
+        """execute() emits the start, version, parsed and done events."""
+        session = self._mock_session(existing_snapshot=None)
+        sink = MagicMock()
+
+        with patch.object(LoadOntologySnapshotOperation, "_download", return_value=_OBO_SAMPLE):
+            loader = LoadOntologySnapshotOperation()
+            loader.execute(session, {"obo_url": "http://x.org/go.obo"}, emit=sink)
+
+        # The first positional argument of every emit call is the event name.
+        emitted = [call.args[0] for call in sink.call_args_list]
+        assert "load_ontology_snapshot.start" in emitted
+        assert "load_ontology_snapshot.version" in emitted
+        assert "load_ontology_snapshot.parsed" in emitted
+        assert "load_ontology_snapshot.done" in emitted
+
+    def test_done_event_includes_elapsed(self) -> None:
+        """The result dict carries a non-negative ``elapsed_seconds`` value."""
+        session = self._mock_session(existing_snapshot=None)
+        sink = MagicMock()
+
+        with patch.object(LoadOntologySnapshotOperation, "_download", return_value=_OBO_SAMPLE):
+            loader = LoadOntologySnapshotOperation()
+            outcome = loader.execute(session, {"obo_url": "http://x.org/go.obo"}, emit=sink)
+
+        summary = outcome.result
+        assert "elapsed_seconds" in summary
+        assert summary["elapsed_seconds"] >= 0
+
+    def test_backfill_relationships_when_zero(self) -> None:
+        """An existing snapshot with zero relationships gets them backfilled."""
+        existing = MagicMock()
+        existing.id = "existing-uuid"
+        term_rows = [
+            ("GO:0003674", 1),
+            ("GO:0008150", 2),
+            ("GO:0005575", 3),
+            ("GO:0003824", 4),
+        ]
+        query_calls = 0
+
+        def fake_query(*args):
+            # session.query() is answered by call order; later calls get a bare mock.
+            nonlocal query_calls
+            query_calls += 1
+            query = MagicMock()
+            if query_calls == 1:
+                # OntologySnapshot filter_by query
+                query.filter_by.return_value.first.return_value = existing
+            elif query_calls == 2:
+                # func.count(GOTermRelationship.id) → 0
+                query.filter.return_value.scalar.return_value = 0
+            elif query_calls == 3:
+                # GOTerm (go_id, id) query for the backfill map
+                query.filter.return_value.all.return_value = term_rows
+            return query
+
+        session = MagicMock()
+        session.query.side_effect = fake_query
+        sink = MagicMock()
+
+        with patch.object(LoadOntologySnapshotOperation, "_download", return_value=_OBO_SAMPLE):
+            outcome = LoadOntologySnapshotOperation().execute(
+                session,
+                {"obo_url": "http://example.org/go.obo"},
+                emit=sink,
+            )
+
+        summary = outcome.result
+        assert summary["skipped"] is False
+        assert summary["ontology_snapshot_id"] == "existing-uuid"
+        assert "relationships_inserted" in summary
+        session.add_all.assert_called_once()
+        session.flush.assert_called_once()
+
+        emitted = [call.args[0] for call in sink.call_args_list]
+        assert "load_ontology_snapshot.backfill_relationships" in emitted
+        assert "load_ontology_snapshot.backfill_done" in emitted
+
+    def test_backfill_skips_unknown_go_ids(self) -> None:
+        """Backfill inserts no relationships when the terms involved have no DB ID."""
+        existing = MagicMock()
+        existing.id = "existing-uuid"
+        query_calls = 0
+
+        def fake_query(*args):
+            # session.query() is answered by call order; later calls get a bare mock.
+            nonlocal query_calls
+            query_calls += 1
+            query = MagicMock()
+            if query_calls == 1:
+                query.filter_by.return_value.first.return_value = existing
+            elif query_calls == 2:
+                query.filter.return_value.scalar.return_value = 0
+            elif query_calls == 3:
+                # Return only one term — the others won't be in the map
+                query.filter.return_value.all.return_value = [("GO:0003674", 1)]
+            return query
+
+        session = MagicMock()
+        session.query.side_effect = fake_query
+
+        with patch.object(
+            LoadOntologySnapshotOperation, "_download", return_value=_OBO_SAMPLE
+        ):
+            outcome = LoadOntologySnapshotOperation().execute(
+                session,
+                {"obo_url": "http://example.org/go.obo"},
+                emit=_noop_emit,
+            )
+
+        assert outcome.result["relationships_inserted"] == 0
+
+    def test_invalid_payload_raises(self) -> None:
+        loader = LoadOntologySnapshotOperation()
+        with pytest.raises(ValueError):  # an empty payload dict must be rejected
+            loader.execute(MagicMock(), {}, emit=_noop_emit)
diff --git a/tests/test_load_quickgo_annotations.py b/tests/test_load_quickgo_annotations.py
index c50346a..f359de8 100644
--- a/tests/test_load_quickgo_annotations.py
+++ b/tests/test_load_quickgo_annotations.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import io as _io
 import uuid
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import pytest
+import requests
 
 from protea.core.operations.load_quickgo_annotations import (
     LoadQuickGOAnnotationsOperation,
@@ -73,11 +75,11 @@ def test_valid_minimal(self) -> None:
         assert p.page_size == 10000
 
     def test_missing_snapshot_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             LoadQuickGOAnnotationsPayload.model_validate({"source_version": "2026-01"})
 
     def test_empty_source_version_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             LoadQuickGOAnnotationsPayload.model_validate({
                 "ontology_snapshot_id": _SNAPSHOT_ID,
                 "source_version": "",
@@ -155,3 +157,678 @@ def test_raw_eco_stored_when_no_mapping(self) -> None:
         from sqlalchemy.dialects.postgresql import dialect as pg_dialect
         compiled = call_stmt.compile(dialect=pg_dialect())
         assert compiled.params["evidence_code_m0"] == "ECO:0000314"
+
+    def test_empty_eco_id_becomes_none(self) -> None:
+        """A row with a blank ``ECO ID`` is still counted as inserted."""
+        blank_eco_row = {**_QUICKGO_ROWS[0], "ECO ID": ""}
+        inserted, _ = self._op()._store_buffer(
+            MagicMock(),
+            [blank_eco_row],
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+            eco_map={},
+        )
+        assert inserted == 1
+
+    def test_empty_accession_skipped(self) -> None:
+        """A whitespace-only ``GENE PRODUCT ID`` is skipped rather than inserted."""
+        blank_accession_row = {**_QUICKGO_ROWS[0], "GENE PRODUCT ID": "  "}
+        counts = self._op()._store_buffer(
+            MagicMock(),
+            [blank_accession_row],
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+            eco_map={},
+        )
+        inserted, skipped = counts
+        assert (inserted, skipped) == (0, 1)
+
+    def test_chunked_insert_large_buffer(self) -> None:
+        """A 5001-row buffer is written with two ``session.execute`` calls."""
+        session = MagicMock()
+        template_row = dict(_QUICKGO_ROWS[0])
+        inserted, skipped = self._op()._store_buffer(
+            session,
+            [template_row] * 5001,  # one dict object repeated 5001 times
+            uuid.UUID(_SNAPSHOT_ID),
+            valid_accessions={"P12345"},
+            go_term_map={"GO:0003824": 1},
+            eco_map={},
+        )
+        assert (inserted, skipped) == (5001, 0)
+        assert session.execute.call_count == 2
+
+
+# ---------------------------------------------------------------------------
+# _load_accessions
+# ---------------------------------------------------------------------------
+
+class TestLoadAccessions:
+    """``_load_accessions`` returns two accession sets and reports their sizes."""
+
+    def test_returns_canonical_and_protein_sets(self) -> None:
+        session = MagicMock()
+        session.scalars.side_effect = [
+            iter({"P12345", "Q99999"}),
+            iter({"P12345", "P12345-2", "Q99999"}),
+        ]
+        seen: list[str] = []
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        loader = LoadQuickGOAnnotationsOperation()
+        canonical, proteins = loader._load_accessions(session, emit)
+        assert canonical == {"P12345", "Q99999"}
+        assert proteins == {"P12345", "P12345-2", "Q99999"}
+        assert "load_quickgo_annotations.load_accessions_start" in seen
+        assert "load_quickgo_annotations.load_accessions_done" in seen
+
+    def test_emits_counts(self) -> None:
+        session = MagicMock()
+        session.scalars.side_effect = [iter({"A", "B"}), iter({"A", "B", "C"})]
+        payloads: list[dict] = []
+        def emit(event, msg, fields, level):
+            payloads.append(fields)
+
+        LoadQuickGOAnnotationsOperation()._load_accessions(session, emit)
+        assert payloads[-1]["canonical_accessions"] == 2
+        assert payloads[-1]["protein_accessions"] == 3
+
+
+# ---------------------------------------------------------------------------
+# _load_go_term_map
+# ---------------------------------------------------------------------------
+
+class TestLoadGoTermMap:
+    """``_load_go_term_map`` turns ``(go_id, id)`` query rows into a dict."""
+
+    def test_returns_mapping(self) -> None:
+        """Rows returned by the query become ``{go_id: id}``, with start/done events."""
+        session = MagicMock()
+        snapshot_id = uuid.uuid4()
+        session.query.return_value.filter.return_value.all.return_value = [
+            ("GO:0005634", 1), ("GO:0008150", 2),
+        ]
+        seen: list[str] = []
+
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        loader = LoadQuickGOAnnotationsOperation()
+        mapping = loader._load_go_term_map(session, snapshot_id, emit)
+        assert mapping == {"GO:0005634": 1, "GO:0008150": 2}
+        assert "load_quickgo_annotations.load_go_terms_start" in seen
+        assert "load_quickgo_annotations.load_go_terms_done" in seen
+
+    def test_empty_terms(self) -> None:
+        """No rows from the query gives an empty mapping."""
+        session = MagicMock()
+        session.query.return_value.filter.return_value.all.return_value = []
+
+        loader = LoadQuickGOAnnotationsOperation()
+        mapping = loader._load_go_term_map(session, uuid.uuid4(), _noop_emit)
+        assert mapping == {}
+
+
+# ---------------------------------------------------------------------------
+# _load_eco_mapping
+# ---------------------------------------------------------------------------
+
+class TestLoadEcoMapping:
+    """``_load_eco_mapping`` with the ECO mapping download patched out."""
+
+    def _payload(self, **kw):
+        """Validate a minimal payload, optionally extended with ``kw``."""
+        return LoadQuickGOAnnotationsPayload.model_validate({
+            "ontology_snapshot_id": _SNAPSHOT_ID,
+            "source_version": "v1",
+            **kw,
+        })
+
+    @staticmethod
+    def _text_response(text: str) -> MagicMock:
+        """Fake a successful response whose ``.text`` is ``text``."""
+        response = MagicMock()
+        response.text = text
+        response.raise_for_status = MagicMock()
+        return response
+
+    def test_no_url_returns_empty(self) -> None:
+        """Without ``eco_mapping_url`` the mapping is an empty dict."""
+        loader = LoadQuickGOAnnotationsOperation()
+        assert loader._load_eco_mapping(self._payload(), _noop_emit) == {}
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_parses_mapping_file(self, mock_get) -> None:
+        """``ECO:xxx CODE`` lines become dict entries."""
+        # The "# comment" and "badline" lines must not show up in the result.
+        mock_get.return_value = self._text_response(
+            "ECO:0000314 IDA\nECO:0000501 IEA\n# comment\nbadline\n"
+        )
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(eco_mapping_url="https://eco.test/map.txt")
+        mapping = loader._load_eco_mapping(payload, _noop_emit)
+        assert mapping == {"ECO:0000314": "IDA", "ECO:0000501": "IEA"}
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_http_error_raises(self, mock_get) -> None:
+        """An HTTPError from ``raise_for_status`` propagates to the caller."""
+        failing = MagicMock()
+        failing.raise_for_status.side_effect = requests.HTTPError("404")
+        mock_get.return_value = failing
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(eco_mapping_url="https://eco.test/bad")
+        with pytest.raises(requests.HTTPError):
+            loader._load_eco_mapping(payload, _noop_emit)
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_emits_start_and_done(self, mock_get) -> None:
+        """Both the start and the done event are emitted."""
+        mock_get.return_value = self._text_response("ECO:0000314 IDA\n")
+        seen: list[str] = []
+
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(eco_mapping_url="https://eco.test/map.txt")
+        loader._load_eco_mapping(payload, emit)
+        assert "load_quickgo_annotations.eco_mapping_start" in seen
+        assert "load_quickgo_annotations.eco_mapping_done" in seen
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_ignores_non_eco_lines(self, mock_get) -> None:
+        """Only the two ``ECO:`` lines of the four-line input end up in the mapping."""
+        mock_get.return_value = self._text_response(
+            "ECO:0000314 IDA\nNOT_ECO stuff\n  \nECO:0000501 IEA\n"
+        )
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(eco_mapping_url="https://eco.test/map.txt")
+        mapping = loader._load_eco_mapping(payload, _noop_emit)
+        assert len(mapping) == 2
+
+
+# ---------------------------------------------------------------------------
+# _fetch_quickgo_page — TSV stream parsing
+# ---------------------------------------------------------------------------
+
+QUICKGO_HEADER_LINE = (  # tab-separated column header row for the fake TSV bodies in these tests
+    "GENE PRODUCT ID\tGO TERM\tQUALIFIER\tECO ID\tREFERENCE\tWITH/FROM\tASSIGNED BY\tDATE"
+)
+
+
+def _tsv_row_str(
+    accession: str = "P12345", go_term: str = "GO:0005634",
+    qualifier: str = "enables", eco_id: str = "ECO:0000314",
+    reference: str = "PMID:12345", with_from: str = "",
+    assigned_by: str = "UniProt", date: str = "20240101",
+) -> str:
+    """Build one tab-separated data row in ``QUICKGO_HEADER_LINE`` column order."""
+    columns = (
+        accession, go_term, qualifier, eco_id, reference, with_from, assigned_by, date,
+    )
+    return "\t".join(f"{value}" for value in columns)
+
+
+def _make_tsv_text(*data_rows: str) -> str:
+    return "".join(line + "\n" for line in (QUICKGO_HEADER_LINE, *data_rows))  # header row first
+
+
+def _make_stream_response(text: str, status_code: int = 200) -> MagicMock:
+    """Fake a streamed response whose ``raw`` is a UTF-8 byte stream of ``text``."""
+    body = _io.BytesIO(text.encode("utf-8"))
+    body.decode_content = True
+    response = MagicMock(status_code=status_code, raw=body)
+    response.raise_for_status = MagicMock()
+    if status_code >= 400:
+        # Error statuses make raise_for_status() raise HTTPError.
+        response.raise_for_status.side_effect = requests.HTTPError(f"{status_code}")
+    return response
+
+
+class TestFetchQuickgoPage:
+    """``_fetch_quickgo_page`` exercised against fake streamed TSV responses."""
+
+    def _payload(self, **kw):
+        return LoadQuickGOAnnotationsPayload.model_validate({
+            "ontology_snapshot_id": _SNAPSHOT_ID,
+            "source_version": "v1",
+            **kw,
+        })
+
+    def _fetch(self, emit, gp_ids=None, batch_index=0, total_batches=1):
+        """Run ``_fetch_quickgo_page`` with the default payload and list its records."""
+        fetcher = LoadQuickGOAnnotationsOperation()
+        page = fetcher._fetch_quickgo_page(
+            self._payload(), emit, gp_ids=gp_ids, batch_index=batch_index, total_batches=total_batches
+        )
+        return list(page)
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_parses_rows(self, mock_get) -> None:
+        """Each data row becomes a record keyed by the header columns."""
+        tsv = _make_tsv_text(
+            _tsv_row_str("P12345", "GO:0005634"),
+            _tsv_row_str("Q99999", "GO:0008150"),
+        )
+        mock_get.return_value = _make_stream_response(tsv)
+
+        records = self._fetch(_noop_emit, gp_ids=["P12345"])
+        assert len(records) == 2
+        assert records[0]["GENE PRODUCT ID"] == "P12345"
+        assert records[1]["GO TERM"] == "GO:0008150"
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_skips_empty_lines(self, mock_get) -> None:
+        """Blank lines in the stream produce no records."""
+        tsv = QUICKGO_HEADER_LINE + "\n\n" + _tsv_row_str() + "\n\n"
+        mock_get.return_value = _make_stream_response(tsv)
+
+        records = self._fetch(_noop_emit)
+        assert len(records) == 1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_skips_short_rows(self, mock_get) -> None:
+        """A row with too few columns is dropped."""
+        tsv = QUICKGO_HEADER_LINE + "\ntoo\tfew\n" + _tsv_row_str() + "\n"
+        mock_get.return_value = _make_stream_response(tsv)
+
+        records = self._fetch(_noop_emit)
+        assert len(records) == 1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_http_error_raises(self, mock_get) -> None:
+        """A 500 response surfaces as ``requests.HTTPError``."""
+        mock_get.return_value = _make_stream_response("", status_code=500)
+
+        fetcher = LoadQuickGOAnnotationsOperation()
+        with pytest.raises(requests.HTTPError):
+            page = fetcher._fetch_quickgo_page(
+                self._payload(), _noop_emit, gp_ids=None, batch_index=0, total_batches=1
+            )
+            list(page)
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_sends_correct_params_with_gp_ids(self, mock_get) -> None:
+        """Gene product IDs are sent comma-joined, with TSV streaming requested."""
+        mock_get.return_value = _make_stream_response(_make_tsv_text())
+
+        self._fetch(_noop_emit, gp_ids=["P12345", "Q99999"])
+        sent = mock_get.call_args.kwargs
+        assert sent["params"]["geneProductId"] == "P12345,Q99999"
+        assert sent["params"]["geneProductType"] == "protein"
+        assert sent["headers"]["Accept"] == "text/tsv"
+        assert sent["stream"] is True
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_no_gp_ids_omits_gene_product_param(self, mock_get) -> None:
+        """Without IDs the ``geneProductId`` parameter is left out."""
+        mock_get.return_value = _make_stream_response(_make_tsv_text())
+
+        self._fetch(_noop_emit)
+        sent = mock_get.call_args.kwargs
+        assert "geneProductId" not in sent["params"]
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_emits_download_start_with_progress(self, mock_get) -> None:
+        """``download_start`` carries one-based batch progress fields."""
+        mock_get.return_value = _make_stream_response(_make_tsv_text())
+        seen: list[tuple[str, dict]] = []
+
+        def emit(event, msg, fields, level):
+            seen.append((event, fields))
+
+        # batch_index=2 is zero-based, so the event should report batch 3 of 5.
+        self._fetch(emit, gp_ids=["X"], batch_index=2, total_batches=5)
+        starts = [fields for event, fields in seen if event == "load_quickgo_annotations.download_start"]
+        assert len(starts) == 1
+        assert starts[0]["batch"] == 3
+        assert starts[0]["of"] == 5
+        assert starts[0]["_progress_current"] == 3
+        assert starts[0]["_progress_total"] == 5
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_header_only_yields_nothing(self, mock_get) -> None:
+        """A response containing only the header row yields no records."""
+        tsv = QUICKGO_HEADER_LINE + "\n"
+        mock_get.return_value = _make_stream_response(tsv)
+
+        records = self._fetch(_noop_emit)
+        assert records == []
+
+
+# ---------------------------------------------------------------------------
+# _stream_quickgo — batching logic
+# ---------------------------------------------------------------------------
+
+class TestStreamQuickgo:
+    """Batching behaviour of ``_stream_quickgo`` with ``requests.get`` patched."""
+
+    def _payload(self, **kw):
+        return LoadQuickGOAnnotationsPayload.model_validate({
+            "ontology_snapshot_id": _SNAPSHOT_ID,
+            "source_version": "v1",
+            **kw,
+        })
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_batches_accessions(self, mock_get) -> None:
+        mock_get.side_effect = lambda *a, **kw: _make_stream_response(_make_tsv_text())
+
+        streamer = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(gene_product_batch_size=2)
+        list(streamer._stream_quickgo(payload, _noop_emit, gene_product_ids=["A", "B", "C", "D", "E"]))
+        assert mock_get.call_count == 3  # 2+2+1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_no_ids_single_request(self, mock_get) -> None:
+        mock_get.return_value = _make_stream_response(_make_tsv_text())
+
+        payload = self._payload(use_db_accessions=False)
+        list(LoadQuickGOAnnotationsOperation()._stream_quickgo(payload, _noop_emit, gene_product_ids=None))
+        assert mock_get.call_count == 1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_emits_batching_event(self, mock_get) -> None:
+        mock_get.side_effect = lambda *a, **kw: _make_stream_response(_make_tsv_text())
+        seen: list[tuple[str, dict]] = []
+        def emit(event, msg, fields, level):
+            seen.append((event, fields))
+
+        streamer = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(gene_product_batch_size=2)
+        list(streamer._stream_quickgo(payload, emit, gene_product_ids=["A", "B", "C"]))
+        batching = [fields for event, fields in seen if event == "load_quickgo_annotations.batching"]
+        assert len(batching) == 1
+        assert batching[0]["total_accessions"] == 3
+        assert batching[0]["total_batches"] == 2
+        assert batching[0]["batch_size"] == 2
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_yields_records_from_all_batches(self, mock_get) -> None:
+        tsv = _make_tsv_text(_tsv_row_str("P12345"))
+        mock_get.side_effect = lambda *a, **kw: _make_stream_response(tsv)
+
+        streamer = LoadQuickGOAnnotationsOperation()
+        payload = self._payload(gene_product_batch_size=1)
+        records = list(streamer._stream_quickgo(payload, _noop_emit, gene_product_ids=["A", "B"]))
+        # Each batch returns 1 record, 2 batches
+        assert len(records) == 2
+
+
+# ---------------------------------------------------------------------------
+# Full execute flow
+# ---------------------------------------------------------------------------
+
+def _mock_session(
+    canonical_accessions: set[str] | None = None,
+    protein_accessions: set[str] | None = None,
+    go_terms: list[tuple[str, int]] | None = None,
+    snapshot_exists: bool = True,
+) -> MagicMock:
+    """Build a MagicMock session with get/scalars/query/add stubbed for the execute tests.
+
+    ``None`` means "use the defaults"; an empty set/list is honoured as "no rows".
+    """
+    session = MagicMock()
+    session.get.return_value = MagicMock() if snapshot_exists else None
+
+    canon = canonical_accessions if canonical_accessions is not None else {"P12345"}
+    prots = protein_accessions if protein_accessions is not None else {"P12345"}
+    session.scalars.side_effect = [iter(canon), iter(prots)]
+
+    # `is not None` rather than `or`, so go_terms=[] really yields an empty term list.
+    terms = go_terms if go_terms is not None else [("GO:0003824", 1), ("GO:0008150", 2)]
+    session.query.return_value.filter.return_value.all.return_value = terms
+
+    def _set_id(obj):
+        obj.id = uuid.uuid4()
+    session.add.side_effect = _set_id
+
+    return session
+
+
+def _base_payload(**overrides) -> dict:
+    """Return the default execute payload with ``overrides`` applied on top."""
+    return {
+        "ontology_snapshot_id": _SNAPSHOT_ID,
+        "source_version": "2024-01-01",
+        "quickgo_base_url": "https://quickgo.test/annotation/downloadSearch",
+        "use_db_accessions": True,
+        "eco_mapping_url": None,
+        "page_size": 100,
+        "timeout_seconds": 10,
+        "commit_every_page": False,
+        "gene_product_batch_size": 200,
+        **overrides,
+    }
+
+
+class TestExecute:
+    def test_snapshot_not_found_raises(self) -> None:
+        missing_snapshot_session = _mock_session(snapshot_exists=False)
+        loader = LoadQuickGOAnnotationsOperation()
+        with pytest.raises(ValueError, match="not found"):  # session.get() returns None here
+            loader.execute(missing_snapshot_session, _base_payload(), emit=_noop_emit)
+
+    def test_no_proteins_returns_zero(self) -> None:
+        """With both accession sets empty, nothing is inserted and ``no_proteins`` is emitted."""
+        session = _mock_session(canonical_accessions=set(), protein_accessions=set())
+        seen: list[str] = []
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        outcome = LoadQuickGOAnnotationsOperation().execute(session, _base_payload(), emit=emit)
+        assert outcome.result["annotations_inserted"] == 0
+        assert "load_quickgo_annotations.no_proteins" in seen
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_full_run_inserts_and_skips(self, mock_get) -> None:
+        """One of three rows is inserted; the other two are counted as skipped."""
+        rows = [
+            _tsv_row_str("P12345", "GO:0003824"),
+            _tsv_row_str("UNKNOWN", "GO:0003824"),  # accession missing from the mocked DB sets
+            _tsv_row_str("P12345", "GO:9999999"),  # GO term missing from the mocked term rows
+        ]
+        mock_get.return_value = _make_stream_response(_make_tsv_text(*rows))
+
+        session = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+            go_terms=[("GO:0003824", 1)],
+        )
+        seen: list[str] = []
+
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        outcome = LoadQuickGOAnnotationsOperation().execute(session, _base_payload(), emit=emit)
+        assert outcome.result["annotations_inserted"] == 1
+        assert outcome.result["annotations_skipped"] == 2
+        assert "load_quickgo_annotations.start" in seen
+        assert "load_quickgo_annotations.done" in seen
+        assert "load_quickgo_annotations.annotation_set_created" in seen
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_total_limit_stops_early(self, mock_get) -> None:
+        """A ``total_limit`` of 1 over three rows emits the ``limit_reached`` event."""
+        rows = [
+            _tsv_row_str("P12345", "GO:0003824"),
+            _tsv_row_str("P12345", "GO:0008150"),
+            _tsv_row_str("P12345", "GO:0003824"),
+        ]
+        mock_get.return_value = _make_stream_response(_make_tsv_text(*rows))
+
+        session = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+        seen: list[str] = []
+
+        def emit(event, msg, fields, level):
+            seen.append(event)
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = _base_payload(total_limit=1, page_size=1)
+        loader.execute(session, payload, emit=emit)
+        assert "load_quickgo_annotations.limit_reached" in seen
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_commit_every_page(self, mock_get) -> None:
+        """Two rows at ``page_size=1`` with ``commit_every_page`` commit at least twice."""
+        rows = [
+            _tsv_row_str("P12345", "GO:0003824"),
+            _tsv_row_str("P12345", "GO:0008150"),
+        ]
+        mock_get.return_value = _make_stream_response(_make_tsv_text(*rows))
+
+        session = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = _base_payload(commit_every_page=True, page_size=1)
+        loader.execute(session, payload, emit=_noop_emit)
+        assert session.commit.call_count >= 2
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_no_commit_when_disabled(self, mock_get) -> None:
+        """With ``commit_every_page=False`` the session is never committed."""
+        mock_get.return_value = _make_stream_response(
+            _make_tsv_text(_tsv_row_str("P12345", "GO:0003824"))
+        )
+        session = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+
+        loader = LoadQuickGOAnnotationsOperation()
+        payload = _base_payload(commit_every_page=False, page_size=1)
+        loader.execute(session, payload, emit=_noop_emit)
+        session.commit.assert_not_called()
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_page_done_emitted(self, mock_get) -> None:
+        """Three rows at page_size=2 report two pages and emit page_done."""
+        go_ids = ("GO:0003824", "GO:0008150", "GO:0003824")
+        body = _make_tsv_text(*(_tsv_row_str("P12345", go_id) for go_id in go_ids))
+        mock_get.return_value = _make_stream_response(body)
+
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+
+        captured: list[tuple[str, dict]] = []
+
+        def emit(event, msg, fields, level):
+            captured.append((event, fields))
+
+        outcome = LoadQuickGOAnnotationsOperation().execute(
+            db, _base_payload(page_size=2), emit=emit
+        )
+        names = [event for event, _ in captured]
+        assert names.count("load_quickgo_annotations.page_done") >= 1
+        assert outcome.result["pages"] == 2
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_result_contains_elapsed_seconds(self, mock_get) -> None:
+        """The result dict exposes a non-negative elapsed_seconds entry."""
+        body = _make_tsv_text(_tsv_row_str("P12345", "GO:0003824"))
+        mock_get.return_value = _make_stream_response(body)
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+
+        operation = LoadQuickGOAnnotationsOperation()
+        summary = operation.execute(db, _base_payload(), emit=_noop_emit).result
+        assert "elapsed_seconds" in summary
+        assert summary["elapsed_seconds"] >= 0
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_use_db_accessions_false(self, mock_get) -> None:
+        """With use_db_accessions=False the given gene_product_ids are requested."""
+        body = _make_tsv_text(_tsv_row_str("X00001", "GO:0003824"))
+        mock_get.return_value = _make_stream_response(body)
+
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345", "X00001"},
+            go_terms=[("GO:0003824", 1)],
+        )
+        payload = _base_payload(use_db_accessions=False, gene_product_ids=["X00001"])
+
+        operation = LoadQuickGOAnnotationsOperation()
+        outcome = operation.execute(db, payload, emit=_noop_emit)
+
+        # The accession must appear in the geneProductId request parameter.
+        request_params = mock_get.call_args.kwargs["params"]
+        assert "X00001" in request_params["geneProductId"]
+        assert outcome.result["annotations_inserted"] == 1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_eco_mapping_integrated_in_execute(self, mock_get) -> None:
+        """Run execute() with an ECO mapping URL configured.
+
+        The first mocked GET returns the mapping text, the second the TSV stream.
+        """
+        mapping_response = MagicMock(text="ECO:0000314 IDA\n")
+        mapping_response.raise_for_status = MagicMock()
+        annotation_row = _tsv_row_str("P12345", "GO:0003824", eco_id="ECO:0000314")
+        annotation_response = _make_stream_response(_make_tsv_text(annotation_row))
+
+        # side_effect items are handed out in call order.
+        mock_get.side_effect = [mapping_response, annotation_response]
+
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+            go_terms=[("GO:0003824", 1)],
+        )
+        payload = _base_payload(eco_mapping_url="https://eco.test/map.txt")
+
+        operation = LoadQuickGOAnnotationsOperation()
+        outcome = operation.execute(db, payload, emit=_noop_emit)
+
+        assert outcome.result["annotations_inserted"] == 1
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_result_has_annotation_set_id(self, mock_get) -> None:
+        """The result dict carries an annotation_set_id key."""
+        body = _make_tsv_text(_tsv_row_str("P12345", "GO:0003824"))
+        mock_get.return_value = _make_stream_response(body)
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+
+        operation = LoadQuickGOAnnotationsOperation()
+        summary = operation.execute(db, _base_payload(), emit=_noop_emit).result
+        assert "annotation_set_id" in summary
+
+    @patch("protea.core.operations.load_quickgo_annotations.requests.get")
+    def test_remainder_buffer_flushed(self, mock_get) -> None:
+        """A partial final page (fewer rows than page_size) is still written out."""
+        body = _make_tsv_text(_tsv_row_str("P12345", "GO:0003824"))
+        mock_get.return_value = _make_stream_response(body)
+        db = _mock_session(
+            canonical_accessions={"P12345"},
+            protein_accessions={"P12345"},
+        )
+        # One row against a 10000-row page: the page never fills up.
+        payload = _base_payload(page_size=10000)
+
+        operation = LoadQuickGOAnnotationsOperation()
+        summary = operation.execute(db, payload, emit=_noop_emit).result
+        assert summary["annotations_inserted"] == 1
+        assert summary["pages"] == 1
+
+    def test_operation_name(self) -> None:  # pins the operation's name attribute
+        assert LoadQuickGOAnnotationsOperation().name == "load_quickgo_annotations"
diff --git a/tests/test_logging.py b/tests/test_logging.py
new file mode 100644
index 0000000..76c6c2a
--- /dev/null
+++ b/tests/test_logging.py
@@ -0,0 +1,154 @@
+"""Tests for protea/infrastructure/logging.py"""
+from __future__ import annotations
+
+import json
+import logging
+
+from protea.infrastructure.logging import JSONFormatter, configure_logging
+
+
+class TestJSONFormatter:
+    """Exercises JSONFormatter against hand-built LogRecord instances.
+
+    Records are constructed directly, so no logger or handler is involved.
+    """
+
+    def _make_record(self, msg="hello", level=logging.INFO, name="test.logger", **kwargs):
+        """Build a LogRecord for the formatter tests.
+
+        ``exc_info`` is handed to the LogRecord constructor; every other
+        keyword argument is attached to the record as an attribute.
+        """
+        exc_info = kwargs.pop("exc_info", None)
+        record = logging.LogRecord(
+            name, level, "test.py", 1, msg, (), exc_info
+        )
+        for attr_name, attr_value in kwargs.items():
+            setattr(record, attr_name, attr_value)
+        return record
+
+    def _render(self, record):
+        """Format ``record`` with a fresh JSONFormatter and decode the JSON."""
+        return json.loads(JSONFormatter().format(record))
+
+    def test_formats_valid_json_with_expected_keys(self):
+        """Output parses as JSON and carries timestamp, level, message, logger."""
+        payload = self._render(self._make_record("test message"))
+
+        assert "timestamp" in payload
+        assert payload["level"] == "INFO"
+        assert payload["message"] == "test message"
+        assert payload["logger"] == "test.logger"
+
+    def test_timestamp_is_utc_iso_format(self):
+        """The timestamp carries an explicit +00:00 offset."""
+        payload = self._render(self._make_record())
+        # A UTC ISO timestamp is expected to include the +00:00 offset.
+        assert "+00:00" in payload["timestamp"]
+
+    def test_includes_exc_info_when_present(self):
+        """A record with exc_info yields an "exception" entry naming the error."""
+        try:
+            raise ValueError("boom")
+        except ValueError as err:
+            # Same triple that sys.exc_info() reports inside this handler.
+            exc_info = (type(err), err, err.__traceback__)
+
+        payload = self._render(self._make_record("error occurred", exc_info=exc_info))
+
+        assert "exception" in payload
+        assert "ValueError" in payload["exception"]
+        assert "boom" in payload["exception"]
+
+    def test_exc_info_absent_when_no_exception(self):
+        """Without exc_info there is no "exception" key."""
+        payload = self._render(self._make_record("all good"))
+        assert "exception" not in payload
+
+    def test_includes_extra_fields(self):
+        """Custom record attributes show up as top-level JSON keys."""
+        record = self._make_record("with extras", queue="protea.jobs", batch_size=100)
+        payload = self._render(record)
+
+        assert payload["queue"] == "protea.jobs"
+        assert payload["batch_size"] == 100
+
+    def test_builtin_attrs_excluded_from_extras(self):
+        """Standard LogRecord attributes must not leak into the output."""
+        payload = self._render(self._make_record("check builtins"))
+
+        # Standard LogRecord attributes should not appear as top-level keys
+        standard_attrs = ("args", "exc_info", "exc_text", "lineno", "pathname", "thread")
+        leaked = [attr for attr in standard_attrs if attr in payload]
+        assert leaked == []
+
+    def test_stack_info_included_when_present(self):
+        """A stack_info string on the record is copied into the output."""
+        record = self._make_record("with stack")
+        record.stack_info = "Stack trace here"
+        assert self._render(record)["stack_info"] == "Stack trace here"
+
+    def test_non_serializable_extra_uses_default_str(self):
+        """An extra that json cannot encode natively must not break formatting."""
+        record = self._make_record("non-serializable", obj=object())
+        # Formatting must succeed even though object() is not JSON-serializable.
+        payload = self._render(record)
+        assert "obj" in payload
+
+
+class TestConfigureLogging:
+    """Checks how configure_logging leaves the root logger configured."""
+
+    def setup_method(self):
+        """Snapshot the root logger's handlers and level before each test."""
+        self._root = logging.getLogger()
+        self._original_level = self._root.level
+        self._original_handlers = list(self._root.handlers)
+
+    def teardown_method(self):
+        """Reinstate the handlers and level captured in setup_method."""
+        self._root.setLevel(self._original_level)
+        self._root.handlers = self._original_handlers
+
+    def test_json_true_sets_json_formatter(self):
+        configure_logging(json=True, level="WARNING")
+        handlers = logging.getLogger().handlers
+        assert len(handlers) == 1
+        assert isinstance(handlers[0].formatter, JSONFormatter)
+
+    def test_json_false_uses_standard_formatter(self):
+        configure_logging(json=False, level="INFO")
+        handlers = logging.getLogger().handlers
+        assert len(handlers) == 1
+        active_formatter = handlers[0].formatter
+        assert isinstance(active_formatter, logging.Formatter)
+        assert not isinstance(active_formatter, JSONFormatter)
+
+    def test_respects_level_parameter(self):
+        """Each call applies the requested level to the root logger."""
+        expectations = (("DEBUG", logging.DEBUG), ("ERROR", logging.ERROR))
+        for level_name, expected_level in expectations:
+            configure_logging(json=True, level=level_name)
+            assert logging.getLogger().level == expected_level
+
+    def test_level_is_case_insensitive(self):
+        configure_logging(json=True, level="warning")
+        assert logging.getLogger().level == logging.WARNING
+
+    def test_clears_existing_handlers(self):
+        root = logging.getLogger()
+        for _ in range(2):
+            root.addHandler(logging.StreamHandler())
+        assert len(root.handlers) >= 2
+
+        configure_logging(json=True)
+        assert len(root.handlers) == 1
+
+    def test_invalid_level_falls_back_to_info(self):
+        configure_logging(json=True, level="NONEXISTENT")
+        assert logging.getLogger().level == logging.INFO
+
+    def test_handler_is_stream_handler(self):
+        configure_logging(json=True)
+        first_handler = logging.getLogger().handlers[0]
+        assert isinstance(first_handler, logging.StreamHandler)
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 83abe54..a16ff43 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -6,7 +6,6 @@
 from protea.core.evaluation import EvaluationData
 from protea.core.metrics import CAFAMetrics, PRPoint, compute_cafa_metrics
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -60,7 +59,7 @@ def test_cafa_metrics_summary_keys(self):
 class TestComputeCafaMetricsValidation:
     def test_invalid_category_raises(self):
         with pytest.raises(ValueError, match="category"):
-            compute_cafa_metrics([], _make_eval(), category="pk")
+            compute_cafa_metrics([], _make_eval(), category="invalid")
 
     def test_valid_nk_category(self):
         result = compute_cafa_metrics([], _make_eval(nk={"P1": {"GO:0001"}}), category="nk")
diff --git a/tests/test_predict_go_terms.py b/tests/test_predict_go_terms.py
index e246af8..ea48937 100644
--- a/tests/test_predict_go_terms.py
+++ b/tests/test_predict_go_terms.py
@@ -8,14 +8,20 @@
 
 from protea.core.knn_search import _compute_distance_matrix, search_knn
 from protea.core.operations.predict_go_terms import (
-    PredictGOTermsOperation,
-    PredictGOTermsPayload,
     PredictGOTermsBatchOperation,
     PredictGOTermsBatchPayload,
+    PredictGOTermsOperation,
+    PredictGOTermsPayload,
+    StorePredictionsOperation,
+    _aspect_index_path,
+    _build_anno_csr,
+    _csr_lookup,
+    _disk_cache_paths,
 )
-from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
 from protea.infrastructure.orm.models.annotation.annotation_set import AnnotationSet
 from protea.infrastructure.orm.models.annotation.ontology_snapshot import OntologySnapshot
+from protea.infrastructure.orm.models.embedding.embedding_config import EmbeddingConfig
+from protea.infrastructure.orm.models.job import JobStatus
 
 _noop_emit = lambda *_: None  # noqa: E731
 _SNAPSHOT_ID = str(uuid.uuid4())
@@ -46,7 +52,7 @@ def test_minimal_valid(self) -> None:
         assert p.batch_size == 1024
 
     def test_empty_embedding_config_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             PredictGOTermsPayload.model_validate({
                 "embedding_config_id": "",
                 "annotation_set_id": _ANN_SET_ID,
@@ -54,7 +60,7 @@ def test_empty_embedding_config_id_raises(self) -> None:
             })
 
     def test_whitespace_embedding_config_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             PredictGOTermsPayload.model_validate({
                 "embedding_config_id": "   ",
                 "annotation_set_id": _ANN_SET_ID,
@@ -62,7 +68,7 @@ def test_whitespace_embedding_config_id_raises(self) -> None:
             })
 
     def test_empty_annotation_set_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             PredictGOTermsPayload.model_validate({
                 "embedding_config_id": str(uuid.uuid4()),
                 "annotation_set_id": "",
@@ -70,7 +76,7 @@ def test_empty_annotation_set_id_raises(self) -> None:
             })
 
     def test_empty_ontology_snapshot_id_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             PredictGOTermsPayload.model_validate({
                 "embedding_config_id": str(uuid.uuid4()),
                 "annotation_set_id": _ANN_SET_ID,
@@ -78,7 +84,7 @@ def test_empty_ontology_snapshot_id_raises(self) -> None:
             })
 
     def test_missing_annotation_set_raises(self) -> None:
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             PredictGOTermsPayload.model_validate({
                 "embedding_config_id": str(uuid.uuid4()),
                 "ontology_snapshot_id": _SNAPSHOT_ID,
@@ -194,7 +200,7 @@ def test_faiss_flat_matches_numpy(self) -> None:
         numpy_res = search_knn(Q, R, accs, k=3, backend="numpy", metric="cosine")
         faiss_res = search_knn(Q, R, accs, k=3, backend="faiss",
                                metric="cosine", faiss_index_type="Flat")
-        for np_hits, fa_hits in zip(numpy_res, faiss_res):
+        for np_hits, fa_hits in zip(numpy_res, faiss_res, strict=False):
             np_accs = [a for a, _ in np_hits]
             fa_accs = [a for a, _ in fa_hits]
             assert np_accs == fa_accs
@@ -222,7 +228,7 @@ def test_faiss_hnsw(self) -> None:
     def test_unknown_backend_raises(self) -> None:
         R, accs = self._make_data()
         Q = np.random.rand(1, 16).astype(np.float32)
-        with pytest.raises(Exception):
+        with pytest.raises(ValueError):
             search_knn(Q, R, accs, k=3, backend="unknown")
 
     def test_unknown_faiss_index_raises(self) -> None:
@@ -366,3 +372,430 @@ def test_no_references_returns_zero(self) -> None:
             result = op.execute(session, self._base_payload(), emit=_noop_emit)
 
         assert result.result["batches"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Coordinator — dispatching batches
+# ---------------------------------------------------------------------------
+
+class TestPredictGOTermsCoordinatorDispatch:
+    """Coordinator tests: execute() turns query accessions into batch messages."""
+
+    def _op(self) -> PredictGOTermsOperation:
+        return PredictGOTermsOperation()
+
+    def _base_payload(self):
+        return {
+            "embedding_config_id": str(uuid.uuid4()),
+            "annotation_set_id": _ANN_SET_ID,
+            "ontology_snapshot_id": _SNAPSHOT_ID,
+            "_job_id": str(uuid.uuid4()),
+        }
+
+    def _session(self) -> MagicMock:
+        """Mocked session: get() uses make_session_get(); add() fills in missing ids."""
+        session = MagicMock()
+        session.get.side_effect = make_session_get()
+
+        def assign_id(obj):
+            if not hasattr(obj, "id") or obj.id is None:
+                obj.id = uuid.uuid4()
+
+        session.add.side_effect = assign_id
+        return session
+
+    def test_dispatches_correct_number_of_batches(self) -> None:
+        """Ten accessions with batch_size=4 are split into three deferred batches."""
+        op = self._op()
+        session = self._session()
+        session.flush.return_value = None
+        accessions = [f"P{i:05d}" for i in range(10)]
+
+        payload = self._base_payload()
+        payload["batch_size"] = 4
+
+        with patch.object(op, "_load_query_accessions", return_value=accessions):
+            result = op.execute(session, payload, emit=_noop_emit)
+
+        # ceil(10/4) = 3 batches
+        assert result.result["batches"] == 3
+        assert result.result["queries"] == 10
+        assert result.deferred is True
+        assert result.progress_total == 3
+        assert len(result.publish_operations) == 3
+
+    def test_creates_prediction_set(self) -> None:
+        """A single query yields one batch and a prediction_set_id in the result."""
+        op = self._op()
+        session = self._session()
+
+        with patch.object(op, "_load_query_accessions", return_value=["P1"]):
+            result = op.execute(session, self._base_payload(), emit=_noop_emit)
+
+        assert "prediction_set_id" in result.result
+        assert result.result["batches"] == 1
+
+    def test_batch_messages_contain_correct_fields(self) -> None:
+        """Feature flags set on the coordinator payload reach the batch message."""
+        op = self._op()
+        session = self._session()
+
+        payload = self._base_payload()
+        payload["compute_alignments"] = True
+        payload["compute_taxonomy"] = True
+        payload["aspect_separated_knn"] = True
+
+        with patch.object(op, "_load_query_accessions", return_value=["P1"]):
+            result = op.execute(session, payload, emit=_noop_emit)
+
+        queue, msg = result.publish_operations[0]
+        assert queue == "protea.predictions.batch"
+        assert msg["operation"] == "predict_go_terms_batch"
+        assert msg["payload"]["compute_alignments"] is True
+        assert msg["payload"]["compute_taxonomy"] is True
+        assert msg["payload"]["aspect_separated_knn"] is True
+
+
+# ---------------------------------------------------------------------------
+# StorePredictionsOperation
+# ---------------------------------------------------------------------------
+
+class TestStorePredictions:
+    """Unit tests for StorePredictionsOperation run against a mocked session.
+
+    ``session.get`` supplies the parent job and ``session.execute(...).fetchone``
+    supplies the mocked rows.
+    """
+
+    def _op(self) -> StorePredictionsOperation:
+        return StorePredictionsOperation()
+
+    def _make_prediction(self, **overrides):
+        """Return one prediction dict; keyword overrides replace default fields."""
+        return {
+            "protein_accession": "P12345",
+            "go_term_id": 42,
+            "ref_protein_accession": "Q99999",
+            "distance": 0.15,
+            **overrides,
+        }
+
+    def _payload(self, predictions):
+        """Wrap ``predictions`` in a payload with fresh parent and set ids."""
+        return {
+            "parent_job_id": str(uuid.uuid4()),
+            "prediction_set_id": str(uuid.uuid4()),
+            "predictions": predictions,
+        }
+
+    def test_inserts_predictions(self) -> None:
+        """Two predictions under a RUNNING parent are reported as inserted."""
+        op = self._op()
+        session = MagicMock()
+        running_parent = MagicMock()
+        running_parent.status = JobStatus.RUNNING
+        session.get.return_value = running_parent
+
+        progress = MagicMock()
+        progress.progress_current = 1
+        progress.progress_total = 5
+        session.execute.return_value.fetchone.return_value = progress
+
+        batch = [self._make_prediction(), self._make_prediction(go_term_id=43)]
+        payload = self._payload(batch)
+
+        result = op.execute(session, payload, emit=_noop_emit)
+
+        assert result.result["predictions_inserted"] == 2
+        assert session.execute.called
+
+    def test_skips_when_parent_cancelled(self) -> None:
+        """A CANCELLED parent makes the operation report skipped=True."""
+        op = self._op()
+        session = MagicMock()
+        cancelled_parent = MagicMock()
+        cancelled_parent.status = JobStatus.CANCELLED
+        session.get.return_value = cancelled_parent
+
+        payload = self._payload([self._make_prediction()])
+
+        result = op.execute(session, payload, emit=_noop_emit)
+
+        assert result.result["skipped"] is True
+
+    def test_skips_when_parent_failed(self) -> None:
+        """A FAILED parent makes the operation report skipped=True."""
+        op = self._op()
+        session = MagicMock()
+        failed_parent = MagicMock()
+        failed_parent.status = JobStatus.FAILED
+        session.get.return_value = failed_parent
+
+        payload = self._payload([self._make_prediction()])
+
+        result = op.execute(session, payload, emit=_noop_emit)
+
+        assert result.result["skipped"] is True
+
+    def test_empty_predictions_still_updates_progress(self) -> None:
+        """An empty batch runs without error and reports zero inserts."""
+        op = self._op()
+        session = MagicMock()
+        running_parent = MagicMock()
+        running_parent.status = JobStatus.RUNNING
+        session.get.return_value = running_parent
+
+        progress = MagicMock()
+        progress.progress_current = 1
+        progress.progress_total = 3
+        session.execute.return_value.fetchone.return_value = progress
+
+        result = op.execute(session, self._payload([]), emit=_noop_emit)
+
+        assert result.result["predictions_inserted"] == 0
+
+    def test_last_batch_closes_parent_as_succeeded(self) -> None:
+        """When progress reaches the total, parent_succeeded is emitted."""
+        op = self._op()
+        session = MagicMock()
+        running_parent = MagicMock()
+        running_parent.status = JobStatus.RUNNING
+        session.get.return_value = running_parent
+
+        # First fetchone(): progress_current == progress_total
+        progress = MagicMock()
+        progress.progress_current = 3
+        progress.progress_total = 3
+        # Second fetchone(): row for the closed parent
+        closed = MagicMock()
+        closed.id = uuid.uuid4()
+        session.execute.return_value.fetchone.side_effect = [progress, closed]
+
+        seen_events = []
+
+        def capture_emit(event, msg, fields, level):
+            seen_events.append(event)
+
+        payload = self._payload([self._make_prediction()])
+        op.execute(session, payload, emit=capture_emit)
+
+        assert "store_predictions.parent_succeeded" in seen_events
+
+    def test_name(self) -> None:
+        """The name attribute is the literal "store_predictions" string."""
+        assert StorePredictionsOperation().name == "store_predictions"
+
+
+# ---------------------------------------------------------------------------
+# Batch worker — parent cancellation
+# ---------------------------------------------------------------------------
+
+class TestPredictBatchParentCancellation:
+    """Batch worker behaviour when the parent job is cancelled or failed.
+
+    Both tests expect ``result.result["skipped"]`` to be True.
+    """
+
+    def _op(self) -> PredictGOTermsBatchOperation:
+        return PredictGOTermsBatchOperation()
+
+    def _payload(self):
+        """Minimal batch payload: fresh UUID strings plus one query accession."""
+        id_fields = (
+            "embedding_config_id",
+            "annotation_set_id",
+            "prediction_set_id",
+            "parent_job_id",
+        )
+        payload = {field: str(uuid.uuid4()) for field in id_fields}
+        payload["query_accessions"] = ["P1"]
+        return payload
+
+    def _execute_with_parent(self, status):
+        """Run the operation with session.get() returning a parent in ``status``."""
+        session = MagicMock()
+        parent = MagicMock()
+        parent.status = status
+        session.get.return_value = parent
+        op = self._op()
+        return op.execute(session, self._payload(), emit=_noop_emit)
+
+    def test_skips_when_parent_cancelled(self) -> None:
+        """CANCELLED parent -> skipped."""
+        result = self._execute_with_parent(JobStatus.CANCELLED)
+        assert result.result["skipped"] is True
+
+    def test_skips_when_parent_failed(self) -> None:
+        """FAILED parent -> skipped."""
+        result = self._execute_with_parent(JobStatus.FAILED)
+        assert result.result["skipped"] is True
+
+
+# ---------------------------------------------------------------------------
+# Pure helper functions
+# ---------------------------------------------------------------------------
+
+class TestBuildAnnoCsr:
+    """_build_anno_csr flattens a per-accession GO map into arrays plus offsets."""
+
+    def test_builds_correct_structure(self) -> None:
+        p1_annotations = [
+            {"go_term_id": 10, "qualifier": "enables", "evidence_code": "IDA"},
+            {"go_term_id": 20, "qualifier": None, "evidence_code": "IEA"},
+        ]
+        p2_annotations = [
+            {"go_term_id": 30, "qualifier": "involved_in", "evidence_code": "IPI"},
+        ]
+        go_map = {"P1": p1_annotations, "P2": p2_annotations}
+
+        gtids, quals, ecodes, offsets = _build_anno_csr(["P1", "P2"], go_map)
+
+        # P1 contributes two rows and P2 one, so offsets run 0 -> 2 -> 3.
+        assert list(offsets) == [0, 2, 3]
+        assert list(gtids) == [10, 20, 30]
+        assert quals[0] == "enables"
+        assert ecodes[2] == "IPI"
+
+    def test_missing_accession_produces_empty_range(self) -> None:
+        go_map = {"P1": [{"go_term_id": 10}]}
+        _gtids, _quals, _ecodes, offsets = _build_anno_csr(["P1", "P2"], go_map)
+        assert list(offsets) == [0, 1, 1]  # P2 has empty range
+
+    def test_empty_input(self) -> None:
+        gtids, _quals, _ecodes, offsets = _build_anno_csr([], {})
+        assert len(gtids) == 0
+        assert list(offsets) == [0]
+
+
+class TestCsrLookup:
+    """_csr_lookup reads annotations back out of the arrays from _build_anno_csr."""
+
+    def test_retrieves_annotations(self) -> None:
+        accessions = ["P1", "P2"]
+        gtids, quals, ecodes, offsets = _build_anno_csr(accessions, {
+            "P1": [{"go_term_id": 10, "qualifier": "enables", "evidence_code": "IDA"}],
+            "P2": [{"go_term_id": 20, "qualifier": None, "evidence_code": "IEA"}],
+        })
+        positions = {acc: pos for pos, acc in enumerate(accessions)}
+
+        found = _csr_lookup({"P1"}, accessions, positions, gtids, quals, ecodes, offsets)
+        assert "P1" in found
+        assert len(found["P1"]) == 1
+        assert found["P1"][0]["go_term_id"] == 10
+
+    def test_missing_accession_ignored(self) -> None:
+        known = ["P1"]
+        gtids, quals, ecodes, offsets = _build_anno_csr(known, {"P1": [{"go_term_id": 10}]})
+        found = _csr_lookup({"UNKNOWN"}, known, {"P1": 0}, gtids, quals, ecodes, offsets)
+        assert found == {}
+
+
+class TestDiskCachePaths:
+    """Cache path helpers embed the ids (and aspect) they are given."""
+
+    def test_paths_include_ids(self) -> None:
+        config_id, annotation_set_id = uuid.uuid4(), uuid.uuid4()
+        emb_path, acc_path = _disk_cache_paths(config_id, annotation_set_id)
+        emb_text = str(emb_path)
+        assert str(config_id) in emb_text
+        assert str(annotation_set_id) in emb_text
+        assert "embeddings" in emb_text
+        assert "accessions" in str(acc_path)
+
+    def test_aspect_index_path_includes_aspect(self) -> None:
+        index_path = _aspect_index_path(uuid.uuid4(), uuid.uuid4(), "P")
+        assert "__P_indices" in str(index_path)
+
+
+# ---------------------------------------------------------------------------
+# Batch payload validation
+# ---------------------------------------------------------------------------
+
+class TestPredictGOTermsBatchPayload:
+    """Defaults applied by PredictGOTermsBatchPayload when only ids are given."""
+
+    def _validated(self, query_accessions):
+        """Validate a payload with fresh ids and the given query accessions."""
+        id_fields = (
+            "embedding_config_id", "annotation_set_id",
+            "prediction_set_id", "parent_job_id",
+        )
+        raw = {field: str(uuid.uuid4()) for field in id_fields}
+        raw["query_accessions"] = query_accessions
+        return PredictGOTermsBatchPayload.model_validate(raw)
+
+    def test_valid_payload(self) -> None:
+        p = self._validated(["P1", "P2"])
+        assert p.limit_per_entry == 5
+        assert p.aspect_separated_knn is True
+
+    def test_feature_flags_default_false(self) -> None:
+        p = self._validated([])
+        assert p.compute_alignments is False
+        assert p.compute_taxonomy is False
+        assert p.compute_reranker_features is False
+
+
+# ---------------------------------------------------------------------------
+# _predict_batch — reranker features
+# ---------------------------------------------------------------------------
+
+class TestPredictBatchRerankerFeatures:
+    """_predict_batch attaches reranker feature keys only when the flag is set."""
+
+    def _op(self) -> PredictGOTermsBatchOperation:
+        return PredictGOTermsBatchOperation()
+
+    def _payload(self, **kwargs):
+        """Validated batch payload; reranker features are on unless overridden."""
+        raw = {
+            "embedding_config_id": str(uuid.uuid4()),
+            "annotation_set_id": _ANN_SET_ID,
+            "prediction_set_id": str(uuid.uuid4()),
+            "parent_job_id": str(uuid.uuid4()),
+            "query_accessions": [],
+            "limit_per_entry": 2,
+            "compute_reranker_features": True,
+            **kwargs,
+        }
+        return PredictGOTermsBatchPayload.model_validate(raw)
+
+    def test_reranker_features_included_when_enabled(self) -> None:
+        go_map = {
+            "REF1": [{"go_term_id": 1, "qualifier": "enables", "evidence_code": "IDA"}],
+            "REF2": [{"go_term_id": 2, "qualifier": None, "evidence_code": "IEA"}],
+        }
+        ref_data = {
+            "accessions": ["REF1", "REF2"],
+            "embeddings": np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float32),
+            "go_map": go_map,
+        }
+        query_embs = np.array([[0.9, 0.1]], dtype=np.float32)
+
+        preds = self._op()._predict_batch(
+            ["Q1"], query_embs, ref_data, uuid.uuid4(), self._payload()
+        )
+
+        expected_keys = (
+            "vote_count", "k_position", "go_term_frequency",
+            "ref_annotation_density", "neighbor_distance_std",
+        )
+        assert len(preds) >= 1
+        for pred in preds:
+            for key in expected_keys:
+                assert key in pred
+
+    def test_reranker_features_excluded_when_disabled(self) -> None:
+        ref_data = {
+            "accessions": ["REF1"],
+            "embeddings": np.array([[1.0, 0.0]], dtype=np.float32),
+            "go_map": {
+                "REF1": [{"go_term_id": 1, "qualifier": "enables", "evidence_code": "IDA"}],
+            },
+        }
+        query_embs = np.array([[0.9, 0.1]], dtype=np.float32)
+        payload = self._payload(compute_reranker_features=False)
+        preds = self._op()._predict_batch(["Q1"], query_embs, ref_data, uuid.uuid4(), payload)
+        for pred in preds:
+            assert "vote_count" not in pred
+            assert "k_position" not in pred
diff --git a/tests/test_proteins_router.py b/tests/test_proteins_router.py
new file mode 100644
index 0000000..4c51bb3
--- /dev/null
+++ b/tests/test_proteins_router.py
@@ -0,0 +1,352 @@
+"""Unit tests for the /proteins router.
+
+Database is fully mocked -- no real infrastructure required.
+"""
+from __future__ import annotations
+
+from contextlib import contextmanager
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from protea.api.routers.proteins import router
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_app(session_factory):
+    app = FastAPI()
+    app.state.session_factory = session_factory
+    app.include_router(router)
+    return app
+
+
+@contextmanager
+def _mock_scope(session):
+    yield session
+
+
+def _make_protein(**overrides):
+    defaults = {
+        "accession": "P12345",
+        "entry_name": "TEST_HUMAN",
+        "gene_name": "TEST",
+        "organism": "Homo sapiens",
+        "taxonomy_id": 9606,
+        "length": 100,
+        "reviewed": True,
+        "is_canonical": True,
+        "canonical_accession": "P12345",
+        "isoform_index": None,
+        "sequence_id": 1,
+    }
+    defaults.update(overrides)
+    p = MagicMock()
+    for k, v in defaults.items():
+        setattr(p, k, v)
+    return p
+
+
+def _make_metadata():
+    meta = MagicMock()
+    for attr in (
+        "function_cc", "ec_number", "catalytic_activity", "pathway",
+        "keywords", "cofactor", "activity_regulation", "absorption",
+        "kinetics", "ph_dependence", "redox_potential", "temperature_dependence",
+        "active_site", "binding_site", "dna_binding", "rhea_id", "site", "features",
+    ):
+        setattr(meta, attr, f"mock_{attr}")
+    return meta
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def session():
+    return MagicMock()
+
+
+@pytest.fixture()
+def factory(session):
+    return MagicMock()
+
+
+@pytest.fixture()
+def client(session, factory):
+    app = _make_app(factory)
+    with patch(
+        "protea.api.routers.proteins.session_scope",
+        side_effect=lambda _: _mock_scope(session),
+    ):
+        with TestClient(app) as c:
+            yield c, session
+
+
+# ---------------------------------------------------------------------------
+# GET /proteins/stats
+# ---------------------------------------------------------------------------
+
+class TestProteinStats:
+    def test_returns_all_stat_keys(self, client):
+        c, session = client
+        # scalar() is stubbed per query-chain shape, not per call order:
+        # query().scalar() -> 10, .filter().scalar() -> 5, .join().scalar() -> 3
+        session.query.return_value.scalar.return_value = 10
+        session.query.return_value.filter.return_value.scalar.return_value = 5
+        session.query.return_value.join.return_value.scalar.return_value = 3
+
+        resp = c.get("/proteins/stats")
+        assert resp.status_code == 200
+        data = resp.json()
+        for key in (
+            "total", "canonical", "isoforms", "reviewed",
+            "unreviewed", "with_metadata", "with_embeddings", "with_go_annotations",
+        ):
+            assert key in data
+
+    def test_stats_zero_values(self, client):
+        c, session = client
+        session.query.return_value.scalar.return_value = 0
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+        session.query.return_value.join.return_value.scalar.return_value = 0
+
+        resp = c.get("/proteins/stats")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total"] == 0
+        assert data["isoforms"] == 0
+
+
+# ---------------------------------------------------------------------------
+# GET /proteins
+# ---------------------------------------------------------------------------
+
+class TestListProteins:
+    def test_returns_paginated_list(self, client):
+        c, session = client
+        p = _make_protein()
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 1
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = [p]
+
+        resp = c.get("/proteins")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["total"] == 1
+        assert len(data["items"]) == 1
+        assert data["items"][0]["accession"] == "P12345"
+
+    def test_search_filter(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 0
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = []
+
+        resp = c.get("/proteins", params={"search": "kinase"})
+        assert resp.status_code == 200
+        assert resp.json()["total"] == 0
+        assert resp.json()["items"] == []
+
+    def test_reviewed_filter(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 0
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = []
+
+        resp = c.get("/proteins", params={"reviewed": "true"})
+        assert resp.status_code == 200
+
+    def test_canonical_only_false(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 0
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = []
+
+        resp = c.get("/proteins", params={"canonical_only": "false"})
+        assert resp.status_code == 200
+
+    def test_pagination_params(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 100
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = []
+
+        resp = c.get("/proteins", params={"limit": 10, "offset": 20})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["limit"] == 10
+        assert data["offset"] == 20
+
+    def test_empty_list(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.count.return_value = 0
+        q_mock.order_by.return_value.offset.return_value.limit.return_value.all.return_value = []
+
+        resp = c.get("/proteins")
+        assert resp.status_code == 200
+        assert resp.json()["items"] == []
+
+
+# ---------------------------------------------------------------------------
+# GET /proteins/{accession}
+# ---------------------------------------------------------------------------
+
+class TestGetProtein:
+    def test_returns_protein_with_metadata(self, client):
+        c, session = client
+        p = _make_protein()
+        meta = _make_metadata()
+        session.get.return_value = p
+        session.query.return_value.filter.return_value.first.return_value = meta
+        session.query.return_value.filter.return_value.scalar.return_value = 2
+        session.query.return_value.filter.return_value.order_by.return_value.all.return_value = []
+
+        resp = c.get("/proteins/P12345")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["accession"] == "P12345"
+        assert data["metadata"] is not None
+        assert data["metadata"]["function_cc"] == "mock_function_cc"
+
+    def test_returns_protein_without_metadata(self, client):
+        c, session = client
+        p = _make_protein()
+        session.get.return_value = p
+        session.query.return_value.filter.return_value.first.return_value = None
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+        session.query.return_value.filter.return_value.order_by.return_value.all.return_value = []
+
+        resp = c.get("/proteins/P12345")
+        assert resp.status_code == 200
+        assert resp.json()["metadata"] is None
+
+    def test_not_found_returns_404(self, client):
+        c, session = client
+        session.get.return_value = None
+
+        resp = c.get("/proteins/UNKNOWN")
+        assert resp.status_code == 404
+
+    def test_canonical_lists_isoforms(self, client):
+        c, session = client
+        p = _make_protein(is_canonical=True)
+        meta = _make_metadata()
+        session.get.return_value = p
+        session.query.return_value.filter.return_value.first.return_value = meta
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+
+        iso1 = MagicMock()
+        iso1.accession = "P12345-2"
+        iso2 = MagicMock()
+        iso2.accession = "P12345-3"
+        session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [iso1, iso2]
+
+        resp = c.get("/proteins/P12345")
+        assert resp.status_code == 200
+        assert resp.json()["isoforms"] == ["P12345-2", "P12345-3"]
+
+    def test_non_canonical_no_isoform_list(self, client):
+        c, session = client
+        p = _make_protein(is_canonical=False, accession="P12345-2", sequence_id=None)
+        session.get.return_value = p
+        session.query.return_value.filter.return_value.first.return_value = None
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+
+        resp = c.get("/proteins/P12345-2")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["isoforms"] == []
+        assert data["embedding_count"] == 0
+
+
+# ---------------------------------------------------------------------------
+# GET /proteins/{accession}/annotations
+# ---------------------------------------------------------------------------
+
+class TestGetProteinAnnotations:
+    def _make_annotation_row(self, go_id="GO:0003674", name="molecular_function",
+                              aspect="F", qualifier="enables", evidence="IDA",
+                              assigned_by="UniProt", db_ref="PMID:123",
+                              ann_set_id=None, source="goa", version="2024-01"):
+        ann = MagicMock()
+        ann.qualifier = qualifier
+        ann.evidence_code = evidence
+        ann.assigned_by = assigned_by
+        ann.db_reference = db_ref
+        ann.annotation_set_id = ann_set_id or uuid4()
+
+        gt = MagicMock()
+        gt.go_id = go_id
+        gt.name = name
+        gt.aspect = aspect
+
+        aset = MagicMock()
+        aset.source = source
+        aset.source_version = version
+
+        return (ann, gt, aset)
+
+    def test_returns_annotations(self, client):
+        c, session = client
+        row = self._make_annotation_row()
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.order_by.return_value.all.return_value = [row]
+
+        resp = c.get("/proteins/P12345/annotations")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data) == 1
+        assert data[0]["go_id"] == "GO:0003674"
+        assert data[0]["evidence_code"] == "IDA"
+
+    def test_empty_annotations(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.order_by.return_value.all.return_value = []
+
+        resp = c.get("/proteins/P12345/annotations")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+    def test_filter_by_annotation_set_id(self, client):
+        c, session = client
+        ann_set_id = uuid4()
+        row = self._make_annotation_row(ann_set_id=ann_set_id)
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.order_by.return_value.all.return_value = [row]
+
+        resp = c.get("/proteins/P12345/annotations", params={"annotation_set_id": str(ann_set_id)})
+        assert resp.status_code == 200
+        assert len(resp.json()) == 1
+
+    def test_invalid_annotation_set_id_returns_422(self, client):
+        c, session = client
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.filter.side_effect = ValueError("bad uuid")
+
+        resp = c.get("/proteins/P12345/annotations", params={"annotation_set_id": "not-a-uuid"})
+        assert resp.status_code == 422
diff --git a/tests/test_queue.py b/tests/test_queue.py
index 5912620..9c0bb47 100644
--- a/tests/test_queue.py
+++ b/tests/test_queue.py
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 import json
+import threading
 from unittest.mock import MagicMock, patch
 from uuid import UUID, uuid4
 
@@ -142,10 +143,20 @@ def _mock_pika(self, consumer):
 
         return conn, channel
 
-    def test_run_declares_queue(self):
+    def test_run_declares_queue_with_dlx(self):
         consumer = _consumer()
         conn, channel = self._mock_pika(consumer)
-        channel.queue_declare.assert_called_once_with(queue="test.jobs", durable=True)
+        # DLQ + main queue
+        assert channel.queue_declare.call_count == 2
+        channel.queue_declare.assert_any_call(queue="protea.dead-letter", durable=True)
+        channel.queue_declare.assert_any_call(
+            queue="test.jobs",
+            durable=True,
+            arguments={"x-dead-letter-exchange": "protea.dlx"},
+        )
+        channel.exchange_declare.assert_called_once_with(
+            exchange="protea.dlx", exchange_type="fanout", durable=True
+        )
 
     def test_run_sets_prefetch(self):
         consumer = _consumer()
@@ -180,7 +191,8 @@ def test_publishes_correct_body(self):
         conn.channel.return_value = channel
         conn.is_open = True
 
-        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn):
+        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn), \
+             patch("protea.infrastructure.queue.publisher._local", threading.local()):
             publish_job("amqp://localhost/", "test.jobs", job_id)
 
         channel.basic_publish.assert_called_once()
@@ -189,16 +201,18 @@ def test_publishes_correct_body(self):
         body = json.loads(kwargs["body"].decode())
         assert body["job_id"] == str(job_id)
 
-    def test_closes_connection_on_success(self):
+    def test_reuses_connection_on_success(self):
+        """With thread-local connection reuse, conn is NOT closed after a successful publish."""
         conn = MagicMock()
         channel = MagicMock()
         conn.channel.return_value = channel
         conn.is_open = True
 
-        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn):
+        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn), \
+             patch("protea.infrastructure.queue.publisher._local", threading.local()):
             publish_job("amqp://localhost/", "q", uuid4())
 
-        conn.close.assert_called_once()
+        conn.close.assert_not_called()
 
     def test_closes_connection_on_exception(self):
         conn = MagicMock()
@@ -208,12 +222,13 @@ def test_closes_connection_on_exception(self):
         conn.is_open = True
 
         with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn), \
-             patch("protea.infrastructure.queue.publisher.time.sleep"):
+             patch("protea.infrastructure.queue.publisher.time.sleep"), \
+             patch("protea.infrastructure.queue.publisher._local", threading.local()):
             with pytest.raises(RuntimeError, match="Failed to publish to queue"):
                 publish_job("amqp://localhost/", "q", uuid4())
 
-        # close() is called once per retry attempt (4 total: 1 initial + 3 retries)
-        assert conn.close.call_count == 4
+        # _close_cached_connection calls conn.close() once per failed attempt (5 total)
+        assert conn.close.call_count == 5
 
     def test_declares_durable_queue(self):
         conn = MagicMock()
@@ -221,7 +236,546 @@ def test_declares_durable_queue(self):
         conn.channel.return_value = channel
         conn.is_open = False
 
-        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn):
+        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn), \
+             patch("protea.infrastructure.queue.publisher._local", threading.local()):
             publish_job("amqp://localhost/", "my.queue", uuid4())
 
-        channel.queue_declare.assert_called_once_with(queue="my.queue", durable=True)
+        channel.queue_declare.assert_called_once_with(
+            queue="my.queue", durable=True, arguments={"x-dead-letter-exchange": "protea.dlx"}
+        )
+
+    def test_exponential_backoff_delays(self):
+        """Verify that the publisher uses exponential backoff between retries."""
+        conn = MagicMock()
+        channel = MagicMock()
+        channel.basic_publish.side_effect = RuntimeError("broker down")
+        conn.channel.return_value = channel
+        conn.is_open = True
+
+        sleep_calls = []
+        with patch("protea.infrastructure.queue.publisher.pika.BlockingConnection", return_value=conn), \
+             patch("protea.infrastructure.queue.publisher.time.sleep", side_effect=lambda d: sleep_calls.append(d)), \
+             patch("protea.infrastructure.queue.publisher._local", threading.local()):
+            with pytest.raises(RuntimeError, match="Failed to publish"):
+                publish_job("amqp://localhost/", "q", uuid4())
+
+        # 5 attempts → 4 sleeps: 1, 2, 4, 8
+        assert sleep_calls == [1, 2, 4, 8]
+
+
+# ---------------------------------------------------------------------------
+# OperationConsumer — emit writes to parent job
+# ---------------------------------------------------------------------------
+
+class TestOperationConsumerEmit:
+    """Exercise OperationConsumer._on_message for messages that carry a parent job_id."""
+
+    def test_emit_writes_job_event_on_parent(self):
+        from protea.core.contracts.operation import OperationResult
+        from protea.infrastructure.queue.consumer import OperationConsumer
+
+        parent_job_id = uuid4()
+
+        # Mock registry and operation
+        op = MagicMock()
+        op.execute.return_value = OperationResult()
+        registry = MagicMock()
+        registry.get.return_value = op
+
+        # Track sessions created by the factory
+        sessions = []
+        def make_session():
+            s = MagicMock()
+            sessions.append(s)
+            return s
+        factory = MagicMock(side_effect=make_session)
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.queue",
+            registry=registry,
+            session_factory=factory,
+        )
+
+        # Build a valid message with a parent job_id
+        body = json.dumps({
+            "operation": "test_op",
+            "job_id": str(parent_job_id),
+            "payload": {"key": "value"},
+        }).encode()
+
+        channel = MagicMock()
+        method = _make_method()
+        props = MagicMock()
+
+        consumer._on_message(channel, method, props, body)
+
+        # Only execute + ack are asserted; `sessions` is collected but not inspected here
+        op.execute.assert_called_once()
+        channel.basic_ack.assert_called_once()
+
+    def test_emit_records_failure_on_parent(self):
+        from protea.infrastructure.queue.consumer import OperationConsumer
+
+        parent_job_id = uuid4()
+
+        # Operation that raises
+        op = MagicMock()
+        op.execute.side_effect = ValueError("boom")
+        registry = MagicMock()
+        registry.get.return_value = op
+
+        sessions = []
+        def make_session():
+            s = MagicMock()
+            sessions.append(s)
+            return s
+        factory = MagicMock(side_effect=make_session)
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.queue",
+            registry=registry,
+            session_factory=factory,
+        )
+
+        body = json.dumps({
+            "operation": "test_op",
+            "job_id": str(parent_job_id),
+            "payload": {},
+        }).encode()
+
+        channel = MagicMock()
+        method = _make_method()
+        props = MagicMock()
+
+        consumer._on_message(channel, method, props, body)
+
+        # Should nack exactly once; the requeue flag itself is not asserted here
+        channel.basic_nack.assert_called_once()
+        # Should have created a session to write the error event
+        # At least: 1 execution session + 1 error event session
+        assert len(sessions) >= 2
+        # The last session created should have had .add() called once (argument type not checked)
+        error_session = sessions[-1]
+        error_session.add.assert_called_once()
+        error_session.commit.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# OperationConsumer._on_message — extended coverage
+# ---------------------------------------------------------------------------
+
+class TestOperationConsumerOnMessage:
+    """Cover uncovered lines in OperationConsumer._on_message."""
+
+    def _make_consumer(self, op=None, raises=None, requeue_on_failure=False):
+        from protea.core.contracts.operation import OperationResult
+        from protea.infrastructure.queue.consumer import OperationConsumer
+
+        if op is None:
+            op = MagicMock()
+            if raises:
+                op.execute.side_effect = raises
+            else:
+                op.execute.return_value = OperationResult()
+
+        registry = MagicMock()
+        registry.get.return_value = op
+
+        sessions = []
+        def make_session():
+            s = MagicMock()
+            sessions.append(s)
+            return s
+
+        factory = MagicMock(side_effect=make_session)
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.ops",
+            registry=registry,
+            session_factory=factory,
+            requeue_on_failure=requeue_on_failure,
+        )
+        return consumer, sessions, factory, op
+
+    def _body(self, operation="test_op", job_id=None, payload=None):
+        msg = {
+            "operation": operation,
+            "payload": payload or {},
+        }
+        if job_id is not None:
+            msg["job_id"] = str(job_id)
+        return json.dumps(msg).encode()
+
+    def test_successful_operation_acks(self):
+        consumer, sessions, _, op = self._make_consumer()
+        channel = MagicMock()
+        method = _make_method(10)
+
+        consumer._on_message(channel, method, MagicMock(), self._body())
+
+        op.execute.assert_called_once()
+        channel.basic_ack.assert_called_once_with(delivery_tag=10)
+        channel.basic_nack.assert_not_called()
+
+    def test_failed_operation_nacks_without_requeue(self):
+        consumer, sessions, _, _ = self._make_consumer(raises=ValueError("oops"))
+        channel = MagicMock()
+        method = _make_method(20)
+
+        consumer._on_message(channel, method, MagicMock(), self._body())
+
+        channel.basic_nack.assert_called_once_with(delivery_tag=20, requeue=False)
+        channel.basic_ack.assert_not_called()
+
+    def test_failed_operation_nacks_with_requeue_when_flag_set(self):
+        consumer, sessions, _, _ = self._make_consumer(
+            raises=ValueError("oops"), requeue_on_failure=True
+        )
+        channel = MagicMock()
+        method = _make_method(21)
+
+        consumer._on_message(channel, method, MagicMock(), self._body())
+
+        channel.basic_nack.assert_called_once_with(delivery_tag=21, requeue=True)
+
+    def test_cuda_oom_clears_cache_and_requeues(self):
+        exc = RuntimeError("CUDA out of memory. Tried to allocate 2 GiB")
+        consumer, sessions, _, _ = self._make_consumer(raises=exc)
+        channel = MagicMock()
+        method = _make_method(30)
+
+        with patch("protea.infrastructure.queue.consumer.torch", create=True):
+            # Also stub sys.modules["torch"], presumably for a lazy `import torch` in the handler
+            import sys
+            mock_module = MagicMock()
+            with patch.dict(sys.modules, {"torch": mock_module}):
+                consumer._on_message(channel, method, MagicMock(), self._body())
+
+        # Must requeue despite requeue_on_failure=False; cache clearing itself is not asserted
+        channel.basic_nack.assert_called_once()
+        call_kwargs = channel.basic_nack.call_args.kwargs
+        assert call_kwargs["requeue"] is True
+
+    def test_unparseable_message_nacks_without_requeue(self):
+        consumer, _, _, _ = self._make_consumer()
+        channel = MagicMock()
+        method = _make_method(40)
+
+        consumer._on_message(channel, method, MagicMock(), b"not json")
+
+        channel.basic_nack.assert_called_once_with(delivery_tag=40, requeue=False)
+        channel.basic_ack.assert_not_called()
+
+    def test_missing_operation_key_nacks(self):
+        consumer, _, _, _ = self._make_consumer()
+        channel = MagicMock()
+        method = _make_method(41)
+        body = json.dumps({"payload": {}}).encode()
+
+        consumer._on_message(channel, method, MagicMock(), body)
+
+        channel.basic_nack.assert_called_once_with(delivery_tag=41, requeue=False)
+
+    def test_stop_flag_nacks_with_requeue(self):
+        consumer, _, _, _ = self._make_consumer()
+        consumer._stop = True
+        channel = MagicMock()
+        method = _make_method(50)
+
+        consumer._on_message(channel, method, MagicMock(), self._body())
+
+        channel.basic_nack.assert_called_once_with(delivery_tag=50, requeue=True)
+        channel.basic_ack.assert_not_called()
+
+    def test_emit_writes_job_event_to_parent_session(self):
+        """When operation calls emit, a JobEvent is written to a separate session."""
+        from protea.core.contracts.operation import OperationResult
+
+        parent_id = uuid4()
+
+        def _execute(session, payload, *, emit):
+            emit("progress", "doing stuff", {"step": 1}, "info")
+            return OperationResult()
+
+        op = MagicMock()
+        op.execute.side_effect = _execute
+
+        consumer, sessions, _, _ = self._make_consumer(op=op)
+        channel = MagicMock()
+        method = _make_method()
+
+        consumer._on_message(channel, method, MagicMock(), self._body(job_id=parent_id))
+
+        # sessions: [0]=execution session, [1]=emit event session
+        assert len(sessions) >= 2
+        emit_session = sessions[1]
+        emit_session.add.assert_called_once()
+        emit_session.commit.assert_called_once()
+        emit_session.close.assert_called_once()
+
+    def test_emit_without_parent_job_id_only_logs(self):
+        """When no job_id in message, emit should not create an event session."""
+        from protea.core.contracts.operation import OperationResult
+
+        def _execute(session, payload, *, emit):
+            emit("progress", "no parent", {}, "info")
+            return OperationResult()
+
+        op = MagicMock()
+        op.execute.side_effect = _execute
+
+        consumer, sessions, _, _ = self._make_consumer(op=op)
+        channel = MagicMock()
+        method = _make_method()
+
+        # Message without job_id
+        body = json.dumps({"operation": "test_op", "payload": {}}).encode()
+        consumer._on_message(channel, method, MagicMock(), body)
+
+        # Only the execution session should have been created (no event session)
+        assert len(sessions) == 1
+
+    def test_emit_session_failure_is_handled_gracefully(self):
+        """If writing the event to DB fails, the operation should still complete."""
+        from protea.core.contracts.operation import OperationResult
+
+        parent_id = uuid4()
+
+        def _execute(session, payload, *, emit):
+            emit("progress", "msg", {}, "info")
+            return OperationResult()
+
+        op = MagicMock()
+        op.execute.side_effect = _execute
+
+        sessions_created = []
+        def make_session():
+            s = MagicMock()
+            sessions_created.append(s)
+            # Make the second session (emit session) fail on commit
+            if len(sessions_created) == 2:
+                s.commit.side_effect = RuntimeError("DB down")
+            return s
+
+        from protea.infrastructure.queue.consumer import OperationConsumer
+        registry = MagicMock()
+        registry.get.return_value = op
+        factory = MagicMock(side_effect=make_session)
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.ops",
+            registry=registry,
+            session_factory=factory,
+        )
+        channel = MagicMock()
+        method = _make_method()
+
+        consumer._on_message(channel, method, MagicMock(), self._body(job_id=parent_id))
+
+        # Should still ack despite emit failure
+        channel.basic_ack.assert_called_once()
+
+    def test_publish_operations_forwarded(self):
+        """Downstream publish_operations from result are forwarded via publish_operation."""
+        from protea.core.contracts.operation import OperationResult
+
+        result = OperationResult(
+            publish_operations=[
+                ("protea.embeddings.write", {"batch": [1, 2]}),
+                ("protea.predictions.write", {"batch": [3, 4]}),
+            ]
+        )
+        op = MagicMock()
+        op.execute.return_value = result
+
+        consumer, sessions, _, _ = self._make_consumer(op=op)
+        channel = MagicMock()
+        method = _make_method()
+
+        with patch("protea.infrastructure.queue.consumer.publish_operation") as mock_pub:
+            consumer._on_message(channel, method, MagicMock(), self._body())
+
+        assert mock_pub.call_count == 2
+        mock_pub.assert_any_call("amqp://localhost/", "protea.embeddings.write", {"batch": [1, 2]})
+        mock_pub.assert_any_call("amqp://localhost/", "protea.predictions.write", {"batch": [3, 4]})
+
+    def test_failed_operation_writes_error_event_to_parent(self):
+        """On failure with parent_job_id, a child.failed event is written."""
+        parent_id = uuid4()
+        consumer, sessions, _, _ = self._make_consumer(raises=TypeError("bad type"))
+        channel = MagicMock()
+        method = _make_method()
+
+        consumer._on_message(channel, method, MagicMock(), self._body(job_id=parent_id))
+
+        # Find the error event session (last one created besides execution session)
+        # sessions: [0]=execution, [1]=error event
+        assert len(sessions) >= 2
+        err_session = sessions[-1]
+        err_session.add.assert_called_once()
+        added_event = err_session.add.call_args[0][0]
+        assert added_event.job_id == parent_id
+        assert added_event.event == "child.failed"
+        assert added_event.level == "error"
+        assert "bad type" in added_event.message
+
+    def test_invalid_job_id_in_message_is_ignored(self):
+        """A job_id that is not a UUID is ignored: the message is acked, no event session opens."""
+        from protea.core.contracts.operation import OperationResult
+
+        operation = MagicMock()
+        operation.execute.return_value = OperationResult()
+
+        consumer, sessions, _, _ = self._make_consumer(op=operation)
+        channel, method = MagicMock(), _make_method()
+
+        message = {
+            "operation": "test_op",
+            "job_id": "not-a-uuid",
+            "payload": {},
+        }
+        raw_body = json.dumps(message).encode()
+
+        consumer._on_message(channel, method, MagicMock(), raw_body)
+
+        # Should still succeed — only 1 session (execution), no event sessions
+        channel.basic_ack.assert_called_once()
+        assert len(sessions) == 1
+
+    def test_error_event_session_rollback_on_commit_failure(self):
+        """If committing the error event session fails, it is rolled back and closed."""
+        parent_id = uuid4()
+
+        sessions_created = []
+        def make_session():
+            s = MagicMock()
+            sessions_created.append(s)
+            # Make the error event session fail on commit (2nd created: exec, then err_event)
+            if len(sessions_created) == 2:
+                s.commit.side_effect = RuntimeError("DB gone")
+            return s
+
+        from protea.infrastructure.queue.consumer import OperationConsumer
+        op = MagicMock()
+        op.execute.side_effect = ValueError("boom")
+        registry = MagicMock()
+        registry.get.return_value = op
+        factory = MagicMock(side_effect=make_session)
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.ops",
+            registry=registry,
+            session_factory=factory,
+        )
+        channel = MagicMock()
+        method = _make_method()
+
+        consumer._on_message(channel, method, MagicMock(), self._body(job_id=parent_id))
+
+        # Error event session should have been rolled back and still closed
+        err_session = sessions_created[1]
+        err_session.rollback.assert_called_once()
+        err_session.close.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# QueueConsumer._on_message — RetryLaterError handling
+# ---------------------------------------------------------------------------
+
+class TestQueueConsumerRetryLater:
+    """QueueConsumer._on_message: RetryLaterError re-queueing and shutdown draining."""
+
+    def test_retry_later_sleeps_and_republishes(self):
+        from protea.core.contracts.operation import RetryLaterError
+
+        job_id = uuid4()
+        busy = RetryLaterError("GPU busy", delay_seconds=30)
+        consumer = _consumer(_make_worker(raises=busy))
+
+        channel = MagicMock()
+        method = _make_method(99)
+        props = MagicMock()
+
+        consumer._on_message(channel, method, props, _encode(job_id))
+
+        # The delivery is acked exactly once
+        channel.basic_ack.assert_called_once_with(delivery_tag=99)
+        # The requested delay is slept on the connection
+        channel.connection.sleep.assert_called_once_with(30)
+        # The same job is published again
+        channel.basic_publish.assert_called_once()
+        published = channel.basic_publish.call_args.kwargs
+        assert published["routing_key"] == "test.jobs"
+        republished = json.loads(published["body"].decode())
+        assert republished["job_id"] == str(job_id)
+
+    def test_shutdown_draining_nacks_with_requeue(self):
+        """Once _stop is set, an incoming message is nacked with requeue=True and never acked."""
+        draining = _consumer()
+        draining._stop = True
+
+        channel = MagicMock()
+        method = _make_method(77)
+
+        draining._on_message(channel, method, MagicMock(), _encode(uuid4()))
+
+        channel.basic_ack.assert_not_called()
+        channel.basic_nack.assert_called_once_with(delivery_tag=77, requeue=True)
+
+
+# ---------------------------------------------------------------------------
+# OperationConsumer._handle_stop
+# ---------------------------------------------------------------------------
+
+class TestOperationConsumerHandleStop:
+    def test_handle_stop_sets_flag(self):
+        """_handle_stop flips the consumer's _stop flag from False to True."""
+        from protea.infrastructure.queue.consumer import OperationConsumer
+
+        collaborators = {"registry": MagicMock(), "session_factory": MagicMock()}
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/", queue_name="test.ops", **collaborators
+        )
+        # A freshly constructed consumer must not be stopping yet.
+        assert consumer._stop is False
+        consumer._handle_stop()
+        assert consumer._stop is True
+
+
+# ---------------------------------------------------------------------------
+# OperationConsumer.run (pika fully mocked)
+# ---------------------------------------------------------------------------
+
+class TestOperationConsumerRun:
+    def test_run_declares_queue_and_starts_consuming(self):
+        """run() declares the durable queue, applies the prefetch count and starts consuming."""
+        from protea.infrastructure.queue.consumer import OperationConsumer
+
+        consumer = OperationConsumer(
+            amqp_url="amqp://localhost/",
+            queue_name="test.ops",
+            registry=MagicMock(),
+            session_factory=MagicMock(),
+            prefetch_count=4,
+        )
+
+        channel = MagicMock()
+        connection = MagicMock(is_open=False)
+        connection.channel.return_value = channel
+
+        connect = "protea.infrastructure.queue.consumer.pika.BlockingConnection"
+        with patch(connect, return_value=connection):
+            consumer.run()
+
+        dead_letter_args = {"x-dead-letter-exchange": "protea.dlx"}
+        channel.queue_declare.assert_any_call(
+            queue="test.ops", durable=True, arguments=dead_letter_args
+        )
+        channel.basic_qos.assert_called_once_with(prefetch_count=4)
+        channel.basic_consume.assert_called_once()
+        channel.start_consuming.assert_called_once()
diff --git a/tests/test_real_models.py b/tests/test_real_models.py
index f9c3887..bec5a99 100644
--- a/tests/test_real_models.py
+++ b/tests/test_real_models.py
@@ -12,7 +12,6 @@
 
 import numpy as np
 import pytest
-import torch
 
 from protea.core.operations.compute_embeddings import _embed_esm
 
diff --git a/tests/test_reranker.py b/tests/test_reranker.py
new file mode 100644
index 0000000..ec4b7c8
--- /dev/null
+++ b/tests/test_reranker.py
@@ -0,0 +1,251 @@
+"""Unit tests for the LightGBM re-ranker core module."""
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+
+from protea.core.reranker import (
+    ALL_FEATURES,
+    CATEGORICAL_FEATURES,
+    LABEL_COLUMN,
+    NUMERIC_FEATURES,
+    TrainResult,
+    load_training_tsv,
+    model_from_string,
+    model_to_string,
+    predict,
+    prepare_dataset,
+    train,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_training_df(n: int = 200, positive_rate: float = 0.3, seed: int = 42) -> pd.DataFrame:
+    """Generate a seeded synthetic training frame: ids, label, numeric and categorical features."""
+    rng = np.random.RandomState(seed)
+    labels = (rng.random(n) < positive_rate).astype(int)
+
+    columns: dict[str, list] = {
+        "protein_accession": [f"P{i:05d}" for i in range(n)],
+        "go_id": [f"GO:{rng.randint(1, 99999):07d}" for _ in range(n)],
+        "aspect": rng.choice(["F", "P", "C"], n).tolist(),
+        "label": labels.tolist(),
+    }
+
+    # Numeric features — positives get slightly better values
+    for name in NUMERIC_FEATURES:
+        if name == "distance":
+            values = rng.random(n) * 0.5 + (1 - labels) * 0.3
+        elif "identity" in name or "similarity" in name:
+            values = rng.random(n) * 0.5 + labels * 0.3
+        elif "gaps" in name:
+            values = rng.random(n) * 0.1
+        elif "score" in name:
+            values = rng.random(n) * 500 + labels * 200
+        elif "length" in name:  # substring test also covers "alignment_length"
+            values = rng.randint(100, 1000, n)
+        elif name == "vote_count":
+            values = rng.randint(1, 10, n) + labels * 2
+        elif name == "k_position":
+            values = rng.randint(1, 5, n)
+        elif name == "go_term_frequency":
+            values = rng.randint(1, 100, n)
+        elif name == "ref_annotation_density":
+            values = rng.randint(1, 50, n)
+        elif name == "neighbor_distance_std":
+            values = rng.random(n) * 0.1
+        else:
+            values = rng.random(n) * 10
+        columns[name] = values.tolist()
+
+    # Categorical features ("" is one of the sampled values)
+    columns["qualifier"] = rng.choice(["enables", "involved_in", "located_in", ""], n).tolist()
+    columns["evidence_code"] = rng.choice(["IDA", "IEA", "ISS", "EXP", ""], n).tolist()
+    columns["taxonomic_relation"] = rng.choice(["self", "sibling", "ancestor", ""], n).tolist()
+
+    return pd.DataFrame(columns)
+
+
+# ---------------------------------------------------------------------------
+# prepare_dataset
+# ---------------------------------------------------------------------------
+
+
+class TestPrepareDataset:
+    def test_returns_correct_shapes(self):
+        n_rows = 50
+        features, target = prepare_dataset(_make_training_df(n_rows))
+        assert features.shape == (n_rows, len(ALL_FEATURES))
+        assert target.shape == (n_rows,)
+
+    def test_categorical_columns_are_category_dtype(self):
+        """Every categorical feature column comes back with the pandas category dtype."""
+        features, _ = prepare_dataset(_make_training_df(20))
+        for name in CATEGORICAL_FEATURES:
+            assert features[name].dtype.name == "category"
+
+    def test_label_is_int(self):
+        """The label vector has the dtype that the builtin int maps to."""
+        _, target = prepare_dataset(_make_training_df(20))
+        assert target.dtype == int
+
+    def test_only_feature_columns_in_X(self):
+        """Identifier columns are absent and column order follows ALL_FEATURES."""
+        features, _ = prepare_dataset(_make_training_df(20))
+        assert list(features.columns) == ALL_FEATURES
+        assert "protein_accession" not in features.columns
+        assert "go_id" not in features.columns
+
+    def test_empty_strings_become_na_for_categoricals(self):
+        frame = _make_training_df(20)
+        frame.loc[0, "qualifier"] = ""  # force an empty categorical value in row 0
+        features, _ = prepare_dataset(frame)
+        assert pd.isna(features.loc[0, "qualifier"])
+
+
+# ---------------------------------------------------------------------------
+# train
+# ---------------------------------------------------------------------------
+
+
+class TestTrain:
+    def test_returns_train_result(self):
+        """train() returns a TrainResult carrying a model, metrics and feature importances."""
+        outcome = train(_make_training_df(200), num_boost_round=10, early_stopping_rounds=5)
+        assert isinstance(outcome, TrainResult)
+        assert outcome.model is not None
+        # Metric keys are checked one at a time.
+        for key in ("val_auc", "val_f1", "best_iteration"):
+            assert key in outcome.metrics
+        assert len(outcome.feature_importance) > 0
+
+    def test_metrics_are_reasonable(self):
+        """Ratio metrics lie within [0, 1] and both sample counts are positive."""
+        frame = _make_training_df(500, positive_rate=0.3)
+        metrics = train(frame, num_boost_round=50, early_stopping_rounds=10).metrics
+        for bounded in ("val_auc", "val_precision", "val_recall"):
+            assert 0.0 <= metrics[bounded] <= 1.0
+        for count in ("train_samples", "val_samples"):
+            assert metrics[count] > 0
+
+    def test_custom_params(self):
+        """Caller-supplied params are accepted and a model is still produced."""
+        result = train(
+            _make_training_df(200),
+            params={"num_leaves": 15, "learning_rate": 0.1},
+            num_boost_round=10,
+            early_stopping_rounds=5,
+        )
+        assert result.model is not None
+
+    def test_feature_importance_keys_are_features(self):
+        outcome = train(_make_training_df(200), num_boost_round=10, early_stopping_rounds=5)
+        # Only names listed in ALL_FEATURES may appear as importance keys.
+        for feature_name in outcome.feature_importance:
+            assert feature_name in ALL_FEATURES
+
+    def test_positive_rate_in_metrics(self):
+        frame = _make_training_df(200, positive_rate=0.4)
+        reported = train(frame, num_boost_round=10, early_stopping_rounds=5).metrics["positive_rate"]
+        assert 0.2 < reported < 0.6  # loose bounds around the requested 0.4
+
+
+# ---------------------------------------------------------------------------
+# predict
+# ---------------------------------------------------------------------------
+
+
+class TestPredict:
+    def test_returns_probabilities(self):
+        frame = _make_training_df(200)
+        model = train(frame, num_boost_round=10, early_stopping_rounds=5).model
+        scores = predict(model, frame)
+        assert len(scores) == 200
+        assert all(0.0 <= score <= 1.0 for score in scores)
+
+    def test_scores_without_label_column(self):
+        frame = _make_training_df(200)
+        model = train(frame, num_boost_round=10, early_stopping_rounds=5).model
+        unlabeled = frame.drop(columns=[LABEL_COLUMN])  # predict input without the label
+        scores = predict(model, unlabeled)
+        assert len(scores) == 200
+
+    def test_higher_scores_for_positive_examples(self):
+        """The mean score of positives must exceed the mean score of negatives."""
+        frame = _make_training_df(1000, positive_rate=0.3)
+        model = train(frame, num_boost_round=50, early_stopping_rounds=10).model
+        scores = predict(model, frame)
+        positive_mean = np.mean(scores[frame["label"] == 1])
+        negative_mean = np.mean(scores[frame["label"] == 0])
+        assert positive_mean > negative_mean
+
+
+# ---------------------------------------------------------------------------
+# Serialization
+# ---------------------------------------------------------------------------
+
+
+class TestSerialization:
+    def test_roundtrip(self):
+        frame = _make_training_df(200)
+        model = train(frame, num_boost_round=10, early_stopping_rounds=5).model
+        serialized = model_to_string(model)
+        assert isinstance(serialized, str)
+        assert len(serialized) > 100
+
+        # A model rebuilt from its string form must score the same rows the same way.
+        reloaded = model_from_string(serialized)
+        expected = predict(model, frame)
+        np.testing.assert_array_almost_equal(expected, predict(reloaded, frame))
+
+
+# ---------------------------------------------------------------------------
+# load_training_tsv
+# ---------------------------------------------------------------------------
+
+
+class TestLoadTrainingTSV:
+    def test_parses_tsv_string(self):
+        df = load_training_tsv("distance\tvote_count\tlabel\n0.1\t3\t1\n0.5\t1\t0\n")
+        assert len(df) == 2
+        assert df["distance"].dtype == float
+        assert np.issubdtype(df["vote_count"].dtype, np.number)
+        # Any integer width passes: `dtype == int` fails where the builtin int is 32-bit.
+        assert np.issubdtype(df["label"].dtype, np.integer)
+
+    def test_parses_tsv_bytes(self):
+        """Bytes input is accepted as well as str."""
+        df = load_training_tsv(b"distance\tvote_count\tlabel\n0.1\t3\t1\n")
+        assert len(df) == 1
+
+    def test_missing_values_become_nan(self):
+        """Empty fields are read as NaN while populated fields keep their value."""
+        df = load_training_tsv("distance\tvote_count\tlabel\n\t\t0\n")
+        assert pd.isna(df.loc[0, "distance"])
+        assert pd.isna(df.loc[0, "vote_count"])
+        assert df.loc[0, "label"] == 0
+
+    def test_handles_missing_columns_gracefully(self):
+        """A column absent from the TSV is absent from the loaded frame."""
+        df = load_training_tsv("distance\tlabel\n0.1\t1\n")
+        assert "distance" in df.columns
+        assert "vote_count" not in df.columns
+
+
+# ---------------------------------------------------------------------------
+# Feature constants
+# ---------------------------------------------------------------------------
+
+
+class TestFeatureConstants:
+    def test_no_duplicate_features(self):
+        assert len(set(ALL_FEATURES)) == len(ALL_FEATURES)
+
+    def test_all_features_is_union(self):
+        assert NUMERIC_FEATURES + CATEGORICAL_FEATURES == ALL_FEATURES  # numeric first; order matters
+
+    def test_numeric_and_categorical_disjoint(self):
+        assert set(NUMERIC_FEATURES).isdisjoint(CATEGORICAL_FEATURES)
diff --git a/tests/test_run_cafa_evaluation.py b/tests/test_run_cafa_evaluation.py
new file mode 100644
index 0000000..3c1a81d
--- /dev/null
+++ b/tests/test_run_cafa_evaluation.py
@@ -0,0 +1,1166 @@
+"""Unit tests for RunCafaEvaluationOperation.
+
+No real DB, network, or cafaeval binary required — everything is mocked.
+"""
+from __future__ import annotations
+
+import gzip
+import os
+import tempfile
+import uuid
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+import pytest
+from pydantic import ValidationError
+
+from protea.core.evaluation import EvaluationData
+from protea.core.operations.run_cafa_evaluation import (
+    _NS_LABELS,
+    _NS_SHORT,
+    RunCafaEvaluationOperation,
+    RunCafaEvaluationPayload,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+EVAL_SET_ID = str(uuid.uuid4())
+PRED_SET_ID = str(uuid.uuid4())
+OLD_ANN_SET_ID = uuid.uuid4()
+NEW_ANN_SET_ID = uuid.uuid4()
+SNAP_ID = uuid.uuid4()
+SCORING_CONFIG_ID = str(uuid.uuid4())
+
+
+def _make_emit():
+    """Return a fresh MagicMock to use as the emit callback; it records every call made to it."""
+    return MagicMock()
+
+
+def _make_eval_set(eval_set_id=None):
+    """Mock evaluation set: id from eval_set_id (default EVAL_SET_ID) plus both annotation-set ids."""
+    return MagicMock(
+        id=uuid.UUID(eval_set_id or EVAL_SET_ID),
+        old_annotation_set_id=OLD_ANN_SET_ID, new_annotation_set_id=NEW_ANN_SET_ID,
+    )
+
+
+def _make_pred_set(pred_set_id=None):
+    """Mock prediction set whose id is the given UUID string, or PRED_SET_ID when omitted."""
+    chosen_id = pred_set_id or PRED_SET_ID
+    return MagicMock(id=uuid.UUID(chosen_id))
+
+
+def _make_ann_old():
+    """Mock annotation set whose ontology_snapshot_id is the module-level SNAP_ID."""
+    annotation_set = MagicMock(ontology_snapshot_id=SNAP_ID)
+    return annotation_set
+
+
+def _make_snapshot(obo_url="https://example.com/go.obo", ia_url=None):
+    """Mock snapshot object with obo_url and ia_url set from the arguments."""
+    snapshot = MagicMock()
+    snapshot.configure_mock(obo_url=obo_url, ia_url=ia_url)
+    return snapshot
+
+
+def _make_eval_data(nk=None, lk=None, pk=None, known=None, pk_known=None):
+    """Build an EvaluationData with small default nk/lk mappings unless overridden."""
+    # NOTE: `or` means any falsy argument (even an explicit {}) falls back to the default.
+    defaults = {"nk": {"P1": {"GO:0000001"}}, "lk": {"P2": {"GO:0000002"}}}
+    return EvaluationData(
+        nk=nk or defaults["nk"], lk=lk or defaults["lk"],
+        pk=pk or {}, known=known or {}, pk_known=pk_known or {},
+    )
+
+
+def _make_scoring_config():
+    """Mock scoring config with a linear formula and a single embedding_similarity weight of 1.0."""
+    return MagicMock(
+        formula="linear", weights={"embedding_similarity": 1.0}
+    )
+
+
+def _dfs_best_fixture():
+    """Build a dfs_best-style dict mimicking cafaeval output: key "f", one row per GO namespace."""
+    bpo_row = {
+        "ns": "biological_process",
+        "f": 0.45,
+        "pr": 0.51,
+        "rc": 0.40,
+        "tau": 0.32,
+        "cov_max": 0.95,
+        "n": 100,
+    }
+
+    mfo_row = {
+        "ns": "molecular_function",
+        "f": 0.60,
+        "pr": 0.65,
+        "rc": 0.55,
+        "tau": 0.20,
+        "cov_max": 0.88,
+        "n": 50,
+    }
+
+    cco_row = {
+        "ns": "cellular_component",
+        "f": 0.70,
+        "pr": 0.72,
+        "rc": 0.68,
+        "tau": 0.15,
+        "cov_max": 0.92,
+        "n": 75,
+    }
+
+    best_f = pd.DataFrame([bpo_row, mfo_row, cco_row])
+    return {"f": best_f}
+
+
+# ---------------------------------------------------------------------------
+# Payload validation
+# ---------------------------------------------------------------------------
+
+
+class TestRunCafaEvaluationPayload:
+    def test_valid_payload(self):
+        """With only the two ids given, every optional field is None."""
+        payload = RunCafaEvaluationPayload(
+            evaluation_set_id=EVAL_SET_ID, prediction_set_id=PRED_SET_ID
+        )
+        assert payload.evaluation_set_id == EVAL_SET_ID
+        assert payload.prediction_set_id == PRED_SET_ID
+        assert payload.max_distance is None
+        assert payload.artifacts_dir is None
+        assert payload.scoring_config_id is None
+        assert payload.ia_file is None
+
+    def test_valid_payload_all_fields(self):
+        optional = {
+            "max_distance": 1.5,
+            "artifacts_dir": "/tmp/artifacts",
+            "scoring_config_id": SCORING_CONFIG_ID,
+            "ia_file": "/tmp/ia.tsv",
+        }
+        payload = RunCafaEvaluationPayload(
+            evaluation_set_id=EVAL_SET_ID, prediction_set_id=PRED_SET_ID, **optional
+        )
+        # Every optional field must come back unchanged.
+        for field, expected in optional.items():
+            assert getattr(payload, field) == expected
+
+    def test_empty_evaluation_set_id_raises(self):
+        """A whitespace-only evaluation_set_id fails validation with a non-empty message."""
+        whitespace_only = "  "
+        with pytest.raises(ValidationError, match="non-empty"):
+            RunCafaEvaluationPayload(evaluation_set_id=whitespace_only,
+                                     prediction_set_id=PRED_SET_ID)
+
+    def test_empty_prediction_set_id_raises(self):
+        """An empty prediction_set_id fails validation with a non-empty message."""
+        empty = ""
+        with pytest.raises(ValidationError, match="non-empty"):
+            RunCafaEvaluationPayload(evaluation_set_id=EVAL_SET_ID,
+                                     prediction_set_id=empty)
+
+    def test_non_string_evaluation_set_id_raises(self):
+        """An integer evaluation_set_id fails validation."""
+        not_a_string = 123
+        with pytest.raises(ValidationError):
+            RunCafaEvaluationPayload(evaluation_set_id=not_a_string,
+                                     prediction_set_id=PRED_SET_ID)
+
+    def test_max_distance_out_of_range(self):
+        """max_distance=3.0 fails validation."""
+        ids = {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID}
+        with pytest.raises(ValidationError):
+            RunCafaEvaluationPayload(
+                **ids, max_distance=3.0
+            )
+
+    def test_max_distance_negative(self):
+        """A negative max_distance fails validation."""
+        ids = {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID}
+        with pytest.raises(ValidationError):
+            RunCafaEvaluationPayload(
+                **ids, max_distance=-0.1
+            )
+
+    def test_strips_whitespace(self):
+        """Leading and trailing spaces around both ids are stripped."""
+        payload = RunCafaEvaluationPayload(
+            evaluation_set_id=f"  {EVAL_SET_ID}  ", prediction_set_id=f"  {PRED_SET_ID}  "
+        )
+        assert payload.evaluation_set_id == EVAL_SET_ID
+        assert payload.prediction_set_id == PRED_SET_ID
+
+    def test_frozen_payload(self):
+        """Assigning to a field after construction raises ValidationError."""
+        payload = RunCafaEvaluationPayload(
+            evaluation_set_id=EVAL_SET_ID, prediction_set_id=PRED_SET_ID
+        )
+        with pytest.raises(ValidationError):
+            payload.evaluation_set_id = "new_value"
+
+
+# ---------------------------------------------------------------------------
+# Operation name
+# ---------------------------------------------------------------------------
+
+
+class TestOperationName:
+    def test_name(self):
+        """The operation's name attribute is the string run_cafa_evaluation."""
+        assert RunCafaEvaluationOperation().name == "run_cafa_evaluation"
+
+
+# ---------------------------------------------------------------------------
+# _parse_results
+# ---------------------------------------------------------------------------
+
+
+class TestParseResults:
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+
+    def test_parse_all_namespaces(self):
+        """The three namespaces in the fixture come back keyed as BPO, MFO and CCO."""
+        parsed = self.op._parse_results(_dfs_best_fixture())
+        assert set(parsed) == {"BPO", "MFO", "CCO"}
+
+    def test_parse_bpo_values(self):
+        bpo = self.op._parse_results(_dfs_best_fixture())["BPO"]
+        # Fixture column / result key pairs: f/fmax, pr/precision, rc/recall,
+        # tau/tau, cov_max/coverage, n/n_proteins.
+        assert bpo["fmax"] == 0.45
+        assert bpo["precision"] == 0.51
+        assert bpo["recall"] == 0.40
+        assert bpo["tau"] == 0.32
+        assert bpo["coverage"] == 0.95
+        assert bpo["n_proteins"] == 100
+
+    def test_parse_mfo_values(self):
+        """The molecular_function row of the fixture is reported under MFO."""
+        parsed = self.op._parse_results(_dfs_best_fixture())
+        mfo = parsed["MFO"]
+        assert mfo["fmax"] == 0.60
+        assert mfo["precision"] == 0.65
+        assert mfo["recall"] == 0.55
+
+    def test_parse_empty_dfs_best(self):
+        # No "f" entry at all: nothing to report.
+        assert self.op._parse_results({}) == {}
+
+    def test_parse_none_df_f(self):
+        # An "f" entry of None is treated like a missing one.
+        assert self.op._parse_results({"f": None}) == {}
+
+    def test_parse_empty_df_f(self):
+        # An "f" frame without rows yields no namespaces.
+        assert self.op._parse_results({"f": pd.DataFrame()}) == {}
+
+    def test_parse_ignores_unknown_namespaces(self):
+        """A row whose ns value is unknown_namespace contributes nothing."""
+        row = {"ns": "unknown_namespace", "f": 0.5, "pr": 0.5, "rc": 0.5, "tau": 0.1, "cov_max": 0.9, "n": 10}
+        best_f = pd.DataFrame([row])
+        parsed = self.op._parse_results({"f": best_f})
+        assert parsed == {}
+
+    def test_parse_uses_cov_fallback_when_no_cov_max(self):
+        """Without a cov_max column, coverage takes the value of the cov column."""
+        row = {"ns": "biological_process", "f": 0.5, "pr": 0.5, "rc": 0.5, "tau": 0.1, "cov": 0.85, "n": 10}
+        best_f = pd.DataFrame([row])
+        parsed = self.op._parse_results({"f": best_f})
+        assert parsed["BPO"]["coverage"] == 0.85
+
+    def test_parse_missing_n_column(self):
+        """Without an n column, n_proteins is reported as None."""
+        row = {"ns": "biological_process", "f": 0.5, "pr": 0.5, "rc": 0.5, "tau": 0.1, "cov_max": 0.9}
+        best_f = pd.DataFrame([row])
+        parsed = self.op._parse_results({"f": best_f})
+        assert parsed["BPO"]["n_proteins"] is None
+
+
+# ---------------------------------------------------------------------------
+# _write_gt
+# ---------------------------------------------------------------------------
+
+
+class TestWriteGt:
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+
+    def test_write_gt_basic(self):
+        """Ground truth is written as tab-separated protein/GO lines in sorted order."""
+        # Input order is deliberately unsorted (P2 before P1).
+        annotations = {
+            "P2": {"GO:0000002", "GO:0000003"},
+            "P1": {"GO:0000001"},
+        }
+        expected = ["P1\tGO:0000001", "P2\tGO:0000002", "P2\tGO:0000003"]
+        fd, path = tempfile.mkstemp(suffix=".tsv")
+        os.close(fd)
+        try:
+            self.op._write_gt(annotations, path)
+            with open(path) as written:
+                lines = written.read().strip().split("\n")
+            # Sorted by protein then by GO ID
+            assert lines == expected
+        finally:
+            os.unlink(path)
+
+    def test_write_gt_empty(self):
+        """An empty annotation mapping leaves an empty file."""
+        fd, path = tempfile.mkstemp(suffix=".tsv")
+        os.close(fd)
+        try:
+            self.op._write_gt({}, path)
+            with open(path) as written:
+                assert written.read() == ""
+        finally:
+            os.unlink(path)
+
+
+# ---------------------------------------------------------------------------
+# _download_obo
+# ---------------------------------------------------------------------------
+
+
+class TestDownloadObo:
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+
+    @patch("protea.core.operations.run_cafa_evaluation.requests.get")
+    def test_download_plain(self, mock_get):
+        """For a plain .obo URL the file written equals the mocked response text."""
+        obo_text = "format-version: 1.2\n"
+        # raise_for_status is an auto-created MagicMock attribute, so calling it does nothing.
+        mock_get.return_value = MagicMock(text=obo_text)
+
+        fd, path = tempfile.mkstemp(suffix=".obo")
+        os.close(fd)
+        try:
+            self.op._download_obo("https://example.com/go.obo", path)
+            with open(path) as saved:
+                assert saved.read() == obo_text
+        finally:
+            os.unlink(path)
+
+    @patch("protea.core.operations.run_cafa_evaluation.requests.get")
+    def test_download_gzip(self, mock_get):
+        """For a .gz URL the file written equals the decompressed response content."""
+        original = b"format-version: 1.2\n"
+        compressed = gzip.compress(original)
+        # Only the binary content is set; the response's text attribute stays a mock.
+        response = MagicMock(content=compressed)
+        mock_get.return_value = response
+
+        fd, path = tempfile.mkstemp(suffix=".obo")
+        os.close(fd)
+        try:
+            self.op._download_obo("https://example.com/go.obo.gz", path)
+            with open(path, "rb") as saved:
+                assert saved.read() == original
+        finally:
+            os.unlink(path)
+
+
+# ---------------------------------------------------------------------------
+# _download_tsv
+# ---------------------------------------------------------------------------
+
+
+class TestDownloadTsv:
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+
+    def test_local_absolute_path(self):
+        content = "GO:0001\t0.5\n"
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as source:
+            source.write(content)
+        dst_fd, dst_path = tempfile.mkstemp(suffix=".tsv")
+        os.close(dst_fd)
+        try:
+            self.op._download_tsv(source.name, dst_path)  # bare absolute path, no URL scheme
+            with open(dst_path) as copied:
+                assert copied.read() == content
+        finally:
+            os.unlink(source.name)
+            os.unlink(dst_path)
+
+    def test_local_file_scheme(self):
+        content = "GO:0002\t0.8\n"
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".tsv", delete=False) as source:
+            source.write(content)
+        dst_fd, dst_path = tempfile.mkstemp(suffix=".tsv")
+        os.close(dst_fd)
+        try:
+            self.op._download_tsv(f"file://{source.name}", dst_path)  # local path behind file://
+            with open(dst_path) as copied:
+                assert copied.read() == content
+        finally:
+            os.unlink(source.name)
+            os.unlink(dst_path)
+
+    def test_local_gzip_path(self):
+        """A local source whose name ends in .gz is written decompressed."""
+        original = b"GO:0003\t0.3\n"
+        with tempfile.NamedTemporaryFile(suffix=".tsv.gz", delete=False) as source:
+            source.write(gzip.compress(original))
+        dst_fd, dst_path = tempfile.mkstemp(suffix=".tsv")
+        os.close(dst_fd)
+        try:
+            self.op._download_tsv(source.name, dst_path)
+            with open(dst_path, "rb") as copied:
+                assert copied.read() == original
+        finally:
+            os.unlink(source.name)
+            os.unlink(dst_path)
+
+    @patch("protea.core.operations.run_cafa_evaluation.requests.get")
+    def test_http_download(self, mock_get):
+        """For an https URL the file written equals the mocked response text."""
+        body_text = "GO:0004\t0.9\n"
+        response = MagicMock(text=body_text)
+        mock_get.return_value = response
+
+        dst_fd, dst_path = tempfile.mkstemp(suffix=".tsv")
+        os.close(dst_fd)
+        try:
+            self.op._download_tsv("https://example.com/ia.tsv", dst_path)
+            with open(dst_path) as saved:
+                assert saved.read() == body_text
+        finally:
+            os.unlink(dst_path)
+
+    @patch("protea.core.operations.run_cafa_evaluation.requests.get")
+    def test_http_gzip_download(self, mock_get):
+        """For an https .gz URL the file written equals the decompressed response content."""
+        original = b"GO:0005\t0.6\n"
+        compressed = gzip.compress(original)
+        response = MagicMock(content=compressed)
+        mock_get.return_value = response
+
+        dst_fd, dst_path = tempfile.mkstemp(suffix=".tsv")
+        os.close(dst_fd)
+        try:
+            self.op._download_tsv("https://example.com/ia.tsv.gz", dst_path)
+            with open(dst_path, "rb") as saved:
+                assert saved.read() == original
+        finally:
+            os.unlink(dst_path)
+
+
+# ---------------------------------------------------------------------------
+# _write_predictions
+# ---------------------------------------------------------------------------
+
+
+class TestWritePredictions:
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()  # operation instance the tests call via self.op
+
+    def test_write_predictions_without_scoring_config(self):
+        """With no scoring config, the written score follows the distance formula noted below."""
+        prediction = MagicMock(
+            protein_accession="P1",
+            distance=0.4,
+            identity_nw=None,
+            identity_sw=None,
+            evidence_code=None,
+            taxonomic_distance=None,
+        )
+        go_term = MagicMock(go_id="GO:0000001")
+
+        # Every chained query call returns the same mock; yield_per supplies the rows.
+        query = MagicMock()
+        for chained in ("join", "filter", "order_by"):
+            getattr(query, chained).return_value = query
+        query.yield_per.return_value = [(prediction, go_term)]
+        session = MagicMock()
+        session.query.return_value = query
+
+        fd, path = tempfile.mkstemp(suffix=".tsv")
+        os.close(fd)
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, None, path, None
+            )
+            with open(path) as written:
+                line = written.read().strip()
+            # score = max(0, 1 - 0.4/2) = 0.8
+            assert line == "P1\tGO:0000001\t0.8000"
+        finally:
+            os.unlink(path)
+
+    def test_write_predictions_deduplicates(self):
+        pred1 = MagicMock()
+        pred1.protein_accession = "P1"
+        pred1.distance = 0.2
+
+        pred2 = MagicMock()
+        pred2.protein_accession = "P1"
+        pred2.distance = 0.6
+
+        gt_mock = MagicMock()
+        gt_mock.go_id = "GO:0000001"
+
+        session = MagicMock()
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = [(pred1, gt_mock), (pred2, gt_mock)]
+
+        with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+            path = f.name
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, None, path, None
+            )
+            with open(path) as f:
+                lines = f.read().strip().split("\n")
+            # Only the first (closest) prediction should be written
+            assert len(lines) == 1
+        finally:
+            os.unlink(path)
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_score")
+    def test_write_predictions_with_scoring_config(self, mock_compute_score):
+        mock_compute_score.return_value = 0.75
+
+        pred_mock = MagicMock()
+        pred_mock.protein_accession = "P1"
+        pred_mock.distance = 0.4
+        pred_mock.identity_nw = 0.8
+        pred_mock.identity_sw = 0.9
+        pred_mock.evidence_code = "IDA"
+        pred_mock.taxonomic_distance = 2.0
+
+        gt_mock = MagicMock()
+        gt_mock.go_id = "GO:0000001"
+
+        session = MagicMock()
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = [(pred_mock, gt_mock)]
+
+        scoring_config = _make_scoring_config()
+
+        with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+            path = f.name
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, None, path, scoring_config
+            )
+            with open(path) as f:
+                line = f.read().strip()
+            assert line == "P1\tGO:0000001\t0.7500"
+            mock_compute_score.assert_called_once()
+        finally:
+            os.unlink(path)
+
+    def test_write_predictions_zero_distance(self):
+        pred_mock = MagicMock()
+        pred_mock.protein_accession = "P1"
+        pred_mock.distance = 0.0
+
+        gt_mock = MagicMock()
+        gt_mock.go_id = "GO:0000001"
+
+        session = MagicMock()
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = [(pred_mock, gt_mock)]
+
+        with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+            path = f.name
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, None, path, None
+            )
+            with open(path) as f:
+                line = f.read().strip()
+            # score = max(0, 1 - 0/2) = 1.0
+            assert line == "P1\tGO:0000001\t1.0000"
+        finally:
+            os.unlink(path)
+
+    def test_write_predictions_with_max_distance(self):
+        """When max_distance is provided, query should include the filter."""
+        pred_mock = MagicMock()
+        pred_mock.protein_accession = "P1"
+        pred_mock.distance = 0.3
+
+        gt_mock = MagicMock()
+        gt_mock.go_id = "GO:0000001"
+
+        session = MagicMock()
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = [(pred_mock, gt_mock)]
+
+        with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+            path = f.name
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, 0.5, path, None
+            )
+            with open(path) as f:
+                line = f.read().strip()
+            assert line == "P1\tGO:0000001\t0.8500"
+            # filter should have been called 3 times:
+            # pred_set_id, protein_accession IN, distance <=
+            assert query.filter.call_count == 3
+        finally:
+            os.unlink(path)
+
+    def test_write_predictions_none_distance_fallback(self):
+        pred_mock = MagicMock()
+        pred_mock.protein_accession = "P1"
+        pred_mock.distance = None
+
+        gt_mock = MagicMock()
+        gt_mock.go_id = "GO:0000001"
+
+        session = MagicMock()
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = [(pred_mock, gt_mock)]
+
+        with tempfile.NamedTemporaryFile(suffix=".tsv", delete=False) as f:
+            path = f.name
+        try:
+            self.op._write_predictions(
+                session, uuid.uuid4(), {"P1"}, None, path, None
+            )
+            with open(path) as f:
+                line = f.read().strip()
+            # score = max(0, 1 - 0/2) = 1.0 (None → 0.0)
+            assert line == "P1\tGO:0000001\t1.0000"
+        finally:
+            os.unlink(path)
+
+
+# ---------------------------------------------------------------------------
+# execute — error paths
+# ---------------------------------------------------------------------------
+
+
+class TestExecuteErrors:  # every test here expects execute() to raise ValueError
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+        self.emit = _make_emit()
+
+    def test_missing_evaluation_set(self):
+        session = MagicMock()
+        session.get.return_value = None  # every session.get lookup misses
+
+        with pytest.raises(ValueError, match="EvaluationSet.*not found"):
+            self.op.execute(
+                session,
+                {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                emit=self.emit,
+            )
+
+    def test_missing_prediction_set(self):
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        # First call returns eval_set, second returns None (pred_set missing)
+        session.get.side_effect = [eval_set, None]
+
+        with pytest.raises(ValueError, match="PredictionSet.*not found"):
+            self.op.execute(
+                session,
+                {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                emit=self.emit,
+            )
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_no_delta_proteins(self, mock_compute):
+        mock_compute.return_value = EvaluationData(
+            nk={}, lk={}, pk={}, known={}, pk_known={}
+        )  # every field empty
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        with pytest.raises(ValueError, match="No delta proteins"):
+            self.op.execute(
+                session,
+                {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                emit=self.emit,
+            )
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_missing_scoring_config(self, mock_compute):
+        mock_compute.return_value = _make_eval_data()
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        # get calls: eval_set, pred_set, ann_old, snapshot, scoring_config (None)
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot, None]
+
+        with pytest.raises(ValueError, match="ScoringConfig.*not found"):
+            self.op.execute(
+                session,
+                {
+                    "evaluation_set_id": EVAL_SET_ID,
+                    "prediction_set_id": PRED_SET_ID,
+                    "scoring_config_id": SCORING_CONFIG_ID,
+                },
+                emit=self.emit,
+            )
+
+
+# ---------------------------------------------------------------------------
+# execute — happy path
+# ---------------------------------------------------------------------------
+
+
+class TestExecuteHappyPath:  # execute() with session, OBO download and cafa_eval mocked
+    def setup_method(self):
+        self.op = RunCafaEvaluationOperation()
+        self.emit = _make_emit()
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_full_run(self, mock_compute):
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]  # returned in call order
+
+        # Mock the DB query for _write_predictions
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []  # no prediction rows
+
+        dfs_best = _dfs_best_fixture()
+
+        with patch.object(self.op, "_download_obo"):
+            with patch(
+                "cafaeval.evaluation.cafa_eval",
+                return_value=(MagicMock(), dfs_best),
+            ) as mock_cafa:
+                result = self.op.execute(
+                    session,
+                    {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                    emit=self.emit,
+                )
+
+        assert "evaluation_result_id" in result.result
+        assert "results" in result.result
+        # cafa_eval called 3 times: NK, LK, PK
+        assert mock_cafa.call_count == 3
+        # session.add called for EvaluationResult
+        session.add.assert_called_once()
+        session.flush.assert_called_once()
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_emit_events(self, mock_compute):
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        dfs_best = _dfs_best_fixture()
+
+        with patch.object(self.op, "_download_obo"):
+            with patch(
+                "cafaeval.evaluation.cafa_eval",
+                return_value=(MagicMock(), dfs_best),
+            ):
+                self.op.execute(
+                    session,
+                    {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                    emit=self.emit,
+                )
+
+        # Verify key emit events were fired
+        emit_events = [c[0][0] for c in self.emit.call_args_list]  # first positional arg per call
+        assert "run_cafa_evaluation.start" in emit_events
+        assert "run_cafa_evaluation.computing_delta" in emit_events
+        assert "run_cafa_evaluation.delta_done" in emit_events
+        assert "run_cafa_evaluation.downloading_obo" in emit_events
+        assert "run_cafa_evaluation.writing_predictions" in emit_events
+        assert "run_cafa_evaluation.done" in emit_events
+        # 3 evaluating events (NK, LK, PK)
+        assert emit_events.count("run_cafa_evaluation.evaluating") == 3
+        assert emit_events.count("run_cafa_evaluation.setting_done") == 3
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_cafa_eval_failure_catches_exception(self, mock_compute):
+        """When cafa_eval raises for one setting, it should log warning and continue."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with patch.object(self.op, "_download_obo"):
+            with patch(
+                "cafaeval.evaluation.cafa_eval",
+                side_effect=RuntimeError("cafa_eval exploded"),  # raised on every call
+            ):
+                result = self.op.execute(
+                    session,
+                    {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                    emit=self.emit,
+                )
+
+        # All three settings should be empty dicts (all failed)
+        results = result.result["results"]
+        assert results["NK"] == {}
+        assert results["LK"] == {}
+        assert results["PK"] == {}
+
+        # Emit should have 3 setting_failed events
+        emit_events = [c[0][0] for c in self.emit.call_args_list]
+        assert emit_events.count("run_cafa_evaluation.setting_failed") == 3
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_ia_missing_warning(self, mock_compute):
+        """When no IA file and no ia_url, a warning should be emitted."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot(ia_url=None)  # no ia_url
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with patch.object(self.op, "_download_obo"):
+            with patch(
+                "cafaeval.evaluation.cafa_eval",
+                return_value=(MagicMock(), _dfs_best_fixture()),
+            ):
+                self.op.execute(
+                    session,
+                    {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                    emit=self.emit,
+                )
+
+        emit_events = [c[0][0] for c in self.emit.call_args_list]
+        assert "run_cafa_evaluation.ia_missing" in emit_events
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_ia_url_download(self, mock_compute):
+        """When snapshot has ia_url, _download_tsv should be called."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot(ia_url="https://example.com/ia.tsv")
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with patch.object(self.op, "_download_obo"), \
+             patch.object(self.op, "_download_tsv") as mock_dl_tsv, \
+             patch(
+                 "cafaeval.evaluation.cafa_eval",
+                 return_value=(MagicMock(), _dfs_best_fixture()),
+             ):
+            self.op.execute(
+                session,
+                {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                emit=self.emit,
+            )
+
+        mock_dl_tsv.assert_called_once()
+        assert mock_dl_tsv.call_args[0][0] == "https://example.com/ia.tsv"  # first positional arg
+
+        emit_events = [c[0][0] for c in self.emit.call_args_list]
+        assert "run_cafa_evaluation.downloading_ia" in emit_events
+        assert "run_cafa_evaluation.ia_resolved" in emit_events
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_explicit_ia_file_takes_precedence(self, mock_compute):
+        """Explicit ia_file in payload overrides snapshot ia_url."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot(ia_url="https://example.com/ia.tsv")
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with patch.object(self.op, "_download_obo"), \
+             patch.object(self.op, "_download_tsv") as mock_dl_tsv, \
+             patch(
+                 "cafaeval.evaluation.cafa_eval",
+                 return_value=(MagicMock(), _dfs_best_fixture()),
+             ):
+            self.op.execute(
+                session,
+                {
+                    "evaluation_set_id": EVAL_SET_ID,
+                    "prediction_set_id": PRED_SET_ID,
+                    "ia_file": "/custom/ia.tsv",
+                },
+                emit=self.emit,
+            )
+
+        # _download_tsv should NOT be called because ia_file overrides ia_url
+        mock_dl_tsv.assert_not_called()
+
+        emit_events = [c[0][0] for c in self.emit.call_args_list]
+        assert "run_cafa_evaluation.ia_resolved" in emit_events
+        assert "run_cafa_evaluation.downloading_ia" not in emit_events
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_session_commit_before_cafa_eval(self, mock_compute):
+        """Session should be committed before cafa_eval to release DB connection."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        call_order = []  # filled by the commit and cafa_eval side effects below
+        session.commit.side_effect = lambda: call_order.append("commit")
+
+        with patch.object(self.op, "_download_obo"):
+            with patch(
+                "cafaeval.evaluation.cafa_eval",
+                side_effect=lambda *a, **kw: (call_order.append("cafa_eval"), (MagicMock(), _dfs_best_fixture()))[-1],
+            ):
+                self.op.execute(
+                    session,
+                    {"evaluation_set_id": EVAL_SET_ID, "prediction_set_id": PRED_SET_ID},
+                    emit=self.emit,
+                )
+
+        assert call_order[0] == "commit"  # a commit precedes the first cafa_eval call
+        assert "cafa_eval" in call_order
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_artifacts_dir(self, mock_compute):
+        """When artifacts_dir is set, artifact directory should be created."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch.object(self.op, "_download_obo"):
+                with patch(
+                    "cafaeval.evaluation.cafa_eval",
+                    return_value=(None, _dfs_best_fixture()),  # first tuple element is None here
+                ):
+                    result = self.op.execute(
+                        session,
+                        {
+                            "evaluation_set_id": EVAL_SET_ID,
+                            "prediction_set_id": PRED_SET_ID,
+                            "artifacts_dir": tmpdir,
+                        },
+                        emit=self.emit,
+                    )
+
+            result_id = result.result["evaluation_result_id"]
+            assert os.path.isdir(os.path.join(tmpdir, result_id))  # checked while tmpdir exists
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_artifacts_dir_with_write_results(self, mock_compute):
+        """When artifacts_dir is set and df is not None, write_results is called."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot]
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        df_mock = MagicMock()  # non-None df triggers write_results
+        dfs_best = _dfs_best_fixture()
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with patch.object(self.op, "_download_obo"), \
+                 patch(
+                     "cafaeval.evaluation.cafa_eval",
+                     return_value=(df_mock, dfs_best),
+                 ), \
+                 patch(
+                     "cafaeval.evaluation.write_results"
+                 ) as mock_write:
+                result = self.op.execute(
+                    session,
+                    {
+                        "evaluation_set_id": EVAL_SET_ID,
+                        "prediction_set_id": PRED_SET_ID,
+                        "artifacts_dir": tmpdir,
+                    },
+                    emit=self.emit,
+                )
+
+            # write_results called 3 times (NK, LK, PK)
+            assert mock_write.call_count == 3
+            result_id = result.result["evaluation_result_id"]
+            # Check setting subdirectories were created
+            for setting in ("NK", "LK", "PK"):
+                setting_dir = os.path.join(tmpdir, result_id, setting)
+                assert os.path.isdir(setting_dir)
+
+    @patch("protea.core.operations.run_cafa_evaluation.compute_evaluation_data")
+    def test_scoring_config_snapshot(self, mock_compute):
+        """When scoring_config_id is provided and found, it snapshots the config."""
+        mock_compute.return_value = _make_eval_data()
+
+        session = MagicMock()
+        eval_set = _make_eval_set()
+        pred_set = _make_pred_set()
+        ann_old = _make_ann_old()
+        snapshot = _make_snapshot()
+        scoring_cfg = MagicMock()
+        scoring_cfg.formula = "linear"
+        scoring_cfg.weights = {"embedding_similarity": 1.0}
+        session.get.side_effect = [eval_set, pred_set, ann_old, snapshot, scoring_cfg]  # 5th: config
+
+        query = MagicMock()
+        session.query.return_value = query
+        query.join.return_value = query
+        query.filter.return_value = query
+        query.order_by.return_value = query
+        query.yield_per.return_value = []
+
+        with patch.object(self.op, "_download_obo"), \
+             patch(
+                 "cafaeval.evaluation.cafa_eval",
+                 return_value=(MagicMock(), _dfs_best_fixture()),
+             ), \
+             patch(
+                 "protea.core.operations.run_cafa_evaluation.ScoringConfig"
+             ) as mock_sc_cls:
+            mock_sc_cls.return_value = MagicMock()
+            result = self.op.execute(
+                session,
+                {
+                    "evaluation_set_id": EVAL_SET_ID,
+                    "prediction_set_id": PRED_SET_ID,
+                    "scoring_config_id": SCORING_CONFIG_ID,
+                },
+                emit=self.emit,
+            )
+
+        # ScoringConfig constructor was called for snapshotting
+        mock_sc_cls.assert_called_once_with(
+            formula="linear",
+            weights={"embedding_similarity": 1.0},
+        )
+        assert "evaluation_result_id" in result.result
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+
+class TestConstants:  # pins the exact values of the namespace label constants
+    def test_ns_labels_mapping(self):
+        assert _NS_LABELS["biological_process"] == "BPO"  # full namespace name -> short label
+        assert _NS_LABELS["molecular_function"] == "MFO"
+        assert _NS_LABELS["cellular_component"] == "CCO"
+
+    def test_ns_short_set(self):
+        assert _NS_SHORT == {"BPO", "MFO", "CCO"}  # exactly the three short labels, no extras
diff --git a/tests/test_scoring.py b/tests/test_scoring.py
index 347afeb..dfd114b 100644
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@@ -1,17 +1,16 @@
 """Tests for protea.core.scoring and related evidence weight resolution."""
-import pytest
 from unittest.mock import MagicMock
 
+import pytest
+
 from protea.core.scoring import compute_score, evidence_weight, score_predictions
 from protea.infrastructure.orm.models.embedding.scoring_config import (
     DEFAULT_EVIDENCE_WEIGHT_FALLBACK,
-    DEFAULT_EVIDENCE_WEIGHTS,
     FORMULA_EVIDENCE_WEIGHTED,
     FORMULA_LINEAR,
     ScoringConfig,
 )
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
diff --git a/tests/test_scoring_router.py b/tests/test_scoring_router.py
index eb037e6..121013a 100644
--- a/tests/test_scoring_router.py
+++ b/tests/test_scoring_router.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from contextlib import contextmanager
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
@@ -11,12 +11,14 @@
 from fastapi.testclient import TestClient
 
 from protea.api.routers.scoring import router
+from protea.infrastructure.orm.models.annotation.evaluation_set import EvaluationSet
+from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+from protea.infrastructure.orm.models.embedding.reranker_model import RerankerModel
 from protea.infrastructure.orm.models.embedding.scoring_config import (
     FORMULA_LINEAR,
     ScoringConfig,
 )
 
-
 # ---------------------------------------------------------------------------
 # Fixtures
 # ---------------------------------------------------------------------------
@@ -29,7 +31,7 @@ def _make_config(name="test", formula=FORMULA_LINEAR, weights=None, ev_weights=N
     cfg.weights = weights or {"embedding_similarity": 1.0}
     cfg.evidence_weights = ev_weights
     cfg.description = None
-    cfg.created_at = datetime(2026, 1, 1, tzinfo=timezone.utc)
+    cfg.created_at = datetime(2026, 1, 1, tzinfo=UTC)
     return cfg
 
 
@@ -226,8 +228,6 @@ def test_prediction_set_not_found(self, client, session):
         assert resp.status_code == 404
 
     def test_scoring_config_not_found(self, client, session):
-        from unittest.mock import call
-        from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
         # First get (PredictionSet) found, second (ScoringConfig) not found
         session.get.side_effect = [MagicMock(), None]
         resp = client.get(
@@ -236,6 +236,155 @@ def test_scoring_config_not_found(self, client, session):
         )
         assert resp.status_code == 404
 
+    @patch("protea.api.routers.scoring.compute_score", return_value=0.85)
+    def test_streams_tsv_with_data(self, mock_score, session):
+        """Full streaming path: header + data rows."""
+        set_id = uuid4()
+        config_id = uuid4()
+        cfg = _make_config("stream", formula="linear")
+        cfg.id = config_id
+        pred_set = MagicMock()
+
+        pred = MagicMock()
+        pred.protein_accession = "P12345"
+        pred.distance = 0.1
+        pred.ref_protein_accession = "Q99999"
+        pred.evidence_code = "IDA"
+        pred.qualifier = "enables"
+        pred.identity_nw = 0.9
+        pred.identity_sw = 0.8
+        pred.taxonomic_distance = 2
+
+        def get_side(model, id_):
+            from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+            from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
+            if model is PredictionSet:
+                return pred_set
+            if model is ScoringConfig:
+                return cfg
+            return None
+
+        session.get.side_effect = get_side
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0003674")]
+
+        app = FastAPI()
+        factory = MagicMock()
+        app.state.session_factory = factory
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(
+                    f"/scoring/prediction-sets/{set_id}/score.tsv"
+                    f"?scoring_config_id={config_id}"
+                )
+        assert resp.status_code == 200
+        assert "text/tab-separated-values" in resp.headers["content-type"]
+        lines = resp.text.strip().split("\n")
+        assert len(lines) == 2
+        assert lines[0].startswith("protein_accession")
+        assert "P12345" in lines[1]
+        assert "GO:0003674" in lines[1]
+
+    @patch("protea.api.routers.scoring.compute_score", return_value=0.3)
+    def test_min_score_filters_rows(self, mock_score, session):
+        """Rows below min_score are excluded from the stream."""
+        set_id = uuid4()
+        config_id = uuid4()
+        cfg = _make_config("filter")
+        cfg.id = config_id
+
+        pred = MagicMock()
+        pred.protein_accession = "P00001"
+        pred.distance = 0.5
+        pred.ref_protein_accession = None
+        pred.evidence_code = "IEA"
+        pred.qualifier = None
+        pred.identity_nw = None
+        pred.identity_sw = None
+        pred.taxonomic_distance = None
+
+        def get_side(model, id_):
+            from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+            from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
+            if model is PredictionSet:
+                return MagicMock()
+            if model is ScoringConfig:
+                return cfg
+            return None
+
+        session.get.side_effect = get_side
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0005575")]
+
+        app = FastAPI()
+        factory = MagicMock()
+        app.state.session_factory = factory
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(
+                    f"/scoring/prediction-sets/{set_id}/score.tsv"
+                    f"?scoring_config_id={config_id}&min_score=0.5"
+                )
+        assert resp.status_code == 200
+        lines = resp.text.strip().split("\n")
+        # Only header — score 0.3 < min_score 0.5
+        assert len(lines) == 1
+        assert lines[0].startswith("protein_accession")
+
+    @patch("protea.api.routers.scoring.compute_score", return_value=0.9)
+    def test_accession_filter(self, mock_score, session):
+        """Accession query parameter is forwarded to the DB query."""
+        set_id = uuid4()
+        config_id = uuid4()
+        cfg = _make_config("acc-filter")
+        cfg.id = config_id
+
+        pred = MagicMock()
+        pred.protein_accession = "P99999"
+        pred.distance = 0.05
+        pred.ref_protein_accession = "Q11111"
+        pred.evidence_code = "EXP"
+        pred.qualifier = "enables"
+        pred.identity_nw = 0.95
+        pred.identity_sw = 0.92
+        pred.taxonomic_distance = 0
+
+        def get_side(model, id_):
+            from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+            from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
+            if model is PredictionSet:
+                return MagicMock()
+            if model is ScoringConfig:
+                return cfg
+            return None
+
+        session.get.side_effect = get_side
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0008150")]
+
+        app = FastAPI()
+        factory = MagicMock()
+        app.state.session_factory = factory
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(
+                    f"/scoring/prediction-sets/{set_id}/score.tsv"
+                    f"?scoring_config_id={config_id}&accession=P99999"
+                )
+        assert resp.status_code == 200
+        lines = resp.text.strip().split("\n")
+        assert len(lines) == 2
+        assert "P99999" in lines[1]
+
 
 # ---------------------------------------------------------------------------
 # GET /prediction-sets/{set_id}/metrics — 404 preflight checks
@@ -260,3 +409,718 @@ def test_scoring_config_not_found(self, client, session):
         session.get.side_effect = [MagicMock(), None]
         resp = client.get(self._url())
         assert resp.status_code == 404
+
+    def test_invalid_category_returns_422(self, client, session):
+        resp = client.get(
+            f"/scoring/prediction-sets/{uuid4()}/metrics"
+            f"?scoring_config_id={uuid4()}"
+            f"&old_annotation_set_id={uuid4()}"
+            f"&new_annotation_set_id={uuid4()}"
+            f"&ontology_snapshot_id={uuid4()}"
+            f"&category=invalid"
+        )
+        assert resp.status_code == 422
+
+    @patch("protea.api.routers.scoring.compute_cafa_metrics")
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    @patch("protea.api.routers.scoring.compute_score", return_value=0.9)
+    def test_returns_metrics_with_curve(self, mock_score, mock_eval, mock_metrics, client, session):
+        set_id = uuid4()
+        config_id = uuid4()
+        cfg = _make_config("metrics-cfg")
+        cfg.id = config_id
+        pred_set = MagicMock()
+
+        def get_side(model, id_):
+            from protea.infrastructure.orm.models.embedding.prediction_set import PredictionSet
+            from protea.infrastructure.orm.models.embedding.scoring_config import ScoringConfig
+            if model is PredictionSet:
+                return pred_set
+            if model is ScoringConfig:
+                return cfg
+            return None
+
+        session.get.side_effect = get_side
+        mock_eval.return_value = MagicMock()
+
+        pred = MagicMock()
+        pred.protein_accession = "P12345"
+        pred.distance = 0.1
+        pred.identity_nw = 0.9
+        pred.identity_sw = 0.8
+        pred.evidence_code = "IDA"
+        pred.taxonomic_distance = 2
+
+        session.query.return_value.join.return_value.filter.return_value.all.return_value = [
+            (pred, "GO:0003674"),
+        ]
+
+        point = MagicMock()
+        point.threshold = 0.5
+        point.precision = 0.9
+        point.recall = 0.8
+        point.f1 = 0.85
+        metrics_result = MagicMock()
+        metrics_result.summary.return_value = {"fmax": 0.85, "auc_pr": 0.78}
+        metrics_result.curve = [point]
+        mock_metrics.return_value = metrics_result
+
+        resp = client.get(
+            f"/scoring/prediction-sets/{set_id}/metrics"
+            f"?scoring_config_id={config_id}"
+            f"&old_annotation_set_id={uuid4()}"
+            f"&new_annotation_set_id={uuid4()}"
+            f"&ontology_snapshot_id={uuid4()}"
+            f"&category=nk"
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["prediction_set_id"] == str(set_id)
+        assert data["scoring_config_id"] == str(config_id)
+        assert data["scoring_config_name"] == "metrics-cfg"
+        assert "fmax" in data
+        assert "curve" in data
+        assert len(data["curve"]) == 1
+        assert data["curve"][0]["threshold"] == 0.5
+
+    @patch("protea.api.routers.scoring.compute_cafa_metrics")
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    @patch("protea.api.routers.scoring.compute_score", return_value=0.5)
+    def test_lk_category(self, mock_score, mock_eval, mock_metrics, client, session):
+        """category=lk in the query string must be handed to compute_cafa_metrics."""
+        set_id = uuid4()
+        config_id = uuid4()
+        cfg = _make_config("lk-cfg")
+        cfg.id = config_id
+
+        def get_side(model, id_):
+            # Dispatch on the requested ORM class (both are module-level imports).
+            if model is PredictionSet:
+                return MagicMock()
+            if model is ScoringConfig:
+                return cfg
+            return None
+
+        session.get.side_effect = get_side
+        mock_eval.return_value = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value.all.return_value = []
+
+        metrics_result = MagicMock()
+        metrics_result.summary.return_value = {"fmax": 0.0, "auc_pr": 0.0}
+        metrics_result.curve = []
+        mock_metrics.return_value = metrics_result
+
+        resp = client.get(
+            f"/scoring/prediction-sets/{set_id}/metrics"
+            f"?scoring_config_id={config_id}"
+            f"&old_annotation_set_id={uuid4()}"
+            f"&new_annotation_set_id={uuid4()}"
+            f"&ontology_snapshot_id={uuid4()}"
+            f"&category=lk"
+        )
+        assert resp.status_code == 200
+        mock_metrics.assert_called_once()
+        args, kwargs = mock_metrics.call_args
+        assert kwargs.get("category") == "lk" or args[2:3] == ("lk",)  # keyword or 3rd positional
+
+
+# ---------------------------------------------------------------------------
+# GET /prediction-sets/{set_id}/training-data.tsv
+# ---------------------------------------------------------------------------
+
+
+def _make_eval_set():
+    es = MagicMock(spec=EvaluationSet)
+    es.id = uuid4()
+    es.old_annotation_set_id = uuid4()
+    es.new_annotation_set_id = uuid4()
+    return es
+
+
+def _make_pred_set():
+    ps = MagicMock(spec=PredictionSet)
+    ps.id = uuid4()
+    ps.ontology_snapshot_id = uuid4()
+    return ps
+
+
+def _make_go_prediction(**kwargs):
+    pred = MagicMock()
+    pred.protein_accession = kwargs.get("protein_accession", "P12345")
+    pred.distance = kwargs.get("distance", 0.1)
+    pred.ref_protein_accession = kwargs.get("ref_protein_accession", "Q99999")
+    pred.qualifier = kwargs.get("qualifier", "enables")
+    pred.evidence_code = kwargs.get("evidence_code", "IDA")
+    pred.identity_nw = kwargs.get("identity_nw", 0.9)
+    pred.similarity_nw = kwargs.get("similarity_nw", 0.85)
+    pred.alignment_score_nw = kwargs.get("alignment_score_nw", 450.0)
+    pred.gaps_pct_nw = kwargs.get("gaps_pct_nw", 0.02)
+    pred.alignment_length_nw = kwargs.get("alignment_length_nw", 300.0)
+    pred.identity_sw = kwargs.get("identity_sw", 0.92)
+    pred.similarity_sw = kwargs.get("similarity_sw", 0.88)
+    pred.alignment_score_sw = kwargs.get("alignment_score_sw", 420.0)
+    pred.gaps_pct_sw = kwargs.get("gaps_pct_sw", 0.01)
+    pred.alignment_length_sw = kwargs.get("alignment_length_sw", 280.0)
+    pred.length_query = kwargs.get("length_query", 350)
+    pred.length_ref = kwargs.get("length_ref", 340)
+    pred.query_taxonomy_id = kwargs.get("query_taxonomy_id", 9606)
+    pred.ref_taxonomy_id = kwargs.get("ref_taxonomy_id", 10090)
+    pred.taxonomic_lca = kwargs.get("taxonomic_lca", 314146)
+    pred.taxonomic_distance = kwargs.get("taxonomic_distance", 4)
+    pred.taxonomic_common_ancestors = kwargs.get("taxonomic_common_ancestors", 20)
+    pred.taxonomic_relation = kwargs.get("taxonomic_relation", "sibling")
+    pred.vote_count = kwargs.get("vote_count", 3)
+    pred.k_position = kwargs.get("k_position", 1)
+    pred.go_term_frequency = kwargs.get("go_term_frequency", 15)
+    pred.ref_annotation_density = kwargs.get("ref_annotation_density", 8)
+    pred.neighbor_distance_std = kwargs.get("neighbor_distance_std", 0.05)
+    return pred
+
+
+class TestTrainingDataEndpoint:
+    def _url(self, set_id, eval_set_id, category="nk"):
+        return (
+            f"/scoring/prediction-sets/{set_id}/training-data.tsv"
+            f"?evaluation_set_id={eval_set_id}&category={category}"
+        )
+
+    def test_prediction_set_not_found(self, client, session):
+        session.get.return_value = None
+        resp = client.get(self._url(uuid4(), uuid4()))
+        assert resp.status_code == 404
+        assert "PredictionSet" in resp.json()["detail"]
+
+    def test_evaluation_set_not_found(self, client, session):
+        ps = _make_pred_set()
+        session.get.side_effect = lambda model, id_: ps if model is PredictionSet else None
+        resp = client.get(self._url(ps.id, uuid4()))
+        assert resp.status_code == 404
+        assert "EvaluationSet" in resp.json()["detail"]
+
+    def test_invalid_category_returns_422(self, client, session):
+        resp = client.get(
+            f"/scoring/prediction-sets/{uuid4()}/training-data.tsv"
+            f"?evaluation_set_id={uuid4()}&category=invalid"
+        )
+        assert resp.status_code == 422
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_streams_labeled_data_positive(self, mock_eval, session):
+        """Prediction matching ground truth gets label=1."""
+        ps = _make_pred_set()
+        es = _make_eval_set()
+        pred = _make_go_prediction(protein_accession="P12345")
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+
+        eval_data = MagicMock()
+        eval_data.nk = {"P12345": {"GO:0003674"}}
+        mock_eval.return_value = eval_data
+
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0003674", "F")]
+
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(self._url(ps.id, es.id, "nk"))
+
+        assert resp.status_code == 200
+        assert "text/tab-separated-values" in resp.headers["content-type"]
+        lines = resp.text.strip().split("\n")
+        assert len(lines) == 2
+        header = lines[0].split("\t")
+        assert "label" in header
+        assert "vote_count" in header
+        row = lines[1].split("\t")
+        label_idx = header.index("label")
+        assert row[label_idx] == "1"
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_streams_labeled_data_negative(self, mock_eval, session):
+        """Prediction NOT in ground truth gets label=0."""
+        ps = _make_pred_set()
+        es = _make_eval_set()
+        pred = _make_go_prediction(protein_accession="P12345")
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+
+        eval_data = MagicMock()
+        eval_data.nk = {"P99999": {"GO:0005575"}}  # different protein
+        mock_eval.return_value = eval_data
+
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0003674", "F")]
+
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(self._url(ps.id, es.id, "nk"))
+
+        lines = resp.text.strip().split("\n")
+        header = lines[0].split("\t")
+        row = lines[1].split("\t")
+        label_idx = header.index("label")
+        assert row[label_idx] == "0"
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_all_columns_present(self, mock_eval, session):
+        """Verify all 31 columns are in the TSV header."""
+        ps = _make_pred_set()
+        es = _make_eval_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+
+        eval_data = MagicMock()
+        eval_data.nk = {}
+        mock_eval.return_value = eval_data
+
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.yield_per.return_value = []
+
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(self._url(ps.id, es.id))
+
+        header = resp.text.strip().split("\n")[0].split("\t")
+        assert len(header) == 31
+        assert header[0] == "protein_accession"
+        assert header[3] == "label"
+        assert header[-1] == "neighbor_distance_std"
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_pk_category(self, mock_eval, session):
+        """PK category uses eval_data.pk for ground truth."""
+        ps = _make_pred_set()
+        es = _make_eval_set()
+        pred = _make_go_prediction(protein_accession="P12345")
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+
+        eval_data = MagicMock()
+        eval_data.nk = {}
+        eval_data.lk = {}
+        eval_data.pk = {"P12345": {"GO:0003674"}}
+        mock_eval.return_value = eval_data
+
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0003674", "F")]
+
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(self._url(ps.id, es.id, "pk"))
+
+        lines = resp.text.strip().split("\n")
+        header = lines[0].split("\t")
+        row = lines[1].split("\t")
+        label_idx = header.index("label")
+        assert row[label_idx] == "1"
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_none_features_render_as_empty(self, mock_eval, session):
+        """None values are rendered as empty strings in the TSV."""
+        ps = _make_pred_set()
+        es = _make_eval_set()
+        pred = _make_go_prediction(
+            identity_nw=None,
+            similarity_nw=None,
+            vote_count=None,
+            neighbor_distance_std=None,
+        )
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+
+        eval_data = MagicMock()
+        eval_data.nk = {}
+        mock_eval.return_value = eval_data
+
+        q_mock = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value = q_mock
+        q_mock.yield_per.return_value = [(pred, "GO:0003674", "F")]
+
+        app = FastAPI()
+        app.state.session_factory = MagicMock()
+        app.include_router(router)
+        with patch("protea.api.routers.scoring.session_scope", side_effect=lambda _: _mock_scope(session)):
+            with TestClient(app) as c:
+                resp = c.get(self._url(ps.id, es.id))
+
+        lines = resp.text.strip().split("\n")
+        header = lines[0].split("\t")
+        row = lines[1].split("\t")
+        # identity_nw should be empty
+        nw_idx = header.index("identity_nw")
+        assert row[nw_idx] == ""
+        # vote_count should be empty
+        vc_idx = header.index("vote_count")
+        assert row[vc_idx] == ""
+
+
+# ---------------------------------------------------------------------------
+# Reranker CRUD endpoints
+# ---------------------------------------------------------------------------
+
+
+def _make_reranker_model(**kwargs):
+    m = MagicMock(spec=RerankerModel)
+    m.id = kwargs.get("id", uuid4())
+    m.name = kwargs.get("name", "test-reranker")
+    m.prediction_set_id = kwargs.get("prediction_set_id", uuid4())
+    m.evaluation_set_id = kwargs.get("evaluation_set_id", uuid4())
+    m.category = kwargs.get("category", "nk")
+    m.aspect = kwargs.get("aspect", None)
+    m.model_data = kwargs.get("model_data", "lgb_model_string")
+    m.metrics = kwargs.get("metrics", {"val_auc": 0.85})
+    m.feature_importance = kwargs.get("feature_importance", {"distance": 100})
+    m.created_at = datetime(2026, 3, 18, tzinfo=UTC)
+    return m
+
+
+class TestListRerankers:
+    def test_empty_list(self, client, session):
+        session.query.return_value.order_by.return_value.all.return_value = []
+        resp = client.get("/scoring/rerankers")
+        assert resp.status_code == 200
+        assert resp.json() == []
+
+    def test_returns_rerankers(self, client, session):
+        m = _make_reranker_model(name="my-model")
+        session.query.return_value.order_by.return_value.all.return_value = [m]
+        resp = client.get("/scoring/rerankers")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data) == 1
+        assert data[0]["name"] == "my-model"
+        assert "metrics" in data[0]
+
+
+class TestGetReranker:
+    def test_found(self, client, session):
+        m = _make_reranker_model(name="found")
+        session.get.return_value = m
+        resp = client.get(f"/scoring/rerankers/{m.id}")
+        assert resp.status_code == 200
+        assert resp.json()["name"] == "found"
+
+    def test_not_found(self, client, session):
+        session.get.return_value = None
+        resp = client.get(f"/scoring/rerankers/{uuid4()}")
+        assert resp.status_code == 404
+
+
+class TestDeleteReranker:
+    def test_delete_existing(self, client, session):
+        m = _make_reranker_model()
+        session.get.return_value = m
+        resp = client.delete(f"/scoring/rerankers/{m.id}")
+        assert resp.status_code == 204
+        session.delete.assert_called_once_with(m)
+
+    def test_delete_not_found(self, client, session):
+        session.get.return_value = None
+        resp = client.delete(f"/scoring/rerankers/{uuid4()}")
+        assert resp.status_code == 404
+
+
+class TestTrainReranker:
+    def test_prediction_set_not_found(self, client, session):
+        session.get.return_value = None
+        session.query.return_value.filter.return_value.first.return_value = None
+        resp = client.post("/scoring/rerankers/train", json={
+            "name": "test",
+            "prediction_set_id": str(uuid4()),
+            "evaluation_set_id": str(uuid4()),
+        })
+        assert resp.status_code == 404
+        assert "PredictionSet" in resp.json()["detail"]
+
+    def test_evaluation_set_not_found(self, client, session):
+        ps = _make_pred_set()
+        session.query.return_value.filter.return_value.first.return_value = None
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            return None
+
+        session.get.side_effect = get_side
+        resp = client.post("/scoring/rerankers/train", json={
+            "name": "test",
+            "prediction_set_id": str(ps.id),
+            "evaluation_set_id": str(uuid4()),
+        })
+        assert resp.status_code == 404
+        assert "EvaluationSet" in resp.json()["detail"]
+
+    def test_duplicate_name_returns_409(self, client, session):
+        ps = _make_pred_set()
+        es = _make_eval_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+        session.query.return_value.filter.return_value.first.return_value = _make_reranker_model()
+
+        resp = client.post("/scoring/rerankers/train", json={
+            "name": "existing-name",
+            "prediction_set_id": str(ps.id),
+            "evaluation_set_id": str(es.id),
+        })
+        assert resp.status_code == 409
+
+    def test_empty_predictions_returns_422(self, client, session):
+        ps = _make_pred_set()
+        es = _make_eval_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+        session.query.return_value.filter.return_value.first.return_value = None
+
+        eval_data = MagicMock()
+        eval_data.nk = {}
+
+        with patch("protea.api.routers.scoring.compute_evaluation_data", return_value=eval_data):
+            # Empty result set
+            session.query.return_value.join.return_value.filter.return_value.all.return_value = []
+            resp = client.post("/scoring/rerankers/train", json={
+                "name": "empty-test",
+                "prediction_set_id": str(ps.id),
+                "evaluation_set_id": str(es.id),
+            })
+        assert resp.status_code == 422
+
+    def test_invalid_category_returns_422(self, client, session):
+        resp = client.post("/scoring/rerankers/train", json={
+            "name": "test",
+            "prediction_set_id": str(uuid4()),
+            "evaluation_set_id": str(uuid4()),
+            "category": "invalid",
+        })
+        assert resp.status_code == 422
+
+
+class TestRerankedTSV:
+    def test_prediction_set_not_found(self, client, session):
+        session.get.return_value = None
+        resp = client.get(
+            f"/scoring/prediction-sets/{uuid4()}/rerank.tsv"
+            f"?reranker_id={uuid4()}"
+        )
+        assert resp.status_code == 404
+        assert "PredictionSet" in resp.json()["detail"]
+
+    def test_reranker_not_found(self, client, session):
+        ps = _make_pred_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            return None
+
+        session.get.side_effect = get_side
+        resp = client.get(
+            f"/scoring/prediction-sets/{ps.id}/rerank.tsv"
+            f"?reranker_id={uuid4()}"
+        )
+        assert resp.status_code == 404
+        assert "RerankerModel" in resp.json()["detail"]
+
+
+class TestRerankerMetrics:
+    def _url(self, set_id, reranker_id, eval_set_id, category="nk"):
+        return (
+            f"/scoring/prediction-sets/{set_id}/reranker-metrics"
+            f"?reranker_id={reranker_id}"
+            f"&evaluation_set_id={eval_set_id}"
+            f"&category={category}"
+        )
+
+    def test_prediction_set_not_found(self, client, session):
+        session.get.return_value = None
+        resp = client.get(self._url(uuid4(), uuid4(), uuid4()))
+        assert resp.status_code == 404
+        assert "PredictionSet" in resp.json()["detail"]
+
+    def test_reranker_not_found(self, client, session):
+        ps = _make_pred_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            return None
+
+        session.get.side_effect = get_side
+        resp = client.get(self._url(ps.id, uuid4(), uuid4()))
+        assert resp.status_code == 404
+        assert "RerankerModel" in resp.json()["detail"]
+
+    def test_evaluation_set_not_found(self, client, session):
+        """404 naming EvaluationSet when only the set and the reranker resolve."""
+        ps = _make_pred_set()
+        rm = _make_reranker_model()
+
+        def get_side(model, id_):
+            # PredictionSet and RerankerModel resolve; every other lookup
+            # (here: EvaluationSet) returns None, i.e. "not found".
+            if model is PredictionSet:
+                return ps
+            if model is RerankerModel:
+                return rm
+            return None
+
+        session.get.side_effect = get_side
+        resp = client.get(self._url(ps.id, rm.id, uuid4()))
+        assert resp.status_code == 404
+        assert "EvaluationSet" in resp.json()["detail"]
+
+    def test_invalid_category_returns_422(self, client, session):
+        resp = client.get(
+            f"/scoring/prediction-sets/{uuid4()}/reranker-metrics"
+            f"?reranker_id={uuid4()}"
+            f"&evaluation_set_id={uuid4()}"
+            f"&category=invalid"
+        )
+        assert resp.status_code == 422
+
+    @patch("protea.api.routers.scoring.compute_cafa_metrics")
+    @patch("protea.api.routers.scoring.reranker_predict")
+    @patch("protea.api.routers.scoring.model_from_string")
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_returns_metrics(self, mock_eval, mock_from_str, mock_predict, mock_metrics, client, session):
+        ps = _make_pred_set()
+        rm = _make_reranker_model(name="test-rr")
+        es = _make_eval_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is RerankerModel:
+                return rm
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+        mock_eval.return_value = MagicMock()
+
+        pred = _make_go_prediction()
+        session.query.return_value.join.return_value.filter.return_value.yield_per.return_value = [
+            (pred, "GO:0003674"),
+        ]
+
+        import numpy as np
+        mock_from_str.return_value = MagicMock()
+        mock_predict.return_value = np.array([0.85])
+
+        point = MagicMock()
+        point.threshold = 0.5
+        point.precision = 0.9
+        point.recall = 0.8
+        point.f1 = 0.85
+        metrics_result = MagicMock()
+        metrics_result.summary.return_value = {
+            "category": "nk",
+            "fmax": 0.85,
+            "threshold_at_fmax": 0.5,
+            "auc_pr": 0.78,
+            "n_ground_truth_proteins": 10,
+            "n_predicted_proteins": 8,
+            "n_predictions": 1,
+        }
+        metrics_result.curve = [point]
+        mock_metrics.return_value = metrics_result
+
+        resp = client.get(self._url(ps.id, rm.id, es.id))
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["prediction_set_id"] == str(ps.id)
+        assert data["reranker_id"] == str(rm.id)
+        assert data["reranker_name"] == "test-rr"
+        assert "fmax" in data
+        assert "curve" in data
+        assert len(data["curve"]) == 1
+
+    @patch("protea.api.routers.scoring.compute_evaluation_data")
+    def test_empty_predictions_returns_zero_metrics(self, mock_eval, client, session):
+        ps = _make_pred_set()
+        rm = _make_reranker_model()
+        es = _make_eval_set()
+
+        def get_side(model, id_):
+            if model is PredictionSet:
+                return ps
+            if model is RerankerModel:
+                return rm
+            if model is EvaluationSet:
+                return es
+            return None
+
+        session.get.side_effect = get_side
+        mock_eval.return_value = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value.yield_per.return_value = []
+
+        resp = client.get(self._url(ps.id, rm.id, es.id))
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["fmax"] == 0.0
+        assert data["n_predictions"] == 0
diff --git a/tests/test_showcase_router.py b/tests/test_showcase_router.py
new file mode 100644
index 0000000..78555c3
--- /dev/null
+++ b/tests/test_showcase_router.py
@@ -0,0 +1,286 @@
+"""Unit tests for the /showcase router.
+
+Database is fully mocked — no real infrastructure required.
+"""
+from __future__ import annotations
+
+from contextlib import contextmanager
+from unittest.mock import MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from protea.api.routers.showcase import _derive_method, router
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_app(session_factory):
+    app = FastAPI()
+    app.state.session_factory = session_factory
+    app.include_router(router)
+    return app
+
+
+@contextmanager
+def _mock_scope(session):  # stand-in for session_scope (patched in by the `client` fixture)
+    yield session  # yields the given object unchanged; nothing runs on exit
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def session():  # a MagicMock used in place of a database session
+    return MagicMock()
+
+
+@pytest.fixture()
+def factory(session):  # NOTE(review): `session` is requested but never used here
+    return MagicMock()  # placeholder factory; `client` passes it to _make_app
+
+
+@pytest.fixture()
+def client(session, factory):
+    app = _make_app(factory)
+    with patch(
+        "protea.api.routers.showcase.session_scope",
+        side_effect=lambda _: _mock_scope(session),
+    ):
+        with TestClient(app) as c:
+            yield c, session
+
+
+# ---------------------------------------------------------------------------
+# _derive_method
+# ---------------------------------------------------------------------------
+
+class TestDeriveMethod:
+    def test_baseline(self):  # both arguments None
+        assert _derive_method(None, None) == ("knn_baseline", "KNN (embedding distance)")
+
+    def test_scored(self):  # only the first argument set
+        assert _derive_method(uuid4(), None) == ("knn_scored", "KNN + Scoring")
+
+    def test_reranker(self):  # only the second argument set
+        assert _derive_method(None, uuid4()) == ("knn_reranker", "KNN + Re-ranker")
+
+    def test_reranker_takes_precedence(self):  # both set; the reranker pair is expected
+        assert _derive_method(uuid4(), uuid4()) == ("knn_reranker", "KNN + Re-ranker")
+
+
+# ---------------------------------------------------------------------------
+# GET /showcase — empty database
+# ---------------------------------------------------------------------------
+
+class TestShowcaseEmpty:
+    def test_empty_database_returns_zeros(self, client):
+        c, session = client
+
+        # All count queries return 0
+        session.query.return_value.scalar.return_value = 0
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+        session.query.return_value.all.return_value = []
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        assert data["protein_stats"]["total"] == 0
+        assert data["protein_stats"]["canonical"] == 0
+        assert data["counts"]["proteins"] == 0
+        assert data["counts"]["sequences"] == 0
+        assert data["counts"]["embeddings"] == 0
+        assert data["counts"]["prediction_sets"] == 0
+        assert data["counts"]["predictions"] == 0
+        assert data["counts"]["reranker_models"] == 0
+        assert data["counts"]["evaluations"] == 0
+        assert data["best_fmax"] == {}
+        assert data["method_comparison"] == {}
+        assert len(data["pipeline_stages"]) == 5
+
+    def test_pipeline_stages_structure(self, client):
+        c, session = client
+        session.query.return_value.scalar.return_value = 0
+        session.query.return_value.filter.return_value.scalar.return_value = 0
+        session.query.return_value.all.return_value = []
+
+        resp = c.get("/showcase")
+        data = resp.json()
+        stages = data["pipeline_stages"]
+        expected_names = {"sequences", "embeddings", "predictions", "reranker_models", "evaluations"}
+        assert {s["name"] for s in stages} == expected_names
+        for s in stages:
+            assert "count" in s
+            assert "href" in s
+
+
+# ---------------------------------------------------------------------------
+# GET /showcase — with evaluation data
+# ---------------------------------------------------------------------------
+
+class TestShowcaseWithEvaluations:
+    def _make_eval_result(self, scoring_config_id=None, reranker_model_id=None, results=None):
+        er = MagicMock()
+        er.id = uuid4()
+        er.scoring_config_id = scoring_config_id
+        er.reranker_model_id = reranker_model_id
+        er.results = results or {}
+        return er
+
+    def test_single_baseline_evaluation(self, client):
+        c, session = client
+
+        eval_result = self._make_eval_result(
+            results={
+                "NK": {
+                    "BPO": {"fmax": 0.45},
+                    "MFO": {"fmax": 0.52},
+                    "CCO": {"fmax": 0.60},
+                },
+            },
+        )
+
+        # Mock query chain — we need separate calls for counts vs eval
+        call_count = [0]
+        def query_side_effect(*args):
+            call_count[0] += 1
+            q = MagicMock()
+            q.scalar.return_value = 100
+            q.filter.return_value.scalar.return_value = 50
+            q.all.return_value = [eval_result]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        assert data["counts"]["evaluations"] == 1
+        if data["best_fmax"]:
+            nk = data["best_fmax"].get("NK", {})
+            if "BPO" in nk:
+                assert nk["BPO"]["fmax"] == 0.45
+                assert nk["BPO"]["method"] == "knn_baseline"
+
+    def test_method_comparison_ordering(self, client):
+        c, session = client
+
+        baseline = self._make_eval_result(
+            results={"NK": {"BPO": {"fmax": 0.40}}},
+        )
+        scored = self._make_eval_result(
+            scoring_config_id=uuid4(),
+            results={"NK": {"BPO": {"fmax": 0.50}}},
+        )
+        reranker = self._make_eval_result(
+            reranker_model_id=uuid4(),
+            results={"NK": {"BPO": {"fmax": 0.60}}},
+        )
+
+        call_count = [0]
+        def query_side_effect(*args):
+            call_count[0] += 1
+            q = MagicMock()
+            q.scalar.return_value = 10
+            q.filter.return_value.scalar.return_value = 5
+            q.all.return_value = [baseline, scored, reranker]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        if "NK" in data.get("method_comparison", {}):
+            methods = [m["method"] for m in data["method_comparison"]["NK"]]
+            # Should follow _METHOD_ORDER: baseline, scored, reranker
+            assert methods == ["knn_baseline", "knn_scored", "knn_reranker"]
+
+    def test_multiple_categories(self, client):
+        c, session = client
+
+        eval_result = self._make_eval_result(
+            results={
+                "NK": {"BPO": {"fmax": 0.45}},
+                "LK": {"BPO": {"fmax": 0.55}},
+                "PK": {"BPO": {"fmax": 0.65}},
+            },
+        )
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.scalar.return_value = 0
+            q.filter.return_value.scalar.return_value = 0
+            q.all.return_value = [eval_result]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        if data["best_fmax"]:
+            # Should have entries for all three categories
+            for cat in ["NK", "LK", "PK"]:
+                if cat in data["best_fmax"]:
+                    assert "BPO" in data["best_fmax"][cat]
+
+    def test_empty_results_field(self, client):
+        c, session = client
+
+        eval_result = self._make_eval_result(results={})
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.scalar.return_value = 0
+            q.filter.return_value.scalar.return_value = 0
+            q.all.return_value = [eval_result]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["best_fmax"] == {}
+
+    def test_none_results_field(self, client):
+        c, session = client
+
+        eval_result = self._make_eval_result(results=None)
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.scalar.return_value = 0
+            q.filter.return_value.scalar.return_value = 0
+            q.all.return_value = [eval_result]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+
+    def test_best_fmax_picks_highest(self, client):
+        c, session = client
+
+        er1 = self._make_eval_result(results={"NK": {"BPO": {"fmax": 0.40}}})
+        er2 = self._make_eval_result(results={"NK": {"BPO": {"fmax": 0.60}}})
+
+        def query_side_effect(*args):
+            q = MagicMock()
+            q.scalar.return_value = 0
+            q.filter.return_value.scalar.return_value = 0
+            q.all.return_value = [er1, er2]
+            return q
+        session.query.side_effect = query_side_effect
+
+        resp = c.get("/showcase")
+        assert resp.status_code == 200
+        data = resp.json()
+
+        if "NK" in data["best_fmax"] and "BPO" in data["best_fmax"]["NK"]:
+            assert data["best_fmax"]["NK"]["BPO"]["fmax"] == 0.60
diff --git a/tests/test_support_maintenance_routers.py b/tests/test_support_maintenance_routers.py
index beb86be..0823262 100644
--- a/tests/test_support_maintenance_routers.py
+++ b/tests/test_support_maintenance_routers.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from contextlib import contextmanager
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
@@ -10,9 +10,8 @@
 from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
-from protea.api.routers.support import router as support_router
 from protea.api.routers.maintenance import router as maintenance_router
-
+from protea.api.routers.support import router as support_router
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -68,7 +67,7 @@ def test_comments_serialized(self, support_client, support_session):
         entry = MagicMock()
         entry.id = uuid4()
         entry.comment = "Great tool!"
-        entry.created_at = datetime(2026, 1, 1, tzinfo=timezone.utc)
+        entry.created_at = datetime(2026, 1, 1, tzinfo=UTC)
         support_session.query.return_value.count.return_value = 1
         support_session.query.return_value.filter.return_value.order_by.return_value.limit.return_value.all.return_value = [entry]
         resp = support_client.get("/support")
diff --git a/tests/test_train_reranker.py b/tests/test_train_reranker.py
new file mode 100644
index 0000000..04d59fc
--- /dev/null
+++ b/tests/test_train_reranker.py
@@ -0,0 +1,475 @@
+"""Unit tests for protea.core.operations.train_reranker.
+
+Covers payload validation, the TrainRerankerOperation helper methods,
+and the _compute_comparison_metrics logic.  Heavy DB / model training
+is mocked — no real infrastructure required.
+"""
+from __future__ import annotations
+
+import uuid
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from protea.core.operations.train_reranker import (
+    TrainRerankerOperation,
+    TrainRerankerPayload,
+)
+
+
+def _noop_emit(*a, **kw):
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Payload validation
+# ---------------------------------------------------------------------------
+
+class TestTrainRerankerPayload:
+    def _valid_kwargs(self, **overrides) -> dict[str, Any]:
+        defaults = {
+            "name": "test-model",
+            "old_annotation_set_id": str(uuid.uuid4()),
+            "new_annotation_set_id": str(uuid.uuid4()),
+            "embedding_config_id": str(uuid.uuid4()),
+            "ontology_snapshot_id": str(uuid.uuid4()),
+        }
+        defaults.update(overrides)
+        return defaults
+
+    def test_valid_payload(self):
+        p = TrainRerankerPayload(**self._valid_kwargs())
+        assert p.name == "test-model"
+        assert p.category == "nk"
+        assert p.limit_per_entry == 5
+
+    def test_empty_name_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(name=""))
+
+    def test_whitespace_name_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(name="   "))
+
+    def test_empty_old_annotation_set_id_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(old_annotation_set_id=""))
+
+    def test_empty_new_annotation_set_id_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(new_annotation_set_id=""))
+
+    def test_empty_embedding_config_id_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(embedding_config_id=""))
+
+    def test_empty_ontology_snapshot_id_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(ontology_snapshot_id=""))
+
+    def test_invalid_category_raises(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(category="invalid"))
+
+    def test_valid_categories(self):
+        for cat in ("nk", "lk", "pk"):
+            p = TrainRerankerPayload(**self._valid_kwargs(category=cat))
+            assert p.category == cat
+
+    def test_custom_knn_params(self):
+        p = TrainRerankerPayload(**self._valid_kwargs(
+            limit_per_entry=10,
+            distance_threshold=0.5,
+            search_backend="faiss",
+            metric="euclidean",
+        ))
+        assert p.limit_per_entry == 10
+        assert p.distance_threshold == 0.5
+        assert p.search_backend == "faiss"
+
+    def test_custom_lightgbm_params(self):
+        p = TrainRerankerPayload(**self._valid_kwargs(
+            num_boost_round=500,
+            early_stopping_rounds=25,
+            val_fraction=0.1,
+            neg_pos_ratio=3.0,
+        ))
+        assert p.num_boost_round == 500
+        assert p.early_stopping_rounds == 25
+        assert p.val_fraction == 0.1
+        assert p.neg_pos_ratio == 3.0
+
+    def test_feature_flags_default_false(self):
+        p = TrainRerankerPayload(**self._valid_kwargs())
+        assert p.compute_alignments is False
+        assert p.compute_taxonomy is False
+
+    def test_aspect_filter(self):
+        p = TrainRerankerPayload(**self._valid_kwargs(aspect="bpo"))
+        assert p.aspect == "bpo"
+
+    def test_name_is_stripped(self):
+        p = TrainRerankerPayload(**self._valid_kwargs(name="  my model  "))
+        assert p.name == "my model"
+
+    def test_limit_per_entry_must_be_positive(self):
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(limit_per_entry=0))
+
+        with pytest.raises(ValueError):
+            TrainRerankerPayload(**self._valid_kwargs(limit_per_entry=-1))
+
+
+# ---------------------------------------------------------------------------
+# _validate
+# ---------------------------------------------------------------------------
+
+class TestValidate:
+    def _make_op(self):
+        return TrainRerankerOperation()
+
+    def _make_payload(self, **kw):
+        defaults = {
+            "name": "test",
+            "old_annotation_set_id": str(uuid.uuid4()),
+            "new_annotation_set_id": str(uuid.uuid4()),
+            "embedding_config_id": str(uuid.uuid4()),
+            "ontology_snapshot_id": str(uuid.uuid4()),
+        }
+        defaults.update(kw)
+        return TrainRerankerPayload(**defaults)
+
+    def test_old_annotation_set_not_found(self):
+        op = self._make_op()
+        session = MagicMock()
+        session.get.return_value = None
+        p = self._make_payload()
+
+        with pytest.raises(ValueError, match="AnnotationSet"):
+            op._validate(
+                session, p,
+                uuid.UUID(p.old_annotation_set_id),
+                uuid.UUID(p.new_annotation_set_id),
+                uuid.UUID(p.embedding_config_id),
+                uuid.UUID(p.ontology_snapshot_id),
+            )
+
+    def test_new_annotation_set_not_found(self):
+        op = self._make_op()
+        session = MagicMock()
+        # First call (old) returns something, second (new) returns None
+        session.get.side_effect = [MagicMock(), None]
+        p = self._make_payload()
+
+        with pytest.raises(ValueError, match="AnnotationSet"):
+            op._validate(
+                session, p,
+                uuid.UUID(p.old_annotation_set_id),
+                uuid.UUID(p.new_annotation_set_id),
+                uuid.UUID(p.embedding_config_id),
+                uuid.UUID(p.ontology_snapshot_id),
+            )
+
+    def test_embedding_config_not_found(self):
+        op = self._make_op()
+        session = MagicMock()
+        # old and new found, embedding config not found
+        session.get.side_effect = [MagicMock(), MagicMock(), None]
+        p = self._make_payload()
+
+        with pytest.raises(ValueError, match="EmbeddingConfig"):
+            op._validate(
+                session, p,
+                uuid.UUID(p.old_annotation_set_id),
+                uuid.UUID(p.new_annotation_set_id),
+                uuid.UUID(p.embedding_config_id),
+                uuid.UUID(p.ontology_snapshot_id),
+            )
+
+    def test_duplicate_name_raises(self):
+        op = self._make_op()
+        session = MagicMock()
+        session.get.return_value = MagicMock()  # all lookups succeed
+        session.query.return_value.filter.return_value.first.return_value = MagicMock()  # name exists
+        p = self._make_payload()
+
+        with pytest.raises(ValueError, match="already exists"):
+            op._validate(
+                session, p,
+                uuid.UUID(p.old_annotation_set_id),
+                uuid.UUID(p.new_annotation_set_id),
+                uuid.UUID(p.embedding_config_id),
+                uuid.UUID(p.ontology_snapshot_id),
+            )
+
+    def test_valid_passes(self):
+        op = self._make_op()
+        session = MagicMock()
+        session.get.return_value = MagicMock()  # all lookups succeed
+        session.query.return_value.filter.return_value.first.return_value = None  # no duplicate name
+        p = self._make_payload()
+
+        # Should not raise
+        op._validate(
+            session, p,
+            uuid.UUID(p.old_annotation_set_id),
+            uuid.UUID(p.new_annotation_set_id),
+            uuid.UUID(p.embedding_config_id),
+            uuid.UUID(p.ontology_snapshot_id),
+        )
+
+
+# ---------------------------------------------------------------------------
+# _load_query_embeddings
+# ---------------------------------------------------------------------------
+
+class TestLoadQueryEmbeddings:
+    def test_returns_empty_when_no_matches(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value.all.return_value = []
+
+        emb, valid = op._load_query_embeddings(session, ["P1", "P2"], uuid.uuid4())
+        assert len(valid) == 0
+        assert emb.shape == (0,)
+
+    def test_returns_embeddings_for_found(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value.all.return_value = [
+            ("P1", [0.1, 0.2, 0.3]),
+            ("P2", [0.4, 0.5, 0.6]),
+        ]
+
+        emb, valid = op._load_query_embeddings(session, ["P1", "P2"], uuid.uuid4())
+        assert valid == ["P1", "P2"]
+        assert emb.shape == (2, 3)
+        np.testing.assert_allclose(emb[0], [0.1, 0.2, 0.3], atol=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# _load_sequences
+# ---------------------------------------------------------------------------
+
+class TestLoadSequences:
+    def test_returns_dict(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.query.return_value.join.return_value.filter.return_value.all.return_value = [
+            ("P1", "MKVLWAGS"),
+            ("P2", "ACDEF"),
+        ]
+
+        result = op._load_sequences(session, {"P1", "P2"})
+        assert result == {"P1": "MKVLWAGS", "P2": "ACDEF"}
+
+    def test_empty_accessions(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        result = op._load_sequences(session, set())
+        assert result == {}
+
+
+# ---------------------------------------------------------------------------
+# _load_taxonomy_ids
+# ---------------------------------------------------------------------------
+
+class TestLoadTaxonomyIds:
+    def test_returns_dict(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.query.return_value.filter.return_value.all.return_value = [
+            ("P1", 9606),
+            ("P2", 10090),
+        ]
+
+        result = op._load_taxonomy_ids(session, {"P1", "P2"})
+        assert result == {"P1": 9606, "P2": 10090}
+
+    def test_none_taxonomy_id(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.query.return_value.filter.return_value.all.return_value = [
+            ("P1", None),
+        ]
+
+        result = op._load_taxonomy_ids(session, {"P1"})
+        assert result == {"P1": None}
+
+
+# ---------------------------------------------------------------------------
+# _compute_comparison_metrics
+# ---------------------------------------------------------------------------
+
+class TestComputeComparisonMetrics:
+    def test_returns_expected_keys(self):
+        op = TrainRerankerOperation()
+
+        # Create a minimal DataFrame
+        df = pd.DataFrame([
+            {"protein_accession": "P1", "go_id": "GO:0001", "distance": 0.1, "label": 1},
+            {"protein_accession": "P1", "go_id": "GO:0002", "distance": 0.9, "label": 0},
+        ])
+
+        # Mock train result
+        train_result = MagicMock()
+        train_result.model = MagicMock()
+
+        # Mock evaluation data
+        eval_data = MagicMock()
+        eval_data.nk = {"P1": {"GO:0001"}}
+
+        with patch(
+            "protea.core.operations.train_reranker.reranker_predict",
+            return_value=np.array([0.9, 0.1]),
+        ), patch(
+            "protea.core.operations.train_reranker.compute_cafa_metrics",
+        ) as mock_cafa:
+            mock_metrics = MagicMock()
+            mock_metrics.fmax = 0.5
+            mock_metrics.auc_pr = 0.4
+            mock_metrics.threshold_at_fmax = 0.3
+            mock_metrics.n_ground_truth_proteins = 1
+            mock_cafa.return_value = mock_metrics
+
+            result = op._compute_comparison_metrics(df, train_result, eval_data, "nk")
+
+        expected_keys = {
+            "baseline_fmax", "baseline_auc_pr", "baseline_threshold",
+            "reranker_fmax", "reranker_auc_pr", "reranker_threshold",
+            "fmax_improvement", "auc_pr_improvement", "n_ground_truth_proteins",
+        }
+        assert set(result.keys()) == expected_keys
+
+    def test_fmax_improvement_computed(self):
+        op = TrainRerankerOperation()
+        df = pd.DataFrame([
+            {"protein_accession": "P1", "go_id": "GO:0001", "distance": 0.1, "label": 1},
+        ])
+
+        train_result = MagicMock()
+
+        call_count = [0]
+        def fake_cafa(*args, **kwargs):
+            call_count[0] += 1
+            m = MagicMock()
+            if call_count[0] == 1:
+                m.fmax = 0.4  # baseline
+                m.auc_pr = 0.3
+            else:
+                m.fmax = 0.6  # reranker
+                m.auc_pr = 0.5
+            m.threshold_at_fmax = 0.3
+            m.n_ground_truth_proteins = 1
+            return m
+
+        with patch(
+            "protea.core.operations.train_reranker.reranker_predict",
+            return_value=np.array([0.9]),
+        ), patch(
+            "protea.core.operations.train_reranker.compute_cafa_metrics",
+            side_effect=fake_cafa,
+        ):
+            result = op._compute_comparison_metrics(df, train_result, MagicMock(), "nk")
+
+        assert result["baseline_fmax"] == 0.4
+        assert result["reranker_fmax"] == 0.6
+        assert result["fmax_improvement"] == 0.2
+
+
+# ---------------------------------------------------------------------------
+# _load_go_maps
+# ---------------------------------------------------------------------------
+
+class TestLoadGoMaps:
+    def test_returns_id_and_aspect_maps(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.execute.return_value.fetchall.return_value = [
+            (1, "GO:0001", "P"),
+            (2, "GO:0002", "F"),
+            (3, "GO:0003", None),
+        ]
+
+        id_map, aspect_map = op._load_go_maps(session, uuid.uuid4())
+        assert id_map == {1: "GO:0001", 2: "GO:0002", 3: "GO:0003"}
+        assert aspect_map == {1: "P", 2: "F"}
+        assert 3 not in aspect_map  # None aspect excluded
+
+
+# ---------------------------------------------------------------------------
+# Full execute flow (heavily mocked)
+# ---------------------------------------------------------------------------
+
+class TestExecuteFlow:
+    def test_no_ground_truth_raises(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.get.return_value = MagicMock()
+        session.query.return_value.filter.return_value.first.return_value = None
+
+        payload = {
+            "name": "test",
+            "old_annotation_set_id": str(uuid.uuid4()),
+            "new_annotation_set_id": str(uuid.uuid4()),
+            "embedding_config_id": str(uuid.uuid4()),
+            "ontology_snapshot_id": str(uuid.uuid4()),
+        }
+
+        with patch.object(op, "_validate"), \
+             patch(
+                 "protea.core.operations.train_reranker.compute_evaluation_data",
+             ) as mock_eval:
+            eval_data = MagicMock()
+            eval_data.nk = {}  # empty ground truth
+            eval_data.stats.return_value = {}
+            mock_eval.return_value = eval_data
+
+            with pytest.raises(ValueError, match="No ground truth"):
+                op.execute(session, payload, emit=_noop_emit)
+
+    def test_no_embeddings_raises(self):
+        op = TrainRerankerOperation()
+        session = MagicMock()
+        session.get.return_value = MagicMock()
+        session.query.return_value.filter.return_value.first.return_value = None
+
+        payload = {
+            "name": "test",
+            "old_annotation_set_id": str(uuid.uuid4()),
+            "new_annotation_set_id": str(uuid.uuid4()),
+            "embedding_config_id": str(uuid.uuid4()),
+            "ontology_snapshot_id": str(uuid.uuid4()),
+        }
+
+        with patch.object(op, "_validate"), \
+             patch(
+                 "protea.core.operations.train_reranker.compute_evaluation_data",
+             ) as mock_eval, \
+             patch.object(op, "_load_go_maps", return_value=({}, {})), \
+             patch.object(op, "_load_reference_per_aspect", return_value={
+                 "P": {"accessions": [], "embeddings": np.empty((0,)), "go_map": {}},
+                 "F": {"accessions": [], "embeddings": np.empty((0,)), "go_map": {}},
+                 "C": {"accessions": [], "embeddings": np.empty((0,)), "go_map": {}},
+             }), \
+             patch.object(op, "_load_query_embeddings", return_value=(np.empty((0,)), [])):
+
+            eval_data = MagicMock()
+            eval_data.nk = {"P1": {"GO:0001"}}
+            eval_data.stats.return_value = {"nk": 1}
+            mock_eval.return_value = eval_data
+
+            with pytest.raises(ValueError, match="No delta proteins have embeddings"):
+                op.execute(session, payload, emit=_noop_emit)
+
+
+# ---------------------------------------------------------------------------
+# Operation name
+# ---------------------------------------------------------------------------
+
+class TestOperationName:
+    def test_name(self):  # pins the operation's `name` attribute to "train_reranker"
+        assert TrainRerankerOperation().name == "train_reranker"