From b20074578419c1102f002a0e1e38eb43e035bf5a Mon Sep 17 00:00:00 2001
From: Frederik Prijck <frederik.prijck@okta.com>
Date: Tue, 16 Jun 2026 15:37:15 +0200
Subject: [PATCH] feat(mcp): add protected HTTP MCP server support

Adds OAuth-protected HTTP MCP server support so evals can target servers
that require an Authorization: Bearer token (e.g. the Auth0 hosted MCP
server, which authenticates with a Management API token).

- mintMcpToken: per-job client-credentials token mint for HTTP MCP servers
- MCPHttpServerConfig.auth (tokenUrl/clientId/clientSecret/audience)
- All four runners forward the minted token: claude-code, copilot, and
  gemini-cli set an Authorization header on the server config; codex writes
  a bearer_token_env_var reference into config.toml and injects the token
  into its process env (an inline bearer_token is rejected by codex)
- A failed token mint skips the server with a warning rather than
  registering it unauthenticated
- sandbox.passthroughEnv: forward named host env vars into the Docker
  sandbox so MCP credentials reach the container
- docs/PROTECTED_MCP.md setup guide
---
 AGENTS.md                                     |   2 +
 apps/auth0-evals/eval.config.js               |  23 ++++
 docs/PROTECTED_MCP.md                         | 126 ++++++++++++++++++
 packages/eval-core/src/config/defaults.ts     |   2 +
 packages/eval-core/src/config/framework.ts    |  29 ++++
 packages/eval-core/src/config/mcp-auth.ts     |  41 ++++++
 packages/eval-core/src/index.ts               |   3 +
 .../eval-core/tests/config/mcp-auth.test.ts   |  56 ++++++++
 packages/eval/src/cli/run.ts                  |   1 +
 .../eval/src/runners/claude-code/agent.ts     |  24 +++-
 packages/eval/src/runners/codex/agent.ts      |  59 +++++++-
 packages/eval/src/runners/copilot/agent.ts    |  35 ++++-
 packages/eval/src/runners/gemini-cli/agent.ts |  34 ++++-
 packages/eval/src/sandbox/docker.ts           |  16 ++-
 packages/eval/tests/docker.test.ts            |  41 ++++++
 .../eval/tests/runners/codex-agent.test.ts    |  76 +++++++++++
 .../eval/tests/runners/copilot-agent.test.ts  |  79 +++++++++--
 .../tests/runners/gemini-cli-agent.test.ts    |  72 +++++++++-
 18 files changed, 685 insertions(+), 34 deletions(-)
 create mode 100644 docs/PROTECTED_MCP.md
 create mode 100644 packages/eval-core/src/config/mcp-auth.ts
 create mode 100644 packages/eval-core/tests/config/mcp-auth.test.ts

diff --git a/AGENTS.md b/AGENTS.md
index d5845885..bf491216 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -344,6 +344,8 @@ All agent runners have access to file/shell tools in their respective environmen
 
 When MCP tools are enabled (`--tools mcp`), MCP server tool definitions are appended to the tool list.
 
+Authenticated HTTP MCP servers are configured with an `auth` block (`tokenUrl`, `clientId`, `clientSecret`, `audience`). The framework mints a Bearer token for the configured `audience` (a Management API token for the Auth0 hosted MCP server) per agent job via a client-credentials exchange and forwards it to the MCP server. All four runners support this: claude-code, copilot, and gemini-cli forward it as an `Authorization: Bearer` header in their server config; codex passes it via a `bearer_token_env_var` reference in `config.toml` (Codex rejects an inline token, so the secret stays out of the file). A failed token mint skips the server with a warning rather than registering it unauthenticated. Full setup guide: [docs/PROTECTED_MCP.md](docs/PROTECTED_MCP.md).
+
 ---
 
 ## Models
diff --git a/apps/auth0-evals/eval.config.js b/apps/auth0-evals/eval.config.js
index 5d96cc0a..0d2eec42 100644
--- a/apps/auth0-evals/eval.config.js
+++ b/apps/auth0-evals/eval.config.js
@@ -44,6 +44,22 @@ export default {
         type: 'http',
         url: 'https://auth0.com/docs/mcp',
       },
+      ...(process.env.MCP_TENANT_DOMAIN &&
+      process.env.MCP_CLIENT_ID &&
+      process.env.MCP_CLIENT_SECRET
+        ? {
+            'auth0-hosted-mcp': {
+              type: 'http',
+              url: `https://${process.env.MCP_TENANT_DOMAIN}/v1/mcp`,
+              auth: {
+                tokenUrl: `https://${process.env.MCP_TENANT_DOMAIN}/oauth/token`,
+                clientId: process.env.MCP_CLIENT_ID,
+                clientSecret: process.env.MCP_CLIENT_SECRET,
+                audience: `https://${process.env.MCP_TENANT_DOMAIN}/api/v2/`,
+              },
+            },
+          }
+        : {}),
     },
   },
 
@@ -81,6 +97,13 @@ export default {
   },
 
 
+  sandbox: {
+    // Host env vars forwarded into the Docker sandbox (names only; values resolved
+    // from process.env at launch). Needed so the framework can mint the token for
+    // the authenticated auth0-hosted-mcp server inside the container.
+    passthroughEnv: ['MCP_TENANT_DOMAIN', 'MCP_CLIENT_ID', 'MCP_CLIENT_SECRET'],
+  },
+
   braintrust: {
     projectId: '38395851-dd41-46ec-a971-a30402db6921',
     datasetName: 'auth0-evals',
diff --git a/docs/PROTECTED_MCP.md b/docs/PROTECTED_MCP.md
new file mode 100644
index 00000000..eda5fa65
--- /dev/null
+++ b/docs/PROTECTED_MCP.md
@@ -0,0 +1,126 @@
+# Protected MCP Servers
+
+This guide covers how to wire up a **protected HTTP MCP server** — one that requires an `Authorization: Bearer` token, such as the Auth0 hosted MCP server which authenticates with a Management API token. It explains the credentials you need, the config to add, and how the framework mints and forwards the token to every runner.
+
+---
+
+## When to use this
+
+- You want an agent to use an MCP server that requires an `Authorization: Bearer` token rather than being publicly reachable.
+- The credentials come from a Machine-to-Machine (client-credentials) application, and you want a fresh token minted per job rather than a long-lived secret baked into config.
+
+> **Runner support:** token forwarding is implemented for **all runners** — claude-code, copilot, gemini-cli, and codex. The first three forward the token as an `Authorization: Bearer` header in their MCP server config; codex passes it via a `bearer_token_env_var` reference in `config.toml` (Codex rejects an inline token, so the secret never lands in the file).
+
+---
+
+## Prerequisites
+
+You need an Auth0 tenant with a **Machine-to-Machine application** authorized for the **Management API**:
+
+1. In the Auth0 Dashboard, create (or reuse) an M2M application.
+2. Authorize it for the **Auth0 Management API** (`https://YOUR_TENANT/api/v2/`) with the scopes the task needs — e.g. `read:clients` to list applications.
+3. Note the application's **Client ID** and **Client Secret**.
+
+> **Audience matters.** The hosted MCP server authenticates with a **Management API** token (`/api/v2/` audience). The `/v1/mcp` audience is reserved by Auth0 and returns `access_denied` for client credentials — so the `audience` field below points at `/api/v2/`, not at the MCP URL.
+
+---
+
+## Step 1 — Set the environment variables
+
+The server entry in `eval.config.js` is gated on three env vars. If any is missing, the server is omitted (see [Troubleshooting](#troubleshooting)).
+
+```bash
+export MCP_TENANT_DOMAIN="your-tenant.us.auth0.com"   # no scheme, no trailing slash
+export MCP_CLIENT_ID="your-m2m-client-id"
+export MCP_CLIENT_SECRET="your-m2m-client-secret"
+```
+
+Choose the LLM with the usual `--model` flag when you run the eval; the proxy/model setup is unchanged from any other eval.
+
+---
+
+## Step 2 — Register the MCP server in `eval.config.js`
+
+The Auth0 hosted MCP server is already registered in `apps/auth0-evals/eval.config.js`, gated on the env vars above:
+
+```js
+mcp: {
+  servers: {
+    'auth0-docs': { type: 'http', url: 'https://auth0.com/docs/mcp' },
+
+    ...(process.env.MCP_TENANT_DOMAIN &&
+    process.env.MCP_CLIENT_ID &&
+    process.env.MCP_CLIENT_SECRET
+      ? {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: `https://${process.env.MCP_TENANT_DOMAIN}/v1/mcp`,
+            auth: {
+              tokenUrl: `https://${process.env.MCP_TENANT_DOMAIN}/oauth/token`,
+              clientId: process.env.MCP_CLIENT_ID,
+              clientSecret: process.env.MCP_CLIENT_SECRET,
+              audience: `https://${process.env.MCP_TENANT_DOMAIN}/api/v2/`,
+            },
+          },
+        }
+      : {}),
+  },
+},
+```
+
+To wire up **a different** protected HTTP MCP server, add another entry with an `auth` block. The `auth` field is typed as `MCPOAuthConfig`:
+
+| Field | Meaning |
+|---|---|
+| `tokenUrl` | OAuth token endpoint, e.g. `https://TENANT/oauth/token` |
+| `clientId` | Client ID for the client-credentials grant |
+| `clientSecret` | Client secret for the client-credentials grant |
+| `audience` | API audience the token is minted for, e.g. `https://TENANT/api/v2/` |
+
+Servers **without** an `auth` block (like `auth0-docs`) continue to work unauthenticated.
+
+---
+
+## Step 3 — How the token is minted and forwarded
+
+You don't write any token code — the framework does it per job:
+
+1. When a job starts with `--tools mcp`, the active runner walks the configured MCP servers.
+2. For each HTTP server with an `auth` block, it calls `mintMcpToken(auth)` — a **client-credentials** exchange (`grant_type=client_credentials`) against `tokenUrl` for the given `audience`.
+3. The resulting token is forwarded to the MCP server. claude-code, copilot, and gemini-cli set it as an `Authorization: Bearer <token>` header in the server config; codex writes a `bearer_token_env_var` reference into `config.toml` and injects the token into the Codex process env under that name (Codex rejects an inline `bearer_token`, so the secret stays out of the config file).
+
+The token is minted **per job**, not at config-load time, so a long `--model all --mode all` matrix never reuses an expired token.
+
+**Loud failure:** if the token mint fails (bad creds, network error, missing field), the server is **skipped with a `logger.warn`** rather than registered unauthenticated. This makes a misconfigured run look like "MCP wasn't available" — not a silent "the agent chose not to use MCP."
+
+---
+
+## Sandbox credential passthrough
+
+When evals run in the Docker sandbox (the default), the framework can only mint a token inside the container if the credentials reach it. The three `MCP_*` vars are forwarded via `sandbox.passthroughEnv` in `eval.config.js`:
+
+```js
+sandbox: {
+  passthroughEnv: ['MCP_TENANT_DOMAIN', 'MCP_CLIENT_ID', 'MCP_CLIENT_SECRET'],
+},
+```
+
+Only the **names** are listed here; values are resolved from `process.env` at job launch and never stored in config. Vars that aren't currently set on the host are skipped.
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause |
+|---|---|
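+| Log: `MCP server 'auth0-hosted-mcp' skipped — token mint failed or creds missing` | The token endpoint rejected the credentials, the token request failed (e.g. a network error), or a field in the server's `auth` block is empty. (If one of the three `MCP_*` vars is unset, the server is never registered — see the next row.) |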
+| `auth0-hosted-mcp` not registered at all (only `auth0-docs`) | The env-var gate in `eval.config.js` evaluated false — at least one of the three vars is empty. |
+| Token mint returns `access_denied` | `audience` points at `/v1/mcp` instead of `/api/v2/`, or the M2M app isn't authorized for the Management API. |
+| `401` late in a very long job | The minted token's TTL expired mid-job. Management API tokens are typically long-lived (hours) vs. the 30-min job timeout, so this is rare. |
+
+---
+
+## Related docs
+
+- [docs/ADDING_EVALS.md](ADDING_EVALS.md) — grader primitives and how evals are structured.
+- [AGENTS.md](../AGENTS.md) — framework overview, runner details, and the MCP auth summary.
diff --git a/packages/eval-core/src/config/defaults.ts b/packages/eval-core/src/config/defaults.ts
index 07e6ba27..db402c75 100644
--- a/packages/eval-core/src/config/defaults.ts
+++ b/packages/eval-core/src/config/defaults.ts
@@ -50,4 +50,6 @@ export const DEFAULT_FRAMEWORK_CONFIG: Required<FrameworkConfig> = {
   },
 
   scoring: {},
+
+  sandbox: {},
 };
diff --git a/packages/eval-core/src/config/framework.ts b/packages/eval-core/src/config/framework.ts
index 09666de0..995fc537 100644
--- a/packages/eval-core/src/config/framework.ts
+++ b/packages/eval-core/src/config/framework.ts
@@ -37,11 +37,28 @@ export interface MCPStdioServerConfig {
   env?: Record<string, string>;
 }
 
+export interface MCPOAuthConfig {
+  /** OAuth token endpoint, e.g. https://TENANT/oauth/token */
+  tokenUrl: string;
+  /** OAuth client ID for the client-credentials grant. */
+  clientId: string;
+  /** OAuth client secret for the client-credentials grant. */
+  clientSecret: string;
+  /** API audience the token is requested for, e.g. https://TENANT/api/v2/ */
+  audience: string;
+}
+
 export interface MCPHttpServerConfig {
   /** URL-based MCP server. */
   type: 'http';
   /** HTTP URL for the remote MCP server. */
   url: string;
+  /**
+   * Optional OAuth config. When present, the framework mints a fresh Bearer token
+   * per agent job and forwards it to the server (for codex, via an env var). If any
+   * field is empty (e.g. a missing env var) or the mint fails, the server is skipped with a warning.
+   */
+  auth?: MCPOAuthConfig;
 }
 
 /** Discriminated union — either a stdio (command-based) or http (URL-based) MCP server. */
@@ -119,6 +136,16 @@ export interface ScoringConfig {
 
 // ── Root config ──────────────────────────────────────────────────────────────
 
+export interface SandboxConfig {
+  /**
+   * Names of host environment variables to forward into the Docker sandbox.
+   * Each name is resolved from `process.env` at job launch; only currently-set
+   * vars are forwarded. Use for app-specific secrets the framework can't know
+   * about (e.g. MCP server credentials). Names only — values are never stored here.
+   */
+  passthroughEnv?: string[];
+}
+
 export interface FrameworkConfig {
   /** Directory containing evaluation definitions (required). */
   evalsDir: string;
@@ -140,4 +167,6 @@ export interface FrameworkConfig {
   braintrust?: BraintrustConfig;
   /** Scoring behaviour overrides (e.g. custom doc URL allowlist). */
   scoring?: ScoringConfig;
+  /** Docker sandbox settings (e.g. env vars to forward into the container). */
+  sandbox?: SandboxConfig;
 }
diff --git a/packages/eval-core/src/config/mcp-auth.ts b/packages/eval-core/src/config/mcp-auth.ts
new file mode 100644
index 00000000..2c6e2ed2
--- /dev/null
+++ b/packages/eval-core/src/config/mcp-auth.ts
@@ -0,0 +1,41 @@
+/**
+ * OAuth token minting for authenticated HTTP MCP servers.
+ *
+ * Performs a client-credentials exchange to obtain a Bearer token, resolving to
+ * `undefined` on any failure. Minted per job so a long matrix run never reuses an expired token.
+ */
+
+import type { MCPOAuthConfig } from './framework.js';
+import { logger } from '../utils/logger.js';
+
+export async function mintMcpToken(auth: MCPOAuthConfig): Promise<string | undefined> {
+  if (!auth.tokenUrl || !auth.clientId || !auth.clientSecret || !auth.audience) {
+    logger.warn('[mcp-auth] Incomplete OAuth config — skipping token mint');
+    return undefined;
+  }
+  try {
+    const res = await fetch(auth.tokenUrl, {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({
+        grant_type: 'client_credentials',
+        client_id: auth.clientId,
+        client_secret: auth.clientSecret,
+        audience: auth.audience,
+      }),
+    });
+    if (!res.ok) {
+      logger.warn(`[mcp-auth] Token request failed: ${res.status}`);
+      return undefined;
+    }
+    const { access_token } = (await res.json()) as { access_token?: string };
+    if (!access_token) {
+      logger.warn('[mcp-auth] Token response missing access_token');
+      return undefined;
+    }
+    return access_token;
+  } catch (err) {
+    logger.warn(`[mcp-auth] Token request error: ${err instanceof Error ? err.message : String(err)}`);
+    return undefined;
+  }
+}
diff --git a/packages/eval-core/src/index.ts b/packages/eval-core/src/index.ts
index e6147132..b9cc5ae9 100644
--- a/packages/eval-core/src/index.ts
+++ b/packages/eval-core/src/index.ts
@@ -62,6 +62,7 @@ export type {
   MCPServerConfig,
   MCPStdioServerConfig,
   MCPHttpServerConfig,
+  MCPOAuthConfig,
   SkillsConfig,
   RemoteSkillRepo,
   JudgeConfig,
@@ -69,10 +70,12 @@ export type {
   WorkspaceConfig,
   BraintrustConfig,
   ScoringConfig,
+  SandboxConfig,
 } from './config/framework.js';
 export { DEFAULT_FRAMEWORK_CONFIG } from './config/defaults.js';
 export { defineConfig, loadConfig, deepMerge } from './config/loader.js';
 export type { LoadConfigOptions } from './config/loader.js';
+export { mintMcpToken } from './config/mcp-auth.js';
 
 // Workspace
 export {
diff --git a/packages/eval-core/tests/config/mcp-auth.test.ts b/packages/eval-core/tests/config/mcp-auth.test.ts
new file mode 100644
index 00000000..1b6418bb
--- /dev/null
+++ b/packages/eval-core/tests/config/mcp-auth.test.ts
@@ -0,0 +1,56 @@
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import { mintMcpToken } from '../../src/config/mcp-auth.js';
+import type { MCPOAuthConfig } from '../../src/config/framework.js';
+
+const validAuth: MCPOAuthConfig = {
+  tokenUrl: 'https://tenant.us.auth0.com/oauth/token',
+  clientId: 'client-id',
+  clientSecret: 'client-secret',
+  audience: 'https://tenant.us.auth0.com/api/v2/',
+};
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe('mintMcpToken', () => {
+  it('returns the access_token on a successful exchange', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({ access_token: 'tok-123' }),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const token = await mintMcpToken(validAuth);
+
+    expect(token).toBe('tok-123');
+    expect(fetchMock).toHaveBeenCalledOnce();
+    const [url, init] = fetchMock.mock.calls[0]!;
+    expect(url).toBe(validAuth.tokenUrl);
+    const body = JSON.parse((init as RequestInit).body as string);
+    expect(body).toMatchObject({
+      grant_type: 'client_credentials',
+      client_id: 'client-id',
+      client_secret: 'client-secret',
+      audience: validAuth.audience,
+    });
+  });
+
+  it('returns undefined when the response is not ok', async () => {
+    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ ok: false, json: async () => ({}) }));
+    expect(await mintMcpToken(validAuth)).toBeUndefined();
+  });
+
+  it('returns undefined without calling fetch when a credential is missing', async () => {
+    const fetchMock = vi.fn();
+    vi.stubGlobal('fetch', fetchMock);
+    const token = await mintMcpToken({ ...validAuth, clientSecret: '' });
+    expect(token).toBeUndefined();
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  it('returns undefined when the body has no access_token', async () => {
+    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ ok: true, json: async () => ({}) }));
+    expect(await mintMcpToken(validAuth)).toBeUndefined();
+  });
+});
diff --git a/packages/eval/src/cli/run.ts b/packages/eval/src/cli/run.ts
index 19da6f06..23f4b123 100644
--- a/packages/eval/src/cli/run.ts
+++ b/packages/eval/src/cli/run.ts
@@ -153,6 +153,7 @@ async function runAgentJob(
         agentType,
         apiKey,
         ghToken: process.env.GH_TOKEN,
+        passthroughEnv: getFrameworkConfig().sandbox.passthroughEnv,
       });
     }
 
diff --git a/packages/eval/src/runners/claude-code/agent.ts b/packages/eval/src/runners/claude-code/agent.ts
index 0cd40adb..e48aa3f9 100644
--- a/packages/eval/src/runners/claude-code/agent.ts
+++ b/packages/eval/src/runners/claude-code/agent.ts
@@ -30,6 +30,7 @@ import {
   logger,
   makeSessionId,
   filteredEnv,
+  mintMcpToken,
 } from '@a0/eval-core';
 import { classifyActionType, classifyErrorCategory, detectRetry } from '@a0/eval-core';
 import { LLM_API_KEY_ENV } from '../../cli/constants.js';
@@ -137,12 +138,25 @@ export async function runClaudeCodeAgent(
   }
 
   // Build MCP server config when --tools mcp is requested.
-  let mcpServers: Record<string, { type: 'http'; url: string }> | undefined;
+  // Token is minted here (job start) so a long matrix run never reuses an expired token.
+  let mcpServers: Record<string, { type: 'http'; url: string; headers?: Record<string, string> }> | undefined;
   if (tools.includes('mcp')) {
     const configServers = getFrameworkConfig().mcp.servers;
-    const httpServers: Record<string, { type: 'http'; url: string }> = {};
+    const httpServers: Record<string, { type: 'http'; url: string; headers?: Record<string, string> }> = {};
     for (const [name, server] of Object.entries(configServers)) {
-      if (server.type === 'http') {
+      if (server.type !== 'http') continue;
+      if (server.auth) {
+        const token = await mintMcpToken(server.auth);
+        if (!token) {
+          logger.warn(`[ClaudeCode] MCP server '${name}' skipped — token mint failed or creds missing`);
+          continue;
+        }
+        httpServers[name] = {
+          type: 'http' as const,
+          url: server.url,
+          headers: { Authorization: `Bearer ${token}` },
+        };
+      } else {
         httpServers[name] = { type: 'http' as const, url: server.url };
       }
     }
@@ -305,9 +319,7 @@ export function handleMessage(
 
     const usage = msg.usage;
     const turnInput =
-      (usage?.input_tokens ?? 0) +
-      (usage?.cache_read_input_tokens ?? 0) +
-      (usage?.cache_creation_input_tokens ?? 0);
+      (usage?.input_tokens ?? 0) + (usage?.cache_read_input_tokens ?? 0) + (usage?.cache_creation_input_tokens ?? 0);
     const turnOutput = usage?.output_tokens ?? 0;
     record.inputTokens += turnInput;
     record.outputTokens += turnOutput;
diff --git a/packages/eval/src/runners/codex/agent.ts b/packages/eval/src/runners/codex/agent.ts
index 016e5f0b..7253630f 100644
--- a/packages/eval/src/runners/codex/agent.ts
+++ b/packages/eval/src/runners/codex/agent.ts
@@ -36,6 +36,7 @@ import {
   logger,
   filteredEnv,
   readWorkspaceFile,
+  mintMcpToken,
 } from '@a0/eval-core';
 import { classifyActionType, classifyErrorCategory, detectRetry } from '@a0/eval-core';
 import { LLM_API_KEY_ENV } from '../../cli/constants.js';
@@ -56,12 +57,27 @@ function tomlEscape(s: string): string {
   return s.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
 }
 
-function buildMcpToml(servers: Record<string, MCPServerConfig>): string {
+/**
+ * Builds the `[mcp_servers.*]` TOML blocks.
+ *
+ * For HTTP servers with an entry in `bearerTokenEnvVars`, emits
+ * `bearer_token_env_var = "<NAME>"` so Codex reads the Bearer token from that
+ * env var at runtime. Codex rejects an inline `bearer_token` key, so the token
+ * is never written to the config file — only the env-var name is.
+ */
+function buildMcpToml(
+  servers: Record<string, MCPServerConfig>,
+  bearerTokenEnvVars: Record<string, string> = {},
+): string {
   let toml = '';
   for (const [name, server] of Object.entries(servers)) {
     const safeName = tomlEscape(name);
     if (server.type === 'http') {
       toml += `\n[mcp_servers."${safeName}"]\nurl = "${tomlEscape(server.url)}"\n`;
+      const envVar = bearerTokenEnvVars[name];
+      if (envVar) {
+        toml += `bearer_token_env_var = "${tomlEscape(envVar)}"\n`;
+      }
     } else {
       toml += `\n[mcp_servers."${safeName}"]\ncommand = "${tomlEscape(server.command)}"\n`;
       if (server.args && server.args.length > 0) {
@@ -84,6 +100,7 @@ function writeCodexConfig(
   proxyBaseUrl: string,
   workspace: string,
   mcpServers: Record<string, MCPServerConfig> = {},
+  bearerTokenEnvVars: Record<string, string> = {},
 ): void {
   mkdirSync(codexHome, { recursive: true });
   // Resolve canonical path — on macOS /var is a symlink to /private/var.
@@ -101,7 +118,7 @@ wire_api = "responses"
 
 [projects."${resolvedWorkspace}"]
 trust_level = "trusted"
-${buildMcpToml(mcpServers)}`;
+${buildMcpToml(mcpServers, bearerTokenEnvVars)}`;
   writeFileSync(join(codexHome, 'config.toml'), configToml, 'utf-8');
 }
 
@@ -212,8 +229,7 @@ function handleItem(item: ThreadItem, record: RunRecord, ctx: RunCtx, now: numbe
       const isError = item.status === 'failed';
       for (const change of item.changes) {
         const rawName = change.kind === 'delete' ? 'delete_file' : 'write_file';
-        const content =
-          !isError && change.kind !== 'delete' ? readWorkspaceFile(ctx.workspace, change.path) : '';
+        const content = !isError && change.kind !== 'delete' ? readWorkspaceFile(ctx.workspace, change.path) : '';
         ctx.turnToolCount++;
         ctx.toolCallsInTurn++;
         pushToolCall(record, rawName, { path: change.path, content }, '', isError, now);
@@ -433,11 +449,36 @@ export async function runCodexAgent(
   mkdirSync(codexHome, { recursive: true });
 
   // Resolve MCP servers from framework config when --tools mcp is requested.
-  const mcpServers: Record<string, MCPServerConfig> = tools.includes('mcp') ? getFrameworkConfig().mcp.servers : {};
+  const configuredServers: Record<string, MCPServerConfig> = tools.includes('mcp')
+    ? getFrameworkConfig().mcp.servers
+    : {};
+
+  // Mint a Bearer token per HTTP server that declares an `auth` block. The token
+  // is passed to Codex via an env var (referenced by `bearer_token_env_var` in
+  // config.toml) — Codex rejects an inline `bearer_token`, so the secret never
+  // touches the config file. Minting per job avoids reusing an expired token on
+  // a long matrix run. A failed mint drops the server rather than registering it
+  // unauthenticated, so a misconfigured run looks like "MCP wasn't available".
+  const mcpServers: Record<string, MCPServerConfig> = {};
+  const bearerTokenEnvVars: Record<string, string> = {};
+  const bearerTokens: Record<string, string> = {};
+  for (const [name, server] of Object.entries(configuredServers)) {
+    if (server.type === 'http' && server.auth) {
+      const token = await mintMcpToken(server.auth);
+      if (!token) {
+        logger.warn(`[Codex] MCP server '${name}' skipped — token mint failed or creds missing`);
+        continue;
+      }
+      const envVar = `MCP_BEARER_${name.replace(/[^A-Za-z0-9]/g, '_').toUpperCase()}`;
+      bearerTokenEnvVars[name] = envVar;
+      bearerTokens[envVar] = token;
+    }
+    mcpServers[name] = server;
+  }
 
   const normalizedBaseUrl = proxyBaseUrl.replace(/\/+$/, '');
   const codexApiUrl = normalizedBaseUrl.endsWith('/v1') ? normalizedBaseUrl : `${normalizedBaseUrl}/v1`;
-  writeCodexConfig(codexHome, codexApiUrl, workspace, mcpServers);
+  writeCodexConfig(codexHome, codexApiUrl, workspace, mcpServers, bearerTokenEnvVars);
   logger.info(`[Codex] Proxy: ${proxyBaseUrl}`);
   logger.info(`[Codex] CODEX_HOME: ${codexHome}`);
   if (Object.keys(mcpServers).length > 0) {
@@ -462,6 +503,12 @@ export async function runCodexAgent(
     }
   }
 
+  // Inject minted Bearer tokens so Codex can resolve each authed server's
+  // `bearer_token_env_var` reference at runtime.
+  for (const [key, value] of Object.entries(bearerTokens)) {
+    codexEnv[key] = value;
+  }
+
   // Skills are injected into the workspace by CodexRunner.prepareSkills().
 
   const ctx: RunCtx = {
diff --git a/packages/eval/src/runners/copilot/agent.ts b/packages/eval/src/runners/copilot/agent.ts
index 638d9578..f9f2a53a 100644
--- a/packages/eval/src/runners/copilot/agent.ts
+++ b/packages/eval/src/runners/copilot/agent.ts
@@ -26,6 +26,7 @@ import {
   logger,
   makeSessionId,
   filteredEnv,
+  mintMcpToken,
 } from '@a0/eval-core';
 import { classifyActionType, classifyErrorCategory, detectRetry } from '@a0/eval-core';
 import { CopilotCliTranslator } from './translator.js';
@@ -49,12 +50,33 @@ export interface CopilotRunOptions {
   model?: string;
 }
 
-/** Returns MCP server config for the Auth0 docs server. */
-export function getMcpServers(): Record<string, MCPServerConfig> {
+/**
+ * Builds the Copilot MCP server config from the framework config.
+ *
+ * For HTTP servers with an `auth` block, mints a fresh Bearer token per job
+ * (client-credentials exchange) and forwards it as an `Authorization` header.
+ * If the token mint fails, the server is skipped with a warning rather than
+ * registered unauthenticated — a misconfigured run looks like "MCP wasn't
+ * available", not a silent "the agent chose not to use MCP".
+ */
+export async function getMcpServers(): Promise<Record<string, MCPServerConfig>> {
   const servers = getFrameworkConfig().mcp.servers;
   const result: Record<string, MCPServerConfig> = {};
   for (const [name, server] of Object.entries(servers)) {
-    if (server.type === 'http') {
+    if (server.type !== 'http') continue;
+    if (server.auth) {
+      const token = await mintMcpToken(server.auth);
+      if (!token) {
+        logger.warn(`[Copilot] MCP server '${name}' skipped — token mint failed or creds missing`);
+        continue;
+      }
+      result[name] = {
+        type: 'http',
+        url: server.url,
+        tools: ['*'],
+        headers: { Authorization: `Bearer ${token}` },
+      };
+    } else {
       result[name] = { type: 'http', url: server.url, tools: ['*'] };
     }
   }
@@ -125,7 +147,10 @@ export async function runCopilotAgent(
   if (model) {
     logger.info(`[Copilot] Model: ${model}`);
   }
-  if (tools.includes('mcp')) logger.info(`[Copilot] MCP: ${Object.keys(getMcpServers()).join(', ')}`);
+
+  // Mint tokens once per job so a long matrix run never reuses an expired token.
+  const mcpServers = tools.includes('mcp') ? await getMcpServers() : {};
+  if (tools.includes('mcp')) logger.info(`[Copilot] MCP: ${Object.keys(mcpServers).join(', ')}`);
   if (tools.includes('skills')) logger.info('[Copilot] Skills: .github/skills/');
 
   const session = await client.createSession({
@@ -134,7 +159,7 @@ export async function runCopilotAgent(
     onPermissionRequest: approveAll,
     // Suppress ask_user to prevent eval runs from blocking on interactive input.
     excludedTools: ['ask_user'],
-    ...(tools.includes('mcp') ? { mcpServers: getMcpServers() } : {}),
+    ...(tools.includes('mcp') ? { mcpServers } : {}),
     // Skill files are pre-copied to .github/skills/ by CopySkillsStrategy.
     ...(tools.includes('skills') ? { skillDirectories: [join(workspace, '.github', 'skills')] } : {}),
     // Disable infinite sessions — each eval run is a clean, isolated session.
diff --git a/packages/eval/src/runners/gemini-cli/agent.ts b/packages/eval/src/runners/gemini-cli/agent.ts
index 3c4d879e..c6d81254 100644
--- a/packages/eval/src/runners/gemini-cli/agent.ts
+++ b/packages/eval/src/runners/gemini-cli/agent.ts
@@ -28,6 +28,7 @@ import {
   estimateCost,
   logger,
   filteredEnv,
+  mintMcpToken,
 } from '@a0/eval-core';
 import { classifyActionType, classifyErrorCategory, detectRetry } from '@a0/eval-core';
 import { LLM_API_KEY_ENV } from '../../cli/constants.js';
@@ -76,21 +77,44 @@ function isAutoCancelled(output: string): boolean {
  * MCP tool calls appear in the stream-json output as tool_use events with names
  * using the format `mcp__<serverName>__<toolName>` (e.g. `mcp__auth0-docs__search_auth0_docs`).
  *
+ * For HTTP servers with an `auth` block, mints a fresh Bearer token per job
+ * (client-credentials exchange) and writes it as an `Authorization` header into
+ * the server config. If the token mint fails, the server is skipped with a
+ * warning rather than registered unauthenticated.
+ *
  * Returns the names of the registered MCP servers (empty when MCP is disabled).
  */
-function writeGeminiSettings(workspace: string, includeMcp: boolean): string[] {
+interface GeminiMcpServer {
+  httpUrl: string;
+  timeout: number;
+  headers?: Record<string, string>;
+}
+
+async function writeGeminiSettings(workspace: string, includeMcp: boolean): Promise<string[]> {
   const settings: {
     security: { auth: { selectedType: string } };
-    mcpServers?: Record<string, { httpUrl: string; timeout: number }>;
+    mcpServers?: Record<string, GeminiMcpServer>;
   } = {
     security: { auth: { selectedType: GEMINI_AUTH_TYPE } },
   };
 
-  const mcpServers: Record<string, { httpUrl: string; timeout: number }> = {};
+  const mcpServers: Record<string, GeminiMcpServer> = {};
   if (includeMcp) {
     const configServers = getFrameworkConfig().mcp.servers;
     for (const [name, server] of Object.entries(configServers)) {
-      if (server.type === 'http') {
+      if (server.type !== 'http') continue;
+      if (server.auth) {
+        const token = await mintMcpToken(server.auth);
+        if (!token) {
+          logger.warn(`[GeminiCLI] MCP server '${name}' skipped — token mint failed or creds missing`);
+          continue;
+        }
+        mcpServers[name] = {
+          httpUrl: server.url,
+          timeout: 30000,
+          headers: { Authorization: `Bearer ${token}` },
+        };
+      } else {
         mcpServers[name] = { httpUrl: server.url, timeout: 30000 };
       }
     }
@@ -154,7 +178,7 @@ export async function runGeminiCliAgent(
   logger.info(`\n[GeminiCLI] Starting task: ${evalDef.id}`);
   logger.info(`[GeminiCLI] Workspace: ${workspace}`);
   logger.info(`[GeminiCLI] Model: ${model}`);
-  const mcpNames = writeGeminiSettings(workspace, tools.includes('mcp'));
+  const mcpNames = await writeGeminiSettings(workspace, tools.includes('mcp'));
   if (mcpNames.length > 0) logger.info(`[GeminiCLI] MCP: ${mcpNames.join(', ')}`);
 
   // Trust only this workspace so YOLO mode isn't overridden in CI/headless environments.
diff --git a/packages/eval/src/sandbox/docker.ts b/packages/eval/src/sandbox/docker.ts
index ad87a304..f360af90 100644
--- a/packages/eval/src/sandbox/docker.ts
+++ b/packages/eval/src/sandbox/docker.ts
@@ -30,6 +30,11 @@ export interface DockerRunOptions {
   apiKey: string;
   /** Optional GitHub token (for copilot runner). */
   ghToken?: string;
+  /**
+   * Names of host env vars to forward into the container (from `sandbox.passthroughEnv`).
+   * Each name is resolved from `process.env` by `runJobInDocker`; only vars set on the host (even if empty) are forwarded.
+   */
+  passthroughEnv?: string[];
 }
 
 // Serialises concurrent ensureDockerImage calls so only one build runs at a time.
@@ -115,7 +120,7 @@ function findRepoRoot(): string {
  * after the container exits.
  */
 export async function runJobInDocker(options: DockerRunOptions): Promise<JobResult> {
-  const { workspace, evalId, model, mode, tools, agentType, apiKey, ghToken } = options;
+  const { workspace, evalId, model, mode, tools, agentType, apiKey, ghToken, passthroughEnv } = options;
 
   await ensureDockerImage();
 
@@ -150,6 +155,15 @@ export async function runJobInDocker(options: DockerRunOptions): Promise<JobResu
     envFlags.push('-e', `GH_TOKEN=${ghToken}`);
   }
 
+  // Forward app-declared passthrough env vars (e.g. MCP server credentials).
+  // Only vars currently set on the host are forwarded; missing ones are skipped.
+  for (const name of passthroughEnv ?? []) {
+    const value = process.env[name];
+    if (value !== undefined) {
+      envFlags.push('-e', `${name}=${value}`);
+    }
+  }
+
   // Mount host CA certificates for corporate SSL inspection (MITM proxies)
   // Use resolvedWorkspace (canonicalized) to ensure we mount the same path we validated
   const volumeFlags: string[] = ['-v', `${resolvedWorkspace}:${DOCKER_WORKSPACE_MOUNT}:rw`];
diff --git a/packages/eval/tests/docker.test.ts b/packages/eval/tests/docker.test.ts
index 36c35809..f06eb716 100644
--- a/packages/eval/tests/docker.test.ts
+++ b/packages/eval/tests/docker.test.ts
@@ -330,6 +330,47 @@ describe('runJobInDocker — results parsing', () => {
 
     rmSync(workspace, { recursive: true, force: true });
   });
+
+  it('forwards passthroughEnv vars that are set on the host and skips unset ones', async () => {
+    const runJobInDocker = await getRunJobInDocker();
+    const workspace = mkdtempSync(join(tmpdir(), 'docker-passthrough-'));
+    const origDomain = process.env.MCP_TENANT_DOMAIN;
+    const origSecret = process.env.MCP_CLIENT_SECRET;
+    process.env.MCP_TENANT_DOMAIN = 'tenant.us.auth0.com';
+    delete process.env.MCP_CLIENT_SECRET;
+
+    // Restore env and remove the workspace in finally so a failed assertion cannot leak into later tests.
+    try {
+      mockExecFileSync.mockReturnValue('');
+      let capturedArgs: string[] = [];
+      mockSpawn.mockImplementation((_cmd: string, args: string[]) => {
+        capturedArgs = args;
+        writeFileSync(join(workspace, '.eval-results.json'), JSON.stringify({ ok: true }));
+        return makeCloseEmitter();
+      });
+
+      await runJobInDocker({
+        workspace,
+        evalId: 'test_eval',
+        model: 'gpt-5.4',
+        mode: 'agent' as const,
+        tools: ['mcp'],
+        agentType: 'claude-code' as const,
+        apiKey: 'test-key',
+        passthroughEnv: ['MCP_TENANT_DOMAIN', 'MCP_CLIENT_SECRET'],
+      });
+
+      // Set var is forwarded; unset var is skipped entirely.
+      const envPairs = extractEnvPairs(capturedArgs);
+      expect(envPairs).toContain('MCP_TENANT_DOMAIN=tenant.us.auth0.com');
+      expect(envPairs.some((e) => e.startsWith('MCP_CLIENT_SECRET='))).toBe(false);
+    } finally {
+      if (origDomain === undefined) delete process.env.MCP_TENANT_DOMAIN;
+      else process.env.MCP_TENANT_DOMAIN = origDomain;
+      if (origSecret !== undefined) process.env.MCP_CLIENT_SECRET = origSecret;
+      rmSync(workspace, { recursive: true, force: true });
+    }
+  });
 });
 
 // ── Host-side timeout ────────────────────────────────────────────────────────
diff --git a/packages/eval/tests/runners/codex-agent.test.ts b/packages/eval/tests/runners/codex-agent.test.ts
index bb778fe8..0539e14e 100644
--- a/packages/eval/tests/runners/codex-agent.test.ts
+++ b/packages/eval/tests/runners/codex-agent.test.ts
@@ -51,10 +51,13 @@ const mockGetFrameworkConfig = vi.hoisted(() =>
   }),
 );
 
+const mintMcpTokenMock = vi.hoisted(() => vi.fn());
+
 vi.mock('@a0/eval-core', async () => ({
   ...(await vi.importActual('@a0/eval-core')),
   getAgentProxyBaseUrl: vi.fn().mockReturnValue('https://your-llm-proxy.example.com'),
   getFrameworkConfig: mockGetFrameworkConfig,
+  mintMcpToken: mintMcpTokenMock,
 }));
 
 // ── Mock @openai/codex-sdk ──────────────────────────────────────────────────
@@ -752,6 +755,79 @@ describe('MCP integration', () => {
     expect(env['MY_SECRET_TOKEN']).toBe('secret123');
   });
 
+  it('mints a token and writes bearer_token_env_var for authed http servers', async () => {
+    mintMcpTokenMock.mockResolvedValueOnce('minted-token');
+    mockGetFrameworkConfig.mockReturnValue({
+      proxy: { baseUrl: 'https://your-llm-proxy.example.com/v1' },
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+    queueTurns([{ type: 'item.completed', item: { type: 'agent_message', text: 'Done.' } }, turnCompleted()]);
+
+    await runCodexAgent(evalDef, workspace, { tools: ['mcp'] });
+
+    const written = (writeFileSync as ReturnType<typeof vi.fn>).mock.calls.find(
+      (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).endsWith('config.toml'),
+    );
+    expect(written).toBeDefined();
+    if (!written) return;
+    const toml = written[1] as string;
+    expect(toml).toContain('[mcp_servers."auth0-hosted-mcp"]');
+    expect(toml).toContain('url = "https://tenant.auth0.com/v1/mcp"');
+    expect(toml).toContain('bearer_token_env_var = "MCP_BEARER_AUTH0_HOSTED_MCP"');
+    // The token itself must never be written to the config file.
+    expect(toml).not.toContain('minted-token');
+
+    // The minted token is injected into the Codex env under the referenced name.
+    const codexOptions = sdk.state.constructorCalls[0];
+    const env = codexOptions.env as Record<string, string>;
+    expect(env['MCP_BEARER_AUTH0_HOSTED_MCP']).toBe('minted-token');
+  });
+
+  it('skips an authed server when the token mint fails', async () => {
+    mintMcpTokenMock.mockResolvedValueOnce(undefined);
+    mockGetFrameworkConfig.mockReturnValue({
+      proxy: { baseUrl: 'https://your-llm-proxy.example.com/v1' },
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+    queueTurns([{ type: 'item.completed', item: { type: 'agent_message', text: 'Done.' } }, turnCompleted()]);
+
+    await runCodexAgent(evalDef, workspace, { tools: ['mcp'] });
+
+    const written = (writeFileSync as ReturnType<typeof vi.fn>).mock.calls.find(
+      (c: unknown[]) => typeof c[0] === 'string' && (c[0] as string).endsWith('config.toml'),
+    );
+    expect(written).toBeDefined();
+    if (!written) return;
+    const toml = written[1] as string;
+    expect(toml).not.toContain('auth0-hosted-mcp');
+  });
+
   it('does not write MCP sections when tools does not include mcp', async () => {
     mockGetFrameworkConfig.mockReturnValue({
       proxy: { baseUrl: 'https://your-llm-proxy.example.com/v1' },
diff --git a/packages/eval/tests/runners/copilot-agent.test.ts b/packages/eval/tests/runners/copilot-agent.test.ts
index a054c76e..c2c28ecb 100644
--- a/packages/eval/tests/runners/copilot-agent.test.ts
+++ b/packages/eval/tests/runners/copilot-agent.test.ts
@@ -8,6 +8,14 @@
 
 import { describe, it, expect, vi, beforeAll, beforeEach, afterEach } from 'vitest';
 import { EventEmitter } from 'node:events';
+
+// Mock only mintMcpToken so authed-server tests don't perform a real OAuth fetch.
+const mintMcpTokenMock = vi.hoisted(() => vi.fn());
+vi.mock('@a0/eval-core', async () => ({
+  ...(await vi.importActual('@a0/eval-core')),
+  mintMcpToken: mintMcpTokenMock,
+}));
+
 import { setFrameworkConfig } from '@a0/eval-core';
 import { TEST_CONFIG } from '../test-config.js';
 
@@ -123,16 +131,71 @@ describe('COPILOT_DEFAULT_MODEL', () => {
 });
 
 describe('getMcpServers', () => {
-  it('returns auth0-docs remote MCP server config', () => {
-    const servers = getMcpServers();
+  it('returns auth0-docs remote MCP server config', async () => {
+    const servers = await getMcpServers();
     expect(servers).toHaveProperty('auth0-docs');
     expect(servers['auth0-docs'].type).toBe('http');
-    expect(servers['auth0-docs'].url).toBe('https://auth0.com/docs/mcp');
-  });
-
-  it('includes all tools via wildcard', () => {
-    const servers = getMcpServers();
-    expect(servers['auth0-docs'].tools).toContain('*');
+    expect((servers['auth0-docs'] as { url: string }).url).toBe('https://auth0.com/docs/mcp');
+  });
+
+  it('includes all tools via wildcard', async () => {
+    const servers = await getMcpServers();
+    expect((servers['auth0-docs'] as { tools: string[] }).tools).toContain('*');
+  });
+
+  it('does not set an Authorization header for unauthenticated servers', async () => {
+    const servers = await getMcpServers();
+    expect((servers['auth0-docs'] as { headers?: Record<string, string> }).headers).toBeUndefined();
+  });
+
+  it('mints a token and forwards it as an Authorization header for authed servers', async () => {
+    mintMcpTokenMock.mockResolvedValueOnce('minted-token');
+    setFrameworkConfig({
+      ...TEST_CONFIG,
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+    const servers = await getMcpServers();
+    expect((servers['auth0-hosted-mcp'] as { headers?: Record<string, string> }).headers).toEqual({
+      Authorization: 'Bearer minted-token',
+    });
+    setFrameworkConfig(TEST_CONFIG);
+  });
+
+  it('skips an authed server when the token mint fails', async () => {
+    mintMcpTokenMock.mockResolvedValueOnce(undefined);
+    setFrameworkConfig({
+      ...TEST_CONFIG,
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+    const servers = await getMcpServers();
+    expect(servers).not.toHaveProperty('auth0-hosted-mcp');
+    setFrameworkConfig(TEST_CONFIG);
   });
 });
 
diff --git a/packages/eval/tests/runners/gemini-cli-agent.test.ts b/packages/eval/tests/runners/gemini-cli-agent.test.ts
index 2cc38182..aae9e7e2 100644
--- a/packages/eval/tests/runners/gemini-cli-agent.test.ts
+++ b/packages/eval/tests/runners/gemini-cli-agent.test.ts
@@ -20,9 +20,8 @@ import { join } from 'node:path';
 
 // ── Mock framework config ────────────────────────────────────────────────────
 
-vi.mock('@a0/eval-core', async () => ({
-  ...(await vi.importActual('@a0/eval-core')),
-  getFrameworkConfig: vi.fn().mockReturnValue({
+const mockGetFrameworkConfig = vi.hoisted(() =>
+  vi.fn().mockReturnValue({
     proxy: { baseUrl: 'https://llm.example.com/v1' },
     mcp: {
       servers: {
@@ -30,6 +29,13 @@ vi.mock('@a0/eval-core', async () => ({
       },
     },
   }),
+);
+const mintMcpTokenMock = vi.hoisted(() => vi.fn());
+
+vi.mock('@a0/eval-core', async () => ({
+  ...(await vi.importActual('@a0/eval-core')),
+  getFrameworkConfig: mockGetFrameworkConfig,
+  mintMcpToken: mintMcpTokenMock,
 }));
 
 // ── Mock spawn ────────────────────────────────────────────────────────────────
@@ -568,6 +574,66 @@ describe('.gemini/settings.json', () => {
 
     expect(existsSync(join(tmpWorkspace, '.gemini', 'settings.json'))).toBe(true);
   });
+
+  it('mints a token and writes an Authorization header for authed servers', async () => {
+    mockSpawn.mockReturnValue(makeChild([resultEvent()]));
+    mintMcpTokenMock.mockResolvedValueOnce('minted-token');
+    mockGetFrameworkConfig.mockReturnValueOnce({
+      proxy: { baseUrl: 'https://llm.example.com/v1' },
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+
+    await runGeminiCliAgent(evalDef, tmpWorkspace, { tools: ['mcp'] });
+
+    const settings = readSettings();
+    expect(settings.mcpServers).toEqual({
+      'auth0-hosted-mcp': {
+        httpUrl: 'https://tenant.auth0.com/v1/mcp',
+        timeout: 30000,
+        headers: { Authorization: 'Bearer minted-token' },
+      },
+    });
+  });
+
+  it('skips an authed server when the token mint fails', async () => {
+    mockSpawn.mockReturnValue(makeChild([resultEvent()]));
+    mintMcpTokenMock.mockResolvedValueOnce(undefined);
+    mockGetFrameworkConfig.mockReturnValueOnce({
+      proxy: { baseUrl: 'https://llm.example.com/v1' },
+      mcp: {
+        servers: {
+          'auth0-hosted-mcp': {
+            type: 'http',
+            url: 'https://tenant.auth0.com/v1/mcp',
+            auth: {
+              tokenUrl: 'https://tenant.auth0.com/oauth/token',
+              clientId: 'cid',
+              clientSecret: 'secret',
+              audience: 'https://tenant.auth0.com/api/v2/',
+            },
+          },
+        },
+      },
+    });
+
+    await runGeminiCliAgent(evalDef, tmpWorkspace, { tools: ['mcp'] });
+
+    const settings = readSettings();
+    expect(settings).not.toHaveProperty('mcpServers');
+  });
 });
 
 // ── GH_TOKEN env forwarding ──────────────────────────────────────────────────