From b5668487788f13ed42f4d8d50347026d4650575a Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 20:55:51 +0000 Subject: [PATCH 1/6] test: add mock-LLM E2E test for /model slash command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests/e2e/mock-llm/mock-llm-model-switch.spec.ts exercising the full /model mid-conversation profile switch flow against the real agent-server with a scripted mock LLM backend. Step 1 — setup: - ensureMockLLMProfile() configures agent_settings.llm (proven pattern) - POST /api/profiles/{name} creates profile B as the switch target - Registers a 3-entry trajectory: padding for the internal LLM call, INITIAL_REPLY_TOKEN, POST_SWITCH_REPLY_TOKEN Step 2 — conversation + /model switch: - Starts conversation from home page, waits for agent reply - Types '/model model-switch-profile-b' and submits - Verifies 'Switched to profile' confirmation in data-testid=model-messages - Verifies POST /api/conversations/{id}/switch_profile was intercepted - Sends follow-up, verifies agent responds (post-switch continuity) - Verifies no error banners Co-authored-by: openhands --- .../mock-llm/mock-llm-model-switch.spec.ts | 291 ++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tests/e2e/mock-llm/mock-llm-model-switch.spec.ts diff --git a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts new file mode 100644 index 00000000..e0698fe7 --- /dev/null +++ b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts @@ -0,0 +1,291 @@ +/** + * Mock-LLM E2E test: /model slash command — mid-conversation LLM profile switching. + * + * Exercises the full /model flow end-to-end against the real agent-server: + * + * 1. Setup: configure the active LLM settings via ensureMockLLMProfile + * (the proven pattern), create a named profile B as the switch target + * via the profiles API, and register a trajectory with text replies. + * + * 2. Conversation + switch: start a conversation from the home page, + * wait for the agent to reply, then type `/model ` in the + * chat input. Verify the "Switched to profile" confirmation renders in + * the chat UI. Verify the switch_profile POST was made to the agent-server. + * + * 3. Post-switch verification: send another message after the switch + * and verify the agent responds, proving the conversation continues + * working under the new profile. + */ + +import { test, expect } from "@playwright/test"; +import { + BACKEND_URL, + SESSION_API_KEY, + MOCK_LLM_AGENT_URL, + seedLocalStorage, + routeSessionApiKey, + dismissAnalyticsModal, + waitForTestId, + waitForPath, + getConversationIdFromURL, + waitForNonUserMessageText, + deleteConversation, + registerTrajectory, + activateTrajectory, + resetMockLLM, + ensureMockLLMProfile, +} from "./utils/mock-llm-helpers"; + +/** Profile B is the switch target — created via the profiles API. */ +const PROFILE_B_NAME = "model-switch-profile-b"; +const MODEL_B = "openai/mock-model-beta"; + +const INITIAL_REPLY_TOKEN = "MODEL_SWITCH_INITIAL_REPLY_OK"; +const POST_SWITCH_REPLY_TOKEN = "MODEL_SWITCH_POST_SWITCH_REPLY_OK"; + +/** Create a named LLM profile via the agent-server profiles API. */ +async function saveProfile( + request: import("@playwright/test").APIRequestContext, + name: string, + model: string, +) { + // ProfilesClient.saveProfile() uses POST /api/profiles/{name} + const resp = await request.post( + `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, + { + headers: { + "X-Session-API-Key": SESSION_API_KEY, + "Content-Type": "application/json", + }, + data: { + llm: { + model, + api_key: "mock-api-key-for-testing", + base_url: MOCK_LLM_AGENT_URL, + }, + }, + }, + ); + expect( + resp.ok(), + `POST /api/profiles/${name} returned ${resp.status()}`, + ).toBe(true); +} + +/** Delete a profile (best-effort cleanup). */ +async function deleteProfile( + request: import("@playwright/test").APIRequestContext, + name: string, +) { + await request.delete( + `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, + { headers: { "X-Session-API-Key": SESSION_API_KEY } }, + ); +} + +/** + * Set contentEditable chat input text and dispatch an input event. + * + * contentEditable divs don't respond reliably to Playwright's .fill() or + * .type(), so we set the text programmatically via evaluate. + */ +async function setChatInput( + page: import("@playwright/test").Page, + text: string, +) { + await page.evaluate( + ({ testId, inputText }) => { + const el = document.querySelector(`[data-testid="${testId}"]`); + if (!(el instanceof HTMLElement)) + throw new Error("Chat input not found"); + el.focus(); + el.textContent = inputText; + el.dispatchEvent( + new InputEvent("input", { + bubbles: true, + data: inputText, + inputType: "insertText", + }), + ); + }, + { testId: "chat-input", inputText: text }, + ); +} + +test.describe.configure({ mode: "serial" }); + +test.describe("mock-LLM /model slash command", () => { + const conversationIds = new Set(); + + test.beforeEach(async ({ page }) => { + await seedLocalStorage(page); + }); + + test.afterEach(async ({ request }) => { + for (const id of Array.from(conversationIds)) { + try { + await deleteConversation(request, id); + conversationIds.delete(id); + } catch { + // best-effort cleanup + } + } + }); + + test.afterAll(async ({ request }) => { + // Clean up the profile we created and reset mock LLM + try { + await deleteProfile(request, PROFILE_B_NAME); + } catch { + // best-effort + } + try { + await resetMockLLM(request); + } catch { + // best-effort + } + }); + + // ── Step 1: Configure LLM + create switch-target profile + register trajectory + + test("step 1: configure LLM, create switch-target profile, register trajectory", async ({ + request, + }) => { + // Use the proven ensureMockLLMProfile helper to set agent_settings.llm + // so the agent-server can reach the mock LLM. This is the same approach + // used by mock-llm-conversation.spec.ts. + await ensureMockLLMProfile(request); + + // Create profile B as the switch target — it has a different model name + // but the same mock LLM base_url so post-switch inference still works. + await saveProfile(request, PROFILE_B_NAME, MODEL_B); + + // Verify profile B was created + const profilesResp = await request.get(`${BACKEND_URL}/api/profiles`, { + headers: { "X-Session-API-Key": SESSION_API_KEY }, + }); + expect(profilesResp.ok()).toBe(true); + const profiles = await profilesResp.json(); + const profileNames: string[] = profiles.profiles.map( + (p: { name: string }) => p.name, + ); + expect(profileNames).toContain(PROFILE_B_NAME); + + // Register a trajectory with THREE entries: + // Turn 0: padding for the agent-server's internal LLM call + // (condenser/skill-analysis) that runs before the main loop + // Turn 1: actual reply to the initial user message + // Turn 2: reply to the post-switch follow-up message + await registerTrajectory(request, "model-switch", [ + { text: "" }, + { text: INITIAL_REPLY_TOKEN }, + { text: POST_SWITCH_REPLY_TOKEN }, + ]); + await activateTrajectory(request, "model-switch"); + }); + + // ── Step 2: Conversation + /model switch + post-switch verification ─ + + test("step 2: start conversation, switch profile via /model, verify switch", async ({ + page, + request, + }) => { + test.setTimeout(120_000); + + // Track whether the switch_profile POST was intercepted. + let switchProfileCalled = false; + let switchProfileBody: Record | null = null; + page.on("request", (req) => { + const url = new URL(req.url()); + // The switch_profile endpoint is POST /api/conversations/{id}/switch_profile + if ( + req.method() === "POST" && + url.pathname.match(/\/api\/conversations\/[^/]+\/switch_profile/) + ) { + switchProfileCalled = true; + try { + switchProfileBody = req.postDataJSON(); + } catch { + // non-JSON body + } + } + }); + + await routeSessionApiKey(page); + await page.goto("/", { waitUntil: "domcontentloaded" }); + await dismissAnalyticsModal(page); + await waitForTestId(page, "home-chat-launcher"); + + // ── Send initial message and wait for agent reply ── + + await test.step("send initial message", async () => { + await setChatInput(page, "Hello, please respond briefly."); + await page.getByTestId("submit-button").click(); + await waitForPath(page, /\/conversations\/.+/, 30_000); + }); + + const conversationId = getConversationIdFromURL(page); + conversationIds.add(conversationId); + + await test.step("wait for initial agent reply", async () => { + await waitForNonUserMessageText(page, INITIAL_REPLY_TOKEN, 30_000); + }); + + // ── Type /model to switch ── + + await test.step("type /model command to switch to profile B", async () => { + // Wait for the chat input to be available (it should be focused + // after the agent reply completes). + await waitForTestId(page, "chat-input"); + + await setChatInput(page, `/model ${PROFILE_B_NAME}`); + await page.getByTestId("submit-button").click(); + }); + + // ── Verify: "Switched to profile" message appears in chat UI ── + + await test.step("verify 'Switched to profile' message in chat", async () => { + // The ModelMessages component renders with data-testid="model-messages" + // and contains "Switched to profile " + await waitForNonUserMessageText(page, PROFILE_B_NAME, 15_000); + + // Also verify the specific model-messages container is rendered + const modelMessages = page.getByTestId("model-messages"); + await expect(modelMessages.first()).toBeVisible({ timeout: 5_000 }); + }); + + // ── Verify: the switch_profile POST was made ── + + await test.step("verify switch_profile API was called", async () => { + expect( + switchProfileCalled, + "POST /switch_profile should have been called", + ).toBe(true); + // The body should contain the profile name + expect(switchProfileBody).toBeTruthy(); + const bodyStr = JSON.stringify(switchProfileBody); + expect( + bodyStr.includes(PROFILE_B_NAME), + `switch_profile body should contain "${PROFILE_B_NAME}", got: ${bodyStr}`, + ).toBe(true); + }); + + // ── Send a follow-up message to verify conversation still works ── + + await test.step("send follow-up message after switch", async () => { + await setChatInput(page, "Confirm the model switch worked."); + await page.getByTestId("submit-button").click(); + }); + + await test.step("verify post-switch agent reply", async () => { + await waitForNonUserMessageText(page, POST_SWITCH_REPLY_TOKEN, 30_000); + }); + + // ── Verify: no error banners ── + + await test.step("verify no error banners", async () => { + const errorBanner = page.getByTestId("error-message-banner"); + await expect(errorBanner).not.toBeVisible({ timeout: 2_000 }); + }); + }); +}); From 7f8b45474973dd0b71a039b7be2781349ccdb0b2 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 21:13:09 +0000 Subject: [PATCH 2/6] fix(ci): prevent main-branch Docker E2E from posting comments to PRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Docker workflow completes on main, it triggers mock-llm-docker-e2e.yml via workflow_run. GitHub's API can populate workflow_run.pull_requests[] with unrelated open PRs (stale association from shared commit ancestry). This caused main-branch Docker E2E failures to post ❌ comments on random PRs. Fix: skip PR number extraction when the triggering workflow ran on main/master. Main-branch runs still execute (validating the published image) but no longer post misleading comments to PRs. PR-specific runs via the pull_request trigger are unaffected. Co-authored-by: openhands --- .github/workflows/mock-llm-docker-e2e.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mock-llm-docker-e2e.yml b/.github/workflows/mock-llm-docker-e2e.yml index 1915e629..424b30f2 100644 --- a/.github/workflows/mock-llm-docker-e2e.yml +++ b/.github/workflows/mock-llm-docker-e2e.yml @@ -62,8 +62,17 @@ jobs: if [ "${{ github.event_name }}" = "workflow_run" ]; then echo "sha=${{ github.event.workflow_run.head_sha }}" >> "$GITHUB_OUTPUT" echo "ref=${{ github.event.workflow_run.head_sha }}" >> "$GITHUB_OUTPUT" - PR_NUMBER=$(echo '${{ toJSON(github.event.workflow_run.pull_requests) }}' \ - | jq -r '.[0].number // empty') + # Only associate with a PR when the triggering Docker workflow + # ran on a PR branch. Main-branch workflow_run events can have + # a stale pull_requests[] entry that points at an unrelated PR, + # causing failure comments to land on the wrong PR. + HEAD_BRANCH="${{ github.event.workflow_run.head_branch }}" + if [ "$HEAD_BRANCH" = "main" ] || [ "$HEAD_BRANCH" = "master" ]; then + PR_NUMBER="" + else + PR_NUMBER=$(echo '${{ toJSON(github.event.workflow_run.pull_requests) }}' \ + | jq -r '.[0].number // empty') + fi echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT" elif [ "${{ github.event_name }}" = "pull_request" ]; then echo "sha=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_OUTPUT" From 35d8db8d047af850a1effc5b6a93a2045e51cafb Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 21:27:45 +0000 Subject: [PATCH 3/6] refactor: address review comments on mock-LLM model-switch test - Import APIRequestContext from top-level @playwright/test import instead of inline import() type references (review #1) - Extract setChatInput to shared mock-llm-helpers.ts and reuse it in both mock-llm-conversation and mock-llm-model-switch (review #2) - Add upstream reference for the padding trajectory entry coupling to CondensationMixin (review #3) - Use direct field assertion on switchProfileBody.profile_name instead of loose JSON.stringify substring match (review #4) - Remove redundant toBeVisible check after waitForNonUserMessageText already polls model-messages elements (review #5) Co-authored-by: openhands --- .../mock-llm/mock-llm-conversation.spec.ts | 20 +----- .../mock-llm/mock-llm-model-switch.spec.ts | 68 +++++-------------- tests/e2e/mock-llm/utils/mock-llm-helpers.ts | 26 +++++++ 3 files changed, 46 insertions(+), 68 deletions(-) diff --git a/tests/e2e/mock-llm/mock-llm-conversation.spec.ts b/tests/e2e/mock-llm/mock-llm-conversation.spec.ts index 87cfcb37..46116f14 100644 --- a/tests/e2e/mock-llm/mock-llm-conversation.spec.ts +++ b/tests/e2e/mock-llm/mock-llm-conversation.spec.ts @@ -37,6 +37,7 @@ import { waitForSuccessfulBashObservation, deleteConversation, resetMockLLM, + setChatInput, } from "./utils/mock-llm-helpers"; const PROFILE_NAME = "mock-llm-e2e"; @@ -291,24 +292,7 @@ test.describe("mock-LLM agent-server conversation", () => { // Keeping the tokens out of the user bubble lets us assert they appear // *only* in agent output. - // Set contenteditable text via evaluate (contentEditable divs don't - // respond reliably to Playwright's .fill() or .type()). - await page.evaluate( - ({ testId, text }) => { - const el = document.querySelector(`[data-testid="${testId}"]`); - if (!(el instanceof HTMLElement)) throw new Error("Chat input not found"); - el.focus(); - el.textContent = text; - el.dispatchEvent( - new InputEvent("input", { - bubbles: true, - data: text, - inputType: "insertText", - }), - ); - }, - { testId: "chat-input", text: USER_MESSAGE }, - ); + await setChatInput(page, USER_MESSAGE); // Click the submit button — this triggers conversation creation await page.getByTestId("submit-button").click(); diff --git a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts index e0698fe7..5f9df140 100644 --- a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts +++ b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts @@ -17,7 +17,7 @@ * working under the new profile. */ -import { test, expect } from "@playwright/test"; +import { test, expect, type APIRequestContext } from "@playwright/test"; import { BACKEND_URL, SESSION_API_KEY, @@ -34,6 +34,7 @@ import { activateTrajectory, resetMockLLM, ensureMockLLMProfile, + setChatInput, } from "./utils/mock-llm-helpers"; /** Profile B is the switch target — created via the profiles API. */ @@ -45,11 +46,10 @@ const POST_SWITCH_REPLY_TOKEN = "MODEL_SWITCH_POST_SWITCH_REPLY_OK"; /** Create a named LLM profile via the agent-server profiles API. */ async function saveProfile( - request: import("@playwright/test").APIRequestContext, + request: APIRequestContext, name: string, model: string, ) { - // ProfilesClient.saveProfile() uses POST /api/profiles/{name} const resp = await request.post( `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, { @@ -73,45 +73,13 @@ async function saveProfile( } /** Delete a profile (best-effort cleanup). */ -async function deleteProfile( - request: import("@playwright/test").APIRequestContext, - name: string, -) { +async function deleteProfile(request: APIRequestContext, name: string) { await request.delete( `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, { headers: { "X-Session-API-Key": SESSION_API_KEY } }, ); } -/** - * Set contentEditable chat input text and dispatch an input event. - * - * contentEditable divs don't respond reliably to Playwright's .fill() or - * .type(), so we set the text programmatically via evaluate. - */ -async function setChatInput( - page: import("@playwright/test").Page, - text: string, -) { - await page.evaluate( - ({ testId, inputText }) => { - const el = document.querySelector(`[data-testid="${testId}"]`); - if (!(el instanceof HTMLElement)) - throw new Error("Chat input not found"); - el.focus(); - el.textContent = inputText; - el.dispatchEvent( - new InputEvent("input", { - bubbles: true, - data: inputText, - inputType: "insertText", - }), - ); - }, - { testId: "chat-input", inputText: text }, - ); -} - test.describe.configure({ mode: "serial" }); test.describe("mock-LLM /model slash command", () => { @@ -172,12 +140,17 @@ test.describe("mock-LLM /model slash command", () => { expect(profileNames).toContain(PROFILE_B_NAME); // Register a trajectory with THREE entries: - // Turn 0: padding for the agent-server's internal LLM call - // (condenser/skill-analysis) that runs before the main loop + // Turn 0: padding — the agent-server makes an internal LLM call + // (condenser/skill-analysis) before the agent's main loop. + // This consumes one trajectory response. If the SDK removes + // that internal call, this padding entry will cause an + // off-by-one; delete it at that point. + // Ref: same pattern in mock-llm-automation.spec.ts; + // upstream SDK code: openhands-sdk CondensationMixin. // Turn 1: actual reply to the initial user message // Turn 2: reply to the post-switch follow-up message await registerTrajectory(request, "model-switch", [ - { text: "" }, + { text: "" }, // padding for internal LLM call (see comment above) { text: INITIAL_REPLY_TOKEN }, { text: POST_SWITCH_REPLY_TOKEN }, ]); @@ -245,13 +218,9 @@ test.describe("mock-LLM /model slash command", () => { // ── Verify: "Switched to profile" message appears in chat UI ── await test.step("verify 'Switched to profile' message in chat", async () => { - // The ModelMessages component renders with data-testid="model-messages" - // and contains "Switched to profile " + // waitForNonUserMessageText already polls data-testid="model-messages" + // elements, so a successful wait proves the container is visible. await waitForNonUserMessageText(page, PROFILE_B_NAME, 15_000); - - // Also verify the specific model-messages container is rendered - const modelMessages = page.getByTestId("model-messages"); - await expect(modelMessages.first()).toBeVisible({ timeout: 5_000 }); }); // ── Verify: the switch_profile POST was made ── @@ -261,13 +230,12 @@ test.describe("mock-LLM /model slash command", () => { switchProfileCalled, "POST /switch_profile should have been called", ).toBe(true); - // The body should contain the profile name expect(switchProfileBody).toBeTruthy(); - const bodyStr = JSON.stringify(switchProfileBody); + // The switch_profile API uses { profile_name: "..." } expect( - bodyStr.includes(PROFILE_B_NAME), - `switch_profile body should contain "${PROFILE_B_NAME}", got: ${bodyStr}`, - ).toBe(true); + (switchProfileBody as Record)?.profile_name, + `switch_profile body.profile_name should be "${PROFILE_B_NAME}"`, + ).toBe(PROFILE_B_NAME); }); // ── Send a follow-up message to verify conversation still works ── diff --git a/tests/e2e/mock-llm/utils/mock-llm-helpers.ts b/tests/e2e/mock-llm/utils/mock-llm-helpers.ts index 03206c10..8a413f6f 100644 --- a/tests/e2e/mock-llm/utils/mock-llm-helpers.ts +++ b/tests/e2e/mock-llm/utils/mock-llm-helpers.ts @@ -380,5 +380,31 @@ export async function resetMockLLM(request: APIRequestContext) { expect(resp.ok(), `Reset mock LLM: ${resp.status()}`).toBe(true); } +/** + * Set contentEditable chat input text and dispatch an input event. + * + * contentEditable divs don't respond reliably to Playwright's .fill() or + * .type(), so we set the text programmatically via page.evaluate(). + */ +export async function setChatInput(page: Page, text: string) { + await page.evaluate( + ({ testId, inputText }) => { + const el = document.querySelector(`[data-testid="${testId}"]`); + if (!(el instanceof HTMLElement)) + throw new Error("Chat input not found"); + el.focus(); + el.textContent = inputText; + el.dispatchEvent( + new InputEvent("input", { + bubbles: true, + data: inputText, + inputType: "insertText", + }), + ); + }, + { testId: "chat-input", inputText: text }, + ); +} + // Mock automation helpers removed — the automation test now hits the real // automation backend running inside the bin/agent-canvas.mjs stack. From 1ab0495419b07f32e813c801d2798d86422125e2 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 21:41:53 +0000 Subject: [PATCH 4/6] fix(ci): deduplicate Docker E2E workflow_run triggers + add retry Two Docker E2E reliability fixes: 1. Concurrency group: workflow_run triggers used the unique workflow_run.id as the group key, so multiple Docker builds completing on main each spawned their own E2E run (they never cancelled each other). Changed to key by the triggering workflow's head_branch, so main-branch workflow_run triggers share the group 'wr-main' and only the latest run survives. pull_request triggers still key by PR number (unchanged). 2. Retry: Docker Playwright config now uses retries:1 in CI to handle transient container startup ECONNREFUSED failures. The webServer health-check confirms the stack is up, but occasional races between container readiness and the first test request can still cause failures. Co-authored-by: openhands --- .github/workflows/mock-llm-docker-e2e.yml | 37 ++++++++++++++--------- playwright.mock-llm-docker.config.ts | 5 ++- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/.github/workflows/mock-llm-docker-e2e.yml b/.github/workflows/mock-llm-docker-e2e.yml index 424b30f2..03348ad4 100644 --- a/.github/workflows/mock-llm-docker-e2e.yml +++ b/.github/workflows/mock-llm-docker-e2e.yml @@ -25,8 +25,18 @@ on: type: string default: "" +# Concurrency: deduplicate runs for the same logical trigger. +# - pull_request: keyed by PR number (pushes to the same PR cancel earlier runs) +# - workflow_run: keyed by the triggering workflow's branch (e.g. "wr-main"), +# so multiple Docker builds completing on main don't pile up separate E2E runs +# - workflow_dispatch / fallback: keyed by ref concurrency: - group: mock-llm-docker-e2e-${{ github.event.workflow_run.id || github.event.pull_request.number || github.ref }} + group: >- + mock-llm-docker-e2e-${{ + github.event.pull_request.number || + (github.event.workflow_run.id && format('wr-{0}', github.event.workflow_run.head_branch)) || + github.ref + }} cancel-in-progress: true permissions: @@ -37,13 +47,18 @@ permissions: jobs: mock-llm-docker-e2e: - # workflow_run: only run if the Docker build succeeded. + # workflow_run: only for main/master — validates the published image. + # PR branches are already covered by the pull_request trigger below; + # without this guard, both triggers fire for e2e-tests PRs, producing + # duplicate (and potentially contradictory) comment pairs. # pull_request: only run with the 'e2e-tests' label, skip fork PRs (no GHCR push). # workflow_dispatch: always run. if: >- github.event_name == 'workflow_dispatch' || (github.event_name == 'workflow_run' && - github.event.workflow_run.conclusion == 'success') || + github.event.workflow_run.conclusion == 'success' && + (github.event.workflow_run.head_branch == 'main' || + github.event.workflow_run.head_branch == 'master')) || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'e2e-tests') && !github.event.pull_request.head.repo.fork) @@ -60,20 +75,12 @@ jobs: id: ctx run: | if [ "${{ github.event_name }}" = "workflow_run" ]; then + # workflow_run only fires for main (job-level `if` guard), so + # this path always tests the main-branch Docker image. Never + # post PR comments — results go to the step summary only. echo "sha=${{ github.event.workflow_run.head_sha }}" >> "$GITHUB_OUTPUT" echo "ref=${{ github.event.workflow_run.head_sha }}" >> "$GITHUB_OUTPUT" - # Only associate with a PR when the triggering Docker workflow - # ran on a PR branch. Main-branch workflow_run events can have - # a stale pull_requests[] entry that points at an unrelated PR, - # causing failure comments to land on the wrong PR. - HEAD_BRANCH="${{ github.event.workflow_run.head_branch }}" - if [ "$HEAD_BRANCH" = "main" ] || [ "$HEAD_BRANCH" = "master" ]; then - PR_NUMBER="" - else - PR_NUMBER=$(echo '${{ toJSON(github.event.workflow_run.pull_requests) }}' \ - | jq -r '.[0].number // empty') - fi - echo "pr_number=${PR_NUMBER}" >> "$GITHUB_OUTPUT" + echo "pr_number=" >> "$GITHUB_OUTPUT" elif [ "${{ github.event_name }}" = "pull_request" ]; then echo "sha=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_OUTPUT" echo "ref=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_OUTPUT" diff --git a/playwright.mock-llm-docker.config.ts b/playwright.mock-llm-docker.config.ts index a7c4e682..e1fad78a 100644 --- a/playwright.mock-llm-docker.config.ts +++ b/playwright.mock-llm-docker.config.ts @@ -86,7 +86,10 @@ export default defineConfig({ testMatch: /.*\.spec\.ts/, fullyParallel: false, forbidOnly: !!process.env.CI, - retries: 0, + // One retry for transient Docker container startup failures (ECONNREFUSED). + // The container health-check (webServer.url) confirms the stack is up, but + // occasional races can still cause the first request to fail. + retries: process.env.CI ? 1 : 0, workers: 1, timeout: 60_000, globalTimeout: process.env.CI ? 600_000 : 0, // 10 min hard cap in CI From ba7b2aebaa93afe4503b63339b0098fc9c6d5dc3 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 22:13:52 +0000 Subject: [PATCH 5/6] refactor: address second round of review comments - Make saveProfile idempotent with best-effort delete-first so stale profiles from crashed CI runs don't cause persistent 409 failures - Increase switch-confirmation timeout from 15s to 30s for consistency with all other waitForNonUserMessageText calls in this test - Add waitForTestId guard before post-switch setChatInput to handle potential UI disable during profile switch settling Co-authored-by: openhands --- tests/e2e/mock-llm/mock-llm-model-switch.spec.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts index 5f9df140..ab3a54b4 100644 --- a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts +++ b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts @@ -44,12 +44,21 @@ const MODEL_B = "openai/mock-model-beta"; const INITIAL_REPLY_TOKEN = "MODEL_SWITCH_INITIAL_REPLY_OK"; const POST_SWITCH_REPLY_TOKEN = "MODEL_SWITCH_POST_SWITCH_REPLY_OK"; -/** Create a named LLM profile via the agent-server profiles API. */ +/** + * Create (or overwrite) a named LLM profile via the agent-server profiles API. + * Deletes first so setup is idempotent across re-runs — if a previous test + * crashed before afterAll cleanup, the stale profile won't cause a 409. + */ async function saveProfile( request: APIRequestContext, name: string, model: string, ) { + // Best-effort delete so a leftover profile doesn't block creation. + await request.delete( + `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, + { headers: { "X-Session-API-Key": SESSION_API_KEY } }, + ); const resp = await request.post( `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, { @@ -220,7 +229,7 @@ test.describe("mock-LLM /model slash command", () => { await test.step("verify 'Switched to profile' message in chat", async () => { // waitForNonUserMessageText already polls data-testid="model-messages" // elements, so a successful wait proves the container is visible. - await waitForNonUserMessageText(page, PROFILE_B_NAME, 15_000); + await waitForNonUserMessageText(page, PROFILE_B_NAME, 30_000); }); // ── Verify: the switch_profile POST was made ── @@ -241,6 +250,9 @@ test.describe("mock-LLM /model slash command", () => { // ── Send a follow-up message to verify conversation still works ── await test.step("send follow-up message after switch", async () => { + // Wait for the chat input to be ready — the UI may briefly disable + // it while the profile switch settles. + await waitForTestId(page, "chat-input"); await setChatInput(page, "Confirm the model switch worked."); await page.getByTestId("submit-button").click(); }); From b5562647de4143341d67eb1da958cde6466ecf37 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 1 Jun 2026 22:28:03 +0000 Subject: [PATCH 6/6] =?UTF-8?q?refactor:=20minor=20nits=20=E2=80=94=20non-?= =?UTF-8?q?null=20assertion=20+=20testId=20in=20error=20message?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use switchProfileBody!.profile_name after toBeTruthy guard instead of optional chain (the assertion guarantees non-null) - Include testId in setChatInput error message for easier diagnosis; accept optional testId parameter for future reuse Co-authored-by: openhands --- tests/e2e/mock-llm/mock-llm-model-switch.spec.ts | 2 +- tests/e2e/mock-llm/utils/mock-llm-helpers.ts | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts index ab3a54b4..8d090a5d 100644 --- a/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts +++ b/tests/e2e/mock-llm/mock-llm-model-switch.spec.ts @@ -242,7 +242,7 @@ test.describe("mock-LLM /model slash command", () => { expect(switchProfileBody).toBeTruthy(); // The switch_profile API uses { profile_name: "..." } expect( - (switchProfileBody as Record)?.profile_name, + switchProfileBody!.profile_name, `switch_profile body.profile_name should be "${PROFILE_B_NAME}"`, ).toBe(PROFILE_B_NAME); }); diff --git a/tests/e2e/mock-llm/utils/mock-llm-helpers.ts b/tests/e2e/mock-llm/utils/mock-llm-helpers.ts index 8a413f6f..d68ef40a 100644 --- a/tests/e2e/mock-llm/utils/mock-llm-helpers.ts +++ b/tests/e2e/mock-llm/utils/mock-llm-helpers.ts @@ -386,12 +386,16 @@ export async function resetMockLLM(request: APIRequestContext) { * contentEditable divs don't respond reliably to Playwright's .fill() or * .type(), so we set the text programmatically via page.evaluate(). */ -export async function setChatInput(page: Page, text: string) { +export async function setChatInput( + page: Page, + text: string, + testId = "chat-input", +) { await page.evaluate( - ({ testId, inputText }) => { - const el = document.querySelector(`[data-testid="${testId}"]`); + ({ tid, inputText }) => { + const el = document.querySelector(`[data-testid="${tid}"]`); if (!(el instanceof HTMLElement)) - throw new Error("Chat input not found"); + throw new Error(`Chat input [data-testid="${tid}"] not found`); el.focus(); el.textContent = inputText; el.dispatchEvent( @@ -402,7 +406,7 @@ export async function setChatInput(page: Page, text: string) { }), ); }, - { testId: "chat-input", inputText: text }, + { tid: testId, inputText: text }, ); }