-
Notifications
You must be signed in to change notification settings - Fork 18
test: add mock-LLM E2E test for /model slash command (mid-conversation profile switch) #998
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b566848
7f8b454
35d8db8
1ab0495
ba7b2ae
b556264
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,271 @@ | ||
| /** | ||
| * Mock-LLM E2E test: /model slash command — mid-conversation LLM profile switching. | ||
| * | ||
| * Exercises the full /model flow end-to-end against the real agent-server: | ||
| * | ||
| * 1. Setup: configure the active LLM settings via ensureMockLLMProfile | ||
| * (the proven pattern), create a named profile B as the switch target | ||
| * via the profiles API, and register a trajectory with text replies. | ||
| * | ||
| * 2. Conversation + switch: start a conversation from the home page, | ||
| * wait for the agent to reply, then type `/model <profile-B>` in the | ||
| * chat input. Verify the "Switched to profile" confirmation renders in | ||
| * the chat UI. Verify the switch_profile POST was made to the agent-server. | ||
| * | ||
| * 3. Post-switch verification: send another message after the switch | ||
| * and verify the agent responds, proving the conversation continues | ||
| * working under the new profile. | ||
| */ | ||
|
|
||
| import { test, expect, type APIRequestContext } from "@playwright/test"; | ||
| import { | ||
| BACKEND_URL, | ||
| SESSION_API_KEY, | ||
| MOCK_LLM_AGENT_URL, | ||
| seedLocalStorage, | ||
| routeSessionApiKey, | ||
| dismissAnalyticsModal, | ||
| waitForTestId, | ||
| waitForPath, | ||
| getConversationIdFromURL, | ||
| waitForNonUserMessageText, | ||
| deleteConversation, | ||
| registerTrajectory, | ||
| activateTrajectory, | ||
| resetMockLLM, | ||
| ensureMockLLMProfile, | ||
| setChatInput, | ||
| } from "./utils/mock-llm-helpers"; | ||
|
|
||
| /** Profile B is the switch target — created via the profiles API. */ | ||
| const PROFILE_B_NAME = "model-switch-profile-b"; | ||
| const MODEL_B = "openai/mock-model-beta"; | ||
|
|
||
| const INITIAL_REPLY_TOKEN = "MODEL_SWITCH_INITIAL_REPLY_OK"; | ||
| const POST_SWITCH_REPLY_TOKEN = "MODEL_SWITCH_POST_SWITCH_REPLY_OK"; | ||
|
|
||
| /** | ||
| * Create (or overwrite) a named LLM profile via the agent-server profiles API. | ||
| * Deletes first so setup is idempotent across re-runs — if a previous test | ||
| * crashed before afterAll cleanup, the stale profile won't cause a 409. | ||
| */ | ||
| async function saveProfile( | ||
| request: APIRequestContext, | ||
| name: string, | ||
| model: string, | ||
| ) { | ||
| // Best-effort delete so a leftover profile doesn't block creation. | ||
| await request.delete( | ||
| `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, | ||
| { headers: { "X-Session-API-Key": SESSION_API_KEY } }, | ||
| ); | ||
| const resp = await request.post( | ||
| `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, | ||
| { | ||
| headers: { | ||
| "X-Session-API-Key": SESSION_API_KEY, | ||
| "Content-Type": "application/json", | ||
| }, | ||
| data: { | ||
| llm: { | ||
| model, | ||
| api_key: "mock-api-key-for-testing", | ||
| base_url: MOCK_LLM_AGENT_URL, | ||
| }, | ||
| }, | ||
| }, | ||
| ); | ||
| expect( | ||
| resp.ok(), | ||
|
malhotra5 marked this conversation as resolved.
|
||
| `POST /api/profiles/${name} returned ${resp.status()}`, | ||
| ).toBe(true); | ||
| } | ||
|
|
||
| /** Delete a profile (best-effort cleanup). */ | ||
| async function deleteProfile(request: APIRequestContext, name: string) { | ||
| await request.delete( | ||
| `${BACKEND_URL}/api/profiles/${encodeURIComponent(name)}`, | ||
| { headers: { "X-Session-API-Key": SESSION_API_KEY } }, | ||
| ); | ||
| } | ||
|
|
||
| test.describe.configure({ mode: "serial" }); | ||
|
|
||
| test.describe("mock-LLM /model slash command", () => { | ||
| const conversationIds = new Set<string>(); | ||
|
|
||
| test.beforeEach(async ({ page }) => { | ||
| await seedLocalStorage(page); | ||
| }); | ||
|
|
||
| test.afterEach(async ({ request }) => { | ||
| for (const id of Array.from(conversationIds)) { | ||
| try { | ||
| await deleteConversation(request, id); | ||
| conversationIds.delete(id); | ||
| } catch { | ||
| // best-effort cleanup | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| test.afterAll(async ({ request }) => { | ||
| // Clean up the profile we created and reset mock LLM | ||
| try { | ||
| await deleteProfile(request, PROFILE_B_NAME); | ||
| } catch { | ||
| // best-effort | ||
| } | ||
| try { | ||
| await resetMockLLM(request); | ||
| } catch { | ||
| // best-effort | ||
| } | ||
| }); | ||
|
|
||
| // ── Step 1: Configure LLM + create switch-target profile + register trajectory | ||
|
|
||
| test("step 1: configure LLM, create switch-target profile, register trajectory", async ({ | ||
| request, | ||
| }) => { | ||
| // Use the proven ensureMockLLMProfile helper to set agent_settings.llm | ||
| // so the agent-server can reach the mock LLM. This is the same approach | ||
| // used by mock-llm-conversation.spec.ts. | ||
| await ensureMockLLMProfile(request); | ||
|
|
||
| // Create profile B as the switch target — it has a different model name | ||
| // but the same mock LLM base_url so post-switch inference still works. | ||
| await saveProfile(request, PROFILE_B_NAME, MODEL_B); | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟠 Important: The empty If that internal call is ever removed, merged, or reordered, this test will fail with a confusing off-by-one trajectory mismatch rather than a clear assertion error. Consider adding a reference to the issue or agent-server code path (e.g. a link to the relevant code or issue number) in the comment so future maintainers know exactly why this padding exists and can find the upstream source.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done — expanded the comment to explain the coupling risk and added a cross-reference to This comment was posted by an AI agent (OpenHands) on behalf of the user. |
||
| // Verify profile B was created | ||
| const profilesResp = await request.get(`${BACKEND_URL}/api/profiles`, { | ||
| headers: { "X-Session-API-Key": SESSION_API_KEY }, | ||
| }); | ||
| expect(profilesResp.ok()).toBe(true); | ||
| const profiles = await profilesResp.json(); | ||
| const profileNames: string[] = profiles.profiles.map( | ||
| (p: { name: string }) => p.name, | ||
| ); | ||
| expect(profileNames).toContain(PROFILE_B_NAME); | ||
|
|
||
| // Register a trajectory with THREE entries: | ||
| // Turn 0: padding — the agent-server makes an internal LLM call | ||
| // (condenser/skill-analysis) before the agent's main loop. | ||
| // This consumes one trajectory response. If the SDK removes | ||
| // that internal call, this padding entry will cause an | ||
| // off-by-one; delete it at that point. | ||
| // Ref: same pattern in mock-llm-automation.spec.ts; | ||
| // upstream SDK code: openhands-sdk CondensationMixin. | ||
| // Turn 1: actual reply to the initial user message | ||
| // Turn 2: reply to the post-switch follow-up message | ||
| await registerTrajectory(request, "model-switch", [ | ||
| { text: "" }, // padding for internal LLM call (see comment above) | ||
| { text: INITIAL_REPLY_TOKEN }, | ||
| { text: POST_SWITCH_REPLY_TOKEN }, | ||
| ]); | ||
| await activateTrajectory(request, "model-switch"); | ||
| }); | ||
|
|
||
| // ── Step 2: Conversation + /model switch + post-switch verification ─ | ||
|
|
||
| test("step 2: start conversation, switch profile via /model, verify switch", async ({ | ||
| page, | ||
| request, | ||
| }) => { | ||
| test.setTimeout(120_000); | ||
|
|
||
| // Track whether the switch_profile POST was intercepted. | ||
| let switchProfileCalled = false; | ||
| let switchProfileBody: Record<string, unknown> | null = null; | ||
| page.on("request", (req) => { | ||
| const url = new URL(req.url()); | ||
| // The switch_profile endpoint is POST /api/conversations/{id}/switch_profile | ||
| if ( | ||
| req.method() === "POST" && | ||
| url.pathname.match(/\/api\/conversations\/[^/]+\/switch_profile/) | ||
| ) { | ||
| switchProfileCalled = true; | ||
| try { | ||
| switchProfileBody = req.postDataJSON(); | ||
| } catch { | ||
| // non-JSON body | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| await routeSessionApiKey(page); | ||
| await page.goto("/", { waitUntil: "domcontentloaded" }); | ||
| await dismissAnalyticsModal(page); | ||
| await waitForTestId(page, "home-chat-launcher"); | ||
|
|
||
| // ── Send initial message and wait for agent reply ── | ||
|
|
||
| await test.step("send initial message", async () => { | ||
| await setChatInput(page, "Hello, please respond briefly."); | ||
| await page.getByTestId("submit-button").click(); | ||
| await waitForPath(page, /\/conversations\/.+/, 30_000); | ||
| }); | ||
|
|
||
| const conversationId = getConversationIdFromURL(page); | ||
| conversationIds.add(conversationId); | ||
|
|
||
| await test.step("wait for initial agent reply", async () => { | ||
| await waitForNonUserMessageText(page, INITIAL_REPLY_TOKEN, 30_000); | ||
| }); | ||
|
|
||
| // ── Type /model <profile-B> to switch ── | ||
|
|
||
| await test.step("type /model command to switch to profile B", async () => { | ||
| // Wait for the chat input to be available (it should be focused | ||
| // after the agent reply completes). | ||
| await waitForTestId(page, "chat-input"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Suggestion: The
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done — removed the redundant This comment was posted by an AI agent (OpenHands) on behalf of the user. |
||
|
|
||
| await setChatInput(page, `/model ${PROFILE_B_NAME}`); | ||
| await page.getByTestId("submit-button").click(); | ||
| }); | ||
|
|
||
| // ── Verify: "Switched to profile" message appears in chat UI ── | ||
|
|
||
| await test.step("verify 'Switched to profile' message in chat", async () => { | ||
| // waitForNonUserMessageText already polls data-testid="model-messages" | ||
| // elements, so a successful wait proves the container is visible. | ||
| await waitForNonUserMessageText(page, PROFILE_B_NAME, 30_000); | ||
| }); | ||
|
|
||
| // ── Verify: the switch_profile POST was made ── | ||
|
|
||
| await test.step("verify switch_profile API was called", async () => { | ||
| expect( | ||
| switchProfileCalled, | ||
| "POST /switch_profile should have been called", | ||
| ).toBe(true); | ||
| expect(switchProfileBody).toBeTruthy(); | ||
| // The switch_profile API uses { profile_name: "..." } | ||
| expect( | ||
| switchProfileBody!.profile_name, | ||
| `switch_profile body.profile_name should be "${PROFILE_B_NAME}"`, | ||
| ).toBe(PROFILE_B_NAME); | ||
| }); | ||
|
|
||
| // ── Send a follow-up message to verify conversation still works ── | ||
|
|
||
| await test.step("send follow-up message after switch", async () => { | ||
| // Wait for the chat input to be ready — the UI may briefly disable | ||
| // it while the profile switch settles. | ||
| await waitForTestId(page, "chat-input"); | ||
| await setChatInput(page, "Confirm the model switch worked."); | ||
|
malhotra5 marked this conversation as resolved.
|
||
| await page.getByTestId("submit-button").click(); | ||
| }); | ||
|
|
||
| await test.step("verify post-switch agent reply", async () => { | ||
| await waitForNonUserMessageText(page, POST_SWITCH_REPLY_TOKEN, 30_000); | ||
| }); | ||
|
|
||
| // ── Verify: no error banners ── | ||
|
|
||
| await test.step("verify no error banners", async () => { | ||
| const errorBanner = page.getByTestId("error-message-banner"); | ||
| await expect(errorBanner).not.toBeVisible({ timeout: 2_000 }); | ||
| }); | ||
| }); | ||
| }); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 Suggestion: Inline
import("@playwright/test")type references are verbose and inconsistent with the file's top-level import. AddAPIRequestContextandPageto the existing import statement instead:Then replace all three occurrences:
request: import("@playwright/test").APIRequestContext→request: APIRequestContextpage: import("@playwright/test").Page→page: PageThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done — imported
APIRequestContextfrom top-level@playwright/testimport; removed all inlineimport()type references.setChatInputmoved to shared helpers so thePagetype import is no longer needed locally either. (commit 35d8db8)This comment was posted by an AI agent (OpenHands) on behalf of the user.