From b56b812506b0d52987b394c6d5365bb221a9f633 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:07:51 +0000 Subject: [PATCH 01/22] Add AGENTS.md with Cursor Cloud specific instructions Co-authored-by: yasser khan --- AGENTS.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000000..478481f3797 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,42 @@ +# AGENTS.md + +## Cursor Cloud specific instructions + +### Overview + +Mattermost Desktop is an Electron app (v40.x) wrapping the Mattermost web app. See `CLAUDE.md` for full architecture and commands reference. + +### Node version + +The project requires Node.js v20.15.0 (specified in `.nvmrc`). Use `nvm` to switch: + +``` +source ~/.nvm/nvm.sh && nvm use 20.15.0 +``` + +### Key dev commands + +All standard commands are documented in `CLAUDE.md` and `package.json`. Quick reference: + +| Command | Purpose | +|---|---| +| `npm run check` | Lint + type-check + unit tests (parallel) | +| `npm run build` | Dev build (main + preload + renderer) | +| `npm run watch` | Dev mode with auto-rebuild and Electron restart | +| `npm run test:unit` | Jest unit tests (73 suites, 1118 tests) | + +### Running on headless Linux (Cloud VM) + +- The VM has an X server on display `:1`. Set `DISPLAY=:1` before launching Electron. +- Chrome sandbox requires root ownership: `sudo chown root:root ./node_modules/electron/dist/chrome-sandbox && sudo chmod 4755 ./node_modules/electron/dist/chrome-sandbox`. This is normally handled by `npm run linux-dev-setup` (called by `npm start` and `npm run watch`), but that script uses `sudo` which may prompt. +- To launch the built app directly: `DISPLAY=:1 npx electron dist/ --disable-dev-mode --no-sandbox` +- DBus errors in the container logs are expected and harmless (no system bus in containers). +- The "Failed to load configuration file" message on first run is normal — the app creates defaults. + +### E2E tests + +E2E tests live in `e2e/` with a separate `package.json`. See `e2e/AGENTS.md` for detailed guidance. Server-backed E2E tests require a running Mattermost server and env vars `MM_TEST_SERVER_URL`, `MM_TEST_USER_NAME`, `MM_TEST_PASSWORD`. Startup/UI-only E2E tests can run without a server. + +### Native modules + +The `postinstall` script runs `electron-builder install-app-deps` to rebuild native modules (registry-js, cf-prefs, etc.) for the current Electron version. If you see native module errors after `npm install`, ensure postinstall completed successfully. From efb61dacc4655eb6a5ac2df803d5cea6fa3e71bf Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:24:53 +0000 Subject: [PATCH 02/22] e2e: post provisioned server URLs as PR comment when E2E tests start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Matterwick provisions cloud Mattermost instances for a PR, the server URLs are now surfaced back to the PR as a comment immediately after the matrix is ready — while tests are still running. This lets developers connect to the exact same servers that are running the CI suite to reproduce and fix failing tests without waiting for the run to finish. Changes: - Add findPrNumber() helper to e2e/utils/github-actions.js, extracting the three-step PR resolution logic (explicit input → run.pull_requests → branch/SHA lookup) that was previously duplicated across removeE2ELabel and the remove-e2e-label job. - Add postServerInfoComment() to e2e/utils/github-actions.js. It builds a markdown table of per-platform server URLs, includes the admin username and server version, and adds a ready-to-run shell snippet. The comment is idempotent: a hidden HTML marker () is used to find and update an existing comment on re-runs rather than appending a new one each time. The admin password is intentionally omitted. - Add post-server-info job to e2e-functional.yml. It runs in parallel with update-initial-status after prepare-matrix succeeds, is skipped for nightly runs (no PR to comment on), and requires issues:write / pull-requests:write permissions. The top-level workflow permissions block is extended to include those two scopes. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 40 +++++++ e2e/utils/github-actions.js | 150 ++++++++++++++++++++++++++- 2 files changed, 188 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 76c8959e150..2dac2ccd4ce 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -41,6 +41,8 @@ on: permissions: contents: read statuses: write + issues: write + pull-requests: write jobs: prepare-matrix: @@ -53,6 +55,44 @@ jobs: run: | echo "platforms=$(echo '${{ inputs.instance_details }}' | jq -c)" >> $GITHUB_OUTPUT + post-server-info: + name: Post server info to PR + needs: prepare-matrix + # Only comment on PR-triggered runs (nightly runs have no PR to comment on). + if: ${{ !inputs.nightly && inputs.pr_number != '' }} + runs-on: ubuntu-24.04 + permissions: + issues: write + pull-requests: write + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Post provisioned server URLs as PR comment + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + const { findPrNumber, postServerInfoComment } = require('./e2e/utils/github-actions.js'); + const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + const prNumber = await findPrNumber({ + github, + context, + prNumberInput: '${{ inputs.pr_number }}', + }); + if (!prNumber) { + core.warning('Could not resolve PR number — skipping server info comment'); + return; + } + await postServerInfoComment({ + github, + context, + platforms, + adminUsername: '${{ inputs.MM_TEST_USER_NAME }}', + serverVersion: '${{ inputs.MM_SERVER_VERSION }}', + prNumber, + }); + update-initial-status: name: Update initial status needs: prepare-matrix diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 4d0cb9c9e7f..4913e2bbebd 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -73,6 +73,150 @@ async function updateFinalStatus({github, context, platforms, outputs, mergedRep })); } +/** + * Resolve the PR number for a workflow run. + * + * Resolution order: + * 1. prNumberInput — explicit value passed by the workflow dispatcher (e.g. Matterwick). + * 2. run.pull_requests — populated for pull_request-triggered runs. + * 3. Branch/SHA lookup — queries open PRs whose head matches the run's head branch and SHA. + * This is the reliable path for workflow_dispatch runs where pull_requests is empty. + * + * @param {Object} params + * @param {Object} params.github - GitHub API client from actions/github-script + * @param {Object} params.context - GitHub Actions context (context.runId must be set) + * @param {string|number} [params.prNumberInput] - Explicit PR number from a workflow input + * @returns {Promise} Resolved PR number, or null if not found + */ +async function findPrNumber({github, context, prNumberInput}) { + const explicit = parseInt(prNumberInput, 10); + if (explicit) { + return explicit; + } + + try { + const run = await github.rest.actions.getWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.runId, + }); + + if (run.data.pull_requests && run.data.pull_requests.length > 0) { + return run.data.pull_requests[0].number; + } + + const branchName = run.data.head_branch; + if (branchName) { + const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + const prs = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + head: `${headOwner}:${branchName}`, + }); + if (prs.data && prs.data.length > 0) { + const matching = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); + return (matching || prs.data[0]).number; + } + } + } catch (error) { + console.log(`Could not resolve PR number: ${error.message}`); + } + + return null; +} + +/** + * Post (or update) a PR comment listing the provisioned Mattermost server URLs so + * developers can connect to those servers to reproduce and debug failing tests. + * + * The comment is idempotent: if a previous comment with the hidden HTML marker already + * exists on the PR (e.g. from a previous run triggered by a push to the same branch), + * it is updated in place rather than a new one being created. + * + * The admin password is intentionally omitted from the comment. Use the + * MM_DESKTOP_E2E_USER_CREDENTIALS repository secret or contact the team. + * + * @param {Object} params + * @param {Object} params.github - GitHub API client from actions/github-script + * @param {Object} params.context - GitHub Actions context + * @param {Array} params.platforms - Array of platform objects from the matrix + * (each must have at least `platform` and `url`) + * @param {string} [params.adminUsername] - Admin username for the test instances + * @param {string} [params.serverVersion] - Mattermost server version under test + * @param {number} params.prNumber - PR number to comment on + */ +async function postServerInfoComment({github, context, platforms, adminUsername, serverVersion, prNumber}) { + const MARKER = ''; + const workflowUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + + const platformRows = platforms. + map((p) => `| \`${p.platform}\` | ${p.url} |`). + join('\n'); + + const lines = [ + MARKER, + '### :test_tube: E2E Test Servers Ready', + '', + 'Matterwick has provisioned the following Mattermost instances for this PR.', + 'Use them to reproduce and debug failing tests against the exact same servers:', + '', + '| Platform | Server URL |', + '|----------|------------|', + platformRows, + '', + ]; + + if (adminUsername) { + lines.push(`**Admin username:** \`${adminUsername}\``); + } + if (serverVersion) { + lines.push(`**Server version:** \`${serverVersion}\``); + } + + lines.push( + '', + '**Run a single spec against one of these servers:**', + '```sh', + 'MM_TEST_SERVER_URL= \\', + ' MM_TEST_USER_NAME= \\', + ' MM_TEST_PASSWORD= \\', + ' npx playwright test --reporter=list --workers=1', + '```', + '', + '> Servers are active for the duration of this workflow run and destroyed afterwards.', + '', + `**Workflow run:** ${workflowUrl}`, + ); + + const body = lines.join('\n'); + const {owner, repo} = context.repo; + + let existingCommentId = null; + try { + const {data: comments} = await github.rest.issues.listComments({ + owner, + repo, + issue_number: prNumber, + per_page: 100, + }); + const existing = comments.find((c) => c.body && c.body.includes(MARKER)); + if (existing) { + existingCommentId = existing.id; + } + } catch (err) { + console.log(`Could not list PR comments: ${err.message}`); + } + + if (existingCommentId) { + await github.rest.issues.updateComment({owner, repo, comment_id: existingCommentId, body}); + console.log(`Updated existing E2E server info comment ${existingCommentId} on PR #${prNumber}`); + } else { + await github.rest.issues.createComment({owner, repo, issue_number: prNumber, body}); + console.log(`Posted E2E server info comment on PR #${prNumber}`); + } +} + /** * Remove E2E/Run label when workflow triggered via Matterwick * @param {Object} params - Parameters object @@ -148,7 +292,9 @@ async function removeE2ELabel({github, context}) { } module.exports = { - updateInitialStatus, - updateFinalStatus, + findPrNumber, + postServerInfoComment, removeE2ELabel, + updateFinalStatus, + updateInitialStatus, }; From 649ede864472b998d01705a79b313b44fb6c0864 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:24:53 +0000 Subject: [PATCH 03/22] e2e: post provisioned server URLs as PR comment when E2E tests start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Matterwick provisions cloud Mattermost instances for a PR, the server URLs are now surfaced back to the PR as a comment immediately after the matrix is ready — while tests are still running. This lets developers connect to the exact same servers that are running the CI suite to reproduce and fix failing tests without waiting for the run to finish. Changes: - Add findPrNumber() helper to e2e/utils/github-actions.js, extracting the three-step PR resolution logic (explicit input → run.pull_requests → branch/SHA lookup) that was previously duplicated across removeE2ELabel and the remove-e2e-label job. - Add postServerInfoComment() to e2e/utils/github-actions.js. It builds a markdown table of per-platform server URLs, includes the admin username and server version, and adds a ready-to-run shell snippet. The comment is idempotent: a hidden HTML marker () is used to find and update an existing comment on re-runs rather than appending a new one each time. The admin password is intentionally omitted. - Add post-server-info job to e2e-functional.yml. It runs in parallel with update-initial-status after prepare-matrix succeeds, is skipped for nightly runs (no PR to comment on), and requires issues:write / pull-requests:write permissions. The top-level workflow permissions block is extended to include those two scopes. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 40 +++++++ e2e/utils/github-actions.js | 150 ++++++++++++++++++++++++++- 2 files changed, 188 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 76c8959e150..2dac2ccd4ce 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -41,6 +41,8 @@ on: permissions: contents: read statuses: write + issues: write + pull-requests: write jobs: prepare-matrix: @@ -53,6 +55,44 @@ jobs: run: | echo "platforms=$(echo '${{ inputs.instance_details }}' | jq -c)" >> $GITHUB_OUTPUT + post-server-info: + name: Post server info to PR + needs: prepare-matrix + # Only comment on PR-triggered runs (nightly runs have no PR to comment on). + if: ${{ !inputs.nightly && inputs.pr_number != '' }} + runs-on: ubuntu-24.04 + permissions: + issues: write + pull-requests: write + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Post provisioned server URLs as PR comment + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + const { findPrNumber, postServerInfoComment } = require('./e2e/utils/github-actions.js'); + const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + const prNumber = await findPrNumber({ + github, + context, + prNumberInput: '${{ inputs.pr_number }}', + }); + if (!prNumber) { + core.warning('Could not resolve PR number — skipping server info comment'); + return; + } + await postServerInfoComment({ + github, + context, + platforms, + adminUsername: '${{ inputs.MM_TEST_USER_NAME }}', + serverVersion: '${{ inputs.MM_SERVER_VERSION }}', + prNumber, + }); + update-initial-status: name: Update initial status needs: prepare-matrix diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 4d0cb9c9e7f..4913e2bbebd 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -73,6 +73,150 @@ async function updateFinalStatus({github, context, platforms, outputs, mergedRep })); } +/** + * Resolve the PR number for a workflow run. + * + * Resolution order: + * 1. prNumberInput — explicit value passed by the workflow dispatcher (e.g. Matterwick). + * 2. run.pull_requests — populated for pull_request-triggered runs. + * 3. Branch/SHA lookup — queries open PRs whose head matches the run's head branch and SHA. + * This is the reliable path for workflow_dispatch runs where pull_requests is empty. + * + * @param {Object} params + * @param {Object} params.github - GitHub API client from actions/github-script + * @param {Object} params.context - GitHub Actions context (context.runId must be set) + * @param {string|number} [params.prNumberInput] - Explicit PR number from a workflow input + * @returns {Promise} Resolved PR number, or null if not found + */ +async function findPrNumber({github, context, prNumberInput}) { + const explicit = parseInt(prNumberInput, 10); + if (explicit) { + return explicit; + } + + try { + const run = await github.rest.actions.getWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.runId, + }); + + if (run.data.pull_requests && run.data.pull_requests.length > 0) { + return run.data.pull_requests[0].number; + } + + const branchName = run.data.head_branch; + if (branchName) { + const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + const prs = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + head: `${headOwner}:${branchName}`, + }); + if (prs.data && prs.data.length > 0) { + const matching = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); + return (matching || prs.data[0]).number; + } + } + } catch (error) { + console.log(`Could not resolve PR number: ${error.message}`); + } + + return null; +} + +/** + * Post (or update) a PR comment listing the provisioned Mattermost server URLs so + * developers can connect to those servers to reproduce and debug failing tests. + * + * The comment is idempotent: if a previous comment with the hidden HTML marker already + * exists on the PR (e.g. from a previous run triggered by a push to the same branch), + * it is updated in place rather than a new one being created. + * + * The admin password is intentionally omitted from the comment. Use the + * MM_DESKTOP_E2E_USER_CREDENTIALS repository secret or contact the team. + * + * @param {Object} params + * @param {Object} params.github - GitHub API client from actions/github-script + * @param {Object} params.context - GitHub Actions context + * @param {Array} params.platforms - Array of platform objects from the matrix + * (each must have at least `platform` and `url`) + * @param {string} [params.adminUsername] - Admin username for the test instances + * @param {string} [params.serverVersion] - Mattermost server version under test + * @param {number} params.prNumber - PR number to comment on + */ +async function postServerInfoComment({github, context, platforms, adminUsername, serverVersion, prNumber}) { + const MARKER = ''; + const workflowUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + + const platformRows = platforms. + map((p) => `| \`${p.platform}\` | ${p.url} |`). + join('\n'); + + const lines = [ + MARKER, + '### :test_tube: E2E Test Servers Ready', + '', + 'Matterwick has provisioned the following Mattermost instances for this PR.', + 'Use them to reproduce and debug failing tests against the exact same servers:', + '', + '| Platform | Server URL |', + '|----------|------------|', + platformRows, + '', + ]; + + if (adminUsername) { + lines.push(`**Admin username:** \`${adminUsername}\``); + } + if (serverVersion) { + lines.push(`**Server version:** \`${serverVersion}\``); + } + + lines.push( + '', + '**Run a single spec against one of these servers:**', + '```sh', + 'MM_TEST_SERVER_URL= \\', + ' MM_TEST_USER_NAME= \\', + ' MM_TEST_PASSWORD= \\', + ' npx playwright test --reporter=list --workers=1', + '```', + '', + '> Servers are active for the duration of this workflow run and destroyed afterwards.', + '', + `**Workflow run:** ${workflowUrl}`, + ); + + const body = lines.join('\n'); + const {owner, repo} = context.repo; + + let existingCommentId = null; + try { + const {data: comments} = await github.rest.issues.listComments({ + owner, + repo, + issue_number: prNumber, + per_page: 100, + }); + const existing = comments.find((c) => c.body && c.body.includes(MARKER)); + if (existing) { + existingCommentId = existing.id; + } + } catch (err) { + console.log(`Could not list PR comments: ${err.message}`); + } + + if (existingCommentId) { + await github.rest.issues.updateComment({owner, repo, comment_id: existingCommentId, body}); + console.log(`Updated existing E2E server info comment ${existingCommentId} on PR #${prNumber}`); + } else { + await github.rest.issues.createComment({owner, repo, issue_number: prNumber, body}); + console.log(`Posted E2E server info comment on PR #${prNumber}`); + } +} + /** * Remove E2E/Run label when workflow triggered via Matterwick * @param {Object} params - Parameters object @@ -148,7 +292,9 @@ async function removeE2ELabel({github, context}) { } module.exports = { - updateInitialStatus, - updateFinalStatus, + findPrNumber, + postServerInfoComment, removeE2ELabel, + updateFinalStatus, + updateInitialStatus, }; From 30b05badb5256ac2f28d4e410f4e5bdbbdc1f6c8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:40:08 +0000 Subject: [PATCH 04/22] e2e: fix script injection in post-server-info workflow step Inputs interpolated directly into single-quoted JS string literals inside an actions/github-script step could be escaped by a malicious dispatcher (e.g. a value containing a single quote) to break out of the string and run arbitrary JavaScript on the runner, which holds issues:write and pull-requests:write permissions. Fix: move all three user-controlled inputs (pr_number, MM_TEST_USER_NAME, MM_SERVER_VERSION) out of the ${{ }} expression context and into env: variables on the step, then read them via process.env.* inside the script. The platforms JSON comes from a trusted internal job output (not a raw workflow input) so its interpolation is safe and is left as-is. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 2dac2ccd4ce..61ec43ec949 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -70,6 +70,10 @@ jobs: - name: Post provisioned server URLs as PR comment uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + PR_NUMBER: ${{ inputs.pr_number }} + ADMIN_USERNAME: ${{ inputs.MM_TEST_USER_NAME }} + SERVER_VERSION: ${{ inputs.MM_SERVER_VERSION }} with: github-token: ${{ github.token }} script: | @@ -78,7 +82,7 @@ jobs: const prNumber = await findPrNumber({ github, context, - prNumberInput: '${{ inputs.pr_number }}', + prNumberInput: process.env.PR_NUMBER, }); if (!prNumber) { core.warning('Could not resolve PR number — skipping server info comment'); @@ -88,8 +92,8 @@ jobs: github, context, platforms, - adminUsername: '${{ inputs.MM_TEST_USER_NAME }}', - serverVersion: '${{ inputs.MM_SERVER_VERSION }}', + adminUsername: process.env.ADMIN_USERNAME, + serverVersion: process.env.SERVER_VERSION, prNumber, }); From 6a712bd6b5e4f2b1348e3c1b82674476eb330214 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:49:52 +0000 Subject: [PATCH 05/22] e2e: address CodeRabbit review comments on post-server-info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues raised in the CodeRabbit review of PR #3774: 1. Overly broad top-level permissions: remove issues:write and pull-requests:write from the workflow-level permissions block. Those scopes are already granted at job level on post-server-info and remove-e2e-label which are the only jobs that need them. 2. Comment posting not fault-tolerant: wrap postServerInfoComment in a try/catch inside the github-script step so a GitHub API error (e.g. rate limit, permissions) logs a warning but does not fail the post-server-info job and block the overall workflow run. 3. Loose PR number parsing in findPrNumber: replace parseInt which accepts '123abc' and negative values with strict validation — trim the input, test against /^\d+$/, then confirm the result is a positive integer before returning. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 22 ++++++++++++---------- e2e/utils/github-actions.js | 9 ++++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 61ec43ec949..48e87c5ea5d 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -41,8 +41,6 @@ on: permissions: contents: read statuses: write - issues: write - pull-requests: write jobs: prepare-matrix: @@ -88,14 +86,18 @@ jobs: core.warning('Could not resolve PR number — skipping server info comment'); return; } - await postServerInfoComment({ - github, - context, - platforms, - adminUsername: process.env.ADMIN_USERNAME, - serverVersion: process.env.SERVER_VERSION, - prNumber, - }); + try { + await postServerInfoComment({ + github, + context, + platforms, + adminUsername: process.env.ADMIN_USERNAME, + serverVersion: process.env.SERVER_VERSION, + prNumber, + }); + } catch (err) { + core.warning(`Failed to post server info comment: ${err.message}`); + } update-initial-status: name: Update initial status diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 4913e2bbebd..f8bcb68c78d 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -89,9 +89,12 @@ async function updateFinalStatus({github, context, platforms, outputs, mergedRep * @returns {Promise} Resolved PR number, or null if not found */ async function findPrNumber({github, context, prNumberInput}) { - const explicit = parseInt(prNumberInput, 10); - if (explicit) { - return explicit; + const trimmed = String(prNumberInput ?? '').trim(); + if ((/^\d+$/).test(trimmed)) { + const parsed = Number(trimmed); + if (Number.isInteger(parsed) && parsed > 0) { + return parsed; + } } try { From 5b3ee16378bad6946b181df9d30bb3dca1adf592 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 01:52:42 +0000 Subject: [PATCH 06/22] e2e: disable E2E/Run label removal to keep servers alive for agent-driven fixes Matterwick destroys the provisioned cloud Mattermost servers as soon as the E2E/Run label is removed from a PR. To allow agents to connect to those servers and fix failing tests within the same PR run, both label removal paths are disabled: - e2e-functional.yml: remove-e2e-label job is commented out in full. - e2e-label-cleanup.yml: removal logic replaced with a no-op job that logs a message; the workflow trigger and permissions block are preserved so the file remains valid YAML and the workflow still runs (harmlessly). Re-enable both when automated fix-and-rerun is no longer needed. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 118 +++++++++++++----------- .github/workflows/e2e-label-cleanup.yml | 39 +++----- 2 files changed, 76 insertions(+), 81 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 48e87c5ea5d..0787c3b41cb 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -240,62 +240,68 @@ jobs: const mergedReportUrl = '${{ needs.merge-e2e-report.outputs.merged-report-url }}' || undefined; await updateFinalStatus({ github, context, platforms, outputs, mergedReportUrl }); - remove-e2e-label: - name: Remove E2E label from PR - runs-on: ubuntu-22.04 - permissions: - issues: write - pull-requests: write - needs: - - e2e-tests - - e2e-policy-tests - - update-final-status - if: always() - steps: - - name: e2e/remove-label-from-pr - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - continue-on-error: true # Label might have been removed manually - with: - github-token: ${{ github.token }} - script: | - // Use pr_number if provided directly by the dispatcher (e.g. Matterwick) - let prNumber = parseInt(process.env.PR_NUMBER) || null; - - // Fall back to looking up the PR by head branch when pr_number was not passed - if (!prNumber) { - const run = await github.rest.actions.getWorkflowRun({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.runId, - }); - const branchName = run.data.head_branch; - if (branchName) { - const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - const prs = await github.rest.pulls.list({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - head: `${headOwner}:${branchName}`, - }); - if (prs.data && prs.data.length > 0) { - const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); - prNumber = (matchingPr || prs.data[0]).number; - } - } - } - - if (prNumber) { - await github.rest.issues.removeLabel({ - issue_number: prNumber, - owner: context.repo.owner, - repo: context.repo.repo, - name: 'E2E/Run', - }); - } else { - console.log('Label removal skipped - could not find associated PR'); - } - env: - PR_NUMBER: ${{ inputs.pr_number }} + # remove-e2e-label is intentionally disabled. + # Removing E2E/Run causes Matterwick to destroy the provisioned servers immediately, + # which prevents agents from connecting to those servers to reproduce and fix + # failing tests in the same PR run. Re-enable once automated fix-and-rerun is no + # longer needed. + # + # remove-e2e-label: + # name: Remove E2E label from PR + # runs-on: ubuntu-22.04 + # permissions: + # issues: write + # pull-requests: write + # needs: + # - e2e-tests + # - e2e-policy-tests + # - update-final-status + # if: always() + # steps: + # - name: e2e/remove-label-from-pr + # uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + # continue-on-error: true # Label might have been removed manually + # with: + # github-token: ${{ github.token }} + # script: | + # // Use pr_number if provided directly by the dispatcher (e.g. Matterwick) + # let prNumber = parseInt(process.env.PR_NUMBER) || null; + # + # // Fall back to looking up the PR by head branch when pr_number was not passed + # if (!prNumber) { + # const run = await github.rest.actions.getWorkflowRun({ + # owner: context.repo.owner, + # repo: context.repo.repo, + # run_id: context.runId, + # }); + # const branchName = run.data.head_branch; + # if (branchName) { + # const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + # const prs = await github.rest.pulls.list({ + # owner: context.repo.owner, + # repo: context.repo.repo, + # state: 'open', + # head: `${headOwner}:${branchName}`, + # }); + # if (prs.data && prs.data.length > 0) { + # const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); + # prNumber = (matchingPr || prs.data[0]).number; + # } + # } + # } + # + # if (prNumber) { + # await github.rest.issues.removeLabel({ + # issue_number: prNumber, + # owner: context.repo.owner, + # repo: context.repo.repo, + # name: 'E2E/Run', + # }); + # } else { + # console.log('Label removal skipped - could not find associated PR'); + # } + # env: + # PR_NUMBER: ${{ inputs.pr_number }} e2e-policy-tests: name: policy-tests-${{ matrix.platform }} diff --git a/.github/workflows/e2e-label-cleanup.yml b/.github/workflows/e2e-label-cleanup.yml index 061d7d48491..1069c4c1232 100644 --- a/.github/workflows/e2e-label-cleanup.yml +++ b/.github/workflows/e2e-label-cleanup.yml @@ -1,10 +1,14 @@ name: E2E Label Cleanup -# Runs when the "Electron Playwright Tests" workflow completes with any -# conclusion (success, failure, or cancelled). This is the only reliable way -# to remove the E2E/Run label after a cancellation, because GitHub Actions -# cancels all queued jobs — including remove-e2e-label — when a workflow run -# is cancelled, regardless of `if: always()`. +# Intentionally disabled: removing E2E/Run causes Matterwick to destroy the +# provisioned servers immediately, which prevents agents from connecting to +# those servers to reproduce and fix failing tests in the same PR run. +# Re-enable once automated fix-and-rerun is no longer needed. +# +# Original behaviour: runs when "Electron Playwright Tests" completes with any +# conclusion (success, failure, or cancelled) and removes the E2E/Run label. +# This was the safety net for cancellations where the remove-e2e-label job in +# e2e-functional.yml would be skipped by GitHub Actions. on: workflow_run: @@ -17,26 +21,11 @@ permissions: actions: read jobs: - remove-e2e-label: - name: Remove E2E/Run label from PR + # remove-e2e-label is disabled — see comment at the top of this file. + noop: + name: Label cleanup disabled runs-on: ubuntu-22.04 if: ${{ github.event.workflow_run.event == 'workflow_dispatch' }} steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Remove E2E/Run label - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - continue-on-error: true # Label may have already been removed by remove-e2e-label job - with: - script: | - const { removeE2ELabel } = require('./e2e/utils/github-actions.js'); - // Pass the original test workflow's run ID so removeE2ELabel can - // look up the dispatching branch and find the associated PR. - await removeE2ELabel({ - github, - context: { - repo: context.repo, - runId: ${{ github.event.workflow_run.id }}, - }, - }); + - name: Skip label removal + run: echo "E2E/Run label removal is disabled. Servers remain active for agent-driven test fixing." From 9af4cdac4ee87113dfc004e2cb8bd6557a00fc2b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 02:45:53 +0000 Subject: [PATCH 07/22] e2e: disable label removal and fix post-login tabsDisabled race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Label removal: - Disable remove-e2e-label job in e2e-functional.yml (commented out) so Matterwick keeps provisioned servers alive after tests finish. - Replace the remove-e2e-label job in e2e-label-cleanup.yml with a no-op job for the same reason. Servers are destroyed when the label is removed; keeping them alive lets agents connect and fix failing tests in the same PR run. Test fix — post-login tabsDisabled race (5 failing tests across all 3 OSes): Root cause: MainPage.tabsDisabled is set to !currentServer.isLoggedIn. After loginToMattermost() returns (web app shell ready), the SERVER_LOGGED_IN_CHANGED IPC event still needs to travel from the server WebContentsView through the main process ServerManager to the renderer MainPage, where it triggers updateServers() which fetches the updated currentServer.isLoggedIn value. Tests that called mainWindow.click('#newTabButton') or mainWindow.waitForSelector('#newTabButton') immediately after loginToMattermost() were racing this propagation and timing out because the button was still disabled (tabsDisabled=true). Fix: change the post-login wait in each affected beforeAll from waitForSelector('#newTabButton') // present but possibly disabled to waitForSelector('#newTabButton:not([disabled])') // present AND enabled Affected specs: - e2e/specs/server_management/drag_and_drop.test.ts - e2e/specs/server_management/popout_windows.test.ts - e2e/specs/server_management/tab_management.test.ts - e2e/specs/menu_bar/window_menu.test.ts Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 114 +++++++++--------- .github/workflows/e2e-label-cleanup.yml | 39 +++--- e2e/specs/menu_bar/window_menu.test.ts | 3 + .../server_management/drag_and_drop.test.ts | 4 +- .../server_management/popout_windows.test.ts | 4 +- .../server_management/tab_management.test.ts | 4 +- 6 files changed, 84 insertions(+), 84 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index 2dac2ccd4ce..e55b5f8572f 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -234,62 +234,64 @@ jobs: const mergedReportUrl = '${{ needs.merge-e2e-report.outputs.merged-report-url }}' || undefined; await updateFinalStatus({ github, context, platforms, outputs, mergedReportUrl }); - remove-e2e-label: - name: Remove E2E label from PR - runs-on: ubuntu-22.04 - permissions: - issues: write - pull-requests: write - needs: - - e2e-tests - - e2e-policy-tests - - update-final-status - if: always() - steps: - - name: e2e/remove-label-from-pr - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - continue-on-error: true # Label might have been removed manually - with: - github-token: ${{ github.token }} - script: | - // Use pr_number if provided directly by the dispatcher (e.g. Matterwick) - let prNumber = parseInt(process.env.PR_NUMBER) || null; - - // Fall back to looking up the PR by head branch when pr_number was not passed - if (!prNumber) { - const run = await github.rest.actions.getWorkflowRun({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.runId, - }); - const branchName = run.data.head_branch; - if (branchName) { - const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - const prs = await github.rest.pulls.list({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - head: `${headOwner}:${branchName}`, - }); - if (prs.data && prs.data.length > 0) { - const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); - prNumber = (matchingPr || prs.data[0]).number; - } - } - } - - if (prNumber) { - await github.rest.issues.removeLabel({ - issue_number: prNumber, - owner: context.repo.owner, - repo: context.repo.repo, - name: 'E2E/Run', - }); - } else { - console.log('Label removal skipped - could not find associated PR'); - } - env: - PR_NUMBER: ${{ inputs.pr_number }} + # remove-e2e-label is intentionally disabled. + # Removing E2E/Run causes Matterwick to destroy the provisioned servers immediately, + # which prevents agents from connecting to those servers to reproduce and fix + # failing tests in the same PR run. Re-enable once automated fix-and-rerun is no + # longer needed. + # + # remove-e2e-label: + # name: Remove E2E label from PR + # runs-on: ubuntu-22.04 + # permissions: + # issues: write + # pull-requests: write + # needs: + # - e2e-tests + # - e2e-policy-tests + # - update-final-status + # if: always() + # steps: + # - name: e2e/remove-label-from-pr + # uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + # continue-on-error: true + # with: + # github-token: ${{ github.token }} + # script: | + # let prNumber = parseInt(process.env.PR_NUMBER) || null; + # if (!prNumber) { + # const run = await github.rest.actions.getWorkflowRun({ + # owner: context.repo.owner, + # repo: context.repo.repo, + # run_id: context.runId, + # }); + # const branchName = run.data.head_branch; + # if (branchName) { + # const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + # const prs = await github.rest.pulls.list({ + # owner: context.repo.owner, + # repo: context.repo.repo, + # state: 'open', + # head: `${headOwner}:${branchName}`, + # }); + # if (prs.data && prs.data.length > 0) { + # const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); + # prNumber = (matchingPr || prs.data[0]).number; + # } + # } + # } + # if (prNumber) { + # await github.rest.issues.removeLabel({ + # issue_number: prNumber, + # owner: context.repo.owner, + # repo: context.repo.repo, + # name: 'E2E/Run', + # }); + # } else { + # console.log('Label removal skipped - could not find associated PR'); + # } + # env: + # PR_NUMBER: ${{ inputs.pr_number }} e2e-policy-tests: name: policy-tests-${{ matrix.platform }} diff --git a/.github/workflows/e2e-label-cleanup.yml b/.github/workflows/e2e-label-cleanup.yml index 061d7d48491..1069c4c1232 100644 --- a/.github/workflows/e2e-label-cleanup.yml +++ b/.github/workflows/e2e-label-cleanup.yml @@ -1,10 +1,14 @@ name: E2E Label Cleanup -# Runs when the "Electron Playwright Tests" workflow completes with any -# conclusion (success, failure, or cancelled). This is the only reliable way -# to remove the E2E/Run label after a cancellation, because GitHub Actions -# cancels all queued jobs — including remove-e2e-label — when a workflow run -# is cancelled, regardless of `if: always()`. +# Intentionally disabled: removing E2E/Run causes Matterwick to destroy the +# provisioned servers immediately, which prevents agents from connecting to +# those servers to reproduce and fix failing tests in the same PR run. +# Re-enable once automated fix-and-rerun is no longer needed. +# +# Original behaviour: runs when "Electron Playwright Tests" completes with any +# conclusion (success, failure, or cancelled) and removes the E2E/Run label. +# This was the safety net for cancellations where the remove-e2e-label job in +# e2e-functional.yml would be skipped by GitHub Actions. on: workflow_run: @@ -17,26 +21,11 @@ permissions: actions: read jobs: - remove-e2e-label: - name: Remove E2E/Run label from PR + # remove-e2e-label is disabled — see comment at the top of this file. + noop: + name: Label cleanup disabled runs-on: ubuntu-22.04 if: ${{ github.event.workflow_run.event == 'workflow_dispatch' }} steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Remove E2E/Run label - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - continue-on-error: true # Label may have already been removed by remove-e2e-label job - with: - script: | - const { removeE2ELabel } = require('./e2e/utils/github-actions.js'); - // Pass the original test workflow's run ID so removeE2ELabel can - // look up the dispatching branch and find the associated PR. - await removeE2ELabel({ - github, - context: { - repo: context.repo, - runId: ${{ github.event.workflow_run.id }}, - }, - }); + - name: Skip label removal + run: echo "E2E/Run label removal is disabled. Servers remain active for agent-driven test fixing." diff --git a/e2e/specs/menu_bar/window_menu.test.ts b/e2e/specs/menu_bar/window_menu.test.ts index e2e485394b1..8c70aafe0c5 100644 --- a/e2e/specs/menu_bar/window_menu.test.ts +++ b/e2e/specs/menu_bar/window_menu.test.ts @@ -333,6 +333,9 @@ test.describe('Menu/window_menu', () => { serverMap = await buildServerMap(electronApp); await loginToMattermost(getMattermostServer()); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so + // tabsDisabled becomes false and #newTabButton becomes enabled. + await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); await focusMainWindow(); }); diff --git a/e2e/specs/server_management/drag_and_drop.test.ts b/e2e/specs/server_management/drag_and_drop.test.ts index 109452c08fd..80f00d6c2eb 100644 --- a/e2e/specs/server_management/drag_and_drop.test.ts +++ b/e2e/specs/server_management/drag_and_drop.test.ts @@ -198,7 +198,9 @@ test.describe('server_management/drag_and_drop', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so + // tabsDisabled becomes false and #newTabButton becomes enabled. + await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/popout_windows.test.ts b/e2e/specs/server_management/popout_windows.test.ts index 4b0cb870d03..a6b4d3aebc1 100644 --- a/e2e/specs/server_management/popout_windows.test.ts +++ b/e2e/specs/server_management/popout_windows.test.ts @@ -163,7 +163,9 @@ test.describe('server_management/popout_windows', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so + // tabsDisabled becomes false and #newTabButton becomes enabled. + await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/tab_management.test.ts b/e2e/specs/server_management/tab_management.test.ts index af74fe48b4b..3f7809fd375 100644 --- a/e2e/specs/server_management/tab_management.test.ts +++ b/e2e/specs/server_management/tab_management.test.ts @@ -79,7 +79,9 @@ test.describe('server_management/tab_management', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so + // tabsDisabled becomes false and #newTabButton becomes enabled. + await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); }); test.beforeEach(async () => { From 76dac809fd584c556bc2a5cbfac5b1a889024684 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 02:51:25 +0000 Subject: [PATCH 08/22] e2e: fix lint errors and apply remaining CodeRabbit review fixes Lint (Expected line before comment): - Add blank line before each explanatory comment added in the previous commit across drag_and_drop, popout_windows, tab_management, and window_menu test files. CodeRabbit review items (carried over from PR #3774 review of the original commit, now applied to this branch): - Remove issues:write / pull-requests:write from the workflow-level permissions block; those scopes already exist at job level on the post-server-info job. - Add continue-on-error: true to the Post provisioned server URLs step so a comment API failure never blocks the overall workflow. - Move workflow inputs (pr_number, MM_TEST_USER_NAME, MM_SERVER_VERSION) into env: vars and read them via process.env.* to prevent script injection (security fix from DryRun Security report). - Replace parseInt() in findPrNumber with strict /^\d+$/ + positive integer validation to reject partial strings and negative values. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 29 ++++++++++++------- e2e/specs/menu_bar/window_menu.test.ts | 1 + .../server_management/drag_and_drop.test.ts | 1 + .../server_management/popout_windows.test.ts | 1 + .../server_management/tab_management.test.ts | 1 + e2e/utils/github-actions.js | 9 ++++-- 6 files changed, 28 insertions(+), 14 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index e55b5f8572f..fcff7ee2e50 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -41,8 +41,6 @@ on: permissions: contents: read statuses: write - issues: write - pull-requests: write jobs: prepare-matrix: @@ -70,6 +68,11 @@ jobs: - name: Post provisioned server URLs as PR comment uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + continue-on-error: true + env: + PR_NUMBER: ${{ inputs.pr_number }} + ADMIN_USERNAME: ${{ inputs.MM_TEST_USER_NAME }} + SERVER_VERSION: ${{ inputs.MM_SERVER_VERSION }} with: github-token: ${{ github.token }} script: | @@ -78,20 +81,24 @@ jobs: const prNumber = await findPrNumber({ github, context, - prNumberInput: '${{ inputs.pr_number }}', + prNumberInput: process.env.PR_NUMBER, }); if (!prNumber) { core.warning('Could not resolve PR number — skipping server info comment'); return; } - await postServerInfoComment({ - github, - context, - platforms, - adminUsername: '${{ inputs.MM_TEST_USER_NAME }}', - serverVersion: '${{ inputs.MM_SERVER_VERSION }}', - prNumber, - }); + try { + await postServerInfoComment({ + github, + context, + platforms, + adminUsername: process.env.ADMIN_USERNAME, + serverVersion: process.env.SERVER_VERSION, + prNumber, + }); + } catch (err) { + core.warning(`Failed to post server info comment: ${err.message}`); + } update-initial-status: name: Update initial status diff --git a/e2e/specs/menu_bar/window_menu.test.ts b/e2e/specs/menu_bar/window_menu.test.ts index 8c70aafe0c5..883624f99cf 100644 --- a/e2e/specs/menu_bar/window_menu.test.ts +++ b/e2e/specs/menu_bar/window_menu.test.ts @@ -333,6 +333,7 @@ test.describe('Menu/window_menu', () => { serverMap = await buildServerMap(electronApp); await loginToMattermost(getMattermostServer()); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so // tabsDisabled becomes false and #newTabButton becomes enabled. await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); diff --git a/e2e/specs/server_management/drag_and_drop.test.ts b/e2e/specs/server_management/drag_and_drop.test.ts index 80f00d6c2eb..12e80584324 100644 --- a/e2e/specs/server_management/drag_and_drop.test.ts +++ b/e2e/specs/server_management/drag_and_drop.test.ts @@ -198,6 +198,7 @@ test.describe('server_management/drag_and_drop', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so // tabsDisabled becomes false and #newTabButton becomes enabled. await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); diff --git a/e2e/specs/server_management/popout_windows.test.ts b/e2e/specs/server_management/popout_windows.test.ts index a6b4d3aebc1..40331fda644 100644 --- a/e2e/specs/server_management/popout_windows.test.ts +++ b/e2e/specs/server_management/popout_windows.test.ts @@ -163,6 +163,7 @@ test.describe('server_management/popout_windows', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so // tabsDisabled becomes false and #newTabButton becomes enabled. await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); diff --git a/e2e/specs/server_management/tab_management.test.ts b/e2e/specs/server_management/tab_management.test.ts index 3f7809fd375..17deefb4179 100644 --- a/e2e/specs/server_management/tab_management.test.ts +++ b/e2e/specs/server_management/tab_management.test.ts @@ -79,6 +79,7 @@ test.describe('server_management/tab_management', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); + // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so // tabsDisabled becomes false and #newTabButton becomes enabled. await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 4913e2bbebd..f8bcb68c78d 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -89,9 +89,12 @@ async function updateFinalStatus({github, context, platforms, outputs, mergedRep * @returns {Promise} Resolved PR number, or null if not found */ async function findPrNumber({github, context, prNumberInput}) { - const explicit = parseInt(prNumberInput, 10); - if (explicit) { - return explicit; + const trimmed = String(prNumberInput ?? '').trim(); + if ((/^\d+$/).test(trimmed)) { + const parsed = Number(trimmed); + if (Number.isInteger(parsed) && parsed > 0) { + return parsed; + } } try { From 146f4d067a6fd2448168723fd492f9ba116f7d35 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 04:00:06 +0000 Subject: [PATCH 09/22] e2e: fix security issues and address remaining CodeRabbit review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security (DryRun Security findings on automation-changed files only): drs_40c108a1 — Code injection via expression interpolation: `${{ needs.prepare-matrix.outputs.platforms }}` was interpolated directly into a JS string literal inside actions/github-script. A poisoned instance_details input could escape the shell command in prepare-matrix, control the platforms output, and inject arbitrary JS. Fix: pass PLATFORMS via env: and parse with JSON.parse(process.env.PLATFORMS) so the value is never textually interpolated into the script body. drs_e5221983 — Markdown injection in PR comments: platform and url values from the platforms array were inserted raw into a Markdown table in postServerInfoComment(). An attacker controlling instance_details could inject pipe characters to break the table or embed malicious links/mentions. Fix: add sanitizeMd() helper in github-actions.js that escapes |, `, [ and ] before inserting any platform-provided value into the comment body. CodeRabbit review (on automation-changed files only): - Remove inputs.pr_number gate from post-server-info job condition so findPrNumber fallback resolution can run when pr_number is absent. New condition: if: ${{ !inputs.nightly }} - Remove ~60 lines of commented-out remove-e2e-label implementation from e2e-functional.yml; replaced with a single explanatory comment line. Git history preserves the full implementation. - Fix e2e-label-cleanup.yml noop job to never allocate a runner: change if condition to ${{ false }} and add permissions: {} so the job is skipped entirely and consumes no permissions or compute. Co-authored-by: yasser khan --- .github/workflows/e2e-functional.yml | 70 ++++--------------------- .github/workflows/e2e-label-cleanup.yml | 11 ++-- e2e/utils/github-actions.js | 6 ++- 3 files changed, 19 insertions(+), 68 deletions(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index fcff7ee2e50..86ce03fa68c 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -56,8 +56,10 @@ jobs: post-server-info: name: Post server info to PR needs: prepare-matrix - # Only comment on PR-triggered runs (nightly runs have no PR to comment on). - if: ${{ !inputs.nightly && inputs.pr_number != '' }} + # Skip nightly runs — they have no PR to comment on. + # Do not gate on inputs.pr_number because findPrNumber has fallback + # resolution paths that can recover the PR number from the run context. + if: ${{ !inputs.nightly }} runs-on: ubuntu-24.04 permissions: issues: write @@ -73,11 +75,14 @@ jobs: PR_NUMBER: ${{ inputs.pr_number }} ADMIN_USERNAME: ${{ inputs.MM_TEST_USER_NAME }} SERVER_VERSION: ${{ inputs.MM_SERVER_VERSION }} + # Pass the trusted internal job output via env so it is never + # interpolated directly into the JS string (prevents injection). + PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} with: github-token: ${{ github.token }} script: | const { findPrNumber, postServerInfoComment } = require('./e2e/utils/github-actions.js'); - const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + const platforms = JSON.parse(process.env.PLATFORMS); const prNumber = await findPrNumber({ github, context, @@ -241,64 +246,7 @@ jobs: const mergedReportUrl = '${{ needs.merge-e2e-report.outputs.merged-report-url }}' || undefined; await updateFinalStatus({ github, context, platforms, outputs, mergedReportUrl }); - # remove-e2e-label is intentionally disabled. - # Removing E2E/Run causes Matterwick to destroy the provisioned servers immediately, - # which prevents agents from connecting to those servers to reproduce and fix - # failing tests in the same PR run. Re-enable once automated fix-and-rerun is no - # longer needed. - # - # remove-e2e-label: - # name: Remove E2E label from PR - # runs-on: ubuntu-22.04 - # permissions: - # issues: write - # pull-requests: write - # needs: - # - e2e-tests - # - e2e-policy-tests - # - update-final-status - # if: always() - # steps: - # - name: e2e/remove-label-from-pr - # uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - # continue-on-error: true - # with: - # github-token: ${{ github.token }} - # script: | - # let prNumber = parseInt(process.env.PR_NUMBER) || null; - # if (!prNumber) { - # const run = await github.rest.actions.getWorkflowRun({ - # owner: context.repo.owner, - # repo: context.repo.repo, - # run_id: context.runId, - # }); - # const branchName = run.data.head_branch; - # if (branchName) { - # const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - # const prs = await github.rest.pulls.list({ - # owner: context.repo.owner, - # repo: context.repo.repo, - # state: 'open', - # head: `${headOwner}:${branchName}`, - # }); - # if (prs.data && prs.data.length > 0) { - # const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); - # prNumber = (matchingPr || prs.data[0]).number; - # } - # } - # } - # if (prNumber) { - # await github.rest.issues.removeLabel({ - # issue_number: prNumber, - # owner: context.repo.owner, - # repo: context.repo.repo, - # name: 'E2E/Run', - # }); - # } else { - # console.log('Label removal skipped - could not find associated PR'); - # } - # env: - # PR_NUMBER: ${{ inputs.pr_number }} + # remove-e2e-label is intentionally omitted: see e2e-label-cleanup.yml for context. e2e-policy-tests: name: policy-tests-${{ matrix.platform }} diff --git a/.github/workflows/e2e-label-cleanup.yml b/.github/workflows/e2e-label-cleanup.yml index 1069c4c1232..43dbc9d8605 100644 --- a/.github/workflows/e2e-label-cleanup.yml +++ b/.github/workflows/e2e-label-cleanup.yml @@ -15,17 +15,16 @@ on: workflows: ["Electron Playwright Tests"] types: [completed] -permissions: - issues: write - pull-requests: write - actions: read +permissions: {} jobs: # remove-e2e-label is disabled — see comment at the top of this file. + # if: false ensures no runner is allocated and no permissions are used. noop: name: Label cleanup disabled runs-on: ubuntu-22.04 - if: ${{ github.event.workflow_run.event == 'workflow_dispatch' }} + if: ${{ false }} + permissions: {} steps: - name: Skip label removal - run: echo "E2E/Run label removal is disabled. Servers remain active for agent-driven test fixing." + run: echo "E2E/Run label removal is disabled." diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index f8bcb68c78d..f84a32a0320 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -153,8 +153,12 @@ async function postServerInfoComment({github, context, platforms, adminUsername, const MARKER = ''; const workflowUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + // Sanitize values before inserting into a Markdown table to prevent + // table-breaking pipe characters or other Markdown injection. + const sanitizeMd = (str) => String(str ?? '').replace(/[|`[\]]/g, (ch) => `\\${ch}`); + const platformRows = platforms. - map((p) => `| \`${p.platform}\` | ${p.url} |`). + map((p) => `| \`${sanitizeMd(p.platform)}\` | ${sanitizeMd(p.url)} |`). join('\n'); const lines = [ From 634ee0b720fefbbe6f159c33eac3428fb0527aa9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 06:13:36 +0000 Subject: [PATCH 10/22] e2e: fix CodeRabbit review comments and comment out removeE2ELabel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit review fixes (automation-changed files only): AGENTS.md: - Add 'bash' language tag to the fenced code block containing the nvm command so markdown linters and readers recognize it as a shell snippet. e2e/utils/github-actions.js: 1. Comment out removeE2ELabel function body — entire implementation is commented out so the GitHub Actions master-branch cleanup job no longer removes the E2E/Run label after tests complete. Matterwick keeps the provisioned servers alive, allowing agents to connect and fix failures in the same run. Function signature is preserved; all callers continue to compile and run without errors. 2. sanitizeMd — extend to also strip/replace newline characters (\r, \n → space) and escape HTML-sensitive chars (&, <, >) in addition to the existing pipe/backtick/bracket escaping, preventing table-breaking and raw HTML injection in PR comments. 3. findPrNumber — remove fallback to prs.data[0] when no PR matches the run's head SHA; return null instead so the caller skips posting a comment rather than posting to the wrong PR. 4. postServerInfoComment — update the server lifetime note in the PR comment to reflect that label cleanup is disabled and servers may be retained after the run ends. Co-authored-by: yasser khan --- AGENTS.md | 2 +- e2e/utils/github-actions.js | 151 +++++++++++++++++++----------------- 2 files changed, 82 insertions(+), 71 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 478481f3797..a56686a7ac1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,7 +10,7 @@ Mattermost Desktop is an Electron app (v40.x) wrapping the Mattermost web app. S The project requires Node.js v20.15.0 (specified in `.nvmrc`). Use `nvm` to switch: -``` +```bash source ~/.nvm/nvm.sh && nvm use 20.15.0 ``` diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index f84a32a0320..1431b481c4c 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -119,7 +119,7 @@ async function findPrNumber({github, context, prNumberInput}) { }); if (prs.data && prs.data.length > 0) { const matching = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); - return (matching || prs.data[0]).number; + return matching ? matching.number : null; } } } catch (error) { @@ -154,8 +154,13 @@ async function postServerInfoComment({github, context, platforms, adminUsername, const workflowUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; // Sanitize values before inserting into a Markdown table to prevent - // table-breaking pipe characters or other Markdown injection. - const sanitizeMd = (str) => String(str ?? '').replace(/[|`[\]]/g, (ch) => `\\${ch}`); + // table-breaking, HTML injection, or newline-based injection. + const sanitizeMd = (str) => String(str ?? ''). + replace(/[\r\n]/g, ' '). + replace(/&/g, '&'). + replace(//g, '>'). + replace(/[|`[\]]/g, (ch) => `\\${ch}`); const platformRows = platforms. map((p) => `| \`${sanitizeMd(p.platform)}\` | ${sanitizeMd(p.url)} |`). @@ -191,7 +196,7 @@ async function postServerInfoComment({github, context, platforms, adminUsername, ' npx playwright test --reporter=list --workers=1', '```', '', - '> Servers are active for the duration of this workflow run and destroyed afterwards.', + '> Servers are active for the duration of this workflow run. Label cleanup is disabled — servers may be retained afterwards for agent-driven test fixing.', '', `**Workflow run:** ${workflowUrl}`, ); @@ -230,72 +235,78 @@ async function postServerInfoComment({github, context, platforms, adminUsername, * @param {Object} params.github - GitHub API client from actions/github-script * @param {Object} params.context - GitHub Actions context */ -async function removeE2ELabel({github, context}) { - try { - // Get the current run to check if it was triggered by workflow_dispatch - const run = await github.rest.actions.getWorkflowRun({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.runId, - }); - - // Only remove the label if this was triggered via workflow_dispatch (Matterwick) - if (run.data.event !== 'workflow_dispatch') { - console.log('Label removal skipped - workflow run is not triggered by workflow_dispatch (Matterwick)'); - return; - } - - // Try to find associated PR - let prNumber = null; - - // First try: check run.data.pull_requests (reliable for pull_request events) - if (run.data.pull_requests && run.data.pull_requests.length > 0) { - prNumber = run.data.pull_requests[0].number; - } else { - // Second try: query PRs by head branch (more reliable for workflow_dispatch) - const branchName = run.data.head_branch; - if (branchName) { - // Use the actual head repository owner (supports fork PRs) - const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - const prs = await github.rest.pulls.list({ - owner: context.repo.owner, - repo: context.repo.repo, - state: 'open', - head: `${headOwner}:${branchName}`, - }); - if (prs.data && prs.data.length > 0) { - // Prefer the PR whose head SHA matches the workflow run's head SHA - const matchingPr = prs.data.find( - (pr) => pr.head && pr.head.sha === run.data.head_sha, - ); - if (matchingPr) { - prNumber = matchingPr.number; - } else { - prNumber = prs.data[0].number; - } - } - } - } - - if (prNumber) { - await github.rest.issues.removeLabel({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - name: 'E2E/Run', - }); - } else { - console.log('Label removal skipped - could not find associated PR'); - } - } catch (error) { - if (error && error.status === 404) { - console.log(`Label removal skipped - label or resource not found (404). Details: ${error.message}`); - } else if (error && error.status === 403) { - console.log(`Label removal failed - insufficient permissions (403). Details: ${error.message}`); - } else { - console.log(`Label removal failed - unexpected error: status=${error && error.status}, message=${error && error.message}`); - } - } +async function removeE2ELabel() { + // Commented out for testing purposes — label removal is disabled so + // Matterwick keeps provisioned servers alive after tests finish, allowing + // agents to connect and fix failures in the same run. + // + // async function removeE2ELabel({github, context}) { + // try { + // // Get the current run to check if it was triggered by workflow_dispatch + // const run = await github.rest.actions.getWorkflowRun({ + // owner: context.repo.owner, + // repo: context.repo.repo, + // run_id: context.runId, + // }); + // + // // Only remove the label if this was triggered via workflow_dispatch (Matterwick) + // if (run.data.event !== 'workflow_dispatch') { + // console.log('Label removal skipped - workflow run is not triggered by workflow_dispatch (Matterwick)'); + // return; + // } + // + // // Try to find associated PR + // let prNumber = null; + // + // // First try: check run.data.pull_requests (reliable for pull_request events) + // if (run.data.pull_requests && run.data.pull_requests.length > 0) { + // prNumber = run.data.pull_requests[0].number; + // } else { + // // Second try: query PRs by head branch (more reliable for workflow_dispatch) + // const branchName = run.data.head_branch; + // if (branchName) { + // // Use the actual head repository owner (supports fork PRs) + // const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + // const prs = await github.rest.pulls.list({ + // owner: context.repo.owner, + // repo: context.repo.repo, + // state: 'open', + // head: `${headOwner}:${branchName}`, + // }); + // if (prs.data && prs.data.length > 0) { + // const matchingPr = prs.data.find( + // (pr) => pr.head && pr.head.sha === run.data.head_sha, + // ); + // if (matchingPr) { + // prNumber = matchingPr.number; + // } else { + // prNumber = prs.data[0].number; + // } + // } + // } + // } + // + // if (prNumber) { + // await github.rest.issues.removeLabel({ + // owner: context.repo.owner, + // repo: context.repo.repo, + // issue_number: prNumber, + // name: 'E2E/Run', + // }); + // } else { + // console.log('Label removal skipped - could not find associated PR'); + // } + // } catch (error) { + // if (error && error.status === 404) { + // console.log(`Label removal skipped - label or resource not found (404). Details: ${error.message}`); + // } else if (error && error.status === 403) { + // console.log(`Label removal failed - insufficient permissions (403). Details: ${error.message}`); + // } else { + // console.log(`Label removal failed - unexpected error: status=${error && error.status}, message=${error && error.message}`); + // } + // } + // } + console.log('removeE2ELabel: commented out for testing purposes — label removal is disabled.'); } module.exports = { From 526681e209a910ffab0473608aadebb17f2d01a0 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 06:37:29 +0000 Subject: [PATCH 11/22] e2e: add zero-intervention fix trigger workflow and CodeRabbit fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New file: .github/workflows/e2e-fix-trigger.yml Fires on workflow_run:completed for 'Electron Playwright Tests'. Reads failure counts, applies triage logic, then either: - Skips with a PR comment if mass failure (all platforms failed or total >= MASS_FAILURE_THRESHOLD=15) — prevents wasting tokens on systemic build/infra breakages. - Launches a Cursor cloud agent (POST /v0/agents) with source.prUrl and target.autoBranch:false so fixes push to the PR's head branch and re-trigger a new E2E run automatically. The prompt tells the agent to: - Fix test bugs (selector changes, race conditions, wrong assertions) in e2e/ only, capped at 8 files per run. - Post a PR comment for product bugs instead of modifying tests. - Run each fixed spec against the live server before committing. Requires CURSOR_API_KEY repository secret. e2e/utils/github-actions.js (CodeRabbit fixes): - removeE2ELabel: entire function body is commented out so the GitHub Actions master-branch cleanup job no longer removes E2E/Run. - sanitizeMd: also escapes \r/\n (→ space) and & < > (HTML entities) to prevent table-breaking and raw HTML injection in PR comments. - findPrNumber: return null instead of falling back to prs.data[0] when no PR matches the run's head SHA. - postServerInfoComment: update server lifetime note to reflect that label cleanup is disabled and servers may be retained after the run. AGENTS.md (CodeRabbit fix): - Add 'bash' language tag to the nvm fenced code block. Co-authored-by: yasser khan --- .github/workflows/e2e-fix-trigger.yml | 367 ++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 .github/workflows/e2e-fix-trigger.yml diff --git a/.github/workflows/e2e-fix-trigger.yml b/.github/workflows/e2e-fix-trigger.yml new file mode 100644 index 00000000000..d2c1323a35c --- /dev/null +++ b/.github/workflows/e2e-fix-trigger.yml @@ -0,0 +1,367 @@ +name: E2E Fix Trigger + +# Fires when "Electron Playwright Tests" completes. If there are new failures +# AND the failure count is below the mass-failure threshold, this workflow +# launches a Cursor cloud agent on the PR branch to inspect the CI run, read +# the failing test source files, and either fix test bugs or post a comment +# describing product bugs — all without human intervention. +# +# Token / prompt efficiency decisions +# ------------------------------------ +# 1. SKIP when ALL platforms failed (>= MASS_FAILURE_THRESHOLD failures total). +# That pattern almost always means a build/infra breakage, not individual +# test bugs. A Cursor agent would waste tokens trying to fix 30+ tests at +# once. Instead we post a single PR comment explaining why the agent was +# skipped so a developer can investigate the root cause first. +# +# 2. SKIP nightly runs — they have no PR to fix. +# +# 3. The prompt passed to the Cursor API contains only: +# - The failing test names grouped by platform (not full logs) +# - The server URLs from the existing PR comment (already posted by +# post-server-info) so the agent can reproduce failures +# - Clear triage instructions so the agent classifies each failure as a +# test bug (fix it) or a product bug (post a comment, do not retry) +# +# 4. source.prUrl is used (not source.repository + ref) so the agent works +# directly on the PR's head branch and pushes fixes there, triggering +# another CI run automatically. +# +# 5. target.autoBranch: false — push to the PR's existing head branch rather +# than creating a new branch. + +on: + workflow_run: + workflows: ["Electron Playwright Tests"] + types: [completed] + +# workflow_run always runs from the default branch — no PR secrets available +# here. CURSOR_API_KEY must be a repository secret, not a PR/environment secret. +permissions: + contents: read + pull-requests: write + issues: write + actions: read + +env: + # Skip the agent if the total failure count across all platforms reaches + # this threshold — it indicates a systemic build/infra issue, not individual + # test bugs that an agent can fix efficiently. + MASS_FAILURE_THRESHOLD: 15 + # Cursor API endpoint + CURSOR_API_URL: https://api.cursor.com/v0/agents + +jobs: + triage-and-fix: + name: Triage E2E failures and launch Cursor fix agent + runs-on: ubuntu-22.04 + # Only run for workflow_dispatch-triggered runs (Matterwick) that failed. + # Nightly runs don't have an associated PR so skip those too. + if: | + github.event.workflow_run.conclusion == 'failure' && + github.event.workflow_run.event == 'workflow_dispatch' + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Collect failure details and decide whether to launch agent + id: triage + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + MASS_FAILURE_THRESHOLD: ${{ env.MASS_FAILURE_THRESHOLD }} + with: + github-token: ${{ github.token }} + script: | + const { owner, repo } = context.repo; + const runId = ${{ github.event.workflow_run.id }}; + const threshold = parseInt(process.env.MASS_FAILURE_THRESHOLD, 10); + + // ── 1. Resolve the PR ──────────────────────────────────────────── + const run = await github.rest.actions.getWorkflowRun({ owner, repo, run_id: runId }); + const headSha = run.data.head_sha; + const headBranch = run.data.head_branch; + const headOwner = run.data.head_repository?.owner?.login || owner; + + let prNumber = null; + let prUrl = null; + if (run.data.pull_requests && run.data.pull_requests.length > 0) { + prNumber = run.data.pull_requests[0].number; + } else if (headBranch) { + const prs = await github.rest.pulls.list({ + owner, repo, state: 'open', + head: `${headOwner}:${headBranch}`, + }); + const match = (prs.data || []).find((p) => p.head && p.head.sha === headSha); + if (match) { + prNumber = match.number; + } + } + + if (!prNumber) { + core.warning('Could not resolve PR number — skipping agent launch'); + core.setOutput('skip', 'true'); + core.setOutput('skip_reason', 'no_pr'); + return; + } + + prUrl = `https://github.com/${owner}/${repo}/pull/${prNumber}`; + core.setOutput('pr_number', String(prNumber)); + core.setOutput('pr_url', prUrl); + + // ── 2. Collect per-platform failure counts from run outputs ────── + // The e2e-tests matrix job exposes NEW_FAILURES_{LINUX,MACOS,WINDOWS} + // as workflow outputs via the reusable template. + const jobs = await github.rest.actions.listJobsForWorkflowRun({ + owner, repo, run_id: runId, per_page: 100, + }); + + // Extract NEW_FAILURES from step outputs if available, otherwise + // use job conclusion to estimate. + const platformFailures = { linux: 0, macos: 0, windows: 0 }; + const platformJobIds = { linux: null, macos: null, windows: null }; + + for (const job of jobs.data.jobs) { + const nameLower = job.name.toLowerCase(); + let platform = null; + if (nameLower.includes('linux') || nameLower.includes('ubuntu')) { + platform = 'linux'; + } else if (nameLower.includes('macos') || nameLower.includes('darwin')) { + platform = 'macos'; + } else if (nameLower.includes('windows') || nameLower.includes('win32')) { + platform = 'windows'; + } + if (!platform) { + continue; + } + platformJobIds[platform] = job.id; + if (job.conclusion === 'failure') { + // We don't have direct access to the NEW_FAILURES output here; + // mark as at least 1. The actual count is in the JUnit artifact + // but downloading it here would be expensive. + platformFailures[platform] = platformFailures[platform] || 1; + } + } + + const totalFailures = Object.values(platformFailures).reduce((a, b) => a + b, 0); + core.info(`Failure counts — linux:${platformFailures.linux} macos:${platformFailures.macos} windows:${platformFailures.windows} total:${totalFailures}`); + core.setOutput('total_failures', String(totalFailures)); + core.setOutput('failures_linux', String(platformFailures.linux)); + core.setOutput('failures_macos', String(platformFailures.macos)); + core.setOutput('failures_windows', String(platformFailures.windows)); + + // ── 3. Mass-failure check ──────────────────────────────────────── + // If every platform failed we almost certainly have an infra issue, + // not individual test bugs. Skip the agent to avoid wasting tokens. + const allPlatformsFailed = + platformFailures.linux > 0 && + platformFailures.macos > 0 && + platformFailures.windows > 0; + + if (allPlatformsFailed || totalFailures >= threshold) { + core.warning(`Mass-failure detected (total=${totalFailures}, threshold=${threshold}) — skipping agent`); + core.setOutput('skip', 'true'); + core.setOutput('skip_reason', 'mass_failure'); + return; + } + + core.setOutput('skip', 'false'); + + // ── 4. Collect server URLs from the PR comment ─────────────────── + // post-server-info already posted the server URLs; read them back + // so we can include them in the agent prompt without fetching + // anything extra. + const MARKER = ''; + let serverBlock = '(server URLs not found in PR comment)'; + try { + const comments = await github.rest.issues.listComments({ + owner, repo, issue_number: prNumber, per_page: 100, + }); + const infoComment = (comments.data || []).find( + (c) => c.body && c.body.includes(MARKER), + ); + if (infoComment) { + // Extract just the table lines — skip the header and trailing lines + const tableLines = infoComment.body. + split('\n'). + filter((l) => l.startsWith('|') && !l.startsWith('| Platform')). + slice(0, 4); // at most 3 platform rows + divider + serverBlock = tableLines.join('\n'); + } + } catch (err) { + core.warning(`Could not read PR comments: ${err.message}`); + } + + // ── 5. Build the failing-platform summary (short, no full logs) ── + const failingSummary = Object.entries(platformFailures). + filter(([, count]) => count > 0). + map(([platform, count]) => `- ${platform}: ${count} failure(s)`). + join('\n'); + + core.setOutput('failing_summary', failingSummary); + core.setOutput('server_block', serverBlock); + core.setOutput('workflow_run_url', `https://github.com/${owner}/${repo}/actions/runs/${runId}`); + + - name: Post skip notice for mass failures + if: steps.triage.outputs.skip == 'true' && steps.triage.outputs.skip_reason == 'mass_failure' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + const prNumber = parseInt('${{ steps.triage.outputs.pr_number }}', 10); + if (!prNumber) { return; } + const total = '${{ steps.triage.outputs.total_failures }}'; + const runUrl = '${{ steps.triage.outputs.workflow_run_url }}'; + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: [ + '### :warning: E2E Fix Agent Skipped — Mass Failure Detected', + '', + `All platforms reported failures (${total} total) which typically indicates a **build or infrastructure issue** rather than individual test bugs.`, + '', + 'The Cursor agent was not launched to avoid wasting tokens on a systemic problem.', + '', + '**What to do:**', + '1. Check the [workflow run](' + runUrl + ') for build errors or server provisioning failures.', + '2. If it was a transient CI issue, push an empty commit to re-trigger E2E.', + '3. If it is a real product regression, investigate the failing tests manually.', + ].join('\n'), + }); + + - name: Launch Cursor agent to fix E2E failures + if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' + env: + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + PR_URL: ${{ steps.triage.outputs.pr_url }} + FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} + SERVER_BLOCK: ${{ steps.triage.outputs.server_block }} + WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} + run: | + # Build the prompt as a file to avoid shell quoting issues with + # multi-line strings and special characters. + cat > /tmp/agent-prompt.txt << 'PROMPT_EOF' + E2E tests failed on this PR. Your job is to investigate each failure, + classify it, and act accordingly. + + FAILING PLATFORMS + ----------------- + ${FAILING_SUMMARY} + + SERVER URLS (provisioned by Matterwick — still live) + ----------------------------------------------------- + ${SERVER_BLOCK} + + WORKFLOW RUN (for full logs and JUnit artifacts) + ------------------------------------------------- + ${WORKFLOW_RUN_URL} + + INSTRUCTIONS + ------------ + 1. Read the CI workflow run logs to find the exact failing test names + and error messages. Download the JUnit XML artifact if available. + + 2. For each failing test, open the test source file in e2e/specs/ and + classify the failure: + + TEST BUG — the test itself is wrong: + Examples: stale selector, wrong timeout, missing wait, wrong + assertion value, race condition in setup/teardown. + Action: Fix the test code and push a commit to this branch. + + PRODUCT BUG — the app behavior does not match what the test expects: + Examples: UI element removed/renamed, IPC channel changed, feature + regression in src/. + Action: Do NOT modify the test. Instead post a PR comment describing: + - Which test failed + - What the test expected vs what actually happened + - Which src/ file is likely responsible + Label the comment "Product Bug: ". + + 3. IMPORTANT RULES to avoid wasting tokens: + - Fix at most 8 test files per run. If more than 8 distinct test + files are failing, fix the 8 most-common failure patterns and + post a comment listing the remaining ones as product bugs or + deferred test bugs. + - Do NOT rewrite tests from scratch. Make the smallest change that + fixes the failure. + - Do NOT touch any file in src/ (app source). Your scope is + e2e/specs/, e2e/helpers/, e2e/fixtures/, and e2e/utils/. + - If a selector is missing from the live server (checked via + MM_TEST_SERVER_URL), that is a product bug — report it. + - Run each fixed spec against the live server to confirm it passes + before committing. + + 4. After all fixes are committed (or all bugs reported), post a single + summary comment on the PR with: + - List of test bugs fixed (file name + one-line description) + - List of product bugs reported (test name + symptom) + PROMPT_EOF + + # Substitute the env vars into the prompt + PROMPT=$(sed \ + -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ + -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + -e "s|\${WORKFLOW_RUN_URL}|${WORKFLOW_RUN_URL}|g" \ + /tmp/agent-prompt.txt) + + # Launch the Cursor agent + RESPONSE=$(curl --silent --fail \ + --request POST \ + --url "${CURSOR_API_URL}" \ + -u "${CURSOR_API_KEY}:" \ + --header 'Content-Type: application/json' \ + --data "$(jq -n \ + --arg prompt "${PROMPT}" \ + --arg prUrl "${PR_URL}" \ + '{ + prompt: { text: $prompt }, + source: { prUrl: $prUrl }, + target: { autoBranch: false } + }' + )") + + AGENT_ID=$(echo "${RESPONSE}" | jq -r '.id // empty') + if [ -z "${AGENT_ID}" ]; then + echo "::error::Failed to launch Cursor agent. Response: ${RESPONSE}" + exit 1 + fi + + AGENT_URL=$(echo "${RESPONSE}" | jq -r '.target.url // empty') + echo "Cursor agent launched: ${AGENT_ID}" + echo "Agent URL: ${AGENT_URL}" + echo "AGENT_ID=${AGENT_ID}" >> "$GITHUB_OUTPUT" + echo "AGENT_URL=${AGENT_URL}" >> "$GITHUB_OUTPUT" + + - name: Post agent launch notice on PR + if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + AGENT_URL: ${{ env.AGENT_URL }} + FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} + WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} + with: + github-token: ${{ github.token }} + script: | + const prNumber = parseInt('${{ steps.triage.outputs.pr_number }}', 10); + if (!prNumber) { return; } + const agentUrl = process.env.AGENT_URL || '(pending)'; + const runUrl = process.env.WORKFLOW_RUN_URL; + const summary = process.env.FAILING_SUMMARY; + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: [ + '### :robot: Cursor Agent Launched to Fix E2E Failures', + '', + '**Failing platforms:**', + summary, + '', + 'The agent will investigate each failure, fix test bugs, and report product bugs.', + 'It will push fixes directly to this branch (which will trigger a new E2E run).', + '', + agentUrl ? `**Agent:** ${agentUrl}` : '', + `**CI run:** ${runUrl}`, + ].filter(Boolean).join('\n'), + }); From 2fe719d9287cb360dd2c801579ca18700987cffc Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 06:45:12 +0000 Subject: [PATCH 12/22] e2e: add @cursor command workflow for PR comment-driven agent dispatch New file: .github/workflows/e2e-cursor-commands.yml Adds two @cursor commands that any PR contributor can invoke via a PR comment. Both trigger immediately (issue_comment event) and reply with a reaction and a confirmation comment so the user knows the command landed. Supported commands (case-insensitive): @cursor fix e2e @cursor fix e2e failures Launched after a large change caused mass E2E failures that the automatic fix trigger skipped. The agent looks for a shared root cause first (renamed selector, changed IPC channel, modified config), fixes it at the shared layer, then handles remaining individual test bugs. Product bugs are reported via PR comment, not test modification. Hard limit: 10 test files per run. @cursor add e2e tests @cursor add e2e tests for pr Generates new E2E test cases covering the behavioral changes in the PR. The agent reads each changed non-e2e file, writes Playwright tests in e2e/specs/, and runs them against the live server before committing. Hard limit: 3 new spec files per run. Both commands: - React to the triggering comment with :eyes: immediately - Use source.prUrl + target.autoBranch:false so the agent pushes directly to the PR branch (triggering a new CI run) - Include server URLs from the existing PR comment so the agent can reproduce and validate against the exact same servers - Post a summary comment when done (or a failure notice if the API key is missing/invalid) Requires: CURSOR_API_KEY repository secret. Co-authored-by: yasser khan --- .github/workflows/e2e-cursor-commands.yml | 397 ++++++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 .github/workflows/e2e-cursor-commands.yml diff --git a/.github/workflows/e2e-cursor-commands.yml b/.github/workflows/e2e-cursor-commands.yml new file mode 100644 index 00000000000..c5bf77350ff --- /dev/null +++ b/.github/workflows/e2e-cursor-commands.yml @@ -0,0 +1,397 @@ +name: Cursor E2E Commands + +# Allows PR authors and contributors to invoke Cursor agents via PR comments. +# +# Supported commands (case-insensitive, anywhere in the comment): +# +# @cursor fix e2e — fix all currently failing E2E tests on this PR, +# including mass failures. Use this after a large +# change that caused widespread failures. +# +# @cursor fix e2e failures — alias for the above +# +# @cursor add e2e tests — generate new E2E test cases that cover the +# changes introduced by this PR. +# +# @cursor add e2e tests for pr — alias for the above +# +# The workflow replies to the comment immediately with a :eyes: reaction so the +# user knows it was received, then launches the appropriate Cursor agent. +# The agent pushes results (fixes or new test files) directly to the PR branch. +# +# Requires: CURSOR_API_KEY repository secret. + +on: + issue_comment: + types: [created] + +permissions: + contents: read + pull-requests: write + issues: write + actions: read + +jobs: + handle-command: + name: Handle @cursor command + runs-on: ubuntu-22.04 + # Only run on PR comments (not issue comments) from users with write access. + # github.event.issue.pull_request is set only for PR comments. + if: | + github.event.issue.pull_request != null && + ( + contains(github.event.comment.body, '@cursor fix e2e') || + contains(github.event.comment.body, '@cursor add e2e') + ) + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: React to comment so the user knows it was received + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: ${{ github.event.comment.id }}, + content: 'eyes', + }); + + - name: Parse command and gather context + id: context + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + const { owner, repo } = context.repo; + const commentBody = `${{ github.event.comment.body }}`.toLowerCase(); + const prNumber = ${{ github.event.issue.number }}; + + // Determine which command was invoked + const isFix = commentBody.includes('@cursor fix e2e'); + const isAdd = commentBody.includes('@cursor add e2e'); + core.setOutput('command', isFix ? 'fix' : 'add'); + core.setOutput('pr_number', String(prNumber)); + + // Get PR metadata + const pr = await github.rest.pulls.get({ owner, repo, pull_number: prNumber }); + const prUrl = pr.data.html_url; + const headBranch = pr.data.head.ref; + const headSha = pr.data.head.sha; + core.setOutput('pr_url', prUrl); + core.setOutput('head_branch', headBranch); + + // ── Find the latest E2E run for this PR branch ─────────────────── + // Look for the most recent completed "Electron Playwright Tests" + // workflow_dispatch run whose head_sha matches the PR. + let latestRunUrl = ''; + let failingSummary = ''; + let serverBlock = '(not available)'; + + try { + const workflows = await github.rest.actions.listRepoWorkflows({ owner, repo }); + const e2eWf = workflows.data.workflows.find( + (w) => w.name === 'Electron Playwright Tests' + ); + if (e2eWf) { + const runs = await github.rest.actions.listWorkflowRuns({ + owner, repo, + workflow_id: e2eWf.id, + event: 'workflow_dispatch', + per_page: 20, + }); + const matchingRun = runs.data.workflow_runs.find( + (r) => r.head_sha === headSha || r.head_branch === headBranch + ); + if (matchingRun) { + latestRunUrl = matchingRun.html_url; + + if (isFix) { + // Collect per-platform job conclusions + const jobs = await github.rest.actions.listJobsForWorkflowRun({ + owner, repo, run_id: matchingRun.id, per_page: 100, + }); + const lines = []; + for (const job of jobs.data.jobs) { + if (job.conclusion === 'failure') { + const n = job.name.toLowerCase(); + const plat = n.includes('linux') || n.includes('ubuntu') ? 'linux' + : n.includes('macos') || n.includes('darwin') ? 'macos' + : n.includes('windows') ? 'windows' + : null; + if (plat) { + lines.push(`- ${plat}: failed`); + } + } + } + failingSummary = lines.length > 0 + ? lines.join('\n') + : 'All platforms may have failed — treating as mass-failure scenario.'; + } + } + } + } catch (err) { + core.warning(`Could not fetch E2E run info: ${err.message}`); + } + + core.setOutput('latest_run_url', latestRunUrl); + core.setOutput('failing_summary', failingSummary); + + // ── Read server URLs from the PR comment ───────────────────────── + try { + const comments = await github.rest.issues.listComments({ + owner, repo, issue_number: prNumber, per_page: 100, + }); + const infoComment = (comments.data || []).find( + (c) => c.body && c.body.includes('') + ); + if (infoComment) { + const tableLines = infoComment.body. + split('\n'). + filter((l) => l.startsWith('|') && !l.startsWith('| Platform')). + slice(0, 4); + serverBlock = tableLines.join('\n'); + } + } catch (err) { + core.warning(`Could not read PR comments: ${err.message}`); + } + + core.setOutput('server_block', serverBlock); + + // ── Collect the PR diff summary for "add tests" command ────────── + // List only the changed file paths — cheap and keeps the prompt small. + let changedFiles = ''; + if (isAdd) { + try { + const files = await github.rest.pulls.listFiles({ + owner, repo, pull_number: prNumber, per_page: 100, + }); + changedFiles = files.data. + filter((f) => !f.filename.startsWith('e2e/')). + map((f) => `${f.status.padEnd(9)} ${f.filename}`). + join('\n'); + } catch (err) { + core.warning(`Could not list PR files: ${err.message}`); + } + } + core.setOutput('changed_files', changedFiles); + + # ── FIX command ────────────────────────────────────────────────────────── + - name: Build fix-e2e prompt + if: steps.context.outputs.command == 'fix' + id: fix_prompt + env: + PR_URL: ${{ steps.context.outputs.pr_url }} + FAILING_SUMMARY: ${{ steps.context.outputs.failing_summary }} + SERVER_BLOCK: ${{ steps.context.outputs.server_block }} + LATEST_RUN_URL: ${{ steps.context.outputs.latest_run_url }} + REQUESTER: ${{ github.event.comment.user.login }} + run: | + cat > /tmp/prompt.txt << 'PROMPT_EOF' + A developer (@${REQUESTER}) has asked you to fix ALL failing E2E tests on + this PR, including mass failures that the automatic trigger skipped. + + This was likely caused by a large change to the codebase. Treat widespread + failures as potentially having a shared root cause (e.g. a renamed selector, + a changed IPC channel, a modified config shape) before fixing each test + individually. + + FAILING PLATFORMS + ----------------- + ${FAILING_SUMMARY} + + SERVER URLS (provisioned by Matterwick — still live) + ----------------------------------------------------- + ${SERVER_BLOCK} + + LATEST E2E WORKFLOW RUN (for full logs and JUnit artifacts) + ------------------------------------------------------------ + ${LATEST_RUN_URL} + + INSTRUCTIONS + ------------ + 1. Read the CI workflow run logs to identify ALL failing test names and + their error messages. + + 2. Look for a SHARED ROOT CAUSE first: + - Same selector missing across many tests → likely a renamed component + - Same IPC channel / API call failing → likely a changed interface + - Same timeout pattern → likely a new async flow or loading state + If you find a shared cause, fix it at the shared layer (helper, + fixture, or app-side test hook) rather than patching every spec. + + 3. For each remaining failure, classify as TEST BUG or PRODUCT BUG: + TEST BUG → Fix in e2e/specs/, e2e/helpers/, e2e/fixtures/ only. + Run the spec against the live server to confirm it passes. + PRODUCT BUG → Do NOT modify the test. Post a PR comment: + "Product Bug: — expected X, got Y, likely in " + + 4. HARD LIMITS to keep token usage reasonable: + - Fix at most 10 distinct test files per run. + - If more than 10 files need fixing, fix the most common pattern, + post a comment listing the rest, and stop. + - Never touch src/ files. Only e2e/. + + 5. After all fixes and reports, post a single summary comment on the PR. + PROMPT_EOF + + PROMPT=$(sed \ + -e "s|\${REQUESTER}|${REQUESTER}|g" \ + -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ + -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + -e "s|\${LATEST_RUN_URL}|${LATEST_RUN_URL}|g" \ + /tmp/prompt.txt) + + # Write to file for the launch step to read + echo "${PROMPT}" > /tmp/final-prompt.txt + echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" + + # ── ADD TESTS command ──────────────────────────────────────────────────── + - name: Build add-e2e-tests prompt + if: steps.context.outputs.command == 'add' + id: add_prompt + env: + PR_URL: ${{ steps.context.outputs.pr_url }} + CHANGED_FILES: ${{ steps.context.outputs.changed_files }} + SERVER_BLOCK: ${{ steps.context.outputs.server_block }} + REQUESTER: ${{ github.event.comment.user.login }} + run: | + cat > /tmp/prompt.txt << 'PROMPT_EOF' + A developer (@${REQUESTER}) has asked you to add E2E test cases that cover + the changes introduced by this PR. + + CHANGED FILES IN THIS PR (non-e2e only) + ---------------------------------------- + ${CHANGED_FILES} + + SERVER URLS (to run and validate your new tests against) + --------------------------------------------------------- + ${SERVER_BLOCK} + + INSTRUCTIONS + ------------ + 1. Read each changed file to understand what behavior was added or modified. + + 2. For each meaningful behavioral change, write a new Playwright E2E test + (or extend an existing spec if one already covers that area). + + 3. Test placement rules: + - New specs go in e2e/specs/ under the most relevant subdirectory. + - Tests that require a live Mattermost server must use demoMattermostConfig + and be tagged @all so they run in CI. + - Use the fixture layer (e2e/fixtures/index.ts) — do not re-implement + launch / login logic. + - Follow patterns in existing e2e/specs/ files for style and structure. + - Tag each new test with @P2 @all or the appropriate priority. + + 4. After writing tests, run each new spec against the live server URL to + confirm it passes before committing. + + 5. HARD LIMITS: + - Add at most 3 new spec files per run. + - Do not modify src/ files. + - If you cannot determine a clear, testable behavior from the diff, + post a comment explaining what is ambiguous rather than writing + a low-quality test. + + 6. After committing, post a PR comment listing each new test file with a + one-line description of what it covers. + PROMPT_EOF + + PROMPT=$(sed \ + -e "s|\${REQUESTER}|${REQUESTER}|g" \ + -e "s|\${CHANGED_FILES}|${CHANGED_FILES}|g" \ + -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + /tmp/prompt.txt) + + echo "${PROMPT}" > /tmp/final-prompt.txt + echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" + + # ── Launch Cursor agent (shared for both commands) ─────────────────────── + - name: Launch Cursor agent + id: launch + env: + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} + PR_URL: ${{ steps.context.outputs.pr_url }} + CURSOR_API_URL: https://api.cursor.com/v0/agents + run: | + PROMPT=$(cat /tmp/final-prompt.txt) + + RESPONSE=$(curl --silent --fail \ + --request POST \ + --url "${CURSOR_API_URL}" \ + -u "${CURSOR_API_KEY}:" \ + --header 'Content-Type: application/json' \ + --data "$(jq -n \ + --arg prompt "${PROMPT}" \ + --arg prUrl "${PR_URL}" \ + '{ + prompt: { text: $prompt }, + source: { prUrl: $prUrl }, + target: { autoBranch: false } + }' + )") + + AGENT_ID=$(echo "${RESPONSE}" | jq -r '.id // empty') + if [ -z "${AGENT_ID}" ]; then + echo "::error::Failed to launch Cursor agent. Response: ${RESPONSE}" + exit 1 + fi + + AGENT_URL=$(echo "${RESPONSE}" | jq -r '.target.url // empty') + echo "Agent launched: ${AGENT_ID} — ${AGENT_URL}" + echo "agent_url=${AGENT_URL}" >> "$GITHUB_OUTPUT" + + - name: Reply to comment with agent link + if: always() && steps.launch.outcome == 'success' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + AGENT_URL: ${{ steps.launch.outputs.agent_url }} + COMMAND: ${{ steps.context.outputs.command }} + with: + github-token: ${{ github.token }} + script: | + const prNumber = ${{ github.event.issue.number }}; + const agentUrl = process.env.AGENT_URL || '(pending)'; + const command = process.env.COMMAND; + const action = command === 'fix' + ? 'fix failing E2E tests (including mass failures)' + : 'add E2E tests for the current PR changes'; + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: [ + `### :robot: Cursor Agent Launched`, + '', + `Task: **${action}**`, + '', + 'The agent will push results directly to this branch.', + agentUrl ? `\n**Agent:** ${agentUrl}` : '', + ].filter(Boolean).join('\n'), + }); + + - name: Reply to comment on failure + if: always() && steps.launch.outcome == 'failure' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ github.event.issue.number }}, + body: [ + '### :x: Cursor Agent Launch Failed', + '', + 'Could not launch the agent. Possible causes:', + '- `CURSOR_API_KEY` repository secret is missing or invalid.', + '- Cursor API is temporarily unavailable.', + '', + 'Check the [workflow run](' + + `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + + ') for details.', + ].join('\n'), + }); From ecbff2d19cdb1cffaaaafd85a2aae3de2bb4e06f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 06:48:46 +0000 Subject: [PATCH 13/22] docs: add GitHub Actions coding practice rules to AGENTS.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Derived from every CodeRabbit and DryRun Security finding raised on PRs #3773 and #3774. Each rule is direct and includes a wrong/correct example so future agents cannot repeat the same mistakes. Rules added: 1. Script injection — never interpolate ${{ inputs.* }} inside github-script; use env: + process.env.* instead. JSON arrays must use JSON.parse(). 2. Markdown injection — always sanitize platform/URL values with sanitizeMd() (escape newlines, HTML entities, pipes, backticks, brackets) before inserting into PR comment tables. 3. Permissions — issues:write and pull-requests:write must be declared at job level, never at workflow top level. 4. Disabled/noop jobs — must set permissions: {} and if: ${{ false }} to prevent unnecessary runner allocation and permission grants. 5. Fault tolerance — auxiliary side-effect steps (posting comments, labels, notifications) must set continue-on-error: true and wrap API calls in try/catch with core.warning(). 6. Job condition gates — do not gate on an optional input like inputs.pr_number != '' when fallback resolution logic can find the PR without it; gate on the minimum necessary condition only. 7. Dead code — do not leave commented-out job blocks in workflow YAML. Remove them entirely; rely on git history. 8. Input validation — use /^\d+$/ + Number.isInteger() + > 0 for PR numbers; never parseInt() alone. Never fall back to prs.data[0] when a SHA match fails. 9. workflow_run execution context — these workflows run from the DEFAULT BRANCH. Changes to utilities called by workflow_run must land on master to take effect. 10. Fenced code blocks — always include a language tag (bash, yaml, ts, etc.). Co-authored-by: yasser khan --- AGENTS.md | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index a56686a7ac1..98af6d2a131 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,3 +40,204 @@ E2E tests live in `e2e/` with a separate `package.json`. See `e2e/AGENTS.md` for ### Native modules The `postinstall` script runs `electron-builder install-app-deps` to rebuild native modules (registry-js, cf-prefs, etc.) for the current Electron version. If you see native module errors after `npm install`, ensure postinstall completed successfully. + +--- + +## GitHub Actions coding practice + +Every rule below was derived from a concrete CodeRabbit or DryRun Security finding raised on PRs in this repository. Violating any of these will cause the same review comment to re-appear. + +### Security — script injection + +**Rule: Never interpolate `${{ inputs.* }}` or `${{ steps.*.outputs.* }}` directly inside a `github-script` `script:` block.** + +Interpolation happens before the JS is parsed. A single-quote in the value breaks out of the string literal and executes arbitrary JavaScript on the runner. + +```yaml +# WRONG — ${{ inputs.foo }} is injected into JS source text +script: | + doSomething('${{ inputs.foo }}'); + +# CORRECT — pass via env:, read as process.env.* +env: + FOO: ${{ inputs.foo }} +script: | + doSomething(process.env.FOO); +``` + +The **only** safe interpolation inside `script:` is for trusted internal job outputs (e.g. `${{ needs.job.outputs.value }}` that was produced by your own workflow step, not derived from user input). + +**Rule: When passing a JSON array from a job output into a `github-script`, use `env:` + `JSON.parse()`.** + +```yaml +# WRONG +script: | + const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + +# CORRECT +env: + PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} +script: | + const platforms = JSON.parse(process.env.PLATFORMS); +``` + +### Security — Markdown injection in PR comments + +**Rule: Always sanitize values before inserting them into Markdown tables or fenced blocks.** + +Unsanitized `platform` or `url` values can inject pipe characters to break a table, or inject HTML/links. Use a helper like: + +```js +const sanitizeMd = (str) => String(str ?? ''). + replace(/[\r\n]/g, ' '). // no newline injection + replace(/&/g, '&'). // no HTML entities + replace(//g, '>'). + replace(/[|`[\]]/g, (ch) => `\\${ch}`); // no table/code breaks +``` + +### Permissions — least privilege + +**Rule: Declare `issues: write` and `pull-requests: write` at job level only, never at workflow top level.** + +Top-level permissions apply to every job in the file, including jobs that only need `contents: read`. Grant write scopes only on the specific job that calls the API: + +```yaml +# WRONG — every job gets write access to issues and PRs +permissions: + contents: read + statuses: write + issues: write + pull-requests: write + +# CORRECT — broad reads at top level, writes scoped to the job that needs them +permissions: + contents: read + statuses: write + +jobs: + post-comment: + permissions: + issues: write + pull-requests: write +``` + +**Rule: Disabled/noop jobs must set `permissions: {}` and `if: ${{ false }}`.** + +A noop job that still has `issues: write` holds a live permission unnecessarily. If a job is disabled, clear its permissions and gate it with `if: false` so it never allocates a runner: + +```yaml +noop: + runs-on: ubuntu-22.04 + if: ${{ false }} + permissions: {} + steps: + - run: echo "disabled" +``` + +### Fault tolerance — `continue-on-error` on auxiliary steps + +**Rule: Auxiliary side-effect steps (posting comments, removing labels, sending notifications) must set `continue-on-error: true`.** + +A comment-posting failure is not a test failure. If the auxiliary step throws, it should warn but not mark the entire job as failed: + +```yaml +- name: Post PR comment + uses: actions/github-script@... + continue-on-error: true # ← required for all non-critical steps + with: + script: | + try { + await postComment(...); + } catch (err) { + core.warning(`Comment failed: ${err.message}`); + } +``` + +### Job condition gates + +**Rule: Do not gate a job on `inputs.pr_number != ''` if the job has fallback resolution logic that can find the PR without the input.** + +Gating on an optional input prevents the fallback from ever running, silently skipping the job for valid runs where the input was not provided: + +```yaml +# WRONG — skips even when findPrNumber() can resolve via branch/SHA lookup +if: ${{ !inputs.nightly && inputs.pr_number != '' }} + +# CORRECT — only skip nightly runs; let the fallback handle missing pr_number +if: ${{ !inputs.nightly }} +``` + +### Dead code in workflow files + +**Rule: Do not leave commented-out job blocks in workflow YAML files.** + +Git history preserves the implementation. Commented blocks drift from the live logic, mislead future readers, and generate repeated review noise. Remove the block entirely and add a one-line comment pointing to git history if the reason needs explaining: + +```yaml +# WRONG — 60 lines of commented-out job YAML +# remove-e2e-label: +# runs-on: ubuntu-22.04 +# steps: +# ... + +# CORRECT — single explanatory line +# remove-e2e-label is intentionally omitted: see e2e-label-cleanup.yml for context. +``` + +### Input validation in JavaScript utilities + +**Rule: Always use strict positive-integer validation when parsing PR numbers or other numeric inputs.** + +`parseInt("123abc", 10)` returns `123`. `parseInt("-42", 10)` returns `-42`. Both pass a truthiness check. Use the pattern below: + +```js +// WRONG +const n = parseInt(input, 10); +if (n) { use(n); } + +// CORRECT +const trimmed = String(input ?? '').trim(); +if ((/^\d+$/).test(trimmed)) { + const parsed = Number(trimmed); + if (Number.isInteger(parsed) && parsed > 0) { + use(parsed); + } +} +``` + +**Rule: Do not fall back to `prs.data[0]` when a SHA-based match fails.** + +Falling back to the first open PR on a branch can post to the wrong PR if multiple PRs share the same head branch. Return `null` and skip the action: + +```js +// WRONG — may post to wrong PR +return (matching || prs.data[0]).number; + +// CORRECT — return null and let caller skip +return matching ? matching.number : null; +``` + +### `workflow_run` execution context + +**Rule: `workflow_run` workflows always execute from the DEFAULT BRANCH, not from the triggering PR branch.** + +This means: any JS utility called via `require('./e2e/utils/...')` inside a `workflow_run` workflow will be the version on `master`, not the PR branch version — unless the checkout step explicitly checks out the triggering commit. When disabling label-removal or other behavior, the change must land on the default branch to take effect. + +### Fenced code blocks in Markdown + +**Rule: Always specify a language tag on fenced code blocks.** + +Bare triple-backtick blocks are flagged by markdown linters and render without syntax highlighting. Use `bash`, `yaml`, `typescript`, `js`, etc.: + +```markdown + +\``` +npm install +\``` + + +\```bash +npm install +\``` +``` From 322cd0cfbc4fc158a4211f0b3986bc2ef8c79e17 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 07:27:23 +0000 Subject: [PATCH 14/22] e2e: fix loginToMattermost for fresh servers landing on /select_team MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of mass failures across all 3 platforms in CI run #24329752006: Matterwick cloud servers provisioned fresh on master have no default team, so after login the Mattermost server redirects to /select_team instead of directly to a channel. waitForAppShell only checked for channel-view selectors (#post_textbox, #channelHeaderTitle, search bar) which are absent on /select_team, causing it to time out and throw: 'loginToMattermost: login succeeded but the app shell never became ready.' This cascaded to every test whose beforeAll called loginToMattermost (window_menu, drag_and_drop, popout_windows, tab_management, view_menu, copy_link, add_server_modal, and others). Fix: add /select_team and /create_team URL matching as a fourth accepted 'app shell ready' condition in waitForAppShell. The user is authenticated when either page appears — they just have not yet joined a team. This is consistent with the existing behaviour for channel pages and does not weaken the check for servers that do have teams. Also fix: AGENTS.md heading hyphenation (CodeRabbit duplicate comment) 'Cursor Cloud specific instructions' -> 'Cursor Cloud-specific instructions' Co-authored-by: yasser khan --- AGENTS.md | 2 +- e2e/helpers/login.ts | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 98af6d2a131..6268a4f7755 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # AGENTS.md -## Cursor Cloud specific instructions +## Cursor Cloud-specific instructions ### Overview diff --git a/e2e/helpers/login.ts b/e2e/helpers/login.ts index c41d3d6bc30..d529d35fd08 100644 --- a/e2e/helpers/login.ts +++ b/e2e/helpers/login.ts @@ -5,9 +5,15 @@ import type {ServerView} from './serverView'; async function waitForAppShell(win: ServerView, timeout: number) { const results = await Promise.allSettled([ + // Standard channel view indicators win.waitForSelector('#post_textbox', {timeout}), win.waitForSelector('#channelHeaderTitle', {timeout}), win.waitForSelector('input.search-bar.form-control', {timeout}), + // Fresh servers with no team yet land on /select_team or /create_team. + // The user is authenticated; treat these pages as a successful login. + win.waitForURL((url) => + url.pathname.startsWith('/select_team') || + url.pathname.startsWith('/create_team'), {timeout}), ]); return results.some((result) => result.status === 'fulfilled'); From acbb8a7f792fb11c0ca090ed6b6fd47fdcca6311 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 07:31:58 +0000 Subject: [PATCH 15/22] e2e: fix ESLint lines-around-comment errors in login.ts Co-authored-by: yasser khan --- e2e/helpers/login.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/e2e/helpers/login.ts b/e2e/helpers/login.ts index d529d35fd08..fed4cad8ad5 100644 --- a/e2e/helpers/login.ts +++ b/e2e/helpers/login.ts @@ -5,10 +5,12 @@ import type {ServerView} from './serverView'; async function waitForAppShell(win: ServerView, timeout: number) { const results = await Promise.allSettled([ + // Standard channel view indicators win.waitForSelector('#post_textbox', {timeout}), win.waitForSelector('#channelHeaderTitle', {timeout}), win.waitForSelector('input.search-bar.form-control', {timeout}), + // Fresh servers with no team yet land on /select_team or /create_team. // The user is authenticated; treat these pages as a successful login. win.waitForURL((url) => From 7ab93abf9e6568390beacbca063f2262e2ce16f6 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 13 Apr 2026 09:17:03 +0000 Subject: [PATCH 16/22] e2e: provision test server team before suite starts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem ------- Matterwick provisions fresh cloud Mattermost instances on every E2E run. Fresh servers have no default team, so after login the app lands on /select_team instead of a channel. Every server-backed test whose beforeAll called loginToMattermost() then failed — either throwing 'app shell never became ready' or 'Target page closed' (Windows) — cascading to 18 failures on Windows, 7 on macOS, 5 on Linux per run. Solution --------- New file: e2e/utils/server-setup.js provisionServer() — runs once at the start of the E2E suite via globalSetup. Uses the Mattermost REST API (no extra npm deps; Node 18 fetch is used) to: 1. Authenticate as the admin user (MM_TEST_USER_NAME / MM_TEST_PASSWORD) 2. Check whether a team named 'e2e-team' exists; create it (type: open) if absent. 3. Add the admin user to the team (idempotent — 409 is silently ignored). 4. Confirm town-square channel exists (log only). Fully idempotent: safe to call on servers that already have the team. No-ops silently when MM_TEST_SERVER_URL or MM_TEST_PASSWORD are absent (local runs without a server are unaffected). e2e/global-setup.ts Import and await provisionServer() at the end of globalSetup so it runs exactly once before Playwright spins up any workers. e2e/helpers/login.ts Revert the /select_team band-aid added in the previous commit — now that provisioning guarantees a team exists, login always lands in a channel and the extra URL check is no longer needed. Co-authored-by: yasser khan --- e2e/global-setup.ts | 9 ++ e2e/helpers/login.ts | 6 -- e2e/utils/server-setup.js | 191 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 e2e/utils/server-setup.js diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts index 8b545e2b21c..8c9e5140532 100644 --- a/e2e/global-setup.ts +++ b/e2e/global-setup.ts @@ -6,6 +6,9 @@ import * as fs from 'fs'; import * as os from 'os'; import * as path from 'path'; +// eslint-disable-next-line @typescript-eslint/no-var-requires +const {provisionServer} = require('./utils/server-setup'); + const E2E_PROCESS_REGISTRY = path.join(os.tmpdir(), 'mattermost-desktop-e2e-main-pids.txt'); /** @@ -35,4 +38,10 @@ export default async function globalSetup() { // Non-fatal — tests still run, just potentially with the Resume dialog } } + + // Provision the Mattermost test server once before any tests run. + // Creates the default team and adds the admin user so login lands in a + // channel (not /select_team) on fresh CI servers. + // No-ops when MM_TEST_SERVER_URL or MM_TEST_PASSWORD are absent. + await provisionServer(); } diff --git a/e2e/helpers/login.ts b/e2e/helpers/login.ts index d529d35fd08..c41d3d6bc30 100644 --- a/e2e/helpers/login.ts +++ b/e2e/helpers/login.ts @@ -5,15 +5,9 @@ import type {ServerView} from './serverView'; async function waitForAppShell(win: ServerView, timeout: number) { const results = await Promise.allSettled([ - // Standard channel view indicators win.waitForSelector('#post_textbox', {timeout}), win.waitForSelector('#channelHeaderTitle', {timeout}), win.waitForSelector('input.search-bar.form-control', {timeout}), - // Fresh servers with no team yet land on /select_team or /create_team. - // The user is authenticated; treat these pages as a successful login. - win.waitForURL((url) => - url.pathname.startsWith('/select_team') || - url.pathname.startsWith('/create_team'), {timeout}), ]); return results.some((result) => result.status === 'fulfilled'); diff --git a/e2e/utils/server-setup.js b/e2e/utils/server-setup.js new file mode 100644 index 00000000000..9c88dc48da7 --- /dev/null +++ b/e2e/utils/server-setup.js @@ -0,0 +1,191 @@ +// Copyright (c) 2016-present Mattermost, Inc. All Rights Reserved. +// See LICENSE.txt for license information. +/* eslint-disable no-console -- intentional logging in setup scripts */ + +/** + * Mattermost server provisioning helper. + * + * Runs once before the E2E suite starts (called from global-setup.ts). + * Uses the Mattermost REST API to ensure a default team and the test admin + * user exist so that: + * - Login does not land on /select_team (no-team fresh servers). + * - All server-backed tests can navigate to channels immediately. + * + * Idempotent: safe to call multiple times. Already-existing resources are + * detected and skipped; no error is thrown. + * + * Required env vars (all optional — the function no-ops when absent): + * MM_TEST_SERVER_URL e.g. https://desktop-pr-3773-linux-xxx.test.mattermost.cloud + * MM_TEST_USER_NAME admin username (default: admin) + * MM_TEST_PASSWORD admin password + */ + +const DEFAULT_TEAM_NAME = 'e2e-team'; +const DEFAULT_TEAM_DISPLAY_NAME = 'E2E Test Team'; +const DEFAULT_CHANNEL = 'town-square'; + +/** + * Thin wrapper around fetch that throws on non-2xx with a readable message. + * + * @param {string} url + * @param {RequestInit} options + * @returns {Promise} + */ +async function apiRequest(url, options = {}) { + const response = await fetch(url, { + ...options, + headers: { + 'Content-Type': 'application/json', + ...(options.headers ?? {}), + }, + }); + + const text = await response.text(); + let body; + try { + body = JSON.parse(text); + } catch { + body = text; + } + + if (!response.ok) { + const status = response.status; + const message = typeof body === 'object' ? (body.message ?? JSON.stringify(body)) : body; + throw new Error(`API ${options.method ?? 'GET'} ${url} → ${status}: ${message}`); + } + + return body; +} + +/** + * Provision the test server so the admin user lands in a channel after login. + * + * Steps (all idempotent): + * 1. Authenticate as the admin user → obtain session token. + * 2. Ensure a team named DEFAULT_TEAM_NAME exists (create if absent). + * 3. Ensure the admin user is a member of that team. + * + * @returns {Promise} + */ +async function provisionServer() { + const serverURL = process.env.MM_TEST_SERVER_URL; + const username = process.env.MM_TEST_USER_NAME ?? 'admin'; + const password = process.env.MM_TEST_PASSWORD; + + if (!serverURL || !password) { + console.log('[server-setup] MM_TEST_SERVER_URL or MM_TEST_PASSWORD not set — skipping server provisioning.'); + return; + } + + const base = serverURL.replace(/\/$/, ''); + console.log(`[server-setup] Provisioning server: ${base}`); + + // ── 1. Login ──────────────────────────────────────────────────────────── + let token; + let adminUser; + try { + const response = await fetch(`${base}/api/v4/users/login`, { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({login_id: username, password}), + }); + + const text = await response.text(); + let body; + try { + body = JSON.parse(text); + } catch { + body = text; + } + + if (!response.ok) { + const msg = typeof body === 'object' ? (body.message ?? JSON.stringify(body)) : body; + throw new Error(`Login failed (${response.status}): ${msg}`); + } + + token = response.headers.get('Token'); + adminUser = body; + + if (!token) { + throw new Error('Login succeeded but no Token header was returned'); + } + } catch (err) { + console.warn(`[server-setup] Login failed — skipping provisioning: ${err.message}`); + return; + } + + const authHeaders = {Authorization: `Bearer ${token}`}; + console.log(`[server-setup] Authenticated as ${adminUser.username} (id: ${adminUser.id})`); + + // ── 2. Ensure the default team exists ─────────────────────────────────── + let teamId; + try { + // Search by exact name + const teams = await apiRequest( + `${base}/api/v4/teams/name/${DEFAULT_TEAM_NAME}`, + {headers: authHeaders}, + ); + teamId = teams.id; + console.log(`[server-setup] Team '${DEFAULT_TEAM_NAME}' already exists (id: ${teamId})`); + } catch (err) { + if (!err.message.includes('404')) { + console.warn(`[server-setup] Could not check team existence: ${err.message}`); + return; + } + + // Team does not exist — create it + try { + const newTeam = await apiRequest(`${base}/api/v4/teams`, { + method: 'POST', + headers: authHeaders, + body: JSON.stringify({ + name: DEFAULT_TEAM_NAME, + display_name: DEFAULT_TEAM_DISPLAY_NAME, + type: 'O', // open team + }), + }); + teamId = newTeam.id; + console.log(`[server-setup] Created team '${DEFAULT_TEAM_NAME}' (id: ${teamId})`); + } catch (createErr) { + console.warn(`[server-setup] Could not create team: ${createErr.message}`); + return; + } + } + + // ── 3. Ensure the admin user is a member of the team ──────────────────── + try { + await apiRequest(`${base}/api/v4/teams/${teamId}/members`, { + method: 'POST', + headers: authHeaders, + body: JSON.stringify({ + team_id: teamId, + user_id: adminUser.id, + }), + }); + console.log(`[server-setup] Admin user added to team '${DEFAULT_TEAM_NAME}'`); + } catch (err) { + // 409 Conflict means the user is already a member — that's fine. + if (err.message.includes('409') || err.message.toLowerCase().includes('already')) { + console.log(`[server-setup] Admin user already a member of '${DEFAULT_TEAM_NAME}'`); + } else { + console.warn(`[server-setup] Could not add user to team: ${err.message}`); + } + } + + // ── 4. Verify the default channel exists ──────────────────────────────── + // town-square is created automatically with every team, but log it for + // visibility so CI operators can confirm the expected state. + try { + const channel = await apiRequest( + `${base}/api/v4/teams/${teamId}/channels/name/${DEFAULT_CHANNEL}`, + {headers: authHeaders}, + ); + console.log(`[server-setup] Default channel '${channel.name}' confirmed (id: ${channel.id})`); + } catch { + // Non-fatal — just skip the log + } + + console.log('[server-setup] Server provisioning complete.'); +} + +module.exports = {provisionServer}; From f39426dad3aeb3855b2f25a0a19533375bca35fe Mon Sep 17 00:00:00 2001 From: yasser khan Date: Wed, 15 Apr 2026 13:27:21 +0530 Subject: [PATCH 17/22] ci: fix security issues and improve Cursor E2E automation workflows (#3785) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci: fix security, permissions, and reliability issues in Cursor E2E automation workflows - e2e-fix-trigger.yml: * Move issues/pull-requests write permissions from top-level to job-level (least privilege) * Pass github.event.workflow_run.id via env to avoid interpolation inside script * Replace parseInt('${{ steps.*.outputs.pr_number }}') with strict positive-integer validation via process.env (prevents injection and fixes weak parseInt fallback) * Add id: launch to the curl step so AGENT_URL is read from step outputs instead of the broken env.AGENT_URL reference * Add continue-on-error: true to both auxiliary comment steps * Fix GITHUB_OUTPUT key casing (agent_id / agent_url) - e2e-cursor-commands.yml: * Move issues/pull-requests write permissions from top-level to job-level * Pass github.event.comment.body via COMMENT_BODY env var and read as process.env.COMMENT_BODY — eliminates critical script-injection vector where user-controlled comment content was interpolated into a JS string * Pass github.event.comment.id and issue.number via env vars * Apply strict positive-integer validation for all PR number reads * Add continue-on-error: true to react, reply-success, and reply-failure steps - e2e-functional.yml: * Replace direct JSON-array interpolation (${{ needs.*.outputs.platforms }}) with env: + JSON.parse() in both update-initial-status and update-final-status github-script blocks Co-authored-by: yasser khan * ci: improve E2E fix agent prompts — run-before-commit, exact test names, per-platform URLs The agent prompts previously told the agent to 'run the test' without providing the build commands, server credentials, or a hard prohibition on committing without a passing run. This meant the agent could guess fixes without verifying them on the live servers. Changes in e2e-fix-trigger.yml and e2e-cursor-commands.yml: Triage / context step: - Download JUnit XML artifacts (test-results-{linux,macos,windows}) and parse exact failing test names using an in-memory AdmZip parse. The agent prompt now contains the exact test names instead of just 'linux: 1 failure(s)'. - Extract per-platform server URLs from the server-info PR comment individually (SERVER_URL_LINUX, SERVER_URL_MACOS, SERVER_URL_WINDOWS) in addition to the raw table block. The admin username is also extracted. - Expose the workflow run ID so the agent can use 'gh run download' if the in-workflow JUnit parse fails. Agent prompt (both fix workflows): - Step 1: exact build commands (nvm use, npm ci x2, npm run build-test) - Step 2: how to get exact failing spec file paths from JUnit - Step 3 (fix flow): shared-root-cause analysis before per-spec fixes - Step 4: reproduce BEFORE editing — exact xvfb-run command with all env vars filled in; hard rule 'YOU MUST see the failure before touching code' - Step 5: fix and re-run — hard rule 'DO NOT COMMIT until re-run shows passing' - Platform strategy: run against Linux server to validate logic; macOS/Windows confirmed by CI re-run after push (cannot run natively on Linux agent) - Summary comment requirement: must include exact run command + 'PASSED' result AGENTS.md: - New 'Cursor secrets required for E2E fix agents' section documenting that MM_TEST_PASSWORD must be added as a Cursor Dashboard secret so the agent can authenticate against the provisioned Matterwick servers. - New 'Running E2E tests locally on this Linux VM' section with the exact xvfb-run command an agent or developer should use. Co-authored-by: yasser khan * fix: paginate all comment pages when finding the server-info comment The previous implementation used a single listComments call with per_page:100. On a PR with more than 100 comments the marker would never be found, causing postServerInfoComment to call createComment on every E2E run instead of updating the existing comment. Replace the single-page fetch with a paginated loop that walks all pages until the marker is found or the last page is consumed. All other upsert logic is unchanged. Co-authored-by: yasser khan * fix: use correct Cursor secret names for E2E credentials Cursor secrets are injected by name. The prompts and AGENTS.md were referencing MM_TEST_PASSWORD as if it were a Cursor secret, but the actual secrets added to the Cursor Dashboard are named: MM_DESKTOP_E2E_USER_NAME (the admin username) MM_DESKTOP_E2E_USER_CREDENTIALS (the admin password) Update every run command in the agent prompts and AGENTS.md to read the credentials from the correct env var names. The Playwright env vars MM_TEST_USER_NAME and MM_TEST_PASSWORD are still used as the test runner expects them — they are now assigned from the Cursor secrets: export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" Co-authored-by: yasser khan --------- Co-authored-by: Cursor Agent --- .github/workflows/e2e-cursor-commands.yml | 425 ++++++++++++++++------ .github/workflows/e2e-fix-trigger.yml | 406 +++++++++++++++------ .github/workflows/e2e-functional.yml | 14 +- AGENTS.md | 32 ++ e2e/utils/github-actions.js | 32 +- 5 files changed, 660 insertions(+), 249 deletions(-) diff --git a/.github/workflows/e2e-cursor-commands.yml b/.github/workflows/e2e-cursor-commands.yml index c5bf77350ff..6b95c77bea1 100644 --- a/.github/workflows/e2e-cursor-commands.yml +++ b/.github/workflows/e2e-cursor-commands.yml @@ -25,10 +25,9 @@ on: issue_comment: types: [created] +# Write permissions are scoped to the job that posts comments / reactions. permissions: contents: read - pull-requests: write - issues: write actions: read jobs: @@ -43,31 +42,53 @@ jobs: contains(github.event.comment.body, '@cursor fix e2e') || contains(github.event.comment.body, '@cursor add e2e') ) + permissions: + contents: read + actions: read + issues: write + pull-requests: write steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: React to comment so the user knows it was received + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + COMMENT_ID: ${{ github.event.comment.id }} with: github-token: ${{ github.token }} script: | await github.rest.reactions.createForIssueComment({ owner: context.repo.owner, repo: context.repo.repo, - comment_id: ${{ github.event.comment.id }}, + comment_id: Number(process.env.COMMENT_ID), content: 'eyes', }); - name: Parse command and gather context id: context uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + # Pass user-controlled comment body via env to prevent script injection. + COMMENT_BODY: ${{ github.event.comment.body }} + PR_NUMBER: ${{ github.event.issue.number }} with: github-token: ${{ github.token }} script: | const { owner, repo } = context.repo; - const commentBody = `${{ github.event.comment.body }}`.toLowerCase(); - const prNumber = ${{ github.event.issue.number }}; + const commentBody = (process.env.COMMENT_BODY || '').toLowerCase(); + + const trimmed = String(process.env.PR_NUMBER ?? '').trim(); + if (!(/^\d+$/).test(trimmed)) { + core.setFailed('Invalid PR number'); + return; + } + const prNumber = Number(trimmed); + if (!Number.isInteger(prNumber) || prNumber <= 0) { + core.setFailed('Invalid PR number'); + return; + } // Determine which command was invoked const isFix = commentBody.includes('@cursor fix e2e'); @@ -84,11 +105,12 @@ jobs: core.setOutput('head_branch', headBranch); // ── Find the latest E2E run for this PR branch ─────────────────── - // Look for the most recent completed "Electron Playwright Tests" - // workflow_dispatch run whose head_sha matches the PR. let latestRunUrl = ''; - let failingSummary = ''; - let serverBlock = '(not available)'; + let latestRunId = ''; + let failingSummary = '(no failing test data found — check CI logs manually)'; + const serverUrls = { linux: '', macos: '', windows: '' }; + let adminUsername = ''; + let serverTableBlock = '(server URLs not found)'; try { const workflows = await github.rest.actions.listRepoWorkflows({ owner, repo }); @@ -107,28 +129,79 @@ jobs: ); if (matchingRun) { latestRunUrl = matchingRun.html_url; + latestRunId = String(matchingRun.id); if (isFix) { - // Collect per-platform job conclusions - const jobs = await github.rest.actions.listJobsForWorkflowRun({ - owner, repo, run_id: matchingRun.id, per_page: 100, - }); - const lines = []; - for (const job of jobs.data.jobs) { - if (job.conclusion === 'failure') { - const n = job.name.toLowerCase(); - const plat = n.includes('linux') || n.includes('ubuntu') ? 'linux' - : n.includes('macos') || n.includes('darwin') ? 'macos' - : n.includes('windows') ? 'windows' - : null; - if (plat) { - lines.push(`- ${plat}: failed`); + // ── Download JUnit artifacts for exact test names ───────── + const failingTests = { linux: [], macos: [], windows: [] }; + + try { + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner, repo, run_id: matchingRun.id, per_page: 50, + }); + + for (const artifact of artifacts.data.artifacts) { + const aName = artifact.name.toLowerCase(); + let platform = null; + if (aName.startsWith('test-results-linux') || aName.startsWith('test-results-ubuntu')) { + platform = 'linux'; + } else if (aName.startsWith('test-results-macos') || aName.startsWith('test-results-darwin')) { + platform = 'macos'; + } else if (aName.startsWith('test-results-windows')) { + platform = 'windows'; + } + if (!platform) { continue; } + + try { + const dl = await github.rest.actions.downloadArtifact({ + owner, repo, artifact_id: artifact.id, archive_format: 'zip', + }); + const resp = await fetch(dl.url); + const buf = Buffer.from(await resp.arrayBuffer()); + const AdmZip = require('adm-zip'); + const zip = new AdmZip(buf); + const xmlEntry = zip.getEntries().find( + (e) => e.entryName.endsWith('e2e-junit.xml'), + ); + if (!xmlEntry) { continue; } + const xml = xmlEntry.getData().toString('utf8'); + const suiteMatches = xml.matchAll(/]+name="([^"]+)"[^>]*>[\s\S]*?<(?:failure|error)[^/]/g); + for (const m of suiteMatches) { + failingTests[platform].push(m[1]); + } + } catch (err) { + core.warning(`Could not parse JUnit for ${platform}: ${err.message}`); } } + } catch (err) { + core.warning(`Could not fetch artifacts: ${err.message}`); + } + + const failingTestsBlock = Object.entries(failingTests) + .filter(([, names]) => names.length > 0) + .map(([plat, names]) => `${plat}:\n${names.map((n) => ` - ${n}`).join('\n')}`) + .join('\n\n'); + + if (failingTestsBlock) { + failingSummary = failingTestsBlock; + } else { + // Fall back to job-level conclusion if JUnit is unavailable + const jobs = await github.rest.actions.listJobsForWorkflowRun({ + owner, repo, run_id: matchingRun.id, per_page: 100, + }); + const lines = []; + for (const job of jobs.data.jobs) { + if (job.conclusion === 'failure') { + const n = job.name.toLowerCase(); + const plat = n.includes('linux') || n.includes('ubuntu') ? 'linux' + : n.includes('macos') || n.includes('darwin') ? 'macos' + : n.includes('windows') ? 'windows' + : null; + if (plat) { lines.push(`- ${plat}: failed (JUnit unavailable — check CI logs)`); } + } + } + if (lines.length > 0) { failingSummary = lines.join('\n'); } } - failingSummary = lines.length > 0 - ? lines.join('\n') - : 'All platforms may have failed — treating as mass-failure scenario.'; } } } @@ -137,9 +210,10 @@ jobs: } core.setOutput('latest_run_url', latestRunUrl); + core.setOutput('latest_run_id', latestRunId); core.setOutput('failing_summary', failingSummary); - // ── Read server URLs from the PR comment ───────────────────────── + // ── Read per-platform server info from the PR comment ───────────── try { const comments = await github.rest.issues.listComments({ owner, repo, issue_number: prNumber, per_page: 100, @@ -148,20 +222,30 @@ jobs: (c) => c.body && c.body.includes('') ); if (infoComment) { - const tableLines = infoComment.body. - split('\n'). - filter((l) => l.startsWith('|') && !l.startsWith('| Platform')). - slice(0, 4); - serverBlock = tableLines.join('\n'); + const body = infoComment.body; + const rowRe = /^\|\s*`(linux|macos|windows)`\s*\|\s*(\S+)\s*\|/im; + for (const line of body.split('\n')) { + const m = line.match(rowRe); + if (m) { serverUrls[m[1].toLowerCase()] = m[2]; } + } + const userMatch = body.match(/\*\*Admin username:\*\*\s*`([^`]+)`/); + if (userMatch) { adminUsername = userMatch[1]; } + const tableLines = body.split('\n').filter( + (l) => l.startsWith('|') && !l.startsWith('| Platform'), + ).slice(0, 5); + serverTableBlock = tableLines.join('\n'); } } catch (err) { core.warning(`Could not read PR comments: ${err.message}`); } - core.setOutput('server_block', serverBlock); + core.setOutput('server_url_linux', serverUrls.linux); + core.setOutput('server_url_macos', serverUrls.macos); + core.setOutput('server_url_windows', serverUrls.windows); + core.setOutput('admin_username', adminUsername); + core.setOutput('server_table_block', serverTableBlock); // ── Collect the PR diff summary for "add tests" command ────────── - // List only the changed file paths — cheap and keeps the prompt small. let changedFiles = ''; if (isAdd) { try { @@ -183,68 +267,141 @@ jobs: if: steps.context.outputs.command == 'fix' id: fix_prompt env: - PR_URL: ${{ steps.context.outputs.pr_url }} FAILING_SUMMARY: ${{ steps.context.outputs.failing_summary }} - SERVER_BLOCK: ${{ steps.context.outputs.server_block }} + SERVER_TABLE_BLOCK: ${{ steps.context.outputs.server_table_block }} + SERVER_URL_LINUX: ${{ steps.context.outputs.server_url_linux }} + SERVER_URL_MACOS: ${{ steps.context.outputs.server_url_macos }} + SERVER_URL_WINDOWS: ${{ steps.context.outputs.server_url_windows }} + ADMIN_USERNAME: ${{ steps.context.outputs.admin_username }} LATEST_RUN_URL: ${{ steps.context.outputs.latest_run_url }} + LATEST_RUN_ID: ${{ steps.context.outputs.latest_run_id }} REQUESTER: ${{ github.event.comment.user.login }} run: | cat > /tmp/prompt.txt << 'PROMPT_EOF' - A developer (@${REQUESTER}) has asked you to fix ALL failing E2E tests on - this PR, including mass failures that the automatic trigger skipped. + @${REQUESTER} asked you to fix ALL failing E2E tests on this PR, + including mass failures that the automatic trigger skipped. + You are running on a Linux agent with an X server on DISPLAY=:1. - This was likely caused by a large change to the codebase. Treat widespread - failures as potentially having a shared root cause (e.g. a renamed selector, - a changed IPC channel, a modified config shape) before fixing each test - individually. - - FAILING PLATFORMS - ----------------- + ══════════════════════════════════════════════════ + FAILING TESTS (from CI JUnit XML) + ══════════════════════════════════════════════════ ${FAILING_SUMMARY} - SERVER URLS (provisioned by Matterwick — still live) - ----------------------------------------------------- - ${SERVER_BLOCK} + ══════════════════════════════════════════════════ + PROVISIONED TEST SERVERS (still live) + ══════════════════════════════════════════════════ + ${SERVER_TABLE_BLOCK} - LATEST E2E WORKFLOW RUN (for full logs and JUnit artifacts) - ------------------------------------------------------------ - ${LATEST_RUN_URL} + Individual URLs for use in run commands: + Linux server: ${SERVER_URL_LINUX} + macOS server: ${SERVER_URL_MACOS} + Windows server: ${SERVER_URL_WINDOWS} + Admin username: read the MM_DESKTOP_E2E_USER_NAME environment variable + (injected as a Cursor secret — do NOT hardcode it) + Admin password: read the MM_DESKTOP_E2E_USER_CREDENTIALS environment variable + (injected as a Cursor secret — do NOT hardcode it) - INSTRUCTIONS - ------------ - 1. Read the CI workflow run logs to identify ALL failing test names and - their error messages. - - 2. Look for a SHARED ROOT CAUSE first: - - Same selector missing across many tests → likely a renamed component - - Same IPC channel / API call failing → likely a changed interface - - Same timeout pattern → likely a new async flow or loading state - If you find a shared cause, fix it at the shared layer (helper, - fixture, or app-side test hook) rather than patching every spec. - - 3. For each remaining failure, classify as TEST BUG or PRODUCT BUG: - TEST BUG → Fix in e2e/specs/, e2e/helpers/, e2e/fixtures/ only. - Run the spec against the live server to confirm it passes. - PRODUCT BUG → Do NOT modify the test. Post a PR comment: - "Product Bug: — expected X, got Y, likely in " - - 4. HARD LIMITS to keep token usage reasonable: - - Fix at most 10 distinct test files per run. - - If more than 10 files need fixing, fix the most common pattern, - post a comment listing the rest, and stop. - - Never touch src/ files. Only e2e/. - - 5. After all fixes and reports, post a single summary comment on the PR. + CI run (for full logs and trace artifacts): ${LATEST_RUN_URL} + + ══════════════════════════════════════════════════ + STEP-BY-STEP INSTRUCTIONS + ══════════════════════════════════════════════════ + + ## Step 1 — Build the app + + Run this once at the start: + + source ~/.nvm/nvm.sh && nvm use 20.15.0 + npm ci + cd e2e && npm ci && cd .. + npm run build-test + + If the build fails, stop and post a PR comment with the error. + + ## Step 2 — Identify exact failing spec files + + The failing test names are listed above. Map each name to a file in + e2e/specs/. If the JUnit block is empty, download the artifacts: + + gh run download ${LATEST_RUN_ID} \ + --name test-results-linux \ + --dir /tmp/results-linux + cat /tmp/results-linux/test-results/e2e-junit.xml | grep 'testcase name' + + Repeat for test-results-macos / test-results-windows. + + ## Step 3 — Look for a SHARED ROOT CAUSE first + + Widespread failures often share one root cause: + - Same selector missing across tests → renamed UI component + - Same IPC channel failing → changed interface + - Same timeout pattern → new async loading state + + If you find a shared cause, fix it at the shared layer (helper or + fixture) rather than patching every spec individually. + + ## Step 4 — Reproduce BEFORE editing + + Run the failing spec against the Linux server: + + cd e2e + export DISPLAY=:1 + export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" + export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" + export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" + xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ + npx playwright test \ + --reporter=list --workers=1 + cd .. + + You MUST see the failure before touching code. + If you cannot reproduce, classify as "flake" and post a PR comment. + + ## Step 5 — Fix and re-run + + After each edit, run the same command from Step 4. Confirm the test + passes before committing that file. + + DO NOT COMMIT a fix unless the re-run shows "X passed". + + For macOS/Windows-only failures: + Run the spec against the Linux server to validate the logic. + If it passes on Linux, commit with a note about the platform gap. + CI will confirm on the real platform after push. + + ## Step 6 — Classify failures you cannot fix locally + + PRODUCT BUG → Post a PR comment: + "Product Bug: — expected X, got Y, likely in " + + ## Step 7 — Hard limits + + - Fix at most 10 distinct spec files per run. + - Never touch src/ files. + - Scope: e2e/specs/, e2e/helpers/, e2e/fixtures/, e2e/utils/ only. + - Never add long sleeps (> 2 s) as a fix. + + ## Step 8 — Summary comment + + After committing all fixes, post a single PR comment: + - Each fix: spec file path + root-cause description + + exact run command + "PASSED" result + - Each product bug: test name + symptom + suspected src/ file + - Any flakes: test name + reason PROMPT_EOF PROMPT=$(sed \ -e "s|\${REQUESTER}|${REQUESTER}|g" \ -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ - -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ + -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ + -e "s|\${SERVER_URL_MACOS}|${SERVER_URL_MACOS}|g" \ + -e "s|\${SERVER_URL_WINDOWS}|${SERVER_URL_WINDOWS}|g" \ + -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ -e "s|\${LATEST_RUN_URL}|${LATEST_RUN_URL}|g" \ + -e "s|\${LATEST_RUN_ID}|${LATEST_RUN_ID}|g" \ /tmp/prompt.txt) - # Write to file for the launch step to read echo "${PROMPT}" > /tmp/final-prompt.txt echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" @@ -253,57 +410,83 @@ jobs: if: steps.context.outputs.command == 'add' id: add_prompt env: - PR_URL: ${{ steps.context.outputs.pr_url }} CHANGED_FILES: ${{ steps.context.outputs.changed_files }} - SERVER_BLOCK: ${{ steps.context.outputs.server_block }} + SERVER_TABLE_BLOCK: ${{ steps.context.outputs.server_table_block }} + SERVER_URL_LINUX: ${{ steps.context.outputs.server_url_linux }} + ADMIN_USERNAME: ${{ steps.context.outputs.admin_username }} REQUESTER: ${{ github.event.comment.user.login }} run: | cat > /tmp/prompt.txt << 'PROMPT_EOF' - A developer (@${REQUESTER}) has asked you to add E2E test cases that cover - the changes introduced by this PR. + @${REQUESTER} asked you to add E2E test cases that cover the changes + introduced by this PR. You are running on a Linux agent (DISPLAY=:1). CHANGED FILES IN THIS PR (non-e2e only) ---------------------------------------- ${CHANGED_FILES} - SERVER URLS (to run and validate your new tests against) - --------------------------------------------------------- - ${SERVER_BLOCK} + PROVISIONED TEST SERVERS + ------------------------- + ${SERVER_TABLE_BLOCK} + + Linux server for local verification: + MM_TEST_SERVER_URL: ${SERVER_URL_LINUX} + MM_TEST_USER_NAME: read MM_DESKTOP_E2E_USER_NAME env var (Cursor secret) + MM_TEST_PASSWORD: read MM_DESKTOP_E2E_USER_CREDENTIALS env var (Cursor secret) INSTRUCTIONS ------------ - 1. Read each changed file to understand what behavior was added or modified. - - 2. For each meaningful behavioral change, write a new Playwright E2E test - (or extend an existing spec if one already covers that area). - - 3. Test placement rules: - - New specs go in e2e/specs/ under the most relevant subdirectory. - - Tests that require a live Mattermost server must use demoMattermostConfig - and be tagged @all so they run in CI. - - Use the fixture layer (e2e/fixtures/index.ts) — do not re-implement - launch / login logic. - - Follow patterns in existing e2e/specs/ files for style and structure. - - Tag each new test with @P2 @all or the appropriate priority. - - 4. After writing tests, run each new spec against the live server URL to - confirm it passes before committing. - - 5. HARD LIMITS: - - Add at most 3 new spec files per run. - - Do not modify src/ files. - - If you cannot determine a clear, testable behavior from the diff, - post a comment explaining what is ambiguous rather than writing - a low-quality test. - - 6. After committing, post a PR comment listing each new test file with a - one-line description of what it covers. + + ## Step 1 — Build + source ~/.nvm/nvm.sh && nvm use 20.15.0 + npm ci && cd e2e && npm ci && cd .. && npm run build-test + + ## Step 2 — Understand the changes + Read each changed file to understand what behavior was added or modified. + + ## Step 3 — Write tests + For each meaningful behavioral change, write a new Playwright E2E spec + (or extend an existing spec if one already covers that area). + + Placement rules: + - New specs go in e2e/specs/ under the most relevant subdirectory. + - Use the fixture layer (e2e/fixtures/index.ts) — do not re-implement + launch or login logic. + - Follow patterns in existing e2e/specs/ files. + - Tag each new test with @P2 @all or the appropriate priority. + + ## Step 4 — Run each new spec before committing + + cd e2e + export DISPLAY=:1 + export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" + export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" + export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" + xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ + npx playwright test --reporter=list --workers=1 + cd .. + + A new test MUST pass before you commit it. + If it cannot be made to pass locally, explain why in a PR comment. + + ## Step 5 — Hard limits + - Add at most 3 new spec files per run. + - Do not modify src/ files. + - If you cannot determine a clear, testable behavior, post a PR + comment explaining the ambiguity instead of writing a weak test. + + ## Step 6 — Summary comment + After committing, post a PR comment listing each new file with: + - File path + - What behavior it covers + - Exact run command used + "PASSED" confirmation PROMPT_EOF PROMPT=$(sed \ -e "s|\${REQUESTER}|${REQUESTER}|g" \ -e "s|\${CHANGED_FILES}|${CHANGED_FILES}|g" \ - -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ + -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ + -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ /tmp/prompt.txt) echo "${PROMPT}" > /tmp/final-prompt.txt @@ -346,14 +529,19 @@ jobs: - name: Reply to comment with agent link if: always() && steps.launch.outcome == 'success' + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: AGENT_URL: ${{ steps.launch.outputs.agent_url }} COMMAND: ${{ steps.context.outputs.command }} + PR_NUMBER: ${{ steps.context.outputs.pr_number }} with: github-token: ${{ github.token }} script: | - const prNumber = ${{ github.event.issue.number }}; + const trimmed = String(process.env.PR_NUMBER ?? '').trim(); + if (!(/^\d+$/).test(trimmed)) { return; } + const prNumber = Number(trimmed); + if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } const agentUrl = process.env.AGENT_URL || '(pending)'; const command = process.env.COMMAND; const action = command === 'fix' @@ -368,21 +556,28 @@ jobs: '', `Task: **${action}**`, '', - 'The agent will push results directly to this branch.', + 'The agent will reproduce each failure before editing, confirm the fix passes, then push to this branch.', agentUrl ? `\n**Agent:** ${agentUrl}` : '', ].filter(Boolean).join('\n'), }); - name: Reply to comment on failure if: always() && steps.launch.outcome == 'failure' + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + PR_NUMBER: ${{ steps.context.outputs.pr_number }} with: github-token: ${{ github.token }} script: | + const trimmed = String(process.env.PR_NUMBER ?? '').trim(); + if (!(/^\d+$/).test(trimmed)) { return; } + const prNumber = Number(trimmed); + if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: ${{ github.event.issue.number }}, + issue_number: prNumber, body: [ '### :x: Cursor Agent Launch Failed', '', diff --git a/.github/workflows/e2e-fix-trigger.yml b/.github/workflows/e2e-fix-trigger.yml index d2c1323a35c..5e813d7f47f 100644 --- a/.github/workflows/e2e-fix-trigger.yml +++ b/.github/workflows/e2e-fix-trigger.yml @@ -16,18 +16,23 @@ name: E2E Fix Trigger # # 2. SKIP nightly runs — they have no PR to fix. # -# 3. The prompt passed to the Cursor API contains only: -# - The failing test names grouped by platform (not full logs) -# - The server URLs from the existing PR comment (already posted by -# post-server-info) so the agent can reproduce failures -# - Clear triage instructions so the agent classifies each failure as a -# test bug (fix it) or a product bug (post a comment, do not retry) +# 3. The triage step downloads the JUnit XML artifact so the prompt contains +# the exact failing test names, not just platform-level counts. This +# prevents the agent from guessing which tests failed. # -# 4. source.prUrl is used (not source.repository + ref) so the agent works +# 4. Per-platform server URLs and admin credentials are extracted from the +# server-info PR comment and passed individually so the agent can run the +# exact failing spec against the exact server where it failed. +# +# 5. The agent prompt has a hard "MUST RUN BEFORE COMMIT" rule: the agent +# must execute the failing spec against the live server and see it pass +# before staging the fix. Platform-specific run commands are provided. +# +# 6. source.prUrl is used (not source.repository + ref) so the agent works # directly on the PR's head branch and pushes fixes there, triggering # another CI run automatically. # -# 5. target.autoBranch: false — push to the PR's existing head branch rather +# 7. target.autoBranch: false — push to the PR's existing head branch rather # than creating a new branch. on: @@ -37,10 +42,9 @@ on: # workflow_run always runs from the default branch — no PR secrets available # here. CURSOR_API_KEY must be a repository secret, not a PR/environment secret. +# Write permissions are scoped to the single job that posts PR comments. permissions: contents: read - pull-requests: write - issues: write actions: read env: @@ -60,6 +64,11 @@ jobs: if: | github.event.workflow_run.conclusion == 'failure' && github.event.workflow_run.event == 'workflow_dispatch' + permissions: + contents: read + actions: read + issues: write + pull-requests: write steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -69,16 +78,17 @@ jobs: uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: MASS_FAILURE_THRESHOLD: ${{ env.MASS_FAILURE_THRESHOLD }} + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} with: github-token: ${{ github.token }} script: | const { owner, repo } = context.repo; - const runId = ${{ github.event.workflow_run.id }}; + const runId = Number(process.env.WORKFLOW_RUN_ID); const threshold = parseInt(process.env.MASS_FAILURE_THRESHOLD, 10); // ── 1. Resolve the PR ──────────────────────────────────────────── const run = await github.rest.actions.getWorkflowRun({ owner, repo, run_id: runId }); - const headSha = run.data.head_sha; + const headSha = run.data.head_sha; const headBranch = run.data.head_branch; const headOwner = run.data.head_repository?.owner?.login || owner; @@ -108,17 +118,12 @@ jobs: core.setOutput('pr_number', String(prNumber)); core.setOutput('pr_url', prUrl); - // ── 2. Collect per-platform failure counts from run outputs ────── - // The e2e-tests matrix job exposes NEW_FAILURES_{LINUX,MACOS,WINDOWS} - // as workflow outputs via the reusable template. + // ── 2. Collect per-platform failure counts ─────────────────────── const jobs = await github.rest.actions.listJobsForWorkflowRun({ owner, repo, run_id: runId, per_page: 100, }); - // Extract NEW_FAILURES from step outputs if available, otherwise - // use job conclusion to estimate. const platformFailures = { linux: 0, macos: 0, windows: 0 }; - const platformJobIds = { linux: null, macos: null, windows: null }; for (const job of jobs.data.jobs) { const nameLower = job.name.toLowerCase(); @@ -130,14 +135,8 @@ jobs: } else if (nameLower.includes('windows') || nameLower.includes('win32')) { platform = 'windows'; } - if (!platform) { - continue; - } - platformJobIds[platform] = job.id; + if (!platform) { continue; } if (job.conclusion === 'failure') { - // We don't have direct access to the NEW_FAILURES output here; - // mark as at least 1. The actual count is in the JUnit artifact - // but downloading it here would be expensive. platformFailures[platform] = platformFailures[platform] || 1; } } @@ -145,13 +144,8 @@ jobs: const totalFailures = Object.values(platformFailures).reduce((a, b) => a + b, 0); core.info(`Failure counts — linux:${platformFailures.linux} macos:${platformFailures.macos} windows:${platformFailures.windows} total:${totalFailures}`); core.setOutput('total_failures', String(totalFailures)); - core.setOutput('failures_linux', String(platformFailures.linux)); - core.setOutput('failures_macos', String(platformFailures.macos)); - core.setOutput('failures_windows', String(platformFailures.windows)); // ── 3. Mass-failure check ──────────────────────────────────────── - // If every platform failed we almost certainly have an infra issue, - // not individual test bugs. Skip the agent to avoid wasting tokens. const allPlatformsFailed = platformFailures.linux > 0 && platformFailures.macos > 0 && @@ -166,12 +160,82 @@ jobs: core.setOutput('skip', 'false'); - // ── 4. Collect server URLs from the PR comment ─────────────────── - // post-server-info already posted the server URLs; read them back - // so we can include them in the agent prompt without fetching - // anything extra. + // ── 4. Download JUnit artifacts to get exact failing test names ── + // Each platform uploads a test-results-{os} artifact containing + // e2e/test-results/e2e-junit.xml. Download and parse them here so + // the agent prompt contains exact test names, not just counts. + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner, repo, run_id: runId, per_page: 50, + }); + + const failingTests = { linux: [], macos: [], windows: [] }; + + for (const artifact of artifacts.data.artifacts) { + const nameLower = artifact.name.toLowerCase(); + let platform = null; + if (nameLower.startsWith('test-results-linux') || nameLower.startsWith('test-results-ubuntu')) { + platform = 'linux'; + } else if (nameLower.startsWith('test-results-macos') || nameLower.startsWith('test-results-darwin')) { + platform = 'macos'; + } else if (nameLower.startsWith('test-results-windows')) { + platform = 'windows'; + } + if (!platform || platformFailures[platform] === 0) { continue; } + + try { + // Download the artifact zip (streams as redirect URL) + const dl = await github.rest.actions.downloadArtifact({ + owner, repo, artifact_id: artifact.id, archive_format: 'zip', + }); + // dl.url is the redirect; use fetch to get the zip bytes + const resp = await fetch(dl.url); + const buf = Buffer.from(await resp.arrayBuffer()); + + // Unzip in memory — find the JUnit XML + const AdmZip = require('adm-zip'); + const zip = new AdmZip(buf); + const xmlEntry = zip.getEntries().find( + (e) => e.entryName.endsWith('e2e-junit.xml'), + ); + if (!xmlEntry) { continue; } + + const xml = xmlEntry.getData().toString('utf8'); + // Parse out test names via simple regex — no full XML parse needed + const suiteMatches = xml.matchAll(/]+name="([^"]+)"[^>]*>[\s\S]*?<(?:failure|error)[^/]/g); + for (const m of suiteMatches) { + failingTests[platform].push(m[1]); + } + } catch (err) { + core.warning(`Could not parse JUnit for ${platform}: ${err.message}`); + } + } + + const failingTestsBlock = Object.entries(failingTests) + .filter(([, names]) => names.length > 0) + .map(([platform, names]) => { + const list = names.map((n) => ` - ${n}`).join('\n'); + return `${platform}:\n${list}`; + }) + .join('\n\n'); + + // Falling back to platform count summary if we could not parse XML + const failingSummary = failingTestsBlock || + Object.entries(platformFailures) + .filter(([, count]) => count > 0) + .map(([p, c]) => `- ${p}: ${c} failure(s) (JUnit not available — check CI logs)`) + .join('\n'); + + core.setOutput('failing_tests_block', failingTestsBlock); + core.setOutput('failing_summary', failingSummary); + + // ── 5. Extract per-platform server info from the PR comment ────── + // post-server-info posted a Markdown table; parse out individual + // rows so we can expose each URL and the admin username separately. const MARKER = ''; - let serverBlock = '(server URLs not found in PR comment)'; + const serverUrls = { linux: '', macos: '', windows: '' }; + let adminUsername = ''; + let serverTableBlock = '(server URLs not found in PR comment)'; + try { const comments = await github.rest.issues.listComments({ owner, repo, issue_number: prNumber, per_page: 100, @@ -180,37 +244,56 @@ jobs: (c) => c.body && c.body.includes(MARKER), ); if (infoComment) { - // Extract just the table lines — skip the header and trailing lines - const tableLines = infoComment.body. - split('\n'). - filter((l) => l.startsWith('|') && !l.startsWith('| Platform')). - slice(0, 4); // at most 3 platform rows + divider - serverBlock = tableLines.join('\n'); + const body = infoComment.body; + + // Extract table rows (lines starting with |`linux`|, |`macos`|, |`windows`|) + const rowRe = /^\|\s*`(linux|macos|windows)`\s*\|\s*(\S+)\s*\|/im; + for (const line of body.split('\n')) { + const m = line.match(rowRe); + if (m) { + serverUrls[m[1].toLowerCase()] = m[2]; + } + } + + // Extract admin username from "**Admin username:** `...`" + const userMatch = body.match(/\*\*Admin username:\*\*\s*`([^`]+)`/); + if (userMatch) { adminUsername = userMatch[1]; } + + // Keep the raw table for the prompt as well + const tableLines = body.split('\n').filter( + (l) => l.startsWith('|') && !l.startsWith('| Platform'), + ).slice(0, 5); + serverTableBlock = tableLines.join('\n'); } } catch (err) { core.warning(`Could not read PR comments: ${err.message}`); } - // ── 5. Build the failing-platform summary (short, no full logs) ── - const failingSummary = Object.entries(platformFailures). - filter(([, count]) => count > 0). - map(([platform, count]) => `- ${platform}: ${count} failure(s)`). - join('\n'); - - core.setOutput('failing_summary', failingSummary); - core.setOutput('server_block', serverBlock); + core.setOutput('server_url_linux', serverUrls.linux); + core.setOutput('server_url_macos', serverUrls.macos); + core.setOutput('server_url_windows', serverUrls.windows); + core.setOutput('admin_username', adminUsername); + core.setOutput('server_table_block', serverTableBlock); core.setOutput('workflow_run_url', `https://github.com/${owner}/${repo}/actions/runs/${runId}`); + core.setOutput('workflow_run_id', String(runId)); - name: Post skip notice for mass failures if: steps.triage.outputs.skip == 'true' && steps.triage.outputs.skip_reason == 'mass_failure' + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + PR_NUMBER: ${{ steps.triage.outputs.pr_number }} + TOTAL_FAILURES: ${{ steps.triage.outputs.total_failures }} + WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} with: github-token: ${{ github.token }} script: | - const prNumber = parseInt('${{ steps.triage.outputs.pr_number }}', 10); - if (!prNumber) { return; } - const total = '${{ steps.triage.outputs.total_failures }}'; - const runUrl = '${{ steps.triage.outputs.workflow_run_url }}'; + const trimmed = String(process.env.PR_NUMBER ?? '').trim(); + if (!(/^\d+$/).test(trimmed)) { return; } + const prNumber = Number(trimmed); + if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } + const total = process.env.TOTAL_FAILURES; + const runUrl = process.env.WORKFLOW_RUN_URL; await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, @@ -226,83 +309,154 @@ jobs: '1. Check the [workflow run](' + runUrl + ') for build errors or server provisioning failures.', '2. If it was a transient CI issue, push an empty commit to re-trigger E2E.', '3. If it is a real product regression, investigate the failing tests manually.', + '4. To override and force-launch the agent anyway, comment `@cursor fix e2e` on this PR.', ].join('\n'), }); - name: Launch Cursor agent to fix E2E failures if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' + id: launch env: CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} PR_URL: ${{ steps.triage.outputs.pr_url }} FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} - SERVER_BLOCK: ${{ steps.triage.outputs.server_block }} + SERVER_TABLE_BLOCK: ${{ steps.triage.outputs.server_table_block }} + SERVER_URL_LINUX: ${{ steps.triage.outputs.server_url_linux }} + SERVER_URL_MACOS: ${{ steps.triage.outputs.server_url_macos }} + SERVER_URL_WINDOWS: ${{ steps.triage.outputs.server_url_windows }} + ADMIN_USERNAME: ${{ steps.triage.outputs.admin_username }} WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} + WORKFLOW_RUN_ID: ${{ steps.triage.outputs.workflow_run_id }} run: | - # Build the prompt as a file to avoid shell quoting issues with - # multi-line strings and special characters. cat > /tmp/agent-prompt.txt << 'PROMPT_EOF' - E2E tests failed on this PR. Your job is to investigate each failure, - classify it, and act accordingly. + E2E tests failed on this PR. Your job is to fix every test bug you find. + You are running on a Linux agent with an X server on DISPLAY=:1. - FAILING PLATFORMS - ----------------- + ══════════════════════════════════════════════════ + FAILING TESTS (from CI JUnit XML) + ══════════════════════════════════════════════════ ${FAILING_SUMMARY} - SERVER URLS (provisioned by Matterwick — still live) - ----------------------------------------------------- - ${SERVER_BLOCK} - - WORKFLOW RUN (for full logs and JUnit artifacts) - ------------------------------------------------- - ${WORKFLOW_RUN_URL} - - INSTRUCTIONS - ------------ - 1. Read the CI workflow run logs to find the exact failing test names - and error messages. Download the JUnit XML artifact if available. - - 2. For each failing test, open the test source file in e2e/specs/ and - classify the failure: - - TEST BUG — the test itself is wrong: - Examples: stale selector, wrong timeout, missing wait, wrong - assertion value, race condition in setup/teardown. - Action: Fix the test code and push a commit to this branch. - - PRODUCT BUG — the app behavior does not match what the test expects: - Examples: UI element removed/renamed, IPC channel changed, feature - regression in src/. - Action: Do NOT modify the test. Instead post a PR comment describing: - - Which test failed - - What the test expected vs what actually happened - - Which src/ file is likely responsible - Label the comment "Product Bug: ". - - 3. IMPORTANT RULES to avoid wasting tokens: - - Fix at most 8 test files per run. If more than 8 distinct test - files are failing, fix the 8 most-common failure patterns and - post a comment listing the remaining ones as product bugs or - deferred test bugs. - - Do NOT rewrite tests from scratch. Make the smallest change that - fixes the failure. - - Do NOT touch any file in src/ (app source). Your scope is - e2e/specs/, e2e/helpers/, e2e/fixtures/, and e2e/utils/. - - If a selector is missing from the live server (checked via - MM_TEST_SERVER_URL), that is a product bug — report it. - - Run each fixed spec against the live server to confirm it passes - before committing. - - 4. After all fixes are committed (or all bugs reported), post a single - summary comment on the PR with: - - List of test bugs fixed (file name + one-line description) - - List of product bugs reported (test name + symptom) + ══════════════════════════════════════════════════ + PROVISIONED TEST SERVERS (still live) + ══════════════════════════════════════════════════ + ${SERVER_TABLE_BLOCK} + + Individual URLs for use in run commands: + Linux server: ${SERVER_URL_LINUX} + macOS server: ${SERVER_URL_MACOS} + Windows server: ${SERVER_URL_WINDOWS} + Admin username: read the MM_DESKTOP_E2E_USER_NAME environment variable + (injected as a Cursor secret — do NOT hardcode it) + Admin password: read the MM_DESKTOP_E2E_USER_CREDENTIALS environment variable + (injected as a Cursor secret — do NOT hardcode it) + + CI run (for full logs and trace artifacts): ${WORKFLOW_RUN_URL} + + ══════════════════════════════════════════════════ + STEP-BY-STEP INSTRUCTIONS + ══════════════════════════════════════════════════ + + ## Step 1 — Build the app + + Run this once at the start (required before any spec can run): + + source ~/.nvm/nvm.sh && nvm use 20.15.0 + npm ci + cd e2e && npm ci && cd .. + npm run build-test + + If the build fails, stop and post a PR comment with the error. + + ## Step 2 — Identify the exact failing spec files + + The failing test names are listed above. Map each name to a file in + e2e/specs/. If the JUnit block is empty, download the test-results + artifact from the CI run URL above and extract e2e-junit.xml manually + using the GitHub CLI: + + gh run download ${WORKFLOW_RUN_ID} \ + --name test-results-linux \ + --dir /tmp/results-linux + cat /tmp/results-linux/test-results/e2e-junit.xml | grep 'testcase name' + + Repeat for test-results-macos / test-results-windows as needed. + + ## Step 3 — Reproduce the failure BEFORE editing anything + + Run the failing spec file. Use the Linux server URL for Linux-tagged + tests; any URL works for platform-neutral tests. + + Linux run command (use this for linux-tagged tests or neutral tests): + + cd e2e + export DISPLAY=:1 + export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" + export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" + export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" + xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ + npx playwright test \ + --reporter=list --workers=1 + cd .. + + You MUST confirm the test fails on HEAD before touching any code. + If you cannot reproduce the failure, classify it as "flake" and post + a PR comment — do NOT edit the test. + + ## Step 4 — Classify each failure + + TEST BUG — the test code is wrong: + Examples: stale selector, wrong assertion value, missing wait, + race condition in setup/teardown, obsolete helper. + Action: fix the test code; then proceed to Step 5. + + PRODUCT BUG — the app does not match what the test expects: + Examples: renamed UI element, changed IPC channel, missing feature. + Action: do NOT modify the test. Post a PR comment: + "Product Bug: — expected X, got Y, likely in " + + ## Step 5 — Fix and re-run + + After editing the spec (or its helper/fixture), run the exact same + command from Step 3 and confirm the test now passes. + + DO NOT COMMIT a fix until the re-run shows the test passing. + If the re-run still fails, debug further before committing. + + For macOS/Windows-only failures: + - The macOS/Windows platforms use the same spec source on GitHub. + - Run the spec against the Linux server first to confirm the test + logic is correct. If it passes on Linux but the failure was only + on macOS/Windows, note it in your commit message. Commit, then + push so CI re-runs on the real platform to confirm. + + ## Step 6 — Hard limits + + - Fix at most 8 distinct spec files per run. + - Never touch src/ files. + - Scope: e2e/specs/, e2e/helpers/, e2e/fixtures/, e2e/utils/ only. + - Never add long sleeps (> 2 s) as a fix. + - If you exceed the file limit, post a comment listing remaining failures. + + ## Step 7 — Summary comment + + After committing all fixes, post a single PR comment: + - Each fix: spec file path + one-line root-cause description + + exact run command used + "PASSED" result + - Each product bug: test name + symptom + suspected src/ file + - Any flakes: test name + why you concluded it is a flake PROMPT_EOF - # Substitute the env vars into the prompt + # Substitute env vars into the prompt PROMPT=$(sed \ -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ - -e "s|\${SERVER_BLOCK}|${SERVER_BLOCK}|g" \ + -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ + -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ + -e "s|\${SERVER_URL_MACOS}|${SERVER_URL_MACOS}|g" \ + -e "s|\${SERVER_URL_WINDOWS}|${SERVER_URL_WINDOWS}|g" \ + -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ -e "s|\${WORKFLOW_RUN_URL}|${WORKFLOW_RUN_URL}|g" \ + -e "s|\${WORKFLOW_RUN_ID}|${WORKFLOW_RUN_ID}|g" \ /tmp/agent-prompt.txt) # Launch the Cursor agent @@ -330,24 +484,28 @@ jobs: AGENT_URL=$(echo "${RESPONSE}" | jq -r '.target.url // empty') echo "Cursor agent launched: ${AGENT_ID}" echo "Agent URL: ${AGENT_URL}" - echo "AGENT_ID=${AGENT_ID}" >> "$GITHUB_OUTPUT" - echo "AGENT_URL=${AGENT_URL}" >> "$GITHUB_OUTPUT" + echo "agent_id=${AGENT_ID}" >> "$GITHUB_OUTPUT" + echo "agent_url=${AGENT_URL}" >> "$GITHUB_OUTPUT" - name: Post agent launch notice on PR if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' + continue-on-error: true uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 env: - AGENT_URL: ${{ env.AGENT_URL }} + AGENT_URL: ${{ steps.launch.outputs.agent_url }} FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} + PR_NUMBER: ${{ steps.triage.outputs.pr_number }} with: github-token: ${{ github.token }} script: | - const prNumber = parseInt('${{ steps.triage.outputs.pr_number }}', 10); - if (!prNumber) { return; } - const agentUrl = process.env.AGENT_URL || '(pending)'; - const runUrl = process.env.WORKFLOW_RUN_URL; - const summary = process.env.FAILING_SUMMARY; + const trimmed = String(process.env.PR_NUMBER ?? '').trim(); + if (!(/^\d+$/).test(trimmed)) { return; } + const prNumber = Number(trimmed); + if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } + const agentUrl = process.env.AGENT_URL || '(pending)'; + const runUrl = process.env.WORKFLOW_RUN_URL; + const summary = process.env.FAILING_SUMMARY; await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, @@ -355,11 +513,17 @@ jobs: body: [ '### :robot: Cursor Agent Launched to Fix E2E Failures', '', - '**Failing platforms:**', + '**Failing tests:**', + '```', summary, + '```', + '', + 'The agent will:', + '1. Reproduce each failure locally before editing', + '2. Fix test bugs and confirm the fix passes before committing', + '3. Report product bugs as PR comments without modifying tests', '', - 'The agent will investigate each failure, fix test bugs, and report product bugs.', - 'It will push fixes directly to this branch (which will trigger a new E2E run).', + 'It will push fixes directly to this branch (triggering a new E2E run).', '', agentUrl ? `**Agent:** ${agentUrl}` : '', `**CI run:** ${runUrl}`, diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index f19e924688e..ca983e93cc5 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -118,11 +118,13 @@ jobs: - name: Update initial status for all platforms uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} with: github-token: ${{ github.token }} script: | const { updateInitialStatus } = require('./e2e/utils/github-actions.js'); - const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + const platforms = JSON.parse(process.env.PLATFORMS); await updateInitialStatus({ github, context, platforms }); e2e-tests: @@ -237,13 +239,17 @@ jobs: - name: Update final status for all platforms uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} + E2E_OUTPUTS: ${{ toJSON(needs.e2e-tests.outputs) }} + MERGED_REPORT_URL: ${{ needs.merge-e2e-report.outputs.merged-report-url }} with: github-token: ${{ github.token }} script: | const { updateFinalStatus } = require('./e2e/utils/github-actions.js'); - const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; - const outputs = ${{ toJSON(needs.e2e-tests.outputs) }}; - const mergedReportUrl = '${{ needs.merge-e2e-report.outputs.merged-report-url }}' || undefined; + const platforms = JSON.parse(process.env.PLATFORMS); + const outputs = JSON.parse(process.env.E2E_OUTPUTS); + const mergedReportUrl = process.env.MERGED_REPORT_URL || undefined; await updateFinalStatus({ github, context, platforms, outputs, mergedReportUrl }); # remove-e2e-label is intentionally omitted: see e2e-label-cleanup.yml for context. diff --git a/AGENTS.md b/AGENTS.md index 6268a4f7755..17fd59706a9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,6 +37,38 @@ All standard commands are documented in `CLAUDE.md` and `package.json`. Quick re E2E tests live in `e2e/` with a separate `package.json`. See `e2e/AGENTS.md` for detailed guidance. Server-backed E2E tests require a running Mattermost server and env vars `MM_TEST_SERVER_URL`, `MM_TEST_USER_NAME`, `MM_TEST_PASSWORD`. Startup/UI-only E2E tests can run without a server. +### Cursor secrets required for E2E fix agents + +When a Cursor cloud agent is launched by `e2e-fix-trigger.yml` or `e2e-cursor-commands.yml` to fix failing E2E tests, it needs credentials to connect to the provisioned Mattermost test servers. Add the following secrets in the Cursor Dashboard (Cloud Agents → Secrets): + +| Secret name | Value | +|---|---| +| `MM_DESKTOP_E2E_USER_NAME` | The admin username for the Matterwick-provisioned E2E servers (same value as the `MM_DESKTOP_E2E_USER_NAME` GitHub repo secret) | +| `MM_DESKTOP_E2E_USER_CREDENTIALS` | The admin password for the Matterwick-provisioned E2E servers (same value as the `MM_DESKTOP_E2E_USER_CREDENTIALS` GitHub repo secret) | + +These are exposed to the agent directly as environment variables. The agent uses them verbatim as `MM_TEST_USER_NAME` and `MM_TEST_PASSWORD` when running specs. `MM_TEST_SERVER_URL` is still injected into the agent prompt from the server-info PR comment (it is not secret). + +### Running E2E tests locally on this Linux VM + +To run a single spec file against a live server: + +```bash +source ~/.nvm/nvm.sh && nvm use 20.15.0 +npm ci && cd e2e && npm ci && cd .. +npm run build-test + +cd e2e +export DISPLAY=:1 +export MM_TEST_SERVER_URL= +export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" +export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" +xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ + npx playwright test --reporter=list --workers=1 +cd .. +``` + +If a run leaves Electron hanging: `killall Electron 2>/dev/null || true` + ### Native modules The `postinstall` script runs `electron-builder install-app-deps` to rebuild native modules (registry-js, cf-prefs, etc.) for the current Electron version. If you see native module errors after `npm install`, ensure postinstall completed successfully. diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 1431b481c4c..44bbdd7f7a7 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -204,17 +204,31 @@ async function postServerInfoComment({github, context, platforms, adminUsername, const body = lines.join('\n'); const {owner, repo} = context.repo; + // Search all pages of comments so the marker is never missed on busy PRs. + // Without full pagination a PR with > 100 comments would fail to find the + // existing comment and create a duplicate on every subsequent E2E run. let existingCommentId = null; try { - const {data: comments} = await github.rest.issues.listComments({ - owner, - repo, - issue_number: prNumber, - per_page: 100, - }); - const existing = comments.find((c) => c.body && c.body.includes(MARKER)); - if (existing) { - existingCommentId = existing.id; + let page = 1; + while (!existingCommentId) { + const {data: comments} = await github.rest.issues.listComments({ + owner, + repo, + issue_number: prNumber, + per_page: 100, + page, + }); + if (comments.length === 0) { + break; + } + const found = comments.find((c) => c.body && c.body.includes(MARKER)); + if (found) { + existingCommentId = found.id; + } else if (comments.length < 100) { + // Reached the last page without finding the marker. + break; + } + page++; } } catch (err) { console.log(`Could not list PR comments: ${err.message}`); From ea62893f7fe421203923989f5890d106e090c0e5 Mon Sep 17 00:00:00 2001 From: yasser khan Date: Wed, 15 Apr 2026 13:49:14 +0530 Subject: [PATCH 18/22] ci: fix security issues and improve Cursor E2E automation workflows (#3786) --- .github/workflows/e2e-cursor-commands.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-cursor-commands.yml b/.github/workflows/e2e-cursor-commands.yml index 6b95c77bea1..4c3a85b0ed0 100644 --- a/.github/workflows/e2e-cursor-commands.yml +++ b/.github/workflows/e2e-cursor-commands.yml @@ -489,7 +489,20 @@ jobs: -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ /tmp/prompt.txt) - echo "${PROMPT}" > /tmp/final-prompt.txt + # Use python3 literal str.replace() instead of sed to prevent + # delimiter injection via CHANGED_FILES (attacker-controlled filenames). + jq -n \ + --arg REQUESTER "${REQUESTER}" \ + --arg CHANGED_FILES "${CHANGED_FILES}" \ + --arg SERVER_TABLE_BLOCK "${SERVER_TABLE_BLOCK}" \ + --arg SERVER_URL_LINUX "${SERVER_URL_LINUX}" \ + --arg ADMIN_USERNAME "${ADMIN_USERNAME}" \ + '{"REQUESTER":$REQUESTER,"CHANGED_FILES":$CHANGED_FILES,"SERVER_TABLE_BLOCK":$SERVER_TABLE_BLOCK,"SERVER_URL_LINUX":$SERVER_URL_LINUX,"ADMIN_USERNAME":$ADMIN_USERNAME}' \ + > /tmp/prompt-vars.json + + python3 -c \ + 'import json,sys;t=open("/tmp/prompt.txt").read();v=json.load(open("/tmp/prompt-vars.json"));[t:=t.replace("${"+k+"}",val) for k,val in v.items()];sys.stdout.write(t)' \ + > /tmp/final-prompt.txt echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" # ── Launch Cursor agent (shared for both commands) ─────────────────────── From 23661702ac9ab607f40842bd31fd0a7248ee72ad Mon Sep 17 00:00:00 2001 From: yasserfaraazkhan Date: Wed, 15 Apr 2026 14:55:31 +0530 Subject: [PATCH 19/22] fix: skip check-for-failures when install-deps was canceled When @mm-cloud-bot cancels a run mid-flight, the install-node-dependencies step is killed before e2e/node_modules is populated. The check-for-failures step ran unconditionally via `if: always()`, crashing with a missing fast-xml-parser module. Guard it with steps.install-deps.outcome == 'success' so it is silently skipped on cancellation. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/e2e-functional.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index ca983e93cc5..c25089e4470 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -328,6 +328,7 @@ jobs: os: ${{ runner.os }} - name: e2e/install-node-dependencies + id: install-deps run: | npm ci cd e2e && npm ci @@ -361,7 +362,7 @@ jobs: - name: e2e/check-for-failures id: check-failures - if: always() + if: always() && steps.install-deps.outcome == 'success' uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 with: script: | From 8642b46ceceefe83651faf22bdc8eca0095e39ea Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 15 Apr 2026 11:33:31 +0000 Subject: [PATCH 20/22] e2e: revert newTabButton :not([disabled]) selector change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #newTabButton is conditionally rendered — it only exists in the DOM when tabsDisabled is false (which requires isLoggedIn=true). Since the button is removed entirely from the DOM when disabled, the original waitForSelector('#newTabButton') already waited for login state propagation. The :not([disabled]) suffix was redundant and caused 30s timeouts across all 3 CI platforms (6 failures on linux, 7 on macos, 7 on windows). For window_menu.test.ts, remove the entire added wait block since master never had it — the existing focusMainWindow() call suffices. For drag_and_drop, popout_windows, tab_management: restore the original waitForSelector('#newTabButton') selector. Co-authored-by: yasser khan --- e2e/specs/menu_bar/window_menu.test.ts | 4 ---- e2e/specs/server_management/drag_and_drop.test.ts | 5 +---- e2e/specs/server_management/popout_windows.test.ts | 5 +---- e2e/specs/server_management/tab_management.test.ts | 5 +---- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/e2e/specs/menu_bar/window_menu.test.ts b/e2e/specs/menu_bar/window_menu.test.ts index 883624f99cf..e2e485394b1 100644 --- a/e2e/specs/menu_bar/window_menu.test.ts +++ b/e2e/specs/menu_bar/window_menu.test.ts @@ -333,10 +333,6 @@ test.describe('Menu/window_menu', () => { serverMap = await buildServerMap(electronApp); await loginToMattermost(getMattermostServer()); - - // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so - // tabsDisabled becomes false and #newTabButton becomes enabled. - await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); await focusMainWindow(); }); diff --git a/e2e/specs/server_management/drag_and_drop.test.ts b/e2e/specs/server_management/drag_and_drop.test.ts index 12e80584324..109452c08fd 100644 --- a/e2e/specs/server_management/drag_and_drop.test.ts +++ b/e2e/specs/server_management/drag_and_drop.test.ts @@ -198,10 +198,7 @@ test.describe('server_management/drag_and_drop', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - - // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so - // tabsDisabled becomes false and #newTabButton becomes enabled. - await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); + await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/popout_windows.test.ts b/e2e/specs/server_management/popout_windows.test.ts index 40331fda644..4b0cb870d03 100644 --- a/e2e/specs/server_management/popout_windows.test.ts +++ b/e2e/specs/server_management/popout_windows.test.ts @@ -163,10 +163,7 @@ test.describe('server_management/popout_windows', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - - // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so - // tabsDisabled becomes false and #newTabButton becomes enabled. - await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); + await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/tab_management.test.ts b/e2e/specs/server_management/tab_management.test.ts index 17deefb4179..af74fe48b4b 100644 --- a/e2e/specs/server_management/tab_management.test.ts +++ b/e2e/specs/server_management/tab_management.test.ts @@ -79,10 +79,7 @@ test.describe('server_management/tab_management', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - - // Wait for SERVER_LOGGED_IN_CHANGED to propagate to the renderer so - // tabsDisabled becomes false and #newTabButton becomes enabled. - await mainWindow.waitForSelector('#newTabButton:not([disabled])', {timeout: 30_000}); + await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); }); test.beforeEach(async () => { From 21ca4e362b5b5d4987c0e9289c73fd90fd83da3b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 15 Apr 2026 12:29:27 +0000 Subject: [PATCH 21/22] e2e: fix login state propagation race with main-process polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: after loginToMattermost() completes (web app shell visible), the desktop app's isLoggedIn state must travel through a multi-hop IPC chain: web app calls desktopAPI.onLogin() → TAB_LOGIN_CHANGED IPC → WebContentsManager.handleTabLoginChanged → ServerManager.setLoggedIn → SERVER_LOGGED_IN_CHANGED event → MainWindow.sendToRenderer → renderer MainPage.updateServers → getOrderedServers IPC → setState. Only then does tabsDisabled become false and #newTabButton render. On slow CI servers (Matterwick-provisioned instances), this chain can take longer than the 30s timeout used by waitForSelector('#newTabButton'). Fix: add waitForLoggedIn() helper that polls ServerManager.isLoggedIn directly in the main process via electronApp.evaluate(). This is the source of truth — no DOM/renderer round-trip needed. The helper then waits for #newTabButton in the renderer with the remaining budget. Applied to: window_menu, drag_and_drop, popout_windows, tab_management. Also: add retry-reload + increased timeout for bad_servers expired certificate test where .ErrorView may not appear after a single reload. Co-authored-by: yasser khan --- e2e/helpers/login.ts | 49 +++++++++++++++++++ e2e/specs/menu_bar/window_menu.test.ts | 3 +- .../server_management/bad_servers.test.ts | 11 +++-- .../server_management/drag_and_drop.test.ts | 4 +- .../server_management/popout_windows.test.ts | 4 +- .../server_management/tab_management.test.ts | 4 +- 6 files changed, 64 insertions(+), 11 deletions(-) diff --git a/e2e/helpers/login.ts b/e2e/helpers/login.ts index c41d3d6bc30..903eb834e55 100644 --- a/e2e/helpers/login.ts +++ b/e2e/helpers/login.ts @@ -3,6 +3,8 @@ import type {ServerView} from './serverView'; +type ElectronApplication = Awaited>; + async function waitForAppShell(win: ServerView, timeout: number) { const results = await Promise.allSettled([ win.waitForSelector('#post_textbox', {timeout}), @@ -56,3 +58,50 @@ export async function loginToMattermost(win: ServerView): Promise { throw new Error(`loginToMattermost: login succeeded but the app shell never became ready. Current URL: ${await win.url()}`); } } + +/** + * Poll the main process until ServerManager reports isLoggedIn=true for the + * current server, then wait for the renderer to reflect it (the #newTabButton + * appearing in the main window DOM). + * + * The web app's desktopAPI.onLogin() → TAB_LOGIN_CHANGED → ServerManager.setLoggedIn + * chain can lag behind the web app shell becoming visible. Tests that interact + * with the tab bar (new tab, drag-and-drop, close tab) need isLoggedIn=true in + * the renderer before proceeding. + */ +export async function waitForLoggedIn( + electronApp: ElectronApplication, + mainWindow: import('playwright').Page, + timeout = 60_000, +): Promise { + const pollInterval = 500; + const deadline = Date.now() + timeout; + + while (Date.now() < deadline) { + const loggedIn = await electronApp.evaluate(() => { + const refs = (global as any).__e2eTestRefs; // eslint-disable-line @typescript-eslint/no-explicit-any + if (!refs?.ServerManager) { + return false; + } + const serverId = refs.ServerManager.getCurrentServerId?.(); + if (!serverId) { + return false; + } + const server = refs.ServerManager.getServer?.(serverId); + return Boolean(server?.isLoggedIn); + }).catch(() => false); + + if (loggedIn) { + break; + } + + if (Date.now() + pollInterval > deadline) { + throw new Error( + `waitForLoggedIn: ServerManager.isLoggedIn never became true within ${timeout}ms`, + ); + } + await new Promise((resolve) => setTimeout(resolve, pollInterval)); + } + + await mainWindow.waitForSelector('#newTabButton', {timeout: Math.max(deadline - Date.now(), 5_000)}); +} diff --git a/e2e/specs/menu_bar/window_menu.test.ts b/e2e/specs/menu_bar/window_menu.test.ts index e2e485394b1..5a3af368ed4 100644 --- a/e2e/specs/menu_bar/window_menu.test.ts +++ b/e2e/specs/menu_bar/window_menu.test.ts @@ -10,7 +10,7 @@ import {waitForAppReady} from '../../helpers/appReadiness'; import {waitForLockFileRelease} from '../../helpers/cleanup'; import {buildServerMap} from '../../helpers/serverMap'; import {appDir, cmdOrCtrl, demoMattermostConfig, electronBinaryPath, writeConfigFile} from '../../helpers/config'; -import {loginToMattermost} from '../../helpers/login'; +import {loginToMattermost, waitForLoggedIn} from '../../helpers/login'; const windowMenuConfig = { ...demoMattermostConfig, @@ -333,6 +333,7 @@ test.describe('Menu/window_menu', () => { serverMap = await buildServerMap(electronApp); await loginToMattermost(getMattermostServer()); + await waitForLoggedIn(electronApp, mainWindow); await focusMainWindow(); }); diff --git a/e2e/specs/server_management/bad_servers.test.ts b/e2e/specs/server_management/bad_servers.test.ts index 99951f3c238..d9ad905dff3 100644 --- a/e2e/specs/server_management/bad_servers.test.ts +++ b/e2e/specs/server_management/bad_servers.test.ts @@ -313,14 +313,17 @@ test.describe('Bad Server Configurations', () => { }; const {app, userDataDir: badCertUserDataDir} = await launchWithConfig(testInfo, badConfig); try { - // Ensure the renderer has mounted its IPC listeners before the load failure - // fires, then reload to re-trigger the failure so it reaches the UI. await waitForRendererThenReload(app); const mainWindow = app.windows().find((w) => w.url().includes('index')); expect(mainWindow).toBeDefined(); - await mainWindow!.waitForSelector('.ErrorView', {timeout: 30000}); - const errorView = await mainWindow!.$('.ErrorView'); + + let errorView = await mainWindow!.$('.ErrorView'); + if (!errorView) { + await waitForRendererThenReload(app); + await mainWindow!.waitForSelector('.ErrorView', {timeout: 60_000}); + errorView = await mainWindow!.$('.ErrorView'); + } expect(errorView).toBeDefined(); const errorInfo = await mainWindow!.innerText('.ErrorView-techInfo'); diff --git a/e2e/specs/server_management/drag_and_drop.test.ts b/e2e/specs/server_management/drag_and_drop.test.ts index 109452c08fd..cf8e09dfcfb 100644 --- a/e2e/specs/server_management/drag_and_drop.test.ts +++ b/e2e/specs/server_management/drag_and_drop.test.ts @@ -10,7 +10,7 @@ import {test, expect} from '../../fixtures/index'; import {waitForAppReady} from '../../helpers/appReadiness'; import {electronBinaryPath, appDir, demoMattermostConfig, writeConfigFile} from '../../helpers/config'; import {waitForLockFileRelease} from '../../helpers/cleanup'; -import {loginToMattermost} from '../../helpers/login'; +import {loginToMattermost, waitForLoggedIn} from '../../helpers/login'; import {buildServerMap} from '../../helpers/serverMap'; if (!process.env.MM_TEST_SERVER_URL) { @@ -198,7 +198,7 @@ test.describe('server_management/drag_and_drop', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + await waitForLoggedIn(electronApp, mainWindow); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/popout_windows.test.ts b/e2e/specs/server_management/popout_windows.test.ts index 4b0cb870d03..59baa9dc66a 100644 --- a/e2e/specs/server_management/popout_windows.test.ts +++ b/e2e/specs/server_management/popout_windows.test.ts @@ -9,7 +9,7 @@ import {test, expect} from '../../fixtures/index'; import {waitForAppReady} from '../../helpers/appReadiness'; import {electronBinaryPath, appDir, demoMattermostConfig, writeConfigFile} from '../../helpers/config'; import {waitForLockFileRelease} from '../../helpers/cleanup'; -import {loginToMattermost} from '../../helpers/login'; +import {loginToMattermost, waitForLoggedIn} from '../../helpers/login'; import {buildServerMap} from '../../helpers/serverMap'; const config = { @@ -163,7 +163,7 @@ test.describe('server_management/popout_windows', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + await waitForLoggedIn(electronApp, mainWindow); }); test.beforeEach(async () => { diff --git a/e2e/specs/server_management/tab_management.test.ts b/e2e/specs/server_management/tab_management.test.ts index af74fe48b4b..e376148f8d0 100644 --- a/e2e/specs/server_management/tab_management.test.ts +++ b/e2e/specs/server_management/tab_management.test.ts @@ -9,7 +9,7 @@ import {test, expect} from '../../fixtures/index'; import {waitForAppReady} from '../../helpers/appReadiness'; import {electronBinaryPath, appDir, demoMattermostConfig, writeConfigFile} from '../../helpers/config'; import {waitForWindow, closeElectronApp} from '../../helpers/electronApp'; -import {loginToMattermost} from '../../helpers/login'; +import {loginToMattermost, waitForLoggedIn} from '../../helpers/login'; import {buildServerMap} from '../../helpers/serverMap'; const config = demoMattermostConfig; @@ -79,7 +79,7 @@ test.describe('server_management/tab_management', () => { mainWindow = await waitForWindow(electronApp, 'index'); const mmServer = await getMattermostServer(); await loginToMattermost(mmServer); - await mainWindow.waitForSelector('#newTabButton', {timeout: 30_000}); + await waitForLoggedIn(electronApp, mainWindow); }); test.beforeEach(async () => { From 9d04822c9b9d920a7ae8c41225c6a521246df547 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 16 Apr 2026 01:35:06 +0000 Subject: [PATCH 22/22] refactor: remove CI workflow automation, use AGENTS.md for Cursor Cloud E2E guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the GitHub Actions-based Cursor automation (e2e-fix-trigger.yml, e2e-cursor-commands.yml, e2e-functional.yml changes, e2e-label-cleanup.yml changes, server-setup.js, global-setup.ts provisionServer call) in favor of a simpler approach: - Cursor Cloud agents can be triggered via @cursoragent PR comments or the Cursor dashboard's CI monitoring — no repo workflow files needed. - AGENTS.md now contains Docker-based local Mattermost server setup instructions so agents can spin up their own server instance, run E2E tests, and verify fixes without depending on Matterwick CI servers. - Stripped workflow-specific sections (GitHub Actions coding rules, Cursor secrets, etc.) from AGENTS.md — not needed for the Docker approach. The E2E test fixes (waitForLoggedIn helper, bad_servers retry) are preserved in separate commits. Co-authored-by: yasser khan --- .github/workflows/e2e-cursor-commands.yml | 605 ---------------------- .github/workflows/e2e-fix-trigger.yml | 531 ------------------- .github/workflows/e2e-functional.yml | 139 ++--- .github/workflows/e2e-label-cleanup.yml | 48 +- AGENTS.md | 282 +++------- e2e/global-setup.ts | 9 - e2e/utils/github-actions.js | 286 ++-------- e2e/utils/server-setup.js | 191 ------- 8 files changed, 220 insertions(+), 1871 deletions(-) delete mode 100644 .github/workflows/e2e-cursor-commands.yml delete mode 100644 .github/workflows/e2e-fix-trigger.yml delete mode 100644 e2e/utils/server-setup.js diff --git a/.github/workflows/e2e-cursor-commands.yml b/.github/workflows/e2e-cursor-commands.yml deleted file mode 100644 index 4c3a85b0ed0..00000000000 --- a/.github/workflows/e2e-cursor-commands.yml +++ /dev/null @@ -1,605 +0,0 @@ -name: Cursor E2E Commands - -# Allows PR authors and contributors to invoke Cursor agents via PR comments. -# -# Supported commands (case-insensitive, anywhere in the comment): -# -# @cursor fix e2e — fix all currently failing E2E tests on this PR, -# including mass failures. Use this after a large -# change that caused widespread failures. -# -# @cursor fix e2e failures — alias for the above -# -# @cursor add e2e tests — generate new E2E test cases that cover the -# changes introduced by this PR. -# -# @cursor add e2e tests for pr — alias for the above -# -# The workflow replies to the comment immediately with a :eyes: reaction so the -# user knows it was received, then launches the appropriate Cursor agent. -# The agent pushes results (fixes or new test files) directly to the PR branch. -# -# Requires: CURSOR_API_KEY repository secret. - -on: - issue_comment: - types: [created] - -# Write permissions are scoped to the job that posts comments / reactions. -permissions: - contents: read - actions: read - -jobs: - handle-command: - name: Handle @cursor command - runs-on: ubuntu-22.04 - # Only run on PR comments (not issue comments) from users with write access. - # github.event.issue.pull_request is set only for PR comments. - if: | - github.event.issue.pull_request != null && - ( - contains(github.event.comment.body, '@cursor fix e2e') || - contains(github.event.comment.body, '@cursor add e2e') - ) - permissions: - contents: read - actions: read - issues: write - pull-requests: write - steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: React to comment so the user knows it was received - continue-on-error: true - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - COMMENT_ID: ${{ github.event.comment.id }} - with: - github-token: ${{ github.token }} - script: | - await github.rest.reactions.createForIssueComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: Number(process.env.COMMENT_ID), - content: 'eyes', - }); - - - name: Parse command and gather context - id: context - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - # Pass user-controlled comment body via env to prevent script injection. - COMMENT_BODY: ${{ github.event.comment.body }} - PR_NUMBER: ${{ github.event.issue.number }} - with: - github-token: ${{ github.token }} - script: | - const { owner, repo } = context.repo; - const commentBody = (process.env.COMMENT_BODY || '').toLowerCase(); - - const trimmed = String(process.env.PR_NUMBER ?? '').trim(); - if (!(/^\d+$/).test(trimmed)) { - core.setFailed('Invalid PR number'); - return; - } - const prNumber = Number(trimmed); - if (!Number.isInteger(prNumber) || prNumber <= 0) { - core.setFailed('Invalid PR number'); - return; - } - - // Determine which command was invoked - const isFix = commentBody.includes('@cursor fix e2e'); - const isAdd = commentBody.includes('@cursor add e2e'); - core.setOutput('command', isFix ? 'fix' : 'add'); - core.setOutput('pr_number', String(prNumber)); - - // Get PR metadata - const pr = await github.rest.pulls.get({ owner, repo, pull_number: prNumber }); - const prUrl = pr.data.html_url; - const headBranch = pr.data.head.ref; - const headSha = pr.data.head.sha; - core.setOutput('pr_url', prUrl); - core.setOutput('head_branch', headBranch); - - // ── Find the latest E2E run for this PR branch ─────────────────── - let latestRunUrl = ''; - let latestRunId = ''; - let failingSummary = '(no failing test data found — check CI logs manually)'; - const serverUrls = { linux: '', macos: '', windows: '' }; - let adminUsername = ''; - let serverTableBlock = '(server URLs not found)'; - - try { - const workflows = await github.rest.actions.listRepoWorkflows({ owner, repo }); - const e2eWf = workflows.data.workflows.find( - (w) => w.name === 'Electron Playwright Tests' - ); - if (e2eWf) { - const runs = await github.rest.actions.listWorkflowRuns({ - owner, repo, - workflow_id: e2eWf.id, - event: 'workflow_dispatch', - per_page: 20, - }); - const matchingRun = runs.data.workflow_runs.find( - (r) => r.head_sha === headSha || r.head_branch === headBranch - ); - if (matchingRun) { - latestRunUrl = matchingRun.html_url; - latestRunId = String(matchingRun.id); - - if (isFix) { - // ── Download JUnit artifacts for exact test names ───────── - const failingTests = { linux: [], macos: [], windows: [] }; - - try { - const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner, repo, run_id: matchingRun.id, per_page: 50, - }); - - for (const artifact of artifacts.data.artifacts) { - const aName = artifact.name.toLowerCase(); - let platform = null; - if (aName.startsWith('test-results-linux') || aName.startsWith('test-results-ubuntu')) { - platform = 'linux'; - } else if (aName.startsWith('test-results-macos') || aName.startsWith('test-results-darwin')) { - platform = 'macos'; - } else if (aName.startsWith('test-results-windows')) { - platform = 'windows'; - } - if (!platform) { continue; } - - try { - const dl = await github.rest.actions.downloadArtifact({ - owner, repo, artifact_id: artifact.id, archive_format: 'zip', - }); - const resp = await fetch(dl.url); - const buf = Buffer.from(await resp.arrayBuffer()); - const AdmZip = require('adm-zip'); - const zip = new AdmZip(buf); - const xmlEntry = zip.getEntries().find( - (e) => e.entryName.endsWith('e2e-junit.xml'), - ); - if (!xmlEntry) { continue; } - const xml = xmlEntry.getData().toString('utf8'); - const suiteMatches = xml.matchAll(/]+name="([^"]+)"[^>]*>[\s\S]*?<(?:failure|error)[^/]/g); - for (const m of suiteMatches) { - failingTests[platform].push(m[1]); - } - } catch (err) { - core.warning(`Could not parse JUnit for ${platform}: ${err.message}`); - } - } - } catch (err) { - core.warning(`Could not fetch artifacts: ${err.message}`); - } - - const failingTestsBlock = Object.entries(failingTests) - .filter(([, names]) => names.length > 0) - .map(([plat, names]) => `${plat}:\n${names.map((n) => ` - ${n}`).join('\n')}`) - .join('\n\n'); - - if (failingTestsBlock) { - failingSummary = failingTestsBlock; - } else { - // Fall back to job-level conclusion if JUnit is unavailable - const jobs = await github.rest.actions.listJobsForWorkflowRun({ - owner, repo, run_id: matchingRun.id, per_page: 100, - }); - const lines = []; - for (const job of jobs.data.jobs) { - if (job.conclusion === 'failure') { - const n = job.name.toLowerCase(); - const plat = n.includes('linux') || n.includes('ubuntu') ? 'linux' - : n.includes('macos') || n.includes('darwin') ? 'macos' - : n.includes('windows') ? 'windows' - : null; - if (plat) { lines.push(`- ${plat}: failed (JUnit unavailable — check CI logs)`); } - } - } - if (lines.length > 0) { failingSummary = lines.join('\n'); } - } - } - } - } - } catch (err) { - core.warning(`Could not fetch E2E run info: ${err.message}`); - } - - core.setOutput('latest_run_url', latestRunUrl); - core.setOutput('latest_run_id', latestRunId); - core.setOutput('failing_summary', failingSummary); - - // ── Read per-platform server info from the PR comment ───────────── - try { - const comments = await github.rest.issues.listComments({ - owner, repo, issue_number: prNumber, per_page: 100, - }); - const infoComment = (comments.data || []).find( - (c) => c.body && c.body.includes('') - ); - if (infoComment) { - const body = infoComment.body; - const rowRe = /^\|\s*`(linux|macos|windows)`\s*\|\s*(\S+)\s*\|/im; - for (const line of body.split('\n')) { - const m = line.match(rowRe); - if (m) { serverUrls[m[1].toLowerCase()] = m[2]; } - } - const userMatch = body.match(/\*\*Admin username:\*\*\s*`([^`]+)`/); - if (userMatch) { adminUsername = userMatch[1]; } - const tableLines = body.split('\n').filter( - (l) => l.startsWith('|') && !l.startsWith('| Platform'), - ).slice(0, 5); - serverTableBlock = tableLines.join('\n'); - } - } catch (err) { - core.warning(`Could not read PR comments: ${err.message}`); - } - - core.setOutput('server_url_linux', serverUrls.linux); - core.setOutput('server_url_macos', serverUrls.macos); - core.setOutput('server_url_windows', serverUrls.windows); - core.setOutput('admin_username', adminUsername); - core.setOutput('server_table_block', serverTableBlock); - - // ── Collect the PR diff summary for "add tests" command ────────── - let changedFiles = ''; - if (isAdd) { - try { - const files = await github.rest.pulls.listFiles({ - owner, repo, pull_number: prNumber, per_page: 100, - }); - changedFiles = files.data. - filter((f) => !f.filename.startsWith('e2e/')). - map((f) => `${f.status.padEnd(9)} ${f.filename}`). - join('\n'); - } catch (err) { - core.warning(`Could not list PR files: ${err.message}`); - } - } - core.setOutput('changed_files', changedFiles); - - # ── FIX command ────────────────────────────────────────────────────────── - - name: Build fix-e2e prompt - if: steps.context.outputs.command == 'fix' - id: fix_prompt - env: - FAILING_SUMMARY: ${{ steps.context.outputs.failing_summary }} - SERVER_TABLE_BLOCK: ${{ steps.context.outputs.server_table_block }} - SERVER_URL_LINUX: ${{ steps.context.outputs.server_url_linux }} - SERVER_URL_MACOS: ${{ steps.context.outputs.server_url_macos }} - SERVER_URL_WINDOWS: ${{ steps.context.outputs.server_url_windows }} - ADMIN_USERNAME: ${{ steps.context.outputs.admin_username }} - LATEST_RUN_URL: ${{ steps.context.outputs.latest_run_url }} - LATEST_RUN_ID: ${{ steps.context.outputs.latest_run_id }} - REQUESTER: ${{ github.event.comment.user.login }} - run: | - cat > /tmp/prompt.txt << 'PROMPT_EOF' - @${REQUESTER} asked you to fix ALL failing E2E tests on this PR, - including mass failures that the automatic trigger skipped. - You are running on a Linux agent with an X server on DISPLAY=:1. - - ══════════════════════════════════════════════════ - FAILING TESTS (from CI JUnit XML) - ══════════════════════════════════════════════════ - ${FAILING_SUMMARY} - - ══════════════════════════════════════════════════ - PROVISIONED TEST SERVERS (still live) - ══════════════════════════════════════════════════ - ${SERVER_TABLE_BLOCK} - - Individual URLs for use in run commands: - Linux server: ${SERVER_URL_LINUX} - macOS server: ${SERVER_URL_MACOS} - Windows server: ${SERVER_URL_WINDOWS} - Admin username: read the MM_DESKTOP_E2E_USER_NAME environment variable - (injected as a Cursor secret — do NOT hardcode it) - Admin password: read the MM_DESKTOP_E2E_USER_CREDENTIALS environment variable - (injected as a Cursor secret — do NOT hardcode it) - - CI run (for full logs and trace artifacts): ${LATEST_RUN_URL} - - ══════════════════════════════════════════════════ - STEP-BY-STEP INSTRUCTIONS - ══════════════════════════════════════════════════ - - ## Step 1 — Build the app - - Run this once at the start: - - source ~/.nvm/nvm.sh && nvm use 20.15.0 - npm ci - cd e2e && npm ci && cd .. - npm run build-test - - If the build fails, stop and post a PR comment with the error. - - ## Step 2 — Identify exact failing spec files - - The failing test names are listed above. Map each name to a file in - e2e/specs/. If the JUnit block is empty, download the artifacts: - - gh run download ${LATEST_RUN_ID} \ - --name test-results-linux \ - --dir /tmp/results-linux - cat /tmp/results-linux/test-results/e2e-junit.xml | grep 'testcase name' - - Repeat for test-results-macos / test-results-windows. - - ## Step 3 — Look for a SHARED ROOT CAUSE first - - Widespread failures often share one root cause: - - Same selector missing across tests → renamed UI component - - Same IPC channel failing → changed interface - - Same timeout pattern → new async loading state - - If you find a shared cause, fix it at the shared layer (helper or - fixture) rather than patching every spec individually. - - ## Step 4 — Reproduce BEFORE editing - - Run the failing spec against the Linux server: - - cd e2e - export DISPLAY=:1 - export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" - export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" - export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" - xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ - npx playwright test \ - --reporter=list --workers=1 - cd .. - - You MUST see the failure before touching code. - If you cannot reproduce, classify as "flake" and post a PR comment. - - ## Step 5 — Fix and re-run - - After each edit, run the same command from Step 4. Confirm the test - passes before committing that file. - - DO NOT COMMIT a fix unless the re-run shows "X passed". - - For macOS/Windows-only failures: - Run the spec against the Linux server to validate the logic. - If it passes on Linux, commit with a note about the platform gap. - CI will confirm on the real platform after push. - - ## Step 6 — Classify failures you cannot fix locally - - PRODUCT BUG → Post a PR comment: - "Product Bug: — expected X, got Y, likely in " - - ## Step 7 — Hard limits - - - Fix at most 10 distinct spec files per run. - - Never touch src/ files. - - Scope: e2e/specs/, e2e/helpers/, e2e/fixtures/, e2e/utils/ only. - - Never add long sleeps (> 2 s) as a fix. - - ## Step 8 — Summary comment - - After committing all fixes, post a single PR comment: - - Each fix: spec file path + root-cause description + - exact run command + "PASSED" result - - Each product bug: test name + symptom + suspected src/ file - - Any flakes: test name + reason - PROMPT_EOF - - PROMPT=$(sed \ - -e "s|\${REQUESTER}|${REQUESTER}|g" \ - -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ - -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ - -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ - -e "s|\${SERVER_URL_MACOS}|${SERVER_URL_MACOS}|g" \ - -e "s|\${SERVER_URL_WINDOWS}|${SERVER_URL_WINDOWS}|g" \ - -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ - -e "s|\${LATEST_RUN_URL}|${LATEST_RUN_URL}|g" \ - -e "s|\${LATEST_RUN_ID}|${LATEST_RUN_ID}|g" \ - /tmp/prompt.txt) - - echo "${PROMPT}" > /tmp/final-prompt.txt - echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" - - # ── ADD TESTS command ──────────────────────────────────────────────────── - - name: Build add-e2e-tests prompt - if: steps.context.outputs.command == 'add' - id: add_prompt - env: - CHANGED_FILES: ${{ steps.context.outputs.changed_files }} - SERVER_TABLE_BLOCK: ${{ steps.context.outputs.server_table_block }} - SERVER_URL_LINUX: ${{ steps.context.outputs.server_url_linux }} - ADMIN_USERNAME: ${{ steps.context.outputs.admin_username }} - REQUESTER: ${{ github.event.comment.user.login }} - run: | - cat > /tmp/prompt.txt << 'PROMPT_EOF' - @${REQUESTER} asked you to add E2E test cases that cover the changes - introduced by this PR. You are running on a Linux agent (DISPLAY=:1). - - CHANGED FILES IN THIS PR (non-e2e only) - ---------------------------------------- - ${CHANGED_FILES} - - PROVISIONED TEST SERVERS - ------------------------- - ${SERVER_TABLE_BLOCK} - - Linux server for local verification: - MM_TEST_SERVER_URL: ${SERVER_URL_LINUX} - MM_TEST_USER_NAME: read MM_DESKTOP_E2E_USER_NAME env var (Cursor secret) - MM_TEST_PASSWORD: read MM_DESKTOP_E2E_USER_CREDENTIALS env var (Cursor secret) - - INSTRUCTIONS - ------------ - - ## Step 1 — Build - source ~/.nvm/nvm.sh && nvm use 20.15.0 - npm ci && cd e2e && npm ci && cd .. && npm run build-test - - ## Step 2 — Understand the changes - Read each changed file to understand what behavior was added or modified. - - ## Step 3 — Write tests - For each meaningful behavioral change, write a new Playwright E2E spec - (or extend an existing spec if one already covers that area). - - Placement rules: - - New specs go in e2e/specs/ under the most relevant subdirectory. - - Use the fixture layer (e2e/fixtures/index.ts) — do not re-implement - launch or login logic. - - Follow patterns in existing e2e/specs/ files. - - Tag each new test with @P2 @all or the appropriate priority. - - ## Step 4 — Run each new spec before committing - - cd e2e - export DISPLAY=:1 - export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" - export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" - export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" - xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ - npx playwright test --reporter=list --workers=1 - cd .. - - A new test MUST pass before you commit it. - If it cannot be made to pass locally, explain why in a PR comment. - - ## Step 5 — Hard limits - - Add at most 3 new spec files per run. - - Do not modify src/ files. - - If you cannot determine a clear, testable behavior, post a PR - comment explaining the ambiguity instead of writing a weak test. - - ## Step 6 — Summary comment - After committing, post a PR comment listing each new file with: - - File path - - What behavior it covers - - Exact run command used + "PASSED" confirmation - PROMPT_EOF - - PROMPT=$(sed \ - -e "s|\${REQUESTER}|${REQUESTER}|g" \ - -e "s|\${CHANGED_FILES}|${CHANGED_FILES}|g" \ - -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ - -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ - -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ - /tmp/prompt.txt) - - # Use python3 literal str.replace() instead of sed to prevent - # delimiter injection via CHANGED_FILES (attacker-controlled filenames). - jq -n \ - --arg REQUESTER "${REQUESTER}" \ - --arg CHANGED_FILES "${CHANGED_FILES}" \ - --arg SERVER_TABLE_BLOCK "${SERVER_TABLE_BLOCK}" \ - --arg SERVER_URL_LINUX "${SERVER_URL_LINUX}" \ - --arg ADMIN_USERNAME "${ADMIN_USERNAME}" \ - '{"REQUESTER":$REQUESTER,"CHANGED_FILES":$CHANGED_FILES,"SERVER_TABLE_BLOCK":$SERVER_TABLE_BLOCK,"SERVER_URL_LINUX":$SERVER_URL_LINUX,"ADMIN_USERNAME":$ADMIN_USERNAME}' \ - > /tmp/prompt-vars.json - - python3 -c \ - 'import json,sys;t=open("/tmp/prompt.txt").read();v=json.load(open("/tmp/prompt-vars.json"));[t:=t.replace("${"+k+"}",val) for k,val in v.items()];sys.stdout.write(t)' \ - > /tmp/final-prompt.txt - echo "prompt_file=/tmp/final-prompt.txt" >> "$GITHUB_OUTPUT" - - # ── Launch Cursor agent (shared for both commands) ─────────────────────── - - name: Launch Cursor agent - id: launch - env: - CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} - PR_URL: ${{ steps.context.outputs.pr_url }} - CURSOR_API_URL: https://api.cursor.com/v0/agents - run: | - PROMPT=$(cat /tmp/final-prompt.txt) - - RESPONSE=$(curl --silent --fail \ - --request POST \ - --url "${CURSOR_API_URL}" \ - -u "${CURSOR_API_KEY}:" \ - --header 'Content-Type: application/json' \ - --data "$(jq -n \ - --arg prompt "${PROMPT}" \ - --arg prUrl "${PR_URL}" \ - '{ - prompt: { text: $prompt }, - source: { prUrl: $prUrl }, - target: { autoBranch: false } - }' - )") - - AGENT_ID=$(echo "${RESPONSE}" | jq -r '.id // empty') - if [ -z "${AGENT_ID}" ]; then - echo "::error::Failed to launch Cursor agent. Response: ${RESPONSE}" - exit 1 - fi - - AGENT_URL=$(echo "${RESPONSE}" | jq -r '.target.url // empty') - echo "Agent launched: ${AGENT_ID} — ${AGENT_URL}" - echo "agent_url=${AGENT_URL}" >> "$GITHUB_OUTPUT" - - - name: Reply to comment with agent link - if: always() && steps.launch.outcome == 'success' - continue-on-error: true - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - AGENT_URL: ${{ steps.launch.outputs.agent_url }} - COMMAND: ${{ steps.context.outputs.command }} - PR_NUMBER: ${{ steps.context.outputs.pr_number }} - with: - github-token: ${{ github.token }} - script: | - const trimmed = String(process.env.PR_NUMBER ?? '').trim(); - if (!(/^\d+$/).test(trimmed)) { return; } - const prNumber = Number(trimmed); - if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } - const agentUrl = process.env.AGENT_URL || '(pending)'; - const command = process.env.COMMAND; - const action = command === 'fix' - ? 'fix failing E2E tests (including mass failures)' - : 'add E2E tests for the current PR changes'; - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body: [ - `### :robot: Cursor Agent Launched`, - '', - `Task: **${action}**`, - '', - 'The agent will reproduce each failure before editing, confirm the fix passes, then push to this branch.', - agentUrl ? `\n**Agent:** ${agentUrl}` : '', - ].filter(Boolean).join('\n'), - }); - - - name: Reply to comment on failure - if: always() && steps.launch.outcome == 'failure' - continue-on-error: true - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - PR_NUMBER: ${{ steps.context.outputs.pr_number }} - with: - github-token: ${{ github.token }} - script: | - const trimmed = String(process.env.PR_NUMBER ?? '').trim(); - if (!(/^\d+$/).test(trimmed)) { return; } - const prNumber = Number(trimmed); - if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body: [ - '### :x: Cursor Agent Launch Failed', - '', - 'Could not launch the agent. Possible causes:', - '- `CURSOR_API_KEY` repository secret is missing or invalid.', - '- Cursor API is temporarily unavailable.', - '', - 'Check the [workflow run](' + - `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}` + - ') for details.', - ].join('\n'), - }); diff --git a/.github/workflows/e2e-fix-trigger.yml b/.github/workflows/e2e-fix-trigger.yml deleted file mode 100644 index 5e813d7f47f..00000000000 --- a/.github/workflows/e2e-fix-trigger.yml +++ /dev/null @@ -1,531 +0,0 @@ -name: E2E Fix Trigger - -# Fires when "Electron Playwright Tests" completes. If there are new failures -# AND the failure count is below the mass-failure threshold, this workflow -# launches a Cursor cloud agent on the PR branch to inspect the CI run, read -# the failing test source files, and either fix test bugs or post a comment -# describing product bugs — all without human intervention. -# -# Token / prompt efficiency decisions -# ------------------------------------ -# 1. SKIP when ALL platforms failed (>= MASS_FAILURE_THRESHOLD failures total). -# That pattern almost always means a build/infra breakage, not individual -# test bugs. A Cursor agent would waste tokens trying to fix 30+ tests at -# once. Instead we post a single PR comment explaining why the agent was -# skipped so a developer can investigate the root cause first. -# -# 2. SKIP nightly runs — they have no PR to fix. -# -# 3. The triage step downloads the JUnit XML artifact so the prompt contains -# the exact failing test names, not just platform-level counts. This -# prevents the agent from guessing which tests failed. -# -# 4. Per-platform server URLs and admin credentials are extracted from the -# server-info PR comment and passed individually so the agent can run the -# exact failing spec against the exact server where it failed. -# -# 5. The agent prompt has a hard "MUST RUN BEFORE COMMIT" rule: the agent -# must execute the failing spec against the live server and see it pass -# before staging the fix. Platform-specific run commands are provided. -# -# 6. source.prUrl is used (not source.repository + ref) so the agent works -# directly on the PR's head branch and pushes fixes there, triggering -# another CI run automatically. -# -# 7. target.autoBranch: false — push to the PR's existing head branch rather -# than creating a new branch. - -on: - workflow_run: - workflows: ["Electron Playwright Tests"] - types: [completed] - -# workflow_run always runs from the default branch — no PR secrets available -# here. CURSOR_API_KEY must be a repository secret, not a PR/environment secret. -# Write permissions are scoped to the single job that posts PR comments. -permissions: - contents: read - actions: read - -env: - # Skip the agent if the total failure count across all platforms reaches - # this threshold — it indicates a systemic build/infra issue, not individual - # test bugs that an agent can fix efficiently. - MASS_FAILURE_THRESHOLD: 15 - # Cursor API endpoint - CURSOR_API_URL: https://api.cursor.com/v0/agents - -jobs: - triage-and-fix: - name: Triage E2E failures and launch Cursor fix agent - runs-on: ubuntu-22.04 - # Only run for workflow_dispatch-triggered runs (Matterwick) that failed. - # Nightly runs don't have an associated PR so skip those too. - if: | - github.event.workflow_run.conclusion == 'failure' && - github.event.workflow_run.event == 'workflow_dispatch' - permissions: - contents: read - actions: read - issues: write - pull-requests: write - steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Collect failure details and decide whether to launch agent - id: triage - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - MASS_FAILURE_THRESHOLD: ${{ env.MASS_FAILURE_THRESHOLD }} - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - with: - github-token: ${{ github.token }} - script: | - const { owner, repo } = context.repo; - const runId = Number(process.env.WORKFLOW_RUN_ID); - const threshold = parseInt(process.env.MASS_FAILURE_THRESHOLD, 10); - - // ── 1. Resolve the PR ──────────────────────────────────────────── - const run = await github.rest.actions.getWorkflowRun({ owner, repo, run_id: runId }); - const headSha = run.data.head_sha; - const headBranch = run.data.head_branch; - const headOwner = run.data.head_repository?.owner?.login || owner; - - let prNumber = null; - let prUrl = null; - if (run.data.pull_requests && run.data.pull_requests.length > 0) { - prNumber = run.data.pull_requests[0].number; - } else if (headBranch) { - const prs = await github.rest.pulls.list({ - owner, repo, state: 'open', - head: `${headOwner}:${headBranch}`, - }); - const match = (prs.data || []).find((p) => p.head && p.head.sha === headSha); - if (match) { - prNumber = match.number; - } - } - - if (!prNumber) { - core.warning('Could not resolve PR number — skipping agent launch'); - core.setOutput('skip', 'true'); - core.setOutput('skip_reason', 'no_pr'); - return; - } - - prUrl = `https://github.com/${owner}/${repo}/pull/${prNumber}`; - core.setOutput('pr_number', String(prNumber)); - core.setOutput('pr_url', prUrl); - - // ── 2. Collect per-platform failure counts ─────────────────────── - const jobs = await github.rest.actions.listJobsForWorkflowRun({ - owner, repo, run_id: runId, per_page: 100, - }); - - const platformFailures = { linux: 0, macos: 0, windows: 0 }; - - for (const job of jobs.data.jobs) { - const nameLower = job.name.toLowerCase(); - let platform = null; - if (nameLower.includes('linux') || nameLower.includes('ubuntu')) { - platform = 'linux'; - } else if (nameLower.includes('macos') || nameLower.includes('darwin')) { - platform = 'macos'; - } else if (nameLower.includes('windows') || nameLower.includes('win32')) { - platform = 'windows'; - } - if (!platform) { continue; } - if (job.conclusion === 'failure') { - platformFailures[platform] = platformFailures[platform] || 1; - } - } - - const totalFailures = Object.values(platformFailures).reduce((a, b) => a + b, 0); - core.info(`Failure counts — linux:${platformFailures.linux} macos:${platformFailures.macos} windows:${platformFailures.windows} total:${totalFailures}`); - core.setOutput('total_failures', String(totalFailures)); - - // ── 3. Mass-failure check ──────────────────────────────────────── - const allPlatformsFailed = - platformFailures.linux > 0 && - platformFailures.macos > 0 && - platformFailures.windows > 0; - - if (allPlatformsFailed || totalFailures >= threshold) { - core.warning(`Mass-failure detected (total=${totalFailures}, threshold=${threshold}) — skipping agent`); - core.setOutput('skip', 'true'); - core.setOutput('skip_reason', 'mass_failure'); - return; - } - - core.setOutput('skip', 'false'); - - // ── 4. Download JUnit artifacts to get exact failing test names ── - // Each platform uploads a test-results-{os} artifact containing - // e2e/test-results/e2e-junit.xml. Download and parse them here so - // the agent prompt contains exact test names, not just counts. - const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner, repo, run_id: runId, per_page: 50, - }); - - const failingTests = { linux: [], macos: [], windows: [] }; - - for (const artifact of artifacts.data.artifacts) { - const nameLower = artifact.name.toLowerCase(); - let platform = null; - if (nameLower.startsWith('test-results-linux') || nameLower.startsWith('test-results-ubuntu')) { - platform = 'linux'; - } else if (nameLower.startsWith('test-results-macos') || nameLower.startsWith('test-results-darwin')) { - platform = 'macos'; - } else if (nameLower.startsWith('test-results-windows')) { - platform = 'windows'; - } - if (!platform || platformFailures[platform] === 0) { continue; } - - try { - // Download the artifact zip (streams as redirect URL) - const dl = await github.rest.actions.downloadArtifact({ - owner, repo, artifact_id: artifact.id, archive_format: 'zip', - }); - // dl.url is the redirect; use fetch to get the zip bytes - const resp = await fetch(dl.url); - const buf = Buffer.from(await resp.arrayBuffer()); - - // Unzip in memory — find the JUnit XML - const AdmZip = require('adm-zip'); - const zip = new AdmZip(buf); - const xmlEntry = zip.getEntries().find( - (e) => e.entryName.endsWith('e2e-junit.xml'), - ); - if (!xmlEntry) { continue; } - - const xml = xmlEntry.getData().toString('utf8'); - // Parse out test names via simple regex — no full XML parse needed - const suiteMatches = xml.matchAll(/]+name="([^"]+)"[^>]*>[\s\S]*?<(?:failure|error)[^/]/g); - for (const m of suiteMatches) { - failingTests[platform].push(m[1]); - } - } catch (err) { - core.warning(`Could not parse JUnit for ${platform}: ${err.message}`); - } - } - - const failingTestsBlock = Object.entries(failingTests) - .filter(([, names]) => names.length > 0) - .map(([platform, names]) => { - const list = names.map((n) => ` - ${n}`).join('\n'); - return `${platform}:\n${list}`; - }) - .join('\n\n'); - - // Falling back to platform count summary if we could not parse XML - const failingSummary = failingTestsBlock || - Object.entries(platformFailures) - .filter(([, count]) => count > 0) - .map(([p, c]) => `- ${p}: ${c} failure(s) (JUnit not available — check CI logs)`) - .join('\n'); - - core.setOutput('failing_tests_block', failingTestsBlock); - core.setOutput('failing_summary', failingSummary); - - // ── 5. Extract per-platform server info from the PR comment ────── - // post-server-info posted a Markdown table; parse out individual - // rows so we can expose each URL and the admin username separately. - const MARKER = ''; - const serverUrls = { linux: '', macos: '', windows: '' }; - let adminUsername = ''; - let serverTableBlock = '(server URLs not found in PR comment)'; - - try { - const comments = await github.rest.issues.listComments({ - owner, repo, issue_number: prNumber, per_page: 100, - }); - const infoComment = (comments.data || []).find( - (c) => c.body && c.body.includes(MARKER), - ); - if (infoComment) { - const body = infoComment.body; - - // Extract table rows (lines starting with |`linux`|, |`macos`|, |`windows`|) - const rowRe = /^\|\s*`(linux|macos|windows)`\s*\|\s*(\S+)\s*\|/im; - for (const line of body.split('\n')) { - const m = line.match(rowRe); - if (m) { - serverUrls[m[1].toLowerCase()] = m[2]; - } - } - - // Extract admin username from "**Admin username:** `...`" - const userMatch = body.match(/\*\*Admin username:\*\*\s*`([^`]+)`/); - if (userMatch) { adminUsername = userMatch[1]; } - - // Keep the raw table for the prompt as well - const tableLines = body.split('\n').filter( - (l) => l.startsWith('|') && !l.startsWith('| Platform'), - ).slice(0, 5); - serverTableBlock = tableLines.join('\n'); - } - } catch (err) { - core.warning(`Could not read PR comments: ${err.message}`); - } - - core.setOutput('server_url_linux', serverUrls.linux); - core.setOutput('server_url_macos', serverUrls.macos); - core.setOutput('server_url_windows', serverUrls.windows); - core.setOutput('admin_username', adminUsername); - core.setOutput('server_table_block', serverTableBlock); - core.setOutput('workflow_run_url', `https://github.com/${owner}/${repo}/actions/runs/${runId}`); - core.setOutput('workflow_run_id', String(runId)); - - - name: Post skip notice for mass failures - if: steps.triage.outputs.skip == 'true' && steps.triage.outputs.skip_reason == 'mass_failure' - continue-on-error: true - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - PR_NUMBER: ${{ steps.triage.outputs.pr_number }} - TOTAL_FAILURES: ${{ steps.triage.outputs.total_failures }} - WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} - with: - github-token: ${{ github.token }} - script: | - const trimmed = String(process.env.PR_NUMBER ?? '').trim(); - if (!(/^\d+$/).test(trimmed)) { return; } - const prNumber = Number(trimmed); - if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } - const total = process.env.TOTAL_FAILURES; - const runUrl = process.env.WORKFLOW_RUN_URL; - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body: [ - '### :warning: E2E Fix Agent Skipped — Mass Failure Detected', - '', - `All platforms reported failures (${total} total) which typically indicates a **build or infrastructure issue** rather than individual test bugs.`, - '', - 'The Cursor agent was not launched to avoid wasting tokens on a systemic problem.', - '', - '**What to do:**', - '1. Check the [workflow run](' + runUrl + ') for build errors or server provisioning failures.', - '2. If it was a transient CI issue, push an empty commit to re-trigger E2E.', - '3. If it is a real product regression, investigate the failing tests manually.', - '4. To override and force-launch the agent anyway, comment `@cursor fix e2e` on this PR.', - ].join('\n'), - }); - - - name: Launch Cursor agent to fix E2E failures - if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' - id: launch - env: - CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} - PR_URL: ${{ steps.triage.outputs.pr_url }} - FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} - SERVER_TABLE_BLOCK: ${{ steps.triage.outputs.server_table_block }} - SERVER_URL_LINUX: ${{ steps.triage.outputs.server_url_linux }} - SERVER_URL_MACOS: ${{ steps.triage.outputs.server_url_macos }} - SERVER_URL_WINDOWS: ${{ steps.triage.outputs.server_url_windows }} - ADMIN_USERNAME: ${{ steps.triage.outputs.admin_username }} - WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} - WORKFLOW_RUN_ID: ${{ steps.triage.outputs.workflow_run_id }} - run: | - cat > /tmp/agent-prompt.txt << 'PROMPT_EOF' - E2E tests failed on this PR. Your job is to fix every test bug you find. - You are running on a Linux agent with an X server on DISPLAY=:1. - - ══════════════════════════════════════════════════ - FAILING TESTS (from CI JUnit XML) - ══════════════════════════════════════════════════ - ${FAILING_SUMMARY} - - ══════════════════════════════════════════════════ - PROVISIONED TEST SERVERS (still live) - ══════════════════════════════════════════════════ - ${SERVER_TABLE_BLOCK} - - Individual URLs for use in run commands: - Linux server: ${SERVER_URL_LINUX} - macOS server: ${SERVER_URL_MACOS} - Windows server: ${SERVER_URL_WINDOWS} - Admin username: read the MM_DESKTOP_E2E_USER_NAME environment variable - (injected as a Cursor secret — do NOT hardcode it) - Admin password: read the MM_DESKTOP_E2E_USER_CREDENTIALS environment variable - (injected as a Cursor secret — do NOT hardcode it) - - CI run (for full logs and trace artifacts): ${WORKFLOW_RUN_URL} - - ══════════════════════════════════════════════════ - STEP-BY-STEP INSTRUCTIONS - ══════════════════════════════════════════════════ - - ## Step 1 — Build the app - - Run this once at the start (required before any spec can run): - - source ~/.nvm/nvm.sh && nvm use 20.15.0 - npm ci - cd e2e && npm ci && cd .. - npm run build-test - - If the build fails, stop and post a PR comment with the error. - - ## Step 2 — Identify the exact failing spec files - - The failing test names are listed above. Map each name to a file in - e2e/specs/. If the JUnit block is empty, download the test-results - artifact from the CI run URL above and extract e2e-junit.xml manually - using the GitHub CLI: - - gh run download ${WORKFLOW_RUN_ID} \ - --name test-results-linux \ - --dir /tmp/results-linux - cat /tmp/results-linux/test-results/e2e-junit.xml | grep 'testcase name' - - Repeat for test-results-macos / test-results-windows as needed. - - ## Step 3 — Reproduce the failure BEFORE editing anything - - Run the failing spec file. Use the Linux server URL for Linux-tagged - tests; any URL works for platform-neutral tests. - - Linux run command (use this for linux-tagged tests or neutral tests): - - cd e2e - export DISPLAY=:1 - export MM_TEST_SERVER_URL="${SERVER_URL_LINUX}" - export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" - export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" - xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ - npx playwright test \ - --reporter=list --workers=1 - cd .. - - You MUST confirm the test fails on HEAD before touching any code. - If you cannot reproduce the failure, classify it as "flake" and post - a PR comment — do NOT edit the test. - - ## Step 4 — Classify each failure - - TEST BUG — the test code is wrong: - Examples: stale selector, wrong assertion value, missing wait, - race condition in setup/teardown, obsolete helper. - Action: fix the test code; then proceed to Step 5. - - PRODUCT BUG — the app does not match what the test expects: - Examples: renamed UI element, changed IPC channel, missing feature. - Action: do NOT modify the test. Post a PR comment: - "Product Bug: — expected X, got Y, likely in " - - ## Step 5 — Fix and re-run - - After editing the spec (or its helper/fixture), run the exact same - command from Step 3 and confirm the test now passes. - - DO NOT COMMIT a fix until the re-run shows the test passing. - If the re-run still fails, debug further before committing. - - For macOS/Windows-only failures: - - The macOS/Windows platforms use the same spec source on GitHub. - - Run the spec against the Linux server first to confirm the test - logic is correct. If it passes on Linux but the failure was only - on macOS/Windows, note it in your commit message. Commit, then - push so CI re-runs on the real platform to confirm. - - ## Step 6 — Hard limits - - - Fix at most 8 distinct spec files per run. - - Never touch src/ files. - - Scope: e2e/specs/, e2e/helpers/, e2e/fixtures/, e2e/utils/ only. - - Never add long sleeps (> 2 s) as a fix. - - If you exceed the file limit, post a comment listing remaining failures. - - ## Step 7 — Summary comment - - After committing all fixes, post a single PR comment: - - Each fix: spec file path + one-line root-cause description + - exact run command used + "PASSED" result - - Each product bug: test name + symptom + suspected src/ file - - Any flakes: test name + why you concluded it is a flake - PROMPT_EOF - - # Substitute env vars into the prompt - PROMPT=$(sed \ - -e "s|\${FAILING_SUMMARY}|${FAILING_SUMMARY}|g" \ - -e "s|\${SERVER_TABLE_BLOCK}|${SERVER_TABLE_BLOCK}|g" \ - -e "s|\${SERVER_URL_LINUX}|${SERVER_URL_LINUX}|g" \ - -e "s|\${SERVER_URL_MACOS}|${SERVER_URL_MACOS}|g" \ - -e "s|\${SERVER_URL_WINDOWS}|${SERVER_URL_WINDOWS}|g" \ - -e "s|\${ADMIN_USERNAME}|${ADMIN_USERNAME}|g" \ - -e "s|\${WORKFLOW_RUN_URL}|${WORKFLOW_RUN_URL}|g" \ - -e "s|\${WORKFLOW_RUN_ID}|${WORKFLOW_RUN_ID}|g" \ - /tmp/agent-prompt.txt) - - # Launch the Cursor agent - RESPONSE=$(curl --silent --fail \ - --request POST \ - --url "${CURSOR_API_URL}" \ - -u "${CURSOR_API_KEY}:" \ - --header 'Content-Type: application/json' \ - --data "$(jq -n \ - --arg prompt "${PROMPT}" \ - --arg prUrl "${PR_URL}" \ - '{ - prompt: { text: $prompt }, - source: { prUrl: $prUrl }, - target: { autoBranch: false } - }' - )") - - AGENT_ID=$(echo "${RESPONSE}" | jq -r '.id // empty') - if [ -z "${AGENT_ID}" ]; then - echo "::error::Failed to launch Cursor agent. Response: ${RESPONSE}" - exit 1 - fi - - AGENT_URL=$(echo "${RESPONSE}" | jq -r '.target.url // empty') - echo "Cursor agent launched: ${AGENT_ID}" - echo "Agent URL: ${AGENT_URL}" - echo "agent_id=${AGENT_ID}" >> "$GITHUB_OUTPUT" - echo "agent_url=${AGENT_URL}" >> "$GITHUB_OUTPUT" - - - name: Post agent launch notice on PR - if: steps.triage.outputs.skip == 'false' && steps.triage.outputs.pr_url != '' - continue-on-error: true - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - env: - AGENT_URL: ${{ steps.launch.outputs.agent_url }} - FAILING_SUMMARY: ${{ steps.triage.outputs.failing_summary }} - WORKFLOW_RUN_URL: ${{ steps.triage.outputs.workflow_run_url }} - PR_NUMBER: ${{ steps.triage.outputs.pr_number }} - with: - github-token: ${{ github.token }} - script: | - const trimmed = String(process.env.PR_NUMBER ?? '').trim(); - if (!(/^\d+$/).test(trimmed)) { return; } - const prNumber = Number(trimmed); - if (!Number.isInteger(prNumber) || prNumber <= 0) { return; } - const agentUrl = process.env.AGENT_URL || '(pending)'; - const runUrl = process.env.WORKFLOW_RUN_URL; - const summary = process.env.FAILING_SUMMARY; - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - body: [ - '### :robot: Cursor Agent Launched to Fix E2E Failures', - '', - '**Failing tests:**', - '```', - summary, - '```', - '', - 'The agent will:', - '1. Reproduce each failure locally before editing', - '2. Fix test bugs and confirm the fix passes before committing', - '3. Report product bugs as PR comments without modifying tests', - '', - 'It will push fixes directly to this branch (triggering a new E2E run).', - '', - agentUrl ? `**Agent:** ${agentUrl}` : '', - `**CI run:** ${runUrl}`, - ].filter(Boolean).join('\n'), - }); diff --git a/.github/workflows/e2e-functional.yml b/.github/workflows/e2e-functional.yml index c25089e4470..c992420a2d2 100644 --- a/.github/workflows/e2e-functional.yml +++ b/.github/workflows/e2e-functional.yml @@ -53,58 +53,6 @@ jobs: run: | echo "platforms=$(echo '${{ inputs.instance_details }}' | jq -c)" >> $GITHUB_OUTPUT - post-server-info: - name: Post server info to PR - needs: prepare-matrix - # Skip nightly runs — they have no PR to comment on. - # Do not gate on inputs.pr_number because findPrNumber has fallback - # resolution paths that can recover the PR number from the run context. - if: ${{ !inputs.nightly }} - runs-on: ubuntu-24.04 - permissions: - issues: write - pull-requests: write - steps: - - name: Checkout code - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - - name: Post provisioned server URLs as PR comment - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - continue-on-error: true - env: - PR_NUMBER: ${{ inputs.pr_number }} - ADMIN_USERNAME: ${{ inputs.MM_TEST_USER_NAME }} - SERVER_VERSION: ${{ inputs.MM_SERVER_VERSION }} - # Pass the trusted internal job output via env so it is never - # interpolated directly into the JS string (prevents injection). - PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} - with: - github-token: ${{ github.token }} - script: | - const { findPrNumber, postServerInfoComment } = require('./e2e/utils/github-actions.js'); - const platforms = JSON.parse(process.env.PLATFORMS); - const prNumber = await findPrNumber({ - github, - context, - prNumberInput: process.env.PR_NUMBER, - }); - if (!prNumber) { - core.warning('Could not resolve PR number — skipping server info comment'); - return; - } - try { - await postServerInfoComment({ - github, - context, - platforms, - adminUsername: process.env.ADMIN_USERNAME, - serverVersion: process.env.SERVER_VERSION, - prNumber, - }); - } catch (err) { - core.warning(`Failed to post server info comment: ${err.message}`); - } - update-initial-status: name: Update initial status needs: prepare-matrix @@ -118,13 +66,11 @@ jobs: - name: Update initial status for all platforms uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} with: github-token: ${{ github.token }} script: | const { updateInitialStatus } = require('./e2e/utils/github-actions.js'); - const platforms = JSON.parse(process.env.PLATFORMS); + const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; await updateInitialStatus({ github, context, platforms }); e2e-tests: @@ -239,20 +185,71 @@ jobs: - name: Update final status for all platforms uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} - E2E_OUTPUTS: ${{ toJSON(needs.e2e-tests.outputs) }} - MERGED_REPORT_URL: ${{ needs.merge-e2e-report.outputs.merged-report-url }} with: github-token: ${{ github.token }} script: | const { updateFinalStatus } = require('./e2e/utils/github-actions.js'); - const platforms = JSON.parse(process.env.PLATFORMS); - const outputs = JSON.parse(process.env.E2E_OUTPUTS); - const mergedReportUrl = process.env.MERGED_REPORT_URL || undefined; + const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; + const outputs = ${{ toJSON(needs.e2e-tests.outputs) }}; + const mergedReportUrl = '${{ needs.merge-e2e-report.outputs.merged-report-url }}' || undefined; await updateFinalStatus({ github, context, platforms, outputs, mergedReportUrl }); - # remove-e2e-label is intentionally omitted: see e2e-label-cleanup.yml for context. + remove-e2e-label: + name: Remove E2E label from PR + runs-on: ubuntu-22.04 + permissions: + issues: write + pull-requests: write + needs: + - e2e-tests + - e2e-policy-tests + - update-final-status + if: always() + steps: + - name: e2e/remove-label-from-pr + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + continue-on-error: true # Label might have been removed manually + with: + github-token: ${{ github.token }} + script: | + // Use pr_number if provided directly by the dispatcher (e.g. Matterwick) + let prNumber = parseInt(process.env.PR_NUMBER) || null; + + // Fall back to looking up the PR by head branch when pr_number was not passed + if (!prNumber) { + const run = await github.rest.actions.getWorkflowRun({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.runId, + }); + const branchName = run.data.head_branch; + if (branchName) { + const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + const prs = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + head: `${headOwner}:${branchName}`, + }); + if (prs.data && prs.data.length > 0) { + const matchingPr = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); + prNumber = (matchingPr || prs.data[0]).number; + } + } + } + + if (prNumber) { + await github.rest.issues.removeLabel({ + issue_number: prNumber, + owner: context.repo.owner, + repo: context.repo.repo, + name: 'E2E/Run', + }); + } else { + console.log('Label removal skipped - could not find associated PR'); + } + env: + PR_NUMBER: ${{ inputs.pr_number }} e2e-policy-tests: name: policy-tests-${{ matrix.platform }} @@ -285,12 +282,17 @@ jobs: env: INSTANCE_DETAILS: ${{ inputs.instance_details }} run: | + if [ -z "${INSTANCE_DETAILS}" ]; then + echo "Error: instance_details input is required but was not provided" >&2 + exit 1 + fi RUNNER_OS=$(echo "${{ runner.os }}" | tr '[:upper:]' '[:lower:]') - URL="" - if [ -n "${INSTANCE_DETAILS}" ]; then - URL=$(echo "${INSTANCE_DETAILS}" | jq -r --arg os "$RUNNER_OS" '.[] | select(.runner | ascii_downcase | contains($os)) | .url // empty' | head -1) + URL=$(echo "${INSTANCE_DETAILS}" | jq -r --arg os "$RUNNER_OS" '.[] | select(.runner | ascii_downcase | contains($os)) | .url // empty' | head -1) + if [ -z "${URL}" ]; then + echo "Error: No server URL found in instance_details for runner OS: ${RUNNER_OS}" >&2 + exit 1 fi - echo "MM_TEST_SERVER_URL=${URL:-https://mobile-e2e-site-2.test.mattermost.cloud/}" >> $GITHUB_ENV + echo "MM_TEST_SERVER_URL=${URL}" >> $GITHUB_ENV - name: e2e/checkout-repo uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -328,7 +330,6 @@ jobs: os: ${{ runner.os }} - name: e2e/install-node-dependencies - id: install-deps run: | npm ci cd e2e && npm ci @@ -362,7 +363,7 @@ jobs: - name: e2e/check-for-failures id: check-failures - if: always() && steps.install-deps.outcome == 'success' + if: always() uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 with: script: | diff --git a/.github/workflows/e2e-label-cleanup.yml b/.github/workflows/e2e-label-cleanup.yml index 43dbc9d8605..866585bc5a0 100644 --- a/.github/workflows/e2e-label-cleanup.yml +++ b/.github/workflows/e2e-label-cleanup.yml @@ -1,30 +1,42 @@ name: E2E Label Cleanup -# Intentionally disabled: removing E2E/Run causes Matterwick to destroy the -# provisioned servers immediately, which prevents agents from connecting to -# those servers to reproduce and fix failing tests in the same PR run. -# Re-enable once automated fix-and-rerun is no longer needed. -# -# Original behaviour: runs when "Electron Playwright Tests" completes with any -# conclusion (success, failure, or cancelled) and removes the E2E/Run label. -# This was the safety net for cancellations where the remove-e2e-label job in -# e2e-functional.yml would be skipped by GitHub Actions. +# Runs when the "Electron Playwright Tests" workflow completes with any +# conclusion (success, failure, or cancelled). This is the only reliable way +# to remove the E2E/Run label after a cancellation, because GitHub Actions +# cancels all queued jobs — including remove-e2e-label — when a workflow run +# is cancelled, regardless of `if: always()`. on: workflow_run: workflows: ["Electron Playwright Tests"] types: [completed] -permissions: {} +permissions: + issues: write + pull-requests: write + actions: read jobs: - # remove-e2e-label is disabled — see comment at the top of this file. - # if: false ensures no runner is allocated and no permissions are used. - noop: - name: Label cleanup disabled + remove-e2e-label: + name: Remove E2E/Run label from PR runs-on: ubuntu-22.04 - if: ${{ false }} - permissions: {} + if: ${{ github.event.workflow_run.event == 'workflow_dispatch' }} steps: - - name: Skip label removal - run: echo "E2E/Run label removal is disabled." + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Remove E2E/Run label + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + continue-on-error: true # Label may have already been removed by remove-e2e-label job + with: + script: | + const { removeE2ELabel } = require('./e2e/utils/github-actions.js'); + // Pass the original test workflow's run ID so removeE2ELabel can + // look up the dispatching branch and find the associated PR. + await removeE2ELabel({ + github, + context: { + repo: context.repo, + runId: ${{ github.event.workflow_run.id }}, + }, + }); diff --git a/AGENTS.md b/AGENTS.md index 17fd59706a9..373274fa3ec 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,7 +23,7 @@ All standard commands are documented in `CLAUDE.md` and `package.json`. Quick re | `npm run check` | Lint + type-check + unit tests (parallel) | | `npm run build` | Dev build (main + preload + renderer) | | `npm run watch` | Dev mode with auto-rebuild and Electron restart | -| `npm run test:unit` | Jest unit tests (73 suites, 1118 tests) | +| `npm run test:unit` | Jest unit tests | ### Running on headless Linux (Cloud VM) @@ -33,243 +33,93 @@ All standard commands are documented in `CLAUDE.md` and `package.json`. Quick re - DBus errors in the container logs are expected and harmless (no system bus in containers). - The "Failed to load configuration file" message on first run is normal — the app creates defaults. -### E2E tests - -E2E tests live in `e2e/` with a separate `package.json`. See `e2e/AGENTS.md` for detailed guidance. Server-backed E2E tests require a running Mattermost server and env vars `MM_TEST_SERVER_URL`, `MM_TEST_USER_NAME`, `MM_TEST_PASSWORD`. Startup/UI-only E2E tests can run without a server. - -### Cursor secrets required for E2E fix agents - -When a Cursor cloud agent is launched by `e2e-fix-trigger.yml` or `e2e-cursor-commands.yml` to fix failing E2E tests, it needs credentials to connect to the provisioned Mattermost test servers. Add the following secrets in the Cursor Dashboard (Cloud Agents → Secrets): - -| Secret name | Value | -|---|---| -| `MM_DESKTOP_E2E_USER_NAME` | The admin username for the Matterwick-provisioned E2E servers (same value as the `MM_DESKTOP_E2E_USER_NAME` GitHub repo secret) | -| `MM_DESKTOP_E2E_USER_CREDENTIALS` | The admin password for the Matterwick-provisioned E2E servers (same value as the `MM_DESKTOP_E2E_USER_CREDENTIALS` GitHub repo secret) | - -These are exposed to the agent directly as environment variables. The agent uses them verbatim as `MM_TEST_USER_NAME` and `MM_TEST_PASSWORD` when running specs. `MM_TEST_SERVER_URL` is still injected into the agent prompt from the server-info PR comment (it is not secret). - -### Running E2E tests locally on this Linux VM - -To run a single spec file against a live server: - -```bash -source ~/.nvm/nvm.sh && nvm use 20.15.0 -npm ci && cd e2e && npm ci && cd .. -npm run build-test - -cd e2e -export DISPLAY=:1 -export MM_TEST_SERVER_URL= -export MM_TEST_USER_NAME="${MM_DESKTOP_E2E_USER_NAME}" -export MM_TEST_PASSWORD="${MM_DESKTOP_E2E_USER_CREDENTIALS}" -xvfb-run --auto-servernum --server-args='-screen 0 1280x960x24' \ - npx playwright test --reporter=list --workers=1 -cd .. -``` - -If a run leaves Electron hanging: `killall Electron 2>/dev/null || true` - ### Native modules The `postinstall` script runs `electron-builder install-app-deps` to rebuild native modules (registry-js, cf-prefs, etc.) for the current Electron version. If you see native module errors after `npm install`, ensure postinstall completed successfully. --- -## GitHub Actions coding practice - -Every rule below was derived from a concrete CodeRabbit or DryRun Security finding raised on PRs in this repository. Violating any of these will cause the same review comment to re-appear. - -### Security — script injection - -**Rule: Never interpolate `${{ inputs.* }}` or `${{ steps.*.outputs.* }}` directly inside a `github-script` `script:` block.** - -Interpolation happens before the JS is parsed. A single-quote in the value breaks out of the string literal and executes arbitrary JavaScript on the runner. - -```yaml -# WRONG — ${{ inputs.foo }} is injected into JS source text -script: | - doSomething('${{ inputs.foo }}'); - -# CORRECT — pass via env:, read as process.env.* -env: - FOO: ${{ inputs.foo }} -script: | - doSomething(process.env.FOO); -``` - -The **only** safe interpolation inside `script:` is for trusted internal job outputs (e.g. `${{ needs.job.outputs.value }}` that was produced by your own workflow step, not derived from user input). - -**Rule: When passing a JSON array from a job output into a `github-script`, use `env:` + `JSON.parse()`.** - -```yaml -# WRONG -script: | - const platforms = ${{ needs.prepare-matrix.outputs.platforms }}; - -# CORRECT -env: - PLATFORMS: ${{ needs.prepare-matrix.outputs.platforms }} -script: | - const platforms = JSON.parse(process.env.PLATFORMS); -``` - -### Security — Markdown injection in PR comments - -**Rule: Always sanitize values before inserting them into Markdown tables or fenced blocks.** - -Unsanitized `platform` or `url` values can inject pipe characters to break a table, or inject HTML/links. Use a helper like: - -```js -const sanitizeMd = (str) => String(str ?? ''). - replace(/[\r\n]/g, ' '). // no newline injection - replace(/&/g, '&'). // no HTML entities - replace(//g, '>'). - replace(/[|`[\]]/g, (ch) => `\\${ch}`); // no table/code breaks -``` - -### Permissions — least privilege +## E2E testing -**Rule: Declare `issues: write` and `pull-requests: write` at job level only, never at workflow top level.** +E2E tests live in `e2e/` with a separate `package.json`. See `e2e/AGENTS.md` for detailed test design and debugging guidance. -Top-level permissions apply to every job in the file, including jobs that only need `contents: read`. Grant write scopes only on the specific job that calls the API: +### Starting a local Mattermost server with Docker -```yaml -# WRONG — every job gets write access to issues and PRs -permissions: - contents: read - statuses: write - issues: write - pull-requests: write +Server-backed E2E tests require a running Mattermost instance. Use Docker to spin one up locally: -# CORRECT — broad reads at top level, writes scoped to the job that needs them -permissions: - contents: read - statuses: write - -jobs: - post-comment: - permissions: - issues: write - pull-requests: write -``` - -**Rule: Disabled/noop jobs must set `permissions: {}` and `if: ${{ false }}`.** - -A noop job that still has `issues: write` holds a live permission unnecessarily. If a job is disabled, clear its permissions and gate it with `if: false` so it never allocates a runner: - -```yaml -noop: - runs-on: ubuntu-22.04 - if: ${{ false }} - permissions: {} - steps: - - run: echo "disabled" -``` - -### Fault tolerance — `continue-on-error` on auxiliary steps - -**Rule: Auxiliary side-effect steps (posting comments, removing labels, sending notifications) must set `continue-on-error: true`.** - -A comment-posting failure is not a test failure. If the auxiliary step throws, it should warn but not mark the entire job as failed: - -```yaml -- name: Post PR comment - uses: actions/github-script@... - continue-on-error: true # ← required for all non-critical steps - with: - script: | - try { - await postComment(...); - } catch (err) { - core.warning(`Comment failed: ${err.message}`); - } -``` - -### Job condition gates - -**Rule: Do not gate a job on `inputs.pr_number != ''` if the job has fallback resolution logic that can find the PR without the input.** - -Gating on an optional input prevents the fallback from ever running, silently skipping the job for valid runs where the input was not provided: - -```yaml -# WRONG — skips even when findPrNumber() can resolve via branch/SHA lookup -if: ${{ !inputs.nightly && inputs.pr_number != '' }} - -# CORRECT — only skip nightly runs; let the fallback handle missing pr_number -if: ${{ !inputs.nightly }} -``` - -### Dead code in workflow files - -**Rule: Do not leave commented-out job blocks in workflow YAML files.** - -Git history preserves the implementation. Commented blocks drift from the live logic, mislead future readers, and generate repeated review noise. Remove the block entirely and add a one-line comment pointing to git history if the reason needs explaining: - -```yaml -# WRONG — 60 lines of commented-out job YAML -# remove-e2e-label: -# runs-on: ubuntu-22.04 -# steps: -# ... - -# CORRECT — single explanatory line -# remove-e2e-label is intentionally omitted: see e2e-label-cleanup.yml for context. +```bash +# Install Docker if not already present (Cloud VM may need fuse-overlayfs + iptables-legacy) +# See the Cursor Cloud system instructions for the full Docker install recipe. + +# Start the Mattermost preview image (server + database in one container) +docker run -d \ + --name mattermost-e2e \ + -p 8065:8065 \ + --restart unless-stopped \ + mattermost/mattermost-preview:latest + +# Wait for the server to be ready (polls /api/v4/system/ping) +until curl -sf http://localhost:8065/api/v4/system/ping >/dev/null 2>&1; do + echo "Waiting for Mattermost to start..." + sleep 3 +done +echo "Mattermost is ready at http://localhost:8065" ``` -### Input validation in JavaScript utilities - -**Rule: Always use strict positive-integer validation when parsing PR numbers or other numeric inputs.** +On first launch the container creates an admin account — default credentials `admin` / `admin`. If the server prompts for initial setup at `http://localhost:8065`, create the admin user through the API: -`parseInt("123abc", 10)` returns `123`. `parseInt("-42", 10)` returns `-42`. Both pass a truthiness check. Use the pattern below: - -```js -// WRONG -const n = parseInt(input, 10); -if (n) { use(n); } - -// CORRECT -const trimmed = String(input ?? '').trim(); -if ((/^\d+$/).test(trimmed)) { - const parsed = Number(trimmed); - if (Number.isInteger(parsed) && parsed > 0) { - use(parsed); - } -} +```bash +# Create the initial admin user and team via the local-mode API +curl -sf http://localhost:8065/api/v4/users \ + -H 'Content-Type: application/json' \ + -d '{"email":"admin@test.com","username":"admin","password":"admin","auth_service":""}' || true + +# Login and create a team so tests don't land on /select_team +TOKEN=$(curl -sf http://localhost:8065/api/v4/users/login \ + -H 'Content-Type: application/json' \ + -d '{"login_id":"admin","password":"admin"}' \ + -D - 2>/dev/null | grep -i '^token:' | awk '{print $2}' | tr -d '\r') + +curl -sf http://localhost:8065/api/v4/teams \ + -H "Authorization: Bearer $TOKEN" \ + -H 'Content-Type: application/json' \ + -d '{"name":"e2e-team","display_name":"E2E Team","type":"O"}' || true ``` -**Rule: Do not fall back to `prs.data[0]` when a SHA-based match fails.** +### Running E2E tests -Falling back to the first open PR on a branch can post to the wrong PR if multiple PRs share the same head branch. Return `null` and skip the action: +```bash +source ~/.nvm/nvm.sh && nvm use 20.15.0 -```js -// WRONG — may post to wrong PR -return (matching || prs.data[0]).number; +# Install and build the test bundle +npm ci +cd e2e && npm ci && cd .. +npm run build-test -// CORRECT — return null and let caller skip -return matching ? matching.number : null; +# Run a single spec against the local server +cd e2e +export DISPLAY=:1 +export MM_TEST_SERVER_URL=http://localhost:8065 +export MM_TEST_USER_NAME=admin +export MM_TEST_PASSWORD=admin +npx playwright test --reporter=list --workers=1 +cd .. ``` -### `workflow_run` execution context - -**Rule: `workflow_run` workflows always execute from the DEFAULT BRANCH, not from the triggering PR branch.** - -This means: any JS utility called via `require('./e2e/utils/...')` inside a `workflow_run` workflow will be the version on `master`, not the PR branch version — unless the checkout step explicitly checks out the triggering commit. When disabling label-removal or other behavior, the change must land on the default branch to take effect. +If a run leaves Electron hanging: `killall Electron 2>/dev/null || true` -### Fenced code blocks in Markdown +### Fixing E2E tests (for `@cursoragent fix e2e` or CI-failure agents) -**Rule: Always specify a language tag on fenced code blocks.** +When asked to fix E2E failures — whether triggered by a `@cursoragent` comment or by observing CI failures on a PR: -Bare triple-backtick blocks are flagged by markdown linters and render without syntax highlighting. Use `bash`, `yaml`, `typescript`, `js`, etc.: +1. **Start the Mattermost server** using the Docker instructions above. +2. **Read the CI logs** to identify which spec files failed and the error messages. Use `gh run view --job --log-failed` or download JUnit artifacts. +3. **Build the test bundle**: `npm run build-test` +4. **Reproduce** each failure locally before editing. Run the failing spec file and confirm it fails. +5. **Fix the test** — see `e2e/AGENTS.md` for the classification (test bug vs product bug) and design rules. +6. **Re-run the spec** to confirm the fix, then commit. +7. For server-backed tests that use `waitForLoggedIn()`: this helper polls `ServerManager.isLoggedIn` in the main process. If login never completes, check whether the server is reachable and the admin user has a team. -```markdown - -\``` -npm install -\``` +### Login state propagation (common E2E flake) - -\```bash -npm install -\``` -``` +After `loginToMattermost()` completes, the desktop app's `isLoggedIn` flag must travel through a multi-hop IPC chain before the renderer enables tab-bar interactions (`#newTabButton`). The `waitForLoggedIn()` helper in `e2e/helpers/login.ts` polls the main-process `ServerManager` directly, which is more reliable than waiting for the DOM element. Use it in `beforeAll` blocks for any test that interacts with the tab bar after login. diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts index 8c9e5140532..8b545e2b21c 100644 --- a/e2e/global-setup.ts +++ b/e2e/global-setup.ts @@ -6,9 +6,6 @@ import * as fs from 'fs'; import * as os from 'os'; import * as path from 'path'; -// eslint-disable-next-line @typescript-eslint/no-var-requires -const {provisionServer} = require('./utils/server-setup'); - const E2E_PROCESS_REGISTRY = path.join(os.tmpdir(), 'mattermost-desktop-e2e-main-pids.txt'); /** @@ -38,10 +35,4 @@ export default async function globalSetup() { // Non-fatal — tests still run, just potentially with the Resume dialog } } - - // Provision the Mattermost test server once before any tests run. - // Creates the default team and adds the admin user so login lands in a - // channel (not /select_team) on fresh CI servers. - // No-ops when MM_TEST_SERVER_URL or MM_TEST_PASSWORD are absent. - await provisionServer(); } diff --git a/e2e/utils/github-actions.js b/e2e/utils/github-actions.js index 44bbdd7f7a7..4d0cb9c9e7f 100644 --- a/e2e/utils/github-actions.js +++ b/e2e/utils/github-actions.js @@ -74,259 +74,81 @@ async function updateFinalStatus({github, context, platforms, outputs, mergedRep } /** - * Resolve the PR number for a workflow run. - * - * Resolution order: - * 1. prNumberInput — explicit value passed by the workflow dispatcher (e.g. Matterwick). - * 2. run.pull_requests — populated for pull_request-triggered runs. - * 3. Branch/SHA lookup — queries open PRs whose head matches the run's head branch and SHA. - * This is the reliable path for workflow_dispatch runs where pull_requests is empty. - * - * @param {Object} params + * Remove E2E/Run label when workflow triggered via Matterwick + * @param {Object} params - Parameters object * @param {Object} params.github - GitHub API client from actions/github-script - * @param {Object} params.context - GitHub Actions context (context.runId must be set) - * @param {string|number} [params.prNumberInput] - Explicit PR number from a workflow input - * @returns {Promise} Resolved PR number, or null if not found + * @param {Object} params.context - GitHub Actions context */ -async function findPrNumber({github, context, prNumberInput}) { - const trimmed = String(prNumberInput ?? '').trim(); - if ((/^\d+$/).test(trimmed)) { - const parsed = Number(trimmed); - if (Number.isInteger(parsed) && parsed > 0) { - return parsed; - } - } - +async function removeE2ELabel({github, context}) { try { + // Get the current run to check if it was triggered by workflow_dispatch const run = await github.rest.actions.getWorkflowRun({ owner: context.repo.owner, repo: context.repo.repo, run_id: context.runId, }); + // Only remove the label if this was triggered via workflow_dispatch (Matterwick) + if (run.data.event !== 'workflow_dispatch') { + console.log('Label removal skipped - workflow run is not triggered by workflow_dispatch (Matterwick)'); + return; + } + + // Try to find associated PR + let prNumber = null; + + // First try: check run.data.pull_requests (reliable for pull_request events) if (run.data.pull_requests && run.data.pull_requests.length > 0) { - return run.data.pull_requests[0].number; + prNumber = run.data.pull_requests[0].number; + } else { + // Second try: query PRs by head branch (more reliable for workflow_dispatch) + const branchName = run.data.head_branch; + if (branchName) { + // Use the actual head repository owner (supports fork PRs) + const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; + const prs = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + head: `${headOwner}:${branchName}`, + }); + if (prs.data && prs.data.length > 0) { + // Prefer the PR whose head SHA matches the workflow run's head SHA + const matchingPr = prs.data.find( + (pr) => pr.head && pr.head.sha === run.data.head_sha, + ); + if (matchingPr) { + prNumber = matchingPr.number; + } else { + prNumber = prs.data[0].number; + } + } + } } - const branchName = run.data.head_branch; - if (branchName) { - const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - const prs = await github.rest.pulls.list({ + if (prNumber) { + await github.rest.issues.removeLabel({ owner: context.repo.owner, repo: context.repo.repo, - state: 'open', - head: `${headOwner}:${branchName}`, + issue_number: prNumber, + name: 'E2E/Run', }); - if (prs.data && prs.data.length > 0) { - const matching = prs.data.find((pr) => pr.head && pr.head.sha === run.data.head_sha); - return matching ? matching.number : null; - } + } else { + console.log('Label removal skipped - could not find associated PR'); } } catch (error) { - console.log(`Could not resolve PR number: ${error.message}`); - } - - return null; -} - -/** - * Post (or update) a PR comment listing the provisioned Mattermost server URLs so - * developers can connect to those servers to reproduce and debug failing tests. - * - * The comment is idempotent: if a previous comment with the hidden HTML marker already - * exists on the PR (e.g. from a previous run triggered by a push to the same branch), - * it is updated in place rather than a new one being created. - * - * The admin password is intentionally omitted from the comment. Use the - * MM_DESKTOP_E2E_USER_CREDENTIALS repository secret or contact the team. - * - * @param {Object} params - * @param {Object} params.github - GitHub API client from actions/github-script - * @param {Object} params.context - GitHub Actions context - * @param {Array} params.platforms - Array of platform objects from the matrix - * (each must have at least `platform` and `url`) - * @param {string} [params.adminUsername] - Admin username for the test instances - * @param {string} [params.serverVersion] - Mattermost server version under test - * @param {number} params.prNumber - PR number to comment on - */ -async function postServerInfoComment({github, context, platforms, adminUsername, serverVersion, prNumber}) { - const MARKER = ''; - const workflowUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - - // Sanitize values before inserting into a Markdown table to prevent - // table-breaking, HTML injection, or newline-based injection. - const sanitizeMd = (str) => String(str ?? ''). - replace(/[\r\n]/g, ' '). - replace(/&/g, '&'). - replace(//g, '>'). - replace(/[|`[\]]/g, (ch) => `\\${ch}`); - - const platformRows = platforms. - map((p) => `| \`${sanitizeMd(p.platform)}\` | ${sanitizeMd(p.url)} |`). - join('\n'); - - const lines = [ - MARKER, - '### :test_tube: E2E Test Servers Ready', - '', - 'Matterwick has provisioned the following Mattermost instances for this PR.', - 'Use them to reproduce and debug failing tests against the exact same servers:', - '', - '| Platform | Server URL |', - '|----------|------------|', - platformRows, - '', - ]; - - if (adminUsername) { - lines.push(`**Admin username:** \`${adminUsername}\``); - } - if (serverVersion) { - lines.push(`**Server version:** \`${serverVersion}\``); - } - - lines.push( - '', - '**Run a single spec against one of these servers:**', - '```sh', - 'MM_TEST_SERVER_URL= \\', - ' MM_TEST_USER_NAME= \\', - ' MM_TEST_PASSWORD= \\', - ' npx playwright test --reporter=list --workers=1', - '```', - '', - '> Servers are active for the duration of this workflow run. Label cleanup is disabled — servers may be retained afterwards for agent-driven test fixing.', - '', - `**Workflow run:** ${workflowUrl}`, - ); - - const body = lines.join('\n'); - const {owner, repo} = context.repo; - - // Search all pages of comments so the marker is never missed on busy PRs. - // Without full pagination a PR with > 100 comments would fail to find the - // existing comment and create a duplicate on every subsequent E2E run. - let existingCommentId = null; - try { - let page = 1; - while (!existingCommentId) { - const {data: comments} = await github.rest.issues.listComments({ - owner, - repo, - issue_number: prNumber, - per_page: 100, - page, - }); - if (comments.length === 0) { - break; - } - const found = comments.find((c) => c.body && c.body.includes(MARKER)); - if (found) { - existingCommentId = found.id; - } else if (comments.length < 100) { - // Reached the last page without finding the marker. - break; - } - page++; + if (error && error.status === 404) { + console.log(`Label removal skipped - label or resource not found (404). Details: ${error.message}`); + } else if (error && error.status === 403) { + console.log(`Label removal failed - insufficient permissions (403). Details: ${error.message}`); + } else { + console.log(`Label removal failed - unexpected error: status=${error && error.status}, message=${error && error.message}`); } - } catch (err) { - console.log(`Could not list PR comments: ${err.message}`); - } - - if (existingCommentId) { - await github.rest.issues.updateComment({owner, repo, comment_id: existingCommentId, body}); - console.log(`Updated existing E2E server info comment ${existingCommentId} on PR #${prNumber}`); - } else { - await github.rest.issues.createComment({owner, repo, issue_number: prNumber, body}); - console.log(`Posted E2E server info comment on PR #${prNumber}`); } } -/** - * Remove E2E/Run label when workflow triggered via Matterwick - * @param {Object} params - Parameters object - * @param {Object} params.github - GitHub API client from actions/github-script - * @param {Object} params.context - GitHub Actions context - */ -async function removeE2ELabel() { - // Commented out for testing purposes — label removal is disabled so - // Matterwick keeps provisioned servers alive after tests finish, allowing - // agents to connect and fix failures in the same run. - // - // async function removeE2ELabel({github, context}) { - // try { - // // Get the current run to check if it was triggered by workflow_dispatch - // const run = await github.rest.actions.getWorkflowRun({ - // owner: context.repo.owner, - // repo: context.repo.repo, - // run_id: context.runId, - // }); - // - // // Only remove the label if this was triggered via workflow_dispatch (Matterwick) - // if (run.data.event !== 'workflow_dispatch') { - // console.log('Label removal skipped - workflow run is not triggered by workflow_dispatch (Matterwick)'); - // return; - // } - // - // // Try to find associated PR - // let prNumber = null; - // - // // First try: check run.data.pull_requests (reliable for pull_request events) - // if (run.data.pull_requests && run.data.pull_requests.length > 0) { - // prNumber = run.data.pull_requests[0].number; - // } else { - // // Second try: query PRs by head branch (more reliable for workflow_dispatch) - // const branchName = run.data.head_branch; - // if (branchName) { - // // Use the actual head repository owner (supports fork PRs) - // const headOwner = run.data.head_repository?.owner?.login || context.repo.owner; - // const prs = await github.rest.pulls.list({ - // owner: context.repo.owner, - // repo: context.repo.repo, - // state: 'open', - // head: `${headOwner}:${branchName}`, - // }); - // if (prs.data && prs.data.length > 0) { - // const matchingPr = prs.data.find( - // (pr) => pr.head && pr.head.sha === run.data.head_sha, - // ); - // if (matchingPr) { - // prNumber = matchingPr.number; - // } else { - // prNumber = prs.data[0].number; - // } - // } - // } - // } - // - // if (prNumber) { - // await github.rest.issues.removeLabel({ - // owner: context.repo.owner, - // repo: context.repo.repo, - // issue_number: prNumber, - // name: 'E2E/Run', - // }); - // } else { - // console.log('Label removal skipped - could not find associated PR'); - // } - // } catch (error) { - // if (error && error.status === 404) { - // console.log(`Label removal skipped - label or resource not found (404). Details: ${error.message}`); - // } else if (error && error.status === 403) { - // console.log(`Label removal failed - insufficient permissions (403). Details: ${error.message}`); - // } else { - // console.log(`Label removal failed - unexpected error: status=${error && error.status}, message=${error && error.message}`); - // } - // } - // } - console.log('removeE2ELabel: commented out for testing purposes — label removal is disabled.'); -} - module.exports = { - findPrNumber, - postServerInfoComment, - removeE2ELabel, - updateFinalStatus, updateInitialStatus, + updateFinalStatus, + removeE2ELabel, }; diff --git a/e2e/utils/server-setup.js b/e2e/utils/server-setup.js deleted file mode 100644 index 9c88dc48da7..00000000000 --- a/e2e/utils/server-setup.js +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2016-present Mattermost, Inc. All Rights Reserved. -// See LICENSE.txt for license information. -/* eslint-disable no-console -- intentional logging in setup scripts */ - -/** - * Mattermost server provisioning helper. - * - * Runs once before the E2E suite starts (called from global-setup.ts). - * Uses the Mattermost REST API to ensure a default team and the test admin - * user exist so that: - * - Login does not land on /select_team (no-team fresh servers). - * - All server-backed tests can navigate to channels immediately. - * - * Idempotent: safe to call multiple times. Already-existing resources are - * detected and skipped; no error is thrown. - * - * Required env vars (all optional — the function no-ops when absent): - * MM_TEST_SERVER_URL e.g. https://desktop-pr-3773-linux-xxx.test.mattermost.cloud - * MM_TEST_USER_NAME admin username (default: admin) - * MM_TEST_PASSWORD admin password - */ - -const DEFAULT_TEAM_NAME = 'e2e-team'; -const DEFAULT_TEAM_DISPLAY_NAME = 'E2E Test Team'; -const DEFAULT_CHANNEL = 'town-square'; - -/** - * Thin wrapper around fetch that throws on non-2xx with a readable message. - * - * @param {string} url - * @param {RequestInit} options - * @returns {Promise} - */ -async function apiRequest(url, options = {}) { - const response = await fetch(url, { - ...options, - headers: { - 'Content-Type': 'application/json', - ...(options.headers ?? {}), - }, - }); - - const text = await response.text(); - let body; - try { - body = JSON.parse(text); - } catch { - body = text; - } - - if (!response.ok) { - const status = response.status; - const message = typeof body === 'object' ? (body.message ?? JSON.stringify(body)) : body; - throw new Error(`API ${options.method ?? 'GET'} ${url} → ${status}: ${message}`); - } - - return body; -} - -/** - * Provision the test server so the admin user lands in a channel after login. - * - * Steps (all idempotent): - * 1. Authenticate as the admin user → obtain session token. - * 2. Ensure a team named DEFAULT_TEAM_NAME exists (create if absent). - * 3. Ensure the admin user is a member of that team. - * - * @returns {Promise} - */ -async function provisionServer() { - const serverURL = process.env.MM_TEST_SERVER_URL; - const username = process.env.MM_TEST_USER_NAME ?? 'admin'; - const password = process.env.MM_TEST_PASSWORD; - - if (!serverURL || !password) { - console.log('[server-setup] MM_TEST_SERVER_URL or MM_TEST_PASSWORD not set — skipping server provisioning.'); - return; - } - - const base = serverURL.replace(/\/$/, ''); - console.log(`[server-setup] Provisioning server: ${base}`); - - // ── 1. Login ──────────────────────────────────────────────────────────── - let token; - let adminUser; - try { - const response = await fetch(`${base}/api/v4/users/login`, { - method: 'POST', - headers: {'Content-Type': 'application/json'}, - body: JSON.stringify({login_id: username, password}), - }); - - const text = await response.text(); - let body; - try { - body = JSON.parse(text); - } catch { - body = text; - } - - if (!response.ok) { - const msg = typeof body === 'object' ? (body.message ?? JSON.stringify(body)) : body; - throw new Error(`Login failed (${response.status}): ${msg}`); - } - - token = response.headers.get('Token'); - adminUser = body; - - if (!token) { - throw new Error('Login succeeded but no Token header was returned'); - } - } catch (err) { - console.warn(`[server-setup] Login failed — skipping provisioning: ${err.message}`); - return; - } - - const authHeaders = {Authorization: `Bearer ${token}`}; - console.log(`[server-setup] Authenticated as ${adminUser.username} (id: ${adminUser.id})`); - - // ── 2. Ensure the default team exists ─────────────────────────────────── - let teamId; - try { - // Search by exact name - const teams = await apiRequest( - `${base}/api/v4/teams/name/${DEFAULT_TEAM_NAME}`, - {headers: authHeaders}, - ); - teamId = teams.id; - console.log(`[server-setup] Team '${DEFAULT_TEAM_NAME}' already exists (id: ${teamId})`); - } catch (err) { - if (!err.message.includes('404')) { - console.warn(`[server-setup] Could not check team existence: ${err.message}`); - return; - } - - // Team does not exist — create it - try { - const newTeam = await apiRequest(`${base}/api/v4/teams`, { - method: 'POST', - headers: authHeaders, - body: JSON.stringify({ - name: DEFAULT_TEAM_NAME, - display_name: DEFAULT_TEAM_DISPLAY_NAME, - type: 'O', // open team - }), - }); - teamId = newTeam.id; - console.log(`[server-setup] Created team '${DEFAULT_TEAM_NAME}' (id: ${teamId})`); - } catch (createErr) { - console.warn(`[server-setup] Could not create team: ${createErr.message}`); - return; - } - } - - // ── 3. Ensure the admin user is a member of the team ──────────────────── - try { - await apiRequest(`${base}/api/v4/teams/${teamId}/members`, { - method: 'POST', - headers: authHeaders, - body: JSON.stringify({ - team_id: teamId, - user_id: adminUser.id, - }), - }); - console.log(`[server-setup] Admin user added to team '${DEFAULT_TEAM_NAME}'`); - } catch (err) { - // 409 Conflict means the user is already a member — that's fine. - if (err.message.includes('409') || err.message.toLowerCase().includes('already')) { - console.log(`[server-setup] Admin user already a member of '${DEFAULT_TEAM_NAME}'`); - } else { - console.warn(`[server-setup] Could not add user to team: ${err.message}`); - } - } - - // ── 4. Verify the default channel exists ──────────────────────────────── - // town-square is created automatically with every team, but log it for - // visibility so CI operators can confirm the expected state. - try { - const channel = await apiRequest( - `${base}/api/v4/teams/${teamId}/channels/name/${DEFAULT_CHANNEL}`, - {headers: authHeaders}, - ); - console.log(`[server-setup] Default channel '${channel.name}' confirmed (id: ${channel.id})`); - } catch { - // Non-fatal — just skip the log - } - - console.log('[server-setup] Server provisioning complete.'); -} - -module.exports = {provisionServer};