From 0e3a7f739ffecc495e1fd9a39aad9f3ef406ec59 Mon Sep 17 00:00:00 2001 From: Nik Richers Date: Mon, 18 May 2026 09:21:03 -0700 Subject: [PATCH 1/4] Fix intermittent Lighthouse CI and scope audits to changed pages (sc-12702) Chain Lighthouse after validate via workflow_run instead of polling, audit PR-changed site pages by default, and keep depth dispatch and lighthouse:full for thorough runs. --- .github/workflows/lighthouse-check.yaml | 971 ++++++++++-------------- README.md | 24 +- site/scripts/lighthouse_urls.py | 307 ++++++++ site/scripts/test_lighthouse_urls.py | 102 +++ 4 files changed, 833 insertions(+), 571 deletions(-) create mode 100644 site/scripts/lighthouse_urls.py create mode 100644 site/scripts/test_lighthouse_urls.py diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml index d96685de1c..f075ff41b7 100644 --- a/.github/workflows/lighthouse-check.yaml +++ b/.github/workflows/lighthouse-check.yaml @@ -1,598 +1,453 @@ name: Lighthouse check on: - pull_request: - types: [opened, synchronize, ready_for_review] + workflow_run: + workflows: ["Validate docs site (render, test, and deploy)"] + types: [completed] + workflow_dispatch: + inputs: + depth: + description: "Sitemap depth for thorough audit (0–2)" + required: true + default: "0" + type: choice + options: + - "0" + - "1" + - "2" + pr_number: + description: "Pull request number to audit" + required: true + type: string permissions: + contents: read issues: write pull-requests: write - -env: - # To change the default depth level: - # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.) - # 1 — All first-level subdirectories (e.g. /guide/*.html) - # 2 — All second-level subdirectories (e.g. 
/guide/attestation/*.html) - # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete - DEFAULT_DEPTH: '0' + actions: read jobs: lighthouse: runs-on: ubuntu-latest - if: github.event.pull_request.draft == false + if: | + (github.event_name == 'workflow_run' && + github.event.workflow_run.conclusion == 'success' && + github.event.workflow_run.event == 'pull_request') || + github.event_name == 'workflow_dispatch' steps: - - name: Wait for validation workflow to complete - uses: actions/github-script@v6 - with: - script: | - const maxWaitTime = 45 * 60 * 1000; // 45 minutes in milliseconds - const pollInterval = 60 * 1000; // 60 seconds in milliseconds - const startTime = Date.now(); - - console.log(`Waiting for "Validate docs site" workflow to complete for PR #${context.issue.number}`); - console.log(`Head SHA: ${context.payload.pull_request.head.sha}`); - - while (Date.now() - startTime < maxWaitTime) { - try { - // Get workflow runs for the validate-docs-site workflow - const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'validate-docs-site.yaml', - head_sha: context.payload.pull_request.head.sha, - per_page: 5 + - name: Resolve PR context + id: pr + uses: actions/github-script@v6 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + + async function getPr(prNumber) { + const { data: pr } = await github.rest.pulls.get({ + owner, + repo, + pull_number: prNumber, }); - - console.log(`Found ${runs.workflow_runs.length} workflow runs for this commit`); - - if (runs.workflow_runs.length > 0) { - // Get the most recent run - const latestRun = runs.workflow_runs[0]; - console.log(`Latest run: ${latestRun.id}, status: ${latestRun.status}, conclusion: ${latestRun.conclusion}`); - - if (latestRun.status === 'completed') { - if (latestRun.conclusion === 'success') { - console.log('✅ 
Validation workflow completed successfully'); - break; - } else { - throw new Error(`❌ Validation workflow failed with conclusion: ${latestRun.conclusion}`); - } - } else if (latestRun.status === 'in_progress' || latestRun.status === 'queued') { - console.log(`⏳ Validation workflow is ${latestRun.status}, continuing to wait...`); - } else { - console.log(`⚠️ Unexpected status: ${latestRun.status}`); - } - } else { - console.log('⏳ No workflow runs found yet, validation may not have started...'); + if (pr.draft) { + core.setFailed('Skipping Lighthouse for draft PR'); + return null; } - - console.log(`Elapsed time: ${Math.round((Date.now() - startTime) / 1000 / 60)} minutes`); - await new Promise(resolve => setTimeout(resolve, pollInterval)); - - } catch (error) { - console.error('Error checking workflow status:', error); - throw error; + const labels = (pr.labels || []).map(l => l.name); + const fullAudit = labels.includes('lighthouse:full'); + return { + number: pr.number, + head_ref: pr.head.ref, + head_sha: pr.head.sha, + base_ref: pr.base.ref, + full_audit: fullAudit, + }; } + + if (context.eventName === 'workflow_dispatch') { + const prNumber = parseInt('${{ inputs.pr_number }}', 10); + const info = await getPr(prNumber); + if (!info) return; + core.setOutput('number', String(info.number)); + core.setOutput('head_ref', info.head_ref); + core.setOutput('head_sha', info.head_sha); + core.setOutput('base_ref', info.base_ref); + core.setOutput('mode', 'depth'); + core.setOutput('depth', '${{ inputs.depth }}'); + core.setOutput('full_audit', String(info.full_audit)); + return; + } + + const run = context.payload.workflow_run; + let prNumber = null; + if (run.pull_requests && run.pull_requests.length > 0) { + prNumber = run.pull_requests[0].number; + } else { + const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + owner, + repo, + commit_sha: run.head_sha, + }); + if (prs.length > 0) { + prNumber = prs[0].number; + } + } + + if 
(!prNumber) { + core.setFailed('Could not resolve PR for workflow_run'); + return; + } + + const info = await getPr(prNumber); + if (!info) return; + + let mode = 'changed'; + let depth = '0'; + if (info.full_audit) { + mode = 'depth'; + depth = '2'; + } + + core.setOutput('number', String(info.number)); + core.setOutput('head_ref', info.head_ref); + core.setOutput('head_sha', info.head_sha); + core.setOutput('base_ref', info.base_ref); + core.setOutput('mode', mode); + core.setOutput('depth', depth); + core.setOutput('full_audit', String(info.full_audit)); + + - name: Check out repository + uses: actions/checkout@v4 + with: + ref: ${{ steps.pr.outputs.head_sha }} + fetch-depth: 0 + + - name: Set environment + run: | + echo "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/${{ steps.pr.outputs.head_ref }}" >> $GITHUB_ENV + echo "COMMIT_SHA=${{ steps.pr.outputs.head_sha }}" >> $GITHUB_ENV + echo "COMMIT_SHA_SHORT=$(echo ${{ steps.pr.outputs.head_sha }} | cut -c1-7)" >> $GITHUB_ENV + echo "LIGHTHOUSE_MODE=${{ steps.pr.outputs.mode }}" >> $GITHUB_ENV + echo "LIGHTHOUSE_DEPTH=${{ steps.pr.outputs.depth }}" >> $GITHUB_ENV + echo "PR_NUMBER=${{ steps.pr.outputs.number }}" >> $GITHUB_ENV + + - name: Check for PR preview URL + id: check_preview + run: | + check_url() { + local url=$1 + local status + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$url") + echo "Checking $url — status: $status" + [ "$status" -eq 200 ] } - - // Check if we timed out - if (Date.now() - startTime >= maxWaitTime) { - throw new Error('⏰ Timed out waiting for validation workflow to complete'); - } - - - name: Check out repository - uses: actions/checkout@v4 - - - name: Get commit SHA - id: get_sha - run: | - echo "COMMIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV - echo "COMMIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV - - - name: Set PR preview URL - id: set_url - run: | - echo "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/${{ 
github.head_ref }}" >> $GITHUB_ENV - echo "DEPTH=${{ env.DEFAULT_DEPTH }}" >> $GITHUB_ENV - - - name: Check for PR preview URL and sitemap - id: check_preview - run: | - # Function to check if URL returns HTTP 200 - check_url() { - local url=$1 - local status - status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$url") - echo "Checking $url — status: $status" - [ "$status" -eq 200 ] - } - - echo "Waiting for preview site to become available ..." - for i in {1..60}; do - if check_url "$PREVIEW_URL/index.html"; then - echo "Info: Preview site is now available" - break + + echo "Waiting for preview site to become available ..." + for i in $(seq 1 30); do + if check_url "$PREVIEW_URL/index.html"; then + echo "Info: Preview site is now available" + break + fi + if [ "$i" -eq 30 ]; then + echo "Error: Preview URL did not become available after 30 minutes" + exit 1 + fi + echo "Attempt $i/30: waiting 1 minute..." + sleep 60 + done + + if ! check_url "$PREVIEW_URL/sitemap.xml"; then + echo "Error: Sitemap missing at $PREVIEW_URL/sitemap.xml" + exit 1 + fi + + echo "preview_exists=true" >> $GITHUB_OUTPUT + + - name: Install Python dependencies + if: steps.check_preview.outputs.preview_exists == 'true' + run: | + python -m pip install --upgrade pip + pip install requests + + - name: Generate URLs to check + if: steps.check_preview.outputs.preview_exists == 'true' + id: generate_urls + env: + INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} + INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} + run: | + cd site/scripts + python lighthouse_urls.py \ + --mode "$LIGHTHOUSE_MODE" \ + --base-ref "${{ steps.pr.outputs.base_ref }}" \ + --depth "$LIGHTHOUSE_DEPTH" \ + --preview-url "$PREVIEW_URL" \ + --output ../../lhci-urls.txt \ + --metadata ../../lighthouse-metadata.json \ + --skip-file ../../lighthouse-skip.txt + + if [ -f ../../lighthouse-skip.txt ]; then + echo "skip=true" >> $GITHUB_OUTPUT + echo "No site pages to audit in this PR." 
+ exit 0 fi - - if [ $i -eq 60 ]; then - echo "Error: Preview URL did not become available after 60 minutes at $PREVIEW_URL/index.html" + + if [ ! -s ../../lhci-urls.txt ]; then + echo "Error: No URLs were generated." exit 1 fi - - echo "Attempt $i/60: Preview site not ready yet, waiting 1 minute..." - sleep 60 - done - - if ! check_url "$PREVIEW_URL/sitemap.xml"; then - echo "Error: Sitemap does not exist at $PREVIEW_URL/sitemap.xml" - exit 1 - fi - - echo "Debug: Checking installation page with URL-based auth..." - auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}/installation/index.html" - status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url") - echo "Checking $auth_url — status: $status" - if [ "$status" -ne 200 ]; then - echo "Error: Installation page is not accessible with authentication at $auth_url" - exit 1 - fi - - echo "Info: Successfully accessed password-protected installation page" - - echo "preview_exists=true" >> $GITHUB_OUTPUT - - - name: Install Lighthouse CI - if: steps.check_preview.outputs.preview_exists == 'true' - run: npm install -g @lhci/cli - - - name: Install required Python packages - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - python -m pip install --upgrade pip - pip install requests beautifulsoup4 - - - name: Generate URLs to check - if: steps.check_preview.outputs.preview_exists == 'true' - id: generate_urls - run: | - BASE_URL="$PREVIEW_URL" - - # Create a Python script to crawl the site - cat > crawl.py << 'EOF' - import requests - from bs4 import BeautifulSoup - import sys - from urllib.parse import urljoin, urlparse - import json - import xml.etree.ElementTree as ET - import base64 - import os - - # Define root pages to check - ROOT_PAGES = [ - "index.html", - "get-started/get-started.html", - "guide/guides.html", - "developer/validmind-library.html", - "support/support.html", - 
"releases/all-releases.html", - "training/training.html" - ] - - def get_auth_headers(): - # Only use auth for installation pages - if 'installation/' in url: - # Create auth headers from environment variables - auth_string = base64.b64encode(f"{os.environ['INSTALLATION_USER']}:{os.environ['INSTALLATION_PW']}".encode()).decode() - return {"Authorization": f"Basic {auth_string}"} - return {} - - def get_url_depth(url): - # Parse the URL to get just the path - path = urlparse(url).path - # Remove .html extension for depth calculation - path = path.replace('.html', '') - # Remove any leading/trailing slashes - path = path.strip('/') - - # Split into segments and count non-empty ones - segments = [x for x in path.split('/') if x] - - # For PR preview URLs, we need to skip the first 5 segments: - # /pr_previews/username/branch/name/ - if 'pr_previews' in path: - # Skip the first 5 segments (pr_previews/username/branch/name/) - segments = segments[5:] - - # Debug the depth calculation - # print(f"URL depth calculation - Path: {path}, Segments: {segments}, Depth: {len(segments)}", file=sys.stderr) - - return len(segments) - - def get_urls_from_sitemap(sitemap_url, max_depth): - try: - print(f"Fetching sitemap from {sitemap_url}", file=sys.stderr) - # Don't use auth for sitemap - response = requests.get(sitemap_url) - print(f"Sitemap response status: {response.status_code}", file=sys.stderr) - if response.status_code == 200: - print(f"Sitemap content: {response.text[:500]}...", file=sys.stderr) - root = ET.fromstring(response.content) - # Get all URLs from sitemap - all_urls = set() - - for url in root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}url'): - loc = url.find('{http://www.sitemaps.org/schemas/sitemap/0.9}loc') - if loc is not None: - full_url = loc.text - parsed_url = urlparse(full_url) - - # Extract the path part after the base URL - path = parsed_url.path - # Remove leading slash if present - path = path.lstrip('/') - - # Only include .html files - 
if path.endswith('.html'): - # Check depth - if get_url_depth(path) <= max_depth: - # Remove any segments that match the PR preview path - segments = path.split('/') - # Keep only the segments after the PR preview path - pr_preview_index = -1 - for i, segment in enumerate(segments): - if segment == 'pr_previews': - pr_preview_index = i - break - if pr_preview_index >= 0: - segments = segments[pr_preview_index + 4:] # Skip pr_previews/username/branch/name - path = '/'.join(segments) - all_urls.add(path) - print(f"Found URL in sitemap: {path}", file=sys.stderr) - - print(f"Found {len(all_urls)} URLs in sitemap:", file=sys.stderr) - for url in sorted(all_urls): - print(f" {url}", file=sys.stderr) - return sorted(list(all_urls)) - else: - print(f"Failed to fetch sitemap: {response.status_code}", file=sys.stderr) - except Exception as e: - print(f"Error processing sitemap {sitemap_url}: {str(e)}", file=sys.stderr) - return [] - - def get_links(url, max_depth, visited=None): - if visited is None: - visited = set() - - current_depth = get_url_depth(url) - print(f"Checking URL {url} at depth {current_depth}", file=sys.stderr) - - if current_depth > max_depth or url in visited: - print(f"Skipping {url} - depth {current_depth} > {max_depth} or already visited", file=sys.stderr) - return set() - - visited.add(url) - links = set() - - try: - print(f"Fetching {url}", file=sys.stderr) - headers = get_auth_headers() - response = requests.get(url, headers=headers) - print(f"Response status: {response.status_code}", file=sys.stderr) - if response.status_code == 200: - soup = BeautifulSoup(response.text, 'html.parser') - print(f"Found {len(soup.find_all('a', href=True))} links on page", file=sys.stderr) - - for a in soup.find_all('a', href=True): - href = a['href'] - print(f"Processing link: {href}", file=sys.stderr) - - # Skip external links and anchors - if href.startswith('#') or href.startswith('http'): - print(f"Skipping external/anchor link: {href}", file=sys.stderr) - 
continue - - # Convert relative URLs to absolute - full_url = urljoin(url, href) - print(f"Converted to full URL: {full_url}", file=sys.stderr) - - # Only include URLs from the same base domain - if urlparse(full_url).netloc == urlparse(url).netloc: - # Extract just the path part - path = urlparse(full_url).path - # Remove leading slash if present - path = path.lstrip('/') - - # Only include .html files - if path.endswith('.html'): - print(f"Found HTML link: {path}", file=sys.stderr) - links.add(path) - # Only recursively get links if we haven't hit max depth - if get_url_depth(path) < max_depth: - print(f"Recursively checking {path} at depth {get_url_depth(path)}", file=sys.stderr) - links.update(get_links(full_url, max_depth, visited)) - else: - print(f"Skipping recursive check for {path} - at max depth", file=sys.stderr) - else: - print(f"Skipping external domain link: {href}", file=sys.stderr) - except Exception as e: - print(f"Error processing {url}: {str(e)}", file=sys.stderr) - - return links - - # Get command line arguments - base_url = sys.argv[1] - max_depth = int(sys.argv[2]) - - print(f"Base URL: {base_url}", file=sys.stderr) - print(f"Max depth: {max_depth}", file=sys.stderr) - - # Get all URLs - all_urls = set() - - if max_depth == 0: - # For depth 0, only check ROOT_PAGES - print("Depth is 0, only checking ROOT_PAGES", file=sys.stderr) - for root in ROOT_PAGES: - all_urls.add(root) - print(f"Added root page: {root}", file=sys.stderr) - else: - # For depth > 0, use sitemap - print(f"Depth is {max_depth}, using sitemap", file=sys.stderr) - sitemap_url = f"{base_url}/sitemap.xml" - sitemap_urls = get_urls_from_sitemap(sitemap_url, max_depth) - print(f"Found {len(sitemap_urls)} URLs in sitemap", file=sys.stderr) - all_urls.update(sitemap_urls) - - # Print URLs to stdout, ensuring proper URL construction - print(f"Total URLs found: {len(all_urls)}", file=sys.stderr) - for url in sorted(all_urls): - # Remove any leading slashes from the URL to avoid double 
slashes - url = url.lstrip('/') - # Construct the full URL by joining base_url and url with a single slash - full_url = f"{base_url.rstrip('/')}/{url}" - print(full_url) - print(f"Added URL: {full_url}", file=sys.stderr) - EOF - - # Run the crawler - python crawl.py "$BASE_URL" "$DEPTH" > lhci-urls.txt - - echo "Lighthouse will check the following URLs:" - cat lhci-urls.txt - echo -e "\nTotal number of URLs: $(wc -l < lhci-urls.txt)" - - # Verify we have URLs - if [ ! -s lhci-urls.txt ]; then - echo "Error: No URLs were generated. Check the debug output above." - exit 1 - fi - - - name: Create Lighthouse config - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - cat > .lighthouserc.js << 'EOF' - const fs = require('fs'); - const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean); - - // Add auth to installation URLs using the same format as the URL check step - const urlsWithAuth = urls.map(url => { - if (url.includes('/installation/')) { - return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`; - } - return url; - }); - - module.exports = { - ci: { - collect: { - url: urlsWithAuth, - numberOfRuns: 1, - settings: { - formFactor: 'desktop', - screenEmulation: { - mobile: false, - width: 1350, - height: 940, - deviceScaleFactor: 1, - disabled: false, + + echo "skip=false" >> $GITHUB_OUTPUT + echo "Lighthouse will check:" + cat ../../lhci-urls.txt + + # Probe first URL from list (beyond index.html) when in changed mode + if [ "$LIGHTHOUSE_MODE" = "changed" ]; then + FIRST=$(head -n1 ../../lhci-urls.txt) + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$FIRST") + echo "Probe $FIRST — status: $status" + if [ "$status" -ne 200 ]; then + echo "Error: Changed page not reachable on preview" + exit 1 + fi + fi + + - name: Verify installation page auth + if: | + steps.check_preview.outputs.preview_exists == 'true' && + 
steps.generate_urls.outputs.skip != 'true' + run: | + if ! grep -q '/installation/' lhci-urls.txt 2>/dev/null; then + echo "No installation pages in URL list — skipping auth check" + exit 0 + fi + auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-staging.validmind.ai/pr_previews/${{ steps.pr.outputs.head_ref }}/installation/index.html" + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url") + echo "Checking installation page — status: $status" + if [ "$status" -ne 200 ]; then + echo "Error: Installation page not accessible with authentication" + exit 1 + fi + + - name: Post skip comment + if: steps.generate_urls.outputs.skip == 'true' + uses: actions/github-script@v6 + with: + script: | + const prNumber = parseInt(process.env.PR_NUMBER, 10); + const body = `## Lighthouse check results\n\n✓ INFO: No site pages to audit in this PR.\n\nCommit SHA: [${process.env.COMMIT_SHA_SHORT}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${process.env.COMMIT_SHA})`; + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }); + + - name: Install Lighthouse CI + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: npm install -g @lhci/cli + + - name: Create Lighthouse config + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: | + cat > .lighthouserc.js << 'EOF' + const fs = require('fs'); + const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean); + + const urlsWithAuth = urls.map(url => { + if (url.includes('/installation/')) { + return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`; + } + return url; + }); + + module.exports = { + ci: { + collect: { + url: urlsWithAuth, + numberOfRuns: 3, + 
settings: { + formFactor: 'desktop', + screenEmulation: { + mobile: false, + width: 1350, + height: 940, + deviceScaleFactor: 1, + disabled: false, + }, + throttling: { + rttMs: 40, + throughputKbps: 10240, + cpuSlowdownMultiplier: 1, + requestLatencyMs: 0, + downloadThroughputKbps: 0, + uploadThroughputKbps: 0, + }, }, - throttling: { - rttMs: 40, - throughputKbps: 10240, - cpuSlowdownMultiplier: 1, - requestLatencyMs: 0, - downloadThroughputKbps: 0, - uploadThroughputKbps: 0, + }, + assert: { + assertions: { + 'categories:accessibility': ['error', { minScore: 0.9 }], }, }, - }, - assert: { - assertions: { - 'categories:accessibility': ['error', { minScore: 0.9 }], + upload: { + target: 'temporary-public-storage', }, }, - upload: { - target: 'temporary-public-storage', - }, - }, - }; - EOF - - - name: Run Lighthouse audit - if: steps.check_preview.outputs.preview_exists == 'true' - uses: treosh/lighthouse-ci-action@v11 - id: lighthouse - continue-on-error: true - env: - INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} - INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} - with: - configPath: .lighthouserc.js - uploadArtifacts: true - temporaryPublicStorage: true - - - name: Check Lighthouse audit result - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - # Check if the manifest exists and is valid JSON - if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then - echo "Error: Lighthouse audit failed - no manifest output" - exit 1 - fi - - # Try to parse the manifest as JSON - if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then - echo "Error: Lighthouse audit failed - invalid manifest format" - exit 1 - fi - - # Check if any URLs were successfully audited - if ! 
echo '${{ steps.lighthouse.outputs.manifest }}' | jq 'length > 0' > /dev/null 2>&1; then - echo "Error: Lighthouse audit failed - no URLs were successfully audited" - exit 1 - fi - - - name: Post Lighthouse results comment - if: steps.check_preview.outputs.preview_exists == 'true' - uses: actions/github-script@v6 - with: - script: | - const runId = context.runId; - const baseUrl = process.env.PREVIEW_URL; - const commitSha = process.env.COMMIT_SHA; - const commitShaShort = process.env.COMMIT_SHA_SHORT; - - // Get artifacts for this run - const { data: artifacts } = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: runId, - }); + }; + EOF + + - name: Run Lighthouse audit + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + uses: treosh/lighthouse-ci-action@v11 + id: lighthouse + env: + INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} + INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} + with: + configPath: .lighthouserc.js + uploadArtifacts: true + temporaryPublicStorage: true + + - name: Check Lighthouse audit result + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: | + if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then + echo "Error: Lighthouse audit failed - no manifest output" + exit 1 + fi + + if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then + echo "Error: Lighthouse audit failed - invalid manifest format" + exit 1 + fi + + if ! 
echo '${{ steps.lighthouse.outputs.manifest }}' | jq 'length > 0' > /dev/null 2>&1; then + echo "Error: Lighthouse audit failed - no URLs were successfully audited" + exit 1 + fi + + # Fail if any page scored below 0.9 on accessibility + below=$(echo '${{ steps.lighthouse.outputs.manifest }}' | jq '[.[] | select(.summary.accessibility < 0.9)] | length') + if [ "$below" -gt 0 ]; then + echo "Error: $below page(s) scored below 0.9 on accessibility" + echo '${{ steps.lighthouse.outputs.manifest }}' | jq -r '.[] | select(.summary.accessibility < 0.9) | "\(.url): \(.summary.accessibility)"' + exit 1 + fi - // Lighthouse artifact - const lighthouseArtifact = artifacts.artifacts.find(a => a.name === 'lighthouse-report'); - const lighthouseArtifactUrl = lighthouseArtifact - ? `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}/artifacts/${lighthouseArtifact.id}` - : null; - - // Lighthouse - const manifest = '${{ steps.lighthouse.outputs.manifest }}'; - let manifestJson; - try { - manifestJson = JSON.parse(manifest); - if (!Array.isArray(manifestJson) || manifestJson.length === 0) { - throw new Error('Invalid manifest format or empty results'); + - name: Post Lighthouse results comment + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + uses: actions/github-script@v6 + env: + LIGHTHOUSE_MODE: ${{ env.LIGHTHOUSE_MODE }} + LIGHTHOUSE_DEPTH: ${{ env.LIGHTHOUSE_DEPTH }} + with: + script: | + const fs = require('fs'); + const prNumber = parseInt(process.env.PR_NUMBER, 10); + const runId = context.runId; + const baseUrl = process.env.PREVIEW_URL; + const commitSha = process.env.COMMIT_SHA; + const commitShaShort = process.env.COMMIT_SHA_SHORT; + const mode = process.env.LIGHTHOUSE_MODE; + const depth = process.env.LIGHTHOUSE_DEPTH; + + let metadata = {}; + try { + metadata = JSON.parse(fs.readFileSync('lighthouse-metadata.json', 'utf8')); + } catch (e) { + console.log('No metadata file:', 
e.message); } - } catch (error) { - console.error('Error parsing Lighthouse manifest:', error); - await github.rest.issues.createComment({ + + const manifest = '${{ steps.lighthouse.outputs.manifest }}'; + let manifestJson; + try { + manifestJson = JSON.parse(manifest); + if (!Array.isArray(manifestJson) || manifestJson.length === 0) { + throw new Error('Invalid manifest'); + } + } catch (error) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. [Workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId})`, + }); + return; + } + + const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: context.issue.number, - body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. Please check the [workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}) for details.` + issue_number: prNumber, }); - return; - } - - // Delete old Lighthouse comments - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - }); - - // Delete any previous comments from this workflow - for (const comment of comments) { - if (comment.user.login === 'github-actions[bot]' && - comment.body.includes('## Lighthouse check results')) { - try { - console.log(`Deleting Lighthouse comment ${comment.id}`); + for (const comment of comments) { + if (comment.user.login === 'github-actions[bot]' && + comment.body.includes('## Lighthouse check results')) { await github.rest.issues.deleteComment({ owner: context.repo.owner, repo: context.repo.repo, comment_id: comment.id, }); - console.log(`Successfully deleted Lighthouse comment ${comment.id}`); - } catch (error) { - 
console.error(`Failed to delete Lighthouse comment ${comment.id}:`, error); } } - } - - // Calculate average accessibility score - const scores = manifestJson.map(run => run.summary.accessibility); - const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length; - const lighthouseScore = avgScore.toFixed(2); - - const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`; - let lighthouseComment = ''; - if (parseFloat(lighthouseScore) >= 0.9) { - lighthouseComment = `✓ INFO: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [View the workflow run](${lighthouseReportUrl})`; - } else { - lighthouseComment = `⚠️ WARN: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [Check the workflow run](${lighthouseReportUrl})`; - } - const stripAuth = url => { - try { - const u = new URL(url); - u.username = ''; - u.password = ''; - return u.toString(); - } catch { - return url; - } - }; + const scores = manifestJson.map(run => run.summary.accessibility); + const avgScore = (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(2); + const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`; + const lighthouseComment = parseFloat(avgScore) >= 0.9 + ? 
`✓ INFO: Average accessibility score is **${avgScore}** (required: ≥0.9) — [View the workflow run](${lighthouseReportUrl})` + : `⚠️ WARN: Average accessibility score is **${avgScore}** (required: ≥0.9) — [Check the workflow run](${lighthouseReportUrl})`; - // Helper to get the public report URL from htmlPath - const getReportUrl = (run) => { - if (run.report && Array.isArray(run.report)) { - // Find the public .report.html URL - const htmlReport = run.report.find(r => r.endsWith('.report.html') && r.startsWith('http')); - if (htmlReport) return htmlReport; - // Fallback: first report if available - if (run.report.length > 0) return run.report[0]; - } - // Fallback: just show the workflow run if nothing else - return lighthouseReportUrl; - }; + const stripAuth = url => { + try { + const u = new URL(url); + u.username = ''; + u.password = ''; + return u.toString(); + } catch { + return url; + } + }; - // Parse the links output from the Lighthouse step - const links = (() => { - try { - return JSON.parse(`${{ steps.lighthouse.outputs.links }}`); - } catch { - return {}; + const links = (() => { + try { + return JSON.parse(`${{ steps.lighthouse.outputs.links }}`); + } catch { + return {}; + } + })(); + + const scoresTable = manifestJson + .map(run => { + const formatScore = score => score === null ? 'N/A' : score.toFixed(2); + const displayPath = stripAuth(run.url).replace(baseUrl, '') || run.url; + const reportUrl = links[run.url] || lighthouseReportUrl; + return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`; + }) + .join('\n'); + + const modeLine = mode === 'changed' + ? 
`Audit mode: **changed pages** (${metadata.paths?.length || manifestJson.length} URL(s))` + : `Audit mode: **depth ${depth}** (sitemap)`; + + let comment = `## Lighthouse check results\n\n`; + comment += `${lighthouseComment}\n\n`; + comment += `${modeLine}\n\n`; + comment += `
<details>\n<summary>Show Lighthouse scores</summary>\n\n`; + comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`; + if (metadata.global_fallback) { + comment += `_Global site files changed — audited root navigation pages._\n\n`; } - })(); - - const scoresTable = manifestJson - .map(run => { - const formatScore = (score) => score === null ? 'N/A' : score.toFixed(2); - const displayPath = stripAuth(run.url).replace(baseUrl, ''); - // Use the public report URL from the links output, fallback to workflow run if missing - const reportUrl = links[run.url] || lighthouseReportUrl; - return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`; - }) - .join('\n'); - - let comment = `## Lighthouse check results\n\n`; - comment += `${lighthouseComment}\n\n`; - comment += `
<details>\n<summary>Show Lighthouse scores</summary>\n\n`; - comment += `Folder depth level checked: **${process.env.DEPTH}**\n\n`; - comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`; - comment += `Modify the workflow to check a different depth:\n`; - comment += `- 0: Top-level navigation only — /index.html, /guide/guides.html, ...\n`; - comment += `- 1: All first-level subdirectories — /guide/\*.html, /developer/\*.html, ...\n`; - comment += `- 2: All second-level subdirectories — /guide/attestation/\*.html, ...\n\n`; - comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`; - comment += `|------|---------------|-------------|----------------|-----|\n`; - comment += `${scoresTable}\n\n`; - comment += `
</details>\n\n`; - - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - body: comment - }); + comment += `For a thorough audit, run the **Lighthouse check** workflow manually (Actions → Lighthouse check → Run workflow) with depth 0–2, or add the \`lighthouse:full\` label for depth 2 on the next validate run.\n\n`; + comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`; + comment += `|------|---------------|-------------|----------------|-----|\n`; + comment += `${scoresTable}\n\n`; + comment += `
</details>\n\n`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: comment, + }); diff --git a/README.md b/README.md index 8de8105fa5..66f3e92c9f 100644 --- a/README.md +++ b/README.md @@ -379,24 +379,22 @@ Similarly, http://localhost:4444/ in your browsers should show an all green logo ## Configuring Lighthouse checks -Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites to enable a better, accessible documentation for everyone. +Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites after the **Validate docs site** workflow deploys a preview. -By default, Lighthouse checks only the top-level pages in our site navigation, such as `/index.html`, `/guide/guides.html`, `/developer/validmind-library.html`, and so forth. You can configure this behavior in the workflow: +**Default (every PR):** Lighthouse audits only HTML pages that correspond to files changed under `site/` in the pull request. If you change shared layout files (`_quarto.yml`, `theme.scss`, `_variables.yml`, `_extensions/`, and similar), it falls back to the root navigation pages (`index.html`, `guide/guides.html`, and so on). -```sh -env: - # To change the default depth level: - # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.) - # 1 — All first-level subdirectories (e.g. /guide/*.html) - # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html) - # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete - DEFAULT_DEPTH: '0' -``` +**Thorough audit:** + +- Add the `lighthouse:full` label to a PR to run a depth-2 sitemap audit on the next successful validate run.
+- Or run the **Lighthouse check** workflow manually from Actions → **Run workflow**, set the PR number, and choose depth `0` (root pages), `1` (first-level sections), or `2` (second-level). Depths above zero can take hours; use them on feature branches only. + +The PR comment lists audited URLs, the commit SHA, and accessibility scores (required: ≥ 0.9 per page). **Tips:** -- On the first run, the workflow waits for a preview site to become available. For subsequent runs, it checks the currently available site, which may be behind HEAD. The PR comment shows which commit SHA was checked — rerun the check if needed. -- Use folder depths greater than zero only on working branches when you need a thorough site audit. Deeper checks take 2-12 hours to complete and significantly slow down the CI/CD pipeline. Do not merge depth changes to `main`. +- Lighthouse starts only after validate succeeds, so it no longer polls for up to 45 minutes. +- If a PR changes only CI or repo metadata (no `site/` pages), Lighthouse skips with an informational comment. +- Re-run validate (or push a commit) if the preview comment SHA does not match the commit you expect audited. 
## Monitoring diff --git a/site/scripts/lighthouse_urls.py b/site/scripts/lighthouse_urls.py new file mode 100644 index 0000000000..73234c3b7e --- /dev/null +++ b/site/scripts/lighthouse_urls.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +"""Generate Lighthouse preview URLs from PR diffs or sitemap depth.""" + +from __future__ import annotations + +import argparse +import fnmatch +import json +import os +import re +import subprocess +import sys +import xml.etree.ElementTree as ET +from pathlib import Path +from urllib.parse import urlparse + +import requests + +ROOT_PAGES = [ + "index.html", + "get-started/get-started.html", + "guide/guides.html", + "developer/validmind-library.html", + "support/support.html", + "releases/all-releases.html", + "training/training.html", +] + +GLOBAL_PATTERNS = [ + "site/_quarto.yml", + "site/_quarto-*.yml", + "site/_variables.yml", + "site/theme.scss", + "site/styles.css", + "site/_extensions/**", +] + +OUTPUT_FILE_RE = re.compile( + r"^\s*output-file:\s*[_]?([^\s#]+\.html)\s*$", + re.MULTILINE, +) + +SITEMAP_NS = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"} + + +def _matches_global_pattern(path: str) -> bool: + for pattern in GLOBAL_PATTERNS: + if fnmatch.fnmatch(path, pattern): + return True + return False + + +def _parse_output_file(qmd_path: Path) -> str | None: + try: + text = qmd_path.read_text(encoding="utf-8") + except OSError: + return None + match = OUTPUT_FILE_RE.search(text) + if not match: + return None + name = match.group(1).lstrip("_") + return str(qmd_path.parent.relative_to(Path("site")) / name).replace("\\", "/") + + +def qmd_path_to_html(path: str) -> str | None: + """Map a site/ source path to a preview HTML path.""" + if not path.startswith("site/"): + return None + + rel = path[len("site/") :] + p = Path(rel) + + if p.suffix == ".qmd": + if p.name == "index.qmd": + return str(p.parent / "index.html").replace("\\", "/") + return str(p.with_suffix(".html")).replace("\\", "/") + + return None + + +def 
asset_path_to_html(path: str) -> str | None: + """Map co-located assets under site/ to their page HTML.""" + if not path.startswith("site/"): + return None + rel = Path(path[len("site/") :]) + if rel.suffix == ".qmd": + return qmd_path_to_html(path) + + parent = rel.parent + if parent == Path("."): + return None + + site_parent = Path("site") / parent + index_qmd = site_parent / "index.qmd" + if index_qmd.exists(): + return str(parent / "index.html").replace("\\", "/") + + for qmd in sorted(site_parent.glob("*.qmd")): + if qmd.name != "index.qmd": + return str(parent / f"{qmd.stem}.html").replace("\\", "/") + + return None + + +def changed_file_to_html(path: str) -> list[str]: + """Return HTML paths affected by a single changed file.""" + if _matches_global_pattern(path): + return list(ROOT_PAGES) + + if path.endswith(".qmd"): + html = qmd_path_to_html(path) + if html: + qmd_file = Path(path) + custom = _parse_output_file(qmd_file) if qmd_file.exists() else None + results = [html] + if custom and custom not in results: + results.append(custom) + return results + return [] + + html = asset_path_to_html(path) + return [html] if html else [] + + +def git_changed_files(base_ref: str) -> list[str]: + subprocess.run( + ["git", "fetch", "origin", base_ref], + check=True, + capture_output=True, + ) + result = subprocess.run( + ["git", "diff", "--name-only", f"origin/{base_ref}...HEAD", "--", "site/"], + check=True, + capture_output=True, + text=True, + ) + return [line.strip() for line in result.stdout.splitlines() if line.strip()] + + +def urls_from_changed_files(base_ref: str) -> tuple[list[str], bool]: + """Return sorted HTML paths and whether global fallback was used.""" + changed = git_changed_files(base_ref) + if not changed: + return [], False + + html_paths: set[str] = set() + used_global_fallback = False + + for path in changed: + if _matches_global_pattern(path): + used_global_fallback = True + html_paths.update(ROOT_PAGES) + continue + for html in 
changed_file_to_html(path): + html_paths.add(html) + + if used_global_fallback: + return sorted(ROOT_PAGES), True + + return sorted(html_paths), False + + +def _path_depth(html_path: str) -> int: + path = html_path.replace(".html", "").strip("/") + if not path or path == "index": + return 0 + return len([s for s in path.split("/") if s]) + + +def urls_from_sitemap(preview_base_url: str, max_depth: int) -> list[str]: + sitemap_url = f"{preview_base_url.rstrip('/')}/sitemap.xml" + response = requests.get(sitemap_url, timeout=60) + response.raise_for_status() + root = ET.fromstring(response.content) + urls: set[str] = set() + + for url_el in root.findall(".//sm:url", SITEMAP_NS): + loc = url_el.find("sm:loc", SITEMAP_NS) + if loc is None or not loc.text: + continue + parsed = urlparse(loc.text) + path = parsed.path.lstrip("/") + if not path.endswith(".html"): + continue + + segments = path.split("/") + pr_idx = next((i for i, s in enumerate(segments) if s == "pr_previews"), -1) + if pr_idx >= 0 and len(segments) > pr_idx + 4: + path = "/".join(segments[pr_idx + 4 :]) + + if _path_depth(path) <= max_depth: + urls.add(path) + + if max_depth == 0: + return sorted(ROOT_PAGES) + + return sorted(urls) + + +def verify_urls( + preview_base_url: str, + html_paths: list[str], + installation_user: str | None = None, + installation_password: str | None = None, +) -> list[str]: + """Keep only paths that return HTTP 200 on the preview.""" + base = preview_base_url.rstrip("/") + ok: list[str] = [] + + for path in html_paths: + path = path.lstrip("/") + url = f"{base}/{path}" + if path.startswith("installation/") and installation_user and installation_password: + parsed = urlparse(url) + url = ( + f"https://{installation_user}:{installation_password}@" + f"{parsed.netloc}{parsed.path}" + ) + + try: + status = requests.head( + url, + allow_redirects=True, + timeout=30, + headers={"User-Agent": "Mozilla/5.0"}, + ).status_code + if status == 405: + status = requests.get( + url, + 
allow_redirects=True, + timeout=30, + headers={"User-Agent": "Mozilla/5.0"}, + ).status_code + except requests.RequestException as exc: + print(f"WARN: Could not reach {path}: {exc}", file=sys.stderr) + continue + + if status == 200: + ok.append(path) + print(f"OK: {path}", file=sys.stderr) + else: + print(f"WARN: Skipping {path} (HTTP {status})", file=sys.stderr) + + return ok + + +def write_url_list(preview_base_url: str, html_paths: list[str], out_path: Path) -> None: + base = preview_base_url.rstrip("/") + lines = [f"{base}/{p.lstrip('/')}" for p in html_paths] + out_path.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8") + + +def main() -> int: + parser = argparse.ArgumentParser(description="Generate Lighthouse URL list") + parser.add_argument("--mode", choices=["changed", "depth"], required=True) + parser.add_argument("--base-ref", default="main") + parser.add_argument("--depth", type=int, default=0, choices=[0, 1, 2]) + parser.add_argument("--preview-url", required=True) + parser.add_argument("--output", default="lhci-urls.txt") + parser.add_argument("--metadata", default="lighthouse-metadata.json") + parser.add_argument( + "--skip-file", + help="If set and no URLs, write this path so workflow can detect skip", + ) + args = parser.parse_args() + + metadata: dict = { + "mode": args.mode, + "depth": args.depth if args.mode == "depth" else None, + "global_fallback": False, + "skip": False, + "paths": [], + } + + if args.mode == "changed": + paths, global_fallback = urls_from_changed_files(args.base_ref) + metadata["global_fallback"] = global_fallback + else: + paths = urls_from_sitemap(args.preview_url, args.depth) + + if not paths: + metadata["skip"] = True + Path(args.metadata).write_text(json.dumps(metadata, indent=2), encoding="utf-8") + if args.skip_file: + Path(args.skip_file).write_text("skip\n", encoding="utf-8") + print("No pages to audit in this PR.", file=sys.stderr) + return 0 + + verified = verify_urls( + args.preview_url, + 
paths, + installation_user=os.environ.get("INSTALLATION_USER"), + installation_password=os.environ.get("INSTALLATION_PW"), + ) + if not verified: + print("Error: No URLs returned HTTP 200 on the preview.", file=sys.stderr) + return 1 + + metadata["paths"] = verified + Path(args.metadata).write_text(json.dumps(metadata, indent=2), encoding="utf-8") + write_url_list(args.preview_url, verified, Path(args.output)) + print(f"Wrote {len(verified)} URL(s) to {args.output}", file=sys.stderr) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/site/scripts/test_lighthouse_urls.py b/site/scripts/test_lighthouse_urls.py new file mode 100644 index 0000000000..8741af75e4 --- /dev/null +++ b/site/scripts/test_lighthouse_urls.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +"""Unit tests for lighthouse_urls.py""" + +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +from lighthouse_urls import ( + ROOT_PAGES, + _matches_global_pattern, + asset_path_to_html, + changed_file_to_html, + qmd_path_to_html, + urls_from_changed_files, +) + + +class TestQmdMapping(unittest.TestCase): + def test_simple_qmd(self): + self.assertEqual( + qmd_path_to_html("site/guide/foo.qmd"), + "guide/foo.html", + ) + + def test_index_qmd(self): + self.assertEqual( + qmd_path_to_html("site/guide/foo/index.qmd"), + "guide/foo/index.html", + ) + + def test_root_index(self): + self.assertEqual( + qmd_path_to_html("site/index.qmd"), + "index.html", + ) + + +class TestGlobalPatterns(unittest.TestCase): + def test_quarto_yml(self): + self.assertTrue(_matches_global_pattern("site/_quarto.yml")) + + def test_theme_scss(self): + self.assertTrue(_matches_global_pattern("site/theme.scss")) + + def test_extensions(self): + self.assertTrue(_matches_global_pattern("site/_extensions/foo/bar.lua")) + + def test_page_qmd_not_global(self): + self.assertFalse(_matches_global_pattern("site/guide/foo.qmd")) + + +class TestChangedFileToHtml(unittest.TestCase): + def 
test_global_returns_root_pages(self): + result = changed_file_to_html("site/_quarto.yml") + self.assertEqual(result, ROOT_PAGES) + + def test_asset_with_index_qmd(self): + import os + + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "site" / "guide" / "foo").mkdir(parents=True) + (root / "site" / "guide" / "foo" / "index.qmd").write_text("---\n") + (root / "site" / "guide" / "foo" / "pic.png").write_bytes(b"") + prev = os.getcwd() + try: + os.chdir(tmp) + html = asset_path_to_html("site/guide/foo/pic.png") + finally: + os.chdir(prev) + self.assertEqual(html, "guide/foo/index.html") + + +class TestUrlsFromChangedFiles(unittest.TestCase): + def test_empty_diff(self): + with mock.patch("lighthouse_urls.git_changed_files", return_value=[]): + paths, fallback = urls_from_changed_files("main") + self.assertEqual(paths, []) + self.assertFalse(fallback) + + def test_single_qmd(self): + with mock.patch( + "lighthouse_urls.git_changed_files", + return_value=["site/developer/how-to/test-sandbox.qmd"], + ): + paths, fallback = urls_from_changed_files("main") + self.assertEqual(paths, ["developer/how-to/test-sandbox.html"]) + self.assertFalse(fallback) + + def test_global_fallback(self): + with mock.patch( + "lighthouse_urls.git_changed_files", + return_value=["site/_variables.yml", "site/guide/foo.qmd"], + ): + paths, fallback = urls_from_changed_files("main") + self.assertEqual(set(paths), set(ROOT_PAGES)) + self.assertTrue(fallback) + + +if __name__ == "__main__": + unittest.main() From c82745be62d5a70b816f70a02589218a39c5cf9d Mon Sep 17 00:00:00 2001 From: Nik Richers Date: Mon, 18 May 2026 09:51:05 -0700 Subject: [PATCH 2/4] Dispatch Lighthouse from validate after preview deploy (sc-12702) workflow_run only runs from the default branch, so trigger Lighthouse via workflow_dispatch from validate on the PR branch instead. 
--- .github/workflows/lighthouse-check.yaml | 60 ++++++----------------- .github/workflows/validate-docs-site.yaml | 18 +++++++ README.md | 2 +- 3 files changed, 33 insertions(+), 47 deletions(-) diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml index f075ff41b7..de78a2ce5d 100644 --- a/.github/workflows/lighthouse-check.yaml +++ b/.github/workflows/lighthouse-check.yaml @@ -1,13 +1,18 @@ name: Lighthouse check on: - workflow_run: - workflows: ["Validate docs site (render, test, and deploy)"] - types: [completed] workflow_dispatch: inputs: + mode: + description: "Audit mode" + required: true + default: "changed" + type: choice + options: + - "changed" + - "depth" depth: - description: "Sitemap depth for thorough audit (0–2)" + description: "Sitemap depth when mode is depth (0–2)" required: true default: "0" type: choice @@ -29,11 +34,7 @@ permissions: jobs: lighthouse: runs-on: ubuntu-latest - if: | - (github.event_name == 'workflow_run' && - github.event.workflow_run.conclusion == 'success' && - github.event.workflow_run.event == 'pull_request') || - github.event_name == 'workflow_dispatch' + if: github.event_name == 'workflow_dispatch' steps: - name: Resolve PR context id: pr @@ -64,46 +65,13 @@ jobs: }; } - if (context.eventName === 'workflow_dispatch') { - const prNumber = parseInt('${{ inputs.pr_number }}', 10); - const info = await getPr(prNumber); - if (!info) return; - core.setOutput('number', String(info.number)); - core.setOutput('head_ref', info.head_ref); - core.setOutput('head_sha', info.head_sha); - core.setOutput('base_ref', info.base_ref); - core.setOutput('mode', 'depth'); - core.setOutput('depth', '${{ inputs.depth }}'); - core.setOutput('full_audit', String(info.full_audit)); - return; - } - - const run = context.payload.workflow_run; - let prNumber = null; - if (run.pull_requests && run.pull_requests.length > 0) { - prNumber = run.pull_requests[0].number; - } else { - const { data: prs } = await 
github.rest.repos.listPullRequestsAssociatedWithCommit({ - owner, - repo, - commit_sha: run.head_sha, - }); - if (prs.length > 0) { - prNumber = prs[0].number; - } - } - - if (!prNumber) { - core.setFailed('Could not resolve PR for workflow_run'); - return; - } - + const prNumber = parseInt('${{ inputs.pr_number }}', 10); const info = await getPr(prNumber); if (!info) return; - let mode = 'changed'; - let depth = '0'; - if (info.full_audit) { + let mode = '${{ inputs.mode }}'; + let depth = '${{ inputs.depth }}'; + if (info.full_audit && mode === 'changed') { mode = 'depth'; depth = '2'; } diff --git a/.github/workflows/validate-docs-site.yaml b/.github/workflows/validate-docs-site.yaml index 1116bfae2a..35c48aefe5 100644 --- a/.github/workflows/validate-docs-site.yaml +++ b/.github/workflows/validate-docs-site.yaml @@ -5,6 +5,7 @@ on: types: [opened, synchronize, ready_for_review] permissions: + actions: write issues: write pull-requests: write @@ -161,6 +162,23 @@ jobs: body: comment }); + - name: Trigger Lighthouse check + uses: actions/github-script@v6 + with: + script: | + await github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'lighthouse-check.yaml', + ref: context.payload.pull_request.head.ref, + inputs: { + mode: 'changed', + depth: '0', + pr_number: String(context.issue.number), + }, + }); + console.log(`Dispatched Lighthouse check for PR #${context.issue.number}`); + - name: Install pandoc run: | sudo apt-get update diff --git a/README.md b/README.md index 66f3e92c9f..1bf5519e53 100644 --- a/README.md +++ b/README.md @@ -379,7 +379,7 @@ Similarly, http://localhost:4444/ in your browsers should show an all green logo ## Configuring Lighthouse checks -Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites after the **Validate docs site** workflow deploys a preview. 
+Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites when **Validate docs site** finishes deploying a preview (it dispatches the Lighthouse workflow on the PR branch). **Default (every PR):** Lighthouse audits only HTML pages that correspond to files changed under `site/` in the pull request. If you change shared layout files (`_quarto.yml`, `theme.scss`, `_variables.yml`, `_extensions/`, and similar), it falls back to the root navigation pages (`index.html`, `guide/guides.html`, and so on). From 4459f469b630d2b937c2a510bcf22883e8bccdc4 Mon Sep 17 00:00:00 2001 From: Nik Richers Date: Mon, 18 May 2026 16:41:40 -0700 Subject: [PATCH 3/4] test: Trigger Lighthouse changed-page audit via minor wording tweak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercises the changed-files path on PR 1333 — Lighthouse should audit developer/how-to/test-sandbox.html only. --- site/developer/how-to/test-sandbox.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/developer/how-to/test-sandbox.qmd b/site/developer/how-to/test-sandbox.qmd index c9ea3a23c3..50e9505ec9 100644 --- a/site/developer/how-to/test-sandbox.qmd +++ b/site/developer/how-to/test-sandbox.qmd @@ -12,7 +12,7 @@ aliases: -Explore our interactive sandbox to see what tests are available in the {{< var validmind.developer >}} and how you can use them in your own code. +Explore our interactive sandbox to see which tests are available in the {{< var validmind.developer >}} and how you can use them in your own code. 
::: {.column-screen-right} From 8f0a727f71efb7b7ba3744269a8d733f13d5fbe2 Mon Sep 17 00:00:00 2001 From: Nik Richers Date: Mon, 18 May 2026 17:31:33 -0700 Subject: [PATCH 4/4] Fix diff pathspec so script works from any cwd (sc-12702) Use ':(top)site/' to anchor the pathspec at the repo root regardless of where the script is invoked from. The workflow runs it from site/scripts/, which previously caused git diff to return zero files. --- site/scripts/lighthouse_urls.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/site/scripts/lighthouse_urls.py b/site/scripts/lighthouse_urls.py index 73234c3b7e..b7aa13619b 100644 --- a/site/scripts/lighthouse_urls.py +++ b/site/scripts/lighthouse_urls.py @@ -129,7 +129,14 @@ def git_changed_files(base_ref: str) -> list[str]: capture_output=True, ) result = subprocess.run( - ["git", "diff", "--name-only", f"origin/{base_ref}...HEAD", "--", "site/"], + [ + "git", + "diff", + "--name-only", + f"origin/{base_ref}...HEAD", + "--", + ":(top)site/", + ], check=True, capture_output=True, text=True,