From 0e3a7f739ffecc495e1fd9a39aad9f3ef406ec59 Mon Sep 17 00:00:00 2001
From: Nik Richers <nik@validmind.ai>
Date: Mon, 18 May 2026 09:21:03 -0700
Subject: [PATCH 1/4] Fix intermittent Lighthouse CI and scope audits to
 changed pages (sc-12702)

Chain Lighthouse after validate via workflow_run instead of polling, audit
PR-changed site pages by default, and keep depth dispatch and lighthouse:full
for thorough runs.
---
 .github/workflows/lighthouse-check.yaml | 971 ++++++++++--------------
 README.md                               |  24 +-
 site/scripts/lighthouse_urls.py         | 307 ++++++++
 site/scripts/test_lighthouse_urls.py    | 102 +++
 4 files changed, 833 insertions(+), 571 deletions(-)
 create mode 100644 site/scripts/lighthouse_urls.py
 create mode 100644 site/scripts/test_lighthouse_urls.py

diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml
index d96685de1c..f075ff41b7 100644
--- a/.github/workflows/lighthouse-check.yaml
+++ b/.github/workflows/lighthouse-check.yaml
@@ -1,598 +1,453 @@
 name: Lighthouse check
 
 on:
-  pull_request:
-    types: [opened, synchronize, ready_for_review]
+  workflow_run:
+    workflows: ["Validate docs site (render, test, and deploy)"]
+    types: [completed]
+  workflow_dispatch:
+    inputs:
+      depth:
+        description: "Sitemap depth for thorough audit (0–2)"
+        required: true
+        default: "0"
+        type: choice
+        options:
+          - "0"
+          - "1"
+          - "2"
+      pr_number:
+        description: "Pull request number to audit"
+        required: true
+        type: string
 
 permissions:
+  contents: read
   issues: write
   pull-requests: write
-
-env:
-  # To change the default depth level:
-  # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.)
-  # 1 — All first-level subdirectories (e.g. /guide/*.html)
-  # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html)
-  # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete
-  DEFAULT_DEPTH: '0'
+  actions: read
 
 jobs:
   lighthouse:
     runs-on: ubuntu-latest
-    if: github.event.pull_request.draft == false
+    if: |
+      (github.event_name == 'workflow_run' &&
+       github.event.workflow_run.conclusion == 'success' &&
+       github.event.workflow_run.event == 'pull_request') ||
+      github.event_name == 'workflow_dispatch'
     steps:
-    - name: Wait for validation workflow to complete
-      uses: actions/github-script@v6
-      with:
-        script: |
-          const maxWaitTime = 45 * 60 * 1000; // 45 minutes in milliseconds
-          const pollInterval = 60 * 1000; // 60 seconds in milliseconds
-          const startTime = Date.now();
-          
-          console.log(`Waiting for "Validate docs site" workflow to complete for PR #${context.issue.number}`);
-          console.log(`Head SHA: ${context.payload.pull_request.head.sha}`);
-          
-          while (Date.now() - startTime < maxWaitTime) {
-            try {
-              // Get workflow runs for the validate-docs-site workflow
-              const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                workflow_id: 'validate-docs-site.yaml',
-                head_sha: context.payload.pull_request.head.sha,
-                per_page: 5
+      - name: Resolve PR context
+        id: pr
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const owner = context.repo.owner;
+            const repo = context.repo.repo;
+
+            async function getPr(prNumber) {
+              const { data: pr } = await github.rest.pulls.get({
+                owner,
+                repo,
+                pull_number: prNumber,
               });
-              
-              console.log(`Found ${runs.workflow_runs.length} workflow runs for this commit`);
-              
-              if (runs.workflow_runs.length > 0) {
-                // Get the most recent run
-                const latestRun = runs.workflow_runs[0];
-                console.log(`Latest run: ${latestRun.id}, status: ${latestRun.status}, conclusion: ${latestRun.conclusion}`);
-                
-                if (latestRun.status === 'completed') {
-                  if (latestRun.conclusion === 'success') {
-                    console.log('✅ Validation workflow completed successfully');
-                    break;
-                  } else {
-                    throw new Error(`❌ Validation workflow failed with conclusion: ${latestRun.conclusion}`);
-                  }
-                } else if (latestRun.status === 'in_progress' || latestRun.status === 'queued') {
-                  console.log(`⏳ Validation workflow is ${latestRun.status}, continuing to wait...`);
-                } else {
-                  console.log(`⚠️  Unexpected status: ${latestRun.status}`);
-                }
-              } else {
-                console.log('⏳ No workflow runs found yet, validation may not have started...');
+              if (pr.draft) {
+                core.setFailed('Skipping Lighthouse for draft PR');
+                return null;
               }
-              
-              console.log(`Elapsed time: ${Math.round((Date.now() - startTime) / 1000 / 60)} minutes`);
-              await new Promise(resolve => setTimeout(resolve, pollInterval));
-              
-            } catch (error) {
-              console.error('Error checking workflow status:', error);
-              throw error;
+              const labels = (pr.labels || []).map(l => l.name);
+              const fullAudit = labels.includes('lighthouse:full');
+              return {
+                number: pr.number,
+                head_ref: pr.head.ref,
+                head_sha: pr.head.sha,
+                base_ref: pr.base.ref,
+                full_audit: fullAudit,
+              };
             }
+
+            if (context.eventName === 'workflow_dispatch') {
+              // Dispatch inputs are read from the event payload rather than being
+              // interpolated into this script's source, so a crafted input value
+              // is only ever treated as data and never parsed as JavaScript.
+              const { pr_number: rawPr, depth: rawDepth } = context.payload.inputs;
+              const prNumber = parseInt(rawPr, 10);
+              if (!Number.isInteger(prNumber) || prNumber <= 0) return core.setFailed(`Invalid pr_number input: ${rawPr}`);
+              const info = await getPr(prNumber);
+              if (!info) return; // getPr returned null for a draft PR and already failed the step
+              const outputs = { ...info, number: String(info.number), full_audit: String(info.full_audit), mode: 'depth', depth: rawDepth };
+              for (const [key, value] of Object.entries(outputs)) core.setOutput(key, value);
+              return;
+            }
+
+            const run = context.payload.workflow_run; // the upstream run whose completion triggered this workflow
+            let prNumber = null;
+            if (run.pull_requests && run.pull_requests.length > 0) {
+              prNumber = run.pull_requests[0].number;
+            } else { // payload lists no PR: fall back to a lookup by the run's head commit
+              const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({
+                owner,
+                repo,
+                commit_sha: run.head_sha,
+              });
+              if (prs.length > 0) {
+                prNumber = prs[0].number; // NOTE(review): first match is used without checking PR state — confirm this is intended
+              }
+            }
+
+            if (!prNumber) {
+              core.setFailed('Could not resolve PR for workflow_run');
+              return;
+            }
+
+            const info = await getPr(prNumber);
+            if (!info) return; // getPr returned null for a draft PR and already failed the step
+
+            let mode = 'changed'; // default mode handed to lighthouse_urls.py via LIGHTHOUSE_MODE
+            let depth = '0';
+            if (info.full_audit) { // the PR carries the lighthouse:full label
+              mode = 'depth';
+              depth = '2';
+            }
+
+            core.setOutput('number', String(info.number)); // outputs are consumed by later steps as steps.pr.outputs.*
+            core.setOutput('head_ref', info.head_ref);
+            core.setOutput('head_sha', info.head_sha);
+            core.setOutput('base_ref', info.base_ref);
+            core.setOutput('mode', mode);
+            core.setOutput('depth', depth);
+            core.setOutput('full_audit', String(info.full_audit));
+
+      - name: Check out repository  # NOTE(review): checks out PR-head code in a job that holds secrets and write permissions — confirm fork PRs cannot reach this path
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.pr.outputs.head_sha }}  # head SHA emitted by the "Resolve PR context" step
+          fetch-depth: 0  # full history; presumably needed to diff against the base ref — confirm
+
+      - name: Set environment
+        env:
+          HEAD_REF: ${{ steps.pr.outputs.head_ref }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+        run: |
+          # The branch name is chosen by the PR author, so it reaches the shell as an env var (data), never as script text; the second line holds only values produced by the resolve step.
+          printf '%s\n' "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/$HEAD_REF" "COMMIT_SHA=$HEAD_SHA" "COMMIT_SHA_SHORT=${HEAD_SHA:0:7}" >> "$GITHUB_ENV"
+          printf '%s\n' "LIGHTHOUSE_MODE=${{ steps.pr.outputs.mode }}" "LIGHTHOUSE_DEPTH=${{ steps.pr.outputs.depth }}" "PR_NUMBER=${{ steps.pr.outputs.number }}" >> "$GITHUB_ENV"
+
+      - name: Check for PR preview URL
+        id: check_preview
+        run: |
+          check_url() {
+            # Succeeds only when a HEAD request to $1 answers HTTP 200; the timeouts keep a stalled probe from hanging the job.
+            local url="$1" status
+            status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --connect-timeout 10 --max-time 30 "$url")
+            echo "Checking $url — status: $status"
+            [ "$status" -eq 200 ]
+          }
-          
-          // Check if we timed out
-          if (Date.now() - startTime >= maxWaitTime) {
-            throw new Error('⏰ Timed out waiting for validation workflow to complete');
-          }
-          
-    - name: Check out repository
-      uses: actions/checkout@v4
-
-    - name: Get commit SHA
-      id: get_sha
-      run: |
-        echo "COMMIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV
-        echo "COMMIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
-
-    - name: Set PR preview URL
-      id: set_url
-      run: |
-        echo "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}" >> $GITHUB_ENV
-        echo "DEPTH=${{ env.DEFAULT_DEPTH }}" >> $GITHUB_ENV
-
-    - name: Check for PR preview URL and sitemap
-      id: check_preview
-      run: |
-        # Function to check if URL returns HTTP 200
-        check_url() {
-          local url=$1
-          local status
-          status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$url")
-          echo "Checking $url — status: $status"
-          [ "$status" -eq 200 ]
-        }
-    
-        echo "Waiting for preview site to become available ..."
-        for i in {1..60}; do
-          if check_url "$PREVIEW_URL/index.html"; then
-            echo "Info: Preview site is now available"
-            break
+
+          echo "Waiting for preview site to become available ..."
+          for i in $(seq 1 30); do
+            if check_url "$PREVIEW_URL/index.html"; then
+              echo "Info: Preview site is now available"
+              break
+            fi
+            if [ "$i" -eq 30 ]; then
+              echo "Error: Preview URL did not become available after 30 minutes"
+              exit 1
+            fi
+            echo "Attempt $i/30: waiting 1 minute..."
+            sleep 60
+          done
+
+          if ! check_url "$PREVIEW_URL/sitemap.xml"; then
+            echo "Error: Sitemap missing at $PREVIEW_URL/sitemap.xml"
+            exit 1
+          fi
+
+          echo "preview_exists=true" >> $GITHUB_OUTPUT
+
+      - name: Install Python dependencies
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+
+      - name: Generate URLs to check
+        if: steps.check_preview.outputs.preview_exists == 'true'
+        id: generate_urls
+        env:
+          INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }}
+          INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }}
+        run: |
+          cd site/scripts
+          python lighthouse_urls.py \
+            --mode "$LIGHTHOUSE_MODE" \
+            --base-ref "${{ steps.pr.outputs.base_ref }}" \
+            --depth "$LIGHTHOUSE_DEPTH" \
+            --preview-url "$PREVIEW_URL" \
+            --output ../../lhci-urls.txt \
+            --metadata ../../lighthouse-metadata.json \
+            --skip-file ../../lighthouse-skip.txt
+
+          if [ -f ../../lighthouse-skip.txt ]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            echo "No site pages to audit in this PR."
+            exit 0
           fi
-    
-          if [ $i -eq 60 ]; then
-            echo "Error: Preview URL did not become available after 60 minutes at $PREVIEW_URL/index.html"
+
+          if [ ! -s ../../lhci-urls.txt ]; then
+            echo "Error: No URLs were generated."
             exit 1
           fi
-    
-          echo "Attempt $i/60: Preview site not ready yet, waiting 1 minute..."
-          sleep 60
-        done
-    
-        if ! check_url "$PREVIEW_URL/sitemap.xml"; then
-          echo "Error: Sitemap does not exist at $PREVIEW_URL/sitemap.xml"
-          exit 1
-        fi
-    
-        echo "Debug: Checking installation page with URL-based auth..."
-        auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}/installation/index.html"
-        status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url")
-        echo "Checking $auth_url — status: $status"
-        if [ "$status" -ne 200 ]; then
-          echo "Error: Installation page is not accessible with authentication at $auth_url"
-          exit 1
-        fi
-    
-        echo "Info: Successfully accessed password-protected installation page"
-    
-        echo "preview_exists=true" >> $GITHUB_OUTPUT
-
-    - name: Install Lighthouse CI
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      run: npm install -g @lhci/cli
-
-    - name: Install required Python packages
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      run: |
-        python -m pip install --upgrade pip
-        pip install requests beautifulsoup4
-
-    - name: Generate URLs to check
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      id: generate_urls
-      run: |
-        BASE_URL="$PREVIEW_URL"
-        
-        # Create a Python script to crawl the site
-        cat > crawl.py << 'EOF'
-        import requests
-        from bs4 import BeautifulSoup
-        import sys
-        from urllib.parse import urljoin, urlparse
-        import json
-        import xml.etree.ElementTree as ET
-        import base64
-        import os
-
-        # Define root pages to check
-        ROOT_PAGES = [
-            "index.html",
-            "get-started/get-started.html",
-            "guide/guides.html",
-            "developer/validmind-library.html",
-            "support/support.html",
-            "releases/all-releases.html",
-            "training/training.html"
-        ]
-
-        def get_auth_headers():
-            # Only use auth for installation pages
-            if 'installation/' in url:
-                # Create auth headers from environment variables
-                auth_string = base64.b64encode(f"{os.environ['INSTALLATION_USER']}:{os.environ['INSTALLATION_PW']}".encode()).decode()
-                return {"Authorization": f"Basic {auth_string}"}
-            return {}
-
-        def get_url_depth(url):
-            # Parse the URL to get just the path
-            path = urlparse(url).path
-            # Remove .html extension for depth calculation
-            path = path.replace('.html', '')
-            # Remove any leading/trailing slashes
-            path = path.strip('/')
-            
-            # Split into segments and count non-empty ones
-            segments = [x for x in path.split('/') if x]
-            
-            # For PR preview URLs, we need to skip the first 5 segments:
-            # /pr_previews/username/branch/name/
-            if 'pr_previews' in path:
-                # Skip the first 5 segments (pr_previews/username/branch/name/)
-                segments = segments[5:]
-            
-            # Debug the depth calculation
-            # print(f"URL depth calculation - Path: {path}, Segments: {segments}, Depth: {len(segments)}", file=sys.stderr)
-            
-            return len(segments)
-
-        def get_urls_from_sitemap(sitemap_url, max_depth):
-            try:
-                print(f"Fetching sitemap from {sitemap_url}", file=sys.stderr)
-                # Don't use auth for sitemap
-                response = requests.get(sitemap_url)
-                print(f"Sitemap response status: {response.status_code}", file=sys.stderr)
-                if response.status_code == 200:
-                    print(f"Sitemap content: {response.text[:500]}...", file=sys.stderr)
-                    root = ET.fromstring(response.content)
-                    # Get all URLs from sitemap
-                    all_urls = set()
-                    
-                    for url in root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}url'):
-                        loc = url.find('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')
-                        if loc is not None:
-                            full_url = loc.text
-                            parsed_url = urlparse(full_url)
-                            
-                            # Extract the path part after the base URL
-                            path = parsed_url.path
-                            # Remove leading slash if present
-                            path = path.lstrip('/')
-                            
-                            # Only include .html files
-                            if path.endswith('.html'):
-                                # Check depth
-                                if get_url_depth(path) <= max_depth:
-                                    # Remove any segments that match the PR preview path
-                                    segments = path.split('/')
-                                    # Keep only the segments after the PR preview path
-                                    pr_preview_index = -1
-                                    for i, segment in enumerate(segments):
-                                        if segment == 'pr_previews':
-                                            pr_preview_index = i
-                                            break
-                                    if pr_preview_index >= 0:
-                                        segments = segments[pr_preview_index + 4:]  # Skip pr_previews/username/branch/name
-                                    path = '/'.join(segments)
-                                    all_urls.add(path)
-                                    print(f"Found URL in sitemap: {path}", file=sys.stderr)
-                    
-                    print(f"Found {len(all_urls)} URLs in sitemap:", file=sys.stderr)
-                    for url in sorted(all_urls):
-                        print(f"  {url}", file=sys.stderr)
-                    return sorted(list(all_urls))
-                else:
-                    print(f"Failed to fetch sitemap: {response.status_code}", file=sys.stderr)
-            except Exception as e:
-                print(f"Error processing sitemap {sitemap_url}: {str(e)}", file=sys.stderr)
-            return []
-
-        def get_links(url, max_depth, visited=None):
-            if visited is None:
-                visited = set()
-            
-            current_depth = get_url_depth(url)
-            print(f"Checking URL {url} at depth {current_depth}", file=sys.stderr)
-            
-            if current_depth > max_depth or url in visited:
-                print(f"Skipping {url} - depth {current_depth} > {max_depth} or already visited", file=sys.stderr)
-                return set()
-            
-            visited.add(url)
-            links = set()
-            
-            try:
-                print(f"Fetching {url}", file=sys.stderr)
-                headers = get_auth_headers()
-                response = requests.get(url, headers=headers)
-                print(f"Response status: {response.status_code}", file=sys.stderr)
-                if response.status_code == 200:
-                    soup = BeautifulSoup(response.text, 'html.parser')
-                    print(f"Found {len(soup.find_all('a', href=True))} links on page", file=sys.stderr)
-                    
-                    for a in soup.find_all('a', href=True):
-                        href = a['href']
-                        print(f"Processing link: {href}", file=sys.stderr)
-                        
-                        # Skip external links and anchors
-                        if href.startswith('#') or href.startswith('http'):
-                            print(f"Skipping external/anchor link: {href}", file=sys.stderr)
-                            continue
-                            
-                        # Convert relative URLs to absolute
-                        full_url = urljoin(url, href)
-                        print(f"Converted to full URL: {full_url}", file=sys.stderr)
-                        
-                        # Only include URLs from the same base domain
-                        if urlparse(full_url).netloc == urlparse(url).netloc:
-                            # Extract just the path part
-                            path = urlparse(full_url).path
-                            # Remove leading slash if present
-                            path = path.lstrip('/')
-                            
-                            # Only include .html files
-                            if path.endswith('.html'):
-                                print(f"Found HTML link: {path}", file=sys.stderr)
-                                links.add(path)
-                                # Only recursively get links if we haven't hit max depth
-                                if get_url_depth(path) < max_depth:
-                                    print(f"Recursively checking {path} at depth {get_url_depth(path)}", file=sys.stderr)
-                                    links.update(get_links(full_url, max_depth, visited))
-                                else:
-                                    print(f"Skipping recursive check for {path} - at max depth", file=sys.stderr)
-                        else:
-                            print(f"Skipping external domain link: {href}", file=sys.stderr)
-            except Exception as e:
-                print(f"Error processing {url}: {str(e)}", file=sys.stderr)
-            
-            return links
-
-        # Get command line arguments
-        base_url = sys.argv[1]
-        max_depth = int(sys.argv[2])
-        
-        print(f"Base URL: {base_url}", file=sys.stderr)
-        print(f"Max depth: {max_depth}", file=sys.stderr)
-
-        # Get all URLs
-        all_urls = set()
-        
-        if max_depth == 0:
-            # For depth 0, only check ROOT_PAGES
-            print("Depth is 0, only checking ROOT_PAGES", file=sys.stderr)
-            for root in ROOT_PAGES:
-                all_urls.add(root)
-                print(f"Added root page: {root}", file=sys.stderr)
-        else:
-            # For depth > 0, use sitemap
-            print(f"Depth is {max_depth}, using sitemap", file=sys.stderr)
-            sitemap_url = f"{base_url}/sitemap.xml"
-            sitemap_urls = get_urls_from_sitemap(sitemap_url, max_depth)
-            print(f"Found {len(sitemap_urls)} URLs in sitemap", file=sys.stderr)
-            all_urls.update(sitemap_urls)
-
-        # Print URLs to stdout, ensuring proper URL construction
-        print(f"Total URLs found: {len(all_urls)}", file=sys.stderr)
-        for url in sorted(all_urls):
-            # Remove any leading slashes from the URL to avoid double slashes
-            url = url.lstrip('/')
-            # Construct the full URL by joining base_url and url with a single slash
-            full_url = f"{base_url.rstrip('/')}/{url}"
-            print(full_url)
-            print(f"Added URL: {full_url}", file=sys.stderr)
-        EOF
-
-        # Run the crawler
-        python crawl.py "$BASE_URL" "$DEPTH" > lhci-urls.txt
-        
-        echo "Lighthouse will check the following URLs:"
-        cat lhci-urls.txt
-        echo -e "\nTotal number of URLs: $(wc -l < lhci-urls.txt)"
-        
-        # Verify we have URLs
-        if [ ! -s lhci-urls.txt ]; then
-          echo "Error: No URLs were generated. Check the debug output above."
-          exit 1
-        fi
-
-    - name: Create Lighthouse config
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      run: |
-        cat > .lighthouserc.js << 'EOF'
-        const fs = require('fs');
-        const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean);
-        
-        // Add auth to installation URLs using the same format as the URL check step
-        const urlsWithAuth = urls.map(url => {
-          if (url.includes('/installation/')) {
-            return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`;
-          }
-          return url;
-        });
-        
-        module.exports = {
-          ci: {
-            collect: {
-              url: urlsWithAuth,
-              numberOfRuns: 1,
-              settings: {
-                formFactor: 'desktop',
-                screenEmulation: {
-                  mobile: false,
-                  width: 1350,
-                  height: 940,
-                  deviceScaleFactor: 1,
-                  disabled: false,
+
+          echo "skip=false" >> $GITHUB_OUTPUT
+          echo "Lighthouse will check:"
+          cat ../../lhci-urls.txt
+
+          # Probe first URL from list (beyond index.html) when in changed mode
+          if [ "$LIGHTHOUSE_MODE" = "changed" ]; then
+            FIRST=$(head -n1 ../../lhci-urls.txt)
+            status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$FIRST")
+            echo "Probe $FIRST — status: $status"
+            if [ "$status" -ne 200 ]; then
+              echo "Error: Changed page not reachable on preview"
+              exit 1
+            fi
+          fi
+
+      - name: Verify installation page auth
+        if: steps.check_preview.outputs.preview_exists == 'true' && steps.generate_urls.outputs.skip != 'true'
+        env:
+          INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }}
+          INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }}
+        run: |
+          if ! grep -q '/installation/' lhci-urls.txt 2>/dev/null; then
+            echo "No installation pages in URL list — skipping auth check"
+            exit 0
+          fi
+          # Credentials and the branch-derived preview path are taken from the environment so
+          # neither is pasted into the script text, where shell metacharacters would be executed.
+          auth_url="https://${INSTALLATION_USER}:${INSTALLATION_PW}@${PREVIEW_URL#https://}/installation/index.html"
+          status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url")
+          echo "Checking installation page — status: $status"
+          [ "$status" -eq 200 ] || { echo "Error: Installation page not accessible with authentication"; exit 1; }
+
+      - name: Post skip comment
+        if: steps.generate_urls.outputs.skip == 'true'  # set by "Generate URLs to check" when the skip file exists
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const prNumber = parseInt(process.env.PR_NUMBER, 10); // exported to GITHUB_ENV by the "Set environment" step
+            const body = `## Lighthouse check results\n\n✓ INFO: No site pages to audit in this PR.\n\nCommit SHA: [${process.env.COMMIT_SHA_SHORT}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${process.env.COMMIT_SHA})`;
+            await github.rest.issues.createComment({ // NOTE(review): adds a new comment on each run; earlier comments are not updated here
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body,
+            });
+
+      - name: Install Lighthouse CI
+        if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true'
+        run: npm install -g @lhci/cli
+
+      - name: Create Lighthouse config
+        if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true'
+        run: |
+          cat > .lighthouserc.js << 'EOF'
+          const fs = require('fs');
+          const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean);
+
+          const urlsWithAuth = urls.map(url => {
+            if (url.includes('/installation/')) {
+              return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`;
+            }
+            return url;
+          });
+
+          module.exports = {
+            ci: {
+              collect: {
+                url: urlsWithAuth,
+                numberOfRuns: 3,
+                settings: {
+                  formFactor: 'desktop',
+                  screenEmulation: {
+                    mobile: false,
+                    width: 1350,
+                    height: 940,
+                    deviceScaleFactor: 1,
+                    disabled: false,
+                  },
+                  throttling: {
+                    rttMs: 40,
+                    throughputKbps: 10240,
+                    cpuSlowdownMultiplier: 1,
+                    requestLatencyMs: 0,
+                    downloadThroughputKbps: 0,
+                    uploadThroughputKbps: 0,
+                  },
                 },
-                throttling: {
-                  rttMs: 40,
-                  throughputKbps: 10240,
-                  cpuSlowdownMultiplier: 1,
-                  requestLatencyMs: 0,
-                  downloadThroughputKbps: 0,
-                  uploadThroughputKbps: 0,
+              },
+              assert: {
+                assertions: {
+                  'categories:accessibility': ['error', { minScore: 0.9 }],
                 },
               },
-            },
-            assert: {
-              assertions: {
-                'categories:accessibility': ['error', { minScore: 0.9 }],
+              upload: {
+                target: 'temporary-public-storage',
               },
             },
-            upload: {
-              target: 'temporary-public-storage',
-            },
-          },
-        };
-        EOF
-
-    - name: Run Lighthouse audit
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      uses: treosh/lighthouse-ci-action@v11
-      id: lighthouse
-      continue-on-error: true
-      env:
-        INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }}
-        INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }}
-      with:
-        configPath: .lighthouserc.js
-        uploadArtifacts: true
-        temporaryPublicStorage: true
-
-    - name: Check Lighthouse audit result
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      run: |
-        # Check if the manifest exists and is valid JSON
-        if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then
-          echo "Error: Lighthouse audit failed - no manifest output"
-          exit 1
-        fi
-        
-        # Try to parse the manifest as JSON
-        if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then
-          echo "Error: Lighthouse audit failed - invalid manifest format"
-          exit 1
-        fi
-        
-        # Check if any URLs were successfully audited
-        if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq 'length > 0' > /dev/null 2>&1; then
-          echo "Error: Lighthouse audit failed - no URLs were successfully audited"
-          exit 1
-        fi
-
-    - name: Post Lighthouse results comment
-      if: steps.check_preview.outputs.preview_exists == 'true'
-      uses: actions/github-script@v6
-      with:
-        script: |
-          const runId = context.runId;
-          const baseUrl = process.env.PREVIEW_URL;
-          const commitSha = process.env.COMMIT_SHA;
-          const commitShaShort = process.env.COMMIT_SHA_SHORT;
-
-          // Get artifacts for this run
-          const { data: artifacts } = await github.rest.actions.listWorkflowRunArtifacts({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            run_id: runId,
-          });
+          };
+          EOF
+
+      - name: Run Lighthouse audit
+        if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true'
+        uses: treosh/lighthouse-ci-action@v11
+        id: lighthouse
+        env:
+          INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }}
+          INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }}
+        with:
+          configPath: .lighthouserc.js
+          uploadArtifacts: true
+          temporaryPublicStorage: true
+
+      - name: Check Lighthouse audit result
+        if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true'
+        env:
+          # Pass the manifest as data: inlining the expression in the script lets a quote in it be parsed as shell.
+          MANIFEST: ${{ steps.lighthouse.outputs.manifest }}
+        run: |
+          if [ -z "$MANIFEST" ]; then
+            echo "Error: Lighthouse audit failed - no manifest output"
+            exit 1
+          fi
+          if ! printf '%s\n' "$MANIFEST" | jq . > /dev/null 2>&1; then
+            echo "Error: Lighthouse audit failed - invalid manifest format"
+            exit 1
+          fi
+          # --exit-status is required: without it jq exits 0 even when the expression prints false.
+          if ! printf '%s\n' "$MANIFEST" | jq --exit-status 'length > 0' > /dev/null 2>&1; then
+            echo "Error: Lighthouse audit failed - no URLs were successfully audited"
+            exit 1
+          fi
+          below=$(printf '%s\n' "$MANIFEST" | jq '[.[] | select(.summary.accessibility < 0.9)] | length')
+          if [ "$below" -gt 0 ]; then
+            echo "Error: $below page(s) scored below 0.9 on accessibility"
+            printf '%s\n' "$MANIFEST" | jq -r '.[] | select(.summary.accessibility < 0.9) | "\(.url): \(.summary.accessibility)"'
+            exit 1
+          fi
 
-          // Lighthouse artifact
-          const lighthouseArtifact = artifacts.artifacts.find(a => a.name === 'lighthouse-report');
-          const lighthouseArtifactUrl = lighthouseArtifact
-            ? `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}/artifacts/${lighthouseArtifact.id}`
-            : null;
-
-          // Lighthouse
-          const manifest = '${{ steps.lighthouse.outputs.manifest }}';
-          let manifestJson;
-          try {
-            manifestJson = JSON.parse(manifest);
-            if (!Array.isArray(manifestJson) || manifestJson.length === 0) {
-              throw new Error('Invalid manifest format or empty results');
+      - name: Post Lighthouse results comment
+        if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true'
+        uses: actions/github-script@v6
+        env:
+          LIGHTHOUSE_MODE: ${{ env.LIGHTHOUSE_MODE }}
+          LIGHTHOUSE_DEPTH: ${{ env.LIGHTHOUSE_DEPTH }}
+        with:
+          script: |
+            const fs = require('fs');
+            const prNumber = parseInt(process.env.PR_NUMBER, 10);
+            const runId = context.runId;
+            const baseUrl = process.env.PREVIEW_URL;
+            const commitSha = process.env.COMMIT_SHA;
+            const commitShaShort = process.env.COMMIT_SHA_SHORT;
+            const mode = process.env.LIGHTHOUSE_MODE;
+            const depth = process.env.LIGHTHOUSE_DEPTH;
+
+            let metadata = {};
+            try {
+              metadata = JSON.parse(fs.readFileSync('lighthouse-metadata.json', 'utf8'));
+            } catch (e) {
+              console.log('No metadata file:', e.message);
             }
-          } catch (error) {
-            console.error('Error parsing Lighthouse manifest:', error);
-            await github.rest.issues.createComment({
+
+            const manifest = '${{ steps.lighthouse.outputs.manifest }}';
+            let manifestJson;
+            try {
+              manifestJson = JSON.parse(manifest);
+              if (!Array.isArray(manifestJson) || manifestJson.length === 0) {
+                throw new Error('Invalid manifest');
+              }
+            } catch (error) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. [Workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId})`,
+              });
+              return;
+            }
+
+            const { data: comments } = await github.rest.issues.listComments({
               owner: context.repo.owner,
               repo: context.repo.repo,
-              issue_number: context.issue.number,
-              body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. Please check the [workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}) for details.`
+              issue_number: prNumber,
             });
-            return;
-          }
-          
-          // Delete old Lighthouse comments
-          const { data: comments } = await github.rest.issues.listComments({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            issue_number: context.issue.number,
-          });
-          
-          // Delete any previous comments from this workflow
-          for (const comment of comments) {
-            if (comment.user.login === 'github-actions[bot]' && 
-                comment.body.includes('## Lighthouse check results')) {
-              try {
-                console.log(`Deleting Lighthouse comment ${comment.id}`);
+            for (const comment of comments) {
+              if (comment.user.login === 'github-actions[bot]' &&
+                  comment.body.includes('## Lighthouse check results')) {
                 await github.rest.issues.deleteComment({
                   owner: context.repo.owner,
                   repo: context.repo.repo,
                   comment_id: comment.id,
                 });
-                console.log(`Successfully deleted Lighthouse comment ${comment.id}`);
-              } catch (error) {
-                console.error(`Failed to delete Lighthouse comment ${comment.id}:`, error);
               }
             }
-          }
-          
-          // Calculate average accessibility score
-          const scores = manifestJson.map(run => run.summary.accessibility);
-          const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length;
-          const lighthouseScore = avgScore.toFixed(2);
-          
-          const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`;
-          let lighthouseComment = '';
-          if (parseFloat(lighthouseScore) >= 0.9) {
-            lighthouseComment = `✓ INFO: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [View the workflow run](${lighthouseReportUrl})`; 
-          } else {
-            lighthouseComment = `⚠️ WARN: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [Check the workflow run](${lighthouseReportUrl})`;
-          }
 
-          const stripAuth = url => {
-            try {
-              const u = new URL(url);
-              u.username = '';
-              u.password = '';
-              return u.toString();
-            } catch {
-              return url;
-            }
-          };
+            const scores = manifestJson.map(run => run.summary.accessibility);
+            const avgScore = (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(2);
+            const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`;
+            const lighthouseComment = parseFloat(avgScore) >= 0.9
+              ? `✓ INFO: Average accessibility score is **${avgScore}** (required: ≥0.9) — [View the workflow run](${lighthouseReportUrl})`
+              : `⚠️ WARN: Average accessibility score is **${avgScore}** (required: ≥0.9) — [Check the workflow run](${lighthouseReportUrl})`;
 
-          // Helper to get the public report URL from htmlPath
-          const getReportUrl = (run) => {
-            if (run.report && Array.isArray(run.report)) {
-              // Find the public .report.html URL
-              const htmlReport = run.report.find(r => r.endsWith('.report.html') && r.startsWith('http'));
-              if (htmlReport) return htmlReport;
-              // Fallback: first report if available
-              if (run.report.length > 0) return run.report[0];
-            }
-            // Fallback: just show the workflow run if nothing else
-            return lighthouseReportUrl;
-          };
+            const stripAuth = url => {
+              try {
+                const u = new URL(url);
+                u.username = '';
+                u.password = '';
+                return u.toString();
+              } catch {
+                return url;
+              }
+            };
 
-          // Parse the links output from the Lighthouse step
-          const links = (() => {
-            try {
-              return JSON.parse(`${{ steps.lighthouse.outputs.links }}`);
-            } catch {
-              return {};
+            const links = (() => {
+              try {
+                return JSON.parse(`${{ steps.lighthouse.outputs.links }}`);
+              } catch {
+                return {};
+              }
+            })();
+
+            const scoresTable = manifestJson
+              .map(run => {
+                const formatScore = score => score === null ? 'N/A' : score.toFixed(2);
+                const displayPath = stripAuth(run.url).replace(baseUrl, '') || run.url;
+                const reportUrl = links[run.url] || lighthouseReportUrl;
+                return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`;
+              })
+              .join('\n');
+
+            const modeLine = mode === 'changed'
+              ? `Audit mode: **changed pages** (${metadata.paths?.length || manifestJson.length} URL(s))`
+              : `Audit mode: **depth ${depth}** (sitemap)`;
+
+            let comment = `## Lighthouse check results\n\n`;
+            comment += `${lighthouseComment}\n\n`;
+            comment += `${modeLine}\n\n`;
+            comment += `<details>\n<summary>Show Lighthouse scores</summary>\n\n`;
+            comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`;
+            if (metadata.global_fallback) {
+              comment += `_Global site files changed — audited root navigation pages._\n\n`;
             }
-          })();
-
-          const scoresTable = manifestJson
-            .map(run => {
-              const formatScore = (score) => score === null ? 'N/A' : score.toFixed(2);
-              const displayPath = stripAuth(run.url).replace(baseUrl, '');
-              // Use the public report URL from the links output, fallback to workflow run if missing
-              const reportUrl = links[run.url] || lighthouseReportUrl;
-              return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`;
-            })
-            .join('\n');
-
-          let comment = `## Lighthouse check results\n\n`;
-          comment += `${lighthouseComment}\n\n`;
-          comment += `<details>\n<summary>Show Lighthouse scores</summary>\n\n`;
-          comment += `Folder depth level checked: **${process.env.DEPTH}**\n\n`;
-          comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`;
-          comment += `Modify the workflow to check a different depth:\n`;
-          comment += `- 0: Top-level navigation only — /index.html, /guide/guides.html, ...\n`;
-          comment += `- 1: All first-level subdirectories — /guide/\*.html, /developer/\*.html, ...\n`;
-          comment += `- 2: All second-level subdirectories — /guide/attestation/\*.html, ...\n\n`;
-          comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`;
-          comment += `|------|---------------|-------------|----------------|-----|\n`;
-          comment += `${scoresTable}\n\n`;
-          comment += `</details>\n\n`;
-
-          await github.rest.issues.createComment({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            issue_number: context.issue.number,
-            body: comment
-          }); 
+            comment += `For a thorough audit, run the **Lighthouse check** workflow manually (Actions → Lighthouse check → Run workflow) with depth 0–2, or add the \`lighthouse:full\` label for depth 2 on the next validate run.\n\n`;
+            comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`;
+            comment += `|------|---------------|-------------|----------------|-----|\n`;
+            comment += `${scoresTable}\n\n`;
+            comment += `</details>\n\n`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: comment,
+            });
diff --git a/README.md b/README.md
index 8de8105fa5..66f3e92c9f 100644
--- a/README.md
+++ b/README.md
@@ -379,24 +379,22 @@ Similarly, http://localhost:4444/ in your browsers should show an all green logo
 
 ## Configuring Lighthouse checks
 
-Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites to enable a better, accessible documentation for everyone.
+Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites after the **Validate docs site** workflow deploys a preview.
 
-By default, Lighthouse checks only the top-level pages in our site navigation, such as `/index.html`, `/guide/guides.html`, `/developer/validmind-library.html`, and so forth. You can configure this behavior in the workflow:
+**Default (every PR):** Lighthouse audits only HTML pages that correspond to files changed under `site/` in the pull request. If you change shared layout files (`_quarto.yml`, `theme.scss`, `_variables.yml`, `_extensions/`, and similar), it falls back to the root navigation pages (`index.html`, `guide/guides.html`, and so on).
 
-```sh
-env:
-  # To change the default depth level:
-  # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.)
-  # 1 — All first-level subdirectories (e.g. /guide/*.html)
-  # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html)
-  # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete
-  DEFAULT_DEPTH: '0'
-```
+**Thorough audit:**
+
+- Add the `lighthouse:full` label to a PR to run a depth-2 sitemap audit on the next successful validate run.
+- Or run the **Lighthouse check** workflow manually from Actions → **Run workflow**, set the PR number, and choose depth `0` (root pages), `1` (first-level sections), or `2` (second-level). Depths above zero can take hours; use them on feature branches only.
+
+The PR comment lists audited URLs, the commit SHA, and accessibility scores (required: ≥ 0.9 per page).
 
 **Tips:**
 
-- On the first run, the workflow waits for a preview site to become available. For subsequent runs, it checks the currently available site, which may be behind HEAD. The PR comment shows which commit SHA was checked — rerun the check if needed.
-- Use folder depths greater than zero only on working branches when you need a thorough site audit. Deeper checks take 2-12 hours to complete and significantly slow down the CI/CD pipeline. Do not merge depth changes to `main`.
+- Lighthouse starts only after validate succeeds, so it no longer polls for up to 45 minutes.
+- If a PR changes only CI or repo metadata (no `site/` pages), Lighthouse skips with an informational comment.
+- Re-run validate (or push a commit) if the preview comment SHA does not match the commit you expect audited.
 
 ## Monitoring
 
diff --git a/site/scripts/lighthouse_urls.py b/site/scripts/lighthouse_urls.py
new file mode 100644
index 0000000000..73234c3b7e
--- /dev/null
+++ b/site/scripts/lighthouse_urls.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+"""Generate Lighthouse preview URLs from PR diffs or sitemap depth."""
+
+from __future__ import annotations
+
+import argparse
+import fnmatch
+import json
+import os
+import re
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from urllib.parse import urlparse
+
+import requests
+
+ROOT_PAGES = [
+    "index.html",
+    "get-started/get-started.html",
+    "guide/guides.html",
+    "developer/validmind-library.html",
+    "support/support.html",
+    "releases/all-releases.html",
+    "training/training.html",
+]
+
+GLOBAL_PATTERNS = [
+    "site/_quarto.yml",
+    "site/_quarto-*.yml",
+    "site/_variables.yml",
+    "site/theme.scss",
+    "site/styles.css",
+    "site/_extensions/**",
+]
+
+OUTPUT_FILE_RE = re.compile(
+    r"^\s*output-file:\s*[_]?([^\s#]+\.html)\s*$",
+    re.MULTILINE,
+)
+
+SITEMAP_NS = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}
+
+
+def _matches_global_pattern(path: str) -> bool:
+    for pattern in GLOBAL_PATTERNS:
+        if fnmatch.fnmatch(path, pattern):
+            return True
+    return False
+
+
+def _parse_output_file(qmd_path: Path) -> str | None:
+    try:
+        text = qmd_path.read_text(encoding="utf-8")
+    except OSError:
+        return None
+    match = OUTPUT_FILE_RE.search(text)
+    if not match:
+        return None
+    name = match.group(1).lstrip("_")
+    return str(qmd_path.parent.relative_to(Path("site")) / name).replace("\\", "/")
+
+
+def qmd_path_to_html(path: str) -> str | None:
+    """Map a site/ source path to a preview HTML path."""
+    if not path.startswith("site/"):
+        return None
+
+    rel = path[len("site/") :]
+    p = Path(rel)
+
+    if p.suffix == ".qmd":
+        if p.name == "index.qmd":
+            return str(p.parent / "index.html").replace("\\", "/")
+        return str(p.with_suffix(".html")).replace("\\", "/")
+
+    return None
+
+
+def asset_path_to_html(path: str) -> str | None:
+    """Map co-located assets under site/ to their page HTML."""
+    if not path.startswith("site/"):
+        return None
+    rel = Path(path[len("site/") :])
+    if rel.suffix == ".qmd":
+        return qmd_path_to_html(path)
+
+    parent = rel.parent
+    if parent == Path("."):
+        return None
+
+    site_parent = Path("site") / parent
+    index_qmd = site_parent / "index.qmd"
+    if index_qmd.exists():
+        return str(parent / "index.html").replace("\\", "/")
+
+    for qmd in sorted(site_parent.glob("*.qmd")):
+        if qmd.name != "index.qmd":
+            return str(parent / f"{qmd.stem}.html").replace("\\", "/")
+
+    return None
+
+
+def changed_file_to_html(path: str) -> list[str]:
+    """Return HTML paths affected by a single changed file."""
+    if _matches_global_pattern(path):
+        return list(ROOT_PAGES)
+
+    if path.endswith(".qmd"):
+        html = qmd_path_to_html(path)
+        if html:
+            qmd_file = Path(path)
+            custom = _parse_output_file(qmd_file) if qmd_file.exists() else None
+            results = [html]
+            if custom and custom not in results:
+                results.append(custom)
+            return results
+        return []
+
+    html = asset_path_to_html(path)
+    return [html] if html else []
+
+
+def git_changed_files(base_ref: str) -> list[str]:
+    subprocess.run(
+        ["git", "fetch", "origin", base_ref],
+        check=True,
+        capture_output=True,
+    )
+    result = subprocess.run(
+        ["git", "diff", "--name-only", f"origin/{base_ref}...HEAD", "--", "site/"],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+
+
+def urls_from_changed_files(base_ref: str) -> tuple[list[str], bool]:
+    """Return sorted HTML paths and whether global fallback was used."""
+    changed = git_changed_files(base_ref)
+    if not changed:
+        return [], False
+
+    html_paths: set[str] = set()
+    used_global_fallback = False
+
+    for path in changed:
+        if _matches_global_pattern(path):
+            used_global_fallback = True
+            html_paths.update(ROOT_PAGES)
+            continue
+        for html in changed_file_to_html(path):
+            html_paths.add(html)
+
+    if used_global_fallback:
+        return sorted(ROOT_PAGES), True
+
+    return sorted(html_paths), False
+
+
+def _path_depth(html_path: str) -> int:
+    path = html_path.replace(".html", "").strip("/")
+    if not path or path == "index":
+        return 0
+    return len([s for s in path.split("/") if s])
+
+
+def urls_from_sitemap(preview_base_url: str, max_depth: int) -> list[str]:
+    """Return sorted HTML paths from the preview sitemap with depth <= max_depth."""
+    if max_depth == 0:
+        # Depth 0 is the fixed ROOT_PAGES list, so skip the sitemap fetch entirely.
+        return sorted(ROOT_PAGES)
+
+    sitemap_url = f"{preview_base_url.rstrip('/')}/sitemap.xml"
+    response = requests.get(sitemap_url, timeout=60)
+    response.raise_for_status()
+    root = ET.fromstring(response.content)
+    urls: set[str] = set()
+
+    for url_el in root.findall(".//sm:url", SITEMAP_NS):
+        loc = url_el.find("sm:loc", SITEMAP_NS)
+        if loc is None or not loc.text:
+            continue
+        path = urlparse(loc.text).path.lstrip("/")
+        if not path.endswith(".html"):
+            continue
+        # Drop "pr_previews" and the three segments after it to get a site path.
+        segments = path.split("/")
+        pr_idx = next((i for i, s in enumerate(segments) if s == "pr_previews"), -1)
+        if pr_idx >= 0 and len(segments) > pr_idx + 4:
+            path = "/".join(segments[pr_idx + 4 :])
+        if _path_depth(path) <= max_depth:
+            urls.add(path)
+
+    return sorted(urls)
+
+
+def verify_urls(
+    preview_base_url: str,
+    html_paths: list[str],
+    installation_user: str | None = None,
+    installation_password: str | None = None,
+) -> list[str]:
+    """Keep only paths that return HTTP 200 on the preview.
+
+    Paths under ``installation/`` are requested with HTTP basic auth when both
+    credentials are given. Unreachable or non-200 paths are logged to stderr
+    and dropped; returned paths have their leading slashes stripped.
+    """
+    base = preview_base_url.rstrip("/")
+    request_kwargs = {
+        "allow_redirects": True,
+        "timeout": 30,
+        "headers": {"User-Agent": "Mozilla/5.0"},
+    }
+    credentials = None
+    if installation_user and installation_password:
+        # Sent via ``auth=`` instead of being embedded in the URL, so special
+        # characters in the credentials cannot corrupt the URL.
+        credentials = (installation_user, installation_password)
+    ok: list[str] = []
+
+    for path in html_paths:
+        path = path.lstrip("/")
+        url = f"{base}/{path}"
+        auth = credentials if path.startswith("installation/") else None
+        try:
+            status = requests.head(url, auth=auth, **request_kwargs).status_code
+            if status == 405:
+                # HEAD was answered with 405 (method not allowed); retry with GET.
+                status = requests.get(url, auth=auth, **request_kwargs).status_code
+        except requests.RequestException as exc:
+            print(f"WARN: Could not reach {path}: {exc}", file=sys.stderr)
+            continue
+
+        if status == 200:
+            ok.append(path)
+            print(f"OK: {path}", file=sys.stderr)
+        else:
+            print(f"WARN: Skipping {path} (HTTP {status})", file=sys.stderr)
+
+    return ok
+
+
+def write_url_list(preview_base_url: str, html_paths: list[str], out_path: Path) -> None:
+    base = preview_base_url.rstrip("/")
+    lines = [f"{base}/{p.lstrip('/')}" for p in html_paths]
+    out_path.write_text("\n".join(lines) + ("\n" if lines else ""), encoding="utf-8")
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Generate Lighthouse URL list")
+    parser.add_argument("--mode", choices=["changed", "depth"], required=True)
+    parser.add_argument("--base-ref", default="main")
+    parser.add_argument("--depth", type=int, default=0, choices=[0, 1, 2])
+    parser.add_argument("--preview-url", required=True)
+    parser.add_argument("--output", default="lhci-urls.txt")
+    parser.add_argument("--metadata", default="lighthouse-metadata.json")
+    parser.add_argument(
+        "--skip-file",
+        help="If set and no URLs, write this path so workflow can detect skip",
+    )
+    args = parser.parse_args()
+
+    metadata: dict = {
+        "mode": args.mode,
+        "depth": args.depth if args.mode == "depth" else None,
+        "global_fallback": False,
+        "skip": False,
+        "paths": [],
+    }
+
+    if args.mode == "changed":
+        paths, global_fallback = urls_from_changed_files(args.base_ref)
+        metadata["global_fallback"] = global_fallback
+    else:
+        paths = urls_from_sitemap(args.preview_url, args.depth)
+
+    if not paths:
+        metadata["skip"] = True
+        Path(args.metadata).write_text(json.dumps(metadata, indent=2), encoding="utf-8")
+        if args.skip_file:
+            Path(args.skip_file).write_text("skip\n", encoding="utf-8")
+        print("No pages to audit in this PR.", file=sys.stderr)
+        return 0
+
+    verified = verify_urls(
+        args.preview_url,
+        paths,
+        installation_user=os.environ.get("INSTALLATION_USER"),
+        installation_password=os.environ.get("INSTALLATION_PW"),
+    )
+    if not verified:
+        print("Error: No URLs returned HTTP 200 on the preview.", file=sys.stderr)
+        return 1
+
+    metadata["paths"] = verified
+    Path(args.metadata).write_text(json.dumps(metadata, indent=2), encoding="utf-8")
+    write_url_list(args.preview_url, verified, Path(args.output))
+    print(f"Wrote {len(verified)} URL(s) to {args.output}", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/site/scripts/test_lighthouse_urls.py b/site/scripts/test_lighthouse_urls.py
new file mode 100644
index 0000000000..8741af75e4
--- /dev/null
+++ b/site/scripts/test_lighthouse_urls.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""Unit tests for lighthouse_urls.py"""
+
+import tempfile
+import unittest
+from pathlib import Path
+from unittest import mock
+
+from lighthouse_urls import (
+    ROOT_PAGES,
+    _matches_global_pattern,
+    asset_path_to_html,
+    changed_file_to_html,
+    qmd_path_to_html,
+    urls_from_changed_files,
+)
+
+
+class TestQmdMapping(unittest.TestCase):
+    def test_simple_qmd(self):
+        self.assertEqual(
+            qmd_path_to_html("site/guide/foo.qmd"),
+            "guide/foo.html",
+        )
+
+    def test_index_qmd(self):
+        self.assertEqual(
+            qmd_path_to_html("site/guide/foo/index.qmd"),
+            "guide/foo/index.html",
+        )
+
+    def test_root_index(self):
+        self.assertEqual(
+            qmd_path_to_html("site/index.qmd"),
+            "index.html",
+        )
+
+
+class TestGlobalPatterns(unittest.TestCase):
+    def test_quarto_yml(self):
+        self.assertTrue(_matches_global_pattern("site/_quarto.yml"))
+
+    def test_theme_scss(self):
+        self.assertTrue(_matches_global_pattern("site/theme.scss"))
+
+    def test_extensions(self):
+        self.assertTrue(_matches_global_pattern("site/_extensions/foo/bar.lua"))
+
+    def test_page_qmd_not_global(self):
+        self.assertFalse(_matches_global_pattern("site/guide/foo.qmd"))
+
+
+class TestChangedFileToHtml(unittest.TestCase):
+    def test_global_returns_root_pages(self):
+        result = changed_file_to_html("site/_quarto.yml")
+        self.assertEqual(result, ROOT_PAGES)
+
+    def test_asset_with_index_qmd(self):
+        import os
+
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            (root / "site" / "guide" / "foo").mkdir(parents=True)
+            (root / "site" / "guide" / "foo" / "index.qmd").write_text("---\n")
+            (root / "site" / "guide" / "foo" / "pic.png").write_bytes(b"")
+            prev = os.getcwd()
+            try:
+                os.chdir(tmp)
+                html = asset_path_to_html("site/guide/foo/pic.png")
+            finally:
+                os.chdir(prev)
+            self.assertEqual(html, "guide/foo/index.html")
+
+
+class TestUrlsFromChangedFiles(unittest.TestCase):
+    def test_empty_diff(self):
+        with mock.patch("lighthouse_urls.git_changed_files", return_value=[]):
+            paths, fallback = urls_from_changed_files("main")
+            self.assertEqual(paths, [])
+            self.assertFalse(fallback)
+
+    def test_single_qmd(self):
+        with mock.patch(
+            "lighthouse_urls.git_changed_files",
+            return_value=["site/developer/how-to/test-sandbox.qmd"],
+        ):
+            paths, fallback = urls_from_changed_files("main")
+            self.assertEqual(paths, ["developer/how-to/test-sandbox.html"])
+            self.assertFalse(fallback)
+
+    def test_global_fallback(self):
+        with mock.patch(
+            "lighthouse_urls.git_changed_files",
+            return_value=["site/_variables.yml", "site/guide/foo.qmd"],
+        ):
+            paths, fallback = urls_from_changed_files("main")
+            self.assertEqual(set(paths), set(ROOT_PAGES))
+            self.assertTrue(fallback)
+
+
+if __name__ == "__main__":
+    unittest.main()

From c82745be62d5a70b816f70a02589218a39c5cf9d Mon Sep 17 00:00:00 2001
From: Nik Richers <nik@validmind.ai>
Date: Mon, 18 May 2026 09:51:05 -0700
Subject: [PATCH 2/4] Dispatch Lighthouse from validate after preview deploy
 (sc-12702)

workflow_run only runs from the default branch, so trigger Lighthouse via
workflow_dispatch from validate on the PR branch instead.
---
 .github/workflows/lighthouse-check.yaml   | 60 ++++++-----------------
 .github/workflows/validate-docs-site.yaml | 18 +++++++
 README.md                                 |  2 +-
 3 files changed, 33 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml
index f075ff41b7..de78a2ce5d 100644
--- a/.github/workflows/lighthouse-check.yaml
+++ b/.github/workflows/lighthouse-check.yaml
@@ -1,13 +1,18 @@
 name: Lighthouse check
 
 on:
-  workflow_run:
-    workflows: ["Validate docs site (render, test, and deploy)"]
-    types: [completed]
   workflow_dispatch:
     inputs:
+      mode:
+        description: "Audit mode"
+        required: true
+        default: "changed"
+        type: choice
+        options:
+          - "changed"
+          - "depth"
       depth:
-        description: "Sitemap depth for thorough audit (0–2)"
+        description: "Sitemap depth when mode is depth (0–2)"
         required: true
         default: "0"
         type: choice
@@ -29,11 +34,7 @@ permissions:
 jobs:
   lighthouse:
     runs-on: ubuntu-latest
-    if: |
-      (github.event_name == 'workflow_run' &&
-       github.event.workflow_run.conclusion == 'success' &&
-       github.event.workflow_run.event == 'pull_request') ||
-      github.event_name == 'workflow_dispatch'
+    if: github.event_name == 'workflow_dispatch'
     steps:
       - name: Resolve PR context
         id: pr
@@ -64,46 +65,13 @@ jobs:
               };
             }
 
-            if (context.eventName === 'workflow_dispatch') {
-              const prNumber = parseInt('${{ inputs.pr_number }}', 10);
-              const info = await getPr(prNumber);
-              if (!info) return;
-              core.setOutput('number', String(info.number));
-              core.setOutput('head_ref', info.head_ref);
-              core.setOutput('head_sha', info.head_sha);
-              core.setOutput('base_ref', info.base_ref);
-              core.setOutput('mode', 'depth');
-              core.setOutput('depth', '${{ inputs.depth }}');
-              core.setOutput('full_audit', String(info.full_audit));
-              return;
-            }
-
-            const run = context.payload.workflow_run;
-            let prNumber = null;
-            if (run.pull_requests && run.pull_requests.length > 0) {
-              prNumber = run.pull_requests[0].number;
-            } else {
-              const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({
-                owner,
-                repo,
-                commit_sha: run.head_sha,
-              });
-              if (prs.length > 0) {
-                prNumber = prs[0].number;
-              }
-            }
-
-            if (!prNumber) {
-              core.setFailed('Could not resolve PR for workflow_run');
-              return;
-            }
-
+            const prNumber = parseInt('${{ inputs.pr_number }}', 10);
             const info = await getPr(prNumber);
             if (!info) return;
 
-            let mode = 'changed';
-            let depth = '0';
-            if (info.full_audit) {
+            let mode = '${{ inputs.mode }}';
+            let depth = '${{ inputs.depth }}';
+            if (info.full_audit && mode === 'changed') {
               mode = 'depth';
               depth = '2';
             }
diff --git a/.github/workflows/validate-docs-site.yaml b/.github/workflows/validate-docs-site.yaml
index 1116bfae2a..35c48aefe5 100644
--- a/.github/workflows/validate-docs-site.yaml
+++ b/.github/workflows/validate-docs-site.yaml
@@ -5,6 +5,7 @@ on:
     types: [opened, synchronize, ready_for_review]
 
 permissions:
+  actions: write
   issues: write
   pull-requests: write
 
@@ -161,6 +162,23 @@ jobs:
             body: comment
           });
 
+    - name: Trigger Lighthouse check
+      uses: actions/github-script@v6
+      with:
+        script: |
+          await github.rest.actions.createWorkflowDispatch({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            workflow_id: 'lighthouse-check.yaml',
+            ref: context.payload.pull_request.head.ref,
+            inputs: {
+              mode: 'changed',
+              depth: '0',
+              pr_number: String(context.issue.number),
+            },
+          });
+          console.log(`Dispatched Lighthouse check for PR #${context.issue.number}`);
+
     - name: Install pandoc
       run: |
         sudo apt-get update
diff --git a/README.md b/README.md
index 66f3e92c9f..1bf5519e53 100644
--- a/README.md
+++ b/README.md
@@ -379,7 +379,7 @@ Similarly, http://localhost:4444/ in your browsers should show an all green logo
 
 ## Configuring Lighthouse checks
 
-Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites after the **Validate docs site** workflow deploys a preview.
+Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites: when the **Validate docs site** workflow finishes deploying a preview, it dispatches the Lighthouse workflow on the PR branch.
 
 **Default (every PR):** Lighthouse audits only HTML pages that correspond to files changed under `site/` in the pull request. If you change shared layout files (`_quarto.yml`, `theme.scss`, `_variables.yml`, `_extensions/`, and similar), it falls back to the root navigation pages (`index.html`, `guide/guides.html`, and so on).
 

From 4459f469b630d2b937c2a510bcf22883e8bccdc4 Mon Sep 17 00:00:00 2001
From: Nik Richers <nik@validmind.ai>
Date: Mon, 18 May 2026 16:41:40 -0700
Subject: [PATCH 3/4] test: Trigger Lighthouse changed-page audit via minor
 wording tweak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exercises the changed-files path on PR 1333 — Lighthouse should audit
developer/how-to/test-sandbox.html only.
---
 site/developer/how-to/test-sandbox.qmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/site/developer/how-to/test-sandbox.qmd b/site/developer/how-to/test-sandbox.qmd
index c9ea3a23c3..50e9505ec9 100644
--- a/site/developer/how-to/test-sandbox.qmd
+++ b/site/developer/how-to/test-sandbox.qmd
@@ -12,7 +12,7 @@ aliases:
 <!--- TO DO
 - Ordering of notebooks if we want them to appear in a specific sequence
 --->
-Explore our interactive sandbox to see what tests are available in the {{< var validmind.developer >}} and how you can use them in your own code.
+Explore our interactive sandbox to see which tests are available in the {{< var validmind.developer >}} and how you can use them in your own code.
 
 ::: {.column-screen-right}
 

From 8f0a727f71efb7b7ba3744269a8d733f13d5fbe2 Mon Sep 17 00:00:00 2001
From: Nik Richers <nik@validmind.ai>
Date: Mon, 18 May 2026 17:31:33 -0700
Subject: [PATCH 4/4] Fix diff pathspec so script works from any cwd (sc-12702)

Use ':(top)site/' to anchor the pathspec at the repo root regardless of
where the script is invoked from. The workflow runs it from site/scripts/,
which previously caused git diff to return zero files.
---
 site/scripts/lighthouse_urls.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/site/scripts/lighthouse_urls.py b/site/scripts/lighthouse_urls.py
index 73234c3b7e..b7aa13619b 100644
--- a/site/scripts/lighthouse_urls.py
+++ b/site/scripts/lighthouse_urls.py
@@ -129,7 +129,14 @@ def git_changed_files(base_ref: str) -> list[str]:
         capture_output=True,
     )
     result = subprocess.run(
-        ["git", "diff", "--name-only", f"origin/{base_ref}...HEAD", "--", "site/"],
+        [
+            "git",
+            "diff",
+            "--name-only",
+            f"origin/{base_ref}...HEAD",
+            "--",
+            ":(top)site/",
+        ],
         check=True,
         capture_output=True,
         text=True,