Skip to content

Commit 0c9fd3f

Browse files
authored
Merge pull request #19 from sharpninja/copilot/get-real-benchmark-run
Clarify how to get a real benchmark run from Benchmark Comparison
2 parents 7338750 + b11e45b commit 0c9fd3f

File tree

3 files changed

+43
-0
lines changed

3 files changed

+43
-0
lines changed

.github/workflows/benchmark-comparison.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ jobs:
8484
else
8585
echo "BENCHMARK_EXTRA_ARG=--dry-run" >> "$GITHUB_ENV"
8686
echo "Benchmark secrets are unavailable; running benchmark comparison in dry-run mode." >> "$GITHUB_STEP_SUMMARY"
87+
echo "To run the real benchmark, add these secrets and rerun the workflow: OPENAI_API_KEY, GRAPHRAG_API_BASE, AZURE_AI_SEARCH_URL_ENDPOINT, AZURE_AI_SEARCH_API_KEY." >> "$GITHUB_STEP_SUMMARY"
88+
echo "You can trigger a rerun from Actions → Benchmark Comparison → Run workflow once the secrets are configured." >> "$GITHUB_STEP_SUMMARY"
8789
fi
8890
8991
- name: Benchmark Python implementation

scripts/benchmark_smoke.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ def summarize_results(results: list[OperationResult]) -> dict[str, int]:
374374
return summary
375375

376376

377+
def has_dry_run_results(*result_groups: list[OperationResult]) -> bool:
378+
"""Return whether any result group contains dry-run benchmark results."""
379+
return any(result.status == "dry_run" for results in result_groups for result in results)
380+
381+
377382
def render_markdown_report(
378383
python_results: list[OperationResult],
379384
dotnet_results: list[OperationResult],
@@ -399,6 +404,18 @@ def render_markdown_report(
399404
f"{summary.get('dry_run', 0)} |"
400405
)
401406

407+
if has_dry_run_results(python_results, dotnet_results):
408+
lines.extend(
409+
[
410+
"",
411+
"> [!IMPORTANT]",
412+
"> This comparison used `--dry-run`, so it validated commands without executing the real benchmark workload.",
413+
"> To get a real benchmark run in GitHub Actions, configure these secrets and rerun the `Benchmark Comparison` workflow:",
414+
"> `OPENAI_API_KEY`, `GRAPHRAG_API_BASE`, `AZURE_AI_SEARCH_URL_ENDPOINT`, and `AZURE_AI_SEARCH_API_KEY`.",
415+
"> You can rerun it manually with **Actions → Benchmark Comparison → Run workflow** after those secrets are available.",
416+
]
417+
)
418+
402419
lines.extend(
403420
[
404421
"",

tests/unit/test_benchmark_smoke.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,30 @@ def test_render_markdown_report_includes_missing_output_notes():
9090
assert "extract_graph_nlp" in report
9191

9292

93+
def test_render_markdown_report_explains_how_to_get_real_benchmark_run():
94+
module = load_module()
95+
96+
dry_run_result = module.OperationResult(
97+
implementation="python",
98+
fixture="text",
99+
operation_type="index",
100+
operation_label="index:fast",
101+
method="fast",
102+
query=None,
103+
command=["uv", "run", "python", "-m", "graphrag", "index"],
104+
duration_seconds=0.0,
105+
exit_code=0,
106+
status="dry_run",
107+
stdout="[dry-run]",
108+
)
109+
110+
report = module.render_markdown_report([dry_run_result], [])
111+
112+
assert "This comparison used `--dry-run`" in report
113+
assert "OPENAI_API_KEY" in report
114+
assert "Run workflow" in report
115+
116+
93117
def test_python_query_command_uses_cli_shape_from_fixture():
94118
module = load_module()
95119
repo_root = Path(__file__).resolve().parents[2]

0 commit comments

Comments (0)