Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/reporting-and-feedback.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,16 @@ Redacted feedback should:

Scorer-private reports can be more specific, but they must stay outside public repositories and
outside agent-visible context.

## Lightweight Public Gate

The repository includes a small redaction utility for public-facing generated reports.

This utility is a safety gate, not a private evaluation implementation and not a full data-loss
prevention system. It catches obvious scorer-only strings such as answer-key hints, hidden labels,
private thresholds, canary identifiers, raw traces, and protected scorer config references before
they appear in public Markdown or CSV reports.

Private scorer-only content should still be isolated at the source. Do not pass hidden labels,
answer keys, private rubrics, customer data, or protected scorer configs into public reports and
then rely on redaction to clean them up.
111 changes: 105 additions & 6 deletions scripts/public_leak_check.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import re
import subprocess
import sys
from pathlib import Path

Expand All @@ -15,18 +16,110 @@
"sk-public-test-DO-NOT-LEAK",
}

# Directory names checked by should_skip(): a path is skipped when any one of its
# components appears in this set.
SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", "private", "runs", "traces", "artifacts", "build", "dist"}
# Directory names checked by should_skip_content(): scan_paths() does not go on to
# inspect a file when any component of its path appears in this set.
SKIP_CONTENT_DIRS = {
".git",
".venv",
"venv",
"__pycache__",
"private",
"runs",
"traces",
"artifacts",
"build",
"dist",
}
# Directory names that walk_source_paths() leaves out: a relative path is dropped
# when any of its components appears in this set.
SKIP_WALK_DIRS = {".git", ".venv", "venv", "__pycache__", "build", "dist"}
# Relative POSIX path prefixes that walk_source_paths() leaves out; each entry ends
# with "/" and is compared with str.startswith().
SKIP_WALK_PREFIXES = {
"runs/",
"traces/",
"artifacts/",
"examples/artifacts/",
"reports/generated/",
}
# File suffixes (Path.suffix values) that scan_paths() accepts; "" admits files
# without a suffix. Files named ".gitignore" are additionally accepted by name.
TEXT_SUFFIXES = {".md", ".py", ".json", ".txt", ".csv", ".html", ".env", ".toml", ".yaml", ".yml", ".gitignore", ""}

# Lower-case path prefixes: denylisted_path_reason() reports a path whose normalised,
# lower-cased form starts with one of these.
DENIED_PATH_PREFIXES = {
"runs/",
"traces/",
"private/",
"fixtures/private/",
"examples/artifacts/",
"artifacts/",
"reports/generated/",
}
# Lower-case directory/file names: denylisted_path_reason() reports a path when one of
# its "/"-separated components equals an entry exactly.
DENIED_PATH_PARTS = {"runs", "traces", "private", "artifacts"}
# Lower-case terms that denylisted_path_reason() looks for inside the normalised,
# lower-cased path after the prefix and component checks have passed.
DENIED_PATH_TERMS = {
".env",
".env.",
"secret",
"token",
"key",
"answer_key",
"hidden_label",
"customer_private",
}

def should_skip(path: Path) -> bool:
    """Report whether any component of *path* is listed in ``SKIP_DIRS``."""
    for component in path.parts:
        if component in SKIP_DIRS:
            return True
    return False

def should_skip_content(path: Path) -> bool:
    """Report whether any component of *path* is listed in ``SKIP_CONTENT_DIRS``."""
    for component in path.parts:
        if component in SKIP_CONTENT_DIRS:
            return True
    return False

def scan(root: Path) -> list[str]:
findings = []

def _relative_path(path: Path) -> str:
    """Render *path* with forward slashes, dropping one leading ``./`` if present."""
    posix_form = path.as_posix()
    if posix_form.startswith("./"):
        return posix_form[2:]
    return posix_form


def denylisted_path_reason(path: str) -> str | None:
    """Explain why *path* must not appear in a public tree, if it must not.

    The path is normalised to a lower-case, forward-slash form and then checked,
    in order, against:

    1. ``DENIED_PATH_PREFIXES`` - the path starts with the prefix;
    2. ``DENIED_PATH_PARTS`` - a path component equals the entry exactly;
    3. env files - a component is ``.env`` or starts with ``.env.``;
    4. ``DENIED_PATH_TERMS`` - the term occurs on a token boundary.

    Terms are deliberately NOT matched as raw substrings. Generic terms such as
    ``key`` and ``token`` would otherwise flag benign names like
    ``src/monkeypatch.py`` or ``docs/tokenization.md`` and block ordinary
    changes. Instead, wherever a term begins or ends with a letter or digit,
    that edge must not touch another ASCII letter or digit; a trailing plural
    ``s`` is tolerated so ``keys.json`` or ``secrets/`` are still caught.

    Args:
        path: Repository-relative path, in either ``/`` or native form.

    Returns:
        A short human-readable reason for the first rule that matches, or
        ``None`` when the path passes every rule.
    """
    normalized = _relative_path(Path(path)).lower()

    # The denylists are sets; sorting keeps the reported reason stable across
    # runs instead of depending on hash-randomised iteration order.
    for prefix in sorted(DENIED_PATH_PREFIXES):
        if normalized.startswith(prefix):
            return f"denied path prefix {prefix}"

    for part in normalized.split("/"):
        if part in DENIED_PATH_PARTS:
            return f"denied path component {part}"
        if part == ".env" or part.startswith(".env."):
            return "denied env file path"

    # Longest terms first so the most specific reason wins
    # (e.g. "answer_key" is reported rather than "key").
    for term in sorted(DENIED_PATH_TERMS, key=lambda item: (-len(item), item)):
        # Only guard the edges that are alphanumeric: a term such as ".env"
        # already carries its own left delimiter.
        left_guard = r"(?<![a-z0-9])" if term[:1].isalnum() else ""
        right_guard = r"s?(?![a-z0-9])" if term[-1:].isalnum() else ""
        if re.search(left_guard + re.escape(term) + right_guard, normalized):
            return f"denied path term {term}"

    return None


def git_tracked_paths(root: Path) -> list[str] | None:
    """List the files Git tracks under *root*.

    Runs ``git -C <root> ls-files -z`` and splits its NUL-delimited output.

    Args:
        root: Directory passed to ``git -C``.

    Returns:
        The tracked paths as printed by Git, or ``None`` when the ``git``
        executable cannot be found or the command exits with a non-zero status
        (for example because *root* is not inside a repository).
    """
    try:
        result = subprocess.run(
            ["git", "-C", str(root), "ls-files", "-z"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        return None
    # Git emits raw path bytes and allows names that are not valid UTF-8. A
    # strict decode would raise UnicodeDecodeError and abort the whole check;
    # "surrogateescape" keeps such bytes as lone surrogates so the name still
    # round-trips through the filesystem APIs.
    return [
        entry.decode("utf-8", errors="surrogateescape")
        for entry in result.stdout.split(b"\0")
        if entry
    ]
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Handle non-UTF8 tracked paths when reading git output

git ls-files -z emits raw path bytes, but this code decodes each entry with strict UTF-8 and does not catch UnicodeDecodeError. In repositories that contain tracked filenames not valid UTF-8 (allowed by Git), leak-check crashes before scanning any files, so the public gate becomes unavailable. Decode with surrogateescape (or similar) to keep scanning robust.

Useful? React with 👍 / 👎.



def walk_source_paths(root: Path) -> list[str]:
    """Collect candidate files under *root* by walking the filesystem.

    Args:
        root: Directory to walk recursively.

    Returns:
        Sorted POSIX-style paths, relative to *root*, of every regular file
        that is neither inside a ``SKIP_WALK_DIRS`` directory nor under one of
        the ``SKIP_WALK_PREFIXES``.
    """
    # str.startswith() needs a tuple, not a set; build it once outside the loop.
    skipped_prefixes = tuple(SKIP_WALK_PREFIXES)
    paths = []
    for path in root.rglob("*"):
        rel = path.relative_to(root)
        rel_name = rel.as_posix()
        if any(part in SKIP_WALK_DIRS for part in rel.parts):
            continue
        if rel_name.startswith(skipped_prefixes):
            continue
        # Directories (and anything else that is not a regular file) are walked
        # through but never reported.
        if path.is_file():
            paths.append(rel_name)
    return sorted(paths)


def scan_paths(root: Path, paths: list[str]) -> list[str]:
findings = []
for rel_path in sorted(paths):
reason = denylisted_path_reason(rel_path)
if reason:
findings.append(f"{rel_path}: {reason}")
path = root / rel_path
if not path.is_file() or should_skip_content(path):
continue
if path.suffix not in TEXT_SUFFIXES and path.name != ".gitignore":
continue
Expand All @@ -46,6 +139,12 @@ def scan(root: Path) -> list[str]:
return findings


def scan(root: Path) -> list[str]:
    """Scan *root* and return the findings produced by ``scan_paths``.

    The Git-tracked file list is used when ``git_tracked_paths`` returns one;
    if it returns ``None`` the list from ``walk_source_paths`` is used instead.
    """
    candidates = git_tracked_paths(root)
    if candidates is None:
        candidates = walk_source_paths(root)
    return scan_paths(root, candidates)


if __name__ == "__main__":
root = Path(sys.argv[1] if len(sys.argv) > 1 else ".").resolve()
findings = scan(root)
Expand Down
40 changes: 33 additions & 7 deletions src/agent_bench_lab/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pathlib import Path
from typing import Any

from .redaction import redact_text

EPSILON = 1e-9


Expand Down Expand Up @@ -134,9 +136,13 @@ def _format_number(value: float | None, *, signed: bool = False) -> str:
return f"{value:.3f}"


def _safe_cell(value: Any) -> str:
    """Convert *value* to ``str`` and pass the result through ``redact_text``."""
    rendered = str(value)
    return redact_text(rendered)


def _format_row_item(row: dict[str, Any]) -> str:
    """Format one comparison row as a Markdown bullet ``- <task>/<case>: <delta>``.

    Args:
        row: Mapping that provides ``task_id``, ``case_id`` and ``delta``.

    Returns:
        The bullet line, with both identifiers passed through ``_safe_cell`` and
        the delta rendered by ``_format_number`` with an explicit sign.
    """
    delta = _format_number(row["delta"], signed=True)
    # Identifiers must go through _safe_cell before they are interpolated; a
    # return that formats the raw values would bypass redaction entirely.
    task_id = _safe_cell(row["task_id"])
    case_id = _safe_cell(row["case_id"])
    return f"- {task_id}/{case_id}: {delta}"


def render_markdown_report(result: dict[str, Any], title: str = "Compare") -> str:
Expand Down Expand Up @@ -173,12 +179,20 @@ def render_markdown_report(result: dict[str, Any], title: str = "Compare") -> st
lines.append("- none")
lines.extend(["", "## Policy Violations"])
for item in result["policy_violations"][:20]:
lines.append(f"- {item['side']} {item['task_id']}/{item['case_id']}: {item['violation']}")
lines.append(
"- "
f"{_safe_cell(item['side'])} "
f"{_safe_cell(item['task_id'])}/{_safe_cell(item['case_id'])}: "
f"{_safe_cell(item['violation'])}"
)
if not result["policy_violations"]:
lines.append("- none")
lines.extend(["", "## Missing Scores"])
for row in result["missing_scores"][:20]:
lines.append(f"- {row['task_id']}/{row['case_id']}: {row['status']}")
lines.append(
f"- {_safe_cell(row['task_id'])}/{_safe_cell(row['case_id'])}: "
f"{_safe_cell(row['status'])}"
)
if not result["missing_scores"]:
lines.append("- none")
lines.extend(
Expand All @@ -193,14 +207,14 @@ def render_markdown_report(result: dict[str, Any], title: str = "Compare") -> st
for row in result["rows"]:
lines.append(
"| "
f"{row['task_id']} | "
f"{row['case_id']} | "
f"{_safe_cell(row['task_id'])} | "
f"{_safe_cell(row['case_id'])} | "
f"{_format_number(row['baseline_score'])} | "
f"{_format_number(row['candidate_score'])} | "
f"{_format_number(row['delta'], signed=True)} | "
f"{row['baseline_success']} | "
f"{row['candidate_success']} | "
f"{row['status']} |"
f"{_safe_cell(row['status'])} |"
)
lines.extend(
[
Expand Down Expand Up @@ -231,4 +245,16 @@ def write_csv_report(result: dict[str, Any], output_path: Path) -> None:
],
)
writer.writeheader()
writer.writerows(result["rows"])
for row in result["rows"]:
writer.writerow(
{
"task_id": _safe_cell(row["task_id"]),
"case_id": _safe_cell(row["case_id"]),
"baseline_score": row["baseline_score"],
"candidate_score": row["candidate_score"],
"delta": row["delta"],
"baseline_success": row["baseline_success"],
"candidate_success": row["candidate_success"],
"status": _safe_cell(row["status"]),
}
)
61 changes: 61 additions & 0 deletions src/agent_bench_lab/redaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import annotations

import re
from collections.abc import Mapping, Sequence
from typing import Any

# Replacement that redact_text() returns in place of any text that is not public-safe.
REDACTED = "[REDACTED]"
# Replacement that redact_obj() uses for a mapping key that is not public-safe.
REDACTED_KEY = "[REDACTED_KEY]"

# Patterns that mark a piece of text as unsafe for public output. Each one is
# compiled case-insensitively, and is_public_safe_text() applies them with
# search(), so a hit anywhere inside the text counts.
UNSAFE_TEXT_PATTERNS = [
re.compile(pattern, re.IGNORECASE)
for pattern in [
# The "[_ -]?" groups accept "_", " ", "-" or nothing between the two words.
r"\banswer[_ -]?key\b",
r"\bhidden[_ -]?label(s)?\b",
r"\bprivate[_ -]?threshold\b",
r"\bprotected[_ -]?scorer[_ -]?config\b",
r"\bscorer[_ -]?config\b",
r"\bcanary\b",
# No trailing \b on the next two: they match identifier prefixes, where "_"
# (a word character) is followed by more word characters.
r"\bCANARY_",
r"\bHONEY_",
r"\bhoney row\b",
r"\bsecret\b",
r"\btoken\b",
r"\bapi[_ -]?key\b",
# "expected" followed by optional whitespace and "=" or ":".
r"\bexpected\s*=",
r"\bexpected\s*:",
r"\bcorrect answer\b",
r"\bprivate rubric\b",
r"\bcustomer[_ -]?private\b",
# Path-shaped references: "fixtures/private" anywhere, or a "private/"
# directory at the start of the text or right after a "/".
r"fixtures/private",
r"(^|/)private/",
r"\braw[_ -]?trace\b",
r"\braw[_ -]?diagnostics\b",
]
]


def is_public_safe_text(text: str) -> bool:
    """Return ``True`` when no pattern in ``UNSAFE_TEXT_PATTERNS`` is found in *text*."""
    for unsafe in UNSAFE_TEXT_PATTERNS:
        if unsafe.search(text):
            return False
    return True


def redact_text(text: str) -> str:
    """Return *text* unchanged when it is public-safe, otherwise ``REDACTED``.

    Redaction is all-or-nothing: the whole string is replaced, not just the
    part that matched.
    """
    return text if is_public_safe_text(text) else REDACTED


def redact_obj(obj: Any) -> Any:
    """Recursively redact the strings inside a nested structure.

    Args:
        obj: A string, mapping, tuple, other sequence, or any other value.

    Returns:
        * ``str`` - the result of ``redact_text``.
        * mapping - a new ``dict`` whose keys are ``str(key)`` when that text is
          public-safe and ``REDACTED_KEY`` otherwise, and whose values are
          redacted recursively. Every entry is preserved: when two keys would
          end up with the same name, the later ones receive a ``#2``, ``#3``,
          ... suffix instead of overwriting the earlier entry.
        * ``tuple`` - a tuple of recursively redacted items.
        * any other sequence except ``bytes``/``bytearray`` - a list of
          recursively redacted items.
        * anything else - returned unchanged.
    """
    if isinstance(obj, str):
        return redact_text(obj)
    if isinstance(obj, Mapping):
        redacted: dict[str, Any] = {}
        for key, value in obj.items():
            key_text = str(key)
            base_key = key_text if is_public_safe_text(key_text) else REDACTED_KEY
            # Several unsafe keys (e.g. "api_key" and "token") all map to the
            # same placeholder; number the repeats so no value is silently
            # dropped from the redacted output.
            safe_key = base_key
            occurrence = 1
            while safe_key in redacted:
                occurrence += 1
                safe_key = f"{base_key}#{occurrence}"
            redacted[safe_key] = redact_obj(value)
        return redacted
    if isinstance(obj, tuple):
        return tuple(redact_obj(item) for item in obj)
    # bytes and bytearray are Sequences of ints; leave them untouched rather
    # than exploding them into lists.
    if isinstance(obj, Sequence) and not isinstance(obj, (bytes, bytearray)):
        return [redact_obj(item) for item in obj]
    return obj
Loading
Loading