From 058a50351d54bcb9e98dff8dedb3d6ae4fd04f29 Mon Sep 17 00:00:00 2001 From: toby-bridges <59594712+toby-bridges@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:44:00 +0800 Subject: [PATCH 1/3] Add report reproducibility metadata --- api_relay_audit/reporter.py | 18 ++++++++-- audit.py | 68 +++++++++++++++++++++++++++++++++---- scripts/audit.py | 46 ++++++++++++++++++++++++- scripts/sync-version.py | 8 ++++- tests/test_reporter.py | 22 ++++++++++-- tests/test_version_sync.py | 2 ++ 6 files changed, 151 insertions(+), 13 deletions(-) diff --git a/api_relay_audit/reporter.py b/api_relay_audit/reporter.py index 8647d97..75d6f97 100644 --- a/api_relay_audit/reporter.py +++ b/api_relay_audit/reporter.py @@ -1,6 +1,6 @@ """Markdown report generator for audit results.""" -from datetime import datetime +from datetime import datetime, timezone class Reporter: @@ -77,7 +77,8 @@ def flag(self, level, msg): self.summary.append((level, msg)) self.sections.append(f"{icon} **{msg}**\n") - def render(self, target_url="", model=""): + def render(self, target_url="", model="", tool_version="", profile="", + tool_commit=""): """Render the complete Markdown report. Produces a header block (title, metadata, risk summary) followed @@ -88,6 +89,11 @@ def render(self, target_url="", model=""): metadata when provided. model: The model identifier used for the audit. Shown in the report metadata when provided. + tool_version: API Relay Audit version used for the run. + profile: Audit profile used for the run (``general``, ``web3``, + or ``full``). + tool_commit: Optional git commit for checkout-based runs. Omitted + when the standalone script is run outside a repository. Returns: A single Markdown string containing the full report. @@ -100,12 +106,18 @@ def render(self, target_url="", model=""): """ header = ( f"# API Relay Security Audit Report\n\n" - f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n" + f"**Generated**: {datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')}\n" ) + if tool_version: + header += f"**Tool Version**: `{tool_version}`\n" + if profile: + header += f"**Profile**: `{profile}`\n" if target_url: header += f"**Target**: `{target_url}`\n" if model: header += f"**Model**: `{model}`\n" + if tool_commit: + header += f"**Tool Commit**: `{tool_commit}`\n" header += "\n## Risk Summary\n\n" for level, msg in self.summary: diff --git a/audit.py b/audit.py index 8c41c4f..c5d0b84 100644 --- a/audit.py +++ b/audit.py @@ -4,8 +4,8 @@ # Regenerate after modular audit changes with: # python3 scripts/build-standalone.py # CI verifies this generated artifact plus key behavior regressions. -# source_sha256: 57cc4ddc3ccb3ed54e76e82dfc447c6e21322c2c03ab804625924e7ec655f245 -# standalone_body_sha256: 3adf747824d6eeb9df837a1e6ae8fd6fe5aa861c57b6a422b9367e52ee0c630f +# source_sha256: 6422e70e00cfaf38ead45273f3b3dad3546f6f6d3cf3ade0b84b81ffbf71dcec +# standalone_body_sha256: 5cdef92a5e99e00a7574ffb76a90f91870095e1701c20bf3ee519217c713f0b4 # END GENERATED STANDALONE HEADER """ @@ -1624,7 +1624,7 @@ def iter_stdout(): """Markdown report generator for audit results.""" -from datetime import datetime +from datetime import datetime, timezone class Reporter: @@ -1701,7 +1701,8 @@ def flag(self, level, msg): self.summary.append((level, msg)) self.sections.append(f"{icon} **{msg}**\n") - def render(self, target_url="", model=""): + def render(self, target_url="", model="", tool_version="", profile="", + tool_commit=""): """Render the complete Markdown report. Produces a header block (title, metadata, risk summary) followed @@ -1712,6 +1713,11 @@ def render(self, target_url="", model=""): metadata when provided. model: The model identifier used for the audit. Shown in the report metadata when provided. + tool_version: API Relay Audit version used for the run. + profile: Audit profile used for the run (``general``, ``web3``, + or ``full``). + tool_commit: Optional git commit for checkout-based runs. Omitted + when the standalone script is run outside a repository. Returns: A single Markdown string containing the full report. @@ -1724,12 +1730,18 @@ def render(self, target_url="", model=""): """ header = ( f"# API Relay Security Audit Report\n\n" - f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n" + f"**Generated**: {datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')}\n" ) + if tool_version: + header += f"**Tool Version**: `{tool_version}`\n" + if profile: + header += f"**Profile**: `{profile}`\n" if target_url: header += f"**Target**: `{target_url}`\n" if model: header += f"**Model**: `{model}`\n" + if tool_commit: + header += f"**Tool Commit**: `{tool_commit}`\n" header += "\n## Risk Summary\n\n" for level, msg in self.summary: @@ -4839,6 +4851,8 @@ def run_channel_classifier(client): +TOOL_VERSION_FALLBACK = "2.3.0" + def _format_identity_inconsistency(non_claude_matches): """Render Step 5's non-Claude self-ID finding without over-attribution.""" @@ -4866,6 +4880,42 @@ def _report_error(report, error, status=None): report.p(format_diagnosis(_diagnosis_for_error(error, status=status))) +def _tool_version(): + """Return the packaged tool version for report metadata.""" + script_path = Path(__file__).resolve() + for candidate in ( + script_path.parent / "VERSION", + script_path.parent.parent / "VERSION", + ): + try: + value = candidate.read_text(encoding="utf-8").strip() + except OSError: + continue + if re.fullmatch(r"\d+\.\d+\.\d+", value): + return value + return TOOL_VERSION_FALLBACK + + +def _tool_commit_from_checkout(): + """Return a short git commit only when this script is in this repo checkout.""" + script_path = Path(__file__).resolve() + repo_root = script_path.parent.parent if script_path.parent.name == "scripts" else script_path.parent + if not (repo_root / ".git").exists(): + return "" + try: + result = subprocess.run( + ["git", "-C", str(repo_root), "rev-parse", "--short=12", "HEAD"], + capture_output=True, + text=True, + timeout=2, + check=True, + ) + except Exception: + return "" + commit = result.stdout.strip() + return commit if re.fullmatch(r"[0-9a-f]{7,12}", commit) else "" + + # ============================================================ # CLI # ============================================================ @@ -6688,7 +6738,13 @@ def main(): "anomaly, or Web3 injection detected.") # Output - md = report.render(target_url=client.base_url, model=args.model) + md = report.render( + target_url=client.base_url, + model=args.model, + tool_version=f"v{_tool_version()}", + profile=args.profile, + tool_commit=_tool_commit_from_checkout(), + ) if args.output: Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/audit.py b/scripts/audit.py index 225fd9a..f466688 100644 --- a/scripts/audit.py +++ b/scripts/audit.py @@ -69,6 +69,8 @@ from api_relay_audit.tool_substitution import run_tool_substitution_test from api_relay_audit.web3.injection_probes import run_web3_injection_probes +TOOL_VERSION_FALLBACK = "2.3.0" + def _format_identity_inconsistency(non_claude_matches): """Render Step 5's non-Claude self-ID finding without over-attribution.""" @@ -96,6 +98,42 @@ def _report_error(report, error, status=None): report.p(format_diagnosis(_diagnosis_for_error(error, status=status))) +def _tool_version(): + """Return the packaged tool version for report metadata.""" + script_path = Path(__file__).resolve() + for candidate in ( + script_path.parent / "VERSION", + script_path.parent.parent / "VERSION", + ): + try: + value = candidate.read_text(encoding="utf-8").strip() + except OSError: + continue + if re.fullmatch(r"\d+\.\d+\.\d+", value): + return value + return TOOL_VERSION_FALLBACK + + +def _tool_commit_from_checkout(): + """Return a short git commit only when this script is in this repo checkout.""" + script_path = Path(__file__).resolve() + repo_root = script_path.parent.parent if script_path.parent.name == "scripts" else script_path.parent + if not (repo_root / ".git").exists(): + return "" + try: + result = subprocess.run( + ["git", "-C", str(repo_root), "rev-parse", "--short=12", "HEAD"], + capture_output=True, + text=True, + timeout=2, + check=True, + ) + except Exception: + return "" + commit = result.stdout.strip() + return commit if re.fullmatch(r"[0-9a-f]{7,12}", commit) else "" + + # ============================================================ # CLI # ============================================================ @@ -1919,7 +1957,13 @@ def main(): "anomaly, or Web3 injection detected.") # Output - md = report.render(target_url=client.base_url, model=args.model) + md = report.render( + target_url=client.base_url, + model=args.model, + tool_version=f"v{_tool_version()}", + profile=args.profile, + tool_commit=_tool_commit_from_checkout(), + ) if args.output: Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/sync-version.py b/scripts/sync-version.py index 1ac6b58..8299011 100644 --- a/scripts/sync-version.py +++ b/scripts/sync-version.py @@ -103,12 +103,18 @@ def replace_regex( def sync_audit_script(version: Version, path: Path) -> str: text = read_text(path) - return replace_regex( + text = replace_regex( text, rf"API Relay Security Audit Tool {DISPLAY_VERSION_RE}", f"API Relay Security Audit Tool {version.display}", path, ) + return replace_regex( + text, + rf'^TOOL_VERSION_FALLBACK = "{FULL_VERSION_RE}"$', + f'TOOL_VERSION_FALLBACK = "{version.full}"', + path, + ) def sync_build_standalone(version: Version, path: Path) -> str: diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 2954e76..03cbb28 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -119,17 +119,35 @@ def test_render_header(self, mock_dt, rpt): @patch("api_relay_audit.reporter.datetime") def test_render_with_target_and_model(self, mock_dt, rpt): - mock_dt.now.return_value.strftime.return_value = "2026-03-30 12:00" + mock_dt.now.return_value.strftime.return_value = "2026-03-30T12:00:00Z" output = rpt.render(target_url="https://relay.test", model="claude-3") assert "`https://relay.test`" in output assert "`claude-3`" in output + @patch("api_relay_audit.reporter.datetime") + def test_render_with_reproducibility_metadata(self, mock_dt, rpt): + mock_dt.now.return_value.strftime.return_value = "2026-03-30T12:00:00Z" + output = rpt.render( + target_url="https://relay.test", + model="claude-3", + tool_version="v2.3.0", + profile="full", + tool_commit="abc1234", + ) + assert "**Generated**: 2026-03-30T12:00:00Z" in output + assert "**Tool Version**: `v2.3.0`" in output + assert "**Profile**: `full`" in output + assert "**Tool Commit**: `abc1234`" in output + @patch("api_relay_audit.reporter.datetime") def test_render_without_target_and_model(self, mock_dt, rpt): - mock_dt.now.return_value.strftime.return_value = "2026-03-30 12:00" + mock_dt.now.return_value.strftime.return_value = "2026-03-30T12:00:00Z" output = rpt.render() assert "**Target**" not in output assert "**Model**" not in output + assert "**Tool Version**" not in output + assert "**Profile**" not in output + assert "**Tool Commit**" not in output @patch("api_relay_audit.reporter.datetime") def test_render_includes_risk_summary(self, mock_dt, rpt): diff --git a/tests/test_version_sync.py b/tests/test_version_sync.py index 2640392..89dec80 100644 --- a/tests/test_version_sync.py +++ b/tests/test_version_sync.py @@ -77,3 +77,5 @@ def test_generated_standalone_uses_display_version(): modular = (REPO_ROOT / "scripts" / "audit.py").read_text(encoding="utf-8") assert f"API Relay Security Audit Tool {display}" in modular assert f"API Relay Security Audit Tool {display} --- Standalone Edition" in standalone + assert f'TOOL_VERSION_FALLBACK = "{version}"' in modular + assert f'TOOL_VERSION_FALLBACK = "{version}"' in standalone From 68d4141dd5da0b5d7fa23edbc00f90a236761855 Mon Sep 17 00:00:00 2001 From: toby-bridges <59594712+toby-bridges@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:46:09 +0800 Subject: [PATCH 2/3] Sync metrics after report metadata tests --- README.md | 4 ++-- ROADMAP.md | 4 ++-- docs/_metrics.md | 12 ++++++------ web/index.html | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 7cb9867..930bcc8 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ Community evidence is shape-checked by GitHub Actions, but publication still req | Version | `v2.3` | | Audit steps | 14 | | Risk matrix | 6D | -| pytest collected tests | 778 | +| pytest collected tests | 779 | | CLI flags | 21 | | Runtime profiles | `general`, `web3`, `full` | @@ -328,7 +328,7 @@ API Relay Audit 也可以作为 agent skill 使用。 | 版本 | `v2.3` | | 审计步骤 | 14 | | 风险矩阵 | 6D | -| pytest collected tests | 778 | +| pytest collected tests | 779 | | CLI flags | 21 | | Runtime profiles | `general`, `web3`, `full` | diff --git a/ROADMAP.md b/ROADMAP.md index c93620d..208d882 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -40,10 +40,10 @@ contributor, arXiv:2026-04-26, 正交威胁轴:模型替换质量欺诈 vs 我 detector, not a hosted preflight service, not a new API family, and not a change to LOW/MEDIUM/HIGH semantics. `inconclusive` remains `inconclusive`. -- **Final test count**: 778/778 passing (733 baseline → 764 after current +- **Final test count**: 779/779 passing (733 baseline → 764 after current master follow-ups → 769 after release-engineering version sync coverage → 775 after query-family growth contracts → 778 after release-hardening - public verification regressions). + public verification regressions → 779 after report reproducibility metadata). ### v1.9 — Dual-distribution full generation (2026-06-01) - **Standalone product promise preserved**: root `audit.py` stays committed, diff --git a/docs/_metrics.md b/docs/_metrics.md index 1f561cc..d1129d4 100644 --- a/docs/_metrics.md +++ b/docs/_metrics.md @@ -16,23 +16,23 @@ | 单文件版版本 | `v2.3` | `audit.py` docstring | | 步骤数 (Step N) | **14** | grep `Step N` in `scripts/audit.py` | | 步骤数 (单文件版) | 14 | grep `Step N` in `audit.py` | -| 测试数 (pytest) | **778** | `pytest --collect-only` | -| 测试数 (static) | 754 | grep `def test_*` in tests/ | +| 测试数 (pytest) | **779** | `pytest --collect-only` | +| 测试数 (static) | 755 | grep `def test_*` in tests/ | | CLI flag 数 | 21 | grep `add_argument("--*")` | | profile 选项 | general, web3, full | argparse choices | | ROADMAP 上次更新 | 2026-06-07 | `ROADMAP.md` 头部 | | Codex review 提及次数 | 4 | grep `Codex review (cycle\|round)` 在 Shipped 节 | | Codex review 已编号轮次(最大) | 6 | grep `Nth Codex review round` | | Codex bug 累计(最新声称) | 18 | grep `cumulative N real bug` | -| 测试数演进 (ROADMAP) | [546, 560, 562, 586, 642, 700, 778] | grep `Final test count: N/N passing` | -| Recorded commit SHA | `eeb9a2c` | recent reachable commit; `--check` allows follow-up metrics commits | -| Recorded commit date | 2026-06-07 | recent reachable commit; `--check` allows follow-up metrics commits | +| 测试数演进 (ROADMAP) | [546, 560, 562, 586, 642, 700, 779] | grep `Final test count: N/N passing` | +| Recorded commit SHA | `058a503` | recent reachable commit; `--check` allows follow-up metrics commits | +| Recorded commit date | 2026-06-12 | recent reachable commit; `--check` allows follow-up metrics commits | ## 一致性自检 - ✅ 版本一致:两份都是 `v2.3`。 - ✅ 步骤数一致:14。 -- ℹ️ pytest (778) vs 静态 (754) 差距 >20,多出来的来自 parametrize/fixture——以 pytest 为准。 +- ℹ️ pytest (779) vs 静态 (755) 差距 >20,多出来的来自 parametrize/fixture——以 pytest 为准。 ## 人工 review 边界(脚本抓不到,每次发布要人工核对) diff --git a/web/index.html b/web/index.html index 152adf3..9194710 100644 --- a/web/index.html +++ b/web/index.html @@ -383,7 +383,7 @@