From ce1df3fc443615a9e2c91bdb1d472c05d281f299 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 17:10:39 +0900 Subject: [PATCH 1/9] Fix Scorecard SARIF upload warnings --- .github/workflows/ossf-scorecard.yml | 25 +++- scripts/checks/normalize_scorecard_sarif.py | 63 ++++++++ scripts/checks/verify_supply_chain.py | 11 ++ .../tests/test_supply_chain_policy.py | 141 ++++++++++++++++++ 4 files changed, 238 insertions(+), 2 deletions(-) create mode 100644 scripts/checks/normalize_scorecard_sarif.py diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index ef95d63d..76a762a0 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -33,7 +33,28 @@ jobs: name: ossf-scorecard-results path: results.sarif retention-days: 5 + scorecard-sarif-upload: + name: scorecard-sarif-upload + needs: analysis + if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ossf-scorecard-results + path: scorecard-sarif + - name: Normalize repository-level Scorecard SARIF locations + run: >- + python3 scripts/checks/normalize_scorecard_sarif.py + scorecard-sarif/results.sarif + normalized-scorecard-results.sarif - uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2 peeled commit; SHA pinning retained as supply-chain attack mitigation. - if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) with: - sarif_file: results.sarif + sarif_file: normalized-scorecard-results.sarif diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py new file mode 100644 index 00000000..95909102 --- /dev/null +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -0,0 +1,63 @@ +"""Normalize OSSF Scorecard SARIF so GitHub can ingest repository findings.""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +SCORECARD_REPOSITORY_PLACEHOLDER_URI = "no file associated with this alert" +SCORECARD_WORKFLOW_URI = ".github/workflows/ossf-scorecard.yml" + + +def normalize_scorecard_sarif(source: Path, target: Path) -> int: + """Rewrite repository-level Scorecard placeholder URIs and return change count.""" + sarif = json.loads(source.read_text(encoding="utf-8")) + rewritten = 0 + + for run in sarif.get("runs", []): + for result in run.get("results", []): + for location in result.get("locations", []): + physical_location = location.get("physicalLocation") + if not isinstance(physical_location, dict): + continue + artifact_location = physical_location.get("artifactLocation") + if not isinstance(artifact_location, dict): + continue + if artifact_location.get("uri") != SCORECARD_REPOSITORY_PLACEHOLDER_URI: + continue + artifact_location["uri"] = SCORECARD_WORKFLOW_URI + physical_location.setdefault("region", {"startLine": 1}) + properties = physical_location.setdefault("properties", {}) + if isinstance(properties, dict): + properties["bandscopeOriginalUri"] = ( + SCORECARD_REPOSITORY_PLACEHOLDER_URI + ) + properties["bandscopeRepositoryLevelFinding"] = True + rewritten += 1 + + target.write_text( + json.dumps(sarif, indent=2, sort_keys=True) + "\n", encoding="utf-8" + ) + return rewritten + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Normalize OSSF Scorecard SARIF repository-level locations." + ) + parser.add_argument("source", type=Path, help="Path to the Scorecard SARIF file") + parser.add_argument("target", type=Path, help="Path to write normalized SARIF") + return parser.parse_args() + + +def main() -> None: + """Normalize a Scorecard SARIF file from the command line.""" + args = parse_args() + rewritten = normalize_scorecard_sarif(args.source, args.target) + print(f"Normalized {rewritten} OSSF Scorecard repository-level SARIF locations") + + +if __name__ == "__main__": + main() diff --git a/scripts/checks/verify_supply_chain.py b/scripts/checks/verify_supply_chain.py index 4578737a..bd7b55ff 100644 --- a/scripts/checks/verify_supply_chain.py +++ b/scripts/checks/verify_supply_chain.py @@ -45,6 +45,9 @@ "ossf scorecard publishing job must only contain uses steps; split run steps " "into a separate non-publishing job" ) +OSSF_SARIF_NORMALIZER = "scripts/checks/normalize_scorecard_sarif.py" +OSSF_NORMALIZED_SARIF = "normalized-scorecard-results.sarif" +OSSF_NORMALIZED_SARIF_UPLOAD = f"sarif_file: {OSSF_NORMALIZED_SARIF}" RELEASE_ARTIFACT_GLOB = re.compile(r"(?:^|\s)artifacts/\*") RELEASE_ASSET_VALIDATOR = ( "scripts/release/select_release_assets.py --output release-assets.txt" @@ -453,6 +456,14 @@ def verify_workflow_coverage() -> list[str]: missing.append( "ossf scorecard publish_results must use the repository default branch guard" ) + if "github/codeql-action/upload-sarif" in scorecard and ( + OSSF_SARIF_NORMALIZER not in scorecard + or OSSF_NORMALIZED_SARIF_UPLOAD not in scorecard + ): + missing.append( + "ossf scorecard SARIF upload must normalize repository-level " + "placeholder URIs before upload-sarif" + ) workflow_paths = sorted(Path(".github/workflows").glob("*.yml")) + sorted( Path(".github/workflows").glob("*.yaml") ) diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index c419fa8b..19033104 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -3,6 +3,7 @@ from __future__ import annotations import importlib +import json import re from pathlib import Path @@ -369,6 +370,146 @@ def test_supply_chain_check_accepts_repo_ossf_publish_restrictions( assert not any("ossf scorecard" in violation for violation in violations) +def test_supply_chain_check_rejects_unnormalized_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure Scorecard SARIF is normalized before CodeQL upload ingestion.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", "verify_supply_chain_ossf_sarif_guard" + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " analysis:", + " name: ossf-scorecard", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( + tmp_path: Path, +) -> None: + """Ensure repository-level Scorecard SARIF locations use upload-safe URIs.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", "normalize_scorecard_sarif" + ) + source = tmp_path / "results.sarif" + target = tmp_path / "normalized-results.sarif" + source.write_text( + json.dumps( + { + "version": "2.1.0", + "runs": [ + { + "results": [ + { + "ruleId": "Token-Permissions", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + } + } + } + ], + } + ] + } + ], + } + ), + encoding="utf-8", + ) + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + normalized = json.loads(target.read_text(encoding="utf-8")) + location = normalized["runs"][0]["results"][0]["locations"][0]["physicalLocation"] + + assert rewritten == 1 + assert location["artifactLocation"]["uri"] == ".github/workflows/ossf-scorecard.yml" + assert location["region"]["startLine"] == 1 + assert location["properties"]["bandscopeOriginalUri"] == ("no file associated with this alert") + + +def test_scorecard_sarif_normalizer_preserves_file_locations(tmp_path: Path) -> None: + """Ensure file-associated Scorecard SARIF locations are not rewritten.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", "normalize_scorecard_sarif_preserve" + ) + source = tmp_path / "results.sarif" + target = tmp_path / "normalized-results.sarif" + source.write_text( + json.dumps( + { + "version": "2.1.0", + "runs": [ + { + "results": [ + { + "ruleId": "Pinned-Dependencies", + "locations": [ + { + "physicalLocation": { + "artifactLocation": {"uri": ".github/workflows/ci.yml"}, + "region": {"startLine": 12}, + } + } + ], + } + ] + } + ], + } + ), + encoding="utf-8", + ) + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + normalized = json.loads(target.read_text(encoding="utf-8")) + location = normalized["runs"][0]["results"][0]["locations"][0]["physicalLocation"] + + assert rewritten == 0 + assert location["artifactLocation"]["uri"] == ".github/workflows/ci.yml" + assert location["region"]["startLine"] == 12 + assert "properties" not in location + + def test_supply_chain_check_rejects_vulnerable_rust_rand_lockfile( tmp_path: Path, ) -> None: From 7094fceddae2f08a2cdd382a468dbf657e87142a Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 17:26:30 +0900 Subject: [PATCH 2/9] Harden Scorecard SARIF normalizer --- scripts/checks/normalize_scorecard_sarif.py | 9 +++- .../tests/test_supply_chain_policy.py | 45 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index 95909102..461170be 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -15,9 +15,16 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: sarif = json.loads(source.read_text(encoding="utf-8")) rewritten = 0 - for run in sarif.get("runs", []): + runs = sarif.get("runs", []) if isinstance(sarif, dict) else [] + for run in runs: + if not isinstance(run, dict): + continue for result in run.get("results", []): + if not isinstance(result, dict): + continue for location in result.get("locations", []): + if not isinstance(location, dict): + continue physical_location = location.get("physicalLocation") if not isinstance(physical_location, dict): continue diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 19033104..a53821fc 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -510,6 +510,51 @@ def test_scorecard_sarif_normalizer_preserves_file_locations(tmp_path: Path) -> assert "properties" not in location +def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> None: + """Ensure malformed Scorecard SARIF arrays do not crash normalization.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", "normalize_scorecard_sarif_malformed" + ) + source = tmp_path / "results.sarif" + target = tmp_path / "normalized-results.sarif" + source.write_text( + json.dumps( + { + "version": "2.1.0", + "runs": [ + "not-a-run", + { + "results": [ + "not-a-result", + { + "ruleId": "Token-Permissions", + "locations": [ + "not-a-location", + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + } + } + }, + ], + }, + ] + }, + ], + } + ), + encoding="utf-8", + ) + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + normalized = json.loads(target.read_text(encoding="utf-8")) + physical_location = normalized["runs"][1]["results"][1]["locations"][1]["physicalLocation"] + + assert rewritten == 1 + assert physical_location["artifactLocation"]["uri"] == (".github/workflows/ossf-scorecard.yml") + + def test_supply_chain_check_rejects_vulnerable_rust_rand_lockfile( tmp_path: Path, ) -> None: From cbf63b0e00d9fd4321b386c17ab9dc4a054a60bb Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 17:42:45 +0900 Subject: [PATCH 3/9] Ensure Scorecard SARIF region lines --- scripts/checks/normalize_scorecard_sarif.py | 6 ++- .../tests/test_supply_chain_policy.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index 461170be..d43f105b 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -34,7 +34,11 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: if artifact_location.get("uri") != SCORECARD_REPOSITORY_PLACEHOLDER_URI: continue artifact_location["uri"] = SCORECARD_WORKFLOW_URI - physical_location.setdefault("region", {"startLine": 1}) + region = physical_location.get("region") + if not isinstance(region, dict): + region = {} + physical_location["region"] = region + region.setdefault("startLine", 1) properties = physical_location.setdefault("properties", {}) if isinstance(properties, dict): properties["bandscopeOriginalUri"] = ( diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index a53821fc..306dbbac 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -510,6 +510,51 @@ def test_scorecard_sarif_normalizer_preserves_file_locations(tmp_path: Path) -> assert "properties" not in location +def test_scorecard_sarif_normalizer_fills_existing_region_start_line( + tmp_path: Path, +) -> None: + """Ensure repository-level SARIF locations with a region still get startLine.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", "normalize_scorecard_sarif_region" + ) + source = tmp_path / "results.sarif" + target = tmp_path / "normalized-results.sarif" + source.write_text( + json.dumps( + { + "version": "2.1.0", + "runs": [ + { + "results": [ + { + "ruleId": "Token-Permissions", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + }, + "region": {}, + } + } + ], + } + ] + } + ], + } + ), + encoding="utf-8", + ) + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + normalized = json.loads(target.read_text(encoding="utf-8")) + physical_location = normalized["runs"][0]["results"][0]["locations"][0]["physicalLocation"] + + assert rewritten == 1 + assert physical_location["region"]["startLine"] == 1 + + def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> None: """Ensure malformed Scorecard SARIF arrays do not crash normalization.""" normalizer = load_module( From d1cd795d208ab0477a5c90f15c3784a02a51e4ec Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 17:55:59 +0900 Subject: [PATCH 4/9] Validate Scorecard SARIF start lines --- scripts/checks/normalize_scorecard_sarif.py | 4 +- .../tests/test_supply_chain_policy.py | 76 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index d43f105b..b8b33a5a 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -38,7 +38,9 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: if not isinstance(region, dict): region = {} physical_location["region"] = region - region.setdefault("startLine", 1) + start_line = region.get("startLine") + if type(start_line) is not int or start_line < 1: + region["startLine"] = 1 properties = physical_location.setdefault("properties", {}) if isinstance(properties, dict): properties["bandscopeOriginalUri"] = ( diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 306dbbac..24d0c806 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -465,6 +465,7 @@ def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( assert location["artifactLocation"]["uri"] == ".github/workflows/ossf-scorecard.yml" assert location["region"]["startLine"] == 1 assert location["properties"]["bandscopeOriginalUri"] == ("no file associated with this alert") + assert location["properties"]["bandscopeRepositoryLevelFinding"] is True def test_scorecard_sarif_normalizer_preserves_file_locations(tmp_path: Path) -> None: @@ -555,6 +556,81 @@ def test_scorecard_sarif_normalizer_fills_existing_region_start_line( assert physical_location["region"]["startLine"] == 1 +def test_scorecard_sarif_normalizer_repairs_invalid_region_start_lines( + tmp_path: Path, +) -> None: + """Ensure invalid repository-level SARIF region startLine values become valid.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", + "normalize_scorecard_sarif_invalid_region", + ) + source = tmp_path / "results.sarif" + target = tmp_path / "normalized-results.sarif" + source.write_text( + json.dumps( + { + "version": "2.1.0", + "runs": [ + { + "results": [ + { + "ruleId": "Token-Permissions", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + }, + "region": {"startLine": 0}, + } + }, + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + }, + "region": {"startLine": None}, + } + }, + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + }, + "region": {"startLine": "7"}, + } + }, + { + "physicalLocation": { + "artifactLocation": { + "uri": "no file associated with this alert" + }, + "region": {"startLine": 3}, + } + }, + ], + } + ] + } + ], + } + ), + encoding="utf-8", + ) + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + normalized = json.loads(target.read_text(encoding="utf-8")) + locations = normalized["runs"][0]["results"][0]["locations"] + + assert rewritten == 4 + assert [location["physicalLocation"]["region"]["startLine"] for location in locations] == [ + 1, + 1, + 1, + 3, + ] + + def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> None: """Ensure malformed Scorecard SARIF arrays do not crash normalization.""" normalizer = load_module( From f54a3cb39d03748263dacf123e19a99358458476 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 18:11:26 +0900 Subject: [PATCH 5/9] Guard Scorecard SARIF containers --- scripts/checks/normalize_scorecard_sarif.py | 12 ++++++-- .../tests/test_supply_chain_policy.py | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index b8b33a5a..2bad0b50 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -16,13 +16,21 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: rewritten = 0 runs = sarif.get("runs", []) if isinstance(sarif, dict) else [] + if not isinstance(runs, list): + runs = [] for run in runs: if not isinstance(run, dict): continue - for result in run.get("results", []): + results = run.get("results", []) + if not isinstance(results, list): + continue + for result in results: if not isinstance(result, dict): continue - for location in result.get("locations", []): + locations = result.get("locations", []) + if not isinstance(locations, list): + continue + for location in locations: if not isinstance(location, dict): continue physical_location = location.get("physicalLocation") diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 24d0c806..e2358c32 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -676,6 +676,34 @@ def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> assert physical_location["artifactLocation"]["uri"] == (".github/workflows/ossf-scorecard.yml") +def test_scorecard_sarif_normalizer_skips_malformed_containers( + tmp_path: Path, +) -> None: + """Ensure non-list SARIF containers do not crash normalization.""" + normalizer = load_module( + "scripts/checks/normalize_scorecard_sarif.py", + "normalize_scorecard_sarif_malformed_containers", + ) + cases = [ + {"version": "2.1.0", "runs": None}, + {"version": "2.1.0", "runs": {"results": []}}, + {"version": "2.1.0", "runs": [{"results": None}]}, + {"version": "2.1.0", "runs": [{"results": {"locations": []}}]}, + {"version": "2.1.0", "runs": [{"results": [{"locations": None}]}]}, + {"version": "2.1.0", "runs": [{"results": [{"locations": {}}]}]}, + ] + + for index, sarif in enumerate(cases): + source = tmp_path / f"results-{index}.sarif" + target = tmp_path / f"normalized-results-{index}.sarif" + source.write_text(json.dumps(sarif), encoding="utf-8") + + rewritten = normalizer.normalize_scorecard_sarif(source, target) + + assert rewritten == 0 + assert json.loads(target.read_text(encoding="utf-8")) == sarif + + def test_supply_chain_check_rejects_vulnerable_rust_rand_lockfile( tmp_path: Path, ) -> None: From 64cf557fe371aae9e667b9a75b8075284a0a574e Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 18:23:50 +0900 Subject: [PATCH 6/9] Normalize Scorecard SARIF properties --- scripts/checks/normalize_scorecard_sarif.py | 14 ++++++++------ .../tests/test_supply_chain_policy.py | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index 2bad0b50..caf90c87 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -49,12 +49,14 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: start_line = region.get("startLine") if type(start_line) is not int or start_line < 1: region["startLine"] = 1 - properties = physical_location.setdefault("properties", {}) - if isinstance(properties, dict): - properties["bandscopeOriginalUri"] = ( - SCORECARD_REPOSITORY_PLACEHOLDER_URI - ) - properties["bandscopeRepositoryLevelFinding"] = True + properties = physical_location.get("properties") + if not isinstance(properties, dict): + properties = {} + physical_location["properties"] = properties + properties["bandscopeOriginalUri"] = ( + SCORECARD_REPOSITORY_PLACEHOLDER_URI + ) + properties["bandscopeRepositoryLevelFinding"] = True rewritten += 1 target.write_text( diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index e2358c32..622a359d 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -655,7 +655,8 @@ def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> "physicalLocation": { "artifactLocation": { "uri": "no file associated with this alert" - } + }, + "properties": "not-properties", } }, ], @@ -674,6 +675,7 @@ def test_scorecard_sarif_normalizer_skips_malformed_locations(tmp_path: Path) -> assert rewritten == 1 assert physical_location["artifactLocation"]["uri"] == (".github/workflows/ossf-scorecard.yml") + assert physical_location["properties"]["bandscopeRepositoryLevelFinding"] is True def test_scorecard_sarif_normalizer_skips_malformed_containers( From 9d46a7360907a4bc7a69cbaf3da7ca384d5e2d87 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 18:55:25 +0900 Subject: [PATCH 7/9] Harden Scorecard SARIF upload guard --- scripts/checks/verify_supply_chain.py | 54 ++++++-- .../tests/test_supply_chain_policy.py | 130 ++++++++++++++++++ 2 files changed, 175 insertions(+), 9 deletions(-) diff --git a/scripts/checks/verify_supply_chain.py b/scripts/checks/verify_supply_chain.py index bd7b55ff..8d30f54c 100644 --- a/scripts/checks/verify_supply_chain.py +++ b/scripts/checks/verify_supply_chain.py @@ -353,6 +353,46 @@ def evaluate_job(job_lines: list[str], start_line: int) -> None: return violations +def scorecard_sarif_upload_normalization_violations(content: str) -> list[str]: + """Return Scorecard SARIF upload steps that bypass the normalizer output.""" + if "ossf/scorecard-action" not in content: + return [] + if "github/codeql-action/upload-sarif" not in content: + return [] + + violations: list[str] = [] + has_normalizer_step = OSSF_SARIF_NORMALIZER in content + lines = content.splitlines() + for index, line in enumerate(lines): + if "uses:" not in line or "github/codeql-action/upload-sarif" not in line: + continue + step_indent = len(line) - len(line.lstrip(" ")) + step_lines = [line] + for following_line in lines[index + 1 :]: + following_stripped = following_line.strip() + following_indent = len(following_line) - len(following_line.lstrip(" ")) + if following_stripped.startswith("- ") and following_indent <= step_indent: + break + step_lines.append(following_line) + step_content = "\n".join(step_lines) + if not any( + scorecard_sarif_token in step_content + for scorecard_sarif_token in [ + OSSF_NORMALIZED_SARIF_UPLOAD, + "sarif_file: results.sarif", + "sarif_file: scorecard-sarif/results.sarif", + ] + ): + continue + if has_normalizer_step and OSSF_NORMALIZED_SARIF_UPLOAD in step_content: + continue + violations.append( + "ossf scorecard SARIF upload must normalize repository-level " + "placeholder URIs before upload-sarif" + ) + return violations + + def verify_workflow_coverage() -> list[str]: """Return workflow trigger and artifact coverage violations.""" missing: list[str] = [] @@ -456,21 +496,17 @@ def verify_workflow_coverage() -> list[str]: missing.append( "ossf scorecard publish_results must use the repository default branch guard" ) - if "github/codeql-action/upload-sarif" in scorecard and ( - OSSF_SARIF_NORMALIZER not in scorecard - or OSSF_NORMALIZED_SARIF_UPLOAD not in scorecard - ): - missing.append( - "ossf scorecard SARIF upload must normalize repository-level " - "placeholder URIs before upload-sarif" - ) workflow_paths = sorted(Path(".github/workflows").glob("*.yml")) + sorted( Path(".github/workflows").glob("*.yaml") ) for workflow_path in workflow_paths: + workflow_content = workflow_path.read_text(encoding="utf-8") + missing.extend( + scorecard_sarif_upload_normalization_violations(workflow_content) + ) missing.extend( ossf_scorecard_publish_restriction_violations( - workflow_path.read_text(encoding="utf-8"), workflow_path + workflow_content, workflow_path ) ) return missing diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 622a359d..369105de 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -421,6 +421,136 @@ def test_supply_chain_check_rejects_unnormalized_scorecard_sarif_upload( ) in violations +def test_supply_chain_check_rejects_upload_step_with_unnormalized_scorecard_sarif( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure unrelated normalizer tokens cannot bless a raw Scorecard upload step.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_step_guard", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " analysis:", + " name: ossf-scorecard", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Mention normalizer without protecting upload", + " env:", + " UNUSED_SARIF_HINT: 'sarif_file: normalized-scorecard-results.sarif'", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py raw.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_accepts_colocated_non_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure non-Scorecard SARIF uploads are not forced through Scorecard normalization.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_mixed_uploads", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "combined-security.yml").write_text( + "\n".join( + [ + "name: combined-security", + "on: push", + "jobs:", + " scorecard:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + " trivy:", + " steps:", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: trivy-results.sarif", + ] + ), + encoding="utf-8", + ) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " placeholder:", + " steps:", + " - run: echo placeholder", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert not any("ossf scorecard SARIF upload" in violation for violation in violations) + + def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( tmp_path: Path, ) -> None: From bd80834833267435d6acaf4400ea76a314f8ea7b Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 19:27:42 +0900 Subject: [PATCH 8/9] Parse Scorecard SARIF upload steps --- scripts/checks/verify_supply_chain.py | 90 +++++- .../tests/test_supply_chain_policy.py | 288 +++++++++++++++++- 2 files changed, 365 insertions(+), 13 deletions(-) diff --git a/scripts/checks/verify_supply_chain.py b/scripts/checks/verify_supply_chain.py index 8d30f54c..24d7a602 100644 --- a/scripts/checks/verify_supply_chain.py +++ b/scripts/checks/verify_supply_chain.py @@ -360,11 +360,52 @@ def scorecard_sarif_upload_normalization_violations(content: str) -> list[str]: if "github/codeql-action/upload-sarif" not in content: return [] - violations: list[str] = [] - has_normalizer_step = OSSF_SARIF_NORMALIZER in content + def upload_step_sarif_file(step_lines: list[str], step_indent: int) -> str | None: + with_indent: int | None = None + for step_line in step_lines: + raw_stripped = step_line.strip().partition("#")[0].strip() + stripped = raw_stripped + is_step_start = stripped.startswith("- ") + if is_step_start: + stripped = stripped[2:].strip() + indent = len(step_line) - len(step_line.lstrip(" ")) + if with_indent is None: + if stripped == "with:" and (indent > step_indent or is_step_start): + with_indent = indent + continue + if stripped and indent <= with_indent: + break + if stripped.startswith("sarif_file:") and indent > with_indent: + return stripped.partition(":")[2].partition("#")[0].strip().strip("'\"") + return None + + def workflow_job_content(line_index: int) -> str: + job_start = 0 + for reverse_index in range(line_index, -1, -1): + candidate = lines[reverse_index] + candidate_without_comment = candidate.strip().partition("#")[0].strip() + if len(candidate) - len( + candidate.lstrip(" ") + ) == 2 and candidate_without_comment.endswith(":"): + job_start = reverse_index + break + job_end = len(lines) + for forward_index in range(job_start + 1, len(lines)): + candidate = lines[forward_index] + candidate_without_comment = candidate.strip().partition("#")[0].strip() + if len(candidate) - len( + candidate.lstrip(" ") + ) == 2 and candidate_without_comment.endswith(":"): + job_end = forward_index + break + return "\n".join(lines[job_start:job_end]) + lines = content.splitlines() + + step_blocks: list[tuple[int, int, list[str]]] = [] for index, line in enumerate(lines): - if "uses:" not in line or "github/codeql-action/upload-sarif" not in line: + stripped = line.strip() + if not stripped.startswith("- "): continue step_indent = len(line) - len(line.lstrip(" ")) step_lines = [line] @@ -374,17 +415,42 @@ def scorecard_sarif_upload_normalization_violations(content: str) -> list[str]: if following_stripped.startswith("- ") and following_indent <= step_indent: break step_lines.append(following_line) - step_content = "\n".join(step_lines) - if not any( - scorecard_sarif_token in step_content - for scorecard_sarif_token in [ - OSSF_NORMALIZED_SARIF_UPLOAD, - "sarif_file: results.sarif", - "sarif_file: scorecard-sarif/results.sarif", - ] + step_blocks.append((index, step_indent, step_lines)) + + has_normalizer_step = any( + any( + line.strip().partition("#")[0].strip().startswith("run:") + for line in step_lines + ) + and OSSF_SARIF_NORMALIZER + in "\n".join(line.partition("#")[0] for line in step_lines) + for _, _, step_lines in step_blocks + ) + + violations: list[str] = [] + for index, step_indent, step_lines in step_blocks: + if "github/codeql-action/upload-sarif" not in "\n".join( + line.partition("#")[0] for line in step_lines ): continue - if has_normalizer_step and OSSF_NORMALIZED_SARIF_UPLOAD in step_content: + sarif_file = upload_step_sarif_file(step_lines, step_indent) + job_content = workflow_job_content(index) + job_content_without_comments = "\n".join( + line.partition("#")[0] for line in job_content.splitlines() + ) + scorecard_sarif_upload = sarif_file == OSSF_NORMALIZED_SARIF or ( + sarif_file is not None + and ( + "scorecard" in sarif_file + or ( + sarif_file == "results.sarif" + and "ossf/scorecard-action" in job_content_without_comments + ) + ) + ) + if not scorecard_sarif_upload: + continue + if has_normalizer_step and sarif_file == OSSF_NORMALIZED_SARIF: continue violations.append( "ossf scorecard SARIF upload must normalize repository-level " diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 369105de..37db5fbb 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -479,6 +479,219 @@ def test_supply_chain_check_rejects_upload_step_with_unnormalized_scorecard_sari ) in violations +def test_supply_chain_check_rejects_env_spoofed_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure upload step env cannot spoof the required normalized sarif_file value.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_env_spoof_guard", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " analysis:", + " name: ossf-scorecard", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " env:", + " UNUSED_SARIF_HINT: 'sarif_file: normalized-scorecard-results.sarif'", + " with:", + " sarif_file: results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_rejects_env_spoofed_scorecard_normalizer( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure env-only normalizer mentions do not satisfy Scorecard normalization.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_normalizer_env_spoof_guard", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on: push", + "jobs:", + " analysis:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " env:", + " NORMALIZER_HINT: scripts/checks/normalize_scorecard_sarif.py", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_rejects_with_before_uses_raw_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure step field order cannot hide raw Scorecard SARIF uploads.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_step_order_guard", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on: push", + "jobs:", + " analysis:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - with:", + " sarif_file: results.sarif", + " uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_rejects_inline_comment_raw_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure sarif_file inline comments cannot hide raw Scorecard uploads.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_inline_comment_guard", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " analysis:", + " name: ossf-scorecard", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with: # upload arguments", + " sarif_file: results.sarif # raw Scorecard upload", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + def test_supply_chain_check_accepts_colocated_non_scorecard_sarif_upload( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: @@ -514,8 +727,9 @@ def test_supply_chain_check_accepts_colocated_non_scorecard_sarif_upload( "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", " with:", " sarif_file: normalized-scorecard-results.sarif", - " trivy:", + " trivy: # scanner SARIF upload", " steps:", + " # not ossf/scorecard-action", " - uses: " "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", " with:", @@ -551,6 +765,78 @@ def test_supply_chain_check_accepts_colocated_non_scorecard_sarif_upload( assert not any("ossf scorecard SARIF upload" in violation for violation in violations) +def test_supply_chain_check_accepts_colocated_generic_non_scorecard_sarif_upload( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure separate non-Scorecard jobs may upload generic SARIF filenames.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_generic_mixed_uploads", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "combined-security.yml").write_text( + "\n".join( + [ + "name: combined-security", + "on: push", + "jobs:", + " scorecard:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + " trivy: # scanner SARIF upload", + " steps:", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: results.sarif", + ] + ), + encoding="utf-8", + ) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " placeholder:", + " steps:", + " - run: echo placeholder", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert not any("ossf scorecard SARIF upload" in violation for violation in violations) + + def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( tmp_path: Path, ) -> None: From d644fb2554712ee45aff0c4e92ce6f16d6b1aa85 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 1 May 2026 20:03:44 +0900 Subject: [PATCH 9/9] Harden Scorecard SARIF upload matching --- scripts/checks/verify_supply_chain.py | 80 +++++++- .../tests/test_supply_chain_policy.py | 194 ++++++++++++++++++ 2 files changed, 263 insertions(+), 11 deletions(-) diff --git a/scripts/checks/verify_supply_chain.py b/scripts/checks/verify_supply_chain.py index 24d7a602..b4276355 100644 --- a/scripts/checks/verify_supply_chain.py +++ b/scripts/checks/verify_supply_chain.py @@ -379,6 +379,45 @@ def upload_step_sarif_file(step_lines: list[str], step_indent: int) -> str | Non return stripped.partition(":")[2].partition("#")[0].strip().strip("'\"") return None + def step_run_command(step_lines: list[str], step_indent: int) -> str: + run_indent: int | None = None + command_lines: list[str] = [] + for step_line in step_lines: + raw_stripped = step_line.strip().partition("#")[0].strip() + stripped = raw_stripped + is_step_start = stripped.startswith("- ") + if is_step_start: + stripped = stripped[2:].strip() + indent = len(step_line) - len(step_line.lstrip(" ")) + if run_indent is None: + if stripped.startswith("run:") and (indent > step_indent or is_step_start): + run_indent = indent + command_lines.append(stripped.partition(":")[2].strip()) + continue + if stripped and indent <= run_indent: + break + command_lines.append(stripped) + return "\n".join(command_lines) + + def normalizer_output_file(command: str) -> str | None: + try: + tokens = shlex.split(command) + except ValueError: + tokens = re.split(r"\s+", command) + cleaned_tokens = [token.strip("'\"") for token in tokens if token.strip("'\"")] + if cleaned_tokens and cleaned_tokens[0] in {">", ">-", "|", "|-"}: + cleaned_tokens = cleaned_tokens[1:] + if len(cleaned_tokens) < 4: + return None + if cleaned_tokens[0] not in {"python", "python3"}: + return None + if cleaned_tokens[1] != OSSF_SARIF_NORMALIZER: + return None + positional_args = cleaned_tokens[2:] + if len(positional_args) < 2: + return None + return positional_args[1] + def workflow_job_content(line_index: int) -> str: job_start = 0 for reverse_index in range(line_index, -1, -1): @@ -400,6 +439,14 @@ def workflow_job_content(line_index: int) -> str: break return "\n".join(lines[job_start:job_end]) + def workflow_job_step_blocks(line_index: int) -> list[tuple[int, int, list[str]]]: + job_content = workflow_job_content(line_index) + return [ + block + for block in step_blocks + if workflow_job_content(block[0]) == job_content + ] + lines = content.splitlines() step_blocks: list[tuple[int, int, list[str]]] = [] @@ -417,16 +464,6 @@ def workflow_job_content(line_index: int) -> str: step_lines.append(following_line) step_blocks.append((index, step_indent, step_lines)) - has_normalizer_step = any( - any( - line.strip().partition("#")[0].strip().startswith("run:") - for line in step_lines - ) - and OSSF_SARIF_NORMALIZER - in "\n".join(line.partition("#")[0] for line in step_lines) - for _, _, step_lines in step_blocks - ) - violations: list[str] = [] for index, step_indent, step_lines in step_blocks: if "github/codeql-action/upload-sarif" not in "\n".join( @@ -438,6 +475,23 @@ def workflow_job_content(line_index: int) -> str: job_content_without_comments = "\n".join( line.partition("#")[0] for line in job_content.splitlines() ) + job_blocks = workflow_job_step_blocks(index) + normalizer_run_commands = [ + step_run_command(normalizer_step_lines, normalizer_step_indent) + for _, normalizer_step_indent, normalizer_step_lines in job_blocks + ] + normalizer_outputs = { + output + for command in normalizer_run_commands + if (output := normalizer_output_file(command)) is not None + } + job_has_scorecard_artifact_source = ( + "ossf/scorecard-action" in job_content_without_comments + or ( + "actions/download-artifact" in job_content_without_comments + and "ossf-scorecard-results" in job_content_without_comments + ) + ) scorecard_sarif_upload = sarif_file == OSSF_NORMALIZED_SARIF or ( sarif_file is not None and ( @@ -450,7 +504,11 @@ def workflow_job_content(line_index: int) -> str: ) if not scorecard_sarif_upload: continue - if has_normalizer_step and sarif_file == OSSF_NORMALIZED_SARIF: + if ( + job_has_scorecard_artifact_source + and sarif_file is not None + and sarif_file in normalizer_outputs + ): continue violations.append( "ossf scorecard SARIF upload must normalize repository-level " diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 37db5fbb..968093c0 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -837,6 +837,200 @@ def test_supply_chain_check_accepts_colocated_generic_non_scorecard_sarif_upload assert not any("ossf scorecard SARIF upload" in violation for violation in violations) +def test_supply_chain_check_rejects_mismatched_scorecard_normalizer_output( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure upload-sarif only accepts the same normalized file the job produced.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_mismatched_normalizer_output", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " scorecard:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " - uses: " + "actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1", + " with:", + " name: ossf-scorecard-results", + " path: results.sarif", + " scorecard-sarif-upload:", + " steps:", + " - uses: " + "actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " path: scorecard-sarif", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " normalized-scorecard-results.sarif", + " other-normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_rejects_shell_spoofed_normalizer_output( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure shell tokens after the normalizer target cannot spoof output matching.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_shell_spoofed_output", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " scorecard:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " scorecard-sarif-upload:", + " steps:", + " - uses: " + "actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " other-normalized-scorecard-results.sarif", + " && cp other-normalized-scorecard-results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + +def test_supply_chain_check_rejects_echo_spoofed_normalizer_command( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure mentioning the normalizer in a non-executing command is rejected.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_sarif_echo_spoofed_normalizer", + ) + default_branch_ref = "format('refs/heads/{0}', github.event.repository.default_branch)" + publish_guard = supply_chain.OSSF_DEFAULT_BRANCH_PUBLISH_GUARD.partition(": ")[2] + + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on:", + " push:", + " branches:", + " - develop", + " - main", + " schedule:", + " - cron: '30 1 * * 1'", + "jobs:", + " scorecard:", + " steps:", + " - uses: " + "ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3", + f" if: github.ref == {default_branch_ref}", + " with:", + f" publish_results: {publish_guard}", + " scorecard-sarif-upload:", + " steps:", + " - uses: " + "actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " - name: Mention normalizer without running it", + " run: >-", + " echo python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: " + "github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard SARIF upload must normalize repository-level placeholder URIs " + "before upload-sarif" + ) in violations + + def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( tmp_path: Path, ) -> None: