ContextualWisdomLab · seonghobae · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
@@ -33,7 +33,28 @@ jobs:
           name: ossf-scorecard-results
           path: results.sarif
           retention-days: 5
+  # Separate job that downloads the `ossf-scorecard-results` artifact, rewrites
+  # its repository-level placeholder locations, and uploads the rewritten SARIF.
+  scorecard-sarif-upload:
+    name: scorecard-sarif-upload
+    needs: analysis
+    # Run only when the triggering ref is the repository's default branch.
+    if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch)
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+    steps:
+      # The checkout provides scripts/checks/normalize_scorecard_sarif.py used below.
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+      # Places the artifact contents under scorecard-sarif/.
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          name: ossf-scorecard-results
+          path: scorecard-sarif
+      - name: Normalize repository-level Scorecard SARIF locations
+        # Arguments: <source SARIF> <target SARIF>; the folded scalar joins the
+        # lines below into a single command.
+        run: >-
+          python3 scripts/checks/normalize_scorecard_sarif.py
+          scorecard-sarif/results.sarif
+          normalized-scorecard-results.sarif
       - uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2 peeled commit; SHA pinning retained as supply-chain attack mitigation.
-        if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch)
         with:
-          sarif_file: results.sarif
+          # Upload the normalizer's output rather than the raw Scorecard file.
+          sarif_file: normalized-scorecard-results.sarif
@@ -0,0 +1,86 @@
+"""Normalize OSSF Scorecard SARIF so GitHub can ingest repository findings."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+# Exact artifactLocation URI that marks a location for rewriting; it is
+# compared verbatim in normalize_scorecard_sarif.
+SCORECARD_REPOSITORY_PLACEHOLDER_URI = "no file associated with this alert"
+# Repository-relative path written in place of the placeholder URI.
+SCORECARD_WORKFLOW_URI = ".github/workflows/ossf-scorecard.yml"
+
+
+def normalize_scorecard_sarif(source: Path, target: Path) -> int:
+    """Rewrite repository-level Scorecard placeholder URIs and return change count."""
+    sarif = json.loads(source.read_text(encoding="utf-8"))
+    rewritten = 0
+
+    runs = sarif.get("runs", []) if isinstance(sarif, dict) else []
+    if not isinstance(runs, list):
+        runs = []
+    for run in runs:
+        if not isinstance(run, dict):
+            continue
+        results = run.get("results", [])
+        if not isinstance(results, list):
+            continue
+        for result in results:
+            if not isinstance(result, dict):
+                continue
+            locations = result.get("locations", [])
+            if not isinstance(locations, list):
+                continue
+            for location in locations:
+                if not isinstance(location, dict):
+                    continue
+                physical_location = location.get("physicalLocation")
+                if not isinstance(physical_location, dict):
+                    continue
+                artifact_location = physical_location.get("artifactLocation")
+                if not isinstance(artifact_location, dict):
+                    continue
+                if artifact_location.get("uri") != SCORECARD_REPOSITORY_PLACEHOLDER_URI:
+                    continue
+                artifact_location["uri"] = SCORECARD_WORKFLOW_URI
+                region = physical_location.get("region")
+                if not isinstance(region, dict):
+                    region = {}
+                    physical_location["region"] = region
+                start_line = region.get("startLine")
+                if type(start_line) is not int or start_line < 1:
+                    region["startLine"] = 1
+                properties = physical_location.get("properties")
+                if not isinstance(properties, dict):
+                    properties = {}
+                    physical_location["properties"] = properties
+                properties["bandscopeOriginalUri"] = (
+                    SCORECARD_REPOSITORY_PLACEHOLDER_URI
+                )
+                properties["bandscopeRepositoryLevelFinding"] = True
+                rewritten += 1
+
+    target.write_text(
+        json.dumps(sarif, indent=2, sort_keys=True) + "\n", encoding="utf-8"
+    )
+    return rewritten
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Normalize OSSF Scorecard SARIF repository-level locations."
+    )
+    parser.add_argument("source", type=Path, help="Path to the Scorecard SARIF file")
+    parser.add_argument("target", type=Path, help="Path to write normalized SARIF")
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Normalize a Scorecard SARIF file from the command line."""
+    args = parse_args()
+    rewritten = normalize_scorecard_sarif(args.source, args.target)
+    print(f"Normalized {rewritten} OSSF Scorecard repository-level SARIF locations")
+
+
+if __name__ == "__main__":
+    main()
@@ -45,6 +45,9 @@
     "ossf scorecard publishing job must only contain uses steps; split run steps "
     "into a separate non-publishing job"
 )
+# Script path that must appear as the first argument of a `python`/`python3`
+# run command for that command to count as the SARIF normalizer.
+OSSF_SARIF_NORMALIZER = "scripts/checks/normalize_scorecard_sarif.py"
+# File name the upload step's `sarif_file` is compared against.
+OSSF_NORMALIZED_SARIF = "normalized-scorecard-results.sarif"
+# NOTE(review): not referenced in the visible hunks; presumably used elsewhere
+# in this file -- confirm.
+OSSF_NORMALIZED_SARIF_UPLOAD = f"sarif_file: {OSSF_NORMALIZED_SARIF}"
 RELEASE_ARTIFACT_GLOB = re.compile(r"(?:^|\s)artifacts/\*")
 RELEASE_ASSET_VALIDATOR = (
     "scripts/release/select_release_assets.py --output release-assets.txt"
@@ -350,6 +353,170 @@ def evaluate_job(job_lines: list[str], start_line: int) -> None:
     return violations
 
 
+def scorecard_sarif_upload_normalization_violations(content: str) -> list[str]:
+    """Return one message per Scorecard SARIF upload that bypasses the normalizer.
+
+    ``content`` is the text of a workflow file. It is scanned line by line
+    using indentation and string prefixes; no YAML parser is involved. The
+    result is empty when the text mentions neither ``ossf/scorecard-action``
+    nor ``github/codeql-action/upload-sarif``.
+
+    An ``upload-sarif`` step is treated as a Scorecard upload when its
+    ``sarif_file`` equals ``OSSF_NORMALIZED_SARIF``, contains ``scorecard``,
+    or is ``results.sarif`` inside a job that uses ``ossf/scorecard-action``.
+    Such a step is accepted only if its job has a Scorecard artifact source
+    and a run command in the same job invokes ``OSSF_SARIF_NORMALIZER`` with
+    that ``sarif_file`` as its second positional argument. Every other
+    Scorecard upload contributes one violation message.
+    """
+    if "ossf/scorecard-action" not in content:
+        return []
+    if "github/codeql-action/upload-sarif" not in content:
+        return []
+
+    def upload_step_sarif_file(step_lines: list[str], step_indent: int) -> str | None:
+        """Return the ``sarif_file`` value under the step's ``with:`` key, or None.
+
+        Trailing ``#`` comments and surrounding quotes are removed from the value.
+        """
+        with_indent: int | None = None
+        for step_line in step_lines:
+            raw_stripped = step_line.strip().partition("#")[0].strip()
+            stripped = raw_stripped
+            is_step_start = stripped.startswith("- ")
+            if is_step_start:
+                # Drop the list marker so `- with:` is recognised like `with:`.
+                stripped = stripped[2:].strip()
+            indent = len(step_line) - len(step_line.lstrip(" "))
+            if with_indent is None:
+                # Still looking for the `with:` key of this step.
+                if stripped == "with:" and (indent > step_indent or is_step_start):
+                    with_indent = indent
+                continue
+            # A non-empty line at or left of `with:` ends the mapping.
+            if stripped and indent <= with_indent:
+                break
+            if stripped.startswith("sarif_file:") and indent > with_indent:
+                return stripped.partition(":")[2].partition("#")[0].strip().strip("'\"")
+        return None
+
+    def step_run_command(step_lines: list[str], step_indent: int) -> str:
+        """Return the step's ``run:`` text joined with newlines, or "" if none.
+
+        The first element is whatever follows ``run:`` on the same line, which
+        may be a block-scalar indicator; later elements are the comment-stripped
+        lines indented deeper than the ``run:`` line.
+        """
+        run_indent: int | None = None
+        command_lines: list[str] = []
+        for step_line in step_lines:
+            raw_stripped = step_line.strip().partition("#")[0].strip()
+            stripped = raw_stripped
+            is_step_start = stripped.startswith("- ")
+            if is_step_start:
+                stripped = stripped[2:].strip()
+            indent = len(step_line) - len(step_line.lstrip(" "))
+            if run_indent is None:
+                # Still looking for the `run:` key of this step.
+                if stripped.startswith("run:") and (indent > step_indent or is_step_start):
+                    run_indent = indent
+                    command_lines.append(stripped.partition(":")[2].strip())
+                continue
+            # A non-empty line at or left of `run:` ends the command text.
+            if stripped and indent <= run_indent:
+                break
+            command_lines.append(stripped)
+        return "\n".join(command_lines)
+
+    def normalizer_output_file(command: str) -> str | None:
+        """Return the normalizer's second positional argument, or None.
+
+        None is returned unless the command is ``python``/``python3`` followed
+        by ``OSSF_SARIF_NORMALIZER`` and at least two further tokens.
+        """
+        try:
+            tokens = shlex.split(command)
+        except ValueError:
+            # shlex rejected the text; fall back to whitespace splitting.
+            tokens = re.split(r"\s+", command)
+        cleaned_tokens = [token.strip("'\"") for token in tokens if token.strip("'\"")]
+        # Skip a leading YAML block-scalar indicator captured from the `run:` line.
+        if cleaned_tokens and cleaned_tokens[0] in {">", ">-", "|", "|-"}:
+            cleaned_tokens = cleaned_tokens[1:]
+        if len(cleaned_tokens) < 4:
+            return None
+        if cleaned_tokens[0] not in {"python", "python3"}:
+            return None
+        if cleaned_tokens[1] != OSSF_SARIF_NORMALIZER:
+            return None
+        positional_args = cleaned_tokens[2:]
+        if len(positional_args) < 2:
+            return None
+        return positional_args[1]
+
+    def workflow_job_content(line_index: int) -> str:
+        """Return the text of the job-like section that contains ``line_index``.
+
+        The section starts at the nearest line at or before ``line_index`` that
+        is indented exactly two spaces and ends with ``:`` (line 0 when there is
+        none) and stops before the next such line.
+        """
+        job_start = 0
+        for reverse_index in range(line_index, -1, -1):
+            candidate = lines[reverse_index]
+            candidate_without_comment = candidate.strip().partition("#")[0].strip()
+            if len(candidate) - len(
+                candidate.lstrip(" ")
+            ) == 2 and candidate_without_comment.endswith(":"):
+                job_start = reverse_index
+                break
+        job_end = len(lines)
+        for forward_index in range(job_start + 1, len(lines)):
+            candidate = lines[forward_index]
+            candidate_without_comment = candidate.strip().partition("#")[0].strip()
+            if len(candidate) - len(
+                candidate.lstrip(" ")
+            ) == 2 and candidate_without_comment.endswith(":"):
+                job_end = forward_index
+                break
+        return "\n".join(lines[job_start:job_end])
+
+    def workflow_job_step_blocks(line_index: int) -> list[tuple[int, int, list[str]]]:
+        """Return the step blocks whose job text equals that of ``line_index``."""
+        job_content = workflow_job_content(line_index)
+        return [
+            block
+            for block in step_blocks
+            if workflow_job_content(block[0]) == job_content
+        ]
+
+    lines = content.splitlines()
+
+    # Collect (start index, indent, lines) for every line that begins with "- ";
+    # each block runs until the next "- " line at the same or a shallower indent.
+    step_blocks: list[tuple[int, int, list[str]]] = []
+    for index, line in enumerate(lines):
+        stripped = line.strip()
+        if not stripped.startswith("- "):
+            continue
+        step_indent = len(line) - len(line.lstrip(" "))
+        step_lines = [line]
+        for following_line in lines[index + 1 :]:
+            following_stripped = following_line.strip()
+            following_indent = len(following_line) - len(following_line.lstrip(" "))
+            if following_stripped.startswith("- ") and following_indent <= step_indent:
+                break
+            step_lines.append(following_line)
+        step_blocks.append((index, step_indent, step_lines))
+
+    violations: list[str] = []
+    for index, step_indent, step_lines in step_blocks:
+        # Only blocks that mention upload-sarif outside of comments are checked.
+        if "github/codeql-action/upload-sarif" not in "\n".join(
+            line.partition("#")[0] for line in step_lines
+        ):
+            continue
+        sarif_file = upload_step_sarif_file(step_lines, step_indent)
+        job_content = workflow_job_content(index)
+        job_content_without_comments = "\n".join(
+            line.partition("#")[0] for line in job_content.splitlines()
+        )
+        job_blocks = workflow_job_step_blocks(index)
+        normalizer_run_commands = [
+            step_run_command(normalizer_step_lines, normalizer_step_indent)
+            for _, normalizer_step_indent, normalizer_step_lines in job_blocks
+        ]
+        # Output paths of every normalizer invocation found in the same job.
+        normalizer_outputs = {
+            output
+            for command in normalizer_run_commands
+            if (output := normalizer_output_file(command)) is not None
+        }
+        # The job either runs Scorecard itself or downloads its results artifact.
+        job_has_scorecard_artifact_source = (
+            "ossf/scorecard-action" in job_content_without_comments
+            or (
+                "actions/download-artifact" in job_content_without_comments
+                and "ossf-scorecard-results" in job_content_without_comments
+            )
+        )
+        scorecard_sarif_upload = sarif_file == OSSF_NORMALIZED_SARIF or (
+            sarif_file is not None
+            and (
+                "scorecard" in sarif_file
+                or (
+                    sarif_file == "results.sarif"
+                    and "ossf/scorecard-action" in job_content_without_comments
+                )
+            )
+        )
+        if not scorecard_sarif_upload:
+            continue
+        # Compliant: the uploaded file is produced by a normalizer in this job.
+        if (
+            job_has_scorecard_artifact_source
+            and sarif_file is not None
+            and sarif_file in normalizer_outputs
+        ):
+            continue
+        violations.append(
+            "ossf scorecard SARIF upload must normalize repository-level "
+            "placeholder URIs before upload-sarif"
+        )
+    return violations
+
+
 def verify_workflow_coverage() -> list[str]:
     """Return workflow trigger and artifact coverage violations."""
     missing: list[str] = []
@@ -457,9 +624,13 @@ def verify_workflow_coverage() -> list[str]:
             Path(".github/workflows").glob("*.yaml")
         )
         for workflow_path in workflow_paths:
+            workflow_content = workflow_path.read_text(encoding="utf-8")
+            missing.extend(
+                scorecard_sarif_upload_normalization_violations(workflow_content)
+            )
             missing.extend(
                 ossf_scorecard_publish_restriction_violations(
-                    workflow_path.read_text(encoding="utf-8"), workflow_path
+                    workflow_content, workflow_path
                 )
             )
     return missing