diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py new file mode 100644 index 0000000..064f416 --- /dev/null +++ b/.github/scripts/checkout_pr.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 SandAI. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Robust PR checkout: tarball (GitHub API) → git-fetch fallback. + +Env: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY. +""" + +from __future__ import annotations + +import glob # noqa: E401 +import os +import re +import shutil +import subprocess +import sys +import tarfile +import time + +RETRIES, SLEEP = 10, 5 +REPO_URL = os.environ["REPO_URL"] +BASE_SHA = os.environ["BASE_SHA"] +HEAD_SHA = os.environ["HEAD_SHA"] +TOKEN = os.environ["GITHUB_TOKEN"] +REPO = os.environ.get("GITHUB_REPOSITORY") or re.sub(r".*github\.com/", "", REPO_URL).removesuffix(".git") + + +def log(msg: str) -> None: + print(f"[checkout] {msg}", flush=True) + + +def sh(cmd: str, **kw) -> int: + kw.setdefault("check", True) + return subprocess.run(cmd, shell=True, **kw).returncode + + +def env_no_proxy() -> dict[str, str]: + return {k: v for k, v in os.environ.items() if k.lower() not in ("http_proxy", "https_proxy")} + + +# ── tarball checkout ───────────────────────────────────────────────── + + +def _curl(sha: str, dest: str) -> bool: + url = f"https://api.github.com/repos/{REPO}/tarball/{sha}" + for i in range(1, RETRIES + 1): + via = "proxy" if i % 2 else "direct" + log(f"tarball({sha[:8]}) attempt {i}/{RETRIES} via {via}") + rc = subprocess.run( + [ + "curl", + "-fSL", + "--retry", + "2", + "--connect-timeout", + "15", + "--max-time", + "180", + "-H", + f"Authorization: Bearer {TOKEN}", + "-H", + "Accept: application/vnd.github+json", + "-o", + dest, + url, + ], + env=env_no_proxy() if via == "direct" else None, + ).returncode + if rc == 0: + return True + if i < RETRIES: + time.sleep(SLEEP) + return False + + +def _extract(tar_path: str) -> None: + with tarfile.open(tar_path, "r:gz") as tf: + prefix = os.path.commonprefix(tf.getnames()).rstrip("/") + for m in tf.getmembers(): + m.name = m.name[len(prefix) :].lstrip("/") if prefix else m.name + if m.name: + tf.extract(m, ".", filter="data") + + +def tarball_checkout() -> tuple[str, str] | None: + """Download HEAD & BASE tarballs, build synthetic two-commit repo. + + Returns ``(local_base_sha, local_head_sha)`` or ``None``. + """ + h, b = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz" + try: + if not _curl(HEAD_SHA, h) or not _curl(BASE_SHA, b): + return None + shutil.rmtree(".git", ignore_errors=True) + sh("git init . && git config user.email ci@sandai.org && git config user.name CI") + for tar, msg, date in [ + (b, f"base {BASE_SHA}", "2000-01-01T00:00:00Z"), + (h, f"head {HEAD_SHA}", "2000-01-02T00:00:00Z"), + ]: + if tar == h: + sh("git rm -rf . 2>/dev/null || true", check=False) + _extract(tar) + sh("git add -A") + env = {**os.environ, "GIT_COMMITTER_DATE": date, "GIT_AUTHOR_DATE": date} + sh(f'git commit --allow-empty -m "{msg}"', env=env) + base_l = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip() + head_l = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() + log(f"synthetic: {BASE_SHA[:8]}→{base_l[:8]}, {HEAD_SHA[:8]}→{head_l[:8]}") + return base_l, head_l + finally: + for f in (h, b): + try: + os.remove(f) + except FileNotFoundError: + pass + + +# ── git-fetch fallback ─────────────────────────────────────────────── + + +def git_fetch_checkout() -> bool: + if sh("git rev-parse --is-inside-work-tree", check=False) != 0: + sh("git init .") + if sh(f"git remote set-url origin {REPO_URL}", check=False) != 0: + sh(f"git remote add origin {REPO_URL}") + for i in range(1, RETRIES + 1): + for f in glob.glob(".git/*.lock"): + os.remove(f) + log(f"fetch attempt {i}/{RETRIES}") + if i % 2: + cmd = f"timeout 120 git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + env = None + else: + cmd = f"timeout 120 git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + env = env_no_proxy() + if sh(cmd, check=False, env=env) == 0: + sh(f"git checkout --force {HEAD_SHA} && git clean -fdx && git reset --hard {HEAD_SHA}") + return True + if i < RETRIES: + time.sleep(SLEEP) + return False + + +# ── main ───────────────────────────────────────────────────────────── + + +def main() -> None: + log(f"HEAD={HEAD_SHA}, BASE={BASE_SHA}") + result = tarball_checkout() + if result: + base_ref, head_ref = result + log("tarball succeeded") + elif git_fetch_checkout(): + base_ref, head_ref = BASE_SHA, HEAD_SHA + log("git-fetch fallback succeeded") + else: + log("all methods failed") + sys.exit(1) + out = os.environ.get("GITHUB_OUTPUT") + if out: + with open(out, "a") as f: + f.write(f"base_ref={base_ref}\nhead_ref={head_ref}\n") + sh(f"git diff --stat {base_ref} {head_ref} | tail -3") + log("done") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index b3f7be3..1bd83d8 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -36,28 +36,44 @@ jobs: git config --global http.proxy "${{ secrets.HTTP_PROXY }}" git config --global https.proxy "${{ secrets.HTTPS_PROXY }}" - - name: Checkout base branch - uses: actions/checkout@v6 - timeout-minutes: 5 - with: - ref: ${{ github.event.pull_request.base.sha }} - fetch-depth: 1 - persist-credentials: false + - name: Checkout PR head with retry + id: checkout + timeout-minutes: 10 + env: + REPO_URL: https://github.com/${{ github.repository }}.git + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ github.token }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + + script_path=".github/scripts/checkout_pr.py" + api_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/contents/${script_path}?ref=${HEAD_SHA}" + + mkdir -p .github/scripts + for attempt in $(seq 1 10); do + echo "[bootstrap] download checkout script attempt ${attempt}/10" + if curl -fsSL \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github.raw" \ + "$api_url" -o "$script_path"; then + break + fi + if [ "$attempt" -eq 10 ]; then + echo "[bootstrap] failed to download checkout script" + exit 1 + fi + sleep 3 + done - - name: Checkout PR head - uses: actions/checkout@v6 - timeout-minutes: 5 - with: - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 1 - persist-credentials: false - clean: false + python3 "$script_path" - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py env: - BASE_REF: ${{ github.event.pull_request.base.sha }} - HEAD_REF: ${{ github.event.pull_request.head.sha }} + BASE_REF: ${{ steps.checkout.outputs.base_ref || github.event.pull_request.base.sha }} + HEAD_REF: ${{ steps.checkout.outputs.head_ref || github.event.pull_request.head.sha }} - name: Check Code Style run: pre-commit run --show-diff-on-failure --color=always --all-files