Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 181 additions & 0 deletions .github/scripts/checkout_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env python3

# Copyright (c) 2026 SandAI. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Robust PR checkout: tarball (GitHub API) β†’ git-fetch fallback.

Env: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY.
"""

from __future__ import annotations

import glob # noqa: E401
import os
import re
import shutil
import subprocess
import sys
import tarfile
import time

RETRIES, SLEEP = 10, 5
REPO_URL = os.environ["REPO_URL"]
BASE_SHA = os.environ["BASE_SHA"]
HEAD_SHA = os.environ["HEAD_SHA"]
TOKEN = os.environ["GITHUB_TOKEN"]
REPO = os.environ.get("GITHUB_REPOSITORY") or re.sub(r".*github\.com/", "", REPO_URL).removesuffix(".git")


def log(msg: str) -> None:
print(f"[checkout] {msg}", flush=True)


def sh(cmd: str, **kw) -> int:
kw.setdefault("check", True)
return subprocess.run(cmd, shell=True, **kw).returncode


def env_no_proxy() -> dict[str, str]:
return {k: v for k, v in os.environ.items() if k.lower() not in ("http_proxy", "https_proxy")}


# ── tarball checkout ─────────────────────────────────────────────────


def _curl(sha: str, dest: str) -> bool:
url = f"https://api.github.com/repos/{REPO}/tarball/{sha}"
for i in range(1, RETRIES + 1):
via = "proxy" if i % 2 else "direct"
log(f"tarball({sha[:8]}) attempt {i}/{RETRIES} via {via}")
rc = subprocess.run(
[
"curl",
"-fSL",
"--retry",
"2",
"--connect-timeout",
"15",
"--max-time",
"180",
"-H",
f"Authorization: Bearer {TOKEN}",
"-H",
"Accept: application/vnd.github+json",
"-o",
dest,
url,
],
env=env_no_proxy() if via == "direct" else None,
).returncode
if rc == 0:
return True
if i < RETRIES:
time.sleep(SLEEP)
return False


def _extract(tar_path: str) -> None:
with tarfile.open(tar_path, "r:gz") as tf:
prefix = os.path.commonprefix(tf.getnames()).rstrip("/")
for m in tf.getmembers():
m.name = m.name[len(prefix) :].lstrip("/") if prefix else m.name
if m.name:
tf.extract(m, ".", filter="data")


def tarball_checkout() -> tuple[str, str] | None:
"""Download HEAD & BASE tarballs, build synthetic two-commit repo.

Returns ``(local_base_sha, local_head_sha)`` or ``None``.
"""
h, b = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz"
try:
if not _curl(HEAD_SHA, h) or not _curl(BASE_SHA, b):
return None
shutil.rmtree(".git", ignore_errors=True)
sh("git init . && git config user.email ci@sandai.org && git config user.name CI")
for tar, msg, date in [
(b, f"base {BASE_SHA}", "2000-01-01T00:00:00Z"),
(h, f"head {HEAD_SHA}", "2000-01-02T00:00:00Z"),
]:
if tar == h:
sh("git rm -rf . 2>/dev/null || true", check=False)
_extract(tar)
sh("git add -A")
env = {**os.environ, "GIT_COMMITTER_DATE": date, "GIT_AUTHOR_DATE": date}
sh(f'git commit --allow-empty -m "{msg}"', env=env)
base_l = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip()
head_l = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
log(f"synthetic: {BASE_SHA[:8]}β†’{base_l[:8]}, {HEAD_SHA[:8]}β†’{head_l[:8]}")
return base_l, head_l
finally:
for f in (h, b):
try:
os.remove(f)
except FileNotFoundError:
pass


# ── git-fetch fallback ───────────────────────────────────────────────


def git_fetch_checkout() -> bool:
if sh("git rev-parse --is-inside-work-tree", check=False) != 0:
sh("git init .")
if sh(f"git remote set-url origin {REPO_URL}", check=False) != 0:
sh(f"git remote add origin {REPO_URL}")
for i in range(1, RETRIES + 1):
for f in glob.glob(".git/*.lock"):
os.remove(f)
log(f"fetch attempt {i}/{RETRIES}")
if i % 2:
cmd = f"timeout 120 git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}"
env = None
else:
cmd = f"timeout 120 git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}"
env = env_no_proxy()
if sh(cmd, check=False, env=env) == 0:
sh(f"git checkout --force {HEAD_SHA} && git clean -fdx && git reset --hard {HEAD_SHA}")
return True
if i < RETRIES:
time.sleep(SLEEP)
return False


# ── main ─────────────────────────────────────────────────────────────


def main() -> None:
log(f"HEAD={HEAD_SHA}, BASE={BASE_SHA}")
result = tarball_checkout()
if result:
base_ref, head_ref = result
log("tarball succeeded")
elif git_fetch_checkout():
base_ref, head_ref = BASE_SHA, HEAD_SHA
log("git-fetch fallback succeeded")
else:
log("all methods failed")
sys.exit(1)
out = os.environ.get("GITHUB_OUTPUT")
if out:
with open(out, "a") as f:
f.write(f"base_ref={base_ref}\nhead_ref={head_ref}\n")
sh(f"git diff --stat {base_ref} {head_ref} | tail -3")
log("done")


if __name__ == "__main__":
main()
50 changes: 33 additions & 17 deletions .github/workflows/integration_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,44 @@ jobs:
git config --global http.proxy "${{ secrets.HTTP_PROXY }}"
git config --global https.proxy "${{ secrets.HTTPS_PROXY }}"

- name: Checkout base branch
uses: actions/checkout@v6
timeout-minutes: 5
with:
ref: ${{ github.event.pull_request.base.sha }}
fetch-depth: 1
persist-credentials: false
- name: Checkout PR head with retry
id: checkout
timeout-minutes: 10
env:
REPO_URL: https://github.com/${{ github.repository }}.git
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ github.token }}
BASE_SHA: ${{ github.event.pull_request.base.sha }}
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
run: |
set -euo pipefail

script_path=".github/scripts/checkout_pr.py"
api_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/contents/${script_path}?ref=${HEAD_SHA}"

mkdir -p .github/scripts
for attempt in $(seq 1 10); do
echo "[bootstrap] download checkout script attempt ${attempt}/10"
if curl -fsSL \
-H "Authorization: Bearer ${GITHUB_TOKEN}" \
-H "Accept: application/vnd.github.raw" \
"$api_url" -o "$script_path"; then
break
fi
if [ "$attempt" -eq 10 ]; then
echo "[bootstrap] failed to download checkout script"
exit 1
fi
sleep 3
done

- name: Checkout PR head
uses: actions/checkout@v6
timeout-minutes: 5
with:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 1
persist-credentials: false
clean: false
python3 "$script_path"

- name: Check Chinese Characters
run: python3 .github/workflows/check_chinese_chars.py
env:
BASE_REF: ${{ github.event.pull_request.base.sha }}
HEAD_REF: ${{ github.event.pull_request.head.sha }}
BASE_REF: ${{ steps.checkout.outputs.base_ref || github.event.pull_request.base.sha }}
HEAD_REF: ${{ steps.checkout.outputs.head_ref || github.event.pull_request.head.sha }}

- name: Check Code Style
run: pre-commit run --show-diff-on-failure --color=always --all-files
Expand Down