From 7f7f6b76d1b4cef0ede04b7df0e7fe7deed61669 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:15:28 +0800 Subject: [PATCH 01/18] [CI] Add retry-based PR checkout in integration workflow Replace dual actions/checkout steps with a manual git fetch strategy that retries up to 15 times (20s interval) before failing, to reduce transient network/proxy checkout flakes without changing low-speed threshold settings. --- .github/workflows/integration_test.yml | 48 ++++++++++++++++++-------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index b3f7be3..0c5154b 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -36,22 +36,40 @@ jobs: git config --global http.proxy "${{ secrets.HTTP_PROXY }}" git config --global https.proxy "${{ secrets.HTTPS_PROXY }}" - - name: Checkout base branch - uses: actions/checkout@v6 - timeout-minutes: 5 - with: - ref: ${{ github.event.pull_request.base.sha }} - fetch-depth: 1 - persist-credentials: false + - name: Checkout PR head with retry + timeout-minutes: 10 + run: | + set -euo pipefail + repo_url="https://github.com/${{ github.repository }}.git" + + git init . 
+ git remote add origin "$repo_url" + + for attempt in $(seq 1 15); do + echo "[checkout] attempt ${attempt}/15" + if timeout 1m git fetch --no-tags --prune --depth=1 origin \ + "${{ github.event.pull_request.base.sha }}" \ + "${{ github.event.pull_request.head.sha }}"; then + break + fi + + if [ "$attempt" -eq 15 ]; then + echo "[checkout] failed after 15 attempts" + exit 1 + fi + + echo "[checkout] fetch timeout/failure, retry in 20s (per-attempt timeout: 1m)" + sleep 20 + done - - name: Checkout PR head - uses: actions/checkout@v6 - timeout-minutes: 5 - with: - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 1 - persist-credentials: false - clean: false + git checkout --force "${{ github.event.pull_request.head.sha }}" + git clean -fdx + git reset --hard "${{ github.event.pull_request.head.sha }}" + git rev-parse "${{ github.event.pull_request.base.sha }}" + git rev-parse "${{ github.event.pull_request.head.sha }}" + git log -1 --oneline + git diff --name-only "${{ github.event.pull_request.base.sha }}"..."${{ github.event.pull_request.head.sha }}" | head -n 20 + echo "[checkout] done" - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py From 8ad3c1fa81fedfb4e60c6102fe630942f80dbab7 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:38:41 +0800 Subject: [PATCH 02/18] [CI] Make manual checkout idempotent on existing runners Handle pre-existing .git directories by reusing/updating origin remote instead of blindly running `git remote add origin`. --- .github/workflows/integration_test.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 0c5154b..88f6293 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -42,8 +42,12 @@ jobs: set -euo pipefail repo_url="https://github.com/${{ github.repository }}.git" - git init . 
- git remote add origin "$repo_url" + if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git remote set-url origin "$repo_url" 2>/dev/null || git remote add origin "$repo_url" + else + git init . + git remote add origin "$repo_url" + fi for attempt in $(seq 1 15); do echo "[checkout] attempt ${attempt}/15" From c775139e066b7edef5bf6d1c1c59969d26737858 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:42:49 +0800 Subject: [PATCH 03/18] [CI] Add direct-network fallback for checkout fetch retries Keep strict low-speed thresholds while using proxy in early attempts, then fall back to proxy-unset direct fetch with relaxed low-speed limits for later retries. --- .github/workflows/integration_test.yml | 28 ++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 88f6293..41ea1a0 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -51,9 +51,29 @@ jobs: for attempt in $(seq 1 15); do echo "[checkout] attempt ${attempt}/15" - if timeout 1m git fetch --no-tags --prune --depth=1 origin \ - "${{ github.event.pull_request.base.sha }}" \ - "${{ github.event.pull_request.head.sha }}"; then + + if [ "$attempt" -le 8 ]; then + echo "[checkout] mode=proxy strict(lowSpeed=100/10)" + fetch_cmd=( + timeout 1m + git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 + fetch --no-tags --prune --depth=1 origin + "${{ github.event.pull_request.base.sha }}" + "${{ github.event.pull_request.head.sha }}" + ) + else + echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" + fetch_cmd=( + env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY + timeout 1m + git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 + fetch --no-tags --prune --depth=1 origin + "${{ github.event.pull_request.base.sha }}" + "${{ github.event.pull_request.head.sha }}" + ) + fi 
+ + if "${fetch_cmd[@]}"; then break fi @@ -62,7 +82,7 @@ jobs: exit 1 fi - echo "[checkout] fetch timeout/failure, retry in 20s (per-attempt timeout: 1m)" + echo "[checkout] fetch timeout/failure, retry in 20s" sleep 20 done From 0cfe89b474cfc31104e5051eb9f2e953b9c6990b Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:45:01 +0800 Subject: [PATCH 04/18] [CI] Reduce checkout retry sleep interval to 5 seconds Shorten retry backoff from 20s to 5s to speed up recovery when transient network errors clear quickly. --- .github/workflows/integration_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 41ea1a0..09c4d3a 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -82,8 +82,8 @@ jobs: exit 1 fi - echo "[checkout] fetch timeout/failure, retry in 20s" - sleep 20 + echo "[checkout] fetch timeout/failure, retry in 5s" + sleep 5 done git checkout --force "${{ github.event.pull_request.head.sha }}" From 73e512f1734ac320479cfff36b4ecb1f54d2d62f Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:51:02 +0800 Subject: [PATCH 05/18] [CI] Clean stale git lock files during checkout retries Remove common stale .git lock files before/after each retry attempt so leftover locks from previous interrupted runs don't block fetch. 
--- .github/workflows/integration_test.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 09c4d3a..78ed91e 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -49,7 +49,16 @@ jobs: git remote add origin "$repo_url" fi + cleanup_git_locks() { + rm -f .git/shallow.lock \ + .git/index.lock \ + .git/packed-refs.lock \ + .git/FETCH_HEAD.lock \ + .git/config.lock + } + for attempt in $(seq 1 15); do + cleanup_git_locks echo "[checkout] attempt ${attempt}/15" if [ "$attempt" -le 8 ]; then @@ -82,10 +91,12 @@ jobs: exit 1 fi + cleanup_git_locks echo "[checkout] fetch timeout/failure, retry in 5s" sleep 5 done + cleanup_git_locks git checkout --force "${{ github.event.pull_request.head.sha }}" git clean -fdx git reset --hard "${{ github.event.pull_request.head.sha }}" From bcdf94d6943126b7418a176d490961a2f5ffcdf7 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 16:58:01 +0800 Subject: [PATCH 06/18] [CI] Avoid merge-base dependency in checkout debug diff Use two-dot diff (`base..head`) instead of three-dot to prevent `no merge base` failures when only shallow commit objects are fetched. 
--- .github/workflows/integration_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 78ed91e..d494398 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -103,7 +103,7 @@ jobs: git rev-parse "${{ github.event.pull_request.base.sha }}" git rev-parse "${{ github.event.pull_request.head.sha }}" git log -1 --oneline - git diff --name-only "${{ github.event.pull_request.base.sha }}"..."${{ github.event.pull_request.head.sha }}" | head -n 20 + git diff --name-only "${{ github.event.pull_request.base.sha }}".."${{ github.event.pull_request.head.sha }}" | head -n 20 echo "[checkout] done" - name: Check Chinese Characters From bd2e67dbc1a0bd28c33122a3b48abfb98294691c Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 17:01:24 +0800 Subject: [PATCH 07/18] [CI] Alternate strict and relaxed fetch modes per retry Switch checkout retries to strict/relaxed alternating strategy (odd attempts strict via proxy, even attempts relaxed direct mode). 
--- .github/workflows/integration_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index d494398..fa3ede7 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -61,7 +61,7 @@ jobs: cleanup_git_locks echo "[checkout] attempt ${attempt}/15" - if [ "$attempt" -le 8 ]; then + if [ $((attempt % 2)) -eq 1 ]; then echo "[checkout] mode=proxy strict(lowSpeed=100/10)" fetch_cmd=( timeout 1m From fae921dffffead958bc51beb8f87c14a5c8f1299 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 20:28:41 +0800 Subject: [PATCH 08/18] [CI] Extract PR checkout retry script Move the long retry-based PR checkout logic out of integration_test.yml into .github/scripts/checkout_pr.sh, and keep the workflow step minimal with only REPO_URL/BASE_SHA/HEAD_SHA inputs. --- .github/scripts/checkout_pr.sh | 96 ++++++++++++++++++++++++++ .github/workflows/integration_test.yml | 72 ++----------------- 2 files changed, 101 insertions(+), 67 deletions(-) create mode 100644 .github/scripts/checkout_pr.sh diff --git a/.github/scripts/checkout_pr.sh b/.github/scripts/checkout_pr.sh new file mode 100644 index 0000000..158d50e --- /dev/null +++ b/.github/scripts/checkout_pr.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash + +# Copyright (c) 2026 SandAI. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Robust PR checkout with retry for self-hosted runners. +# +# Required env vars (set by the caller / GitHub Actions): +# REPO_URL – https clone URL of the repository +# BASE_SHA – base commit SHA of the pull request +# HEAD_SHA – head commit SHA of the pull request +# +# Usage (in a workflow step): +# env: +# REPO_URL: https://github.com/${{ github.repository }}.git +# BASE_SHA: ${{ github.event.pull_request.base.sha }} +# HEAD_SHA: ${{ github.event.pull_request.head.sha }} +# run: bash .github/scripts/checkout_pr.sh + +set -euo pipefail + +MAX_ATTEMPTS=15 +SLEEP_SECS=5 + +if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git remote set-url origin "$REPO_URL" 2>/dev/null || git remote add origin "$REPO_URL" +else + git init . + git remote add origin "$REPO_URL" +fi + +cleanup_git_locks() { + rm -f .git/shallow.lock \ + .git/index.lock \ + .git/packed-refs.lock \ + .git/FETCH_HEAD.lock \ + .git/config.lock +} + +for attempt in $(seq 1 "$MAX_ATTEMPTS"); do + cleanup_git_locks + echo "[checkout] attempt ${attempt}/${MAX_ATTEMPTS}" + + if [ $((attempt % 2)) -eq 1 ]; then + echo "[checkout] mode=proxy strict(lowSpeed=100/10)" + fetch_cmd=( + timeout 1m + git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 + fetch --no-tags --prune --depth=1 origin + "$BASE_SHA" "$HEAD_SHA" + ) + else + echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" + fetch_cmd=( + env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY + timeout 1m + git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 + fetch --no-tags --prune --depth=1 origin + "$BASE_SHA" "$HEAD_SHA" + ) + fi + + if "${fetch_cmd[@]}"; then + break + fi + + if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then + echo "[checkout] failed after ${MAX_ATTEMPTS} attempts" + exit 1 + fi + + cleanup_git_locks + echo "[checkout] fetch timeout/failure, retry in ${SLEEP_SECS}s" + sleep "$SLEEP_SECS" +done + +cleanup_git_locks +git checkout --force "$HEAD_SHA" +git clean -fdx +git reset --hard 
"$HEAD_SHA" +git rev-parse "$BASE_SHA" +git rev-parse "$HEAD_SHA" +git log -1 --oneline +git diff --name-only "${BASE_SHA}..${HEAD_SHA}" | head -n 20 +echo "[checkout] done" diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index fa3ede7..9060b2e 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -38,73 +38,11 @@ jobs: - name: Checkout PR head with retry timeout-minutes: 10 - run: | - set -euo pipefail - repo_url="https://github.com/${{ github.repository }}.git" - - if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then - git remote set-url origin "$repo_url" 2>/dev/null || git remote add origin "$repo_url" - else - git init . - git remote add origin "$repo_url" - fi - - cleanup_git_locks() { - rm -f .git/shallow.lock \ - .git/index.lock \ - .git/packed-refs.lock \ - .git/FETCH_HEAD.lock \ - .git/config.lock - } - - for attempt in $(seq 1 15); do - cleanup_git_locks - echo "[checkout] attempt ${attempt}/15" - - if [ $((attempt % 2)) -eq 1 ]; then - echo "[checkout] mode=proxy strict(lowSpeed=100/10)" - fetch_cmd=( - timeout 1m - git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 - fetch --no-tags --prune --depth=1 origin - "${{ github.event.pull_request.base.sha }}" - "${{ github.event.pull_request.head.sha }}" - ) - else - echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" - fetch_cmd=( - env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY - timeout 1m - git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 - fetch --no-tags --prune --depth=1 origin - "${{ github.event.pull_request.base.sha }}" - "${{ github.event.pull_request.head.sha }}" - ) - fi - - if "${fetch_cmd[@]}"; then - break - fi - - if [ "$attempt" -eq 15 ]; then - echo "[checkout] failed after 15 attempts" - exit 1 - fi - - cleanup_git_locks - echo "[checkout] fetch timeout/failure, retry in 5s" - sleep 5 - done - - cleanup_git_locks - git checkout --force "${{ 
github.event.pull_request.head.sha }}" - git clean -fdx - git reset --hard "${{ github.event.pull_request.head.sha }}" - git rev-parse "${{ github.event.pull_request.base.sha }}" - git rev-parse "${{ github.event.pull_request.head.sha }}" - git log -1 --oneline - git diff --name-only "${{ github.event.pull_request.base.sha }}".."${{ github.event.pull_request.head.sha }}" | head -n 20 - echo "[checkout] done" + run: bash .github/scripts/checkout_pr.sh + env: + REPO_URL: https://github.com/${{ github.repository }}.git + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py From 18bfea52fcd6cbd8b254937790a6287f92b73178 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 20:36:12 +0800 Subject: [PATCH 09/18] [CI] Inline checkout retry script in workflow Inline the checkout retry logic back into integration_test.yml so the step does not depend on an external script file that may be unavailable in runner workspaces. 
--- .github/workflows/integration_test.yml | 72 ++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 9060b2e..02b6a9b 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -38,11 +38,73 @@ jobs: - name: Checkout PR head with retry timeout-minutes: 10 - run: bash .github/scripts/checkout_pr.sh - env: - REPO_URL: https://github.com/${{ github.repository }}.git - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + repo_url="https://github.com/${{ github.repository }}.git" + base_sha="${{ github.event.pull_request.base.sha }}" + head_sha="${{ github.event.pull_request.head.sha }}" + + if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git remote set-url origin "$repo_url" 2>/dev/null || git remote add origin "$repo_url" + else + git init . 
+ git remote add origin "$repo_url" + fi + + cleanup_git_locks() { + rm -f .git/shallow.lock \ + .git/index.lock \ + .git/packed-refs.lock \ + .git/FETCH_HEAD.lock \ + .git/config.lock + } + + for attempt in $(seq 1 15); do + cleanup_git_locks + echo "[checkout] attempt ${attempt}/15" + + if [ $((attempt % 2)) -eq 1 ]; then + echo "[checkout] mode=proxy strict(lowSpeed=100/10)" + fetch_cmd=( + timeout 1m + git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 + fetch --no-tags --prune --depth=1 origin + "$base_sha" "$head_sha" + ) + else + echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" + fetch_cmd=( + env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY + timeout 1m + git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 + fetch --no-tags --prune --depth=1 origin + "$base_sha" "$head_sha" + ) + fi + + if "${fetch_cmd[@]}"; then + break + fi + + if [ "$attempt" -eq 15 ]; then + echo "[checkout] failed after 15 attempts" + exit 1 + fi + + cleanup_git_locks + echo "[checkout] fetch timeout/failure, retry in 5s" + sleep 5 + done + + cleanup_git_locks + git checkout --force "$head_sha" + git clean -fdx + git reset --hard "$head_sha" + git rev-parse "$base_sha" + git rev-parse "$head_sha" + git log -1 --oneline + git diff --name-only "$base_sha".."$head_sha" | head -n 20 + echo "[checkout] done" - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py From 6fb0991578d9d56509529fa7478ce58bc6bff13f Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 20:49:27 +0800 Subject: [PATCH 10/18] [CI] Bootstrap checkout script via GitHub API Download .github/scripts/checkout_pr.sh from PR head SHA via GitHub contents API before running checkout, so the workflow no longer depends on local script availability in stale workspaces. 
--- .github/workflows/integration_test.yml | 74 ++++++-------------------- 1 file changed, 17 insertions(+), 57 deletions(-) diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 02b6a9b..5cc0896 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -40,71 +40,31 @@ jobs: timeout-minutes: 10 run: | set -euo pipefail - repo_url="https://github.com/${{ github.repository }}.git" - base_sha="${{ github.event.pull_request.base.sha }}" - head_sha="${{ github.event.pull_request.head.sha }}" - if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then - git remote set-url origin "$repo_url" 2>/dev/null || git remote add origin "$repo_url" - else - git init . - git remote add origin "$repo_url" - fi + script_path=".github/scripts/checkout_pr.sh" + api_url="https://api.github.com/repos/${{ github.repository }}/contents/${script_path}?ref=${{ github.event.pull_request.head.sha }}" - cleanup_git_locks() { - rm -f .git/shallow.lock \ - .git/index.lock \ - .git/packed-refs.lock \ - .git/FETCH_HEAD.lock \ - .git/config.lock - } - - for attempt in $(seq 1 15); do - cleanup_git_locks - echo "[checkout] attempt ${attempt}/15" - - if [ $((attempt % 2)) -eq 1 ]; then - echo "[checkout] mode=proxy strict(lowSpeed=100/10)" - fetch_cmd=( - timeout 1m - git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 - fetch --no-tags --prune --depth=1 origin - "$base_sha" "$head_sha" - ) - else - echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" - fetch_cmd=( - env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY - timeout 1m - git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 - fetch --no-tags --prune --depth=1 origin - "$base_sha" "$head_sha" - ) - fi - - if "${fetch_cmd[@]}"; then + mkdir -p .github/scripts + for attempt in $(seq 1 10); do + echo "[checkout-bootstrap] download script attempt ${attempt}/10" + if curl -fsSL \ + -H "Authorization: Bearer ${{ 
github.token }}" \ + -H "Accept: application/vnd.github.raw" \ + "$api_url" -o "$script_path"; then break fi - - if [ "$attempt" -eq 15 ]; then - echo "[checkout] failed after 15 attempts" + if [ "$attempt" -eq 10 ]; then + echo "[checkout-bootstrap] failed to download checkout script" exit 1 fi - - cleanup_git_locks - echo "[checkout] fetch timeout/failure, retry in 5s" - sleep 5 + sleep 3 done - cleanup_git_locks - git checkout --force "$head_sha" - git clean -fdx - git reset --hard "$head_sha" - git rev-parse "$base_sha" - git rev-parse "$head_sha" - git log -1 --oneline - git diff --name-only "$base_sha".."$head_sha" | head -n 20 - echo "[checkout] done" + chmod +x "$script_path" + REPO_URL="https://github.com/${{ github.repository }}.git" \ + BASE_SHA="${{ github.event.pull_request.base.sha }}" \ + HEAD_SHA="${{ github.event.pull_request.head.sha }}" \ + bash "$script_path" - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py From cf8d1e8e0be144758d2b0fafe617a855a37b4a0a Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 21:20:40 +0800 Subject: [PATCH 11/18] [CI] Use GitHub API tarball download as primary checkout strategy git-fetch consistently times out on self-hosted runners due to network instability. Switch to downloading the HEAD tarball via GitHub REST API (single HTTP request, curl-based) as the primary method, with git-fetch kept as a fallback. --- .github/scripts/checkout_pr.sh | 155 +++++++++++++++---------- .github/workflows/integration_test.yml | 2 + 2 files changed, 94 insertions(+), 63 deletions(-) diff --git a/.github/scripts/checkout_pr.sh b/.github/scripts/checkout_pr.sh index 158d50e..1db1bb4 100644 --- a/.github/scripts/checkout_pr.sh +++ b/.github/scripts/checkout_pr.sh @@ -14,83 +14,112 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Robust PR checkout with retry for self-hosted runners. 
+# Robust PR checkout for self-hosted runners. # -# Required env vars (set by the caller / GitHub Actions): -# REPO_URL – https clone URL of the repository -# BASE_SHA – base commit SHA of the pull request -# HEAD_SHA – head commit SHA of the pull request +# Strategy: +# 1. Download HEAD tarball via GitHub API (fast, single-file HTTP) +# 2. Fallback to git-fetch if API download fails # -# Usage (in a workflow step): -# env: -# REPO_URL: https://github.com/${{ github.repository }}.git -# BASE_SHA: ${{ github.event.pull_request.base.sha }} -# HEAD_SHA: ${{ github.event.pull_request.head.sha }} -# run: bash .github/scripts/checkout_pr.sh +# Required env vars: +# REPO_URL – https clone URL (e.g. https://github.com/org/repo.git) +# BASE_SHA – PR base commit SHA +# HEAD_SHA – PR head commit SHA +# GITHUB_TOKEN – GitHub token for API auth (set automatically by Actions) +# +# Optional env vars: +# GITHUB_REPOSITORY – owner/repo (for API URL construction) set -euo pipefail -MAX_ATTEMPTS=15 +MAX_ATTEMPTS=10 SLEEP_SECS=5 -if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then - git remote set-url origin "$REPO_URL" 2>/dev/null || git remote add origin "$REPO_URL" -else - git init . 
- git remote add origin "$REPO_URL" +# ── Derive repo slug from REPO_URL if GITHUB_REPOSITORY is not set ── +if [ -z "${GITHUB_REPOSITORY:-}" ]; then + GITHUB_REPOSITORY=$(echo "$REPO_URL" | sed -E 's|.*github\.com/||; s|\.git$||') fi -cleanup_git_locks() { - rm -f .git/shallow.lock \ - .git/index.lock \ - .git/packed-refs.lock \ - .git/FETCH_HEAD.lock \ - .git/config.lock +# ── Method 1: tarball via GitHub API ───────────────────────────────── +download_tarball() { + local tarball_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/tarball/${HEAD_SHA}" + local tarball="/tmp/checkout_${HEAD_SHA}.tar.gz" + + for attempt in $(seq 1 "$MAX_ATTEMPTS"); do + echo "[checkout] tarball download attempt ${attempt}/${MAX_ATTEMPTS}" + if curl -fsSL --retry 3 --retry-delay 2 \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + -L "$tarball_url" -o "$tarball"; then + + echo "[checkout] tarball downloaded, extracting..." + # tarball contains a top-level directory like org-repo-/ + # strip it so files land in current directory + tar xzf "$tarball" --strip-components=1 + rm -f "$tarball" + return 0 + fi + + if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then + echo "[checkout] tarball download failed after ${MAX_ATTEMPTS} attempts" + return 1 + fi + sleep "$SLEEP_SECS" + done } -for attempt in $(seq 1 "$MAX_ATTEMPTS"); do - cleanup_git_locks - echo "[checkout] attempt ${attempt}/${MAX_ATTEMPTS}" - - if [ $((attempt % 2)) -eq 1 ]; then - echo "[checkout] mode=proxy strict(lowSpeed=100/10)" - fetch_cmd=( - timeout 1m - git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 - fetch --no-tags --prune --depth=1 origin - "$BASE_SHA" "$HEAD_SHA" - ) +# ── Method 2: git fetch (fallback) ────────────────────────────────── +git_fetch_fallback() { + if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git remote set-url origin "$REPO_URL" 2>/dev/null || git remote add origin "$REPO_URL" else - echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" 
- fetch_cmd=( - env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY - timeout 1m - git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 - fetch --no-tags --prune --depth=1 origin - "$BASE_SHA" "$HEAD_SHA" - ) + git init . + git remote add origin "$REPO_URL" fi - if "${fetch_cmd[@]}"; then - break - fi + cleanup_git_locks() { + rm -f .git/shallow.lock .git/index.lock .git/packed-refs.lock \ + .git/FETCH_HEAD.lock .git/config.lock + } - if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then - echo "[checkout] failed after ${MAX_ATTEMPTS} attempts" - exit 1 - fi + for attempt in $(seq 1 "$MAX_ATTEMPTS"); do + cleanup_git_locks + echo "[checkout] git-fetch attempt ${attempt}/${MAX_ATTEMPTS}" + + if [ $((attempt % 2)) -eq 1 ]; then + fetch_cmd=(timeout 2m git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 + fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") + else + fetch_cmd=(env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY + timeout 2m git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 + fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") + fi + + if "${fetch_cmd[@]}"; then + cleanup_git_locks + git checkout --force "$HEAD_SHA" + git clean -fdx + git reset --hard "$HEAD_SHA" + return 0 + fi + + if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then + echo "[checkout] git-fetch failed after ${MAX_ATTEMPTS} attempts" + return 1 + fi + cleanup_git_locks + sleep "$SLEEP_SECS" + done +} + +# ── Main ───────────────────────────────────────────────────────────── +echo "[checkout] HEAD_SHA=${HEAD_SHA}" +echo "[checkout] BASE_SHA=${BASE_SHA}" + +if download_tarball; then + echo "[checkout] tarball checkout succeeded" +else + echo "[checkout] tarball failed, falling back to git-fetch" + git_fetch_fallback +fi - cleanup_git_locks - echo "[checkout] fetch timeout/failure, retry in ${SLEEP_SECS}s" - sleep "$SLEEP_SECS" -done - -cleanup_git_locks -git checkout --force "$HEAD_SHA" -git 
clean -fdx -git reset --hard "$HEAD_SHA" -git rev-parse "$BASE_SHA" -git rev-parse "$HEAD_SHA" -git log -1 --oneline -git diff --name-only "${BASE_SHA}..${HEAD_SHA}" | head -n 20 echo "[checkout] done" diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 5cc0896..fb0bf8c 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -62,6 +62,8 @@ jobs: chmod +x "$script_path" REPO_URL="https://github.com/${{ github.repository }}.git" \ + GITHUB_REPOSITORY="${{ github.repository }}" \ + GITHUB_TOKEN="${{ github.token }}" \ BASE_SHA="${{ github.event.pull_request.base.sha }}" \ HEAD_SHA="${{ github.event.pull_request.head.sha }}" \ bash "$script_path" From 8aa026fef4f3b95526ab3d2dffe5bbc2a896b337 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 21:26:45 +0800 Subject: [PATCH 12/18] [CI] Build synthetic git history from tarballs for diff support After downloading HEAD/BASE tarballs via GitHub API, create a local git repo with two commits (base -> head) and git-replace refs so that `git rev-parse <sha>` and `git diff base..head` work correctly for downstream CI steps (check_chinese_chars, pre-commit, etc.). --- .github/scripts/checkout_pr.sh | 106 +++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 24 deletions(-) diff --git a/.github/scripts/checkout_pr.sh b/.github/scripts/checkout_pr.sh index 1db1bb4..76ece58 100644 --- a/.github/scripts/checkout_pr.sh +++ b/.github/scripts/checkout_pr.sh @@ -17,8 +17,10 @@ # Robust PR checkout for self-hosted runners. # # Strategy: -# 1. Download HEAD tarball via GitHub API (fast, single-file HTTP) -# 2. Fallback to git-fetch if API download fails +# 1. Download HEAD & BASE tarballs via GitHub API (fast, curl-based) +# Then synthesize a local git repo with two commits so that +# downstream steps (git diff, pre-commit, etc.) work normally. +# 2. 
Fallback to traditional git-fetch if tarball download fails. # # Required env vars: # REPO_URL – https clone URL (e.g. https://github.com/org/repo.git) @@ -39,42 +41,91 @@ if [ -z "${GITHUB_REPOSITORY:-}" ]; then GITHUB_REPOSITORY=$(echo "$REPO_URL" | sed -E 's|.*github\.com/||; s|\.git$||') fi -# ── Method 1: tarball via GitHub API ───────────────────────────────── -download_tarball() { - local tarball_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/tarball/${HEAD_SHA}" - local tarball="/tmp/checkout_${HEAD_SHA}.tar.gz" +# ── Helper: download a tarball for a given SHA ─────────────────────── +download_sha_tarball() { + local sha="$1" + local dest="$2" + local tarball_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/tarball/${sha}" for attempt in $(seq 1 "$MAX_ATTEMPTS"); do - echo "[checkout] tarball download attempt ${attempt}/${MAX_ATTEMPTS}" - if curl -fsSL --retry 3 --retry-delay 2 \ + echo "[checkout] tarball(${sha:0:8}) attempt ${attempt}/${MAX_ATTEMPTS}" + if curl -fsSL --retry 3 --retry-delay 2 --max-time 120 \ -H "Authorization: Bearer ${GITHUB_TOKEN}" \ -H "Accept: application/vnd.github+json" \ - -L "$tarball_url" -o "$tarball"; then - - echo "[checkout] tarball downloaded, extracting..." - # tarball contains a top-level directory like org-repo-/ - # strip it so files land in current directory - tar xzf "$tarball" --strip-components=1 - rm -f "$tarball" + -L "$tarball_url" -o "$dest"; then return 0 fi - if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then - echo "[checkout] tarball download failed after ${MAX_ATTEMPTS} attempts" return 1 fi sleep "$SLEEP_SECS" done } +# ── Method 1: tarball + synthetic git history ──────────────────────── +tarball_checkout() { + local head_tar="/tmp/checkout_head_${HEAD_SHA}.tar.gz" + local base_tar="/tmp/checkout_base_${BASE_SHA}.tar.gz" + + echo "[checkout] downloading HEAD tarball..." + if ! 
download_sha_tarball "$HEAD_SHA" "$head_tar"; then + echo "[checkout] HEAD tarball download failed" + return 1 + fi + + echo "[checkout] downloading BASE tarball..." + if ! download_sha_tarball "$BASE_SHA" "$base_tar"; then + echo "[checkout] BASE tarball download failed" + rm -f "$head_tar" + return 1 + fi + + # Clean working directory (keep .git if it exists, we'll reinit) + rm -rf .git + git init . + git config user.email "ci@sandai.org" + git config user.name "CI" + + # Commit 1: BASE + echo "[checkout] extracting BASE tarball..." + tar xzf "$base_tar" --strip-components=1 + rm -f "$base_tar" + git add -A + GIT_COMMITTER_DATE="2000-01-01T00:00:00Z" \ + GIT_AUTHOR_DATE="2000-01-01T00:00:00Z" \ + git commit --allow-empty -m "base ${BASE_SHA}" + # Tag the commit so we can reference it by the original SHA + git tag "sha-base" + + # Commit 2: HEAD (clear everything, then extract head tarball) + git rm -rf . > /dev/null 2>&1 || true + echo "[checkout] extracting HEAD tarball..." + tar xzf "$head_tar" --strip-components=1 + rm -f "$head_tar" + git add -A + GIT_COMMITTER_DATE="2000-01-02T00:00:00Z" \ + GIT_AUTHOR_DATE="2000-01-02T00:00:00Z" \ + git commit --allow-empty -m "head ${HEAD_SHA}" + git tag "sha-head" + + # Create replace refs so that `git rev-parse ` resolves + local base_local head_local + base_local=$(git rev-parse sha-base) + head_local=$(git rev-parse sha-head) + git replace "$BASE_SHA" "$base_local" 2>/dev/null || true + git replace "$HEAD_SHA" "$head_local" 2>/dev/null || true + + echo "[checkout] synthetic git history created" + echo "[checkout] BASE ${BASE_SHA} -> ${base_local}" + echo "[checkout] HEAD ${HEAD_SHA} -> ${head_local}" + return 0 +} + # ── Method 2: git fetch (fallback) ────────────────────────────────── git_fetch_fallback() { - if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then - git remote set-url origin "$REPO_URL" 2>/dev/null || git remote add origin "$REPO_URL" - else - git init . 
- git remote add origin "$REPO_URL" - fi + rm -rf .git + git init . + git remote add origin "$REPO_URL" cleanup_git_locks() { rm -f .git/shallow.lock .git/index.lock .git/packed-refs.lock \ @@ -86,9 +137,11 @@ git_fetch_fallback() { echo "[checkout] git-fetch attempt ${attempt}/${MAX_ATTEMPTS}" if [ $((attempt % 2)) -eq 1 ]; then + echo "[checkout] mode=proxy strict(lowSpeed=100/10)" fetch_cmd=(timeout 2m git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") else + echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" fetch_cmd=(env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY timeout 2m git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") @@ -115,11 +168,16 @@ git_fetch_fallback() { echo "[checkout] HEAD_SHA=${HEAD_SHA}" echo "[checkout] BASE_SHA=${BASE_SHA}" -if download_tarball; then +if tarball_checkout; then echo "[checkout] tarball checkout succeeded" else echo "[checkout] tarball failed, falling back to git-fetch" git_fetch_fallback fi +echo "[checkout] verifying..." +git rev-parse "$BASE_SHA" +git rev-parse "$HEAD_SHA" +git log --oneline --all | head -5 +git diff --stat "$BASE_SHA" "$HEAD_SHA" | tail -3 echo "[checkout] done" From 35fda7269e7e4f2b8b91fd87369e0c97dc5cdf4b Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 21:33:08 +0800 Subject: [PATCH 13/18] [CI] Rewrite checkout script in Python for clarity Replace bash checkout_pr.sh with checkout_pr.py. Strategy unchanged: git-fetch with retry first, tarball fallback with synthetic git history. 
--- .github/scripts/checkout_pr.py | 201 +++++++++++++++++++++++++ .github/scripts/checkout_pr.sh | 183 ---------------------- .github/workflows/integration_test.yml | 24 +-- 3 files changed, 213 insertions(+), 195 deletions(-) create mode 100644 .github/scripts/checkout_pr.py delete mode 100644 .github/scripts/checkout_pr.sh diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py new file mode 100644 index 0000000..0f9c7a4 --- /dev/null +++ b/.github/scripts/checkout_pr.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2026 SandAI. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Robust PR checkout for self-hosted runners. + +Strategy: git-fetch with retry → tarball fallback (synthetic git history). + +Env vars: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY. 
+""" + +from __future__ import annotations + +import glob +import os +import re +import shutil +import subprocess +import sys +import tarfile +import time +import urllib.request + +RETRIES = 10 +SLEEP = 5 + +REPO_URL = os.environ["REPO_URL"] +BASE_SHA = os.environ["BASE_SHA"] +HEAD_SHA = os.environ["HEAD_SHA"] +TOKEN = os.environ["GITHUB_TOKEN"] +REPO = os.environ.get("GITHUB_REPOSITORY") or re.sub(r".*github\.com/", "", REPO_URL).removesuffix(".git") + + +def log(msg: str) -> None: + print(f"[checkout] {msg}", flush=True) + + +def sh(cmd: str, *, check: bool = True, env: dict | None = None) -> int: + """Run a shell command, return exit code.""" + log(f"$ {cmd}") + return subprocess.run(cmd, shell=True, check=check, env=env).returncode + + +def cleanup_locks() -> None: + for f in glob.glob(".git/*.lock"): + os.remove(f) + + +def no_proxy_env() -> dict[str, str]: + env = os.environ.copy() + for k in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"): + env.pop(k, None) + return env + + +# ── Method 1: git fetch ───────────────────────────────────────────── + + +def git_fetch_checkout() -> bool: + if sh("git rev-parse --is-inside-work-tree", check=False) == 0: + sh(f"git remote set-url origin {REPO_URL}", check=False) + else: + sh("git init .") + sh(f"git remote add origin {REPO_URL}") + + for i in range(1, RETRIES + 1): + cleanup_locks() + log(f"fetch attempt {i}/{RETRIES}") + + if i % 2 == 1: + log("mode=proxy strict") + cmd = ( + f"timeout 120 git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10" + f" fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + ) + env = None + else: + log("mode=direct relaxed") + cmd = ( + f"timeout 120 git -c http.proxy= -c https.proxy=" + f" -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30" + f" fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + ) + env = no_proxy_env() + + if sh(cmd, check=False, env=env) == 0: + cleanup_locks() + sh(f"git checkout --force {HEAD_SHA}") + sh("git clean -fdx") + 
sh(f"git reset --hard {HEAD_SHA}") + return True + + if i < RETRIES: + cleanup_locks() + log(f"retry in {SLEEP}s") + time.sleep(SLEEP) + + log("git-fetch failed") + return False + + +# ── Method 2: tarball + synthetic git history ──────────────────────── + + +def _download(sha: str, dest: str) -> bool: + url = f"https://api.github.com/repos/{REPO}/tarball/{sha}" + req = urllib.request.Request(url, headers={"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}) + for i in range(1, RETRIES + 1): + log(f"tarball({sha[:8]}) attempt {i}/{RETRIES}") + try: + with urllib.request.urlopen(req, timeout=120) as r, open(dest, "wb") as f: + shutil.copyfileobj(r, f) + return True + except Exception as e: + log(f" error: {e}") + if i < RETRIES: + time.sleep(SLEEP) + return False + + +def _extract(tar_path: str) -> None: + with tarfile.open(tar_path, "r:gz") as tf: + prefix = os.path.commonprefix(tf.getnames()).rstrip("/") + for m in tf.getmembers(): + if prefix: + m.name = m.name[len(prefix) :].lstrip("/") + if m.name: + tf.extract(m, ".", filter="data") + + +def _commit(msg: str, date: str) -> None: + sh("git add -A") + env = {**os.environ, "GIT_COMMITTER_DATE": date, "GIT_AUTHOR_DATE": date} + sh(f'git commit --allow-empty -m "{msg}"', env=env) + + +def tarball_checkout() -> bool: + head_tar, base_tar = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz" + try: + if not _download(HEAD_SHA, head_tar) or not _download(BASE_SHA, base_tar): + return False + + shutil.rmtree(".git", ignore_errors=True) + sh("git init . 
&& git config user.email ci@sandai.org && git config user.name CI") + + # commit base + _extract(base_tar) + _commit(f"base {BASE_SHA}", "2000-01-01T00:00:00Z") + + # commit head + sh("git rm -rf .", check=False) + _extract(head_tar) + _commit(f"head {HEAD_SHA}", "2000-01-02T00:00:00Z") + + # make real SHAs resolvable via git-replace + base_local = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip() + head_local = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() + sh(f"git replace {BASE_SHA} {base_local}", check=False) + sh(f"git replace {HEAD_SHA} {head_local}", check=False) + log(f"synthetic: BASE {BASE_SHA[:8]}→{base_local[:8]}, HEAD {HEAD_SHA[:8]}→{head_local[:8]}") + return True + finally: + for f in (head_tar, base_tar): + try: + os.remove(f) + except FileNotFoundError: + pass + + +# ── Main ───────────────────────────────────────────────────────────── + + +def main() -> None: + log(f"HEAD={HEAD_SHA}, BASE={BASE_SHA}") + + if git_fetch_checkout(): + log("git-fetch succeeded") + elif tarball_checkout(): + log("tarball fallback succeeded") + else: + log("all methods failed") + sys.exit(1) + + sh(f"git diff --stat {BASE_SHA} {HEAD_SHA} | tail -3") + log("done") + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/checkout_pr.sh b/.github/scripts/checkout_pr.sh deleted file mode 100644 index 76ece58..0000000 --- a/.github/scripts/checkout_pr.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (c) 2026 SandAI. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Robust PR checkout for self-hosted runners. -# -# Strategy: -# 1. Download HEAD & BASE tarballs via GitHub API (fast, curl-based) -# Then synthesize a local git repo with two commits so that -# downstream steps (git diff, pre-commit, etc.) work normally. -# 2. Fallback to traditional git-fetch if tarball download fails. -# -# Required env vars: -# REPO_URL – https clone URL (e.g. https://github.com/org/repo.git) -# BASE_SHA – PR base commit SHA -# HEAD_SHA – PR head commit SHA -# GITHUB_TOKEN – GitHub token for API auth (set automatically by Actions) -# -# Optional env vars: -# GITHUB_REPOSITORY – owner/repo (for API URL construction) - -set -euo pipefail - -MAX_ATTEMPTS=10 -SLEEP_SECS=5 - -# ── Derive repo slug from REPO_URL if GITHUB_REPOSITORY is not set ── -if [ -z "${GITHUB_REPOSITORY:-}" ]; then - GITHUB_REPOSITORY=$(echo "$REPO_URL" | sed -E 's|.*github\.com/||; s|\.git$||') -fi - -# ── Helper: download a tarball for a given SHA ─────────────────────── -download_sha_tarball() { - local sha="$1" - local dest="$2" - local tarball_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/tarball/${sha}" - - for attempt in $(seq 1 "$MAX_ATTEMPTS"); do - echo "[checkout] tarball(${sha:0:8}) attempt ${attempt}/${MAX_ATTEMPTS}" - if curl -fsSL --retry 3 --retry-delay 2 --max-time 120 \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - -L "$tarball_url" -o "$dest"; then - return 0 - fi - if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then - return 1 - fi - sleep "$SLEEP_SECS" - done -} - -# ── Method 1: tarball + synthetic git history ──────────────────────── -tarball_checkout() { - local head_tar="/tmp/checkout_head_${HEAD_SHA}.tar.gz" - local base_tar="/tmp/checkout_base_${BASE_SHA}.tar.gz" - - echo "[checkout] downloading HEAD tarball..." - if ! 
download_sha_tarball "$HEAD_SHA" "$head_tar"; then - echo "[checkout] HEAD tarball download failed" - return 1 - fi - - echo "[checkout] downloading BASE tarball..." - if ! download_sha_tarball "$BASE_SHA" "$base_tar"; then - echo "[checkout] BASE tarball download failed" - rm -f "$head_tar" - return 1 - fi - - # Clean working directory (keep .git if it exists, we'll reinit) - rm -rf .git - git init . - git config user.email "ci@sandai.org" - git config user.name "CI" - - # Commit 1: BASE - echo "[checkout] extracting BASE tarball..." - tar xzf "$base_tar" --strip-components=1 - rm -f "$base_tar" - git add -A - GIT_COMMITTER_DATE="2000-01-01T00:00:00Z" \ - GIT_AUTHOR_DATE="2000-01-01T00:00:00Z" \ - git commit --allow-empty -m "base ${BASE_SHA}" - # Tag the commit so we can reference it by the original SHA - git tag "sha-base" - - # Commit 2: HEAD (clear everything, then extract head tarball) - git rm -rf . > /dev/null 2>&1 || true - echo "[checkout] extracting HEAD tarball..." - tar xzf "$head_tar" --strip-components=1 - rm -f "$head_tar" - git add -A - GIT_COMMITTER_DATE="2000-01-02T00:00:00Z" \ - GIT_AUTHOR_DATE="2000-01-02T00:00:00Z" \ - git commit --allow-empty -m "head ${HEAD_SHA}" - git tag "sha-head" - - # Create replace refs so that `git rev-parse ` resolves - local base_local head_local - base_local=$(git rev-parse sha-base) - head_local=$(git rev-parse sha-head) - git replace "$BASE_SHA" "$base_local" 2>/dev/null || true - git replace "$HEAD_SHA" "$head_local" 2>/dev/null || true - - echo "[checkout] synthetic git history created" - echo "[checkout] BASE ${BASE_SHA} -> ${base_local}" - echo "[checkout] HEAD ${HEAD_SHA} -> ${head_local}" - return 0 -} - -# ── Method 2: git fetch (fallback) ────────────────────────────────── -git_fetch_fallback() { - rm -rf .git - git init . 
- git remote add origin "$REPO_URL" - - cleanup_git_locks() { - rm -f .git/shallow.lock .git/index.lock .git/packed-refs.lock \ - .git/FETCH_HEAD.lock .git/config.lock - } - - for attempt in $(seq 1 "$MAX_ATTEMPTS"); do - cleanup_git_locks - echo "[checkout] git-fetch attempt ${attempt}/${MAX_ATTEMPTS}" - - if [ $((attempt % 2)) -eq 1 ]; then - echo "[checkout] mode=proxy strict(lowSpeed=100/10)" - fetch_cmd=(timeout 2m git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 - fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") - else - echo "[checkout] mode=direct relaxed(lowSpeed=1/30)" - fetch_cmd=(env -u http_proxy -u https_proxy -u HTTP_PROXY -u HTTPS_PROXY - timeout 2m git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 - fetch --no-tags --prune --depth=1 origin "$BASE_SHA" "$HEAD_SHA") - fi - - if "${fetch_cmd[@]}"; then - cleanup_git_locks - git checkout --force "$HEAD_SHA" - git clean -fdx - git reset --hard "$HEAD_SHA" - return 0 - fi - - if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then - echo "[checkout] git-fetch failed after ${MAX_ATTEMPTS} attempts" - return 1 - fi - cleanup_git_locks - sleep "$SLEEP_SECS" - done -} - -# ── Main ───────────────────────────────────────────────────────────── -echo "[checkout] HEAD_SHA=${HEAD_SHA}" -echo "[checkout] BASE_SHA=${BASE_SHA}" - -if tarball_checkout; then - echo "[checkout] tarball checkout succeeded" -else - echo "[checkout] tarball failed, falling back to git-fetch" - git_fetch_fallback -fi - -echo "[checkout] verifying..." 
-git rev-parse "$BASE_SHA" -git rev-parse "$HEAD_SHA" -git log --oneline --all | head -5 -git diff --stat "$BASE_SHA" "$HEAD_SHA" | tail -3 -echo "[checkout] done" diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index fb0bf8c..5408587 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -38,35 +38,35 @@ jobs: - name: Checkout PR head with retry timeout-minutes: 10 + env: + REPO_URL: https://github.com/${{ github.repository }}.git + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ github.token }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | set -euo pipefail - script_path=".github/scripts/checkout_pr.sh" - api_url="https://api.github.com/repos/${{ github.repository }}/contents/${script_path}?ref=${{ github.event.pull_request.head.sha }}" + script_path=".github/scripts/checkout_pr.py" + api_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/contents/${script_path}?ref=${HEAD_SHA}" mkdir -p .github/scripts for attempt in $(seq 1 10); do - echo "[checkout-bootstrap] download script attempt ${attempt}/10" + echo "[bootstrap] download checkout script attempt ${attempt}/10" if curl -fsSL \ - -H "Authorization: Bearer ${{ github.token }}" \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ -H "Accept: application/vnd.github.raw" \ "$api_url" -o "$script_path"; then break fi if [ "$attempt" -eq 10 ]; then - echo "[checkout-bootstrap] failed to download checkout script" + echo "[bootstrap] failed to download checkout script" exit 1 fi sleep 3 done - chmod +x "$script_path" - REPO_URL="https://github.com/${{ github.repository }}.git" \ - GITHUB_REPOSITORY="${{ github.repository }}" \ - GITHUB_TOKEN="${{ github.token }}" \ - BASE_SHA="${{ github.event.pull_request.base.sha }}" \ - HEAD_SHA="${{ github.event.pull_request.head.sha }}" \ - bash "$script_path" + python3 "$script_path" - name: Check Chinese 
Characters run: python3 .github/workflows/check_chinese_chars.py From 5682eb5d528db591ac152a020bb64f48cfdb5450 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 21:35:14 +0800 Subject: [PATCH 14/18] [CI] Fix missing origin remote: fallback to git remote add --- .github/scripts/checkout_pr.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py index 0f9c7a4..f19059e 100644 --- a/.github/scripts/checkout_pr.py +++ b/.github/scripts/checkout_pr.py @@ -69,10 +69,9 @@ def no_proxy_env() -> dict[str, str]: def git_fetch_checkout() -> bool: - if sh("git rev-parse --is-inside-work-tree", check=False) == 0: - sh(f"git remote set-url origin {REPO_URL}", check=False) - else: + if sh("git rev-parse --is-inside-work-tree", check=False) != 0: sh("git init .") + if sh(f"git remote set-url origin {REPO_URL}", check=False) != 0: sh(f"git remote add origin {REPO_URL}") for i in range(1, RETRIES + 1): From 2fdda06534eceb4679fee5b08e3ba1aad5edac03 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 21:57:26 +0800 Subject: [PATCH 15/18] [CI] Prioritize tarball download over git-fetch git-fetch consistently times out on self-hosted runner; tarball via GitHub API is reliable. Swap order: tarball first, git-fetch fallback. --- .github/scripts/checkout_pr.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py index f19059e..39a6f6e 100644 --- a/.github/scripts/checkout_pr.py +++ b/.github/scripts/checkout_pr.py @@ -16,7 +16,7 @@ """Robust PR checkout for self-hosted runners. -Strategy: git-fetch with retry → tarball fallback (synthetic git history). +Strategy: tarball via GitHub API → git-fetch fallback. Env vars: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY. 
""" @@ -184,10 +184,10 @@ def tarball_checkout() -> bool: def main() -> None: log(f"HEAD={HEAD_SHA}, BASE={BASE_SHA}") - if git_fetch_checkout(): - log("git-fetch succeeded") - elif tarball_checkout(): - log("tarball fallback succeeded") + if tarball_checkout(): + log("tarball succeeded") + elif git_fetch_checkout(): + log("git-fetch fallback succeeded") else: log("all methods failed") sys.exit(1) From 279bd848635a1e8b8ca90275e09bd45b9a7d2398 Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 22:05:07 +0800 Subject: [PATCH 16/18] [CI] Use curl with proxy/direct alternation for tarball download urllib honors env proxy which may throttle large downloads. Switch to curl subprocess with alternating proxy/direct attempts. --- .github/scripts/checkout_pr.py | 35 ++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py index 39a6f6e..740de28 100644 --- a/.github/scripts/checkout_pr.py +++ b/.github/scripts/checkout_pr.py @@ -31,7 +31,6 @@ import sys import tarfile import time -import urllib.request RETRIES = 10 SLEEP = 5 @@ -115,17 +114,33 @@ def git_fetch_checkout() -> bool: def _download(sha: str, dest: str) -> bool: url = f"https://api.github.com/repos/{REPO}/tarball/{sha}" - req = urllib.request.Request(url, headers={"Authorization": f"Bearer {TOKEN}", "Accept": "application/vnd.github+json"}) for i in range(1, RETRIES + 1): - log(f"tarball({sha[:8]}) attempt {i}/{RETRIES}") - try: - with urllib.request.urlopen(req, timeout=120) as r, open(dest, "wb") as f: - shutil.copyfileobj(r, f) + via = "proxy" if i % 2 == 1 else "direct" + log(f"tarball({sha[:8]}) attempt {i}/{RETRIES} via {via}") + curl = [ + "curl", + "-fSL", + "--retry", + "2", + "--retry-delay", + "3", + "--connect-timeout", + "15", + "--max-time", + "180", + "-H", + f"Authorization: Bearer {TOKEN}", + "-H", + "Accept: application/vnd.github+json", + "-o", + dest, + url, 
+ ] + env = no_proxy_env() if via == "direct" else None + if subprocess.run(curl, check=False, env=env).returncode == 0: return True - except Exception as e: - log(f" error: {e}") - if i < RETRIES: - time.sleep(SLEEP) + if i < RETRIES: + time.sleep(SLEEP) return False From cfc86978d72760b63f7796b68ee9c3e5363983fd Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 22:18:40 +0800 Subject: [PATCH 17/18] [CI] Output local SHA via GITHUB_OUTPUT for tarball mode git-replace is unreliable for mapping real SHAs to synthetic commits. Instead, output local base_ref/head_ref via GITHUB_OUTPUT and use step outputs in downstream steps (check_chinese_chars). --- .github/scripts/checkout_pr.py | 29 +++++++++++++++++--------- .github/workflows/integration_test.yml | 5 +++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py index 740de28..2e9fdae 100644 --- a/.github/scripts/checkout_pr.py +++ b/.github/scripts/checkout_pr.py @@ -160,31 +160,27 @@ def _commit(msg: str, date: str) -> None: sh(f'git commit --allow-empty -m "{msg}"', env=env) -def tarball_checkout() -> bool: +def tarball_checkout() -> tuple[str, str] | None: + """Returns (local_base_sha, local_head_sha) on success, None on failure.""" head_tar, base_tar = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz" try: if not _download(HEAD_SHA, head_tar) or not _download(BASE_SHA, base_tar): - return False + return None shutil.rmtree(".git", ignore_errors=True) sh("git init . 
&& git config user.email ci@sandai.org && git config user.name CI") - # commit base _extract(base_tar) _commit(f"base {BASE_SHA}", "2000-01-01T00:00:00Z") - # commit head sh("git rm -rf .", check=False) _extract(head_tar) _commit(f"head {HEAD_SHA}", "2000-01-02T00:00:00Z") - # make real SHAs resolvable via git-replace base_local = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip() head_local = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() - sh(f"git replace {BASE_SHA} {base_local}", check=False) - sh(f"git replace {HEAD_SHA} {head_local}", check=False) log(f"synthetic: BASE {BASE_SHA[:8]}→{base_local[:8]}, HEAD {HEAD_SHA[:8]}→{head_local[:8]}") - return True + return base_local, head_local finally: for f in (head_tar, base_tar): try: @@ -196,18 +192,31 @@ def tarball_checkout() -> bool: # ── Main ───────────────────────────────────────────────────────────── +def _set_output(key: str, value: str) -> None: + """Write a key=value pair to $GITHUB_OUTPUT (if available).""" + path = os.environ.get("GITHUB_OUTPUT") + if path: + with open(path, "a") as f: + f.write(f"{key}={value}\n") + + def main() -> None: log(f"HEAD={HEAD_SHA}, BASE={BASE_SHA}") - if tarball_checkout(): + result = tarball_checkout() + if result: + base_ref, head_ref = result log("tarball succeeded") elif git_fetch_checkout(): + base_ref, head_ref = BASE_SHA, HEAD_SHA log("git-fetch fallback succeeded") else: log("all methods failed") sys.exit(1) - sh(f"git diff --stat {BASE_SHA} {HEAD_SHA} | tail -3") + _set_output("base_ref", base_ref) + _set_output("head_ref", head_ref) + sh(f"git diff --stat {base_ref} {head_ref} | tail -3") log("done") diff --git a/.github/workflows/integration_test.yml b/.github/workflows/integration_test.yml index 5408587..1bd83d8 100644 --- a/.github/workflows/integration_test.yml +++ b/.github/workflows/integration_test.yml @@ -37,6 +37,7 @@ jobs: git config --global https.proxy "${{ secrets.HTTPS_PROXY }}" - name: Checkout PR 
head with retry + id: checkout timeout-minutes: 10 env: REPO_URL: https://github.com/${{ github.repository }}.git @@ -71,8 +72,8 @@ jobs: - name: Check Chinese Characters run: python3 .github/workflows/check_chinese_chars.py env: - BASE_REF: ${{ github.event.pull_request.base.sha }} - HEAD_REF: ${{ github.event.pull_request.head.sha }} + BASE_REF: ${{ steps.checkout.outputs.base_ref || github.event.pull_request.base.sha }} + HEAD_REF: ${{ steps.checkout.outputs.head_ref || github.event.pull_request.head.sha }} - name: Check Code Style run: pre-commit run --show-diff-on-failure --color=always --all-files From 5740909549443c9082c24d63dbb296aaed27df7f Mon Sep 17 00:00:00 2001 From: Zhiyao Cen <2523403608@qq.com> Date: Mon, 13 Apr 2026 23:21:09 +0800 Subject: [PATCH 18/18] [CI] Condense checkout_pr.py --- .github/scripts/checkout_pr.py | 211 +++++++++++++-------------------- 1 file changed, 84 insertions(+), 127 deletions(-) diff --git a/.github/scripts/checkout_pr.py b/.github/scripts/checkout_pr.py index 2e9fdae..064f416 100644 --- a/.github/scripts/checkout_pr.py +++ b/.github/scripts/checkout_pr.py @@ -14,16 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Robust PR checkout for self-hosted runners. +"""Robust PR checkout: tarball (GitHub API) → git-fetch fallback. -Strategy: tarball via GitHub API → git-fetch fallback. - -Env vars: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY. +Env: REPO_URL, BASE_SHA, HEAD_SHA, GITHUB_TOKEN, GITHUB_REPOSITORY. 
""" from __future__ import annotations -import glob +import glob # noqa: E401 import os import re import shutil @@ -32,9 +30,7 @@ import tarfile import time -RETRIES = 10 -SLEEP = 5 - +RETRIES, SLEEP = 10, 5 REPO_URL = os.environ["REPO_URL"] BASE_SHA = os.environ["BASE_SHA"] HEAD_SHA = os.environ["HEAD_SHA"] @@ -46,98 +42,44 @@ def log(msg: str) -> None: print(f"[checkout] {msg}", flush=True) -def sh(cmd: str, *, check: bool = True, env: dict | None = None) -> int: - """Run a shell command, return exit code.""" - log(f"$ {cmd}") - return subprocess.run(cmd, shell=True, check=check, env=env).returncode - - -def cleanup_locks() -> None: - for f in glob.glob(".git/*.lock"): - os.remove(f) - - -def no_proxy_env() -> dict[str, str]: - env = os.environ.copy() - for k in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"): - env.pop(k, None) - return env - - -# ── Method 1: git fetch ───────────────────────────────────────────── - - -def git_fetch_checkout() -> bool: - if sh("git rev-parse --is-inside-work-tree", check=False) != 0: - sh("git init .") - if sh(f"git remote set-url origin {REPO_URL}", check=False) != 0: - sh(f"git remote add origin {REPO_URL}") - - for i in range(1, RETRIES + 1): - cleanup_locks() - log(f"fetch attempt {i}/{RETRIES}") - - if i % 2 == 1: - log("mode=proxy strict") - cmd = ( - f"timeout 120 git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10" - f" fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" - ) - env = None - else: - log("mode=direct relaxed") - cmd = ( - f"timeout 120 git -c http.proxy= -c https.proxy=" - f" -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30" - f" fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" - ) - env = no_proxy_env() - - if sh(cmd, check=False, env=env) == 0: - cleanup_locks() - sh(f"git checkout --force {HEAD_SHA}") - sh("git clean -fdx") - sh(f"git reset --hard {HEAD_SHA}") - return True +def sh(cmd: str, **kw) -> int: + kw.setdefault("check", True) + return subprocess.run(cmd, 
shell=True, **kw).returncode - if i < RETRIES: - cleanup_locks() - log(f"retry in {SLEEP}s") - time.sleep(SLEEP) - log("git-fetch failed") - return False +def env_no_proxy() -> dict[str, str]: + return {k: v for k, v in os.environ.items() if k.lower() not in ("http_proxy", "https_proxy")} -# ── Method 2: tarball + synthetic git history ──────────────────────── +# ── tarball checkout ───────────────────────────────────────────────── -def _download(sha: str, dest: str) -> bool: +def _curl(sha: str, dest: str) -> bool: url = f"https://api.github.com/repos/{REPO}/tarball/{sha}" for i in range(1, RETRIES + 1): - via = "proxy" if i % 2 == 1 else "direct" + via = "proxy" if i % 2 else "direct" log(f"tarball({sha[:8]}) attempt {i}/{RETRIES} via {via}") - curl = [ - "curl", - "-fSL", - "--retry", - "2", - "--retry-delay", - "3", - "--connect-timeout", - "15", - "--max-time", - "180", - "-H", - f"Authorization: Bearer {TOKEN}", - "-H", - "Accept: application/vnd.github+json", - "-o", - dest, - url, - ] - env = no_proxy_env() if via == "direct" else None - if subprocess.run(curl, check=False, env=env).returncode == 0: + rc = subprocess.run( + [ + "curl", + "-fSL", + "--retry", + "2", + "--connect-timeout", + "15", + "--max-time", + "180", + "-H", + f"Authorization: Bearer {TOKEN}", + "-H", + "Accept: application/vnd.github+json", + "-o", + dest, + url, + ], + env=env_no_proxy() if via == "direct" else None, + ).returncode + if rc == 0: return True if i < RETRIES: time.sleep(SLEEP) @@ -148,61 +90,75 @@ def _extract(tar_path: str) -> None: with tarfile.open(tar_path, "r:gz") as tf: prefix = os.path.commonprefix(tf.getnames()).rstrip("/") for m in tf.getmembers(): - if prefix: - m.name = m.name[len(prefix) :].lstrip("/") + m.name = m.name[len(prefix) :].lstrip("/") if prefix else m.name if m.name: tf.extract(m, ".", filter="data") -def _commit(msg: str, date: str) -> None: - sh("git add -A") - env = {**os.environ, "GIT_COMMITTER_DATE": date, "GIT_AUTHOR_DATE": date} - sh(f'git 
commit --allow-empty -m "{msg}"', env=env) - - def tarball_checkout() -> tuple[str, str] | None: - """Returns (local_base_sha, local_head_sha) on success, None on failure.""" - head_tar, base_tar = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz" + """Download HEAD & BASE tarballs, build synthetic two-commit repo. + + Returns ``(local_base_sha, local_head_sha)`` or ``None``. + """ + h, b = "/tmp/_head.tar.gz", "/tmp/_base.tar.gz" try: - if not _download(HEAD_SHA, head_tar) or not _download(BASE_SHA, base_tar): + if not _curl(HEAD_SHA, h) or not _curl(BASE_SHA, b): return None - shutil.rmtree(".git", ignore_errors=True) sh("git init . && git config user.email ci@sandai.org && git config user.name CI") - - _extract(base_tar) - _commit(f"base {BASE_SHA}", "2000-01-01T00:00:00Z") - - sh("git rm -rf .", check=False) - _extract(head_tar) - _commit(f"head {HEAD_SHA}", "2000-01-02T00:00:00Z") - - base_local = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip() - head_local = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() - log(f"synthetic: BASE {BASE_SHA[:8]}→{base_local[:8]}, HEAD {HEAD_SHA[:8]}→{head_local[:8]}") - return base_local, head_local + for tar, msg, date in [ + (b, f"base {BASE_SHA}", "2000-01-01T00:00:00Z"), + (h, f"head {HEAD_SHA}", "2000-01-02T00:00:00Z"), + ]: + if tar == h: + sh("git rm -rf . 
2>/dev/null || true", check=False) + _extract(tar) + sh("git add -A") + env = {**os.environ, "GIT_COMMITTER_DATE": date, "GIT_AUTHOR_DATE": date} + sh(f'git commit --allow-empty -m "{msg}"', env=env) + base_l = subprocess.check_output(["git", "rev-parse", "HEAD~1"], text=True).strip() + head_l = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip() + log(f"synthetic: {BASE_SHA[:8]}→{base_l[:8]}, {HEAD_SHA[:8]}→{head_l[:8]}") + return base_l, head_l finally: - for f in (head_tar, base_tar): + for f in (h, b): try: os.remove(f) except FileNotFoundError: pass -# ── Main ───────────────────────────────────────────────────────────── +# ── git-fetch fallback ─────────────────────────────────────────────── + + +def git_fetch_checkout() -> bool: + if sh("git rev-parse --is-inside-work-tree", check=False) != 0: + sh("git init .") + if sh(f"git remote set-url origin {REPO_URL}", check=False) != 0: + sh(f"git remote add origin {REPO_URL}") + for i in range(1, RETRIES + 1): + for f in glob.glob(".git/*.lock"): + os.remove(f) + log(f"fetch attempt {i}/{RETRIES}") + if i % 2: + cmd = f"timeout 120 git -c http.lowSpeedLimit=100 -c http.lowSpeedTime=10 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + env = None + else: + cmd = f"timeout 120 git -c http.proxy= -c https.proxy= -c http.lowSpeedLimit=1 -c http.lowSpeedTime=30 fetch --no-tags --prune --depth=1 origin {BASE_SHA} {HEAD_SHA}" + env = env_no_proxy() + if sh(cmd, check=False, env=env) == 0: + sh(f"git checkout --force {HEAD_SHA} && git clean -fdx && git reset --hard {HEAD_SHA}") + return True + if i < RETRIES: + time.sleep(SLEEP) + return False -def _set_output(key: str, value: str) -> None: - """Write a key=value pair to $GITHUB_OUTPUT (if available).""" - path = os.environ.get("GITHUB_OUTPUT") - if path: - with open(path, "a") as f: - f.write(f"{key}={value}\n") +# ── main ───────────────────────────────────────────────────────────── def main() -> None: log(f"HEAD={HEAD_SHA}, 
BASE={BASE_SHA}") - result = tarball_checkout() if result: base_ref, head_ref = result @@ -213,9 +169,10 @@ def main() -> None: else: log("all methods failed") sys.exit(1) - - _set_output("base_ref", base_ref) - _set_output("head_ref", head_ref) + out = os.environ.get("GITHUB_OUTPUT") + if out: + with open(out, "a") as f: + f.write(f"base_ref={base_ref}\nhead_ref={head_ref}\n") sh(f"git diff --stat {base_ref} {head_ref} | tail -3") log("done")