Skip to content

Commit 00151b3

Browse files
committed
Improve e2e test stability in CI
- Free ~14GB disk on GitHub runners (up from ~2GB) by removing additional unused pre-installed toolchains
- Reduce KinD cluster from 3 to 2 nodes to lower disk and resource pressure
- Add kubectl_apply_with_retry with exponential backoff for external URL fetches during cluster setup to survive transient 502 errors
- Increase func deploy retries from 3/5s flat to 5/exponential backoff (10s-80s) to handle in-cluster-dialer pod instability
1 parent 91e9cec commit 00151b3

3 files changed

Lines changed: 49 additions & 16 deletions

File tree

.github/workflows/test-e2e.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,17 @@ jobs:
2323
steps:
2424
- name: Free up disk space
2525
run: |
26-
# Remove large packages to free up disk space on GitHub runners
27-
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
26+
# Remove large pre-installed toolchains not needed for K8s operator testing
27+
sudo rm -rf \
28+
/usr/share/dotnet \
29+
/usr/local/lib/android \
30+
/opt/ghc \
31+
/opt/hostedtoolcache \
32+
/usr/local/share/powershell \
33+
/usr/share/swift \
34+
/usr/local/.ghcup \
35+
/usr/local/share/chromium \
36+
/usr/local/lib/heroku
2837
# Clean up Docker to start fresh
2938
docker system prune -af --volumes
3039
df -h

hack/create-kind-cluster.sh

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,27 @@ function header_text {
3030
echo "$header$*$reset"
3131
}
3232

33+
# Runs `kubectl apply` with the given arguments, retrying with exponential
# backoff when it fails (e.g. transient errors while fetching remote manifests).
#
# Globals:
#   KUBECTL_APPLY_MAX_ATTEMPTS (read, optional) - total attempts, default 5
#   KUBECTL_APPLY_RETRY_DELAY  (read, optional) - initial delay in seconds,
#                                                 default 5; doubled per retry
# Arguments:
#   "$@" - passed through unchanged to `kubectl apply`
# Outputs:
#   Progress/failure messages via header_text
# Returns:
#   0 as soon as one attempt succeeds, 1 if every attempt fails
function kubectl_apply_with_retry() {
  local -r max_attempts="${KUBECTL_APPLY_MAX_ATTEMPTS:-5}"
  local delay="${KUBECTL_APPLY_RETRY_DELAY:-5}"
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if kubectl apply "$@"; then
      return 0
    fi

    # Only wait when another attempt will follow; no sleep after the last one.
    if ((attempt < max_attempts)); then
      header_text "kubectl apply failed (attempt $attempt/$max_attempts), retrying in ${delay}s..."
      sleep "$delay"
      delay=$((delay * 2))
    fi
  done

  header_text "kubectl apply failed after $max_attempts attempts"
  return 1
}
}
53+
3354
function delete_existing_cluster() {
3455
header_text "Deleting existing Kind cluster..."
3556
kind delete cluster --name "$CLUSTER_NAME" || true
@@ -84,8 +105,6 @@ nodes:
84105
image: kindest/node:$NODE_VERSION
85106
- role: worker
86107
image: kindest/node:$NODE_VERSION
87-
- role: worker
88-
image: kindest/node:$NODE_VERSION
89108
containerdConfigPatches:
90109
- |-
91110
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:$REGISTRY_PORT"]
@@ -122,7 +141,7 @@ EOF
122141

123142
function install_tekton() {
124143
header_text "Install Tekton"
125-
kubectl apply -f https://infra.tekton.dev/tekton-releases/pipeline/previous/${TEKTON_VERSION}/release.yaml
144+
kubectl_apply_with_retry -f https://infra.tekton.dev/tekton-releases/pipeline/previous/${TEKTON_VERSION}/release.yaml
126145
kubectl patch configmap feature-flags -n tekton-pipelines --type merge -p '{"data":{"coschedule":"disabled"}}'
127146

128147
header_text "Waiting for Tekton to be ready..."
@@ -132,9 +151,9 @@ function install_tekton() {
132151

133152
function install_knative_serving() {
134153
header_text "Installing Knative Serving..."
135-
kubectl apply -f https://github.com/knative/serving/releases/download/knative-${SERVING_VERSION}/serving-crds.yaml
136-
kubectl apply -f https://github.com/knative/serving/releases/download/knative-${SERVING_VERSION}/serving-core.yaml
137-
kubectl apply -f https://github.com/knative/net-kourier/releases/download/knative-${SERVING_VERSION}/kourier.yaml
154+
kubectl_apply_with_retry -f https://github.com/knative/serving/releases/download/knative-${SERVING_VERSION}/serving-crds.yaml
155+
kubectl_apply_with_retry -f https://github.com/knative/serving/releases/download/knative-${SERVING_VERSION}/serving-core.yaml
156+
kubectl_apply_with_retry -f https://github.com/knative/net-kourier/releases/download/knative-${SERVING_VERSION}/kourier.yaml
138157

139158
kubectl patch configmap/config-network \
140159
--namespace knative-serving \
@@ -148,14 +167,14 @@ function install_knative_serving() {
148167

149168
function install_keda() {
150169
header_text "Installing keda"
151-
kubectl apply --server-side -f https://github.com/kedacore/keda/releases/download/${KEDA_VERSION}/keda-${KEDA_VERSION:1}.yaml
152-
kubectl apply --server-side -f https://github.com/kedacore/keda/releases/download/${KEDA_VERSION}/keda-${KEDA_VERSION:1}-core.yaml
170+
kubectl_apply_with_retry --server-side -f https://github.com/kedacore/keda/releases/download/${KEDA_VERSION}/keda-${KEDA_VERSION:1}.yaml
171+
kubectl_apply_with_retry --server-side -f https://github.com/kedacore/keda/releases/download/${KEDA_VERSION}/keda-${KEDA_VERSION:1}-core.yaml
153172
header_text "Waiting for Keda to become ready"
154173
kubectl wait deployment --all --timeout=-1s --for=condition=Available --namespace keda
155174

156175
header_text "Installing keda HTTP add-on"
157-
kubectl apply --server-side -f https://github.com/kedacore/http-add-on/releases/download/${KEDA_HTTP_ADDON_VERSION}/keda-add-ons-http-${KEDA_HTTP_ADDON_VERSION:1}-crds.yaml
158-
kubectl apply --server-side -f https://github.com/kedacore/http-add-on/releases/download/${KEDA_HTTP_ADDON_VERSION}/keda-add-ons-http-${KEDA_HTTP_ADDON_VERSION:1}.yaml
176+
kubectl_apply_with_retry --server-side -f https://github.com/kedacore/http-add-on/releases/download/${KEDA_HTTP_ADDON_VERSION}/keda-add-ons-http-${KEDA_HTTP_ADDON_VERSION:1}-crds.yaml
177+
kubectl_apply_with_retry --server-side -f https://github.com/kedacore/http-add-on/releases/download/${KEDA_HTTP_ADDON_VERSION}/keda-add-ons-http-${KEDA_HTTP_ADDON_VERSION:1}.yaml
159178
header_text "Waiting for Keda HTTP add-on to become ready"
160179
kubectl wait deployment --all --timeout=-1s --for=condition=Available --namespace keda
161180
}

test/utils/func.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,16 @@ func RunFuncDeploy(functionDir string, optFns ...FuncDeployOption) (string, erro
8484
var output string
8585
var err error
8686

87-
// Retry up to 3 times with 5s delay between attempts
88-
for attempt := 0; attempt < 3; attempt++ {
87+
maxAttempts := 5
88+
retryDelay := 10 * time.Second
89+
90+
for attempt := 0; attempt < maxAttempts; attempt++ {
8991
if attempt > 0 {
90-
time.Sleep(5 * time.Second)
91-
_, _ = fmt.Fprintf(ginkgo.GinkgoWriter, "func deploy attempt %d failed: %v (retrying)\n", attempt, err)
92+
_, _ = fmt.Fprintf(ginkgo.GinkgoWriter,
93+
"func deploy attempt %d/%d failed: %v (retrying in %s)\n",
94+
attempt, maxAttempts, err, retryDelay)
95+
time.Sleep(retryDelay)
96+
retryDelay *= 2
9297
}
9398

9499
if opts.CliVersion != "" {

0 commit comments

Comments
 (0)