From 03fb7f19db876b24ef66984fcc4f2a3d17aa5467 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Mon, 15 Jun 2026 15:51:52 +0200 Subject: [PATCH 1/3] Add a fail-fast input to the OS test workflow This is useful for the release workflow, where we're only interested in successful runs and a quicker feedback loop. Signed-off-by: Tom Wieczorek (cherry picked from commit cccefbfa00d48da3cd03eac594f648a8b15f9b4a) (cherry picked from commit 36dc6250e67747ad008bf30107c3e638d5d3c074) --- .github/workflows/ostests-e2e.yaml | 17 +++++++++++++++-- .github/workflows/ostests-matrix.yaml | 5 +++++ .github/workflows/release.yml | 1 + hack/ostests/README.md | 3 +++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ostests-e2e.yaml b/.github/workflows/ostests-e2e.yaml index fe1c7cc3b7e7..77dcf0c99f92 100644 --- a/.github/workflows/ostests-e2e.yaml +++ b/.github/workflows/ostests-e2e.yaml @@ -16,6 +16,10 @@ on: type: string description: The selector for the e2e tests to be run. default: \[Conformance\] + fail-fast: + type: boolean + description: Stop the e2e test suite after the first test failure. + default: false os: type: string description: The operating system to test. 
@@ -64,6 +68,7 @@ jobs: env: E2E_FOCUS: ${{ inputs.e2e-focus }} + E2E_FAIL_FAST: ${{ inputs.fail-fast }} TF_VAR_os: ${{ inputs.os }} TF_VAR_additional_tags: '{ "ostests.k0sproject.io/github-run-id"="${{ github.run_id }}", @@ -168,12 +173,16 @@ jobs: E2E_CONCURRENCY_LEVEL: ${{ inputs.e2e-concurrency-level }} run: | make bin/sonobuoy + ginkgoArgs="-v --timeout=120m --procs=$E2E_CONCURRENCY_LEVEL" + if [ "$E2E_FAIL_FAST" = true ]; then + ginkgoArgs="$ginkgoArgs --fail-fast" + fi bin/sonobuoy run -p e2e --wait=150 \ --kubernetes-version=v"$KUBERNETES_VERSION" \ --plugin-env=e2e.E2E_PARALLEL=true \ --plugin-env=e2e.E2E_FOCUS="$E2E_FOCUS" \ --plugin-env=e2e.E2E_SKIP='\[Serial\]' \ - --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS="-v --timeout=120m --procs=$E2E_CONCURRENCY_LEVEL" + --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS="$ginkgoArgs" - name: "e2e tests :: Retrieve parallel results" id: e2e-retrieve-parallel @@ -196,11 +205,15 @@ jobs: timeout-minutes: 180 # three hours run: | make bin/sonobuoy + ginkgoArgs="-v --timeout=120m" + if [ "$E2E_FAIL_FAST" = true ]; then + ginkgoArgs="$ginkgoArgs --fail-fast" + fi bin/sonobuoy run -p e2e --wait=150 \ --kubernetes-version=v"$KUBERNETES_VERSION" \ --plugin-env=e2e.E2E_FOCUS="$E2E_FOCUS" \ --plugin-env=e2e.E2E_SKIP='' \ - --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS='-v --timeout=120m' + --plugin-env=e2e.E2E_EXTRA_GINKGO_ARGS="$ginkgoArgs" - name: "e2e tests :: Retrieve serial results" id: e2e-retrieve-serial diff --git a/.github/workflows/ostests-matrix.yaml b/.github/workflows/ostests-matrix.yaml index 238b2fea0b22..d41528916d9a 100644 --- a/.github/workflows/ostests-matrix.yaml +++ b/.github/workflows/ostests-matrix.yaml @@ -20,6 +20,10 @@ on: type: string description: The selector for the e2e tests to be run. default: \[Conformance\] + fail-fast: + type: boolean + description: Stop each e2e test suite after the first test failure. + default: false oses: type: string description: The operating systems to test. 
@@ -89,6 +93,7 @@ jobs: k0sctl-version: ${{ inputs.k0sctl-version }} e2e-concurrency-level: ${{ fromJSON(inputs.e2e-concurrency-level) }} # infamous GH workflows bug that looses type information (actions/runner#2206) e2e-focus: ${{ inputs.e2e-focus }} + fail-fast: ${{ inputs.fail-fast }} os: ${{ matrix.os }} arch: ${{ inputs.arch }} network-provider: ${{ matrix.network-provider }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index af35edcd0554..a5894341b472 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -584,6 +584,7 @@ jobs: os: debian_12 arch: ${{ matrix.arch }} network-provider: kuberouter + fail-fast: true secrets: aws-access-key-id: ${{ secrets.AWS_TERRAFORM_ID }} aws-secret-access-key: ${{ secrets.AWS_TERRAFORM_KEY }} diff --git a/hack/ostests/README.md b/hack/ostests/README.md index 11e0e68a9a0b..70884aaf2f9b 100644 --- a/hack/ostests/README.md +++ b/hack/ostests/README.md @@ -190,6 +190,9 @@ $ gh workflow run ostests-matrix.yaml --ref some/experimental/branch \ To see runs for this workflow, try: gh run list --workflow=ostests-matrix.yaml ``` +Add `-f fail-fast=true` to the above command line to stop the e2e test suite +after the first test failure has been recorded. + [gh]: https://github.com/cli/cli ## TODO From cca1b474bbc9c8aef0b96875a29de1c80b480638 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Mon, 15 Jun 2026 16:31:32 +0200 Subject: [PATCH 2/3] Tune OS test timeouts for e2e runs This mostly applies to full conformance test runs that are called from the release workflow. In order to obtain the "Certified Kubernetes" badge, these cannot be run in parallel. They currently take around two hours to complete, close to the Ginkgo test timeout. Funnily enough, the 64-bit ARM VMs are slightly faster than the 64-bit x86_64 VMs, resulting in mostly successful arm64 runs and very unreliable amd64 runs. 
Moreover, Ginkgo/sonobuoy simply report any overall test timeouts as failures of the pending tests, with no indication that the tests did not even complete. This makes it very difficult to realize that this is a timeout issue rather than a flaky test. For an all-serial workflow step, use the following timeouts: * three hours for Ginkgo, * three and a half hours for Sonobuoy, * and four hours for the workflow step. When the serial step follows a parallel step, use tighter timeouts because only the remaining serial tests should run: * thirty minutes for Ginkgo, * forty-five minutes for Sonobuoy, * and one hour for the workflow step. See: 71b3209d57 ("Use OS tests in release workflow") Signed-off-by: Tom Wieczorek (cherry picked from commit 9746895e25f5dba0f51969d5a5026e7f251b37db) (cherry picked from commit a23c8ccd95aa046edc5c90bf15464270a97b614a) --- .github/workflows/ostests-e2e.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ostests-e2e.yaml b/.github/workflows/ostests-e2e.yaml index 77dcf0c99f92..9eaf0454c140 100644 --- a/.github/workflows/ostests-e2e.yaml +++ b/.github/workflows/ostests-e2e.yaml @@ -202,14 +202,17 @@ jobs: - name: "e2e tests :: Run serial tests" working-directory: inttest - timeout-minutes: 180 # three hours + timeout-minutes: ${{ inputs.e2e-concurrency-level > 1 && 60 || 240 }} # 1 hour in parallel mode, 4 hours in serial mode + env: + K0S_SONOBUOY_WAIT: "${{ inputs.e2e-concurrency-level > 1 && 45 || 210 }}" # 45 mins in parallel mode, 3.5 hours in serial mode + K0S_GINKGO_TIMEOUT: "${{ inputs.e2e-concurrency-level > 1 && '30m' || '3h' }}" # 30 mins in parallel mode, 3 hours in serial mode run: | make bin/sonobuoy - ginkgoArgs="-v --timeout=120m" + ginkgoArgs="-v --timeout=$K0S_GINKGO_TIMEOUT" if [ "$E2E_FAIL_FAST" = true ]; then ginkgoArgs="$ginkgoArgs --fail-fast" fi - bin/sonobuoy run -p e2e --wait=150 \ + bin/sonobuoy run -p e2e --wait="$K0S_SONOBUOY_WAIT" \ 
--kubernetes-version=v"$KUBERNETES_VERSION" \ --plugin-env=e2e.E2E_FOCUS="$E2E_FOCUS" \ --plugin-env=e2e.E2E_SKIP='' \ From c3b437b899c096ce8e65a6c08953ccb6936d2406 Mon Sep 17 00:00:00 2001 From: Tom Wieczorek Date: Tue, 16 Jun 2026 09:26:54 +0200 Subject: [PATCH 3/3] Publish OS test results as GitHub step summaries Keep the format, but instead of writing it into a console group, write it into a step summary. Signed-off-by: Tom Wieczorek (cherry picked from commit cfc1ab1d4ace082d52d7126814e1e59e0f3030a8) (cherry picked from commit 255f47973ed41bcdb92648addbbcf3fe750397a7) --- .github/workflows/ostests-e2e.yaml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ostests-e2e.yaml b/.github/workflows/ostests-e2e.yaml index 9eaf0454c140..045ef85545fb 100644 --- a/.github/workflows/ostests-e2e.yaml +++ b/.github/workflows/ostests-e2e.yaml @@ -247,11 +247,18 @@ jobs: run: | fail=0 for f in sonobuoy-e2e-*.tar.gz; do - echo "::group::$f" - bin/sonobuoy results "$f" - numNotPassedOrSkipped=$(bin/sonobuoy results "$f" -p=e2e --mode=detailed | jq --slurp '[.[] | select(.status != "passed" and .status != "skipped")] | length') - echo "Number of tests that didn't pass and weren't skipped: $numNotPassedOrSkipped" - echo ::endgroup:: + { + echo "### $f" + numNotPassedOrSkipped=$(bin/sonobuoy results "$f" -p=e2e --mode=detailed | jq --slurp '[.[] | select(.status != "passed" and .status != "skipped")] | length') + echo '
' + echo '' + echo "Number of tests that didn't pass and weren't skipped: $numNotPassedOrSkipped" + echo '' + echo + echo '```text' + bin/sonobuoy results "$f" + echo '```' + } >>"$GITHUB_STEP_SUMMARY" [ "$numNotPassedOrSkipped" = 0 ] || fail=1 done [ "$fail" = 0 ] || exit 1