diff --git a/.github/workflows/build-image-template.yml b/.github/workflows/build-image-template.yml
index 610658c..39ff910 100644
--- a/.github/workflows/build-image-template.yml
+++ b/.github/workflows/build-image-template.yml
@@ -83,14 +83,14 @@ jobs:
       ### The publish and periodic rebuilds are based on the latest stable github release tag
       - name: Checkout latest Github Release tag (${{ inputs.git_latest_release_tag }}) ⚡️
         if: inputs.publish_to_registry == 'true'
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           ref: ${{ inputs.git_latest_release_tag }}
 
       ### The CI is based on the main branch
       - name: Checkout Repo ⚡️
         if: inputs.publish_to_registry == 'false'
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       ### Common steps between CI and Publish
       - name: Free up disk space 📦
diff --git a/.github/workflows/build-upload-spark-dist.yml b/.github/workflows/build-upload-spark-dist.yml
new file mode 100644
index 0000000..cd6e644
--- /dev/null
+++ b/.github/workflows/build-upload-spark-dist.yml
@@ -0,0 +1,123 @@
+#
+# Copyright 2025 okdp.io
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Reusable workflow: pulls the okdp/spark image matching the requested
+# versions, packs the image's Spark home directory into a .tgz tarball and
+# uploads it as an asset of the "github_tarball_release" GitHub release.
+name: Build Spark distribution tarball
+
+on:
+  workflow_call:
+    inputs:
+      spark_version:
+        description: Spark version
+        required: true
+        type: string
+      scala_version:
+        description: Scala version
+        required: true
+        type: string
+      java_version:
+        description: Java version
+        required: true
+        type: string
+      hadoop_version:
+        description: Hadoop version
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: false
+        type: string
+        default: quay.io
+      git_latest_release_tag:
+        description: The latest remote release tag
+        required: true
+        type: string
+      github_tarball_release:
+        description: "GitHub release tag where to push spark tgz tarballs"
+        required: false
+        type: string
+        default: "spark-tarballs"
+      runs-on:
+        description: GitHub Actions Runner image
+        required: false
+        type: string
+        default: "ubuntu-latest"
+
+jobs:
+  build-upload-spark-dist:
+    name: dist(scala-${{ inputs.scala_version }}, java-${{ inputs.java_version }}, hadoop-${{ inputs.hadoop_version }})
+    runs-on: ${{ inputs.runs-on }}
+    steps:
+      - name: Checkout latest Github Release tag (${{ inputs.git_latest_release_tag }}) ⚡️
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.git_latest_release_tag }}
+
+      # SPARK_DIST_NAME is handed to the following steps through GITHUB_ENV.
+      - name: Set Spark distribution name
+        run: |
+          HADOOP_MAJOR_VERSION="${{ inputs.hadoop_version }}"
+          HADOOP_MAJOR_VERSION="${HADOOP_MAJOR_VERSION%%.*}"
+
+          SPARK_MAJOR_VERSION="${{ inputs.spark_version }}"
+          SPARK_MAJOR_VERSION="${SPARK_MAJOR_VERSION%%.*}"
+
+          # Scala 2.12 builds and Scala 2.13 builds of Spark >= 4 get no scala
+          # suffix; every other combination is suffixed with -scala<version>.
+          if [[ "${{ inputs.scala_version }}" == "2.12" || ( "${{ inputs.scala_version }}" == "2.13" && "$SPARK_MAJOR_VERSION" -ge 4 ) ]]; then
+            SPARK_DIST_NAME="spark-${{ inputs.spark_version }}-bin-hadoop${HADOOP_MAJOR_VERSION}"
+          else
+            SPARK_DIST_NAME="spark-${{ inputs.spark_version }}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala${{ inputs.scala_version }}"
+          fi
+          echo "SPARK_DIST_NAME=${SPARK_DIST_NAME}" >> "$GITHUB_ENV"
+        shell: bash
+
+      - name: Build spark distribution from Spark image
+        run: |
+          IMAGE="${{ inputs.registry }}/okdp/spark:spark-${{ inputs.spark_version }}-scala-${{ inputs.scala_version }}-java-${{ inputs.java_version }}"
+          # Member path inside the "docker export" archive, not a host path.
+          SPARK_HOME=opt/spark
+
+          echo "Pulling base image $IMAGE ..."
+          docker pull "$IMAGE"
+
+          echo "Creating container from the image $IMAGE ..."
+          CID=$(docker create "$IMAGE")
+
+          echo "Inspect Spark home directory"
+          docker export "$CID" | tar -tvf - "$SPARK_HOME/"
+
+          mkdir -p "$SPARK_DIST_NAME"
+
+          echo "Export Spark home directory content from container image into Spark Dist ${SPARK_DIST_NAME}.tgz"
+          # --strip-components=2 removes the leading "opt/spark" path elements.
+          docker export "$CID" | tar -xvf - -C "$SPARK_DIST_NAME" --strip-components=2 "$SPARK_HOME"
+          tar -czf "${SPARK_DIST_NAME}.tgz" "$SPARK_DIST_NAME"
+
+          echo "Clean up"
+          docker rm "$CID"
+        shell: bash
+
+      - name: Upload Spark tarball to GitHub release (${{ inputs.github_tarball_release }})
+        run: |
+          echo "Uploading ${SPARK_DIST_NAME}.tgz to release ${{ inputs.github_tarball_release }} ..."
+          # --clobber overwrites an already uploaded asset of the same name.
+          gh release upload "${{ inputs.github_tarball_release }}" "${SPARK_DIST_NAME}.tgz" --clobber
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        shell: bash
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f08d99c..2ce91c5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,7 +60,7 @@ jobs:
       matrix: ${{ steps.ci-versions.outputs.matrix }}
     steps:
       - name: Checkout Repo ⚡️
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Get CI versions matrix 📥
         id: ci-versions
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 4d8f7d4..ebffe18 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -40,7 +40,8 @@ concurrency:
 
 permissions:
   packages: write
-
+  contents: write
+
 jobs:
 
   latest-github-release:
@@ -50,7 +51,7 @@ jobs:
       tag_name: ${{ steps.git-release-tag.outputs.tag_name }}
     steps:
       - name: Checkout Repo ⚡️
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Get latest GitHub Release tag name 📥
         id: git-release-tag
@@ -77,7 +78,7 @@ jobs:
       matrix: ${{ steps.release-versions.outputs.matrix }}
     steps:
       - name: Checkout Repo ⚡️
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Get release versions matrix 📥
         id: release-versions
@@ -85,22 +86,42 @@ jobs:
         with:
           use_matrix: ".build/release-versions.yml"
 
-  spark-publish:
+# NOTE(review): image publishing is commented out below, yet spark-dist-publish
+# pulls the published okdp/spark image - confirm this is intended before merging.
+#  spark-images-publish:
+#    if: github.repository_owner == 'OKDP' && needs.latest-github-release.outputs.tag_name != ''
+#    name: spark-images-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
+#    needs: [latest-github-release, get-release-versions]
+#    strategy:
+#      fail-fast: false
+#      matrix:
+#        version: ${{ fromJson(needs.get-release-versions.outputs.matrix) }}
+#    uses: ./.github/workflows/build-images-template.yml
+#    with:
+#      python_version: ${{ matrix.version.python_version }}
+#      spark_version: ${{ matrix.version.spark_version }}
+#      java_version: ${{ matrix.version.java_version }}
+#      scala_version: ${{ matrix.version.scala_version }}
+#      hadoop_version: ${{ matrix.version.hadoop_version }}
+#      registry: ${{ vars.REGISTRY || 'quay.io' }}
+#      publish_to_registry: "true"
+#      git_latest_release_tag: ${{ needs.latest-github-release.outputs.tag_name }}
+#    secrets: inherit
+
+  spark-dist-publish:
     if: github.repository_owner == 'OKDP' && needs.latest-github-release.outputs.tag_name != ''
-    name: spark-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
+    name: spark-dist-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
     needs: [latest-github-release, get-release-versions]
     strategy:
       fail-fast: false
-      matrix:
+      matrix:
         version: ${{ fromJson(needs.get-release-versions.outputs.matrix) }}
-    uses: ./.github/workflows/build-images-template.yml
+    uses: ./.github/workflows/build-upload-spark-dist.yml
     with:
-      python_version: ${{ matrix.version.python_version }}
       spark_version: ${{ matrix.version.spark_version }}
       java_version: ${{ matrix.version.java_version }}
       scala_version: ${{ matrix.version.scala_version }}
       hadoop_version: ${{ matrix.version.hadoop_version }}
       registry: ${{ vars.REGISTRY || 'quay.io' }}
-      publish_to_registry: "true"
       git_latest_release_tag: ${{ needs.latest-github-release.outputs.tag_name }}
     secrets: inherit
diff --git a/.github/workflows/sign-images.yml b/.github/workflows/sign-images.yml
index 0871a1d..79047dd 100644
--- a/.github/workflows/sign-images.yml
+++ b/.github/workflows/sign-images.yml
@@ -38,7 +38,7 @@ jobs:
       matrix: ${{ steps.ci-versions.outputs.matrix }}
     steps:
       - name: Checkout Repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Get CI versions matrix
         id: ci-versions
@@ -58,7 +58,7 @@ jobs:
 
     steps:
       - name: Checkout Repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up CI registry
         id: registry-repos