Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build-image-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,14 @@ jobs:
### The publish and periodic rebuilds are based on the latest stable github release tag
- name: Checkout latest Github Release tag (${{ inputs.git_latest_release_tag }}) ⚡️
if: inputs.publish_to_registry == 'true'
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
ref: ${{ inputs.git_latest_release_tag }}

### The CI is based on the main branch
- name: Checkout Repo ⚡️
if: inputs.publish_to_registry == 'false'
uses: actions/checkout@v4
uses: actions/checkout@v6

### Common steps between CI and Publish
- name: Free up disk space 📦
Expand Down
114 changes: 114 additions & 0 deletions .github/workflows/build-upload-spark-dist.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#
# Copyright 2025 okdp.io
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

name: Build Spark distribution tarball

# Reusable workflow: it is only triggered through `workflow_call`.
on:
  workflow_call:
    inputs:
      # --- Versions identifying the Spark build (all mandatory) ---
      spark_version:
        type: string
        required: true
        description: Spark version
      scala_version:
        type: string
        required: true
        description: Scala version
      java_version:
        type: string
        required: true
        description: Java version
      hadoop_version:
        type: string
        required: true
        description: Hadoop version

      # --- Where the source image is pulled from ---
      registry:
        type: string
        required: false
        default: 'quay.io'
        description: The container registry

      # --- Git ref checked out by the job ---
      git_latest_release_tag:
        type: string
        required: true
        description: The latest remote release tag

      # --- GitHub release that receives the generated .tgz ---
      github_tarball_release:
        type: string
        default: 'spark-tarballs'
        description: 'GitHub release tag where to push spark tgz tarballs'

      # --- Runner selection ---
      runs-on:
        type: string
        required: false
        default: 'ubuntu-latest'
        description: GitHub Actions Runner image

jobs:
  # Pulls the Spark image for the requested versions from the registry,
  # extracts its Spark home directory into a tarball and uploads that tarball
  # as an asset of the `github_tarball_release` GitHub release.
  build-upload-spark-dist:
    name: dist(scala-${{ inputs.scala_version }}, java-${{ inputs.java_version }}, hadoop-${{ inputs.hadoop_version }})
    runs-on: ${{ inputs.runs-on }}
    steps:
      - name: Checkout latest Github Release tag (${{ inputs.git_latest_release_tag }}) ⚡️
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.git_latest_release_tag }}

      # Computes SPARK_DIST_NAME and exports it to the following steps.
      # Naming rule implemented below:
      #   - Scala 2.12, or Scala 2.13 with Spark major >= 4:
      #       spark-<spark_version>-bin-hadoop<hadoop_major>
      #   - anything else:
      #       spark-<spark_version>-bin-hadoop<hadoop_major>-scala<scala_version>
      # NOTE(review): presumably mirrors the upstream Apache Spark tarball
      # naming - confirm.
      - name: Set Spark distribution name
        run: |
          # Keep only the part before the first dot (e.g. "3.3.6" -> "3").
          HADOOP_MAJOR_VERSION="${{ inputs.hadoop_version }}"
          HADOOP_MAJOR_VERSION="${HADOOP_MAJOR_VERSION%%.*}"

          SPARK_MAJOR_VERSION="${{ inputs.spark_version }}"
          SPARK_MAJOR_VERSION="${SPARK_MAJOR_VERSION%%.*}"

          if [[ "${{ inputs.scala_version }}" == "2.12" || ( "${{ inputs.scala_version }}" == "2.13" && "$SPARK_MAJOR_VERSION" -ge 4 ) ]]; then
            SPARK_DIST_NAME="spark-${{ inputs.spark_version }}-bin-hadoop${HADOOP_MAJOR_VERSION}"
          else
            SPARK_DIST_NAME="spark-${{ inputs.spark_version }}-bin-hadoop${HADOOP_MAJOR_VERSION}-scala${{ inputs.scala_version }}"
          fi
          # Quoted so a runner path containing spaces cannot split the redirect target.
          echo "SPARK_DIST_NAME=${SPARK_DIST_NAME}" >> "$GITHUB_ENV"
        shell: bash

      - name: Build spark distribution from Spark image
        run: |
          IMAGE="${{ inputs.registry }}/okdp/spark:spark-${{ inputs.spark_version }}-scala-${{ inputs.scala_version }}-java-${{ inputs.java_version }}"
          # Relative path (no leading slash): it is used as the member path
          # to select inside the `docker export` tar stream.
          SPARK_HOME=opt/spark

          echo "Pulling base image $IMAGE ..."
          docker pull "$IMAGE"

          # The container is only created, never started: its filesystem is
          # all that is needed for the export.
          echo "Creating container from the image $IMAGE ..."
          CID=$(docker create "$IMAGE")

          echo "Inspect Spark home directory"
          # `-f -` makes tar read the archive from stdin explicitly instead of
          # relying on the build-time default archive device.
          docker export "$CID" | tar -tvf - "$SPARK_HOME/"

          mkdir -p "$SPARK_DIST_NAME"

          echo "Export Spark home directory content from container image into Spark Dist ${SPARK_DIST_NAME}.tgz"
          # --strip-components=2 drops the leading "opt/spark/" so the files
          # land directly under $SPARK_DIST_NAME.
          docker export "$CID" | tar -xvf - -C "$SPARK_DIST_NAME" --strip-components=2 "$SPARK_HOME"
          tar -czf "${SPARK_DIST_NAME}.tgz" "$SPARK_DIST_NAME"

          echo "Clean up"
          docker rm "$CID"
        shell: bash

      # NOTE(review): `gh release upload` needs the target release to exist
      # already and a token with `contents: write` - confirm both in the caller.
      - name: Upload Spark tarball to GitHub release (${{ inputs.github_tarball_release }})
        run: |
          echo "Uploading ${SPARK_DIST_NAME}.tgz to release ${{ inputs.github_tarball_release }} ..."
          # --clobber replaces an existing asset that has the same file name.
          gh release upload "${{ inputs.github_tarball_release }}" "${SPARK_DIST_NAME}.tgz" --clobber
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        shell: bash

2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
matrix: ${{ steps.ci-versions.outputs.matrix }}
steps:
- name: Checkout Repo ⚡️
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Get CI versions matrix 📥
id: ci-versions
Expand Down
37 changes: 28 additions & 9 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ concurrency:

permissions:
packages: write

contents: write

jobs:

latest-github-release:
Expand All @@ -50,7 +51,7 @@ jobs:
tag_name: ${{ steps.git-release-tag.outputs.tag_name }}
steps:
- name: Checkout Repo ⚡️
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Get latest GitHub Release tag name 📥
id: git-release-tag
Expand All @@ -77,30 +78,48 @@ jobs:
matrix: ${{ steps.release-versions.outputs.matrix }}
steps:
- name: Checkout Repo ⚡️
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Get release versions matrix 📥
id: release-versions
uses: ./.github/actions/spark-version-matrix
with:
use_matrix: ".build/release-versions.yml"

spark-publish:
# spark-images-publish:
# if: github.repository_owner == 'OKDP' && needs.latest-github-release.outputs.tag_name != ''
# name: spark-images-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
# needs: [latest-github-release, get-release-versions]
# strategy:
# fail-fast: false
# matrix:
# version: ${{ fromJson(needs.get-release-versions.outputs.matrix) }}
# uses: ./.github/workflows/build-images-template.yml
# with:
# python_version: ${{ matrix.version.python_version }}
# spark_version: ${{ matrix.version.spark_version }}
# java_version: ${{ matrix.version.java_version }}
# scala_version: ${{ matrix.version.scala_version }}
# hadoop_version: ${{ matrix.version.hadoop_version }}
# registry: ${{ vars.REGISTRY || 'quay.io' }}
# publish_to_registry: "true"
# git_latest_release_tag: ${{ needs.latest-github-release.outputs.tag_name }}
# secrets: inherit

spark-dist-publish:
if: github.repository_owner == 'OKDP' && needs.latest-github-release.outputs.tag_name != ''
name: spark-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
name: spark-dist-publish (${{ needs.latest-github-release.outputs.tag_name }}/spark-${{ matrix.version.spark_version }})
needs: [latest-github-release, get-release-versions]
strategy:
fail-fast: false
matrix:
matrix:
version: ${{ fromJson(needs.get-release-versions.outputs.matrix) }}
uses: ./.github/workflows/build-images-template.yml
uses: ./.github/workflows/build-upload-spark-dist.yml
with:
python_version: ${{ matrix.version.python_version }}
spark_version: ${{ matrix.version.spark_version }}
java_version: ${{ matrix.version.java_version }}
scala_version: ${{ matrix.version.scala_version }}
hadoop_version: ${{ matrix.version.hadoop_version }}
registry: ${{ vars.REGISTRY || 'quay.io' }}
publish_to_registry: "true"
git_latest_release_tag: ${{ needs.latest-github-release.outputs.tag_name }}
secrets: inherit
4 changes: 2 additions & 2 deletions .github/workflows/sign-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
matrix: ${{ steps.ci-versions.outputs.matrix }}
steps:
- name: Checkout Repo
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Get CI versions matrix
id: ci-versions
Expand All @@ -58,7 +58,7 @@ jobs:

steps:
- name: Checkout Repo
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up CI registry
id: registry-repos
Expand Down
Loading