kubeflow · alyssacgoins · Mar 9, 2026 · Mar 6, 2026 · May 7, 2026 · May 12, 2026
diff --git a/.github/actions/test-and-report/action.yml b/.github/actions/test-and-report/action.yml
@@ -96,6 +96,10 @@ inputs:
     description: "Skip API access configuration and token generation (for tests that do not need a cluster)"
     required: false
     default: 'false'
+  # Passed through to the Go test binary as -mlflowEnabled=<value> in this action's ginkgo invocation.
+  mlflow_enabled:
+    description: "Whether MLflow is deployed and available for integration tests"
+    required: false
+    default: 'false'
 
 
 runs:
@@ -264,7 +268,7 @@ runs:
           DISABLE_TLS_CHECK='false'
         fi
 
-        go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} --label-filter=${{ inputs.test_label }} --silence-skips=true -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -pipelineStoreKubernetes=$pipelineStoreKubernetes -disableTlsCheck=$DISABLE_TLS_CHECK -apiScheme=$API_SCHEME -tlsEnabled=$TLS_ENABLED -caCertPath=$CA_CERT_PATH -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME -apiUrl="$API_URL" -authToken="$AUTH_TOKEN" -serviceAccountName="$SERVICE_ACCOUNT_NAME" $BASE_IMAGE_FLAG
+        go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} --label-filter=${{ inputs.test_label }} --silence-skips=true -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -pipelineStoreKubernetes=$pipelineStoreKubernetes -disableTlsCheck=$DISABLE_TLS_CHECK -apiScheme=$API_SCHEME -tlsEnabled=$TLS_ENABLED -caCertPath=$CA_CERT_PATH -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME -apiUrl="$API_URL" -authToken="$AUTH_TOKEN" -serviceAccountName="$SERVICE_ACCOUNT_NAME" -mlflowEnabled=${{ inputs.mlflow_enabled }} $BASE_IMAGE_FLAG
       continue-on-error: true
 
     - name: Collect Pod logs in case of Test Failures

diff --git a/.github/resources/scripts/configure-mlflow.sh b/.github/resources/scripts/configure-mlflow.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright 2026 The Kubeflow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Purpose:
+# This script configures KFP to use an already-deployed MLflow instance for
+# MLflow E2E tests.
+#
+# CI helper: patch the KFP API server with plugins.mlflow, roll it out, and
+# port-forward the API server and MLflow so E2E tests can reach both.
+# It also exports workspace/auth variables used by MLflow test helpers.
+#
+# Usage: configure-mlflow.sh <KFP_NAMESPACE> <MLFLOW_NAMESPACE> <CONFIG_JSON_PATH>
+
+# errexit: stop as soon as a command fails.
+set -e
+
+# All three positional arguments are mandatory; ${n:?msg} aborts with msg when one is missing.
+KFP_NAMESPACE="${1:?KFP namespace required}"
+MLFLOW_NAMESPACE="${2:?MLflow namespace required}"
+CONFIG_JSON_PATH="${3:?Path to source config.json required}"
+
+# Discover the MLflow service: print the namespace's services for the log, then take the
+# first service whose name contains "mlflow" (case-insensitive).
+echo "Services in ${MLFLOW_NAMESPACE} namespace:"
+kubectl get svc -n "$MLFLOW_NAMESPACE" --no-headers
+MLFLOW_SVC=$(kubectl get svc -n "$MLFLOW_NAMESPACE" --no-headers -o custom-columns=":metadata.name" | grep -i mlflow | head -1)
+if [ -z "$MLFLOW_SVC" ]; then
+  echo "ERROR: No service matching 'mlflow' found in namespace $MLFLOW_NAMESPACE"
+  exit 1
+fi
+# Use the first port the service declares. The endpoint handed to KFP is the service's
+# in-cluster DNS name over HTTPS, with the /mlflow path prefix appended.
+MLFLOW_PORT=$(kubectl get svc -n "$MLFLOW_NAMESPACE" "$MLFLOW_SVC" -o jsonpath='{.spec.ports[0].port}')
+MLFLOW_HOST="${MLFLOW_SVC}.${MLFLOW_NAMESPACE}.svc.cluster.local"
+MLFLOW_STATIC_PREFIX="/mlflow"
+MLFLOW_ENDPOINT="https://${MLFLOW_HOST}:${MLFLOW_PORT}${MLFLOW_STATIC_PREFIX}"
+echo "MLflow service: $MLFLOW_SVC port=$MLFLOW_PORT endpoint=$MLFLOW_ENDPOINT"
+
+# Build the plugins.mlflow stanza: the discovered endpoint, tls.insecureSkipVerify set,
+# and settings.workspacesEnabled set.
+MLFLOW_PATCH=$(jq -n --arg endpoint "$MLFLOW_ENDPOINT" '{
+  endpoint: $endpoint,
+  tls: { insecureSkipVerify: true },
+  settings: { workspacesEnabled: true }
+}')
+
+# Assign only .plugins.mlflow so any other entries already under .plugins in the source
+# config are kept; object addition with `+` is shallow and would replace .plugins wholesale.
+jq --argjson mlflow "$MLFLOW_PATCH" '.plugins.mlflow = $mlflow' \
+  "$CONFIG_JSON_PATH" > /tmp/kfp-config.json
+
+echo "Patched config.json plugins.mlflow:"
+jq '.plugins.mlflow' /tmp/kfp-config.json
+
+# Publish the patched config as a ConfigMap (create-or-update via dry-run + apply), mount it
+# over /config/config.json in the ml-pipeline-api-server container, and wait for the rollout.
+kubectl create configmap kfp-mlflow-config -n "$KFP_NAMESPACE" \
+  --from-file=config.json=/tmp/kfp-config.json --dry-run=client -o yaml | kubectl apply -f -
+kubectl patch deployment ml-pipeline -n "$KFP_NAMESPACE" --type=strategic -p \
+  '{"spec":{"template":{"spec":{"volumes":[{"name":"mlflow-cfg","configMap":{"name":"kfp-mlflow-config"}}],"containers":[{"name":"ml-pipeline-api-server","volumeMounts":[{"name":"mlflow-cfg","mountPath":"/config/config.json","subPath":"config.json"}]}]}}}}'
+kubectl rollout status deployment/ml-pipeline -n "$KFP_NAMESPACE" --timeout=180s
+
+# Kill any existing API-server port-forward (ignoring "no such process"), pause briefly,
+# then hand off to the sibling forward-port.sh helper for ml-pipeline on 8888.
+pkill -f "kubectl port-forward.*ml-pipeline.*8888" || true
+sleep 2
+
+SCRIPT_DIR="${BASH_SOURCE%/*}"
+"${SCRIPT_DIR}/forward-port.sh" "$KFP_NAMESPACE" ml-pipeline 8888 8888
+
+# Poll healthz up to 12 times, 5s apart; a final probe after the loop decides the outcome.
+KFP_HEALTHZ_URL="http://localhost:8888/apis/v1beta1/healthz"
+attempt=1
+while [ "$attempt" -le 12 ]; do
+  if curl -sf "$KFP_HEALTHZ_URL" > /dev/null 2>&1; then
+    echo "API server is healthy on localhost:8888"
+    break
+  fi
+  echo "Waiting for API server to become healthy... ($attempt/12)"
+  sleep 5
+  attempt=$((attempt + 1))
+done
+if ! curl -sf "$KFP_HEALTHZ_URL" > /dev/null 2>&1; then
+  echo "ERROR: API server not reachable at localhost:8888"
+  exit 1
+fi
+
+# Request a 1h token for the ml-pipeline service account. Failure is tolerated: SA_TOKEN is
+# left empty and later requests go out without an Authorization header.
+SA_TOKEN=$(kubectl create token ml-pipeline -n "$KFP_NAMESPACE" --duration=1h 2>/dev/null || true)
+# Only export when running under GitHub Actions (GITHUB_ENV is set).
+if [ -n "${GITHUB_ENV:-}" ]; then
+  echo "MLFLOW_WORKSPACE=$KFP_NAMESPACE" >> "$GITHUB_ENV"
+  # Later workflow steps need these to re-establish port-forward: background jobs from this step
+  # are terminated when the step exits, so test-and-report starts kubectl port-forward again.
+  echo "MLFLOW_PORT_FORWARD_NS=$MLFLOW_NAMESPACE" >> "$GITHUB_ENV"
+  echo "MLFLOW_PORT_FORWARD_SVC=$MLFLOW_SVC" >> "$GITHUB_ENV"
+  echo "MLFLOW_PORT_FORWARD_REMOTE_PORT=$MLFLOW_PORT" >> "$GITHUB_ENV"
+  if [ -n "$SA_TOKEN" ]; then
+    # Values written to GITHUB_ENV are not masked automatically; register the token as a
+    # secret first so the runner redacts it from subsequent log output.
+    echo "::add-mask::$SA_TOKEN"
+    echo "MLFLOW_BEARER_TOKEN=$SA_TOKEN" >> "$GITHUB_ENV"
+    echo "Exported MLFLOW_BEARER_TOKEN and MLFLOW_WORKSPACE for test helpers"
+  else
+    echo "WARNING: Could not create SA token; MLflow requests may be unauthenticated"
+    echo "Exported MLFLOW_WORKSPACE only"
+  fi
+fi
+
+# Forward the MLflow service to localhost:8080 in the background and give it a moment to start.
+kubectl port-forward -n "$MLFLOW_NAMESPACE" "svc/$MLFLOW_SVC" "8080:$MLFLOW_PORT" &
+sleep 3
+
+# Health probe goes through the port-forward, always with the workspace header and with a
+# bearer token when one was obtained.
+HEALTH_URL="https://localhost:8080${MLFLOW_STATIC_PREFIX}/health"
+CURL_HEADERS=(-H "X-MLflow-Workspace: $KFP_NAMESPACE")
+[ -n "$SA_TOKEN" ] && CURL_HEADERS+=(-H "Authorization: Bearer $SA_TOKEN")
+
+# Poll up to 30 times, 5s apart. Any HTTP status below 500 is accepted as healthy;
+# 000 means curl received no HTTP response at all.
+STATUS=000
+for i in $(seq 1 30); do
+  # curl still prints "000" through -w when it fails, so on a non-zero exit STATUS is reset
+  # instead of having a second "000" appended: "000000" is not equal to "000", compares as
+  # 0 < 500, and would report an unreachable backend as healthy.
+  STATUS=$(curl -sk -o /dev/null -w '%{http_code}' --connect-timeout 5 --max-time 10 \
+    "${CURL_HEADERS[@]}" "$HEALTH_URL" 2>/dev/null) || STATUS="000"
+  if [ "$STATUS" != "000" ] && [ "$STATUS" -lt 500 ] 2>/dev/null; then
+    echo "MLflow backend is healthy on localhost:8080 (HTTPS, status=$STATUS)"
+    break
+  fi
+  echo "Waiting for MLflow backend... ($i/30, status=$STATUS)"
+  sleep 5
+done
+if [ "$STATUS" = "000" ] || { [ "$STATUS" -ge 500 ] 2>/dev/null; }; then
+  echo "ERROR: MLflow backend not healthy after 30 attempts (last status=$STATUS)"
+  exit 1
+fi
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
@@ -7,6 +7,11 @@ env:
   PYTHON_VERSION: "3.9"
   USER_NAMESPACE: "kubeflow-user-example-com"
   CA_CERT_PATH: ""
+  AWS_ACCESS_KEY_ID: 'minio'
+  AWS_SECRET_ACCESS_KEY: 'minio123'
+  AWS_S3_BUCKET: 'mlpipeline'
+  MLFLOW_TRACKING_URI: "https://localhost:8080/mlflow"
+  MLFLOW_TRACKING_INSECURE_TLS: "true"
 
 on:
   push:
@@ -256,3 +261,118 @@ jobs:
           python_version: ${{ env.PYTHON_VERSION }}
           user_namespace: ${{ env.USER_NAMESPACE }}
           report_name: "E2EMultiUserTests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_multiUser=${{ matrix.multi_user }}"
+
+
+  # Runs the E2E tests labelled "MLflow" against a fresh cluster with KFP plus an
+  # operator-deployed MLflow. The matrix varies cache_enabled and artifact_storage
+  # (2 x 2 = 4 combinations); every other axis has a single value.
+  end-to-end-critical-mlflow-tests:
+    runs-on: ubuntu-latest
+    needs: build
+    strategy:
+      matrix:
+        k8s_version: [ "v1.34.0" ]
+        cache_enabled: [ "true", "false" ]
+        argo_version: [ "v3.7.3" ]
+        proxy: [ "false" ]
+        test_label: [ "MLflow" ]
+        pod_to_pod_tls_enabled: [ "false" ]
+        multi_user: [ "false" ]
+        artifact_proxy: [ "false" ]
+        artifact_storage: [ "file", "s3" ]
+        backend_store: [ "postgres" ]
+        registry_store: [ "postgres" ]
+      fail-fast: false
+    name: End to End Critical Scenario MLflow Tests - K8s ${{ matrix.k8s_version }} cacheEnabled=${{ matrix.cache_enabled }} artifactStorage=${{ matrix.artifact_storage }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Create cluster
+        uses: ./.github/actions/create-cluster
+        id: create-cluster
+        with:
+          k8s_version: ${{ matrix.k8s_version }}
+          cluster_name: ${{ env.CLUSTER_NAME }}
+
+      - name: Deploy KFP
+        uses: ./.github/actions/deploy
+        if: ${{ steps.create-cluster.outcome == 'success' }}
+        id: deploy
+        with:
+          cache_enabled: ${{ matrix.cache_enabled }}
+          argo_version: ${{ matrix.argo_version }}
+          pod_to_pod_tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
+          multi_user: ${{ matrix.multi_user }}
+          artifact_proxy: ${{ matrix.artifact_proxy }}
+          image_path: ${{ needs.build.outputs.IMAGE_PATH }}
+          image_tag: ${{ needs.build.outputs.IMAGE_TAG }}
+          image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }}
+
+      # External action pinned to a commit SHA. Images are pulled from the quay.io "rhoai" org
+      # when the repository owner is red-hat-data-services, otherwise from "opendatahub".
+      - name: Deploy MLflow
+        id: deploy-mlflow
+        uses: opendatahub-io/mlflow-operator/.github/actions/deploy@8ab07a89d6d2d6bc2ffa0c8601f4a856a4cb1b18
+        if: ${{ steps.create-cluster.outcome == 'success' }}
+        with:
+          namespace: 'opendatahub'
+          mlflow_image: quay.io/${{ github.repository_owner == 'red-hat-data-services' && 'rhoai' || 'opendatahub' }}/mlflow:odh-stable
+          mlflow_operator_image: quay.io/${{ github.repository_owner == 'red-hat-data-services' && 'rhoai' || 'opendatahub' }}/mlflow-operator:odh-stable
+          backend_store: ${{ matrix.backend_store }}
+          artifact_storage: ${{ matrix.artifact_storage }}
+          registry_store: ${{ matrix.registry_store }}
+          s3_access_key: ${{ env.AWS_ACCESS_KEY_ID }}
+          s3_secret_key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+
+      # NOTE(review): the field selector restricts the wait to pods already in phase Running,
+      # so pods still Pending when this step starts are not waited on. Confirm the deploy
+      # action above has already waited for the MLflow pods to be scheduled.
+      - name: Wait for MLflow stack readiness
+        shell: bash
+        if: ${{ steps.deploy-mlflow.outcome == 'success' }}
+        run: |
+          kubectl wait --for=condition=Ready pods --field-selector=status.phase=Running -n opendatahub --timeout=180s
+          echo "All running pods in opendatahub are Ready"
+
+      # Script arguments: KFP namespace, MLflow namespace, path to the source config.json.
+      - name: Configure MLflow Plugin and Forward Ports
+        shell: bash
+        id: configure-mlflow-plugin
+        if: ${{ steps.deploy.outcome == 'success' && steps.deploy-mlflow.outcome == 'success' }}
+        run: ./.github/resources/scripts/configure-mlflow.sh kubeflow opendatahub backend/src/apiserver/config/config.json
+
+      # Defaults come from env/matrix; workflow_dispatch runs take them from the dispatch inputs.
+      - name: Configure Input Variables
+        shell: bash
+        id: configure
+        if: ${{ steps.deploy.outcome == 'success' }}
+        run: |
+          NUMBER_OF_NODES=${{ env.NUMBER_OF_PARALLEL_NODES }}
+          TEST_LABEL=${{ matrix.test_label }}
+          NAMESPACE=${{ env.NAMESPACE }}
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            NUMBER_OF_NODES=${{ inputs.number_of_parallel_tests }}
+            TEST_LABEL=${{ inputs.test_label }}
+            NAMESPACE=${{ inputs.namespace }}
+          fi
+
+          {
+            echo "NUMBER_OF_NODES=$NUMBER_OF_NODES"
+            echo "TEST_LABEL=$TEST_LABEL"
+            echo "NAMESPACE=$NAMESPACE"
+          } >> "$GITHUB_OUTPUT"
+
+      # Best effort: continue-on-error keeps a failed image build/load from failing the job.
+      - name: Build and upload the sample Modelcar image to Kind
+        id: build-sample-modelcar-image
+        if: ${{ steps.deploy.outcome == 'success' }}
+        run: |
+          docker build -f ./test_data/sdk_compiled_pipelines/valid/critical/modelcar/Dockerfile -t registry.domain.local/modelcar:test .
+          kind --name kfp load docker-image registry.domain.local/modelcar:test
+        continue-on-error: true
+
+      # Runs only when input configuration, the MLflow deployment and the plugin
+      # configuration all succeeded.
+      - name: Run Tests
+        uses: ./.github/actions/test-and-report
+        id: test-run
+        if: ${{ steps.configure.outcome == 'success' && steps.deploy-mlflow.outcome == 'success' && steps.configure-mlflow-plugin.outcome == 'success' }}
+        with:
+          cache_enabled: ${{ matrix.cache_enabled }}
+          test_directory: ${{ env.E2E_TESTS_DIR }}
+          test_label: ${{ steps.configure.outputs.TEST_LABEL }}
+          num_parallel_nodes: ${{ steps.configure.outputs.NUMBER_OF_NODES }}
+          default_namespace: ${{ steps.configure.outputs.NAMESPACE }}
+          python_version: ${{ env.PYTHON_VERSION }}
+          report_name: "MLflowTests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_artifactStorage=${{ matrix.artifact_storage }}"
+          tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
+          ca_cert_path: ${{ env.CA_CERT_PATH }}
+          mlflow_enabled: 'true'
diff --git a/.github/workflows/presubmit-backend.yml b/.github/workflows/presubmit-backend.yml
@@ -5,13 +5,8 @@ on:
     branches:
       - master
   pull_request:
-    branches:
-      - master
     paths:
       - 'backend/**'
-      - 'test/presubmit-backend-test.sh'
-      - '!**/*.md'
-      - '!**/OWNERS'
 
 jobs:
   backend-tests:

diff --git a/backend/Makefile b/backend/Makefile
@@ -6,17 +6,18 @@ TLS_ENABLED ?= "false"
 CERT_MANAGER_VERSION ?= v1.16.2
 
 # Container Build Params
+# Auto-detects the engine (docker first, then podman); override with CONTAINER_ENGINE=<engine>.
 CONTAINER_ENGINE ?= $(shell \
 	if command -v docker >/dev/null 2>&1; then \
 		echo docker; \
 	elif command -v podman >/dev/null 2>&1; then \
 		echo podman; \
 	fi \
 )
 
 # IMG_REGISTRY can be used to automatically prepend registry details. e.g. "quay.io/kubeflow/"
 IMG_REGISTRY ?=
 IMG_TAG_APISERVER         ?= apiserver
 IMG_TAG_PERSISTENCEAGENT  ?= persistence-agent
 IMG_TAG_CACHESERVER       ?= cache-server
 IMG_TAG_SCHEDULEDWORKFLOW ?= scheduledworkflow