diff --git a/.github/actions/test-and-report/action.yml b/.github/actions/test-and-report/action.yml index 50b477d4b83..1bfc5495dcf 100644 --- a/.github/actions/test-and-report/action.yml +++ b/.github/actions/test-and-report/action.yml @@ -96,6 +96,10 @@ inputs: description: "Skip API access configuration and token generation (for tests that do not need a cluster)" required: false default: 'false' + mlflow_enabled: + description: "Whether MLflow is deployed and available for integration tests" + required: false + default: 'false' runs: @@ -264,7 +268,7 @@ runs: DISABLE_TLS_CHECK='false' fi - go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} --label-filter=${{ inputs.test_label }} --silence-skips=true -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -pipelineStoreKubernetes=$pipelineStoreKubernetes -disableTlsCheck=$DISABLE_TLS_CHECK -apiScheme=$API_SCHEME -tlsEnabled=$TLS_ENABLED -caCertPath=$CA_CERT_PATH -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME -apiUrl="$API_URL" -authToken="$AUTH_TOKEN" -serviceAccountName="$SERVICE_ACCOUNT_NAME" $BASE_IMAGE_FLAG + go run github.com/onsi/ginkgo/v2/ginkgo -r -v --cover -p --keep-going --github-output=true --nodes=${{ inputs.num_parallel_nodes }} --label-filter=${{ inputs.test_label }} --silence-skips=true -- -namespace=${{ inputs.default_namespace }} -multiUserMode=$MULTI_USER -useProxy=$USE_PROXY -userNamespace=${{ inputs.user_namespace }} -uploadPipelinesWithKubernetes=${{ inputs.upload_pipelines_with_kubernetes_client}} -pipelineStoreKubernetes=$pipelineStoreKubernetes -disableTlsCheck=$DISABLE_TLS_CHECK -apiScheme=$API_SCHEME -tlsEnabled=$TLS_ENABLED -caCertPath=$CA_CERT_PATH -pullNumber=$PULL_NUMBER -repoName=$REPO_NAME -apiUrl="$API_URL" -authToken="$AUTH_TOKEN" 
-serviceAccountName="$SERVICE_ACCOUNT_NAME" -mlflowEnabled=${{ inputs.mlflow_enabled }} $BASE_IMAGE_FLAG continue-on-error: true - name: Collect Pod logs in case of Test Failures diff --git a/.github/resources/scripts/configure-mlflow.sh b/.github/resources/scripts/configure-mlflow.sh new file mode 100755 index 00000000000..aa587e449f8 --- /dev/null +++ b/.github/resources/scripts/configure-mlflow.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Copyright 2026 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Purpose: +# This script configures KFP to use an already-deployed MLflow instance for +# MLflow E2E tests. +# +# CI helper: patch the KFP API server with plugins.mlflow, roll it out, and +# port-forward the API server and MLflow so E2E tests can reach both. +# It also exports workspace/auth variables used by MLflow test helpers. 
+#
+# Usage: configure-mlflow.sh <kfp-namespace> <mlflow-namespace> <config-json-path>
+
+set -e
+
+KFP_NAMESPACE="${1:?KFP namespace required}"
+MLFLOW_NAMESPACE="${2:?MLflow namespace required}"
+CONFIG_JSON_PATH="${3:?Path to source config.json required}"
+
+echo "Services in ${MLFLOW_NAMESPACE} namespace:"
+kubectl get svc -n "$MLFLOW_NAMESPACE" --no-headers
+MLFLOW_SVC=$(kubectl get svc -n "$MLFLOW_NAMESPACE" --no-headers -o custom-columns=":metadata.name" | grep -i mlflow | head -1)
+if [ -z "$MLFLOW_SVC" ]; then
+  echo "ERROR: No service matching 'mlflow' found in namespace $MLFLOW_NAMESPACE"
+  exit 1
+fi
+MLFLOW_PORT=$(kubectl get svc -n "$MLFLOW_NAMESPACE" "$MLFLOW_SVC" -o jsonpath='{.spec.ports[0].port}')
+MLFLOW_HOST="${MLFLOW_SVC}.${MLFLOW_NAMESPACE}.svc.cluster.local"
+MLFLOW_STATIC_PREFIX="/mlflow"
+MLFLOW_ENDPOINT="https://${MLFLOW_HOST}:${MLFLOW_PORT}${MLFLOW_STATIC_PREFIX}"
+echo "MLflow service: $MLFLOW_SVC port=$MLFLOW_PORT endpoint=$MLFLOW_ENDPOINT"
+
+MLFLOW_PATCH=$(jq -n --arg endpoint "$MLFLOW_ENDPOINT" '{
+  endpoint: $endpoint,
+  tls: { insecureSkipVerify: true },
+  settings: { workspacesEnabled: true }
+}')
+
+jq --argjson mlflow "$MLFLOW_PATCH" '. + { plugins: { mlflow: $mlflow } }' \
+  "$CONFIG_JSON_PATH" > /tmp/kfp-config.json
+
+echo "Patched config.json plugins.mlflow:"
+jq '.plugins.mlflow' /tmp/kfp-config.json
+# Publish the patched config as a ConfigMap and mount it over /config/config.json in the API server.
+kubectl create configmap kfp-mlflow-config -n "$KFP_NAMESPACE" \
+  --from-file=config.json=/tmp/kfp-config.json --dry-run=client -o yaml | kubectl apply -f -
+kubectl patch deployment ml-pipeline -n "$KFP_NAMESPACE" --type=strategic -p \
+  '{"spec":{"template":{"spec":{"volumes":[{"name":"mlflow-cfg","configMap":{"name":"kfp-mlflow-config"}}],"containers":[{"name":"ml-pipeline-api-server","volumeMounts":[{"name":"mlflow-cfg","mountPath":"/config/config.json","subPath":"config.json"}]}]}}}}'
+kubectl rollout status deployment/ml-pipeline -n "$KFP_NAMESPACE" --timeout=180s
+# Replace any existing port-forward on 8888 now that the rollout has finished.
+pkill -f "kubectl port-forward.*ml-pipeline.*8888" || true
+sleep 2
+
+C_DIR="${BASH_SOURCE%/*}"
+"${C_DIR}/forward-port.sh" "$KFP_NAMESPACE" ml-pipeline 8888 8888
+# Poll healthz up to 12 times, 5s apart; the final curl fails the script if it never came up.
+for i in $(seq 1 12); do
+  if curl -sf http://localhost:8888/apis/v1beta1/healthz > /dev/null 2>&1; then
+    echo "API server is healthy on localhost:8888"
+    break
+  fi
+  echo "Waiting for API server to become healthy... ($i/12)"
+  sleep 5
+done
+curl -sf http://localhost:8888/apis/v1beta1/healthz > /dev/null 2>&1 || {
+  echo "ERROR: API server not reachable at localhost:8888"
+  exit 1
+}
+# Best-effort token: on failure SA_TOKEN stays empty and no Authorization header is sent below.
+SA_TOKEN=$(kubectl create token ml-pipeline -n "$KFP_NAMESPACE" --duration=1h 2>/dev/null || true)
+if [ -n "${GITHUB_ENV:-}" ]; then
+  echo "MLFLOW_WORKSPACE=$KFP_NAMESPACE" >> "$GITHUB_ENV"
+  # Later steps need these to restart the port-forward: this step's background jobs end when it exits.
+  echo "MLFLOW_PORT_FORWARD_NS=$MLFLOW_NAMESPACE" >> "$GITHUB_ENV"
+  echo "MLFLOW_PORT_FORWARD_SVC=$MLFLOW_SVC" >> "$GITHUB_ENV"
+  echo "MLFLOW_PORT_FORWARD_REMOTE_PORT=$MLFLOW_PORT" >> "$GITHUB_ENV"
+  if [ -n "$SA_TOKEN" ]; then
+    echo "::add-mask::$SA_TOKEN"  # redact the token wherever later steps would print it
+    echo "MLFLOW_BEARER_TOKEN=$SA_TOKEN" >> "$GITHUB_ENV"
+    echo "Exported MLFLOW_BEARER_TOKEN and MLFLOW_WORKSPACE for test helpers"
+  else
+    echo "WARNING: Could not create SA token; MLflow requests may be unauthenticated"
+    echo "Exported MLFLOW_WORKSPACE only"
+  fi
+fi
+
+kubectl port-forward -n "$MLFLOW_NAMESPACE" "svc/$MLFLOW_SVC" "8080:$MLFLOW_PORT" &
+sleep 3
+
+HEALTH_URL="https://localhost:8080${MLFLOW_STATIC_PREFIX}/health"
+CURL_HEADERS=(-H "X-MLflow-Workspace: $KFP_NAMESPACE")
+[ -n "$SA_TOKEN" ] && CURL_HEADERS+=(-H "Authorization: Bearer $SA_TOKEN")
+# Any HTTP status below 500 counts as healthy; 000 means curl received no HTTP response.
+STATUS=000
+for i in $(seq 1 30); do
+  STATUS=$(curl -sk -o /dev/null -w '%{http_code}' --connect-timeout 5 --max-time 10 \
+    "${CURL_HEADERS[@]}" "$HEALTH_URL" 2>/dev/null) || STATUS=000  # -w already prints 000 on failure; echoing another gave "000000"
+  if [ "$STATUS" != "000" ] && [ "$STATUS" -lt 500 ] 2>/dev/null; then
+    echo "MLflow backend is healthy on localhost:8080 (HTTPS, status=$STATUS)"
+    break
+  fi
+  echo "Waiting for MLflow backend... 
($i/30, status=$STATUS)" + sleep 5 +done +if [ "$STATUS" = "000" ] || { [ "$STATUS" -ge 500 ] 2>/dev/null; }; then + echo "ERROR: MLflow backend not healthy after 30 attempts (last status=$STATUS)" + exit 1 +fi diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 4ac9ac66cbd..4541c87cd25 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -7,6 +7,11 @@ env: PYTHON_VERSION: "3.9" USER_NAMESPACE: "kubeflow-user-example-com" CA_CERT_PATH: "" + AWS_ACCESS_KEY_ID: 'minio' + AWS_SECRET_ACCESS_KEY: 'minio123' + AWS_S3_BUCKET: 'mlpipeline' + MLFLOW_TRACKING_URI: "https://localhost:8080/mlflow" + MLFLOW_TRACKING_INSECURE_TLS: "true" on: push: @@ -256,3 +261,118 @@ jobs: python_version: ${{ env.PYTHON_VERSION }} user_namespace: ${{ env.USER_NAMESPACE }} report_name: "E2EMultiUserTests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_multiUser=${{ matrix.multi_user }}" + + + end-to-end-critical-mlflow-tests: + runs-on: ubuntu-latest + needs: build + strategy: + matrix: + k8s_version: [ "v1.34.0" ] + cache_enabled: [ "true", "false" ] + argo_version: [ "v3.7.3" ] + proxy: [ "false" ] + test_label: [ "MLflow" ] + pod_to_pod_tls_enabled: [ "false" ] + multi_user: [ "false" ] + artifact_proxy: [ "false" ] + artifact_storage: [ "file", "s3" ] + backend_store: [ "postgres" ] + registry_store: [ "postgres" ] + fail-fast: false + name: End to End Critical Scenario MLflow Tests - K8s ${{ matrix.k8s_version }} cacheEnabled=${{ matrix.cache_enabled }} artifactStorage=${{ matrix.artifact_storage }} + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - name: Create cluster + uses: ./.github/actions/create-cluster + id: create-cluster + with: + k8s_version: ${{ matrix.k8s_version }} + cluster_name: ${{ env.CLUSTER_NAME }} + + - name: Deploy KFP + uses: ./.github/actions/deploy + if: ${{ steps.create-cluster.outcome == 'success' }} + id: deploy + with: + cache_enabled: ${{ matrix.cache_enabled 
}} + argo_version: ${{ matrix.argo_version }} + pod_to_pod_tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }} + multi_user: ${{ matrix.multi_user }} + artifact_proxy: ${{ matrix.artifact_proxy }} + image_path: ${{ needs.build.outputs.IMAGE_PATH }} + image_tag: ${{ needs.build.outputs.IMAGE_TAG }} + image_registry: ${{ needs.build.outputs.IMAGE_REGISTRY }} + + - name: Deploy MLflow + id: deploy-mlflow + uses: opendatahub-io/mlflow-operator/.github/actions/deploy@8ab07a89d6d2d6bc2ffa0c8601f4a856a4cb1b18 + if: ${{ steps.create-cluster.outcome == 'success' }} + with: + namespace: 'opendatahub' + mlflow_image: quay.io/${{ github.repository_owner == 'red-hat-data-services' && 'rhoai' || 'opendatahub' }}/mlflow:odh-stable + mlflow_operator_image: quay.io/${{ github.repository_owner == 'red-hat-data-services' && 'rhoai' || 'opendatahub' }}/mlflow-operator:odh-stable + backend_store: ${{ matrix.backend_store }} + artifact_storage: ${{ matrix.artifact_storage }} + registry_store: ${{ matrix.registry_store }} + s3_access_key: ${{ env.AWS_ACCESS_KEY_ID }} + s3_secret_key: ${{ env.AWS_SECRET_ACCESS_KEY }} + + - name: Wait for MLflow stack readiness + shell: bash + if: ${{ steps.deploy-mlflow.outcome == 'success' }} + run: | + kubectl wait --for=condition=Ready pods --field-selector=status.phase=Running -n opendatahub --timeout=180s + echo "All running pods in opendatahub are Ready" + + - name: Configure MLflow Plugin and Forward Ports + shell: bash + id: configure-mlflow-plugin + if: ${{ steps.deploy.outcome == 'success' && steps.deploy-mlflow.outcome == 'success' }} + run: ./.github/resources/scripts/configure-mlflow.sh kubeflow opendatahub backend/src/apiserver/config/config.json + + - name: Configure Input Variables + shell: bash + id: configure + if: ${{ steps.deploy.outcome == 'success' }} + run: | + NUMBER_OF_NODES=${{ env.NUMBER_OF_PARALLEL_NODES }} + TEST_LABEL=${{ matrix.test_label }} + NAMESPACE=${{ env.NAMESPACE }} + if [ "${{ github.event_name }}" == 
"workflow_dispatch" ]; then + NUMBER_OF_NODES=${{ inputs.number_of_parallel_tests }} + TEST_LABEL=${{ inputs.test_label }} + NAMESPACE=${{ inputs.namespace }} + fi + + { + echo "NUMBER_OF_NODES=$NUMBER_OF_NODES" + echo "TEST_LABEL=$TEST_LABEL" + echo "NAMESPACE=$NAMESPACE" + } >> "$GITHUB_OUTPUT" + + - name: Build and upload the sample Modelcar image to Kind + id: build-sample-modelcar-image + if: ${{ steps.deploy.outcome == 'success' }} + run: | + docker build -f ./test_data/sdk_compiled_pipelines/valid/critical/modelcar/Dockerfile -t registry.domain.local/modelcar:test . + kind --name kfp load docker-image registry.domain.local/modelcar:test + continue-on-error: true + + - name: Run Tests + uses: ./.github/actions/test-and-report + id: test-run + if: ${{ steps.configure.outcome == 'success' && steps.deploy-mlflow.outcome == 'success' && steps.configure-mlflow-plugin.outcome == 'success'}} + with: + cache_enabled: ${{ matrix.cache_enabled }} + test_directory: ${{ env.E2E_TESTS_DIR }} + test_label: ${{ steps.configure.outputs.TEST_LABEL }} + num_parallel_nodes: ${{ steps.configure.outputs.NUMBER_OF_NODES }} + default_namespace: ${{ steps.configure.outputs.NAMESPACE }} + python_version: ${{ env.PYTHON_VERSION }} + report_name: "MLflowTests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_artifactStorage=${{ matrix.artifact_storage }}" + tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }} + ca_cert_path: ${{ env.CA_CERT_PATH }} + mlflow_enabled: 'true' diff --git a/.github/workflows/presubmit-backend.yml b/.github/workflows/presubmit-backend.yml index 159a2ac2d4b..73ec0af5d59 100644 --- a/.github/workflows/presubmit-backend.yml +++ b/.github/workflows/presubmit-backend.yml @@ -5,13 +5,8 @@ on: branches: - master pull_request: - branches: - - master paths: - 'backend/**' - - 'test/presubmit-backend-test.sh' - - '!**/*.md' - - '!**/OWNERS' jobs: backend-tests: diff --git a/backend/Makefile b/backend/Makefile index ddae2ea9e47..3334c86158d 100644 --- 
a/backend/Makefile
+++ b/backend/Makefile
@@ -6,17 +6,17 @@ TLS_ENABLED ?= "false"
 CERT_MANAGER_VERSION ?= v1.16.2
 
 # Container Build Params
-CONTAINER_ENGINE ?= $(shell \
-	if command -v docker >/dev/null 2>&1; then \
-		echo docker; \
-	elif command -v podman >/dev/null 2>&1; then \
-		echo podman; \
-	fi \
-)
+CONTAINER_ENGINE ?= $(shell \
+	if command -v docker >/dev/null 2>&1; then \
+		echo docker; \
+	elif command -v podman >/dev/null 2>&1; then \
+		echo podman; \
+	fi \
+)
 
 # IMG_REGISTRY can be used to automatically prepend registry details. e.g. "quay.io/kubeflow/"
-IMG_REGISTRY ?=
-IMG_TAG_APISERVER ?= apiserver
+IMG_REGISTRY ?=
+IMG_TAG_APISERVER ?= apiserver
 IMG_TAG_PERSISTENCEAGENT ?= persistence-agent
 IMG_TAG_CACHESERVER ?= cache-server
 IMG_TAG_SCHEDULEDWORKFLOW ?= scheduledworkflow
diff --git a/backend/api/v2beta1/go_client/recurring_run.pb.go b/backend/api/v2beta1/go_client/recurring_run.pb.go
index 34f1e0a1bcc..0fea9011206 100644
--- a/backend/api/v2beta1/go_client/recurring_run.pb.go
+++ b/backend/api/v2beta1/go_client/recurring_run.pb.go
@@ -241,7 +241,10 @@ type RecurringRun struct {
 	// Output only. Namespace this recurring run belongs to. Derived from the parent experiment.
 	Namespace string `protobuf:"bytes,16,opt,name=namespace,proto3" json:"namespace,omitempty"`
 	// ID of the parent experiment this recurring run belongs to.
-	ExperimentId  string `protobuf:"bytes,17,opt,name=experiment_id,json=experimentId,proto3" json:"experiment_id,omitempty"`
+	ExperimentId string `protobuf:"bytes,17,opt,name=experiment_id,json=experimentId,proto3" json:"experiment_id,omitempty"`
+	// Optional input. Plugin inputs to propagate to each triggered run.
+	// Each triggered run will inherit these values in its plugins_input field.
+ PluginsInput map[string]*structpb.Struct `protobuf:"bytes,19,rep,name=plugins_input,json=pluginsInput,proto3" json:"plugins_input,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -416,6 +419,13 @@ func (x *RecurringRun) GetExperimentId() string { return "" } +func (x *RecurringRun) GetPluginsInput() map[string]*structpb.Struct { + if x != nil { + return x.PluginsInput + } + return nil +} + type isRecurringRun_PipelineSource interface { isRecurringRun_PipelineSource() } @@ -1052,7 +1062,8 @@ var File_backend_api_v2beta1_recurring_run_proto protoreflect.FileDescriptor const file_backend_api_v2beta1_recurring_run_proto_rawDesc = "" + "\n" + - "'backend/api/v2beta1/recurring_run.proto\x12&kubeflow.pipelines.backend.api.v2beta1\x1a(backend/api/v2beta1/runtime_config.proto\x1a\x1dbackend/api/v2beta1/run.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x17google/rpc/status.proto\x1a.protoc-gen-openapiv2/options/annotations.proto\"\x9e\t\n" + + "'backend/api/v2beta1/recurring_run.proto\x12&kubeflow.pipelines.backend.api.v2beta1\x1a(backend/api/v2beta1/runtime_config.proto\x1a\x1dbackend/api/v2beta1/run.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x17google/rpc/status.proto\x1a.protoc-gen-openapiv2/options/annotations.proto\"\xe5\n" + + "\n" + "\fRecurringRun\x12(\n" + "\x10recurring_run_id\x18\x01 \x01(\tR\x0erecurringRunId\x12!\n" + "\fdisplay_name\x18\x02 \x01(\tR\vdisplayName\x12 \n" + @@ -1075,7 +1086,11 @@ const file_backend_api_v2beta1_recurring_run_proto_rawDesc = "" + "\n" + "no_catchup\x18\x0f \x01(\bR\tnoCatchup\x12\x1c\n" + "\tnamespace\x18\x10 \x01(\tR\tnamespace\x12#\n" + - "\rexperiment_id\x18\x11 \x01(\tR\fexperimentId\"5\n" + + 
"\rexperiment_id\x18\x11 \x01(\tR\fexperimentId\x12k\n" + + "\rplugins_input\x18\x13 \x03(\v2F.kubeflow.pipelines.backend.api.v2beta1.RecurringRun.PluginsInputEntryR\fpluginsInput\x1aX\n" + + "\x11PluginsInputEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12-\n" + + "\x05value\x18\x02 \x01(\v2\x17.google.protobuf.StructR\x05value:\x028\x01\"5\n" + "\x04Mode\x12\x14\n" + "\x10MODE_UNSPECIFIED\x10\x00\x12\n" + "\n" + @@ -1153,7 +1168,7 @@ func file_backend_api_v2beta1_recurring_run_proto_rawDescGZIP() []byte { } var file_backend_api_v2beta1_recurring_run_proto_enumTypes = make([]protoimpl.EnumInfo, 3) -var file_backend_api_v2beta1_recurring_run_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_backend_api_v2beta1_recurring_run_proto_msgTypes = make([]protoimpl.MessageInfo, 12) var file_backend_api_v2beta1_recurring_run_proto_goTypes = []any{ (DeletePropagationPolicy)(0), // 0: kubeflow.pipelines.backend.api.v2beta1.DeletePropagationPolicy (RecurringRun_Mode)(0), // 1: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.Mode @@ -1169,49 +1184,52 @@ var file_backend_api_v2beta1_recurring_run_proto_goTypes = []any{ (*CronSchedule)(nil), // 11: kubeflow.pipelines.backend.api.v2beta1.CronSchedule (*PeriodicSchedule)(nil), // 12: kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule (*Trigger)(nil), // 13: kubeflow.pipelines.backend.api.v2beta1.Trigger - (*structpb.Struct)(nil), // 14: google.protobuf.Struct - (*PipelineVersionReference)(nil), // 15: kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference - (*RuntimeConfig)(nil), // 16: kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig - (*timestamppb.Timestamp)(nil), // 17: google.protobuf.Timestamp - (*status.Status)(nil), // 18: google.rpc.Status - (*emptypb.Empty)(nil), // 19: google.protobuf.Empty + nil, // 14: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.PluginsInputEntry + (*structpb.Struct)(nil), // 15: google.protobuf.Struct + (*PipelineVersionReference)(nil), // 16: 
kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference + (*RuntimeConfig)(nil), // 17: kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig + (*timestamppb.Timestamp)(nil), // 18: google.protobuf.Timestamp + (*status.Status)(nil), // 19: google.rpc.Status + (*emptypb.Empty)(nil), // 20: google.protobuf.Empty } var file_backend_api_v2beta1_recurring_run_proto_depIdxs = []int32{ - 14, // 0: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.pipeline_spec:type_name -> google.protobuf.Struct - 15, // 1: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.pipeline_version_reference:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference - 16, // 2: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.runtime_config:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig + 15, // 0: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.pipeline_spec:type_name -> google.protobuf.Struct + 16, // 1: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.pipeline_version_reference:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference + 17, // 2: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.runtime_config:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig 13, // 3: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.trigger:type_name -> kubeflow.pipelines.backend.api.v2beta1.Trigger 1, // 4: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.mode:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun.Mode - 17, // 5: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.created_at:type_name -> google.protobuf.Timestamp - 17, // 6: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.updated_at:type_name -> google.protobuf.Timestamp + 18, // 5: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.created_at:type_name -> google.protobuf.Timestamp + 18, // 6: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.updated_at:type_name -> google.protobuf.Timestamp 2, // 7: 
kubeflow.pipelines.backend.api.v2beta1.RecurringRun.status:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun.Status - 18, // 8: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.error:type_name -> google.rpc.Status - 3, // 9: kubeflow.pipelines.backend.api.v2beta1.CreateRecurringRunRequest.recurring_run:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun - 3, // 10: kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsResponse.recurringRuns:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun - 0, // 11: kubeflow.pipelines.backend.api.v2beta1.DeleteRecurringRunRequest.propagation_policy:type_name -> kubeflow.pipelines.backend.api.v2beta1.DeletePropagationPolicy - 17, // 12: kubeflow.pipelines.backend.api.v2beta1.CronSchedule.start_time:type_name -> google.protobuf.Timestamp - 17, // 13: kubeflow.pipelines.backend.api.v2beta1.CronSchedule.end_time:type_name -> google.protobuf.Timestamp - 17, // 14: kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule.start_time:type_name -> google.protobuf.Timestamp - 17, // 15: kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule.end_time:type_name -> google.protobuf.Timestamp - 11, // 16: kubeflow.pipelines.backend.api.v2beta1.Trigger.cron_schedule:type_name -> kubeflow.pipelines.backend.api.v2beta1.CronSchedule - 12, // 17: kubeflow.pipelines.backend.api.v2beta1.Trigger.periodic_schedule:type_name -> kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule - 4, // 18: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.CreateRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.CreateRecurringRunRequest - 5, // 19: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.GetRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.GetRecurringRunRequest - 6, // 20: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.ListRecurringRuns:input_type -> kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsRequest - 8, // 21: 
kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.EnableRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.EnableRecurringRunRequest - 9, // 22: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DisableRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DisableRecurringRunRequest - 10, // 23: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DeleteRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DeleteRecurringRunRequest - 3, // 24: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.CreateRecurringRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun - 3, // 25: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.GetRecurringRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun - 7, // 26: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.ListRecurringRuns:output_type -> kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsResponse - 19, // 27: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.EnableRecurringRun:output_type -> google.protobuf.Empty - 19, // 28: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DisableRecurringRun:output_type -> google.protobuf.Empty - 19, // 29: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DeleteRecurringRun:output_type -> google.protobuf.Empty - 24, // [24:30] is the sub-list for method output_type - 18, // [18:24] is the sub-list for method input_type - 18, // [18:18] is the sub-list for extension type_name - 18, // [18:18] is the sub-list for extension extendee - 0, // [0:18] is the sub-list for field type_name + 19, // 8: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.error:type_name -> google.rpc.Status + 14, // 9: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.plugins_input:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun.PluginsInputEntry + 3, // 10: 
kubeflow.pipelines.backend.api.v2beta1.CreateRecurringRunRequest.recurring_run:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun + 3, // 11: kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsResponse.recurringRuns:type_name -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun + 0, // 12: kubeflow.pipelines.backend.api.v2beta1.DeleteRecurringRunRequest.propagation_policy:type_name -> kubeflow.pipelines.backend.api.v2beta1.DeletePropagationPolicy + 18, // 13: kubeflow.pipelines.backend.api.v2beta1.CronSchedule.start_time:type_name -> google.protobuf.Timestamp + 18, // 14: kubeflow.pipelines.backend.api.v2beta1.CronSchedule.end_time:type_name -> google.protobuf.Timestamp + 18, // 15: kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule.start_time:type_name -> google.protobuf.Timestamp + 18, // 16: kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule.end_time:type_name -> google.protobuf.Timestamp + 11, // 17: kubeflow.pipelines.backend.api.v2beta1.Trigger.cron_schedule:type_name -> kubeflow.pipelines.backend.api.v2beta1.CronSchedule + 12, // 18: kubeflow.pipelines.backend.api.v2beta1.Trigger.periodic_schedule:type_name -> kubeflow.pipelines.backend.api.v2beta1.PeriodicSchedule + 15, // 19: kubeflow.pipelines.backend.api.v2beta1.RecurringRun.PluginsInputEntry.value:type_name -> google.protobuf.Struct + 4, // 20: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.CreateRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.CreateRecurringRunRequest + 5, // 21: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.GetRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.GetRecurringRunRequest + 6, // 22: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.ListRecurringRuns:input_type -> kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsRequest + 8, // 23: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.EnableRecurringRun:input_type -> 
kubeflow.pipelines.backend.api.v2beta1.EnableRecurringRunRequest + 9, // 24: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DisableRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DisableRecurringRunRequest + 10, // 25: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DeleteRecurringRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DeleteRecurringRunRequest + 3, // 26: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.CreateRecurringRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun + 3, // 27: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.GetRecurringRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.RecurringRun + 7, // 28: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.ListRecurringRuns:output_type -> kubeflow.pipelines.backend.api.v2beta1.ListRecurringRunsResponse + 20, // 29: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.EnableRecurringRun:output_type -> google.protobuf.Empty + 20, // 30: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DisableRecurringRun:output_type -> google.protobuf.Empty + 20, // 31: kubeflow.pipelines.backend.api.v2beta1.RecurringRunService.DeleteRecurringRun:output_type -> google.protobuf.Empty + 26, // [26:32] is the sub-list for method output_type + 20, // [20:26] is the sub-list for method input_type + 20, // [20:20] is the sub-list for extension type_name + 20, // [20:20] is the sub-list for extension extendee + 0, // [0:20] is the sub-list for field type_name } func init() { file_backend_api_v2beta1_recurring_run_proto_init() } @@ -1236,7 +1254,7 @@ func file_backend_api_v2beta1_recurring_run_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_backend_api_v2beta1_recurring_run_proto_rawDesc), len(file_backend_api_v2beta1_recurring_run_proto_rawDesc)), NumEnums: 3, - NumMessages: 11, + NumMessages: 12, NumExtensions: 0, NumServices: 1, }, diff 
--git a/backend/api/v2beta1/go_client/run.pb.go b/backend/api/v2beta1/go_client/run.pb.go index afdadc9eaaf..70ae82007ec 100644 --- a/backend/api/v2beta1/go_client/run.pb.go +++ b/backend/api/v2beta1/go_client/run.pb.go @@ -119,6 +119,66 @@ func (RuntimeState) EnumDescriptor() ([]byte, []int) { return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{0} } +// Describes the state of a plugin's operations. +// Unlike RuntimeState (which covers pipeline/task lifecycle including CANCELING, +// PAUSED, SKIPPED), PluginState only reflects whether the plugin's own work +// succeeded or failed, independent of the pipeline run outcome. +type PluginState int32 + +const ( + // Default value. The plugin state is unknown or not yet set. + PluginState_PLUGIN_STATE_UNSPECIFIED PluginState = 0 + // Plugin operations are in progress. + PluginState_PLUGIN_RUNNING PluginState = 1 + // Plugin operations completed successfully. + PluginState_PLUGIN_SUCCEEDED PluginState = 2 + // Plugin operations failed. + PluginState_PLUGIN_FAILED PluginState = 3 +) + +// Enum value maps for PluginState. 
+var ( + PluginState_name = map[int32]string{ + 0: "PLUGIN_STATE_UNSPECIFIED", + 1: "PLUGIN_RUNNING", + 2: "PLUGIN_SUCCEEDED", + 3: "PLUGIN_FAILED", + } + PluginState_value = map[string]int32{ + "PLUGIN_STATE_UNSPECIFIED": 0, + "PLUGIN_RUNNING": 1, + "PLUGIN_SUCCEEDED": 2, + "PLUGIN_FAILED": 3, + } +) + +func (x PluginState) Enum() *PluginState { + p := new(PluginState) + *p = x + return p +} + +func (x PluginState) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (PluginState) Descriptor() protoreflect.EnumDescriptor { + return file_backend_api_v2beta1_run_proto_enumTypes[1].Descriptor() +} + +func (PluginState) Type() protoreflect.EnumType { + return &file_backend_api_v2beta1_run_proto_enumTypes[1] +} + +func (x PluginState) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use PluginState.Descriptor instead. +func (PluginState) EnumDescriptor() ([]byte, []int) { + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{1} +} + // Describes whether an entity is available or archived. type Run_StorageState int32 @@ -156,11 +216,11 @@ func (x Run_StorageState) String() string { } func (Run_StorageState) Descriptor() protoreflect.EnumDescriptor { - return file_backend_api_v2beta1_run_proto_enumTypes[1].Descriptor() + return file_backend_api_v2beta1_run_proto_enumTypes[2].Descriptor() } func (Run_StorageState) Type() protoreflect.EnumType { - return &file_backend_api_v2beta1_run_proto_enumTypes[1] + return &file_backend_api_v2beta1_run_proto_enumTypes[2] } func (x Run_StorageState) Number() protoreflect.EnumNumber { @@ -172,6 +232,55 @@ func (Run_StorageState) EnumDescriptor() ([]byte, []int) { return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{0, 0} } +// Hint for UI rendering of the value. +type MetadataValue_RenderType int32 + +const ( + // Default. No special rendering. 
+ MetadataValue_UNSPECIFIED MetadataValue_RenderType = 0 + // Render the value as a hyperlink. + MetadataValue_URL MetadataValue_RenderType = 1 +) + +// Enum value maps for MetadataValue_RenderType. +var ( + MetadataValue_RenderType_name = map[int32]string{ + 0: "UNSPECIFIED", + 1: "URL", + } + MetadataValue_RenderType_value = map[string]int32{ + "UNSPECIFIED": 0, + "URL": 1, + } +) + +func (x MetadataValue_RenderType) Enum() *MetadataValue_RenderType { + p := new(MetadataValue_RenderType) + *p = x + return p +} + +func (x MetadataValue_RenderType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (MetadataValue_RenderType) Descriptor() protoreflect.EnumDescriptor { + return file_backend_api_v2beta1_run_proto_enumTypes[3].Descriptor() +} + +func (MetadataValue_RenderType) Type() protoreflect.EnumType { + return &file_backend_api_v2beta1_run_proto_enumTypes[3] +} + +func (x MetadataValue_RenderType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use MetadataValue_RenderType.Descriptor instead. +func (MetadataValue_RenderType) EnumDescriptor() ([]byte, []int) { + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{3, 0} +} + type Run struct { state protoimpl.MessageState `protogen:"open.v1"` // Input. ID of the parent experiment. @@ -219,7 +328,13 @@ type Run struct { RecurringRunId string `protobuf:"bytes,16,opt,name=recurring_run_id,json=recurringRunId,proto3" json:"recurring_run_id,omitempty"` // Output. A sequence of run statuses. This field keeps a record // of state transitions. - StateHistory []*RuntimeStatus `protobuf:"bytes,17,rep,name=state_history,json=stateHistory,proto3" json:"state_history,omitempty"` + StateHistory []*RuntimeStatus `protobuf:"bytes,17,rep,name=state_history,json=stateHistory,proto3" json:"state_history,omitempty"` + // Optional input. Plugin-specific inputs provided by the user at run creation. 
+ // Each key is a plugin name (e.g., "mlflow") and the value is arbitrary JSON config. + PluginsInput map[string]*structpb.Struct `protobuf:"bytes,19,rep,name=plugins_input,json=pluginsInput,proto3" json:"plugins_input,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + // Output. Plugin-specific outputs populated by backend components. + // Each key is a plugin name and the value contains the plugin's output entries and state. + PluginsOutput map[string]*PluginOutput `protobuf:"bytes,20,rep,name=plugins_output,json=pluginsOutput,proto3" json:"plugins_output,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -394,6 +509,20 @@ func (x *Run) GetStateHistory() []*RuntimeStatus { return nil } +func (x *Run) GetPluginsInput() map[string]*structpb.Struct { + if x != nil { + return x.PluginsInput + } + return nil +} + +func (x *Run) GetPluginsOutput() map[string]*PluginOutput { + if x != nil { + return x.PluginsOutput + } + return nil +} + type isRun_PipelineSource interface { isRun_PipelineSource() } @@ -544,6 +673,120 @@ func (x *RuntimeStatus) GetError() *status.Status { return nil } +// A typed metadata value with an optional rendering hint for the UI. 
+type MetadataValue struct { + state protoimpl.MessageState `protogen:"open.v1"` + Value *structpb.Value `protobuf:"bytes,1,opt,name=value,proto3" json:"value,omitempty"` + RenderType *MetadataValue_RenderType `protobuf:"varint,2,opt,name=render_type,json=renderType,proto3,enum=kubeflow.pipelines.backend.api.v2beta1.MetadataValue_RenderType,oneof" json:"render_type,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *MetadataValue) Reset() { + *x = MetadataValue{} + mi := &file_backend_api_v2beta1_run_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *MetadataValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MetadataValue) ProtoMessage() {} + +func (x *MetadataValue) ProtoReflect() protoreflect.Message { + mi := &file_backend_api_v2beta1_run_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MetadataValue.ProtoReflect.Descriptor instead. +func (*MetadataValue) Descriptor() ([]byte, []int) { + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{3} +} + +func (x *MetadataValue) GetValue() *structpb.Value { + if x != nil { + return x.Value + } + return nil +} + +func (x *MetadataValue) GetRenderType() MetadataValue_RenderType { + if x != nil && x.RenderType != nil { + return *x.RenderType + } + return MetadataValue_UNSPECIFIED +} + +// Output from a single plugin, containing keyed metadata entries and overall state. 
+type PluginOutput struct { + state protoimpl.MessageState `protogen:"open.v1"` + Entries map[string]*MetadataValue `protobuf:"bytes,1,rep,name=entries,proto3" json:"entries,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + State PluginState `protobuf:"varint,2,opt,name=state,proto3,enum=kubeflow.pipelines.backend.api.v2beta1.PluginState" json:"state,omitempty"` + StateMessage string `protobuf:"bytes,3,opt,name=state_message,json=stateMessage,proto3" json:"state_message,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PluginOutput) Reset() { + *x = PluginOutput{} + mi := &file_backend_api_v2beta1_run_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PluginOutput) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PluginOutput) ProtoMessage() {} + +func (x *PluginOutput) ProtoReflect() protoreflect.Message { + mi := &file_backend_api_v2beta1_run_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PluginOutput.ProtoReflect.Descriptor instead. +func (*PluginOutput) Descriptor() ([]byte, []int) { + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{4} +} + +func (x *PluginOutput) GetEntries() map[string]*MetadataValue { + if x != nil { + return x.Entries + } + return nil +} + +func (x *PluginOutput) GetState() PluginState { + if x != nil { + return x.State + } + return PluginState_PLUGIN_STATE_UNSPECIFIED +} + +func (x *PluginOutput) GetStateMessage() string { + if x != nil { + return x.StateMessage + } + return "" +} + // Runtime details of a run. 
type RunDetails struct { state protoimpl.MessageState `protogen:"open.v1"` @@ -559,7 +802,7 @@ type RunDetails struct { func (x *RunDetails) Reset() { *x = RunDetails{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[3] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -571,7 +814,7 @@ func (x *RunDetails) String() string { func (*RunDetails) ProtoMessage() {} func (x *RunDetails) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[3] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -584,7 +827,7 @@ func (x *RunDetails) ProtoReflect() protoreflect.Message { // Deprecated: Use RunDetails.ProtoReflect.Descriptor instead. func (*RunDetails) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{3} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{5} } func (x *RunDetails) GetPipelineContextId() int64 { @@ -654,7 +897,7 @@ type PipelineTaskDetail struct { func (x *PipelineTaskDetail) Reset() { *x = PipelineTaskDetail{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[4] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -666,7 +909,7 @@ func (x *PipelineTaskDetail) String() string { func (*PipelineTaskDetail) ProtoMessage() {} func (x *PipelineTaskDetail) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[4] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -679,7 +922,7 @@ func (x *PipelineTaskDetail) ProtoReflect() protoreflect.Message { // Deprecated: Use PipelineTaskDetail.ProtoReflect.Descriptor instead. 
func (*PipelineTaskDetail) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{4} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{6} } func (x *PipelineTaskDetail) GetRunId() string { @@ -819,7 +1062,7 @@ type PipelineTaskExecutorDetail struct { func (x *PipelineTaskExecutorDetail) Reset() { *x = PipelineTaskExecutorDetail{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[5] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -831,7 +1074,7 @@ func (x *PipelineTaskExecutorDetail) String() string { func (*PipelineTaskExecutorDetail) ProtoMessage() {} func (x *PipelineTaskExecutorDetail) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[5] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -844,7 +1087,7 @@ func (x *PipelineTaskExecutorDetail) ProtoReflect() protoreflect.Message { // Deprecated: Use PipelineTaskExecutorDetail.ProtoReflect.Descriptor instead. 
func (*PipelineTaskExecutorDetail) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{5} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{7} } func (x *PipelineTaskExecutorDetail) GetMainJob() string { @@ -886,7 +1129,7 @@ type ArtifactList struct { func (x *ArtifactList) Reset() { *x = ArtifactList{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[6] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -898,7 +1141,7 @@ func (x *ArtifactList) String() string { func (*ArtifactList) ProtoMessage() {} func (x *ArtifactList) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[6] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -911,7 +1154,7 @@ func (x *ArtifactList) ProtoReflect() protoreflect.Message { // Deprecated: Use ArtifactList.ProtoReflect.Descriptor instead. 
func (*ArtifactList) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{6} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{8} } func (x *ArtifactList) GetArtifactIds() []int64 { @@ -935,7 +1178,7 @@ type CreateRunRequest struct { func (x *CreateRunRequest) Reset() { *x = CreateRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[7] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -947,7 +1190,7 @@ func (x *CreateRunRequest) String() string { func (*CreateRunRequest) ProtoMessage() {} func (x *CreateRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[7] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -960,7 +1203,7 @@ func (x *CreateRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CreateRunRequest.ProtoReflect.Descriptor instead. func (*CreateRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{7} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{9} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. 
@@ -992,7 +1235,7 @@ type GetRunRequest struct { func (x *GetRunRequest) Reset() { *x = GetRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[8] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1004,7 +1247,7 @@ func (x *GetRunRequest) String() string { func (*GetRunRequest) ProtoMessage() {} func (x *GetRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[8] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1017,7 +1260,7 @@ func (x *GetRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetRunRequest.ProtoReflect.Descriptor instead. func (*GetRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{8} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{10} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. @@ -1061,7 +1304,7 @@ type ListRunsRequest struct { func (x *ListRunsRequest) Reset() { *x = ListRunsRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[9] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1073,7 +1316,7 @@ func (x *ListRunsRequest) String() string { func (*ListRunsRequest) ProtoMessage() {} func (x *ListRunsRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[9] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1086,7 +1329,7 @@ func (x *ListRunsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ListRunsRequest.ProtoReflect.Descriptor instead. 
func (*ListRunsRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{9} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{11} } func (x *ListRunsRequest) GetNamespace() string { @@ -1145,7 +1388,7 @@ type TerminateRunRequest struct { func (x *TerminateRunRequest) Reset() { *x = TerminateRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[10] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1157,7 +1400,7 @@ func (x *TerminateRunRequest) String() string { func (*TerminateRunRequest) ProtoMessage() {} func (x *TerminateRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[10] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[12] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1170,7 +1413,7 @@ func (x *TerminateRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use TerminateRunRequest.ProtoReflect.Descriptor instead. func (*TerminateRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{10} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{12} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. 
@@ -1202,7 +1445,7 @@ type ListRunsResponse struct { func (x *ListRunsResponse) Reset() { *x = ListRunsResponse{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[11] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1214,7 +1457,7 @@ func (x *ListRunsResponse) String() string { func (*ListRunsResponse) ProtoMessage() {} func (x *ListRunsResponse) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[11] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[13] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1227,7 +1470,7 @@ func (x *ListRunsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use ListRunsResponse.ProtoReflect.Descriptor instead. func (*ListRunsResponse) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{11} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{13} } func (x *ListRunsResponse) GetRuns() []*Run { @@ -1265,7 +1508,7 @@ type ArchiveRunRequest struct { func (x *ArchiveRunRequest) Reset() { *x = ArchiveRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[12] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[14] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1277,7 +1520,7 @@ func (x *ArchiveRunRequest) String() string { func (*ArchiveRunRequest) ProtoMessage() {} func (x *ArchiveRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[12] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[14] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1290,7 +1533,7 @@ func (x *ArchiveRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ArchiveRunRequest.ProtoReflect.Descriptor instead. 
func (*ArchiveRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{12} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{14} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. @@ -1322,7 +1565,7 @@ type UnarchiveRunRequest struct { func (x *UnarchiveRunRequest) Reset() { *x = UnarchiveRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[13] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[15] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1334,7 +1577,7 @@ func (x *UnarchiveRunRequest) String() string { func (*UnarchiveRunRequest) ProtoMessage() {} func (x *UnarchiveRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[13] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[15] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1347,7 +1590,7 @@ func (x *UnarchiveRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use UnarchiveRunRequest.ProtoReflect.Descriptor instead. func (*UnarchiveRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{13} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{15} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. 
@@ -1379,7 +1622,7 @@ type DeleteRunRequest struct { func (x *DeleteRunRequest) Reset() { *x = DeleteRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[14] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[16] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1391,7 +1634,7 @@ func (x *DeleteRunRequest) String() string { func (*DeleteRunRequest) ProtoMessage() {} func (x *DeleteRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[14] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[16] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1404,7 +1647,7 @@ func (x *DeleteRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteRunRequest.ProtoReflect.Descriptor instead. func (*DeleteRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{14} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{16} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. @@ -1436,7 +1679,7 @@ type RetryRunRequest struct { func (x *RetryRunRequest) Reset() { *x = RetryRunRequest{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[15] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[17] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1448,7 +1691,7 @@ func (x *RetryRunRequest) String() string { func (*RetryRunRequest) ProtoMessage() {} func (x *RetryRunRequest) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[15] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[17] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1461,7 +1704,7 @@ func (x *RetryRunRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use RetryRunRequest.ProtoReflect.Descriptor instead. 
func (*RetryRunRequest) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{15} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{17} } // Deprecated: Marked as deprecated in backend/api/v2beta1/run.proto. @@ -1494,7 +1737,7 @@ type PipelineTaskDetail_ChildTask struct { func (x *PipelineTaskDetail_ChildTask) Reset() { *x = PipelineTaskDetail_ChildTask{} - mi := &file_backend_api_v2beta1_run_proto_msgTypes[18] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[23] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1506,7 +1749,7 @@ func (x *PipelineTaskDetail_ChildTask) String() string { func (*PipelineTaskDetail_ChildTask) ProtoMessage() {} func (x *PipelineTaskDetail_ChildTask) ProtoReflect() protoreflect.Message { - mi := &file_backend_api_v2beta1_run_proto_msgTypes[18] + mi := &file_backend_api_v2beta1_run_proto_msgTypes[23] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1519,7 +1762,7 @@ func (x *PipelineTaskDetail_ChildTask) ProtoReflect() protoreflect.Message { // Deprecated: Use PipelineTaskDetail_ChildTask.ProtoReflect.Descriptor instead. 
func (*PipelineTaskDetail_ChildTask) Descriptor() ([]byte, []int) { - return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{4, 2} + return file_backend_api_v2beta1_run_proto_rawDescGZIP(), []int{6, 2} } func (x *PipelineTaskDetail_ChildTask) GetChildTask() isPipelineTaskDetail_ChildTask_ChildTask { @@ -1570,7 +1813,7 @@ var File_backend_api_v2beta1_run_proto protoreflect.FileDescriptor const file_backend_api_v2beta1_run_proto_rawDesc = "" + "\n" + - "\x1dbackend/api/v2beta1/run.proto\x12&kubeflow.pipelines.backend.api.v2beta1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x17google/rpc/status.proto\x1a.protoc-gen-openapiv2/options/annotations.proto\x1a(backend/api/v2beta1/runtime_config.proto\"\xcc\t\n" + + "\x1dbackend/api/v2beta1/run.proto\x12&kubeflow.pipelines.backend.api.v2beta1\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x17google/rpc/status.proto\x1a.protoc-gen-openapiv2/options/annotations.proto\x1a(backend/api/v2beta1/runtime_config.proto\"\xe9\f\n" + "\x03Run\x12#\n" + "\rexperiment_id\x18\x01 \x01(\tR\fexperimentId\x12\x15\n" + "\x06run_id\x18\x02 \x01(\tR\x05runId\x12!\n" + @@ -1593,7 +1836,15 @@ const file_backend_api_v2beta1_run_proto_rawDesc = "" + "\vrun_details\x18\x0f \x01(\v22.kubeflow.pipelines.backend.api.v2beta1.RunDetailsR\n" + "runDetails\x12(\n" + "\x10recurring_run_id\x18\x10 \x01(\tR\x0erecurringRunId\x12Z\n" + - "\rstate_history\x18\x11 \x03(\v25.kubeflow.pipelines.backend.api.v2beta1.RuntimeStatusR\fstateHistory\"J\n" + + "\rstate_history\x18\x11 \x03(\v25.kubeflow.pipelines.backend.api.v2beta1.RuntimeStatusR\fstateHistory\x12b\n" + + "\rplugins_input\x18\x13 \x03(\v2=.kubeflow.pipelines.backend.api.v2beta1.Run.PluginsInputEntryR\fpluginsInput\x12e\n" + + "\x0eplugins_output\x18\x14 
\x03(\v2>.kubeflow.pipelines.backend.api.v2beta1.Run.PluginsOutputEntryR\rpluginsOutput\x1aX\n" + + "\x11PluginsInputEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12-\n" + + "\x05value\x18\x02 \x01(\v2\x17.google.protobuf.StructR\x05value:\x028\x01\x1av\n" + + "\x12PluginsOutputEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12J\n" + + "\x05value\x18\x02 \x01(\v24.kubeflow.pipelines.backend.api.v2beta1.PluginOutputR\x05value:\x028\x01\"J\n" + "\fStorageState\x12\x1d\n" + "\x19STORAGE_STATE_UNSPECIFIED\x10\x00\x12\r\n" + "\tAVAILABLE\x10\x01\x12\f\n" + @@ -1607,7 +1858,23 @@ const file_backend_api_v2beta1_run_proto_rawDesc = "" + "\vupdate_time\x18\x01 \x01(\v2\x1a.google.protobuf.TimestampR\n" + "updateTime\x12J\n" + "\x05state\x18\x02 \x01(\x0e24.kubeflow.pipelines.backend.api.v2beta1.RuntimeStateR\x05state\x12(\n" + - "\x05error\x18\x03 \x01(\v2\x12.google.rpc.StatusR\x05error\"\xd2\x01\n" + + "\x05error\x18\x03 \x01(\v2\x12.google.rpc.StatusR\x05error\"\xdd\x01\n" + + "\rMetadataValue\x12,\n" + + "\x05value\x18\x01 \x01(\v2\x16.google.protobuf.ValueR\x05value\x12f\n" + + "\vrender_type\x18\x02 \x01(\x0e2@.kubeflow.pipelines.backend.api.v2beta1.MetadataValue.RenderTypeH\x00R\n" + + "renderType\x88\x01\x01\"&\n" + + "\n" + + "RenderType\x12\x0f\n" + + "\vUNSPECIFIED\x10\x00\x12\a\n" + + "\x03URL\x10\x01B\x0e\n" + + "\f_render_type\"\xce\x02\n" + + "\fPluginOutput\x12[\n" + + "\aentries\x18\x01 \x03(\v2A.kubeflow.pipelines.backend.api.v2beta1.PluginOutput.EntriesEntryR\aentries\x12I\n" + + "\x05state\x18\x02 \x01(\x0e23.kubeflow.pipelines.backend.api.v2beta1.PluginStateR\x05state\x12#\n" + + "\rstate_message\x18\x03 \x01(\tR\fstateMessage\x1aq\n" + + "\fEntriesEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12K\n" + + "\x05value\x18\x02 \x01(\v25.kubeflow.pipelines.backend.api.v2beta1.MetadataValueR\x05value:\x028\x01\"\xd2\x01\n" + "\n" + "RunDetails\x12.\n" + "\x13pipeline_context_id\x18\x01 \x01(\x03R\x11pipelineContextId\x125\n" + @@ 
-1698,7 +1965,12 @@ const file_backend_api_v2beta1_run_proto_rawDesc = "" + "\tCANCELING\x10\x06\x12\f\n" + "\bCANCELED\x10\a\x12\n" + "\n" + - "\x06PAUSED\x10\b2\x99\t\n" + + "\x06PAUSED\x10\b*h\n" + + "\vPluginState\x12\x1c\n" + + "\x18PLUGIN_STATE_UNSPECIFIED\x10\x00\x12\x12\n" + + "\x0ePLUGIN_RUNNING\x10\x01\x12\x14\n" + + "\x10PLUGIN_SUCCEEDED\x10\x02\x12\x11\n" + + "\rPLUGIN_FAILED\x10\x032\x99\t\n" + "\n" + "RunService\x12\x93\x01\n" + "\tCreateRun\x128.kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest\x1a+.kubeflow.pipelines.backend.api.v2beta1.Run\"\x1f\x82\xd3\xe4\x93\x02\x19:\x03run\"\x12/apis/v2beta1/runs\x12\x91\x01\n" + @@ -1730,87 +2002,104 @@ func file_backend_api_v2beta1_run_proto_rawDescGZIP() []byte { return file_backend_api_v2beta1_run_proto_rawDescData } -var file_backend_api_v2beta1_run_proto_enumTypes = make([]protoimpl.EnumInfo, 2) -var file_backend_api_v2beta1_run_proto_msgTypes = make([]protoimpl.MessageInfo, 19) +var file_backend_api_v2beta1_run_proto_enumTypes = make([]protoimpl.EnumInfo, 4) +var file_backend_api_v2beta1_run_proto_msgTypes = make([]protoimpl.MessageInfo, 24) var file_backend_api_v2beta1_run_proto_goTypes = []any{ (RuntimeState)(0), // 0: kubeflow.pipelines.backend.api.v2beta1.RuntimeState - (Run_StorageState)(0), // 1: kubeflow.pipelines.backend.api.v2beta1.Run.StorageState - (*Run)(nil), // 2: kubeflow.pipelines.backend.api.v2beta1.Run - (*PipelineVersionReference)(nil), // 3: kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference - (*RuntimeStatus)(nil), // 4: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus - (*RunDetails)(nil), // 5: kubeflow.pipelines.backend.api.v2beta1.RunDetails - (*PipelineTaskDetail)(nil), // 6: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail - (*PipelineTaskExecutorDetail)(nil), // 7: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskExecutorDetail - (*ArtifactList)(nil), // 8: kubeflow.pipelines.backend.api.v2beta1.ArtifactList - (*CreateRunRequest)(nil), // 9: 
kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest - (*GetRunRequest)(nil), // 10: kubeflow.pipelines.backend.api.v2beta1.GetRunRequest - (*ListRunsRequest)(nil), // 11: kubeflow.pipelines.backend.api.v2beta1.ListRunsRequest - (*TerminateRunRequest)(nil), // 12: kubeflow.pipelines.backend.api.v2beta1.TerminateRunRequest - (*ListRunsResponse)(nil), // 13: kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse - (*ArchiveRunRequest)(nil), // 14: kubeflow.pipelines.backend.api.v2beta1.ArchiveRunRequest - (*UnarchiveRunRequest)(nil), // 15: kubeflow.pipelines.backend.api.v2beta1.UnarchiveRunRequest - (*DeleteRunRequest)(nil), // 16: kubeflow.pipelines.backend.api.v2beta1.DeleteRunRequest - (*RetryRunRequest)(nil), // 17: kubeflow.pipelines.backend.api.v2beta1.RetryRunRequest - nil, // 18: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry - nil, // 19: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry - (*PipelineTaskDetail_ChildTask)(nil), // 20: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.ChildTask - (*structpb.Struct)(nil), // 21: google.protobuf.Struct - (*RuntimeConfig)(nil), // 22: kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig - (*timestamppb.Timestamp)(nil), // 23: google.protobuf.Timestamp - (*status.Status)(nil), // 24: google.rpc.Status - (*emptypb.Empty)(nil), // 25: google.protobuf.Empty + (PluginState)(0), // 1: kubeflow.pipelines.backend.api.v2beta1.PluginState + (Run_StorageState)(0), // 2: kubeflow.pipelines.backend.api.v2beta1.Run.StorageState + (MetadataValue_RenderType)(0), // 3: kubeflow.pipelines.backend.api.v2beta1.MetadataValue.RenderType + (*Run)(nil), // 4: kubeflow.pipelines.backend.api.v2beta1.Run + (*PipelineVersionReference)(nil), // 5: kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference + (*RuntimeStatus)(nil), // 6: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus + (*MetadataValue)(nil), // 7: kubeflow.pipelines.backend.api.v2beta1.MetadataValue + 
(*PluginOutput)(nil), // 8: kubeflow.pipelines.backend.api.v2beta1.PluginOutput + (*RunDetails)(nil), // 9: kubeflow.pipelines.backend.api.v2beta1.RunDetails + (*PipelineTaskDetail)(nil), // 10: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail + (*PipelineTaskExecutorDetail)(nil), // 11: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskExecutorDetail + (*ArtifactList)(nil), // 12: kubeflow.pipelines.backend.api.v2beta1.ArtifactList + (*CreateRunRequest)(nil), // 13: kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest + (*GetRunRequest)(nil), // 14: kubeflow.pipelines.backend.api.v2beta1.GetRunRequest + (*ListRunsRequest)(nil), // 15: kubeflow.pipelines.backend.api.v2beta1.ListRunsRequest + (*TerminateRunRequest)(nil), // 16: kubeflow.pipelines.backend.api.v2beta1.TerminateRunRequest + (*ListRunsResponse)(nil), // 17: kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse + (*ArchiveRunRequest)(nil), // 18: kubeflow.pipelines.backend.api.v2beta1.ArchiveRunRequest + (*UnarchiveRunRequest)(nil), // 19: kubeflow.pipelines.backend.api.v2beta1.UnarchiveRunRequest + (*DeleteRunRequest)(nil), // 20: kubeflow.pipelines.backend.api.v2beta1.DeleteRunRequest + (*RetryRunRequest)(nil), // 21: kubeflow.pipelines.backend.api.v2beta1.RetryRunRequest + nil, // 22: kubeflow.pipelines.backend.api.v2beta1.Run.PluginsInputEntry + nil, // 23: kubeflow.pipelines.backend.api.v2beta1.Run.PluginsOutputEntry + nil, // 24: kubeflow.pipelines.backend.api.v2beta1.PluginOutput.EntriesEntry + nil, // 25: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry + nil, // 26: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry + (*PipelineTaskDetail_ChildTask)(nil), // 27: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.ChildTask + (*structpb.Struct)(nil), // 28: google.protobuf.Struct + (*RuntimeConfig)(nil), // 29: kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig + (*timestamppb.Timestamp)(nil), // 30: google.protobuf.Timestamp + 
(*status.Status)(nil), // 31: google.rpc.Status + (*structpb.Value)(nil), // 32: google.protobuf.Value + (*emptypb.Empty)(nil), // 33: google.protobuf.Empty } var file_backend_api_v2beta1_run_proto_depIdxs = []int32{ - 1, // 0: kubeflow.pipelines.backend.api.v2beta1.Run.storage_state:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run.StorageState - 21, // 1: kubeflow.pipelines.backend.api.v2beta1.Run.pipeline_spec:type_name -> google.protobuf.Struct - 3, // 2: kubeflow.pipelines.backend.api.v2beta1.Run.pipeline_version_reference:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference - 22, // 3: kubeflow.pipelines.backend.api.v2beta1.Run.runtime_config:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig - 23, // 4: kubeflow.pipelines.backend.api.v2beta1.Run.created_at:type_name -> google.protobuf.Timestamp - 23, // 5: kubeflow.pipelines.backend.api.v2beta1.Run.scheduled_at:type_name -> google.protobuf.Timestamp - 23, // 6: kubeflow.pipelines.backend.api.v2beta1.Run.finished_at:type_name -> google.protobuf.Timestamp + 2, // 0: kubeflow.pipelines.backend.api.v2beta1.Run.storage_state:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run.StorageState + 28, // 1: kubeflow.pipelines.backend.api.v2beta1.Run.pipeline_spec:type_name -> google.protobuf.Struct + 5, // 2: kubeflow.pipelines.backend.api.v2beta1.Run.pipeline_version_reference:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineVersionReference + 29, // 3: kubeflow.pipelines.backend.api.v2beta1.Run.runtime_config:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeConfig + 30, // 4: kubeflow.pipelines.backend.api.v2beta1.Run.created_at:type_name -> google.protobuf.Timestamp + 30, // 5: kubeflow.pipelines.backend.api.v2beta1.Run.scheduled_at:type_name -> google.protobuf.Timestamp + 30, // 6: kubeflow.pipelines.backend.api.v2beta1.Run.finished_at:type_name -> google.protobuf.Timestamp 0, // 7: kubeflow.pipelines.backend.api.v2beta1.Run.state:type_name -> 
kubeflow.pipelines.backend.api.v2beta1.RuntimeState - 24, // 8: kubeflow.pipelines.backend.api.v2beta1.Run.error:type_name -> google.rpc.Status - 5, // 9: kubeflow.pipelines.backend.api.v2beta1.Run.run_details:type_name -> kubeflow.pipelines.backend.api.v2beta1.RunDetails - 4, // 10: kubeflow.pipelines.backend.api.v2beta1.Run.state_history:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus - 23, // 11: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.update_time:type_name -> google.protobuf.Timestamp - 0, // 12: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.state:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeState - 24, // 13: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.error:type_name -> google.rpc.Status - 6, // 14: kubeflow.pipelines.backend.api.v2beta1.RunDetails.task_details:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail - 23, // 15: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.create_time:type_name -> google.protobuf.Timestamp - 23, // 16: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.start_time:type_name -> google.protobuf.Timestamp - 23, // 17: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.end_time:type_name -> google.protobuf.Timestamp - 7, // 18: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.executor_detail:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskExecutorDetail - 0, // 19: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.state:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeState - 24, // 20: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.error:type_name -> google.rpc.Status - 18, // 21: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.inputs:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry - 19, // 22: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.outputs:type_name -> 
kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry - 4, // 23: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.state_history:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus - 20, // 24: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.child_tasks:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.ChildTask - 2, // 25: kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest.run:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run - 2, // 26: kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse.runs:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run - 8, // 27: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.ArtifactList - 8, // 28: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.ArtifactList - 9, // 29: kubeflow.pipelines.backend.api.v2beta1.RunService.CreateRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest - 10, // 30: kubeflow.pipelines.backend.api.v2beta1.RunService.GetRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.GetRunRequest - 11, // 31: kubeflow.pipelines.backend.api.v2beta1.RunService.ListRuns:input_type -> kubeflow.pipelines.backend.api.v2beta1.ListRunsRequest - 14, // 32: kubeflow.pipelines.backend.api.v2beta1.RunService.ArchiveRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.ArchiveRunRequest - 15, // 33: kubeflow.pipelines.backend.api.v2beta1.RunService.UnarchiveRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.UnarchiveRunRequest - 16, // 34: kubeflow.pipelines.backend.api.v2beta1.RunService.DeleteRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DeleteRunRequest - 12, // 35: kubeflow.pipelines.backend.api.v2beta1.RunService.TerminateRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.TerminateRunRequest - 17, // 36: 
kubeflow.pipelines.backend.api.v2beta1.RunService.RetryRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.RetryRunRequest - 2, // 37: kubeflow.pipelines.backend.api.v2beta1.RunService.CreateRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.Run - 2, // 38: kubeflow.pipelines.backend.api.v2beta1.RunService.GetRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.Run - 13, // 39: kubeflow.pipelines.backend.api.v2beta1.RunService.ListRuns:output_type -> kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse - 25, // 40: kubeflow.pipelines.backend.api.v2beta1.RunService.ArchiveRun:output_type -> google.protobuf.Empty - 25, // 41: kubeflow.pipelines.backend.api.v2beta1.RunService.UnarchiveRun:output_type -> google.protobuf.Empty - 25, // 42: kubeflow.pipelines.backend.api.v2beta1.RunService.DeleteRun:output_type -> google.protobuf.Empty - 25, // 43: kubeflow.pipelines.backend.api.v2beta1.RunService.TerminateRun:output_type -> google.protobuf.Empty - 25, // 44: kubeflow.pipelines.backend.api.v2beta1.RunService.RetryRun:output_type -> google.protobuf.Empty - 37, // [37:45] is the sub-list for method output_type - 29, // [29:37] is the sub-list for method input_type - 29, // [29:29] is the sub-list for extension type_name - 29, // [29:29] is the sub-list for extension extendee - 0, // [0:29] is the sub-list for field type_name + 31, // 8: kubeflow.pipelines.backend.api.v2beta1.Run.error:type_name -> google.rpc.Status + 9, // 9: kubeflow.pipelines.backend.api.v2beta1.Run.run_details:type_name -> kubeflow.pipelines.backend.api.v2beta1.RunDetails + 6, // 10: kubeflow.pipelines.backend.api.v2beta1.Run.state_history:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus + 22, // 11: kubeflow.pipelines.backend.api.v2beta1.Run.plugins_input:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run.PluginsInputEntry + 23, // 12: kubeflow.pipelines.backend.api.v2beta1.Run.plugins_output:type_name -> 
kubeflow.pipelines.backend.api.v2beta1.Run.PluginsOutputEntry + 30, // 13: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.update_time:type_name -> google.protobuf.Timestamp + 0, // 14: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.state:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeState + 31, // 15: kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus.error:type_name -> google.rpc.Status + 32, // 16: kubeflow.pipelines.backend.api.v2beta1.MetadataValue.value:type_name -> google.protobuf.Value + 3, // 17: kubeflow.pipelines.backend.api.v2beta1.MetadataValue.render_type:type_name -> kubeflow.pipelines.backend.api.v2beta1.MetadataValue.RenderType + 24, // 18: kubeflow.pipelines.backend.api.v2beta1.PluginOutput.entries:type_name -> kubeflow.pipelines.backend.api.v2beta1.PluginOutput.EntriesEntry + 1, // 19: kubeflow.pipelines.backend.api.v2beta1.PluginOutput.state:type_name -> kubeflow.pipelines.backend.api.v2beta1.PluginState + 10, // 20: kubeflow.pipelines.backend.api.v2beta1.RunDetails.task_details:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail + 30, // 21: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.create_time:type_name -> google.protobuf.Timestamp + 30, // 22: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.start_time:type_name -> google.protobuf.Timestamp + 30, // 23: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.end_time:type_name -> google.protobuf.Timestamp + 11, // 24: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.executor_detail:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskExecutorDetail + 0, // 25: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.state:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeState + 31, // 26: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.error:type_name -> google.rpc.Status + 25, // 27: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.inputs:type_name -> 
kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry + 26, // 28: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.outputs:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry + 6, // 29: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.state_history:type_name -> kubeflow.pipelines.backend.api.v2beta1.RuntimeStatus + 27, // 30: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.child_tasks:type_name -> kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.ChildTask + 4, // 31: kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest.run:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run + 4, // 32: kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse.runs:type_name -> kubeflow.pipelines.backend.api.v2beta1.Run + 28, // 33: kubeflow.pipelines.backend.api.v2beta1.Run.PluginsInputEntry.value:type_name -> google.protobuf.Struct + 8, // 34: kubeflow.pipelines.backend.api.v2beta1.Run.PluginsOutputEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.PluginOutput + 7, // 35: kubeflow.pipelines.backend.api.v2beta1.PluginOutput.EntriesEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.MetadataValue + 12, // 36: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.InputsEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.ArtifactList + 12, // 37: kubeflow.pipelines.backend.api.v2beta1.PipelineTaskDetail.OutputsEntry.value:type_name -> kubeflow.pipelines.backend.api.v2beta1.ArtifactList + 13, // 38: kubeflow.pipelines.backend.api.v2beta1.RunService.CreateRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.CreateRunRequest + 14, // 39: kubeflow.pipelines.backend.api.v2beta1.RunService.GetRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.GetRunRequest + 15, // 40: kubeflow.pipelines.backend.api.v2beta1.RunService.ListRuns:input_type -> kubeflow.pipelines.backend.api.v2beta1.ListRunsRequest + 18, // 41: 
kubeflow.pipelines.backend.api.v2beta1.RunService.ArchiveRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.ArchiveRunRequest + 19, // 42: kubeflow.pipelines.backend.api.v2beta1.RunService.UnarchiveRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.UnarchiveRunRequest + 20, // 43: kubeflow.pipelines.backend.api.v2beta1.RunService.DeleteRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.DeleteRunRequest + 16, // 44: kubeflow.pipelines.backend.api.v2beta1.RunService.TerminateRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.TerminateRunRequest + 21, // 45: kubeflow.pipelines.backend.api.v2beta1.RunService.RetryRun:input_type -> kubeflow.pipelines.backend.api.v2beta1.RetryRunRequest + 4, // 46: kubeflow.pipelines.backend.api.v2beta1.RunService.CreateRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.Run + 4, // 47: kubeflow.pipelines.backend.api.v2beta1.RunService.GetRun:output_type -> kubeflow.pipelines.backend.api.v2beta1.Run + 17, // 48: kubeflow.pipelines.backend.api.v2beta1.RunService.ListRuns:output_type -> kubeflow.pipelines.backend.api.v2beta1.ListRunsResponse + 33, // 49: kubeflow.pipelines.backend.api.v2beta1.RunService.ArchiveRun:output_type -> google.protobuf.Empty + 33, // 50: kubeflow.pipelines.backend.api.v2beta1.RunService.UnarchiveRun:output_type -> google.protobuf.Empty + 33, // 51: kubeflow.pipelines.backend.api.v2beta1.RunService.DeleteRun:output_type -> google.protobuf.Empty + 33, // 52: kubeflow.pipelines.backend.api.v2beta1.RunService.TerminateRun:output_type -> google.protobuf.Empty + 33, // 53: kubeflow.pipelines.backend.api.v2beta1.RunService.RetryRun:output_type -> google.protobuf.Empty + 46, // [46:54] is the sub-list for method output_type + 38, // [38:46] is the sub-list for method input_type + 38, // [38:38] is the sub-list for extension type_name + 38, // [38:38] is the sub-list for extension extendee + 0, // [0:38] is the sub-list for field type_name } func init() { 
file_backend_api_v2beta1_run_proto_init() } @@ -1824,7 +2113,8 @@ func file_backend_api_v2beta1_run_proto_init() { (*Run_PipelineSpec)(nil), (*Run_PipelineVersionReference)(nil), } - file_backend_api_v2beta1_run_proto_msgTypes[18].OneofWrappers = []any{ + file_backend_api_v2beta1_run_proto_msgTypes[3].OneofWrappers = []any{} + file_backend_api_v2beta1_run_proto_msgTypes[23].OneofWrappers = []any{ (*PipelineTaskDetail_ChildTask_TaskId)(nil), (*PipelineTaskDetail_ChildTask_PodName)(nil), } @@ -1833,8 +2123,8 @@ func file_backend_api_v2beta1_run_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_backend_api_v2beta1_run_proto_rawDesc), len(file_backend_api_v2beta1_run_proto_rawDesc)), - NumEnums: 2, - NumMessages: 19, + NumEnums: 4, + NumMessages: 24, NumExtensions: 0, NumServices: 1, }, diff --git a/backend/api/v2beta1/go_http_client/recurring_run_model/v2beta1_recurring_run.go b/backend/api/v2beta1/go_http_client/recurring_run_model/v2beta1_recurring_run.go index 3796c3cea12..145c9d6bc19 100644 --- a/backend/api/v2beta1/go_http_client/recurring_run_model/v2beta1_recurring_run.go +++ b/backend/api/v2beta1/go_http_client/recurring_run_model/v2beta1_recurring_run.go @@ -63,6 +63,10 @@ type V2beta1RecurringRun struct { // Reference to a pipeline version containing pipeline_id and pipeline_version_id. PipelineVersionReference *V2beta1PipelineVersionReference `json:"pipeline_version_reference,omitempty"` + // Optional input. Plugin inputs to propagate to each triggered run. + // Each triggered run will inherit these values in its plugins_input field. + PluginsInput map[string]interface{} `json:"plugins_input,omitempty"` + // Output. Unique run ID generated by API server. 
RecurringRunID string `json:"recurring_run_id,omitempty"` diff --git a/backend/api/v2beta1/go_http_client/run_model/metadata_value_render_type.go b/backend/api/v2beta1/go_http_client/run_model/metadata_value_render_type.go new file mode 100644 index 00000000000..4fb705ee1ed --- /dev/null +++ b/backend/api/v2beta1/go_http_client/run_model/metadata_value_render_type.go @@ -0,0 +1,81 @@ +// Code generated by go-swagger; DO NOT EDIT. + +package run_model + +// This file was generated by the swagger tool. +// Editing this file might prove futile when you re-run the swagger generate command + +import ( + "context" + "encoding/json" + + "github.com/go-openapi/errors" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/validate" +) + +// MetadataValueRenderType Hint for UI rendering of the value. +// +// - UNSPECIFIED: Default. No special rendering. +// - URL: Render the value as a hyperlink. +// +// swagger:model MetadataValueRenderType +type MetadataValueRenderType string + +func NewMetadataValueRenderType(value MetadataValueRenderType) *MetadataValueRenderType { + return &value +} + +// Pointer returns a pointer to a freshly-allocated MetadataValueRenderType. 
+func (m MetadataValueRenderType) Pointer() *MetadataValueRenderType { + return &m +} + +const ( + + // MetadataValueRenderTypeUNSPECIFIED captures enum value "UNSPECIFIED" + MetadataValueRenderTypeUNSPECIFIED MetadataValueRenderType = "UNSPECIFIED" + + // MetadataValueRenderTypeURL captures enum value "URL" + MetadataValueRenderTypeURL MetadataValueRenderType = "URL" +) + +// for schema +var metadataValueRenderTypeEnum []interface{} + +func init() { + var res []MetadataValueRenderType + if err := json.Unmarshal([]byte(`["UNSPECIFIED","URL"]`), &res); err != nil { + panic(err) + } + for _, v := range res { + metadataValueRenderTypeEnum = append(metadataValueRenderTypeEnum, v) + } +} + +func (m MetadataValueRenderType) validateMetadataValueRenderTypeEnum(path, location string, value MetadataValueRenderType) error { + if err := validate.EnumCase(path, location, value, metadataValueRenderTypeEnum, true); err != nil { + return err + } + return nil +} + +// Validate validates this metadata value render type +func (m MetadataValueRenderType) Validate(formats strfmt.Registry) error { + var res []error + + // value enum + if err := m.validateMetadataValueRenderTypeEnum("", "body", m); err != nil { + return err + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) + } + return nil +} + +// ContextValidate validates this metadata value render type based on context it is used +func (m MetadataValueRenderType) ContextValidate(ctx context.Context, formats strfmt.Registry) error { + return nil +} diff --git a/backend/api/v2beta1/go_http_client/run_model/v2beta1_metadata_value.go b/backend/api/v2beta1/go_http_client/run_model/v2beta1_metadata_value.go new file mode 100644 index 00000000000..04518bd1ef2 --- /dev/null +++ b/backend/api/v2beta1/go_http_client/run_model/v2beta1_metadata_value.go @@ -0,0 +1,112 @@ +// Code generated by go-swagger; DO NOT EDIT. + +package run_model + +// This file was generated by the swagger tool. 
+// Editing this file might prove futile when you re-run the swagger generate command + +import ( + "context" + + "github.com/go-openapi/errors" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/swag" +) + +// V2beta1MetadataValue A typed metadata value with an optional rendering hint for the UI. +// +// swagger:model v2beta1MetadataValue +type V2beta1MetadataValue struct { + + // render type + RenderType *MetadataValueRenderType `json:"render_type,omitempty"` + + // value + Value interface{} `json:"value,omitempty"` +} + +// Validate validates this v2beta1 metadata value +func (m *V2beta1MetadataValue) Validate(formats strfmt.Registry) error { + var res []error + + if err := m.validateRenderType(formats); err != nil { + res = append(res, err) + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) + } + return nil +} + +func (m *V2beta1MetadataValue) validateRenderType(formats strfmt.Registry) error { + if swag.IsZero(m.RenderType) { // not required + return nil + } + + if m.RenderType != nil { + if err := m.RenderType.Validate(formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("render_type") + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("render_type") + } + return err + } + } + + return nil +} + +// ContextValidate validate this v2beta1 metadata value based on the context it is used +func (m *V2beta1MetadataValue) ContextValidate(ctx context.Context, formats strfmt.Registry) error { + var res []error + + if err := m.contextValidateRenderType(ctx, formats); err != nil { + res = append(res, err) + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) 
+ } + return nil +} + +func (m *V2beta1MetadataValue) contextValidateRenderType(ctx context.Context, formats strfmt.Registry) error { + + if m.RenderType != nil { + + if swag.IsZero(m.RenderType) { // not required + return nil + } + + if err := m.RenderType.ContextValidate(ctx, formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("render_type") + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("render_type") + } + return err + } + } + + return nil +} + +// MarshalBinary interface implementation +func (m *V2beta1MetadataValue) MarshalBinary() ([]byte, error) { + if m == nil { + return nil, nil + } + return swag.WriteJSON(m) +} + +// UnmarshalBinary interface implementation +func (m *V2beta1MetadataValue) UnmarshalBinary(b []byte) error { + var res V2beta1MetadataValue + if err := swag.ReadJSON(b, &res); err != nil { + return err + } + *m = res + return nil +} diff --git a/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_output.go b/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_output.go new file mode 100644 index 00000000000..fa6b4e37340 --- /dev/null +++ b/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_output.go @@ -0,0 +1,165 @@ +// Code generated by go-swagger; DO NOT EDIT. + +package run_model + +// This file was generated by the swagger tool. +// Editing this file might prove futile when you re-run the swagger generate command + +import ( + "context" + + "github.com/go-openapi/errors" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/swag" + "github.com/go-openapi/validate" +) + +// V2beta1PluginOutput Output from a single plugin, containing keyed metadata entries and overall state. 
+// +// swagger:model v2beta1PluginOutput +type V2beta1PluginOutput struct { + + // entries + Entries map[string]V2beta1MetadataValue `json:"entries,omitempty"` + + // state + State *V2beta1PluginState `json:"state,omitempty"` + + // state message + StateMessage string `json:"state_message,omitempty"` +} + +// Validate validates this v2beta1 plugin output +func (m *V2beta1PluginOutput) Validate(formats strfmt.Registry) error { + var res []error + + if err := m.validateEntries(formats); err != nil { + res = append(res, err) + } + + if err := m.validateState(formats); err != nil { + res = append(res, err) + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) + } + return nil +} + +func (m *V2beta1PluginOutput) validateEntries(formats strfmt.Registry) error { + if swag.IsZero(m.Entries) { // not required + return nil + } + + for k := range m.Entries { + + if err := validate.Required("entries"+"."+k, "body", m.Entries[k]); err != nil { + return err + } + if val, ok := m.Entries[k]; ok { + if err := val.Validate(formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("entries" + "." + k) + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("entries" + "." 
+ k) + } + return err + } + } + + } + + return nil +} + +func (m *V2beta1PluginOutput) validateState(formats strfmt.Registry) error { + if swag.IsZero(m.State) { // not required + return nil + } + + if m.State != nil { + if err := m.State.Validate(formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("state") + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("state") + } + return err + } + } + + return nil +} + +// ContextValidate validate this v2beta1 plugin output based on the context it is used +func (m *V2beta1PluginOutput) ContextValidate(ctx context.Context, formats strfmt.Registry) error { + var res []error + + if err := m.contextValidateEntries(ctx, formats); err != nil { + res = append(res, err) + } + + if err := m.contextValidateState(ctx, formats); err != nil { + res = append(res, err) + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) + } + return nil +} + +func (m *V2beta1PluginOutput) contextValidateEntries(ctx context.Context, formats strfmt.Registry) error { + + for k := range m.Entries { + + if val, ok := m.Entries[k]; ok { + if err := val.ContextValidate(ctx, formats); err != nil { + return err + } + } + + } + + return nil +} + +func (m *V2beta1PluginOutput) contextValidateState(ctx context.Context, formats strfmt.Registry) error { + + if m.State != nil { + + if swag.IsZero(m.State) { // not required + return nil + } + + if err := m.State.ContextValidate(ctx, formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("state") + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("state") + } + return err + } + } + + return nil +} + +// MarshalBinary interface implementation +func (m *V2beta1PluginOutput) MarshalBinary() ([]byte, error) { + if m == nil { + return nil, nil + } + return swag.WriteJSON(m) +} + +// UnmarshalBinary interface implementation +func (m *V2beta1PluginOutput) 
UnmarshalBinary(b []byte) error { + var res V2beta1PluginOutput + if err := swag.ReadJSON(b, &res); err != nil { + return err + } + *m = res + return nil +} diff --git a/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_state.go b/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_state.go new file mode 100644 index 00000000000..b40e0e7d75c --- /dev/null +++ b/backend/api/v2beta1/go_http_client/run_model/v2beta1_plugin_state.go @@ -0,0 +1,92 @@ +// Code generated by go-swagger; DO NOT EDIT. + +package run_model + +// This file was generated by the swagger tool. +// Editing this file might prove futile when you re-run the swagger generate command + +import ( + "context" + "encoding/json" + + "github.com/go-openapi/errors" + "github.com/go-openapi/strfmt" + "github.com/go-openapi/validate" +) + +// V2beta1PluginState Describes the state of a plugin's operations. +// Unlike RuntimeState (which covers pipeline/task lifecycle including CANCELING, +// PAUSED, SKIPPED), PluginState only reflects whether the plugin's own work +// succeeded or failed, independent of the pipeline run outcome. +// +// - PLUGIN_STATE_UNSPECIFIED: Default value. The plugin state is unknown or not yet set. +// - PLUGIN_RUNNING: Plugin operations are in progress. +// - PLUGIN_SUCCEEDED: Plugin operations completed successfully. +// - PLUGIN_FAILED: Plugin operations failed. +// +// swagger:model v2beta1PluginState +type V2beta1PluginState string + +func NewV2beta1PluginState(value V2beta1PluginState) *V2beta1PluginState { + return &value +} + +// Pointer returns a pointer to a freshly-allocated V2beta1PluginState. 
+func (m V2beta1PluginState) Pointer() *V2beta1PluginState { + return &m +} + +const ( + + // V2beta1PluginStatePLUGINSTATEUNSPECIFIED captures enum value "PLUGIN_STATE_UNSPECIFIED" + V2beta1PluginStatePLUGINSTATEUNSPECIFIED V2beta1PluginState = "PLUGIN_STATE_UNSPECIFIED" + + // V2beta1PluginStatePLUGINRUNNING captures enum value "PLUGIN_RUNNING" + V2beta1PluginStatePLUGINRUNNING V2beta1PluginState = "PLUGIN_RUNNING" + + // V2beta1PluginStatePLUGINSUCCEEDED captures enum value "PLUGIN_SUCCEEDED" + V2beta1PluginStatePLUGINSUCCEEDED V2beta1PluginState = "PLUGIN_SUCCEEDED" + + // V2beta1PluginStatePLUGINFAILED captures enum value "PLUGIN_FAILED" + V2beta1PluginStatePLUGINFAILED V2beta1PluginState = "PLUGIN_FAILED" +) + +// for schema +var v2beta1PluginStateEnum []interface{} + +func init() { + var res []V2beta1PluginState + if err := json.Unmarshal([]byte(`["PLUGIN_STATE_UNSPECIFIED","PLUGIN_RUNNING","PLUGIN_SUCCEEDED","PLUGIN_FAILED"]`), &res); err != nil { + panic(err) + } + for _, v := range res { + v2beta1PluginStateEnum = append(v2beta1PluginStateEnum, v) + } +} + +func (m V2beta1PluginState) validateV2beta1PluginStateEnum(path, location string, value V2beta1PluginState) error { + if err := validate.EnumCase(path, location, value, v2beta1PluginStateEnum, true); err != nil { + return err + } + return nil +} + +// Validate validates this v2beta1 plugin state +func (m V2beta1PluginState) Validate(formats strfmt.Registry) error { + var res []error + + // value enum + if err := m.validateV2beta1PluginStateEnum("", "body", m); err != nil { + return err + } + + if len(res) > 0 { + return errors.CompositeValidationError(res...) 
+ } + return nil +} + +// ContextValidate validates this v2beta1 plugin state based on context it is used +func (m V2beta1PluginState) ContextValidate(ctx context.Context, formats strfmt.Registry) error { + return nil +} diff --git a/backend/api/v2beta1/go_http_client/run_model/v2beta1_run.go b/backend/api/v2beta1/go_http_client/run_model/v2beta1_run.go index eb83d3e8492..fbfe3f4faad 100644 --- a/backend/api/v2beta1/go_http_client/run_model/v2beta1_run.go +++ b/backend/api/v2beta1/go_http_client/run_model/v2beta1_run.go @@ -53,6 +53,14 @@ type V2beta1Run struct { // Reference to a pipeline containing pipeline_id and optionally the pipeline_version_id. PipelineVersionReference *V2beta1PipelineVersionReference `json:"pipeline_version_reference,omitempty"` + // Optional input. Plugin-specific inputs provided by the user at run creation. + // Each key is a plugin name (e.g., "mlflow") and the value is arbitrary JSON config. + PluginsInput map[string]interface{} `json:"plugins_input,omitempty"` + + // Output. Plugin-specific outputs populated by backend components. + // Each key is a plugin name and the value contains the plugin's output entries and state. + PluginsOutput map[string]V2beta1PluginOutput `json:"plugins_output,omitempty"` + // ID of the recurring run that triggered this run. 
RecurringRunID string `json:"recurring_run_id,omitempty"` @@ -105,6 +113,10 @@ func (m *V2beta1Run) Validate(formats strfmt.Registry) error { res = append(res, err) } + if err := m.validatePluginsOutput(formats); err != nil { + res = append(res, err) + } + if err := m.validateRunDetails(formats); err != nil { res = append(res, err) } @@ -197,6 +209,32 @@ func (m *V2beta1Run) validatePipelineVersionReference(formats strfmt.Registry) e return nil } +func (m *V2beta1Run) validatePluginsOutput(formats strfmt.Registry) error { + if swag.IsZero(m.PluginsOutput) { // not required + return nil + } + + for k := range m.PluginsOutput { + + if err := validate.Required("plugins_output"+"."+k, "body", m.PluginsOutput[k]); err != nil { + return err + } + if val, ok := m.PluginsOutput[k]; ok { + if err := val.Validate(formats); err != nil { + if ve, ok := err.(*errors.Validation); ok { + return ve.ValidateName("plugins_output" + "." + k) + } else if ce, ok := err.(*errors.CompositeError); ok { + return ce.ValidateName("plugins_output" + "." 
+ k) + } + return err + } + } + + } + + return nil +} + func (m *V2beta1Run) validateRunDetails(formats strfmt.Registry) error { if swag.IsZero(m.RunDetails) { // not required return nil @@ -323,6 +361,10 @@ func (m *V2beta1Run) ContextValidate(ctx context.Context, formats strfmt.Registr res = append(res, err) } + if err := m.contextValidatePluginsOutput(ctx, formats); err != nil { + res = append(res, err) + } + if err := m.contextValidateRunDetails(ctx, formats); err != nil { res = append(res, err) } @@ -391,6 +433,21 @@ func (m *V2beta1Run) contextValidatePipelineVersionReference(ctx context.Context return nil } +func (m *V2beta1Run) contextValidatePluginsOutput(ctx context.Context, formats strfmt.Registry) error { + + for k := range m.PluginsOutput { + + if val, ok := m.PluginsOutput[k]; ok { + if err := val.ContextValidate(ctx, formats); err != nil { + return err + } + } + + } + + return nil +} + func (m *V2beta1Run) contextValidateRunDetails(ctx context.Context, formats strfmt.Registry) error { if m.RunDetails != nil { diff --git a/backend/api/v2beta1/python_http_client/README.md b/backend/api/v2beta1/python_http_client/README.md index 255fe65d44b..ff0d161d60f 100644 --- a/backend/api/v2beta1/python_http_client/README.md +++ b/backend/api/v2beta1/python_http_client/README.md @@ -143,6 +143,7 @@ Class | Method | HTTP request | Description - [AuthorizeRequestResources](docs/AuthorizeRequestResources.md) - [AuthorizeRequestVerb](docs/AuthorizeRequestVerb.md) - [GooglerpcStatus](docs/GooglerpcStatus.md) + - [MetadataValueRenderType](docs/MetadataValueRenderType.md) - [PipelineTaskDetailChildTask](docs/PipelineTaskDetailChildTask.md) - [PredicateIntValues](docs/PredicateIntValues.md) - [PredicateLongValues](docs/PredicateLongValues.md) @@ -165,12 +166,15 @@ Class | Method | HTTP request | Description - [V2beta1ListPipelinesResponse](docs/V2beta1ListPipelinesResponse.md) - [V2beta1ListRecurringRunsResponse](docs/V2beta1ListRecurringRunsResponse.md) - 
[V2beta1ListRunsResponse](docs/V2beta1ListRunsResponse.md) + - [V2beta1MetadataValue](docs/V2beta1MetadataValue.md) - [V2beta1PeriodicSchedule](docs/V2beta1PeriodicSchedule.md) - [V2beta1Pipeline](docs/V2beta1Pipeline.md) - [V2beta1PipelineTaskDetail](docs/V2beta1PipelineTaskDetail.md) - [V2beta1PipelineTaskExecutorDetail](docs/V2beta1PipelineTaskExecutorDetail.md) - [V2beta1PipelineVersion](docs/V2beta1PipelineVersion.md) - [V2beta1PipelineVersionReference](docs/V2beta1PipelineVersionReference.md) + - [V2beta1PluginOutput](docs/V2beta1PluginOutput.md) + - [V2beta1PluginState](docs/V2beta1PluginState.md) - [V2beta1Predicate](docs/V2beta1Predicate.md) - [V2beta1PredicateOperation](docs/V2beta1PredicateOperation.md) - [V2beta1RecurringRun](docs/V2beta1RecurringRun.md) diff --git a/backend/api/v2beta1/python_http_client/docs/MetadataValueRenderType.md b/backend/api/v2beta1/python_http_client/docs/MetadataValueRenderType.md new file mode 100644 index 00000000000..4a7d4a93232 --- /dev/null +++ b/backend/api/v2beta1/python_http_client/docs/MetadataValueRenderType.md @@ -0,0 +1,10 @@ +# MetadataValueRenderType + +Hint for UI rendering of the value. - UNSPECIFIED: Default. No special rendering. - URL: Render the value as a hyperlink. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/backend/api/v2beta1/python_http_client/docs/V2beta1MetadataValue.md b/backend/api/v2beta1/python_http_client/docs/V2beta1MetadataValue.md new file mode 100644 index 00000000000..f8d9c8370ce --- /dev/null +++ b/backend/api/v2beta1/python_http_client/docs/V2beta1MetadataValue.md @@ -0,0 +1,12 @@ +# V2beta1MetadataValue + +A typed metadata value with an optional rendering hint for the UI. 
+## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**value** | [**object**](.md) | | [optional] +**render_type** | [**MetadataValueRenderType**](MetadataValueRenderType.md) | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/backend/api/v2beta1/python_http_client/docs/V2beta1PluginOutput.md b/backend/api/v2beta1/python_http_client/docs/V2beta1PluginOutput.md new file mode 100644 index 00000000000..60926fee247 --- /dev/null +++ b/backend/api/v2beta1/python_http_client/docs/V2beta1PluginOutput.md @@ -0,0 +1,13 @@ +# V2beta1PluginOutput + +Output from a single plugin, containing keyed metadata entries and overall state. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- +**entries** | [**dict(str, V2beta1MetadataValue)**](V2beta1MetadataValue.md) | | [optional] +**state** | [**V2beta1PluginState**](V2beta1PluginState.md) | | [optional] +**state_message** | **str** | | [optional] + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/backend/api/v2beta1/python_http_client/docs/V2beta1PluginState.md b/backend/api/v2beta1/python_http_client/docs/V2beta1PluginState.md new file mode 100644 index 00000000000..beb3432990b --- /dev/null +++ b/backend/api/v2beta1/python_http_client/docs/V2beta1PluginState.md @@ -0,0 +1,10 @@ +# V2beta1PluginState + +Describes the state of a plugin's operations. Unlike RuntimeState (which covers pipeline/task lifecycle including CANCELING, PAUSED, SKIPPED), PluginState only reflects whether the plugin's own work succeeded or failed, independent of the pipeline run outcome. - PLUGIN_STATE_UNSPECIFIED: Default value. 
The plugin state is unknown or not yet set. - PLUGIN_RUNNING: Plugin operations are in progress. - PLUGIN_SUCCEEDED: Plugin operations completed successfully. - PLUGIN_FAILED: Plugin operations failed. +## Properties +Name | Type | Description | Notes +------------ | ------------- | ------------- | ------------- + +[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) + + diff --git a/backend/api/v2beta1/python_http_client/docs/V2beta1RecurringRun.md b/backend/api/v2beta1/python_http_client/docs/V2beta1RecurringRun.md index c40d606964b..829835deec6 100644 --- a/backend/api/v2beta1/python_http_client/docs/V2beta1RecurringRun.md +++ b/backend/api/v2beta1/python_http_client/docs/V2beta1RecurringRun.md @@ -21,6 +21,7 @@ Name | Type | Description | Notes **no_catchup** | **bool** | Optional input field. Whether the recurring run should catch up if behind schedule. If true, the recurring run will only schedule the latest interval if behind schedule. If false, the recurring run will catch up on each past interval. | [optional] **namespace** | **str** | TODO (gkclat): consider removing this field if it can be obtained from the parent experiment. Output only. Namespace this recurring run belongs to. Derived from the parent experiment. | [optional] [readonly] **experiment_id** | **str** | ID of the parent experiment this recurring run belongs to. | [optional] +**plugins_input** | **dict(str, object)** | Optional input. Plugin inputs to propagate to each triggered run. Each triggered run will inherit these values in its plugins_input field. 
| [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/backend/api/v2beta1/python_http_client/docs/V2beta1Run.md b/backend/api/v2beta1/python_http_client/docs/V2beta1Run.md index dbb37fdf0fb..c2659dbc89b 100644 --- a/backend/api/v2beta1/python_http_client/docs/V2beta1Run.md +++ b/backend/api/v2beta1/python_http_client/docs/V2beta1Run.md @@ -21,6 +21,8 @@ Name | Type | Description | Notes **run_details** | [**V2beta1RunDetails**](V2beta1RunDetails.md) | | [optional] **recurring_run_id** | **str** | ID of the recurring run that triggered this run. | [optional] **state_history** | [**list[V2beta1RuntimeStatus]**](V2beta1RuntimeStatus.md) | Output. A sequence of run statuses. This field keeps a record of state transitions. | [optional] +**plugins_input** | **dict(str, object)** | Optional input. Plugin-specific inputs provided by the user at run creation. Each key is a plugin name (e.g., \"mlflow\") and the value is arbitrary JSON config. | [optional] +**plugins_output** | [**dict(str, V2beta1PluginOutput)**](V2beta1PluginOutput.md) | Output. Plugin-specific outputs populated by backend components. Each key is a plugin name and the value contains the plugin's output entries and state. 
| [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py index c88ca322477..1f393b66852 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py @@ -39,6 +39,7 @@ from kfp_server_api.models.authorize_request_resources import AuthorizeRequestResources from kfp_server_api.models.authorize_request_verb import AuthorizeRequestVerb from kfp_server_api.models.googlerpc_status import GooglerpcStatus +from kfp_server_api.models.metadata_value_render_type import MetadataValueRenderType from kfp_server_api.models.pipeline_task_detail_child_task import PipelineTaskDetailChildTask from kfp_server_api.models.predicate_int_values import PredicateIntValues from kfp_server_api.models.predicate_long_values import PredicateLongValues @@ -61,12 +62,15 @@ from kfp_server_api.models.v2beta1_list_pipelines_response import V2beta1ListPipelinesResponse from kfp_server_api.models.v2beta1_list_recurring_runs_response import V2beta1ListRecurringRunsResponse from kfp_server_api.models.v2beta1_list_runs_response import V2beta1ListRunsResponse +from kfp_server_api.models.v2beta1_metadata_value import V2beta1MetadataValue from kfp_server_api.models.v2beta1_periodic_schedule import V2beta1PeriodicSchedule from kfp_server_api.models.v2beta1_pipeline import V2beta1Pipeline from kfp_server_api.models.v2beta1_pipeline_task_detail import V2beta1PipelineTaskDetail from kfp_server_api.models.v2beta1_pipeline_task_executor_detail import V2beta1PipelineTaskExecutorDetail from kfp_server_api.models.v2beta1_pipeline_version import V2beta1PipelineVersion from kfp_server_api.models.v2beta1_pipeline_version_reference import V2beta1PipelineVersionReference 
+from kfp_server_api.models.v2beta1_plugin_output import V2beta1PluginOutput +from kfp_server_api.models.v2beta1_plugin_state import V2beta1PluginState from kfp_server_api.models.v2beta1_predicate import V2beta1Predicate from kfp_server_api.models.v2beta1_predicate_operation import V2beta1PredicateOperation from kfp_server_api.models.v2beta1_recurring_run import V2beta1RecurringRun diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/__init__.py index de09216b843..8a200422b9d 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/models/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/__init__.py @@ -17,6 +17,7 @@ from kfp_server_api.models.authorize_request_resources import AuthorizeRequestResources from kfp_server_api.models.authorize_request_verb import AuthorizeRequestVerb from kfp_server_api.models.googlerpc_status import GooglerpcStatus +from kfp_server_api.models.metadata_value_render_type import MetadataValueRenderType from kfp_server_api.models.pipeline_task_detail_child_task import PipelineTaskDetailChildTask from kfp_server_api.models.predicate_int_values import PredicateIntValues from kfp_server_api.models.predicate_long_values import PredicateLongValues @@ -39,12 +40,15 @@ from kfp_server_api.models.v2beta1_list_pipelines_response import V2beta1ListPipelinesResponse from kfp_server_api.models.v2beta1_list_recurring_runs_response import V2beta1ListRecurringRunsResponse from kfp_server_api.models.v2beta1_list_runs_response import V2beta1ListRunsResponse +from kfp_server_api.models.v2beta1_metadata_value import V2beta1MetadataValue from kfp_server_api.models.v2beta1_periodic_schedule import V2beta1PeriodicSchedule from kfp_server_api.models.v2beta1_pipeline import V2beta1Pipeline from kfp_server_api.models.v2beta1_pipeline_task_detail import V2beta1PipelineTaskDetail from 
kfp_server_api.models.v2beta1_pipeline_task_executor_detail import V2beta1PipelineTaskExecutorDetail from kfp_server_api.models.v2beta1_pipeline_version import V2beta1PipelineVersion from kfp_server_api.models.v2beta1_pipeline_version_reference import V2beta1PipelineVersionReference +from kfp_server_api.models.v2beta1_plugin_output import V2beta1PluginOutput +from kfp_server_api.models.v2beta1_plugin_state import V2beta1PluginState from kfp_server_api.models.v2beta1_predicate import V2beta1Predicate from kfp_server_api.models.v2beta1_predicate_operation import V2beta1PredicateOperation from kfp_server_api.models.v2beta1_recurring_run import V2beta1RecurringRun diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/metadata_value_render_type.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/metadata_value_render_type.py new file mode 100644 index 00000000000..5dc1e8a32b1 --- /dev/null +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/metadata_value_render_type.py @@ -0,0 +1,100 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kfp_server_api.configuration import Configuration + + +class MetadataValueRenderType(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + allowed enum values + """ + UNSPECIFIED = "UNSPECIFIED" + URL = "URL" + + allowable_values = [UNSPECIFIED, URL] # noqa: E501 + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + } + + attribute_map = { + } + + def __init__(self, local_vars_configuration=None): # noqa: E501 + """MetadataValueRenderType - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + self.discriminator = None + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, MetadataValueRenderType): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, MetadataValueRenderType): + return True + + return self.to_dict() != other.to_dict() diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_metadata_value.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_metadata_value.py new file mode 100644 index 00000000000..4c8272ebf8e --- /dev/null +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_metadata_value.py @@ -0,0 +1,146 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow 
Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kfp_server_api.configuration import Configuration + + +class V2beta1MetadataValue(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'value': 'object', + 'render_type': 'MetadataValueRenderType' + } + + attribute_map = { + 'value': 'value', + 'render_type': 'render_type' + } + + def __init__(self, value=None, render_type=None, local_vars_configuration=None): # noqa: E501 + """V2beta1MetadataValue - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._value = None + self._render_type = None + self.discriminator = None + + if value is not None: + self.value = value + if render_type is not None: + self.render_type = render_type + + @property + def value(self): + """Gets the value of this V2beta1MetadataValue. # noqa: E501 + + + :return: The value of this V2beta1MetadataValue. # noqa: E501 + :rtype: object + """ + return self._value + + @value.setter + def value(self, value): + """Sets the value of this V2beta1MetadataValue. + + + :param value: The value of this V2beta1MetadataValue. # noqa: E501 + :type value: object + """ + + self._value = value + + @property + def render_type(self): + """Gets the render_type of this V2beta1MetadataValue. # noqa: E501 + + + :return: The render_type of this V2beta1MetadataValue. 
# noqa: E501 + :rtype: MetadataValueRenderType + """ + return self._render_type + + @render_type.setter + def render_type(self, render_type): + """Sets the render_type of this V2beta1MetadataValue. + + + :param render_type: The render_type of this V2beta1MetadataValue. # noqa: E501 + :type render_type: MetadataValueRenderType + """ + + self._render_type = render_type + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V2beta1MetadataValue): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V2beta1MetadataValue): + return True + + return self.to_dict() != other.to_dict() diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_output.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_output.py new file mode 100644 index 00000000000..a0a27443a56 --- /dev/null +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_output.py @@ -0,0 +1,172 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow 
Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kfp_server_api.configuration import Configuration + + +class V2beta1PluginOutput(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + openapi_types = { + 'entries': 'dict(str, V2beta1MetadataValue)', + 'state': 'V2beta1PluginState', + 'state_message': 'str' + } + + attribute_map = { + 'entries': 'entries', + 'state': 'state', + 'state_message': 'state_message' + } + + def __init__(self, entries=None, state=None, state_message=None, local_vars_configuration=None): # noqa: E501 + """V2beta1PluginOutput - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + + self._entries = None + self._state = None + self._state_message = None + self.discriminator = None + + if entries is not None: + self.entries = entries + if state is not None: + self.state = state + if state_message is not None: + self.state_message = state_message + + @property + def entries(self): + """Gets the entries of this V2beta1PluginOutput. # noqa: E501 + + + :return: The entries of this V2beta1PluginOutput. # noqa: E501 + :rtype: dict(str, V2beta1MetadataValue) + """ + return self._entries + + @entries.setter + def entries(self, entries): + """Sets the entries of this V2beta1PluginOutput. + + + :param entries: The entries of this V2beta1PluginOutput. 
# noqa: E501 + :type entries: dict(str, V2beta1MetadataValue) + """ + + self._entries = entries + + @property + def state(self): + """Gets the state of this V2beta1PluginOutput. # noqa: E501 + + + :return: The state of this V2beta1PluginOutput. # noqa: E501 + :rtype: V2beta1PluginState + """ + return self._state + + @state.setter + def state(self, state): + """Sets the state of this V2beta1PluginOutput. + + + :param state: The state of this V2beta1PluginOutput. # noqa: E501 + :type state: V2beta1PluginState + """ + + self._state = state + + @property + def state_message(self): + """Gets the state_message of this V2beta1PluginOutput. # noqa: E501 + + + :return: The state_message of this V2beta1PluginOutput. # noqa: E501 + :rtype: str + """ + return self._state_message + + @state_message.setter + def state_message(self, state_message): + """Sets the state_message of this V2beta1PluginOutput. + + + :param state_message: The state_message of this V2beta1PluginOutput. # noqa: E501 + :type state_message: str + """ + + self._state_message = state_message + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V2beta1PluginOutput): + return False + + return 
self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V2beta1PluginOutput): + return True + + return self.to_dict() != other.to_dict() diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_state.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_state.py new file mode 100644 index 00000000000..3f6644cbe9e --- /dev/null +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_plugin_state.py @@ -0,0 +1,102 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +import pprint +import re # noqa: F401 + +import six + +from kfp_server_api.configuration import Configuration + + +class V2beta1PluginState(object): + """NOTE: This class is auto generated by OpenAPI Generator. + Ref: https://openapi-generator.tech + + Do not edit the class manually. + """ + + """ + allowed enum values + """ + STATE_UNSPECIFIED = "PLUGIN_STATE_UNSPECIFIED" + RUNNING = "PLUGIN_RUNNING" + SUCCEEDED = "PLUGIN_SUCCEEDED" + FAILED = "PLUGIN_FAILED" + + allowable_values = [STATE_UNSPECIFIED, RUNNING, SUCCEEDED, FAILED] # noqa: E501 + + """ + Attributes: + openapi_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + openapi_types = { + } + + attribute_map = { + } + + def __init__(self, local_vars_configuration=None): # noqa: E501 + """V2beta1PluginState - a model defined in OpenAPI""" # noqa: E501 + if local_vars_configuration is None: + local_vars_configuration = Configuration() + self.local_vars_configuration = local_vars_configuration + self.discriminator = None + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.openapi_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, V2beta1PluginState): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, V2beta1PluginState): + return True + + return self.to_dict() != other.to_dict() diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_recurring_run.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_recurring_run.py index 3cfe372019a..d158f16b41b 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_recurring_run.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_recurring_run.py @@ -50,7 +50,8 @@ class V2beta1RecurringRun(object): 'error': 
'GooglerpcStatus', 'no_catchup': 'bool', 'namespace': 'str', - 'experiment_id': 'str' + 'experiment_id': 'str', + 'plugins_input': 'dict(str, object)' } attribute_map = { @@ -71,10 +72,11 @@ class V2beta1RecurringRun(object): 'error': 'error', 'no_catchup': 'no_catchup', 'namespace': 'namespace', - 'experiment_id': 'experiment_id' + 'experiment_id': 'experiment_id', + 'plugins_input': 'plugins_input' } - def __init__(self, recurring_run_id=None, display_name=None, description=None, pipeline_version_id=None, pipeline_spec=None, pipeline_version_reference=None, runtime_config=None, service_account=None, max_concurrency=None, trigger=None, mode=None, created_at=None, updated_at=None, status=None, error=None, no_catchup=None, namespace=None, experiment_id=None, local_vars_configuration=None): # noqa: E501 + def __init__(self, recurring_run_id=None, display_name=None, description=None, pipeline_version_id=None, pipeline_spec=None, pipeline_version_reference=None, runtime_config=None, service_account=None, max_concurrency=None, trigger=None, mode=None, created_at=None, updated_at=None, status=None, error=None, no_catchup=None, namespace=None, experiment_id=None, plugins_input=None, local_vars_configuration=None): # noqa: E501 """V2beta1RecurringRun - a model defined in OpenAPI""" # noqa: E501 if local_vars_configuration is None: local_vars_configuration = Configuration() @@ -98,6 +100,7 @@ def __init__(self, recurring_run_id=None, display_name=None, description=None, p self._no_catchup = None self._namespace = None self._experiment_id = None + self._plugins_input = None self.discriminator = None if recurring_run_id is not None: @@ -136,6 +139,8 @@ def __init__(self, recurring_run_id=None, display_name=None, description=None, p self.namespace = namespace if experiment_id is not None: self.experiment_id = experiment_id + if plugins_input is not None: + self.plugins_input = plugins_input @property def recurring_run_id(self): @@ -539,6 +544,29 @@ def experiment_id(self, 
experiment_id): self._experiment_id = experiment_id + @property + def plugins_input(self): + """Gets the plugins_input of this V2beta1RecurringRun. # noqa: E501 + + Optional input. Plugin inputs to propagate to each triggered run. Each triggered run will inherit these values in its plugins_input field. # noqa: E501 + + :return: The plugins_input of this V2beta1RecurringRun. # noqa: E501 + :rtype: dict(str, object) + """ + return self._plugins_input + + @plugins_input.setter + def plugins_input(self, plugins_input): + """Sets the plugins_input of this V2beta1RecurringRun. + + Optional input. Plugin inputs to propagate to each triggered run. Each triggered run will inherit these values in its plugins_input field. # noqa: E501 + + :param plugins_input: The plugins_input of this V2beta1RecurringRun. # noqa: E501 + :type plugins_input: dict(str, object) + """ + + self._plugins_input = plugins_input + def to_dict(self): """Returns the model properties as a dict""" result = {} diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_run.py b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_run.py index 5f7c81436b7..d04caa49689 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_run.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/models/v2beta1_run.py @@ -50,7 +50,9 @@ class V2beta1Run(object): 'error': 'GooglerpcStatus', 'run_details': 'V2beta1RunDetails', 'recurring_run_id': 'str', - 'state_history': 'list[V2beta1RuntimeStatus]' + 'state_history': 'list[V2beta1RuntimeStatus]', + 'plugins_input': 'dict(str, object)', + 'plugins_output': 'dict(str, V2beta1PluginOutput)' } attribute_map = { @@ -71,10 +73,12 @@ class V2beta1Run(object): 'error': 'error', 'run_details': 'run_details', 'recurring_run_id': 'recurring_run_id', - 'state_history': 'state_history' + 'state_history': 'state_history', + 'plugins_input': 'plugins_input', + 'plugins_output': 'plugins_output' } - def __init__(self, 
experiment_id=None, run_id=None, display_name=None, storage_state=None, description=None, pipeline_version_id=None, pipeline_spec=None, pipeline_version_reference=None, runtime_config=None, service_account=None, created_at=None, scheduled_at=None, finished_at=None, state=None, error=None, run_details=None, recurring_run_id=None, state_history=None, local_vars_configuration=None): # noqa: E501 + def __init__(self, experiment_id=None, run_id=None, display_name=None, storage_state=None, description=None, pipeline_version_id=None, pipeline_spec=None, pipeline_version_reference=None, runtime_config=None, service_account=None, created_at=None, scheduled_at=None, finished_at=None, state=None, error=None, run_details=None, recurring_run_id=None, state_history=None, plugins_input=None, plugins_output=None, local_vars_configuration=None): # noqa: E501 """V2beta1Run - a model defined in OpenAPI""" # noqa: E501 if local_vars_configuration is None: local_vars_configuration = Configuration() @@ -98,6 +102,8 @@ def __init__(self, experiment_id=None, run_id=None, display_name=None, storage_s self._run_details = None self._recurring_run_id = None self._state_history = None + self._plugins_input = None + self._plugins_output = None self.discriminator = None if experiment_id is not None: @@ -136,6 +142,10 @@ def __init__(self, experiment_id=None, run_id=None, display_name=None, storage_s self.recurring_run_id = recurring_run_id if state_history is not None: self.state_history = state_history + if plugins_input is not None: + self.plugins_input = plugins_input + if plugins_output is not None: + self.plugins_output = plugins_output @property def experiment_id(self): @@ -539,6 +549,52 @@ def state_history(self, state_history): self._state_history = state_history + @property + def plugins_input(self): + """Gets the plugins_input of this V2beta1Run. # noqa: E501 + + Optional input. Plugin-specific inputs provided by the user at run creation. 
Each key is a plugin name (e.g., \"mlflow\") and the value is arbitrary JSON config. # noqa: E501 + + :return: The plugins_input of this V2beta1Run. # noqa: E501 + :rtype: dict(str, object) + """ + return self._plugins_input + + @plugins_input.setter + def plugins_input(self, plugins_input): + """Sets the plugins_input of this V2beta1Run. + + Optional input. Plugin-specific inputs provided by the user at run creation. Each key is a plugin name (e.g., \"mlflow\") and the value is arbitrary JSON config. # noqa: E501 + + :param plugins_input: The plugins_input of this V2beta1Run. # noqa: E501 + :type plugins_input: dict(str, object) + """ + + self._plugins_input = plugins_input + + @property + def plugins_output(self): + """Gets the plugins_output of this V2beta1Run. # noqa: E501 + + Output. Plugin-specific outputs populated by backend components. Each key is a plugin name and the value contains the plugin's output entries and state. # noqa: E501 + + :return: The plugins_output of this V2beta1Run. # noqa: E501 + :rtype: dict(str, V2beta1PluginOutput) + """ + return self._plugins_output + + @plugins_output.setter + def plugins_output(self, plugins_output): + """Sets the plugins_output of this V2beta1Run. + + Output. Plugin-specific outputs populated by backend components. Each key is a plugin name and the value contains the plugin's output entries and state. # noqa: E501 + + :param plugins_output: The plugins_output of this V2beta1Run. 
# noqa: E501 + :type plugins_output: dict(str, V2beta1PluginOutput) + """ + + self._plugins_output = plugins_output + def to_dict(self): """Returns the model properties as a dict""" result = {} diff --git a/backend/api/v2beta1/python_http_client/test/test_metadata_value_render_type.py b/backend/api/v2beta1/python_http_client/test/test_metadata_value_render_type.py new file mode 100644 index 00000000000..bf924caae77 --- /dev/null +++ b/backend/api/v2beta1/python_http_client/test/test_metadata_value_render_type.py @@ -0,0 +1,51 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +import kfp_server_api +from kfp_server_api.models.metadata_value_render_type import MetadataValueRenderType # noqa: E501 +from kfp_server_api.rest import ApiException + +class TestMetadataValueRenderType(unittest.TestCase): + """MetadataValueRenderType unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test MetadataValueRenderType + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kfp_server_api.models.metadata_value_render_type.MetadataValueRenderType() # noqa: E501 + if include_optional : + return MetadataValueRenderType( + ) + else : + return MetadataValueRenderType( + ) + + def testMetadataValueRenderType(self): + """Test MetadataValueRenderType""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_recurring_runs_response.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_recurring_runs_response.py index 3859523cf90..4d4ee8a870d 100644 --- a/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_recurring_runs_response.py +++ b/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_recurring_runs_response.py @@ -76,7 +76,10 @@ def make_instance(self, include_optional): ], ), no_catchup = True, namespace = '0', - experiment_id = '0', ) + experiment_id = '0', + plugins_input = { + 'key' : None + }, ) ], total_size = 56, next_page_token = '0' diff --git a/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_runs_response.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_runs_response.py index 588296c5ce9..fbcfbff8788 100644 --- a/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_runs_response.py +++ b/backend/api/v2beta1/python_http_client/test/test_v2beta1_list_runs_response.py @@ -112,7 +112,19 @@ def make_instance(self, include_optional): state_history = [ kfp_server_api.models.v2beta1_runtime_status.v2beta1RuntimeStatus( update_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'), ) - ], ) + ], + plugins_input = { + 'key' : None + }, + plugins_output = { + 'key' : kfp_server_api.models.v2beta1_plugin_output.v2beta1PluginOutput( + entries = { + 'key' : kfp_server_api.models.v2beta1_metadata_value.v2beta1MetadataValue( + value = kfp_server_api.models.value.value(), + render_type = 'UNSPECIFIED', ) + }, + state_message = '0', ) + }, ) ], total_size = 56, next_page_token = '0' diff --git a/backend/api/v2beta1/python_http_client/test/test_v2beta1_metadata_value.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_metadata_value.py new file mode 100644 index 00000000000..067120b4acd --- /dev/null +++ b/backend/api/v2beta1/python_http_client/test/test_v2beta1_metadata_value.py @@ -0,0 +1,53 @@ +# 
coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition. + + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +import kfp_server_api +from kfp_server_api.models.v2beta1_metadata_value import V2beta1MetadataValue # noqa: E501 +from kfp_server_api.rest import ApiException + +class TestV2beta1MetadataValue(unittest.TestCase): + """V2beta1MetadataValue unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V2beta1MetadataValue + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kfp_server_api.models.v2beta1_metadata_value.V2beta1MetadataValue() # noqa: E501 + if include_optional : + return V2beta1MetadataValue( + value = kfp_server_api.models.value.value(), + render_type = 'UNSPECIFIED' + ) + else : + return V2beta1MetadataValue( + ) + + def testV2beta1MetadataValue(self): + """Test V2beta1MetadataValue""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/backend/api/v2beta1/python_http_client/test/test_v2beta1_plugin_state.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_plugin_state.py new file mode 100644 index 00000000000..ea0844c381a --- /dev/null +++ b/backend/api/v2beta1/python_http_client/test/test_v2beta1_plugin_state.py @@ -0,0 +1,51 @@ +# coding: utf-8 + +""" + Kubeflow Pipelines API + + This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition. 
+ + Contact: kubeflow-pipelines@google.com + Generated by: https://openapi-generator.tech +""" + + +from __future__ import absolute_import + +import unittest +import datetime + +import kfp_server_api +from kfp_server_api.models.v2beta1_plugin_state import V2beta1PluginState # noqa: E501 +from kfp_server_api.rest import ApiException + +class TestV2beta1PluginState(unittest.TestCase): + """V2beta1PluginState unit test stubs""" + + def setUp(self): + pass + + def tearDown(self): + pass + + def make_instance(self, include_optional): + """Test V2beta1PluginState + include_option is a boolean, when False only required + params are included, when True both required and + optional params are included """ + # model = kfp_server_api.models.v2beta1_plugin_state.V2beta1PluginState() # noqa: E501 + if include_optional : + return V2beta1PluginState( + ) + else : + return V2beta1PluginState( + ) + + def testV2beta1PluginState(self): + """Test V2beta1PluginState""" + inst_req_only = self.make_instance(include_optional=False) + inst_req_and_optional = self.make_instance(include_optional=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/backend/api/v2beta1/python_http_client/test/test_v2beta1_recurring_run.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_recurring_run.py index ef66273c4dd..9b10753a5b0 100644 --- a/backend/api/v2beta1/python_http_client/test/test_v2beta1_recurring_run.py +++ b/backend/api/v2beta1/python_http_client/test/test_v2beta1_recurring_run.py @@ -74,7 +74,10 @@ def make_instance(self, include_optional): ], ), no_catchup = True, namespace = '0', - experiment_id = '0' + experiment_id = '0', + plugins_input = { + 'key' : None + } ) else : return V2beta1RecurringRun( diff --git a/backend/api/v2beta1/python_http_client/test/test_v2beta1_run.py b/backend/api/v2beta1/python_http_client/test/test_v2beta1_run.py index bcdd39a53d1..072d49c9e65 100644 --- a/backend/api/v2beta1/python_http_client/test/test_v2beta1_run.py +++ 
b/backend/api/v2beta1/python_http_client/test/test_v2beta1_run.py @@ -128,7 +128,20 @@ def make_instance(self, include_optional): 'key' : None } ], ), ) - ] + ], + plugins_input = { + 'key' : None + }, + plugins_output = { + 'key' : kfp_server_api.models.v2beta1_plugin_output.v2beta1PluginOutput( + entries = { + 'key' : kfp_server_api.models.v2beta1_metadata_value.v2beta1MetadataValue( + value = kfp_server_api.models.value.value(), + render_type = 'UNSPECIFIED', ) + }, + state = 'PLUGIN_STATE_UNSPECIFIED', + state_message = '0', ) + } ) else : return V2beta1Run( diff --git a/backend/api/v2beta1/recurring_run.proto b/backend/api/v2beta1/recurring_run.proto index c5b57674c57..11fd86c6ae9 100644 --- a/backend/api/v2beta1/recurring_run.proto +++ b/backend/api/v2beta1/recurring_run.proto @@ -154,6 +154,10 @@ message RecurringRun { // ID of the parent experiment this recurring run belongs to. string experiment_id = 17; + + // Optional input. Plugin inputs to propagate to each triggered run. + // Each triggered run will inherit these values in its plugins_input field. + map<string, google.protobuf.Struct> plugins_input = 19; } message CreateRecurringRunRequest { diff --git a/backend/api/v2beta1/run.proto b/backend/api/v2beta1/run.proto index 075b3c392d8..5cae378c2c6 100644 --- a/backend/api/v2beta1/run.proto +++ b/backend/api/v2beta1/run.proto @@ -200,6 +200,14 @@ message Run { // Output. A sequence of run statuses. This field keeps a record // of state transitions. repeated RuntimeStatus state_history = 17; + + // Optional input. Plugin-specific inputs provided by the user at run creation. + // Each key is a plugin name (e.g., "mlflow") and the value is arbitrary JSON config. + map<string, google.protobuf.Struct> plugins_input = 19; + + // Output. Plugin-specific outputs populated by backend components. + // Each key is a plugin name and the value contains the plugin's output entries and state. + map<string, PluginOutput> plugins_output = 20; } // Reference to an existing pipeline version.
@@ -258,6 +266,44 @@ message RuntimeStatus { google.rpc.Status error = 3; } +// A typed metadata value with an optional rendering hint for the UI. +message MetadataValue { + // Hint for UI rendering of the value. + enum RenderType { + // Default. No special rendering. + UNSPECIFIED = 0; + // Render the value as a hyperlink. + URL = 1; + } + google.protobuf.Value value = 1; + optional RenderType render_type = 2; +} + +// Describes the state of a plugin's operations. +// Unlike RuntimeState (which covers pipeline/task lifecycle including CANCELING, +// PAUSED, SKIPPED), PluginState only reflects whether the plugin's own work +// succeeded or failed, independent of the pipeline run outcome. +enum PluginState { + // Default value. The plugin state is unknown or not yet set. + PLUGIN_STATE_UNSPECIFIED = 0; + + // Plugin operations are in progress. + PLUGIN_RUNNING = 1; + + // Plugin operations completed successfully. + PLUGIN_SUCCEEDED = 2; + + // Plugin operations failed. + PLUGIN_FAILED = 3; +} + +// Output from a single plugin, containing keyed metadata entries and overall state. +message PluginOutput { + map<string, MetadataValue> entries = 1; + PluginState state = 2; + string state_message = 3; +} + // Runtime details of a run. message RunDetails { // Pipeline context ID of a run. diff --git a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json index 6c10b343a8e..7cd5d577aaf 100644 --- a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json @@ -2340,6 +2340,13 @@ "experiment_id": { "type": "string", "description": "ID of the parent experiment this recurring run belongs to." + }, + "plugins_input": { + "type": "object", + "additionalProperties": { + "type": "object" + }, + "description": "Optional input. Plugin inputs to propagate to each triggered run.\nEach triggered run will inherit these values in its plugins_input field."
} } }, @@ -2380,6 +2387,15 @@ }, "description": "Trigger defines what starts a pipeline run." }, + "MetadataValueRenderType": { + "type": "string", + "enum": [ + "UNSPECIFIED", + "URL" + ], + "default": "UNSPECIFIED", + "description": "Hint for UI rendering of the value.\n\n - UNSPECIFIED: Default. No special rendering.\n - URL: Render the value as a hyperlink." + }, "PipelineTaskDetailChildTask": { "type": "object", "properties": { @@ -2430,6 +2446,16 @@ } } }, + "v2beta1MetadataValue": { + "type": "object", + "properties": { + "value": {}, + "render_type": { + "$ref": "#/definitions/MetadataValueRenderType" + } + }, + "description": "A typed metadata value with an optional rendering hint for the UI." + }, "v2beta1PipelineTaskDetail": { "type": "object", "properties": { @@ -2546,6 +2572,35 @@ }, "description": "Runtime information of a pipeline task executor." }, + "v2beta1PluginOutput": { + "type": "object", + "properties": { + "entries": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/v2beta1MetadataValue" + } + }, + "state": { + "$ref": "#/definitions/v2beta1PluginState" + }, + "state_message": { + "type": "string" + } + }, + "description": "Output from a single plugin, containing keyed metadata entries and overall state." + }, + "v2beta1PluginState": { + "type": "string", + "enum": [ + "PLUGIN_STATE_UNSPECIFIED", + "PLUGIN_RUNNING", + "PLUGIN_SUCCEEDED", + "PLUGIN_FAILED" + ], + "default": "PLUGIN_STATE_UNSPECIFIED", + "description": "Describes the state of a plugin's operations.\nUnlike RuntimeState (which covers pipeline/task lifecycle including CANCELING,\nPAUSED, SKIPPED), PluginState only reflects whether the plugin's own work\nsucceeded or failed, independent of the pipeline run outcome.\n\n - PLUGIN_STATE_UNSPECIFIED: Default value. 
The plugin state is unknown or not yet set.\n - PLUGIN_RUNNING: Plugin operations are in progress.\n - PLUGIN_SUCCEEDED: Plugin operations completed successfully.\n - PLUGIN_FAILED: Plugin operations failed." + }, "v2beta1Run": { "type": "object", "properties": { @@ -2627,6 +2682,20 @@ "$ref": "#/definitions/v2beta1RuntimeStatus" }, "description": "Output. A sequence of run statuses. This field keeps a record\nof state transitions." + }, + "plugins_input": { + "type": "object", + "additionalProperties": { + "type": "object" + }, + "description": "Optional input. Plugin-specific inputs provided by the user at run creation.\nEach key is a plugin name (e.g., \"mlflow\") and the value is arbitrary JSON config." + }, + "plugins_output": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/v2beta1PluginOutput" + }, + "description": "Output. Plugin-specific outputs populated by backend components.\nEach key is a plugin name and the value contains the plugin's output entries and state." } } }, diff --git a/backend/api/v2beta1/swagger/recurring_run.swagger.json b/backend/api/v2beta1/swagger/recurring_run.swagger.json index a57fd352b0b..6715081d19d 100644 --- a/backend/api/v2beta1/swagger/recurring_run.swagger.json +++ b/backend/api/v2beta1/swagger/recurring_run.swagger.json @@ -480,6 +480,13 @@ "experiment_id": { "type": "string", "description": "ID of the parent experiment this recurring run belongs to." + }, + "plugins_input": { + "type": "object", + "additionalProperties": { + "type": "object" + }, + "description": "Optional input. Plugin inputs to propagate to each triggered run.\nEach triggered run will inherit these values in its plugins_input field." 
} } }, diff --git a/backend/api/v2beta1/swagger/run.swagger.json b/backend/api/v2beta1/swagger/run.swagger.json index 881204749c1..3d4523bb835 100644 --- a/backend/api/v2beta1/swagger/run.swagger.json +++ b/backend/api/v2beta1/swagger/run.swagger.json @@ -366,6 +366,15 @@ } }, "definitions": { + "MetadataValueRenderType": { + "type": "string", + "enum": [ + "UNSPECIFIED", + "URL" + ], + "default": "UNSPECIFIED", + "description": "Hint for UI rendering of the value.\n\n - UNSPECIFIED: Default. No special rendering.\n - URL: Render the value as a hyperlink." + }, "PipelineTaskDetailChildTask": { "type": "object", "properties": { @@ -458,6 +467,16 @@ } } }, + "v2beta1MetadataValue": { + "type": "object", + "properties": { + "value": {}, + "render_type": { + "$ref": "#/definitions/MetadataValueRenderType" + } + }, + "description": "A typed metadata value with an optional rendering hint for the UI." + }, "v2beta1PipelineTaskDetail": { "type": "object", "properties": { @@ -588,6 +607,35 @@ }, "description": "Reference to an existing pipeline version." }, + "v2beta1PluginOutput": { + "type": "object", + "properties": { + "entries": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/v2beta1MetadataValue" + } + }, + "state": { + "$ref": "#/definitions/v2beta1PluginState" + }, + "state_message": { + "type": "string" + } + }, + "description": "Output from a single plugin, containing keyed metadata entries and overall state." + }, + "v2beta1PluginState": { + "type": "string", + "enum": [ + "PLUGIN_STATE_UNSPECIFIED", + "PLUGIN_RUNNING", + "PLUGIN_SUCCEEDED", + "PLUGIN_FAILED" + ], + "default": "PLUGIN_STATE_UNSPECIFIED", + "description": "Describes the state of a plugin's operations.\nUnlike RuntimeState (which covers pipeline/task lifecycle including CANCELING,\nPAUSED, SKIPPED), PluginState only reflects whether the plugin's own work\nsucceeded or failed, independent of the pipeline run outcome.\n\n - PLUGIN_STATE_UNSPECIFIED: Default value. 
The plugin state is unknown or not yet set.\n - PLUGIN_RUNNING: Plugin operations are in progress.\n - PLUGIN_SUCCEEDED: Plugin operations completed successfully.\n - PLUGIN_FAILED: Plugin operations failed." + }, "v2beta1Run": { "type": "object", "properties": { @@ -669,6 +717,20 @@ "$ref": "#/definitions/v2beta1RuntimeStatus" }, "description": "Output. A sequence of run statuses. This field keeps a record\nof state transitions." + }, + "plugins_input": { + "type": "object", + "additionalProperties": { + "type": "object" + }, + "description": "Optional input. Plugin-specific inputs provided by the user at run creation.\nEach key is a plugin name (e.g., \"mlflow\") and the value is arbitrary JSON config." + }, + "plugins_output": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/v2beta1PluginOutput" + }, + "description": "Output. Plugin-specific outputs populated by backend components.\nEach key is a plugin name and the value contains the plugin's output entries and state." 
} } }, diff --git a/backend/src/apiserver/client/kubernetes_core_fake.go b/backend/src/apiserver/client/kubernetes_core_fake.go index 190aefc08d6..056bb6aacda 100644 --- a/backend/src/apiserver/client/kubernetes_core_fake.go +++ b/backend/src/apiserver/client/kubernetes_core_fake.go @@ -21,6 +21,7 @@ import ( policyv1 "k8s.io/api/policy/v1" policyv1beta1 "k8s.io/api/policy/v1beta1" "k8s.io/client-go/kubernetes" + k8sfake "k8s.io/client-go/kubernetes/fake" v1 "k8s.io/client-go/kubernetes/typed/core/v1" ) @@ -36,8 +37,7 @@ func (c *FakeKuberneteCoreClient) PodClient(namespace string) v1.PodInterface { } func (c *FakeKuberneteCoreClient) GetClientSet() kubernetes.Interface { - // Return nil for fake implementation - tests that need this should use a mock - return nil + return k8sfake.NewClientset() } func NewFakeKuberneteCoresClient() *FakeKuberneteCoreClient { @@ -57,8 +57,7 @@ func (c *FakeKubernetesCoreClientWithBadPodClient) PodClient(namespace string) v } func (c *FakeKubernetesCoreClientWithBadPodClient) GetClientSet() kubernetes.Interface { - // Return nil for fake implementation - return nil + return k8sfake.NewClientset() } func (c *FakePodClient) EvictV1(context.Context, *policyv1.Eviction) error { diff --git a/backend/src/apiserver/client/kubernetes_core_fake_test.go b/backend/src/apiserver/client/kubernetes_core_fake_test.go index 9c72896a17f..1f21b27ae81 100644 --- a/backend/src/apiserver/client/kubernetes_core_fake_test.go +++ b/backend/src/apiserver/client/kubernetes_core_fake_test.go @@ -44,8 +44,8 @@ func TestFakeKuberneteCoreClient_PodClientPanicsOnEmptyNamespace(t *testing.T) { func TestFakeKuberneteCoreClient_GetClientSet(t *testing.T) { client := NewFakeKuberneteCoresClient() clientSet := client.GetClientSet() - if clientSet != nil { - t.Error("GetClientSet() expected nil for fake implementation, got non-nil") + if clientSet == nil { + t.Error("GetClientSet() returned nil, expected a fake clientset") } } @@ -60,8 +60,8 @@ func 
TestFakeKubernetesCoreClientWithBadPodClient_PodClient(t *testing.T) { func TestFakeKubernetesCoreClientWithBadPodClient_GetClientSet(t *testing.T) { client := NewFakeKubernetesCoreClientWithBadPodClient() clientSet := client.GetClientSet() - if clientSet != nil { - t.Error("GetClientSet() expected nil for fake implementation, got non-nil") + if clientSet == nil { + t.Error("GetClientSet() returned nil, expected a fake clientset") } } diff --git a/backend/src/apiserver/client_manager/client_manager.go b/backend/src/apiserver/client_manager/client_manager.go index 9757bba2b6c..b1064d09579 100644 --- a/backend/src/apiserver/client_manager/client_manager.go +++ b/backend/src/apiserver/client_manager/client_manager.go @@ -569,20 +569,7 @@ func runLegacyUpgradeFlow(db *gorm.DB, dialect SQLDialect) error { func autoMigrate(db *gorm.DB) error { glog.Infof("Running AutoMigrate.") - if err := db.AutoMigrate( - &model.DBStatus{}, - &model.DefaultExperiment{}, - &model.Experiment{}, - &model.Pipeline{}, - &model.PipelineVersion{}, - &model.PipelineTag{}, - &model.PipelineVersionTag{}, - &model.Job{}, - &model.Run{}, - &model.RunMetric{}, - &model.Task{}, - &model.ResourceReference{}, - ); err != nil { + if err := db.AutoMigrate(model.AllModels()...); err != nil { return fmt.Errorf("AutoMigrate failed: %w", err) } diff --git a/backend/src/apiserver/client_manager/client_manager_test.go b/backend/src/apiserver/client_manager/client_manager_test.go index d4dbd5a09ed..53437143070 100644 --- a/backend/src/apiserver/client_manager/client_manager_test.go +++ b/backend/src/apiserver/client_manager/client_manager_test.go @@ -433,3 +433,48 @@ func TestLoadAWSConfig_WithCredentials(t *testing.T) { assert.Equal(t, "test-key", creds.AccessKeyID) assert.Equal(t, "test-secret", creds.SecretAccessKey) } + +func TestAutoMigrateSucceeds(t *testing.T) { + db := getTestSQLite(t) + require.NoError(t, autoMigrate(db)) + + sqlDB, err := db.DB() + require.NoError(t, err) + + assertColumnExists := 
func(table, column string) { + t.Helper() + rows, err := sqlDB.Query(fmt.Sprintf("PRAGMA table_info(%s)", table)) + require.NoError(t, err) + defer func() { require.NoError(t, rows.Close()) }() + + var found bool + for rows.Next() { + var cid int + var name, ctype string + var notnull int + var dfltValue *string + var pk int + require.NoError(t, rows.Scan(&cid, &name, &ctype, &notnull, &dfltValue, &pk)) + if name == column { + found = true + break + } + } + require.True(t, found, "expected column %q on table %q after autoMigrate", column, table) + } + + // Spot-check one column per table as a smoke test. + // We don't verify every column. The goal is to confirm + // that all models in AllModels() are migrated. + assertColumnExists("db_statuses", "HaveSamplesLoaded") + assertColumnExists("default_experiments", "DefaultExperimentId") + assertColumnExists("experiments", "Name") + assertColumnExists("pipelines", "UUID") + assertColumnExists("pipeline_versions", "UUID") + assertColumnExists("jobs", "PluginsInput") + assertColumnExists("run_details", "PluginsInput") + assertColumnExists("run_details", "PluginsOutput") + assertColumnExists("run_metrics", "RunUUID") + assertColumnExists("tasks", "RunUUID") + assertColumnExists("resource_references", "ResourceUUID") +} diff --git a/backend/src/apiserver/common/config.go b/backend/src/apiserver/common/config.go index 0d9277c4de9..3b5043c06c6 100644 --- a/backend/src/apiserver/common/config.go +++ b/backend/src/apiserver/common/config.go @@ -15,7 +15,9 @@ package common import ( + "fmt" "strconv" + "strings" "time" "github.com/golang/glog" @@ -50,8 +52,19 @@ const ( PipelineURLAllowHTTP string = "PIPELINE_URL_ALLOW_HTTP" PipelineURLTimeout string = "PIPELINE_URL_TIMEOUT" PipelineURLValidationEnabled string = "PIPELINE_URL_VALIDATION_ENABLED" + PluginMaxKeys string = "PLUGIN_MAX_KEYS" + PluginMaxPayloadBytes string = "PLUGIN_MAX_PAYLOAD_BYTES" + PluginMaxTotalPayloadBytes string = "PLUGIN_MAX_TOTAL_PAYLOAD_BYTES" +
PluginMaxNestingDepth string = "PLUGIN_MAX_NESTING_DEPTH" ) +type PluginLimitsConfig struct { + MaxKeys int + MaxPayloadBytes int + MaxTotalPayloadBytes int + MaxNestingDepth int +} + func IsPipelineVersionUpdatedByDefault() bool { return GetBoolConfigWithDefault(UpdatePipelineVersionByDefault, true) } @@ -107,6 +120,27 @@ func GetIntConfigWithDefault(configName string, value int) int { return viper.GetInt(configName) } +func getPositiveIntConfigWithDefault(configName string, value int) (int, error) { + if !viper.IsSet(configName) { + return value, nil + } + + raw := strings.TrimSpace(viper.GetString(configName)) + if raw == "" { + return 0, fmt.Errorf("invalid value for %s: must be a positive integer", configName) + } + + parsed, err := strconv.Atoi(raw) + if err != nil { + return 0, fmt.Errorf("invalid value for %s: %w", configName, err) + } + if parsed <= 0 { + return 0, fmt.Errorf("invalid value for %s: must be > 0", configName) + } + + return parsed, nil +} + func GetDurationConfig(configName string) time.Duration { if !viper.IsSet(configName) { glog.Fatalf("Please specify flag %s", configName) @@ -193,3 +227,39 @@ func GetDefaultSecurityContextRunAsGroup() string { func GetDefaultSecurityContextRunAsNonRoot() string { return GetStringConfigWithDefault(DefaultSecurityContextRunAsNonRoot, "") } + +func GetPluginLimitsConfig() (PluginLimitsConfig, error) { + maxKeys, err := getPositiveIntConfigWithDefault(PluginMaxKeys, DefaultPluginMaxKeys) + if err != nil { + return PluginLimitsConfig{}, err + } + maxPayloadBytes, err := getPositiveIntConfigWithDefault(PluginMaxPayloadBytes, DefaultPluginMaxPayloadBytes) + if err != nil { + return PluginLimitsConfig{}, err + } + maxTotalPayloadBytes, err := getPositiveIntConfigWithDefault(PluginMaxTotalPayloadBytes, DefaultPluginMaxTotalPayloadBytes) + if err != nil { + return PluginLimitsConfig{}, err + } + maxNestingDepth, err := getPositiveIntConfigWithDefault(PluginMaxNestingDepth, DefaultPluginMaxNestingDepth) + if err != 
nil { + return PluginLimitsConfig{}, err + } + + if maxTotalPayloadBytes < maxPayloadBytes { + return PluginLimitsConfig{}, fmt.Errorf( + "invalid plugin limits: %s (%d) must be >= %s (%d)", + PluginMaxTotalPayloadBytes, + maxTotalPayloadBytes, + PluginMaxPayloadBytes, + maxPayloadBytes, + ) + } + + return PluginLimitsConfig{ + MaxKeys: maxKeys, + MaxPayloadBytes: maxPayloadBytes, + MaxTotalPayloadBytes: maxTotalPayloadBytes, + MaxNestingDepth: maxNestingDepth, + }, nil +} diff --git a/backend/src/apiserver/common/config_test.go b/backend/src/apiserver/common/config_test.go index 3a3b5b07993..12cb852b5c6 100644 --- a/backend/src/apiserver/common/config_test.go +++ b/backend/src/apiserver/common/config_test.go @@ -14,11 +14,13 @@ package common import ( + "os" "testing" "time" "github.com/spf13/viper" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // NOTE: These tests use viper.Reset() which mutates the global viper singleton. @@ -676,3 +678,120 @@ func TestGetMetadataServiceName(t *testing.T) { }) } } + +func TestGetPluginLimitsConfigDefaults(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Reset() + + limits, err := GetPluginLimitsConfig() + require.NoError(t, err) + + assert.Equal(t, DefaultPluginMaxKeys, limits.MaxKeys) + assert.Equal(t, DefaultPluginMaxPayloadBytes, limits.MaxPayloadBytes) + assert.Equal(t, DefaultPluginMaxTotalPayloadBytes, limits.MaxTotalPayloadBytes) + assert.Equal(t, DefaultPluginMaxNestingDepth, limits.MaxNestingDepth) +} + +func TestGetPluginLimitsConfigOverrides(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Reset() + + viper.Set(PluginMaxKeys, "8") + viper.Set(PluginMaxPayloadBytes, "32768") + viper.Set(PluginMaxTotalPayloadBytes, "131072") + viper.Set(PluginMaxNestingDepth, "6") + + limits, err := GetPluginLimitsConfig() + require.NoError(t, err) + + assert.Equal(t, 8, limits.MaxKeys) + assert.Equal(t, 32768, limits.MaxPayloadBytes) + assert.Equal(t, 131072, limits.MaxTotalPayloadBytes) + 
assert.Equal(t, 6, limits.MaxNestingDepth) +} + +func TestGetPluginLimitsConfigRejectsInvalidValues(t *testing.T) { + tests := []struct { + name string + key string + value string + wantError string + }{ + { + name: "reject zero max keys", + key: PluginMaxKeys, + value: "0", + wantError: PluginMaxKeys, + }, + { + name: "reject negative max payload bytes", + key: PluginMaxPayloadBytes, + value: "-1", + wantError: PluginMaxPayloadBytes, + }, + { + name: "reject malformed max total payload bytes", + key: PluginMaxTotalPayloadBytes, + value: "not-a-number", + wantError: PluginMaxTotalPayloadBytes, + }, + { + name: "reject empty max nesting depth", + key: PluginMaxNestingDepth, + value: "", + wantError: PluginMaxNestingDepth, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Reset() + + viper.Set(tt.key, tt.value) + + _, err := GetPluginLimitsConfig() + require.Error(t, err) + assert.ErrorContains(t, err, tt.wantError) + }) + } +} + +func TestGetPluginLimitsConfigRejectsOverflow(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Reset() + + viper.Set(PluginMaxPayloadBytes, "999999999999999999999999999999") + + _, err := GetPluginLimitsConfig() + require.Error(t, err) + assert.ErrorContains(t, err, PluginMaxPayloadBytes) +} + +func TestGetPluginLimitsConfigRejectsCrossFieldInvariant(t *testing.T) { + t.Cleanup(viper.Reset) + viper.Reset() + + viper.Set(PluginMaxPayloadBytes, "1024") + viper.Set(PluginMaxTotalPayloadBytes, "512") + + _, err := GetPluginLimitsConfig() + require.Error(t, err) + assert.ErrorContains(t, err, PluginMaxTotalPayloadBytes) +} + +func TestGetPluginLimitsConfigConflictingSourceUsesGetterContract(t *testing.T) { + t.Cleanup(func() { + require.NoError(t, os.Unsetenv(PluginMaxKeys)) + viper.Reset() + }) + viper.Reset() + viper.AutomaticEnv() + + require.NoError(t, os.Setenv(PluginMaxKeys, "7")) + viper.Set(PluginMaxKeys, "9") + + limits, err := GetPluginLimitsConfig() + require.NoError(t, err) 
+ assert.Equal(t, 9, limits.MaxKeys) +} diff --git a/backend/src/apiserver/common/const.go b/backend/src/apiserver/common/const.go index fd40c0a6c80..d4d41f2a5ab 100644 --- a/backend/src/apiserver/common/const.go +++ b/backend/src/apiserver/common/const.go @@ -82,9 +82,15 @@ const ( DefaultClusterDomain string = "cluster.local" ) -// ClearTagsMetadataKey is the gRPC metadata key set by the HTTP middleware -// when the client sends an empty tags map ("tags":{}) to signal that all -// tags should be removed. Protobuf binary encoding cannot distinguish an -// empty map from nil, so this header preserves the intent across the -// HTTP→gRPC proxy roundtrip. -const ClearTagsMetadataKey = "x-clear-tags" +const ( + // ClearTagsMetadataKey is the gRPC metadata key set by the HTTP middleware + // when the client sends an empty tags map ("tags":{}) to signal that all + // tags should be removed. Protobuf binary encoding cannot distinguish an + // empty map from nil, so this header preserves the intent across the + // HTTP→gRPC proxy roundtrip. 
+ ClearTagsMetadataKey = "x-clear-tags" + DefaultPluginMaxKeys = 16 + DefaultPluginMaxPayloadBytes = 64 * 1024 + DefaultPluginMaxTotalPayloadBytes = 256 * 1024 + DefaultPluginMaxNestingDepth = 10 +) diff --git a/backend/src/apiserver/config/config.json b/backend/src/apiserver/config/config.json index cd668c2b04b..3ea86f11798 100644 --- a/backend/src/apiserver/config/config.json +++ b/backend/src/apiserver/config/config.json @@ -24,5 +24,18 @@ "CacheEnabled": "true", "CRON_SCHEDULE_TIMEZONE": "UTC", "CACHE_IMAGE": "ghcr.io/containerd/busybox", - "CACHE_NODE_RESTRICTIONS": "false" + "CACHE_NODE_RESTRICTIONS": "false", + "plugins": { + "mlflow": { + "endpoint": "https://mlflow.opendatahub.svc.cluster.local:8443", + "timeout": "60s", + "tls": { + "insecureSkipVerify": false, + "caBundlePath": "/etc/mlflow-tracking-ca/ca.crt" + }, + "settings": { + "workspacesEnabled": true + } + } + } } diff --git a/backend/src/apiserver/main.go b/backend/src/apiserver/main.go index fc1331ee9c4..3abf69ad029 100644 --- a/backend/src/apiserver/main.go +++ b/backend/src/apiserver/main.go @@ -634,6 +634,9 @@ func initConfig() error { if err := viper.ReadInConfig(); err != nil { return fmt.Errorf("config file error: %w", err) } + if _, err := common.GetPluginLimitsConfig(); err != nil { + glog.Fatalf("Invalid plugin limits configuration: %v", err) + } // Watch for configuration change viper.WatchConfig() @@ -641,6 +644,9 @@ func initConfig() error { if err := viper.ReadInConfig(); err != nil { glog.Errorf("Failed to reload config: %v", err) } + if _, err := common.GetPluginLimitsConfig(); err != nil { + glog.Fatalf("Invalid plugin limits configuration: %v", err) + } }) proxy.InitializeConfigWithEnv() diff --git a/backend/src/apiserver/model/common.go b/backend/src/apiserver/model/common.go index 2cc954f9f5b..85e656d4b32 100644 --- a/backend/src/apiserver/model/common.go +++ b/backend/src/apiserver/model/common.go @@ -72,3 +72,20 @@ func (lt *LargeText) UnmarshalJSON(b []byte) error { *lt = 
LargeText(s) return nil } + +func AllModels() []any { + return []any{ + &DBStatus{}, + &DefaultExperiment{}, + &Experiment{}, + &Pipeline{}, + &PipelineVersion{}, + &PipelineTag{}, + &PipelineVersionTag{}, + &Job{}, + &Run{}, + &RunMetric{}, + &Task{}, + &ResourceReference{}, + } +} diff --git a/backend/src/apiserver/model/job.go b/backend/src/apiserver/model/job.go index 334cac8a7a4..a0f074e36f4 100644 --- a/backend/src/apiserver/model/job.go +++ b/backend/src/apiserver/model/job.go @@ -109,7 +109,8 @@ type Job struct { ResourceReferences []*ResourceReference `gorm:"-"` Trigger PipelineSpec - Conditions string `gorm:"column:Conditions; not null;"` + Conditions string `gorm:"column:Conditions; not null;"` + PluginsInputString *LargeText `gorm:"column:PluginsInput; default:null;"` } // Converts to v1beta1-compatible internal representation of job. diff --git a/backend/src/apiserver/model/run.go b/backend/src/apiserver/model/run.go index a1e2162c155..7f580b70db6 100644 --- a/backend/src/apiserver/model/run.go +++ b/backend/src/apiserver/model/run.go @@ -315,10 +315,12 @@ type RunDetails struct { // varchar(125) is carefully chosen to ensure composite index constraints remain // within MySQL's 767-byte limit (e.g., when combined with ExperimentId and FinishedAtInSec). // For details on type lengths and index safety, refer to comments in the Pipeline struct. 
- Conditions string `gorm:"column:Conditions; type:varchar(125); not null; index:experimentuuid_conditions_finishedatinsec,priority:2;index:namespace_conditions_finishedatinsec,priority:2"` - State RuntimeState `gorm:"column:State; default:null;"` - StateHistoryString LargeText `gorm:"column:StateHistory; default:null;"` - StateHistory []*RuntimeStatus `gorm:"-;"` + Conditions string `gorm:"column:Conditions; type:varchar(125); not null; index:experimentuuid_conditions_finishedatinsec,priority:2;index:namespace_conditions_finishedatinsec,priority:2"` + State RuntimeState `gorm:"column:State; default:null;"` + StateHistoryString LargeText `gorm:"column:StateHistory; default:null;"` + StateHistory []*RuntimeStatus `gorm:"-;"` + PluginsInputString *LargeText `gorm:"column:PluginsInput; default:null;"` + PluginsOutputString *LargeText `gorm:"column:PluginsOutput; default:null;"` // Serialized runtime details of a run in v2beta1 PipelineRuntimeManifest LargeText `gorm:"column:PipelineRuntimeManifest; not null;"` // Serialized Argo CRD in v1beta1 diff --git a/backend/src/apiserver/plugins/all/all.go b/backend/src/apiserver/plugins/all/all.go new file mode 100644 index 00000000000..ddae33703b4 --- /dev/null +++ b/backend/src/apiserver/plugins/all/all.go @@ -0,0 +1,5 @@ +package all + +import ( + _ "github.com/kubeflow/pipelines/backend/src/apiserver/plugins/mlflow" +) diff --git a/backend/src/apiserver/plugins/config.go b/backend/src/apiserver/plugins/config.go new file mode 100644 index 00000000000..5e75a4d1732 --- /dev/null +++ b/backend/src/apiserver/plugins/config.go @@ -0,0 +1,249 @@ +package plugins + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/golang/glog" + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + "github.com/kubeflow/pipelines/backend/src/apiserver/model" + commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins" + "github.com/kubeflow/pipelines/backend/src/common/util" + 
"google.golang.org/protobuf/encoding/protojson" + apierrors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// KubeClientProvider abstracts Kubernetes clientset access. +type KubeClientProvider interface { + GetClientSet() kubernetes.Interface +} + +type PluginConfig struct { + Endpoint string `json:"endpoint,omitempty" mapstructure:"endpoint"` + Timeout string `json:"timeout,omitempty" mapstructure:"timeout"` + TLS *commonplugins.TLSConfig `json:"tls,omitempty" mapstructure:"tls"` + Settings map[string]interface{} `json:"settings,omitempty" mapstructure:"settings"` +} + +const ( + EntryRootRunID = "root_run_id" +) + +const ( + LauncherConfigMapName = "kfp-launcher" + LauncherConfigKey = "plugins.mlflow" +) + +// InjectPluginRuntimeEnv upserts plugin-provided environment variables into the +// driver and launcher containers of the execution spec. +func InjectPluginRuntimeEnv(executionSpec util.ExecutionSpec, env map[string]string) error { + if len(env) == 0 || executionSpec == nil { + return nil + } + return executionSpec.UpsertRuntimeEnvVars(env, + util.ExecutionRuntimeRoleDriver, + util.ExecutionRuntimeRoleLauncher, + ) +} + +func GetNamespacePluginConfig(ctx context.Context, clientSet kubernetes.Interface, namespace string) (map[string]*PluginConfig, error) { + if namespace == "" { + return nil, util.NewInternalServerError(fmt.Errorf("namespace is empty"), "namespace must be specified when reading plugin config") + } + if clientSet == nil { + return nil, util.NewInternalServerError(fmt.Errorf("clientSet is nil"), "Kubernetes clientset must be provided when reading plugin namespace config") + } + cm, err := clientSet.CoreV1().ConfigMaps(namespace).Get(ctx, LauncherConfigMapName, v1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } + return nil, util.NewInternalServerError(err, "failed to read plugin namespace config from configmap %q in namespace %q", 
LauncherConfigMapName, namespace) + } + raw, ok := cm.Data[LauncherConfigKey] + if !ok || raw == "" { + return nil, nil + } + var cfg map[string]*PluginConfig + if err := json.Unmarshal([]byte(raw), &cfg); err != nil { + return nil, util.NewInternalServerError(err, "failed to parse plugin config from key %q in configmap %q/%q", LauncherConfigKey, namespace, LauncherConfigMapName) + } + return cfg, nil +} + +type PluginsOutputEnvelope struct { + others map[string]json.RawMessage +} + +func (e *PluginsOutputEnvelope) UnmarshalJSON(data []byte) error { + var all map[string]json.RawMessage + if err := json.Unmarshal(data, &all); err != nil { + return err + } + if len(all) > 0 { + e.others = all + } + return nil +} + +func (e PluginsOutputEnvelope) MarshalJSON() ([]byte, error) { + if len(e.others) == 0 { + return []byte("{}"), nil + } + return json.Marshal(e.others) +} + +// set stores a plugin entry by name. +func (e *PluginsOutputEnvelope) set(name string, data json.RawMessage) { + if e.others == nil { + e.others = make(map[string]json.RawMessage) + } + e.others[name] = data +} + +// SetPendingRunPluginOutput serializes the given PluginOutput into PendingRun.PluginsOutput. +func SetPendingRunPluginOutput(run *PendingRun, pluginName string, output *apiv2beta1.PluginOutput) error { + if run == nil || output == nil || pluginName == "" { + return nil + } + result, err := upsertPluginOutput(run.PluginsOutput, pluginName, output) + if err != nil { + return err + } + run.PluginsOutput = &result + return nil +} + +// upsertPluginOutput merges a single plugin's output into an existing +// plugins_output JSON string, returning the updated JSON. 
+func upsertPluginOutput(existing *string, pluginName string, output *apiv2beta1.PluginOutput) (string, error) {
+	// Encode the new entry first, so a failure here is reported before the
+	// existing envelope is parsed.
+	encodedEntry, err := protojson.Marshal(output)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal plugin output for %q: %w", pluginName, err)
+	}
+
+	// A nil or empty existing value means we start from an empty envelope.
+	var merged PluginsOutputEnvelope
+	hasExisting := existing != nil && *existing != ""
+	if hasExisting {
+		if err := json.Unmarshal([]byte(*existing), &merged); err != nil {
+			return "", fmt.Errorf("failed to unmarshal existing plugins_output: %w", err)
+		}
+	}
+
+	// Insert or overwrite this plugin's entry, then re-serialize the envelope.
+	merged.set(pluginName, encodedEntry)
+	encodedEnvelope, err := json.Marshal(merged)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal plugins_output map: %w", err)
+	}
+	return string(encodedEnvelope), nil
+}
+
+// GetParentRunID returns the string stored under EntryRootRunID in the given
+// plugin output, or "" when GetStringEntry finds no such entry.
+func GetParentRunID(output *apiv2beta1.PluginOutput) string {
+	return GetStringEntry(output, EntryRootRunID)
+}
+
+// GetStringEntry returns the string value of output.Entries[key]. It returns ""
+// when output, its Entries map, the entry, or the entry's Value is nil, or when
+// key is empty or not present.
+func GetStringEntry(output *apiv2beta1.PluginOutput, key string) string {
+	if key == "" || output == nil || output.Entries == nil {
+		return ""
+	}
+	if entry, found := output.Entries[key]; found && entry != nil && entry.Value != nil {
+		return entry.Value.GetStringValue()
+	}
+	return ""
+}
+
+// PersistPluginsOutput serializes the PersistedRun's PluginsOutput and writes
+// it to the database via the given store.
+func PersistPluginsOutput(run *PersistedRun, store RunPluginOutputStore) error {
+	lt, err := SerializePluginsOutput(run.PluginsOutput)
+	if err != nil {
+		return fmt.Errorf("failed to serialize plugins_output for run %q: %w", run.RunID, err)
+	}
+	return store.UpdateRunPluginsOutput(run.RunID, lt)
+}
+
+// SerializePluginsOutput encodes each plugin output with protojson and wraps
+// the results in a single JSON object keyed by plugin name. It returns
+// (nil, nil) when outputs is nil or empty.
+func SerializePluginsOutput(outputs map[string]*apiv2beta1.PluginOutput) (*model.LargeText, error) {
+	if len(outputs) == 0 {
+		return nil, nil
+	}
+	var envelope PluginsOutputEnvelope
+	for key, output := range outputs {
+		marshaledOutput, err := protojson.Marshal(output)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal plugin output for %q: %w", key, err)
+		}
+		envelope.set(key, marshaledOutput)
+	}
+	marshaledMap, err := json.Marshal(envelope)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal plugins_output map: %w", err)
+	}
+	lt := model.LargeText(string(marshaledMap))
+	return &lt, nil
+}
+
+// ResolveRunPluginConfigForHandler retrieves the plugin configuration for a given handler, merging namespace-level overrides with global config.
+// It returns an error when handler is nil or when the merge fails. When
+// namespacePluginCfg has no entry for handler.Name(), the handler's global
+// config is returned as-is (which may be nil).
+func ResolveRunPluginConfigForHandler(handler RunPluginHandler, namespacePluginCfg map[string]*PluginConfig) (*PluginConfig, error) {
+	// Guard before anything else: every statement below calls handler.Name().
+	if handler == nil {
+		return nil, fmt.Errorf("handler is nil")
+	}
+	glog.Infof("Resolving plugin config for handler %s", handler.Name())
+	// retrieve namespace-level config for the specified handler
+	var runPluginCfg *PluginConfig
+	handlerNamespacePluginCfg, ok := namespacePluginCfg[handler.Name()]
+	if ok {
+		// merge plugin global config and namespace config
+		var err error
+		runPluginCfg, err = MergePluginConfig(handlerNamespacePluginCfg, handler.GlobalPluginConfig())
+		if err != nil {
+			// %w keeps the merge error reachable via errors.Is / errors.As.
+			return nil, fmt.Errorf("failed to merge plugin configs for handler %q: %w", handler.Name(), err)
+		}
+	} else {
+		glog.Infof("no namespace-level plugin config found for %s. Only global plugin configs will be applied", handler.Name())
+		runPluginCfg = handler.GlobalPluginConfig()
+	}
+	glog.Infof("Resolved plugin config for handler %s: %v", handler.Name(), runPluginCfg)
+	return runPluginCfg, nil
+}
+
+// MergePluginConfig merges namespace-level overrides into the global config.
+// The namespace config takes precedence on non-zero fields.
+// A nil namespaceCfg returns globalCfg unchanged (the same pointer, not a
+// copy); a nil globalCfg combined with a non-nil namespaceCfg is an error.
+func MergePluginConfig(namespaceCfg *PluginConfig, globalCfg *PluginConfig) (*PluginConfig, error) {
+	if namespaceCfg == nil {
+		glog.Infof("namespace-level plugin config is nil, using global config: %v", globalCfg)
+		return globalCfg, nil
+	}
+	if globalCfg == nil {
+		return nil, fmt.Errorf("globalCfg is nil, cannot merge namespace-level config: %v", namespaceCfg)
+	}
+	merged := &PluginConfig{
+		Endpoint: globalCfg.Endpoint,
+		Timeout:  globalCfg.Timeout,
+		TLS:      globalCfg.TLS,
+		Settings: globalCfg.Settings,
+	}
+
+	if namespaceCfg.Endpoint != "" {
+		merged.Endpoint = namespaceCfg.Endpoint
+	}
+	if namespaceCfg.Timeout != "" {
+		merged.Timeout = namespaceCfg.Timeout
+	}
+	// TLS is replaced wholesale; individual TLS fields are not merged.
+	if namespaceCfg.TLS != nil {
+		merged.TLS = namespaceCfg.TLS
+	}
+	merged.Settings = mergeSettings(namespaceCfg.Settings, merged.Settings)
+	return merged, nil
+}
+
+// mergeSettings returns a new map holding every key of global overlaid with
+// every key of ns (ns wins on conflict). Neither input map is modified, and the
+// result is non-nil even when both inputs are nil.
+func mergeSettings(ns, global map[string]interface{}) map[string]interface{} {
+	merged := make(map[string]interface{}, len(global)+len(ns))
+	for key, value := range global {
+		merged[key] = value
+	}
+	for key, value := range ns {
+		merged[key] = value
+	}
+	return merged
+}
diff --git a/backend/src/apiserver/plugins/config_test.go b/backend/src/apiserver/plugins/config_test.go
new file mode 100644
index 00000000000..c19a3fe3862
--- /dev/null
+++ b/backend/src/apiserver/plugins/config_test.go
@@ -0,0 +1,32 @@
+package plugins
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestMergePluginConfigAndSettingsDefaults(t *testing.T) {
+	globalDesc := "Global desc"
+	wsDisabled := "false"
+	global := &PluginConfig{
+		Endpoint: "https://global-mlflow.example.com",
+		Timeout:  "30s",
+		Settings: map[string]interface{}{"experimentDescription": globalDesc},
+	}
+	namespace := &PluginConfig{
+		Endpoint: "https://ns-mlflow.example.com",
+		Settings: map[string]interface{}{"workspacesEnabled": wsDisabled},
+	}
+
+	merged, err := MergePluginConfig(namespace, global)
+	require.NoError(t, err)
+
+	assert.Equal(t, "https://ns-mlflow.example.com", merged.Endpoint)
+	assert.Equal(t, "30s", merged.Timeout)
+	assert.Equal(t, 2, len(merged.Settings))
+	assert.Equal(t, wsDisabled, merged.Settings["workspacesEnabled"])
+	assert.Equal(t, globalDesc, merged.Settings["experimentDescription"])
+
+}
diff --git a/backend/src/apiserver/plugins/dispatcher.go b/backend/src/apiserver/plugins/dispatcher.go
new file mode 100644
index 00000000000..2e4aa0fc2ef
--- /dev/null
+++ b/backend/src/apiserver/plugins/dispatcher.go
@@ -0,0 +1,242 @@
+// Copyright 2026 The Kubeflow Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package plugins
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"time"
+
+	"github.com/golang/glog"
+	apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client"
+	"github.com/kubeflow/pipelines/backend/src/apiserver/model"
+	"github.com/kubeflow/pipelines/backend/src/common/util"
+)
+
+// RunPluginOutputStore abstracts the DB write needed to persist plugin output.
+type RunPluginOutputStore interface {
+	// UpdateRunPluginsOutput stores the serialized plugins output for the
+	// run identified by runID.
+	UpdateRunPluginsOutput(runID string, pluginsOutput *model.LargeText) error
+}
+
+// RunPluginDispatcher is the entry point through which run lifecycle events
+// are forwarded to plugins.
+type RunPluginDispatcher interface {
+	// OnBeforeRunCreation runs before the workflow is created. The
+	// dispatcher reads run.PluginsInput and may write run.PluginsOutput.
+	// An error is returned only for validation failures that should block
+	// run creation.
+	OnBeforeRunCreation(ctx context.Context, run *PendingRun, executionSpec util.ExecutionSpec) error
+
+	// OnRunEnd runs once a run reaches a terminal state. It reports true
+	// if all plugin syncs succeeded.
+	OnRunEnd(ctx context.Context, run *PersistedRun) bool
+
+	// OnRunRetry runs when a run is retried.
+	OnRunRetry(ctx context.Context, run *PersistedRun) error
+}
+
+// NoOpDispatcher is a RunPluginDispatcher whose hooks do nothing and always
+// report success.
+type NoOpDispatcher struct{}
+
+// OnBeforeRunCreation ignores its arguments and returns nil.
+func (NoOpDispatcher) OnBeforeRunCreation(_ context.Context, _ *PendingRun, _ util.ExecutionSpec) error {
+	return nil
+}
+
+// OnRunEnd ignores its arguments and returns true.
+func (NoOpDispatcher) OnRunEnd(_ context.Context, _ *PersistedRun) bool {
+	return true
+}
+
+// OnRunRetry ignores its arguments and returns nil.
+func (NoOpDispatcher) OnRunRetry(_ context.Context, _ *PersistedRun) error {
+	return nil
+}
+
+// Compile-time check that NoOpDispatcher satisfies RunPluginDispatcher.
+var _ RunPluginDispatcher = NoOpDispatcher{}
+
+// NewRunPluginDispatcherImpl initializes a new RunPluginDispatcherImpl with the given handlers, client provider, and output store.
+// Returns an error if handlers are nil or empty.
+func NewRunPluginDispatcherImpl(handlers []RunPluginHandler, kubeClients KubeClientProvider, runOutputStore RunPluginOutputStore) (*RunPluginDispatcherImpl, error) {
+	glog.Infof("NewRunPluginDispatcherImpl with %d handlers", len(handlers))
+	// len() of a nil slice is 0, so one check covers both nil and empty input.
+	if len(handlers) == 0 {
+		return nil, fmt.Errorf("NewRunPluginDispatcherImpl requires non-nil slice containing minimum one handler")
+	}
+	// Reject nil entries up front; the sort comparator below would otherwise
+	// panic when calling Name() on a nil interface value.
+	for i, handler := range handlers {
+		if handler == nil {
+			return nil, fmt.Errorf("NewRunPluginDispatcherImpl received a nil handler at index %d", i)
+		}
+	}
+	// Sort a copy by name so dispatch order is deterministic and the caller's
+	// slice is left untouched.
+	sorted := make([]RunPluginHandler, len(handlers))
+	copy(sorted, handlers)
+	sort.Slice(sorted, func(i, j int) bool {
+		return sorted[i].Name() < sorted[j].Name()
+	})
+	return &RunPluginDispatcherImpl{
+		handlers:        sorted,
+		startedHandlers: make(map[string]bool),
+		kubeClients:     kubeClients,
+		runOutputStore:  runOutputStore,
+	}, nil
+}
+
+// Compile-time check that *RunPluginDispatcherImpl satisfies RunPluginDispatcher.
+var _ RunPluginDispatcher = (*RunPluginDispatcherImpl)(nil)
+
+// RunPluginDispatcherImpl implements RunPluginDispatcher.
+type RunPluginDispatcherImpl struct {
+	// handlers holds the registered plugin handlers, sorted by Name().
+	handlers []RunPluginHandler
+	// startedHandlers is initialized by the constructor.
+	// NOTE(review): nothing in this file reads or populates it - confirm it is
+	// still needed.
+	startedHandlers map[string]bool
+	// kubeClients supplies the clientset used to read namespace plugin config.
+	kubeClients KubeClientProvider
+	// runOutputStore persists plugin output after post-run hooks.
+	runOutputStore RunPluginOutputStore
+}
+
+// preRunPluginTimeout bounds the total time spent in handler
+// OnBeforeRunCreation calls for a single run.
+const preRunPluginTimeout = 3 * time.Second
+
+// OnBeforeRunCreation resolves the namespace-level plugin config and then, for
+// each registered handler, calls the handler's OnBeforeRunCreation under a
+// shared preRunPluginTimeout budget. Any plugin output a handler returns is
+// recorded on run.PluginsOutput, and any runtime env vars it returns are
+// injected into executionSpec.
+//
+// Handler-level failures are logged and do not block run creation. An error is
+// returned only for nil arguments, a missing Kubernetes client provider, or a
+// failure to read the namespace plugin config.
+func (d *RunPluginDispatcherImpl) OnBeforeRunCreation(ctx context.Context, run *PendingRun, executionSpec util.ExecutionSpec) error {
+	if d == nil || run == nil || executionSpec == nil {
+		return fmt.Errorf("dispatcher, run, and executionSpec must be non-nil")
+	}
+	// The constructor does not validate kubeClients, so fail with an error
+	// here instead of panicking on a nil interface call.
+	if d.kubeClients == nil {
+		return fmt.Errorf("dispatcher has no Kubernetes client provider configured")
+	}
+	// retrieve namespace-level plugin configs for all registered plugins.
+	namespacePluginCfg, err := GetNamespacePluginConfig(ctx, d.kubeClients.GetClientSet(), run.Namespace)
+	if err != nil {
+		// %w keeps the underlying error reachable via errors.Is / errors.As.
+		return fmt.Errorf("failed to get map of namespace plugin configs for run %q: %w", run.RunID, err)
+	}
+
+	// Limit plugin pre-run calls to a short timeout budget while still
+	// honoring parent request cancellation.
+	pluginCtx, cancel := context.WithTimeout(ctx, preRunPluginTimeout)
+	defer cancel()
+	// The dispatcher's own fields are deliberately not written here: an
+	// unsynchronized write would race if calls overlap on one dispatcher.
+	for _, handler := range d.handlers {
+		// retrieve global and namespace-level configs for the specified plugin handler
+		runPluginCfg, cfgErr := ResolveRunPluginConfigForHandler(handler, namespacePluginCfg)
+		if cfgErr != nil {
+			glog.Warningf("Failed to resolve plugin config for %s on run %q (run creation will continue): %v", handler.Name(), run.RunID, cfgErr)
+			continue
+		}
+		pluginOutput, pluginRuntimeEnv, pluginErr := handler.OnBeforeRunCreation(pluginCtx, run, runPluginCfg)
+		if pluginErr != nil {
+			glog.Warningf("%s OnBeforeRunCreation failed for run %q (run creation will continue): %v", handler.Name(), run.RunID, pluginErr)
+		}
+		// A handler may return output together with an error; the output is
+		// still recorded below. Only a nil output skips the handler.
+		if pluginOutput == nil {
+			continue
+		}
+		if err := SetPendingRunPluginOutput(run, handler.Name(), pluginOutput); err != nil {
+			glog.Warningf("Failed to persist %s plugin output for run %q: %v", handler.Name(), run.RunID, err)
+		}
+		if len(pluginRuntimeEnv) != 0 {
+			if err := InjectPluginRuntimeEnv(executionSpec, pluginRuntimeEnv); err != nil {
+				glog.Warningf("Failed to inject %s runtime env for run %q: %v", handler.Name(), run.RunID, err)
+			}
+		}
+	}
+
+	return nil
+}
+
+// OnRunEnd syncs the plugin parent and nested runs at terminal state.
+// Returns true if the sync succeeded or there is nothing to retry.
+func (d *RunPluginDispatcherImpl) OnRunEnd(ctx context.Context, run *PersistedRun) bool {
+	// Namespace overrides are loaded once and shared by every handler below.
+	nsConfigs, loadErr := GetNamespacePluginConfig(ctx, d.kubeClients.GetClientSet(), run.Namespace)
+	if loadErr != nil {
+		glog.Errorf("failed to get namespace plugin config for run %q: %v", run.RunID, loadErr)
+		return false
+	}
+
+	for _, handler := range d.handlers {
+		// Combine the handler's global config with its namespace override, if any.
+		effectiveCfg, resolveErr := ResolveRunPluginConfigForHandler(handler, nsConfigs)
+		if resolveErr != nil {
+			glog.Warningf("Failed to resolve plugin config for %s on run %q: %v", handler.Name(), run.RunID, resolveErr)
+			continue
+		}
+
+		// Captured before the hook runs. GetParentRunID returns "" for a nil
+		// output, so a missing entry counts as "no parent run".
+		hadParentRun := GetParentRunID(run.PluginsOutput[handler.Name()]) != ""
+
+		endHook := func(_ *RunPluginHandler, r *PersistedRun, cfg *PluginConfig) {
+			if hookErr := handler.OnRunEnd(ctx, r, cfg); hookErr != nil {
+				glog.Warningf("%s OnRunEnd failed for run %q: %v", handler.Name(), run.RunID, hookErr)
+			}
+		}
+		synced := d.executePostAction(run, "OnRunEnd", effectiveCfg, &handler, endHook)
+
+		// A failed sync only matters when there was a parent run left to
+		// close; otherwise move on to the next handler.
+		if synced || !hadParentRun {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
+// OnRunRetry reopens the parent run and any failed/killed nested runs.
+func (d *RunPluginDispatcherImpl) OnRunRetry(ctx context.Context, run *PersistedRun) error {
+	if d == nil || run == nil {
+		return fmt.Errorf("dispatcher and run must be non-nil")
+	}
+
+	// retrieve namespace-level plugin config
+	namespacePluginCfg, err := GetNamespacePluginConfig(ctx, d.kubeClients.GetClientSet(), run.Namespace)
+	if err != nil {
+		// %w keeps the underlying error reachable via errors.Is / errors.As.
+		return fmt.Errorf("failed to get map of namespace plugin configs for run %q: %w", run.RunID, err)
+	}
+	for _, handler := range d.handlers {
+		// retrieve global and namespace-level configs for the specified handler
+		runPluginCfg, cfgErr := ResolveRunPluginConfigForHandler(handler, namespacePluginCfg)
+		if cfgErr != nil {
+			glog.Warningf("Failed to resolve plugin config for %s on run %q: %v", handler.Name(), run.RunID, cfgErr)
+			continue
+		}
+
+		// The boolean result of executePostAction is intentionally ignored:
+		// a handler-level retry failure does not fail the retry request.
+		d.executePostAction(run, "HandleRetry", runPluginCfg, &handler, func(h *RunPluginHandler, r *PersistedRun, c *PluginConfig) {
+			handler.HandleRetry(ctx, r, c)
+		})
+	}
+	return nil
+}
+
+// executePostAction handles the common setup for OnRunEnd and HandleRetry.
+// Returns true if the plugin sync succeeded.
+func (d *RunPluginDispatcherImpl) executePostAction(
+	run *PersistedRun,
+	hookName string,
+	resolvedCfg *PluginConfig,
+	handler *RunPluginHandler,
+	invoke func(*RunPluginHandler, *PersistedRun, *PluginConfig),
+) bool {
+	// Resolve the handler once. A nil pointer and a pointer to a nil
+	// interface value are both treated as "no handler", so Name() is never
+	// called on nil.
+	var handlerResolved RunPluginHandler
+	if handler != nil {
+		handlerResolved = *handler
+	}
+	var handlerName string
+	if handlerResolved != nil {
+		handlerName = handlerResolved.Name()
+	}
+
+	if resolvedCfg == nil {
+		// Without a config the hook cannot run: mark this handler's existing
+		// output (if any) as failed, persist, and report failure.
+		// NOTE(review): the message names MLflow although the dispatcher is
+		// plugin-agnostic - confirm whether that is intended.
+		msg := fmt.Sprintf("MLflow %s sync failed: config unavailable", hookName)
+		glog.Warningf("%s %s skipped: resolved plugin config is nil for run %q", handlerName, hookName, run.RunID)
+		if po := run.PluginsOutput[handlerName]; po != nil {
+			po.State = apiv2beta1.PluginState_PLUGIN_FAILED
+			po.StateMessage = msg
+		}
+		if err := PersistPluginsOutput(run, d.runOutputStore); err != nil {
+			glog.Warningf("%s %s: failed to persist plugin output for run %q: %v", handlerName, hookName, run.RunID, err)
+		}
+		return false
+	}
+
+	// Previously a nil handler reached handlerResolved.Name() below and
+	// panicked; report a failed sync instead.
+	if handlerResolved == nil {
+		glog.Warningf("%s skipped: no plugin handler provided for run %q", hookName, run.RunID)
+		return false
+	}
+	invoke(handler, run, resolvedCfg)
+
+	// Success is read back from the handler's entry in run.PluginsOutput
+	// after the hook has run; a missing entry counts as not synced.
+	handlerSyncOk := false
+	if po := run.PluginsOutput[handlerName]; po != nil {
+		handlerSyncOk = po.State == apiv2beta1.PluginState_PLUGIN_SUCCEEDED
+	}
+
+	if err := PersistPluginsOutput(run, d.runOutputStore); err != nil {
+		glog.Warningf("%s %s: failed to persist plugin output for run %q: %v", handlerName, hookName, run.RunID, err)
+		return false
+	}
+	return handlerSyncOk
+}
diff --git a/backend/src/apiserver/plugins/dispatcher_test.go b/backend/src/apiserver/plugins/dispatcher_test.go
new file mode 100644
index 00000000000..a1b61893093
--- /dev/null
+++ b/backend/src/apiserver/plugins/dispatcher_test.go
@@ -0,0 +1,248 @@
+package plugins
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	workflowapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
+	apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client"
+	"github.com/kubeflow/pipelines/backend/src/common/util"
+	"github.com/stretchr/testify/assert"
+ 
"github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ RunPluginHandler = (*fakeHandler)(nil) + +type fakeHandler struct { + name string + pluginConfig *PluginConfig + pluginOutput *apiv2beta1.PluginOutput + startErr error + endErr error + envVars map[string]string + envErr error + customProps map[string]string +} + +func (f *fakeHandler) Name() string { return f.name } + +func (f *fakeHandler) GlobalPluginConfig() *PluginConfig { + return f.pluginConfig +} + +func (f *fakeHandler) OnBeforeRunCreation(_ context.Context, _ *PendingRun, _ *PluginConfig) (*apiv2beta1.PluginOutput, map[string]string, error) { + return f.pluginOutput, f.envVars, f.startErr +} + +func (f *fakeHandler) HandleRetry(_ context.Context, _ *PersistedRun, _ *PluginConfig) {} + +func (f *fakeHandler) OnRunEnd(_ context.Context, _ *PersistedRun, _ *PluginConfig) error { + return f.endErr +} + +var pendingRun = &PendingRun{ + RunID: "run-123", + Namespace: "test-ns", +} + +var persistedRun = &PersistedRun{ + RunID: "run-123", + Namespace: "test-ns", + State: "SUCCEEDED", + PluginsOutput: map[string]*apiv2beta1.PluginOutput{}, +} + +func newFakeExecutionSpec() util.ExecutionSpec { + return util.NewWorkflow(&workflowapi.Workflow{ + TypeMeta: metav1.TypeMeta{Kind: "Workflow", APIVersion: "argoproj.io/v1alpha1"}, + ObjectMeta: metav1.ObjectMeta{Name: "test-wf", Namespace: "test-ns"}, + }) +} + +func newFakeDispatcher(handlers []RunPluginHandler) (*RunPluginDispatcherImpl, error) { + return NewRunPluginDispatcherImpl(handlers, &fakeKubeClientProvider{}, &fakeRunPluginOutputStore{}) +} + +func TestNewRunPluginDispatcherImpl_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, err := newFakeDispatcher([]RunPluginHandler{handler}) + + require.NoError(t, err) + require.NotNil(t, dispatcher) + require.NotNil(t, dispatcher.handlers) + require.Len(t, dispatcher.handlers, 1) +} + +func 
TestNewRunPluginDispatcherImpl_NilHandlers_Failure(t *testing.T) { + dispatcher, err := newFakeDispatcher(nil) + + require.Nil(t, dispatcher) + require.Error(t, err) + assert.Equal(t, "NewRunPluginDispatcherImpl requires non-nil slice containing minimum one handler", err.Error()) +} + +func TestNewRunPluginDispatcherImpl_EmptyHandlers_Failure(t *testing.T) { + dispatcher, err := newFakeDispatcher([]RunPluginHandler{}) + + require.Nil(t, dispatcher) + require.Error(t, err) + assert.Equal(t, "NewRunPluginDispatcherImpl requires non-nil slice containing minimum one handler", err.Error()) +} + +func TestOnBeforeRunCreation_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + pluginOutput: &apiv2beta1.PluginOutput{}, + } + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := dispatcher.OnBeforeRunCreation(context.Background(), pendingRun, newFakeExecutionSpec()) + + require.NoError(t, err) +} + +func TestOnBeforeRunCreation_MultipleHandlers_Success(t *testing.T) { + handler1 := &fakeHandler{ + name: "FakePluginA", + pluginOutput: &apiv2beta1.PluginOutput{}, + } + handler2 := &fakeHandler{ + name: "FakePluginB", + pluginOutput: &apiv2beta1.PluginOutput{}, + } + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler1, handler2}) + + err := dispatcher.OnBeforeRunCreation(context.Background(), pendingRun, newFakeExecutionSpec()) + + require.NoError(t, err) +} + +func TestOnBeforeRunCreation_NilDispatcher_Failure(t *testing.T) { + var dispatcher *RunPluginDispatcherImpl + + err := dispatcher.OnBeforeRunCreation(context.Background(), pendingRun, newFakeExecutionSpec()) + + require.Error(t, err) + assert.Equal(t, "dispatcher, run, and executionSpec must be non-nil", err.Error()) +} + +func TestOnBeforeRunCreation_NilRun_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin", pluginOutput: &apiv2beta1.PluginOutput{}} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := 
dispatcher.OnBeforeRunCreation(context.Background(), nil, newFakeExecutionSpec()) + + require.Error(t, err) + assert.Equal(t, "dispatcher, run, and executionSpec must be non-nil", err.Error()) +} + +func TestOnBeforeRunCreation_NilExecutionSpec_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin", pluginOutput: &apiv2beta1.PluginOutput{}} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := dispatcher.OnBeforeRunCreation(context.Background(), pendingRun, nil) + + require.Error(t, err) + assert.Equal(t, "dispatcher, run, and executionSpec must be non-nil", err.Error()) +} + +func TestOnRunEnd_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + result := dispatcher.OnRunEnd(context.Background(), persistedRun) + + assert.True(t, result) +} + +func TestOnRunEnd_MultipleHandlers_Success(t *testing.T) { + handler1 := &fakeHandler{name: "FakePluginA"} + handler2 := &fakeHandler{name: "FakePluginB"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler1, handler2}) + + result := dispatcher.OnRunEnd(context.Background(), persistedRun) + + assert.True(t, result) +} + +func TestOnRunEnd_NilDispatcher_Failure(t *testing.T) { + var dispatcher *RunPluginDispatcherImpl + + assert.Panics(t, func() { + dispatcher.OnRunEnd(context.Background(), persistedRun) + }) +} + +func TestOnRunEnd_NilRun_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + assert.Panics(t, func() { + dispatcher.OnRunEnd(context.Background(), nil) + }) +} + +func TestOnRunRetry_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := dispatcher.OnRunRetry(context.Background(), persistedRun) + + require.NoError(t, err) +} + +func TestOnRunRetry_MultipleHandlers_Success(t 
*testing.T) { + handler1 := &fakeHandler{name: "FakePluginA"} + handler2 := &fakeHandler{name: "FakePluginB"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler1, handler2}) + + err := dispatcher.OnRunRetry(context.Background(), persistedRun) + + require.NoError(t, err) +} + +func TestOnRunRetryNilDispatcher_Failure(t *testing.T) { + var dispatcher *RunPluginDispatcherImpl + + err := dispatcher.OnRunRetry(context.Background(), persistedRun) + + require.Error(t, err) + assert.Equal(t, "dispatcher and run must be non-nil", err.Error()) +} + +func TestOnRunRetry_NilRun_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := dispatcher.OnRunRetry(context.Background(), nil) + + require.Error(t, err) + assert.Equal(t, "dispatcher and run must be non-nil", err.Error()) +} + +func TestOnBeforeRunCreation_HandlerFailure_ContinuesExecution(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + pluginOutput: &apiv2beta1.PluginOutput{}, + startErr: fmt.Errorf("plugin startup failed"), + } + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + err := dispatcher.OnBeforeRunCreation(context.Background(), pendingRun, newFakeExecutionSpec()) + + require.NoError(t, err) +} + +func TestOnRunEnd_HandlerFailure_ReturnsTrueWithoutParentRun(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + endErr: fmt.Errorf("plugin end failed"), + } + dispatcher, _ := newFakeDispatcher([]RunPluginHandler{handler}) + + result := dispatcher.OnRunEnd(context.Background(), persistedRun) + + assert.True(t, result) +} diff --git a/backend/src/apiserver/plugins/handler.go b/backend/src/apiserver/plugins/handler.go new file mode 100644 index 00000000000..ca0a86c6700 --- /dev/null +++ b/backend/src/apiserver/plugins/handler.go @@ -0,0 +1,58 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may 
not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package plugins + +import ( + "context" + "encoding/json" + "time" + + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" +) + +// PluginsInputMap is the JSON envelope for the plugins_input model +// field. Each key is a plugin name and the value is that plugin's raw config. +type PluginsInputMap map[string]json.RawMessage + +// PendingRun holds the minimal run information available before a KFP run is +// persisted. +type PendingRun struct { + RunID string + DisplayName string + Namespace string + PipelineID string + PipelineVersionID string + PluginsInput *string // raw JSON from model, read by dispatcher + PluginsOutput *string // raw JSON produced by dispatcher, written back to model +} + +// PersistedRun holds the minimal run information for post-run plugin hooks +type PersistedRun struct { + RunID string + Namespace string + State string // RuntimeState string, e.g. "SUCCEEDED", "FAILED" + FinishedAt *time.Time // nil if not yet finished + // PluginsOutput is the deserialized plugins_output map. 
+ PluginsOutput map[string]*apiv2beta1.PluginOutput +} + +// RunPluginHandler defines the generic run-level plugin lifecycle hooks +type RunPluginHandler interface { + Name() string + GlobalPluginConfig() *PluginConfig + OnBeforeRunCreation(ctx context.Context, run *PendingRun, config *PluginConfig) (*apiv2beta1.PluginOutput, map[string]string, error) + HandleRetry(ctx context.Context, run *PersistedRun, config *PluginConfig) + OnRunEnd(ctx context.Context, run *PersistedRun, config *PluginConfig) error +} diff --git a/backend/src/apiserver/plugins/mlflow/config.go b/backend/src/apiserver/plugins/mlflow/config.go new file mode 100644 index 00000000000..dee8436a66b --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/config.go @@ -0,0 +1,289 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mlflow + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "strconv" + "strings" + + "github.com/golang/glog" + apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/common/util" + "github.com/pkg/errors" + "github.com/spf13/viper" +) + +const ( + // DefaultExperimentName is the MLflow experiment name used when the user + // and admin configuration do not specify one. 
+ DefaultExperimentName = "KFP-Default" + // DefaultTimeout is the default HTTP request timeout for the MLflow client. + DefaultTimeout = "30s" +) + +// ApplySettingsDefaults applies default values to a parsed MLflowPluginSettings. +func ApplySettingsDefaults(settings *commonmlflow.MLflowPluginSettings) *commonmlflow.MLflowPluginSettings { + if settings == nil { + settings = &commonmlflow.MLflowPluginSettings{} + } + if settings.WorkspacesEnabled == nil { + defaultEnabled := true + settings.WorkspacesEnabled = &defaultEnabled + } + if settings.DefaultExperimentName == "" { + settings.DefaultExperimentName = DefaultExperimentName + } + if settings.ExperimentDescription == nil { + d := DefaultExperimentDescription + settings.ExperimentDescription = &d + } + return settings +} + +// MLflowPluginInput represents the user-facing plugins_input.mlflow schema. +type MLflowPluginInput struct { + ExperimentName string `json:"experiment_name,omitempty"` + ExperimentID string `json:"experiment_id,omitempty"` + Disabled bool `json:"disabled,omitempty"` +} + +// IsEnabled reports whether the global plugins.mlflow configuration is present, +// indicating the API server has opted in to MLflow integration. 
+func IsEnabled() bool { + return viper.IsSet("plugins.mlflow") +} + +// GetGlobalMLflowConfig reads the global plugins.mlflow configuration +func GetGlobalMLflowConfig() (apiserverPlugins.PluginConfig, bool, error) { + if !viper.IsSet("plugins.mlflow") { + return apiserverPlugins.PluginConfig{}, false, nil + } + raw := viper.Get("plugins.mlflow") + data, err := json.Marshal(raw) + if err != nil { + return apiserverPlugins.PluginConfig{}, false, util.NewInvalidInputError("failed to marshal global plugins.mlflow config: %v", err) + } + var cfg apiserverPlugins.PluginConfig + if err := json.Unmarshal(data, &cfg); err != nil { + return apiserverPlugins.PluginConfig{}, false, util.NewInvalidInputError("failed to parse global plugins.mlflow config: %v", err) + } + return cfg, true, nil +} + +// ResolveMLflowPluginConfig builds an MLflowPluginConfig for the given input generic PluginConfig. +func ResolveMLflowPluginConfig(runPluginConfig *apiserverPlugins.PluginConfig, resolvedMLflowSettings *commonmlflow.MLflowPluginSettings) (*commonmlflow.MLflowPluginConfig, error) { + if runPluginConfig == nil || resolvedMLflowSettings == nil { + return nil, fmt.Errorf("runPluginConfig and resolvedMLflowSettings must be non-nil") + } + + resolvedTimeout := runPluginConfig.Timeout + if resolvedTimeout == "" { + resolvedTimeout = DefaultTimeout + } + + resolvedMLflowCfg := &commonmlflow.MLflowPluginConfig{ + Endpoint: runPluginConfig.Endpoint, + Timeout: resolvedTimeout, + TLS: runPluginConfig.TLS, + Settings: resolvedMLflowSettings, + } + return resolvedMLflowCfg, nil +} + +// BuildMLflowRunRequestContext constructs a fully initialized RequestContext by +// performing API-server-specific validation and then delegating to the common +// BuildRequestContext. 
+func BuildMLflowRunRequestContext(namespace string, requestCfg *commonmlflow.MLflowPluginConfig) (*commonmlflow.RequestContext, error) { + if requestCfg == nil { + return nil, util.NewInternalServerError(errors.New("MLflow config is nil"), "cannot build MLflow request context without a resolved config") + } + if requestCfg.Endpoint == "" { + return nil, util.NewInvalidInputError("plugins.mlflow endpoint must be set") + } + settings := requestCfg.Settings + if settings == nil { + return nil, util.NewInternalServerError(errors.New("MLflow plugin settings are nil"), "BuildMLflowRequestContext requires resolved settings") + } + workspacesEnabled := settings.WorkspacesEnabled != nil && *settings.WorkspacesEnabled + return commonmlflow.BuildMLflowRequestContext(*requestCfg, namespace, workspacesEnabled) +} + +// ResolveMLflowPluginInput parses the plugins_input.mlflow JSON from a run model, +// validates it against the MLflowPluginInput schema, and applies defaults. +func ResolveMLflowPluginInput(pluginsInputString *string) (*MLflowPluginInput, error) { + if pluginsInputString == nil || *pluginsInputString == "" { + return nil, nil + } + + var pluginInputs apiserverPlugins.PluginsInputMap + if err := json.Unmarshal([]byte(*pluginsInputString), &pluginInputs); err != nil { + return nil, util.NewInvalidInputError("plugins_input must be a valid JSON object: %v", err) + } + //todo: should be just mlflow? 
+ rawInput, ok := pluginInputs["mlflow"] + if !ok || len(rawInput) == 0 { + return nil, nil + } + + decoder := json.NewDecoder(bytes.NewReader(rawInput)) + decoder.DisallowUnknownFields() + input := &MLflowPluginInput{} + if err := decoder.Decode(input); err != nil { + return nil, util.NewInvalidInputError("plugins_input.mlflow must follow schema {experiment_name?: string, experiment_id?: string, disabled?: bool}: %v", err) + } + var trailing json.RawMessage + if err := decoder.Decode(&trailing); err != io.EOF { + return nil, util.NewInvalidInputError("plugins_input.mlflow must be a single JSON object") + } + + if input.Disabled { + return input, nil + } + if input.ExperimentID != "" { + return input, nil + } + if input.ExperimentName == "" { + input.ExperimentName = DefaultExperimentName + } + return input, nil +} + +// SelectMLflowExperiment chooses the selector used for MLflow experiment resolution. +// Priority: user-provided experiment_id > user-provided experiment_name > +// admin-configured defaultExperimentName > hardcoded "KFP-Default". +func SelectMLflowExperiment(input *MLflowPluginInput, settings *commonmlflow.MLflowPluginSettings) (experimentID string, experimentName string) { + if input != nil { + if input.ExperimentID != "" { + return input.ExperimentID, "" + } + if input.ExperimentName != "" { + return "", input.ExperimentName + } + } + if settings != nil && settings.DefaultExperimentName != "" { + return "", settings.DefaultExperimentName + } + return "", DefaultExperimentName +} + +// InjectMLflowRuntimeEnv sets KFP_MLFLOW_CONFIG on driver and launcher +// containers. +func InjectMLflowRuntimeEnv(executionSpec util.ExecutionSpec, env map[string]string) error { + if len(env) == 0 || executionSpec == nil { + return nil + } + return executionSpec.UpsertRuntimeEnvVars(env, + util.ExecutionRuntimeRoleDriver, + util.ExecutionRuntimeRoleLauncher, + ) +} + +// ToMLflowTerminalStatus converts a KFP RuntimeState string to an MLflow +// terminal status. 
+func ToMLflowTerminalStatus(stateV2 string) string { + switch stateV2 { + case "SUCCEEDED": + return "FINISHED" + case "CANCELED", "CANCELING": + return "KILLED" + default: + return "FAILED" + } +} + +// ResolvePluginSettings parses and validates MLflow plugin settings from raw map, and applies default values where +// necessary. +func ResolvePluginSettings(rawSettings map[string]interface{}) *commonmlflow.MLflowPluginSettings { + var settings commonmlflow.MLflowPluginSettings + for key, value := range rawSettings { + switch strings.ToLower(key) { + case "workspacesenabled": + settings.WorkspacesEnabled = asBoolPointer(key, value) + case "experimentdescription": + settings.ExperimentDescription = asStringPointer(key, value) + case "defaultexperimentname": + if s, ok := asString(key, value); ok { + settings.DefaultExperimentName = s + } + case "kfpbaseurl": + if s, ok := asString(key, value); ok { + settings.KFPBaseURL = s + } + case "kfprunurlpathtemplate": + if s, ok := asString(key, value); ok { + settings.KFPRunURLPathTemplate = s + } + case "mlflowbaseurl": + if s, ok := asString(key, value); ok { + settings.MLflowBaseURL = s + } + case "mlflowuipathprefix": + if s, ok := asString(key, value); ok { + settings.MLflowUIPathPrefix = s + } + case "injectuserenvvars": + settings.InjectUserEnvVars = asBoolPointer(key, value) + default: + glog.Warningf("unrecognized MLflow plugin setting: %s", key) + } + } + return ApplySettingsDefaults(&settings) +} + +func asBoolPointer(key string, value interface{}) *bool { + switch v := value.(type) { + case bool: + return &v + case *bool: + return v + case string: + parsed, err := strconv.ParseBool(v) + if err != nil { + glog.Errorf("failed to parse %s as bool from MLflow plugin settings: %v", key, err) + return nil + } + return &parsed + default: + glog.Errorf("unexpected type %T for MLflow plugin setting %s", value, key) + return nil + } +} + +func asStringPointer(key string, value interface{}) *string { + if s, ok := 
asString(key, value); ok { + return &s + } + return nil +} + +func asString(key string, value interface{}) (string, bool) { + switch v := value.(type) { + case string: + return v, true + case *string: + if v != nil { + return *v, true + } + return "", false + default: + glog.Errorf("unexpected type %T for MLflow plugin setting %s", value, key) + return "", false + } +} diff --git a/backend/src/apiserver/plugins/mlflow/config_test.go b/backend/src/apiserver/plugins/mlflow/config_test.go new file mode 100644 index 00000000000..619da0490cf --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/config_test.go @@ -0,0 +1,539 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mlflow + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + workflowapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/common/util" + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/encoding/protojson" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" +) + +// setupFakeKubernetesConfig writes a temp kubeconfig with the given bearer token +// and sets the KUBECONFIG env var so util.GetKubernetesConfig() picks it up. +func setupFakeKubernetesConfig(t *testing.T, token string) { + t.Helper() + kubeconfig := fmt.Sprintf(`apiVersion: v1 +kind: Config +clusters: +- cluster: + server: https://localhost + name: test +contexts: +- context: + cluster: test + user: test + name: test +current-context: test +users: +- name: test + user: + token: %s +`, token) + p := filepath.Join(t.TempDir(), "kubeconfig") + require.NoError(t, os.WriteFile(p, []byte(kubeconfig), 0600)) + t.Setenv("KUBECONFIG", p) +} + +func TestResolveMLflowPluginInput(t *testing.T) { + tests := []struct { + name string + input *string + want *MLflowPluginInput + wantError bool + }{ + { + name: "nil input returns nil", + input: nil, + want: nil, + }, + { + name: "empty string returns nil", + input: strPtr(""), + want: nil, + }, + { + name: "missing mlflow block returns nil", + input: strPtr(`{"other":{"x":"y"}}`), + want: nil, + }, + { + name: "missing experiment_name defaults", + input: strPtr(`{"plugins.mlflow":{"disabled":false}}`), + want: 
&MLflowPluginInput{ExperimentName: DefaultExperimentName}, + }, + { + name: "valid experiment_name is used", + input: strPtr(`{"plugins.mlflow":{"experiment_name":"exp-1"}}`), + want: &MLflowPluginInput{ExperimentName: "exp-1"}, + }, + { + name: "experiment_id takes precedence and is preserved", + input: strPtr(`{"plugins.mlflow":{"experiment_name":"exp-1","experiment_id":"42"}}`), + want: &MLflowPluginInput{ExperimentName: "exp-1", ExperimentID: "42"}, + }, + { + name: "disabled true is accepted", + input: strPtr(`{"plugins.mlflow":{"disabled":true}}`), + want: &MLflowPluginInput{Disabled: true}, + }, + { + name: "invalid plugins_input json errors", + input: strPtr(`{`), + wantError: true, + }, + { + name: "non-string experiment_name errors", + input: strPtr(`{"plugins.mlflow":{"experiment_name":123}}`), + wantError: true, + }, + { + name: "non-string experiment_id errors", + input: strPtr(`{"plugins.mlflow":{"experiment_id":123}}`), + wantError: true, + }, + { + name: "unknown field is rejected", + input: strPtr(`{"plugins.mlflow":{"experiment_name":"exp-1","unknown":"x"}}`), + wantError: true, + }, + { + name: "mlflow block must be an object", + input: strPtr(`{"plugins.mlflow":"not-an-object"}`), + wantError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ResolveMLflowPluginInput(tt.input) + if tt.wantError { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestSelectMLflowExperiment(t *testing.T) { + tests := []struct { + name string + input *MLflowPluginInput + settings *commonmlflow.MLflowPluginSettings + wantExperimentID string + wantExperimentName string + }{ + { + name: "nil input and nil settings defaults to KFP-Default", + input: nil, + settings: nil, + wantExperimentID: "", + wantExperimentName: DefaultExperimentName, + }, + { + name: "experiment_id takes precedence", + input: &MLflowPluginInput{ExperimentID: "42", ExperimentName: 
"ignored"}, + settings: nil, + wantExperimentID: "42", + wantExperimentName: "", + }, + { + name: "uses experiment_name when id absent", + input: &MLflowPluginInput{ExperimentName: "exp-a"}, + settings: nil, + wantExperimentID: "", + wantExperimentName: "exp-a", + }, + { + name: "empty input falls back to admin-configured default", + input: &MLflowPluginInput{}, + settings: &commonmlflow.MLflowPluginSettings{DefaultExperimentName: "Team-Pipelines"}, + wantExperimentID: "", + wantExperimentName: "Team-Pipelines", + }, + { + name: "nil input falls back to admin-configured default", + input: nil, + settings: &commonmlflow.MLflowPluginSettings{DefaultExperimentName: "Org-Default"}, + wantExperimentID: "", + wantExperimentName: "Org-Default", + }, + { + name: "user experiment_name overrides admin default", + input: &MLflowPluginInput{ExperimentName: "user-exp"}, + settings: &commonmlflow.MLflowPluginSettings{DefaultExperimentName: "Admin-Default"}, + wantExperimentID: "", + wantExperimentName: "user-exp", + }, + { + name: "empty input and empty admin default falls back to KFP-Default", + input: &MLflowPluginInput{}, + settings: &commonmlflow.MLflowPluginSettings{}, + wantExperimentID: "", + wantExperimentName: DefaultExperimentName, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotID, gotName := SelectMLflowExperiment(tt.input, tt.settings) + assert.Equal(t, tt.wantExperimentID, gotID) + assert.Equal(t, tt.wantExperimentName, gotName) + }) + } +} + +func TestGetGlobalMLflowConfig(t *testing.T) { + originalPlugins := viper.Get("plugins") + t.Cleanup(func() { + viper.Set("plugins", originalPlugins) + }) + + viper.Set("plugins", map[string]interface{}{ + "mlflow": map[string]interface{}{ + "endpoint": "https://mlflow.example.com", + "timeout": "10s", + "settings": map[string]interface{}{ + "workspacesEnabled": false, + }, + }, + }) + + cfg, ok, err := GetGlobalMLflowConfig() + require.NoError(t, err) + require.True(t, ok) + assert.Equal(t, 
"https://mlflow.example.com", cfg.Endpoint) + assert.Equal(t, "10s", cfg.Timeout) + require.NotNil(t, cfg.Settings) +} + +func TestBuildMLflowRequestContextKubernetesAuth(t *testing.T) { + setupFakeKubernetesConfig(t, "sa-token-value") + + workspacesEnabled := true + mlflowPluginCfg := &commonmlflow.MLflowPluginConfig{ + Endpoint: "https://mlflow.example.com", + Timeout: "12s", + TLS: &commonplugins.TLSConfig{ + InsecureSkipVerify: false, + }, + Settings: &commonmlflow.MLflowPluginSettings{ + WorkspacesEnabled: &workspacesEnabled, + }, + } + + mlflowCtx, err := BuildMLflowRunRequestContext("ns1", mlflowPluginCfg) + require.NoError(t, err) + require.NotNil(t, mlflowCtx) + assert.Equal(t, "https://mlflow.example.com", mlflowCtx.BaseURL.String()) + assert.True(t, mlflowCtx.WorkspacesEnabled) + assert.NotNil(t, mlflowCtx.Client) +} + +func TestEnsureExperimentExists(t *testing.T) { + t.Run("returns existing experiment from get-by-name", func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "Bearer bearer-secret", r.Header.Get("Authorization")) + assert.Equal(t, "ns1", r.Header.Get("X-MLflow-Workspace")) + assert.Equal(t, "/api/2.0/mlflow/experiments/get-by-name", r.URL.Path) + assert.Equal(t, "my-exp", r.URL.Query().Get("experiment_name")) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment":{"experiment_id":"42","name":"my-exp"}}`)) + })) + defer server.Close() + + mlflowCtx := newTestMLflowRequestContext(t, server.URL) + defaultDesc := DefaultExperimentDescription + exp, err := EnsureExperimentExists(context.Background(), mlflowCtx, "", "my-exp", &defaultDesc) + require.NoError(t, err) + require.NotNil(t, exp) + assert.Equal(t, "42", exp.ID) + assert.Equal(t, "my-exp", exp.Name) + }) + + t.Run("creates experiment when get-by-name returns not found", func(t *testing.T) { + var callCount int + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + assert.Equal(t, "Bearer bearer-secret", r.Header.Get("Authorization")) + assert.Equal(t, "ns1", r.Header.Get("X-MLflow-Workspace")) + switch r.URL.Path { + case "/api/2.0/mlflow/experiments/get-by-name": + callCount++ + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error_code":"RESOURCE_DOES_NOT_EXIST","message":"not found"}`)) + case "/api/2.0/mlflow/experiments/create": + bodyBytes, _ := io.ReadAll(r.Body) + assert.Contains(t, string(bodyBytes), `"name":"my-exp"`) + assert.Contains(t, string(bodyBytes), `"description":"Created by Kubeflow Pipelines"`) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment_id":"99"}`)) + default: + t.Fatalf("unexpected path %s", r.URL.Path) + } + })) + defer server.Close() + + defaultDesc := DefaultExperimentDescription + mlflowCtx := newTestMLflowRequestContext(t, server.URL) + exp, err := EnsureExperimentExists(context.Background(), mlflowCtx, "", "my-exp", &defaultDesc) + require.NoError(t, err) + require.NotNil(t, exp) + assert.Equal(t, "99", exp.ID) + assert.Equal(t, "my-exp", exp.Name) + assert.Equal(t, 1, callCount) + }) +} + +func TestBuildKFPTags(t *testing.T) { + run := &apiserverPlugins.PendingRun{ + RunID: "kfp-run-1", + Namespace: "ns-1", + PipelineID: "pipeline-1", + PipelineVersionID: "pipeline-version-1", + } + tags := BuildKFPTags(run, "", "") + require.Len(t, tags, 4) + assert.Contains(t, tags, commonmlflow.Tag{Key: TagKFPRunID, Value: "kfp-run-1"}) + assert.Contains(t, tags, commonmlflow.Tag{Key: TagKFPRunURL, Value: ""}) + assert.Contains(t, tags, commonmlflow.Tag{Key: TagKFPPipelineID, Value: "pipeline-1"}) + assert.Contains(t, tags, commonmlflow.Tag{Key: TagKFPPipelineVersionID, Value: "pipeline-version-1"}) +} + +func TestBuildKFPTags_WithBaseURL(t *testing.T) { + run := &apiserverPlugins.PendingRun{ + RunID: "run-1", + Namespace: "ns-1", + } + tags := BuildKFPTags(run, "https://kfp.example.com", "") + require.Len(t, tags, 2) + assert.Contains(t, tags, 
commonmlflow.Tag{ + Key: TagKFPRunURL, + Value: "https://kfp.example.com/#/runs/details/run-1", + }) +} + +func TestBuildKFPTags_WithCustomPathTemplate(t *testing.T) { + run := &apiserverPlugins.PendingRun{ + RunID: "run-b", + Namespace: "proj-1", + } + tmpl := "/demo/console/pipelines/{namespace}/runs/{run_id}" + tags := BuildKFPTags(run, "https://console.example.com", tmpl) + require.Len(t, tags, 2) + assert.Contains(t, tags, commonmlflow.Tag{ + Key: TagKFPRunURL, + Value: "https://console.example.com/demo/console/pipelines/proj-1/runs/run-b", + }) +} + +func TestBuildKFPTags_NilRun(t *testing.T) { + assert.Nil(t, BuildKFPTags(nil, "", "")) +} + +func TestCreateRunWithKFPTags(t *testing.T) { + var receivedBody map[string]interface{} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "Bearer bearer-secret", r.Header.Get("Authorization")) + assert.Equal(t, "ns1", r.Header.Get("X-MLflow-Workspace")) + switch r.URL.Path { + case "/api/2.0/mlflow/runs/create": + body, _ := io.ReadAll(r.Body) + require.NoError(t, json.Unmarshal(body, &receivedBody)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"mlflow-parent-run-1"}}}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + mlflowCtx := newTestMLflowRequestContext(t, server.URL) + + run := &apiserverPlugins.PendingRun{ + RunID: "kfp-run-1", + PipelineID: "pipeline-1", + PipelineVersionID: "pipeline-version-1", + } + tags := BuildKFPTags(run, "", "") + mlflowRunID, err := mlflowCtx.Client.CreateRun(context.Background(), "exp-1", "sample-run", tags) + require.NoError(t, err) + assert.Equal(t, "mlflow-parent-run-1", mlflowRunID) + + // Verify tags were included in the CreateRun request body. 
+ rawTags, ok := receivedBody["tags"].([]interface{}) + require.True(t, ok, "tags should be present in CreateRun body") + assert.Len(t, rawTags, 4) +} + +func TestBuildPluginOutput(t *testing.T) { + output := SuccessfulPluginOutput("exp-1", "my-exp", "run-1", "https://mlflow.example/runs/run-1", "https://mlflow.example") + require.NotNil(t, output) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, output.State) + require.Contains(t, output.Entries, "run_url") + require.NotNil(t, output.Entries["run_url"].RenderType) + assert.Equal(t, apiv2beta1.MetadataValue_URL, *output.Entries["run_url"].RenderType) +} + +func TestSetPendingRunPluginOutput(t *testing.T) { + // Start with a PendingRun that already has another plugin's output. + existing := `{"other":{"state":"PLUGIN_SUCCEEDED"}}` + run := &apiserverPlugins.PendingRun{ + RunID: "run-1", + PluginsOutput: &existing, + } + mlflowOutput := SuccessfulPluginOutput("exp-1", "my-exp", "run-1", "https://mlflow.example/runs/run-1", "https://mlflow.example") + err := SetPendingRunPluginOutput(run, "mlflow", mlflowOutput) + require.NoError(t, err) + require.NotNil(t, run.PluginsOutput) + + var envelope pluginsOutputEnvelope + require.NoError(t, json.Unmarshal([]byte(*run.PluginsOutput), &envelope)) + assert.NotNil(t, envelope.others["other"], "pre-existing 'other' entry should be preserved") + assert.NotEmpty(t, envelope.MLflow, "mlflow entry should be set") + var parsed apiv2beta1.PluginOutput + require.NoError(t, protojson.Unmarshal(envelope.MLflow, &parsed)) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, parsed.State) + assert.Contains(t, parsed.Entries, "experiment_id") +} + +func TestToMLflowTerminalStatus(t *testing.T) { + assert.Equal(t, "FINISHED", ToMLflowTerminalStatus("SUCCEEDED")) + assert.Equal(t, "KILLED", ToMLflowTerminalStatus("CANCELED")) + assert.Equal(t, "KILLED", ToMLflowTerminalStatus("CANCELING")) + assert.Equal(t, "FAILED", ToMLflowTerminalStatus("FAILED")) + assert.Equal(t, 
"FAILED", ToMLflowTerminalStatus("UNKNOWN")) +} + +func strPtr(s string) *string { + return &s +} + +// fakeKubeClientProvider implements KubeClientProvider for tests. +type fakeKubeClientProvider struct { + clientSet kubernetes.Interface +} + +func (f *fakeKubeClientProvider) GetClientSet() kubernetes.Interface { + return f.clientSet +} + +func TestResolveMLflowCredentials_EmptySAToken(t *testing.T) { + setupFakeKubernetesConfig(t, "") + + _, err := commonmlflow.ResolveMLflowCredentials() + require.Error(t, err) + assert.Contains(t, err.Error(), "bearer token is empty") +} + +func TestInjectMLflowRuntimeEnv(t *testing.T) { + workflow := util.NewWorkflow(&workflowapi.Workflow{ + Spec: workflowapi.WorkflowSpec{ + Templates: []workflowapi.Template{ + { + Name: "system-dag-driver", + Metadata: workflowapi.Metadata{ + Annotations: map[string]string{ + util.AnnotationKeyRuntimeRole: string(util.ExecutionRuntimeRoleDriver), + }, + }, + Container: &corev1.Container{Args: []string{"--type", "DAG"}}, + }, + { + Name: "system-container-impl", + Metadata: workflowapi.Metadata{ + Annotations: map[string]string{ + util.AnnotationKeyRuntimeRole: string(util.ExecutionRuntimeRoleLauncher), + }, + }, + Container: &corev1.Container{}, + InitContainers: []workflowapi.UserContainer{ + {Container: corev1.Container{Name: "kfp-launcher", Args: []string{"--copy", "/kfp-launcher/launch"}}}, + }, + }, + }, + }, + }) + + env := map[string]string{ + commonmlflow.EnvMLflowConfig: `{"endpoint":"https://mlflow.example.com","parentRunId":"abc"}`, + } + err := InjectMLflowRuntimeEnv(workflow, env) + require.NoError(t, err) + + expectedEnv := corev1.EnvVar{Name: commonmlflow.EnvMLflowConfig, Value: env[commonmlflow.EnvMLflowConfig]} + + // Driver container gets the env var. + assert.Contains(t, workflow.Spec.Templates[0].Container.Env, expectedEnv) + + // Launcher main container (template with --copy init container) gets the env var. 
+ assert.Contains(t, workflow.Spec.Templates[1].Container.Env, expectedEnv) + + // Launcher init container does NOT get the env var (it only copies the binary). + assert.NotContains(t, workflow.Spec.Templates[1].InitContainers[0].Env, expectedEnv) +} + +func TestInjectMLflowRuntimeEnv_NilSpec(t *testing.T) { + err := InjectMLflowRuntimeEnv(nil, map[string]string{"key": "val"}) + require.NoError(t, err, "nil spec should be a no-op") +} + +func TestInjectMLflowRuntimeEnv_EmptyEnv(t *testing.T) { + workflow := util.NewWorkflow(&workflowapi.Workflow{}) + err := InjectMLflowRuntimeEnv(workflow, map[string]string{}) + require.NoError(t, err, "empty env should be a no-op") +} + +// newTestMLflowRequestContext creates a *commonmlflow.RequestContext backed by a service account +// token, pointing at serverURL with workspaces enabled. +func newTestMLflowRequestContext(t *testing.T, serverURL string) *commonmlflow.RequestContext { + t.Helper() + setupFakeKubernetesConfig(t, "bearer-secret") + + enabled := true + mlflowPluginCfg := &commonmlflow.MLflowPluginConfig{ + Endpoint: serverURL, + Timeout: "10s", + TLS: &commonplugins.TLSConfig{ + InsecureSkipVerify: false, + }, + Settings: &commonmlflow.MLflowPluginSettings{ + WorkspacesEnabled: &enabled, + }, + } + ctx, err := BuildMLflowRunRequestContext("ns1", mlflowPluginCfg) + require.NoError(t, err) + require.NotNil(t, ctx) + return ctx +} diff --git a/backend/src/apiserver/plugins/mlflow/factory.go b/backend/src/apiserver/plugins/mlflow/factory.go new file mode 100644 index 00000000000..48b65bc2663 --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/factory.go @@ -0,0 +1,30 @@ +package mlflow + +import ( + "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" +) + +func init() { + plugins.RegisterHandlerFactory(&mlflowHandlerFactory{}) +} + +type mlflowHandlerFactory struct{} + +func (f *mlflowHandlerFactory) Name() string { + return "MLflow" +} + +func (f *mlflowHandlerFactory) IsEnabled() bool { + return 
IsEnabled() +} + +func (f *mlflowHandlerFactory) Create() (plugins.RunPluginHandler, error) { + cfg, ok, err := GetGlobalMLflowConfig() + if err != nil { + return nil, err + } + if !ok { + return nil, nil + } + return NewMLflowRunHandler(&cfg), nil +} diff --git a/backend/src/apiserver/plugins/mlflow/factory_test.go b/backend/src/apiserver/plugins/mlflow/factory_test.go new file mode 100644 index 00000000000..564512724c6 --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/factory_test.go @@ -0,0 +1,86 @@ +package mlflow + +import ( + "testing" + + "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMlflowHandlerFactory_Name(t *testing.T) { + factory := &mlflowHandlerFactory{} + + assert.Equal(t, "MLflow", factory.Name()) +} + +func TestMlflowHandlerFactory_IsEnabled_ConfigSet(t *testing.T) { + viper.Set("plugins.mlflow", map[string]interface{}{ + "endpoint": "http://localhost", + }) + t.Cleanup(viper.Reset) + + factory := &mlflowHandlerFactory{} + + assert.True(t, factory.IsEnabled()) +} + +func TestMlflowHandlerFactory_IsEnabled_ConfigUnset(t *testing.T) { + viper.Reset() + + factory := &mlflowHandlerFactory{} + + assert.False(t, factory.IsEnabled()) +} + +func TestMlflowHandlerFactory_Create_Success(t *testing.T) { + viper.Set("plugins.mlflow", map[string]interface{}{ + "endpoint": "http://localhost", + "timeout": "30s", + }) + t.Cleanup(viper.Reset) + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.NoError(t, err) + require.NotNil(t, handler) + assert.Equal(t, "mlflow", handler.Name()) +} + +func TestMlflowHandlerFactory_Create_ConfigUnset(t *testing.T) { + viper.Reset() + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.NoError(t, err) + assert.Nil(t, handler) +} + +func TestMlflowHandlerFactory_Create_InvalidConfig(t *testing.T) { + 
viper.Set("plugins.mlflow", "not-a-valid-json-object") + t.Cleanup(viper.Reset) + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.Error(t, err) + assert.Nil(t, handler) +} + +func TestInitRegistersFactory(t *testing.T) { + t.Cleanup(plugins.ResetRegistry) + + registered := plugins.RegisteredFactories() + + var found bool + for _, factory := range registered { + if factory.Name() == "MLflow" { + found = true + break + } + } + assert.True(t, found, "init() should register an MLflow factory in the global registry") +} diff --git a/backend/src/apiserver/plugins/mlflow/handler.go b/backend/src/apiserver/plugins/mlflow/handler.go new file mode 100644 index 00000000000..d2a0ce04367 --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/handler.go @@ -0,0 +1,265 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mlflow + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/golang/glog" + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" +) + +var _ apiserverPlugins.RunPluginHandler = (*Handler)(nil) + +// Handler implements PluginHandler for the MLflow integration. 
+type Handler struct { + // pluginGlobalConfig holds the global configuration for the MLflow plugin, including endpoint, timeout, and TLS settings. + pluginGlobalConfig *apiserverPlugins.PluginConfig + + // RunStartEnv is populated by OnBeforeRunCreation with the single + // KFP_MLFLOW_CONFIG env var for the driver and launcher. + RunStartEnv map[string]string +} + +// NewMLflowRunHandler creates a new MLflow plugin handler. +func NewMLflowRunHandler(cfg *apiserverPlugins.PluginConfig) *Handler { + return &Handler{ + pluginGlobalConfig: cfg, + } +} + +// Name returns the name of the MLflow plugin handler. +func (h *Handler) Name() string { + return "mlflow" +} + +// GlobalPluginConfig re-reads the global plugin configuration from Viper so +// that runtime config changes (e.g. admin removing the MLflow config) are +// reflected without restarting the API server. +func (h *Handler) GlobalPluginConfig() *apiserverPlugins.PluginConfig { + cfg, ok, err := GetGlobalMLflowConfig() + if err != nil || !ok { + return nil + } + return &cfg +} + +// OnBeforeRunCreation creates the MLflow experiment and parent run, tags it +// with KFP metadata, and populates RunStartEnv with tracking env vars. 
+func (h *Handler) OnBeforeRunCreation(ctx context.Context, run *apiserverPlugins.PendingRun, config *apiserverPlugins.PluginConfig) (*apiv2beta1.PluginOutput, map[string]string, error) {
+	// Nothing to do without a handler, a run, or a plugin configuration.
+	if h == nil || run == nil || config == nil {
+		return nil, nil, nil
+	}
+	mlflowPluginInput, err := ResolveMLflowPluginInput(run.PluginsInput)
+	if err != nil {
+		// %w keeps the underlying error available to errors.Is / errors.As.
+		return nil, nil, fmt.Errorf("MLflow run cancelled due to error retrieving run-level plugin input: %w", err)
+	}
+	// The run explicitly opted out of MLflow tracking.
+	if mlflowPluginInput != nil && mlflowPluginInput.Disabled {
+		return nil, nil, nil
+	}
+	if mlflowPluginInput == nil {
+		mlflowPluginInput = &MLflowPluginInput{}
+	}
+
+	// The configured endpoint is recorded in every plugin output below so
+	// later syncs can reuse it.
+	endpoint := config.Endpoint
+
+	resolvedSettings := ResolvePluginSettings(config.Settings)
+
+	experimentID, experimentName := SelectMLflowExperiment(mlflowPluginInput, resolvedSettings)
+	if experimentID != "" {
+		glog.V(4).Infof("Resolved MLflow experiment selector for run creation: experiment_id=%q (create-by-name skipped)", experimentID)
+	} else {
+		glog.V(4).Infof("Resolved MLflow experiment selector for run creation: experiment_name=%q", experimentName)
+	}
+
+	resolvedCfg, err := ResolveMLflowPluginConfig(config, resolvedSettings)
+	if err != nil {
+		// NOTE(review): the message says run creation will continue, yet the
+		// error is returned to the caller — confirm how callers treat it.
+		message := "MLflow config resolution failed; run creation will continue: " + err.Error()
+		glog.Warningf("MLflow OnBeforeRunCreation failed for run %q (%s)", run.RunID, message)
+		pluginOutput := FailedPluginOutput(experimentID, experimentName, "", "", "", message)
+		if outputErr := SetPendingRunPluginOutput(run, h.Name(), pluginOutput); outputErr != nil {
+			glog.Warningf("Failed to persist MLflow plugin output for run %q: %v", run.RunID, outputErr)
+		}
+		return nil, nil, err
+	}
+	// TODO: refactor here.
+	settings := resolvedCfg.Settings
+
+	mlflowRequestCtx, err := BuildMLflowRunRequestContext(run.Namespace, resolvedCfg)
+	if err != nil {
+		return FailedPluginOutput(experimentID, experimentName, "", "", endpoint, fmt.Sprintf("failed to build MLflow request context: %v", err)), nil, err
+	}
+
+	mlflowExperiment, err := EnsureExperimentExists(
+		ctx,
+		mlflowRequestCtx,
+		experimentID,
+		experimentName,
+		settings.ExperimentDescription,
+	)
+	if err != nil {
+		return FailedPluginOutput(experimentID, experimentName, "", "", endpoint, err.Error()), nil, err
+	}
+
+	tags := BuildKFPTags(run, settings.KFPBaseURL, settings.KFPRunURLPathTemplate)
+	parentRunID, err := mlflowRequestCtx.Client.CreateRun(ctx, mlflowExperiment.ID, run.DisplayName, tags)
+	if err != nil {
+		return FailedPluginOutput(mlflowExperiment.ID, mlflowExperiment.Name, "", "", endpoint, err.Error()), nil, err
+	}
+
+	insecureSkipVerify := false
+	if config.TLS != nil {
+		insecureSkipVerify = config.TLS.InsecureSkipVerify
+	}
+	// Workspaces are optional; the run's namespace is used as the workspace
+	// only when the setting is present and true.
+	workspacesEnabled := settings.WorkspacesEnabled != nil && *settings.WorkspacesEnabled
+	workspace := ""
+	if workspacesEnabled {
+		workspace = run.Namespace
+	}
+	// TLS.CABundlePath is intentionally omitted: driver/launcher pods do not
+	// have access to the API server's CA bundle file. CA trust for those pods
+	// is configured via cluster-wide trusted CA injection or volume mounts
+	// managed outside the plugin config. Only InsecureSkipVerify is carried
+	// over because it is a boolean flag independent of filesystem context.
+	mlflowRuntimeConfig := commonmlflow.MLflowRuntimeConfig{
+		Endpoint:           mlflowRequestCtx.BaseURL.String(),
+		Workspace:          workspace,
+		WorkspacesEnabled:  workspacesEnabled,
+		ParentRunID:        parentRunID,
+		ExperimentID:       mlflowExperiment.ID,
+		AuthType:           commonmlflow.AuthTypeKubernetes,
+		Timeout:            resolvedCfg.Timeout,
+		InsecureSkipVerify: insecureSkipVerify,
+		InjectUserEnvVars:  settings.InjectUserEnvVars != nil && *settings.InjectUserEnvVars,
+	}
+	mlflowConfigJSON, err := json.Marshal(mlflowRuntimeConfig)
+	if err != nil {
+		return FailedPluginOutput(mlflowExperiment.ID, mlflowExperiment.Name, parentRunID, "", endpoint, fmt.Sprintf("failed to marshal MLflow runtime config: %v", err)), nil, err
+	}
+	runStartEnv := map[string]string{
+		commonmlflow.EnvMLflowConfig: string(mlflowConfigJSON),
+	}
+	// NOTE(review): RunStartEnv is handler-level state. If one Handler serves
+	// concurrent run creations this write is unsynchronized — confirm whether
+	// callers can rely on the returned map alone.
+	h.RunStartEnv = runStartEnv
+
+	runURL := BuildRunURL(mlflowRequestCtx, mlflowExperiment.ID, parentRunID, settings)
+	return SuccessfulPluginOutput(mlflowExperiment.ID, mlflowExperiment.Name, parentRunID, runURL, endpoint), runStartEnv, nil
+}
+
+// OnRunEnd marks the MLflow parent run and any active nested runs as
+// complete/failed when the KFP run reaches a terminal state.
+//
+// A nil handler or run is a no-op. With a nil config the existing MLflow
+// plugin output (if any) is marked PLUGIN_FAILED and nil is returned. An error
+// is returned only when the plugin configuration cannot be resolved.
+func (h *Handler) OnRunEnd(ctx context.Context, run *apiserverPlugins.PersistedRun, config *apiserverPlugins.PluginConfig) error {
+	if h == nil || run == nil {
+		return nil
+	}
+	if config == nil {
+		pluginOutput := run.PluginsOutput[PluginName]
+		if pluginOutput != nil {
+			SetPluginOutputState(pluginOutput, apiv2beta1.PluginState_PLUGIN_FAILED, "MLflow terminal sync failed: config unavailable")
+		}
+		return nil
+	}
+	resolvedSettings := ResolvePluginSettings(config.Settings)
+	resolvedMLflowCfg, err := ResolveMLflowPluginConfig(config, resolvedSettings)
+	if err != nil {
+		return err
+	}
+	return h.syncOnRunTerminal(ctx, run, resolvedMLflowCfg, run.Namespace)
+}
+
+// syncOnRunTerminal marks the MLflow parent and nested runs as complete/failed.
+func (h *Handler) syncOnRunTerminal(ctx context.Context, run *apiserverPlugins.PersistedRun, config *commonmlflow.MLflowPluginConfig, namespace string) error { + endTimeMs := int64(0) + endTimeRef := (*int64)(nil) + if run.FinishedAt != nil { + endTimeMs = run.FinishedAt.UnixMilli() + endTimeRef = &endTimeMs + } + terminalStatus := ToMLflowTerminalStatus(run.State) + h.syncMLflowRuns(ctx, run, config, RunSyncModeTerminal, terminalStatus, endTimeRef, "terminal", namespace) + return nil +} + +// HandleRetry reopens the MLflow parent run and any failed/killed nested runs. +func (h *Handler) HandleRetry(ctx context.Context, run *apiserverPlugins.PersistedRun, config *apiserverPlugins.PluginConfig) { + if config == nil { + pluginOutput := run.PluginsOutput[PluginName] + if pluginOutput != nil { + SetPluginOutputState(pluginOutput, apiv2beta1.PluginState_PLUGIN_FAILED, "MLflow retry sync failed: config unavailable") + } + return + } + resolvedSettings := ResolvePluginSettings(config.Settings) + resolvedMLflowCfg, err := ResolveMLflowPluginConfig(config, resolvedSettings) + if err != nil { + glog.Errorf("failed to resolve MLflow plugin config: %v", err) + return + } + + h.syncMLflowRuns(ctx, run, resolvedMLflowCfg, RunSyncModeRetry, "", nil, "retry", run.Namespace) +} + +// syncMLflowRuns resolves the MLflow request context, syncs the parent and nested runs, and +// updates the plugin output state. 
+func (h *Handler) syncMLflowRuns(ctx context.Context, run *apiserverPlugins.PersistedRun, config *commonmlflow.MLflowPluginConfig, mode RunSyncMode, terminalStatus string, endTimeRef *int64, label string, namespace string) {
+	// Without an MLflow plugin output there is nothing to sync or to update.
+	pluginOutput := run.PluginsOutput[PluginName]
+	if pluginOutput == nil {
+		return
+	}
+
+	// markFailed logs msg and records it on the plugin output as PLUGIN_FAILED.
+	markFailed := func(msg string) {
+		glog.Warning(msg)
+		SetPluginOutputState(pluginOutput, apiv2beta1.PluginState_PLUGIN_FAILED, msg)
+	}
+
+	parentRunID := GetParentRunID(pluginOutput)
+	experimentID := GetStringEntry(pluginOutput, EntryExperimentID)
+	if parentRunID == "" {
+		markFailed(fmt.Sprintf("MLflow %s sync skipped: missing parent root_run_id in plugins_output.mlflow", label))
+		return
+	}
+
+	if config == nil {
+		markFailed(fmt.Sprintf("MLflow %s sync failed: config unavailable", label))
+		return
+	}
+
+	// Use the endpoint stored at run-start time so that in-flight runs
+	// always talk to the MLflow server where their parent run was created,
+	// even if the admin changes the endpoint while the run is in progress.
+	// The override is applied to a shallow copy so the caller's config is
+	// left untouched.
+	effectiveConfig := config
+	if storedEndpoint := GetStringEntry(pluginOutput, EntryEndpoint); storedEndpoint != "" {
+		configCopy := *config
+		configCopy.Endpoint = storedEndpoint
+		effectiveConfig = &configCopy
+	}
+
+	mlflowRequestCtx, err := BuildMLflowRunRequestContext(namespace, effectiveConfig)
+	if err != nil {
+		markFailed(fmt.Sprintf("MLflow %s sync failed: %v", label, err))
+		return
+	}
+
+	syncErrors := SyncParentAndNestedRuns(ctx, mlflowRequestCtx, parentRunID, experimentID, mode, terminalStatus, endTimeRef)
+	if len(syncErrors) > 0 {
+		// All sync errors are joined into a single state message.
+		msg := strings.Join(syncErrors, "; ")
+		glog.Warningf("MLflow %s sync encountered errors for run %s: %s", label, run.RunID, msg)
+		SetPluginOutputState(pluginOutput, apiv2beta1.PluginState_PLUGIN_FAILED, msg)
+	} else {
+		SetPluginOutputState(pluginOutput, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, "")
+	}
+}
diff --git a/backend/src/apiserver/plugins/mlflow/handler_test.go b/backend/src/apiserver/plugins/mlflow/handler_test.go
new file mode 100644
index 00000000000..42ca8cfcdaa
--- /dev/null
+++ b/backend/src/apiserver/plugins/mlflow/handler_test.go
@@ -0,0 +1,683 @@
+// Copyright 2026 The Kubeflow Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mlflow
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"strings"
+	"testing"
+
+	apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client"
+	"github.com/kubeflow/pipelines/backend/src/apiserver/common"
+	"github.com/kubeflow/pipelines/backend/src/apiserver/model"
+	apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins"
+	commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins"
+	commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow"
+	"github.com/spf13/viper"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// ---- Helpers ----
+
+// setupSAToken installs a fake Kubernetes config with the token
+// "test-sa-token" and returns a no-op function so call sites can keep the
+// `cleanup := ...; defer cleanup()` shape.
+func setupSAToken(t *testing.T) func() {
+	t.Helper()
+	setupFakeKubernetesConfig(t, "test-sa-token")
+	return func() {} // cleanup handled by t.Cleanup in setupFakeKubernetesConfig
+}
+
+// testGeneralPluginConfig returns an API-server-level plugin config pointing
+// at endpoint, with a 10s timeout, TLS verification left on, and the
+// WorkspacesEnabled setting supplied as the string "true".
+func testGeneralPluginConfig(endpoint string) *apiserverPlugins.PluginConfig {
+	return &apiserverPlugins.PluginConfig{
+		Endpoint: endpoint,
+		Timeout:  "10s",
+		TLS: &commonplugins.TLSConfig{
+			InsecureSkipVerify: false,
+		},
+		Settings: map[string]interface{}{
+			"WorkspacesEnabled": "true",
+		},
+	}
+}
+
+// testPluginConfig returns an MLflow plugin config pointing at endpoint with
+// a 10s timeout and workspaces enabled.
+func testPluginConfig(endpoint string) *commonmlflow.MLflowPluginConfig {
+	enabled := true
+	return &commonmlflow.MLflowPluginConfig{
+		Endpoint: endpoint,
+		Timeout:  "10s",
+		Settings: &commonmlflow.MLflowPluginSettings{WorkspacesEnabled: &enabled},
+	}
+}
+
+// updatedTestPendingRun builds a PendingRun in namespace "ns1". When
+// pluginsInput is non-nil it is JSON-encoded under the "plugins.mlflow" key
+// and attached as PluginsInput; otherwise PluginsInput stays nil.
+func updatedTestPendingRun(id, displayName string, pluginsInput *MLflowPluginInput) *apiserverPlugins.PendingRun {
+	var pluginsInputPtr *string
+	if pluginsInput != nil {
+		wrapper := map[string]interface{}{
+			"plugins.mlflow": pluginsInput,
+		}
+		// The marshal error is deliberately ignored in this fixture.
+		jsonData, _ := json.Marshal(wrapper)
+		pluginsInputStr := string(jsonData)
+		pluginsInputPtr = &pluginsInputStr
+	}
+	return &apiserverPlugins.PendingRun{
+		RunID:        id,
+		DisplayName:  displayName,
+		Namespace:    "ns1",
+		PluginsInput: pluginsInputPtr,
+	}
+}
+
+// testPendingRun builds a PendingRun in namespace "ns1" with no plugins input.
+func testPendingRun(id, displayName string) *apiserverPlugins.PendingRun {
+	return &apiserverPlugins.PendingRun{
+		RunID:       id,
+		DisplayName: displayName,
+		Namespace:   "ns1",
+	}
+}
+
+// testPersistedRun builds a PersistedRun in namespace "ns1" with an empty,
+// non-nil PluginsOutput map.
+func testPersistedRun(id string) *apiserverPlugins.PersistedRun {
+	return &apiserverPlugins.PersistedRun{
+		RunID:         id,
+		Namespace:     "ns1",
+		PluginsOutput: make(map[string]*apiv2beta1.PluginOutput),
+	}
+}
+
+// testPersistedRunWithPluginOutput is testPersistedRun with pluginOutput
+// stored under PluginName; a nil pluginOutput leaves the map empty.
+func testPersistedRunWithPluginOutput(id string, pluginOutput *apiv2beta1.PluginOutput) *apiserverPlugins.PersistedRun {
+	r := testPersistedRun(id)
+	if pluginOutput != nil {
+		r.PluginsOutput[PluginName] = pluginOutput
+	}
+	return r
+}
+
+// ---- OnBeforeRunCreation tests ----
+
+// A nil plugin config makes OnBeforeRunCreation a no-op: no output, no env,
+// no error.
+func TestOnBeforeRunCreation_NilConfig_ReturnsNil(t *testing.T) {
+	handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost"))
+	output, env, err := handler.OnBeforeRunCreation(context.Background(), testPendingRun("r1", "run-1"), nil)
+
+	require.NoError(t, err)
+	assert.Empty(t, env)
+	assert.Nil(t, output)
+}
+
+// TODO: the fixtures hard-code namespace=ns1 — are we missing coverage for a run with no namespace?
+func TestOnBeforeRunCreation_Disabled_ReturnsNil(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginInput := &MLflowPluginInput{Disabled: true} + output, env, err := handler.OnBeforeRunCreation(context.Background(), updatedTestPendingRun("r1", "run-1", pluginInput), testGeneralPluginConfig("http://localhost")) + + require.NoError(t, err) + assert.Empty(t, env) + assert.Nil(t, output) +} + +func TestOnBeforeRunCreation_NilInput_ReturnsNil(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + output, env, err := handler.OnBeforeRunCreation(context.Background(), updatedTestPendingRun("r1", "run-1", nil), testGeneralPluginConfig("http://localhost")) + + require.NoError(t, err) + assert.Empty(t, env) + assert.Nil(t, output) +} + +func TestOnBeforeRunCreation_Success(t *testing.T) { + cleanup := setupSAToken(t) + defer cleanup() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/experiments/get-by-name": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment":{"experiment_id":"exp-42","name":"Default"}}`)) + case "/api/2.0/mlflow/runs/create": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"mlflow-run-1"}}}`)) + case "/api/2.0/mlflow/runs/set-tag": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + viper.Set(common.MultiUserMode, false) + t.Cleanup(func() { viper.Set(common.MultiUserMode, nil) }) + + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + run := updatedTestPendingRun("kfp-run-1", "my-run", &MLflowPluginInput{}) + output, env, err := handler.OnBeforeRunCreation(context.Background(), run, testGeneralPluginConfig(server.URL)) + require.NoError(t, err) + require.NotEmpty(t, env) + require.NotNil(t, 
output) + + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, output.State) + assert.Contains(t, output.Entries, EntryExperimentID) + assert.Equal(t, "exp-42", output.Entries[EntryExperimentID].Value.GetStringValue()) + assert.Contains(t, output.Entries, EntryRootRunID) + assert.Equal(t, "mlflow-run-1", output.Entries[EntryRootRunID].Value.GetStringValue()) + + // Verify RunStartEnv contains single KFP_MLFLOW_CONFIG JSON env var + require.NotEmpty(t, handler.RunStartEnv) + assert.Contains(t, handler.RunStartEnv, commonmlflow.EnvMLflowConfig) + + var rtCfg commonmlflow.MLflowRuntimeConfig + require.NoError(t, json.Unmarshal([]byte(handler.RunStartEnv[commonmlflow.EnvMLflowConfig]), &rtCfg)) + assert.Contains(t, rtCfg.Endpoint, server.URL) + assert.Equal(t, "ns1", rtCfg.Workspace) + assert.Equal(t, "mlflow-run-1", rtCfg.ParentRunID) + assert.Equal(t, "exp-42", rtCfg.ExperimentID) + assert.Equal(t, "kubernetes", rtCfg.AuthType) + assert.False(t, rtCfg.InjectUserEnvVars, "InjectUserEnvVars should default to false") +} + +func TestOnBeforeRunCreation_MLflowFailure_ReturnsFailedOutput(t *testing.T) { + cleanup := setupSAToken(t) + defer cleanup() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error_code":"INTERNAL_ERROR","message":"server down"}`)) + })) + defer server.Close() + + viper.Set(common.MultiUserMode, false) + t.Cleanup(func() { viper.Set(common.MultiUserMode, nil) }) + + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + run := updatedTestPendingRun("kfp-run-2", "run-2", &MLflowPluginInput{}) + output, env, err := handler.OnBeforeRunCreation(context.Background(), run, testGeneralPluginConfig(server.URL)) + require.Error(t, err) + assert.Empty(t, env) + require.NotNil(t, output) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_FAILED, output.State) + assert.NotEmpty(t, output.StateMessage) +} + +// ---- 
OnRunEnd / syncOnRunTerminal tests ---- + +func TestOnRunEnd_NilRun_ReturnsNil(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + err := handler.OnRunEnd(context.Background(), nil, testGeneralPluginConfig("http://localhost")) + require.NoError(t, err) +} + +func TestOnRunEnd_NoPluginOutput_ReturnsNil(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + run := testPersistedRun("r1") + err := handler.OnRunEnd(context.Background(), run, testGeneralPluginConfig("http://localhost")) + require.NoError(t, err) +} + +func TestOnRunEnd_MissingRootRunID_SetsFailedState(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + // Build a run with plugin output that has no root_run_id + pluginOutput := SuccessfulPluginOutput("42", "Default", "", "", "") + run := testPersistedRunWithPluginOutput("r-missing-root", pluginOutput) + + err := handler.OnRunEnd(context.Background(), run, testGeneralPluginConfig("http://localhost")) + require.NoError(t, err) + + // Verify the plugin output was updated in place + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_FAILED, result.State) + assert.Contains(t, result.StateMessage, "missing parent root_run_id") +} + +func TestOnRunEnd_NilConfig_SetsFailedState(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginOutput := SuccessfulPluginOutput("42", "Default", "parent-1", "", "") + run := testPersistedRunWithPluginOutput("r-nil-config", pluginOutput) + + err := handler.OnRunEnd(context.Background(), run, nil) + require.NoError(t, err) + + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_FAILED, result.State) + assert.Contains(t, result.StateMessage, "config unavailable") +} + +func TestOnRunEnd_Success(t *testing.T) { + cleanup := 
setupSAToken(t) + defer cleanup() + + var updateCalls []string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/update": + body, _ := io.ReadAll(r.Body) + updateCalls = append(updateCalls, string(body)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + case "/api/2.0/mlflow/runs/search": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"runs":[]}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginOutput := SuccessfulPluginOutput("exp-1", "Default", "mlflow-parent-1", "", server.URL) + run := testPersistedRunWithPluginOutput("r-end-1", pluginOutput) + run.State = "SUCCEEDED" + + err := handler.OnRunEnd(context.Background(), run, testGeneralPluginConfig(server.URL)) + require.NoError(t, err) + + // Parent run should have been updated + require.NotEmpty(t, updateCalls) + assert.Contains(t, updateCalls[0], "mlflow-parent-1") + assert.Contains(t, updateCalls[0], "FINISHED") // SUCCEEDED maps to FINISHED + + // Plugin output should be updated in place + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, result.State) +} + +// ---- HandleRetry tests ---- + +func TestHandleRetry_NoPluginOutput_NoOp(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + run := testPersistedRun("r-retry-noop") + + handler.HandleRetry(context.Background(), run, testGeneralPluginConfig("http://localhost")) + // No plugin output → nothing to do + assert.Empty(t, run.PluginsOutput) +} + +func TestHandleRetry_MissingRootRunID_SetsFailedState(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginOutput := SuccessfulPluginOutput("42", "Default", "", "", "") + run := 
testPersistedRunWithPluginOutput("r-retry-no-root", pluginOutput) + + handler.HandleRetry(context.Background(), run, testGeneralPluginConfig("http://localhost")) + + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_FAILED, result.State) + assert.Contains(t, result.StateMessage, "missing parent root_run_id") +} + +func TestHandleRetry_NilConfig_SetsFailedState(t *testing.T) { + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginOutput := SuccessfulPluginOutput("42", "Default", "parent-1", "", "") + run := testPersistedRunWithPluginOutput("r-retry-nil-config", pluginOutput) + + handler.HandleRetry(context.Background(), run, nil) + + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_FAILED, result.State) + assert.Contains(t, result.StateMessage, "config unavailable") +} + +func TestHandleRetry_Success(t *testing.T) { + cleanup := setupSAToken(t) + defer cleanup() + + var updatePayloads []string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/update": + body, _ := io.ReadAll(r.Body) + updatePayloads = append(updatePayloads, string(body)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + case "/api/2.0/mlflow/runs/search": + // Return one failed nested run + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"runs":[{"info":{"run_id":"nested-1","status":"FAILED"}}]}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + handler := NewMLflowRunHandler(testGeneralPluginConfig("http://localhost")) + + pluginOutput := FailedPluginOutput("exp-1", "Default", "parent-1", "", server.URL, "previous failure") + run := testPersistedRunWithPluginOutput("r-retry-ok", pluginOutput) + + handler.HandleRetry(context.Background(), run, 
testGeneralPluginConfig(server.URL)) + + // Parent reopened + nested-1 reopened = 2 update calls + require.Len(t, updatePayloads, 2) + assert.Contains(t, updatePayloads[0], "parent-1") + assert.Contains(t, updatePayloads[0], "RUNNING") + assert.Contains(t, updatePayloads[1], "nested-1") + assert.Contains(t, updatePayloads[1], "RUNNING") + + // Plugin output updated in place + result := run.PluginsOutput[PluginName] + require.NotNil(t, result) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, result.State) +} + +// ---- BuildKFPRunURL tests ---- + +func TestBuildKFPRunURL(t *testing.T) { + tests := []struct { + name string + runID string + namespace string + kfpBaseURL string + pathTemplate string + wantURL string + }{ + { + name: "empty runID returns empty", + runID: "", + wantURL: "", + }, + { + name: "no base URL returns empty", + runID: "abc", + wantURL: "", + }, + { + name: "default KFP UI hash route", + runID: "run-xyz", + namespace: "team-a", + kfpBaseURL: "https://kfp.example.com", + wantURL: "https://kfp.example.com/#/runs/details/run-xyz", + }, + { + name: "default hash route without namespace segment", + runID: "run-xyz", + namespace: "", + kfpBaseURL: "https://kfp.example.com", + wantURL: "https://kfp.example.com/#/runs/details/run-xyz", + }, + { + name: "path template with placeholders", + runID: "run-b", + namespace: "ns-a", + kfpBaseURL: "https://console.example.com", + pathTemplate: "/demo/console/pipelines/{namespace}/runs/{run_id}", + wantURL: "https://console.example.com/demo/console/pipelines/ns-a/runs/run-b", + }, + { + name: "path template without leading slash normalized", + runID: "r", + namespace: "n", + kfpBaseURL: "https://x.example", + pathTemplate: "clusters/{namespace}/runs/{run_id}", + wantURL: "https://x.example/clusters/n/runs/r", + }, + { + name: "template with namespace placeholder rejects empty ns", + runID: "run-xyz", + namespace: "", + kfpBaseURL: "https://kfp.example.com", + pathTemplate: 
"/demo/console/pipelines/{namespace}/runs/{run_id}", + wantURL: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := BuildKFPRunURL(tt.runID, tt.namespace, tt.kfpBaseURL, tt.pathTemplate) + assert.Equal(t, tt.wantURL, got) + }) + } +} + +func TestBuildRunURL(t *testing.T) { + mustParseURL := func(raw string) *url.URL { + t.Helper() + u, err := url.Parse(raw) + require.NoError(t, err) + return u + } + tests := []struct { + name string + requestCtx *commonmlflow.RequestContext + experimentID string + runID string + settings *commonmlflow.MLflowPluginSettings + wantURL string + }{ + { + name: "endpoint base with default hash route", + requestCtx: &commonmlflow.RequestContext{BaseURL: mustParseURL("https://tracking.example:5000")}, + experimentID: "5", + runID: "abc123", + wantURL: "https://tracking.example:5000/#/experiments/5/runs/abc123", + }, + { + name: "mlflowBaseURL overrides browser entry point", + requestCtx: &commonmlflow.RequestContext{ + BaseURL: mustParseURL("http://mlflow.internal.svc.cluster.local:5000"), + }, + experimentID: "9", + runID: "run-z", + settings: &commonmlflow.MLflowPluginSettings{ + MLflowBaseURL: "https://mlflow.example.com", + }, + wantURL: "https://mlflow.example.com/#/experiments/9/runs/run-z", + }, + { + name: "optional path prefix before fragment", + requestCtx: &commonmlflow.RequestContext{ + BaseURL: mustParseURL("https://dashboard.example.com"), + }, + experimentID: "1", + runID: "r1", + settings: &commonmlflow.MLflowPluginSettings{MLflowUIPathPrefix: "/mlflow"}, + wantURL: "https://dashboard.example.com/mlflow/#/experiments/1/runs/r1", + }, + { + name: "workspace query in hash fragment", + requestCtx: &commonmlflow.RequestContext{ + BaseURL: mustParseURL("https://tracking.example"), + WorkspacesEnabled: true, + Workspace: "mlflow-ws-1", + }, + experimentID: "5", + runID: "abc123", + wantURL: "https://tracking.example/#/experiments/5/runs/abc123?workspace=mlflow-ws-1", + }, + { + name: 
"mlflowBaseURL without requestCtx.BaseURL", + requestCtx: &commonmlflow.RequestContext{}, + experimentID: "2", + runID: "run-a", + settings: &commonmlflow.MLflowPluginSettings{ + MLflowBaseURL: "https://ml.example", + }, + wantURL: "https://ml.example/#/experiments/2/runs/run-a", + }, + { + name: "no mount base yields empty", + requestCtx: &commonmlflow.RequestContext{}, + experimentID: "5", + runID: "x", + wantURL: "", + }, + { + name: "missing experiment id yields empty", + requestCtx: &commonmlflow.RequestContext{BaseURL: mustParseURL("https://x")}, + experimentID: "", + runID: "y", + wantURL: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := BuildRunURL(tt.requestCtx, tt.experimentID, tt.runID, tt.settings) + assert.Equal(t, tt.wantURL, got) + }) + } +} + +func TestShouldSyncNestedRun(t *testing.T) { + t.Run("terminal mode syncs non-terminal statuses", func(t *testing.T) { + assert.True(t, shouldSyncNestedRun(RunSyncModeTerminal, "RUNNING")) + assert.True(t, shouldSyncNestedRun(RunSyncModeTerminal, "SCHEDULED")) + assert.True(t, shouldSyncNestedRun(RunSyncModeTerminal, "PENDING")) + assert.True(t, shouldSyncNestedRun(RunSyncModeTerminal, "")) + assert.False(t, shouldSyncNestedRun(RunSyncModeTerminal, "FINISHED")) + assert.False(t, shouldSyncNestedRun(RunSyncModeTerminal, "FAILED")) + assert.False(t, shouldSyncNestedRun(RunSyncModeTerminal, "KILLED")) + }) + + t.Run("retry mode syncs only failed and killed", func(t *testing.T) { + assert.True(t, shouldSyncNestedRun(RunSyncModeRetry, "FAILED")) + assert.True(t, shouldSyncNestedRun(RunSyncModeRetry, "KILLED")) + assert.False(t, shouldSyncNestedRun(RunSyncModeRetry, "RUNNING")) + assert.False(t, shouldSyncNestedRun(RunSyncModeRetry, "FINISHED")) + }) +} + +// ---- ModelToPersistedRun tests ---- + +func TestModelToPersistedRun_NilModel(t *testing.T) { + _, err := ModelToPersistedRun(nil, "ns1") + require.Error(t, err) +} + +func TestModelToPersistedRun_BasicFields(t 
*testing.T) { + pluginsJSON := `{"mlflow":{"entries":{"root_run_id":{"value":"parent-1"}},"state":"PLUGIN_SUCCEEDED"}}` + lt := model.LargeText(pluginsJSON) + m := &model.Run{ + UUID: "run-123", + } + m.RunDetails.State = "SUCCEEDED" + m.RunDetails.FinishedAtInSec = 1700000000 + m.RunDetails.PluginsOutputString = < + + pr, err := ModelToPersistedRun(m, "ns1") + require.NoError(t, err) + require.NotNil(t, pr) + assert.Equal(t, "run-123", pr.RunID) + assert.Equal(t, "ns1", pr.Namespace) + assert.Equal(t, "SUCCEEDED", pr.State) + require.NotNil(t, pr.FinishedAt) + assert.Equal(t, int64(1700000000), pr.FinishedAt.Unix()) + require.NotNil(t, pr.PluginsOutput[PluginName]) + assert.Equal(t, "parent-1", GetParentRunID(pr.PluginsOutput[PluginName])) +} + +// ---- SerializePluginsOutput / DeserializePluginsOutput tests ---- + +func TestSerializeDeserializePluginsOutput_RoundTrip(t *testing.T) { + original := map[string]*apiv2beta1.PluginOutput{ + "mlflow": SuccessfulPluginOutput("exp-1", "Default", "parent-1", "", ""), + "other_plugin": {State: apiv2beta1.PluginState_PLUGIN_SUCCEEDED}, + } + lt, err := SerializePluginsOutput(original) + require.NoError(t, err) + require.NotNil(t, lt) + assert.Contains(t, string(*lt), "mlflow") + assert.Contains(t, string(*lt), "other_plugin") + + result, err := DeserializePluginsOutput(lt) + require.NoError(t, err) + assert.Len(t, result, 2) + assert.NotNil(t, result["mlflow"]) + assert.NotNil(t, result["other_plugin"]) + assert.Equal(t, "parent-1", GetParentRunID(result["mlflow"])) +} + +// ---- SyncParentAndNestedRuns pagination test ---- + +func TestSyncParentAndNestedRuns_Pagination(t *testing.T) { + var updateCalls []string + // Track search calls per parent run ID to handle pagination and recursive child searches. 
+ searchCallsByParent := map[string]int{} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/update": + body, _ := io.ReadAll(r.Body) + updateCalls = append(updateCalls, string(body)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + case "/api/2.0/mlflow/runs/search": + body, _ := io.ReadAll(r.Body) + // Determine which parent run this search is for by inspecting the filter. + parentID := "parent-1" + if strings.Contains(string(body), "nested-p1") { + parentID = "nested-p1" + } else if strings.Contains(string(body), "nested-p2") { + parentID = "nested-p2" + } + searchCallsByParent[parentID]++ + w.WriteHeader(http.StatusOK) + switch parentID { + case "parent-1": + if searchCallsByParent[parentID] == 1 { + // First page: one nested run + next_page_token + _, _ = w.Write([]byte(`{"runs":[{"info":{"run_id":"nested-p1","status":"RUNNING"}}],"next_page_token":"page2"}`)) + } else { + // Second page: one nested run, no more pages + _, _ = w.Write([]byte(`{"runs":[{"info":{"run_id":"nested-p2","status":"RUNNING"}}]}`)) + } + default: + // Nested runs have no children + _, _ = w.Write([]byte(`{"runs":[]}`)) + } + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + setupFakeKubernetesConfig(t, "sa-token") + + enabled := true + requestCfg := &commonmlflow.MLflowPluginConfig{ + Endpoint: server.URL, + Timeout: "10s", + TLS: &commonplugins.TLSConfig{ + InsecureSkipVerify: true, + }, + Settings: &commonmlflow.MLflowPluginSettings{WorkspacesEnabled: &enabled}, + } + mlflowCtx, err := BuildMLflowRunRequestContext("ns1", requestCfg) + require.NoError(t, err) + + endTime := int64(1700000000000) + syncErrors := SyncParentAndNestedRuns(context.Background(), mlflowCtx, "parent-1", "exp-1", RunSyncModeTerminal, "FINISHED", &endTime) + assert.Empty(t, syncErrors) + + // 2 search calls for parent-1 (pagination) + 1 each for nested-p1 and 
nested-p2 (no children) = 4 total + assert.Equal(t, 2, searchCallsByParent["parent-1"]) + assert.Equal(t, 1, searchCallsByParent["nested-p1"]) + assert.Equal(t, 1, searchCallsByParent["nested-p2"]) + // 1 parent update + 2 nested updates = 3 total + assert.Len(t, updateCalls, 3) + // Verify nested runs were updated + found := strings.Join(updateCalls, " | ") + assert.Contains(t, found, "nested-p1") + assert.Contains(t, found, "nested-p2") +} diff --git a/backend/src/apiserver/plugins/mlflow/util.go b/backend/src/apiserver/plugins/mlflow/util.go new file mode 100644 index 00000000000..6f7ee5c9a93 --- /dev/null +++ b/backend/src/apiserver/plugins/mlflow/util.go @@ -0,0 +1,513 @@ +package mlflow + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "strings" + "time" + + "github.com/golang/glog" + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + "github.com/kubeflow/pipelines/backend/src/apiserver/model" + apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/common/util" + "google.golang.org/protobuf/encoding/protojson" + structpb "google.golang.org/protobuf/types/known/structpb" +) + +//todo: to be renamed to "util.go" after comparison + +// todo: some if not all can be moved to config.go +type pluginsOutputEnvelope struct { + MLflow json.RawMessage + others map[string]json.RawMessage +} + +func (e *pluginsOutputEnvelope) UnmarshalJSON(data []byte) error { + var all map[string]json.RawMessage + if err := json.Unmarshal(data, &all); err != nil { + return err + } + e.MLflow = all[PluginName] + delete(all, PluginName) + if len(all) > 0 { + e.others = all + } + return nil +} + +func (e pluginsOutputEnvelope) MarshalJSON() ([]byte, error) { + all := make(map[string]json.RawMessage, len(e.others)+1) + for k, v := range e.others { + all[k] = v + } + if len(e.MLflow) > 0 { + all[PluginName] 
= e.MLflow + } + if len(all) == 0 { + return []byte("{}"), nil + } + return json.Marshal(all) +} + +// set stores a plugin entry by name. +func (e *pluginsOutputEnvelope) set(name string, data json.RawMessage) { + switch name { + case PluginName: + e.MLflow = data + default: + if e.others == nil { + e.others = make(map[string]json.RawMessage) + } + e.others[name] = data + } +} + +func (e *pluginsOutputEnvelope) forEachEntry(fn func(name string, payload json.RawMessage)) { + if len(e.MLflow) > 0 { + fn(PluginName, e.MLflow) + } + for name, payload := range e.others { + fn(name, payload) + } +} + +const ( + DefaultExperimentDescription = "Created by Kubeflow Pipelines" + PluginName = "mlflow" + TagKFPRunID = "kfp.pipeline_run_id" + TagKFPRunURL = "kfp.pipeline_run_url" + TagKFPPipelineID = "kfp.pipeline_id" + TagKFPPipelineVersionID = "kfp.pipeline_version_id" + EntryExperimentName = "experiment_name" + EntryExperimentID = "experiment_id" + EntryRootRunID = "root_run_id" + EntryRunURL = "run_url" + EntryEndpoint = "endpoint" +) + +type Experiment struct { + ID string + Name string +} + +type RunSyncMode string + +const ( + RunSyncModeTerminal RunSyncMode = "terminal" + RunSyncModeRetry RunSyncMode = "retry" +) + +// EnsureExperimentExists looks up the MLflow experiment by ID or name, and creates it +// if it does not already exist. 
+func EnsureExperimentExists(ctx context.Context, requestCtx *commonmlflow.RequestContext, experimentID, experimentName string, description *string) (*Experiment, error) {
+	if requestCtx == nil || requestCtx.Client == nil {
+		return nil, util.NewInvalidInputError("MLflow request context is required")
+	}
+	// An explicit ID takes precedence over the name; a failed ID lookup is
+	// returned as an error and never falls back to creating an experiment.
+	if experimentID != "" {
+		exp, err := requestCtx.Client.GetExperiment(ctx, experimentID)
+		if err != nil {
+			return nil, fmt.Errorf("experiment ID %q not found in MLflow: %w", experimentID, err)
+		}
+		return &Experiment{ID: exp.ID, Name: exp.Name}, nil
+	}
+	existing, err := requestCtx.Client.GetExperimentByName(ctx, experimentName)
+	if err == nil {
+		return &Experiment{ID: existing.ID, Name: existing.Name}, nil
+	}
+	// Only a "not found" lookup result justifies creating the experiment;
+	// every other lookup error is surfaced unchanged.
+	if !commonmlflow.IsNotFoundError(err) {
+		return nil, err
+	}
+	return CreateExperiment(ctx, requestCtx, experimentName, description)
+}
+
+// CreateExperiment creates an MLflow experiment and handles the race condition
+// where another request may have created the same experiment concurrently.
+// It returns an invalid-input error when requestCtx or its client is nil.
+func CreateExperiment(ctx context.Context, requestCtx *commonmlflow.RequestContext, experimentName string, description *string) (*Experiment, error) {
+	// Exported entry point: guard against a nil context/client the same way
+	// EnsureExperimentExists does instead of panicking on dereference.
+	if requestCtx == nil || requestCtx.Client == nil {
+		return nil, util.NewInvalidInputError("MLflow request context is required")
+	}
+	createdID, createErr := requestCtx.Client.CreateExperiment(ctx, experimentName, description)
+	if createErr == nil {
+		return &Experiment{ID: createdID, Name: experimentName}, nil
+	}
+	if commonmlflow.IsAlreadyExistsError(createErr) {
+		// Race-safe fallback: another request created it between get-by-name and create.
+		existing, err := requestCtx.Client.GetExperimentByName(ctx, experimentName)
+		if err != nil {
+			return nil, err
+		}
+		return &Experiment{ID: existing.ID, Name: existing.Name}, nil
+	}
+	return nil, createErr
+}
+
+// BuildKFPRunURL builds a link from kfpBaseURL to the pipeline run details page.
+func BuildKFPRunURL(runID, namespace, kfpBaseURL, pathTemplate string) string {
+	missingRunID, missingBaseURL := runID == "", kfpBaseURL == ""
+	if missingRunID || missingBaseURL {
+		glog.V(4).Infof(
+			"BuildKFPRunURL returned empty URL due to missing input(s): runID_empty=%t kfpBaseURL_empty=%t",
+			missingRunID,
+			missingBaseURL,
+		)
+		return ""
+	}
+
+	trimmedBase := strings.TrimRight(kfpBaseURL, "/")
+	escapedRunID := url.PathEscape(runID)
+
+	// Without a template, fall back to the default run details route.
+	tmpl := strings.TrimSpace(pathTemplate)
+	if tmpl == "" {
+		return fmt.Sprintf("%s/#/runs/details/%s", trimmedBase, escapedRunID)
+	}
+
+	// A template that references {namespace} cannot be rendered without one.
+	if namespace == "" && strings.Contains(tmpl, "{namespace}") {
+		glog.V(4).Infof("BuildKFPRunURL returned empty URL: namespace required when template contains {namespace}")
+		return ""
+	}
+
+	renderedPath := strings.ReplaceAll(tmpl, "{run_id}", escapedRunID)
+	renderedPath = strings.ReplaceAll(renderedPath, "{namespace}", url.PathEscape(namespace))
+	// Paths that start with neither "/" nor "#" get a "/" separator.
+	if !strings.HasPrefix(renderedPath, "/") && !strings.HasPrefix(renderedPath, "#") {
+		renderedPath = "/" + renderedPath
+	}
+	return trimmedBase + renderedPath
+}
+
+// BuildKFPTags builds MLflow tags containing KFP metadata for a pipeline run.
+// The run ID and run URL tags are always present; the pipeline ID and pipeline
+// version ID tags are added only when those fields are non-empty. A nil run
+// yields nil.
+func BuildKFPTags(run *apiserverPlugins.PendingRun, kfpBaseURL, kfpRunURLPathTemplate string) []commonmlflow.Tag {
+	if run == nil {
+		return nil
+	}
+	runURL := BuildKFPRunURL(run.RunID, run.Namespace, kfpBaseURL, kfpRunURLPathTemplate)
+	kfpTags := []commonmlflow.Tag{
+		{Key: TagKFPRunID, Value: run.RunID},
+		{Key: TagKFPRunURL, Value: runURL},
+	}
+	optionalTags := []commonmlflow.Tag{
+		{Key: TagKFPPipelineID, Value: run.PipelineID},
+		{Key: TagKFPPipelineVersionID, Value: run.PipelineVersionID},
+	}
+	for _, candidate := range optionalTags {
+		if candidate.Value != "" {
+			kfpTags = append(kfpTags, candidate)
+		}
+	}
+	return kfpTags
+}
+
+// mlflowTrackingUIMountBase resolves the UI link prefix for MLflow Tracking UI links.
+func mlflowTrackingUIMountBase(requestCtx *commonmlflow.RequestContext, settings *commonmlflow.MLflowPluginSettings) string {
+	// A non-blank MLflowBaseURL in settings wins over the request context's base URL.
+	if settings != nil {
+		if configured := strings.TrimSpace(settings.MLflowBaseURL); configured != "" {
+			return strings.TrimRight(configured, "/")
+		}
+	}
+	if requestCtx == nil || requestCtx.BaseURL == nil {
+		return ""
+	}
+	return strings.TrimRight(requestCtx.BaseURL.String(), "/")
+}
+
+// normalizeMlflowUIPathPrefix trims surrounding whitespace, prepends "/" when
+// the prefix does not already start with one, and strips trailing slashes.
+// A blank prefix yields "".
+func normalizeMlflowUIPathPrefix(prefix string) string {
+	trimmed := strings.TrimSpace(prefix)
+	switch {
+	case trimmed == "":
+		return ""
+	case strings.HasPrefix(trimmed, "/"):
+		return strings.TrimRight(trimmed, "/")
+	default:
+		return strings.TrimRight("/"+trimmed, "/")
+	}
+}
+
+// BuildRunURL returns the MLflow Tracking UI URL for a run.
+// It returns "" when experimentID or runID is empty, or when no UI base URL
+// can be resolved from settings or requestCtx.
+func BuildRunURL(requestCtx *commonmlflow.RequestContext, experimentID, runID string, settings *commonmlflow.MLflowPluginSettings) string {
+	missingExperimentID, missingRunID := experimentID == "", runID == ""
+	if missingExperimentID || missingRunID {
+		glog.V(4).Infof(
+			"BuildRunURL returned empty URL due to missing input(s): experimentID_empty=%t runID_empty=%t",
+			missingExperimentID,
+			missingRunID,
+		)
+		return ""
+	}
+
+	uiBase := mlflowTrackingUIMountBase(requestCtx, settings)
+	if uiBase == "" {
+		glog.V(4).Infof(
+			"BuildRunURL returned empty URL: no mlflowBaseURL and requestCtx.BaseURL is unavailable",
+		)
+		return ""
+	}
+
+	var pathPrefix string
+	if settings != nil {
+		pathPrefix = normalizeMlflowUIPathPrefix(settings.MLflowUIPathPrefix)
+	}
+
+	runPath := "/experiments/" + url.PathEscape(experimentID) + "/runs/" + url.PathEscape(runID)
+	// The workspace query parameter is appended only when workspaces are
+	// enabled and a workspace is set on the request context.
+	if requestCtx != nil && requestCtx.WorkspacesEnabled && requestCtx.Workspace != "" {
+		runPath += "?workspace=" + url.QueryEscape(requestCtx.Workspace)
+	}
+	return uiBase + pathPrefix + "/#" + runPath
+}
+
+// SuccessfulPluginOutput builds a PluginOutput in the PLUGIN_SUCCEEDED state
+// with an empty state message.
+func SuccessfulPluginOutput(experimentID, experimentName, runID, runURL, endpoint string) *apiv2beta1.PluginOutput {
+	const noStateMessage = ""
+	return buildPluginOutput(experimentID, experimentName, runID, runURL, endpoint, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, noStateMessage)
+}
+
+// FailedPluginOutput builds a PluginOutput in the PLUGIN_FAILED state carrying
+// stateMessage.
+func FailedPluginOutput(experimentID, experimentName, runID, runURL, endpoint, stateMessage string) *apiv2beta1.PluginOutput {
+	failed := apiv2beta1.PluginState_PLUGIN_FAILED
+	return buildPluginOutput(experimentID, experimentName, runID, runURL, endpoint, failed, stateMessage)
+}
+
+// upsertPluginOutput merges a single plugin's output into an existing
+// plugins_output JSON string, returning the updated JSON.
+// A nil or empty existing string is treated as an empty envelope.
+func upsertPluginOutput(existing *string, pluginName string, output *apiv2beta1.PluginOutput) (string, error) {
+	payload, err := protojson.Marshal(output)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal plugin output for %q: %w", pluginName, err)
+	}
+
+	var merged pluginsOutputEnvelope
+	hasExisting := existing != nil && *existing != ""
+	if hasExisting {
+		if err := json.Unmarshal([]byte(*existing), &merged); err != nil {
+			return "", fmt.Errorf("failed to unmarshal existing plugins_output: %w", err)
+		}
+	}
+	merged.set(pluginName, payload)
+
+	encoded, err := json.Marshal(merged)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal plugins_output map: %w", err)
+	}
+	return string(encoded), nil
+}
+
+// ModelToPersistedRun converts a model.Run to a PersistedRun for the
+// post-run plugin hooks (OnRunEnd, OnRunRetry).
+func ModelToPersistedRun(m *model.Run, namespace string) (*apiserverPlugins.PersistedRun, error) {
+	if m == nil {
+		return nil, fmt.Errorf("model.Run is nil")
+	}
+	pluginsOutput, err := DeserializePluginsOutput(m.PluginsOutputString)
+	if err != nil {
+		return nil, fmt.Errorf("failed to deserialize plugins_output for run %q: %w", m.UUID, err)
+	}
+	pr := &apiserverPlugins.PersistedRun{
+		RunID:         m.UUID,
+		Namespace:     namespace,
+		State:         string(m.RunDetails.State),
+		PluginsOutput: pluginsOutput,
+	}
+	// FinishedAt stays nil unless the run has a positive finish timestamp.
+	if m.RunDetails.FinishedAtInSec > 0 {
+		t := time.Unix(m.RunDetails.FinishedAtInSec, 0)
+		pr.FinishedAt = &t
+	}
+	return pr, nil
+}
+
+// SetPendingRunPluginOutput serializes the given PluginOutput into PendingRun.PluginsOutput.
+// It is a no-op returning nil when run or output is nil or pluginName is empty.
+func SetPendingRunPluginOutput(run *apiserverPlugins.PendingRun, pluginName string, output *apiv2beta1.PluginOutput) error {
+	if run == nil || output == nil || pluginName == "" {
+		return nil
+	}
+	result, err := upsertPluginOutput(run.PluginsOutput, pluginName, output)
+	if err != nil {
+		return err
+	}
+	run.PluginsOutput = &result
+	return nil
+}
+
+// DeserializePluginsOutput decodes a plugins_output JSON document into a map
+// keyed by plugin name. A nil or empty input yields an empty, non-nil map.
+// An error is returned only when the outer JSON document cannot be parsed;
+// an individual entry that is not a valid PluginOutput is logged and skipped
+// so the remaining entries stay usable.
+func DeserializePluginsOutput(raw *model.LargeText) (map[string]*apiv2beta1.PluginOutput, error) {
+	result := make(map[string]*apiv2beta1.PluginOutput)
+	if raw == nil || *raw == "" {
+		return result, nil
+	}
+	var envelope pluginsOutputEnvelope
+	if err := json.Unmarshal([]byte(*raw), &envelope); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal plugins_output: %w", err)
+	}
+	envelope.forEachEntry(func(name string, payload json.RawMessage) {
+		output := &apiv2beta1.PluginOutput{}
+		if err := protojson.Unmarshal(payload, output); err != nil {
+			// Best-effort: keep the other entries, but leave a trace of the drop.
+			glog.Warningf("Skipping plugins_output entry %q: failed to unmarshal plugin output: %v", name, err)
+			return
+		}
+		result[name] = output
+	})
+	return result, nil
+}
+
+// SerializePluginsOutput encodes the per-plugin outputs into a single
+// plugins_output JSON document. An empty or nil map yields (nil, nil).
+func SerializePluginsOutput(outputs map[string]*apiv2beta1.PluginOutput) (*model.LargeText, error) {
+	if len(outputs) == 0 {
+		return nil, nil
+	}
+	var envelope pluginsOutputEnvelope
+	for key, output := range outputs {
+		marshaledOutput, err := protojson.Marshal(output)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal plugin output for %q: %w", key, err)
+		}
+		envelope.set(key, marshaledOutput)
+	}
+	marshaledMap, err := json.Marshal(envelope)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal plugins_output map: %w", err)
+	}
+	lt := model.LargeText(string(marshaledMap))
+	return &lt, nil
+}
+
+// PersistPluginsOutput serializes the PersistedRun's PluginsOutput and writes
+// it to the database via the given store.
+// It returns an error when run or store is nil.
+func PersistPluginsOutput(run *apiserverPlugins.PersistedRun, store apiserverPlugins.RunPluginOutputStore) error {
+	if run == nil {
+		return fmt.Errorf("persisted run is nil")
+	}
+	if store == nil {
+		return fmt.Errorf("run plugin output store is nil for run %q", run.RunID)
+	}
+	lt, err := SerializePluginsOutput(run.PluginsOutput)
+	if err != nil {
+		return fmt.Errorf("failed to serialize plugins_output for run %q: %w", run.RunID, err)
+	}
+	return store.UpdateRunPluginsOutput(run.RunID, lt)
+}
+
+// GetStringEntry returns the string value stored under key in output.Entries,
+// or "" when output, the entry, or its value is missing.
+func GetStringEntry(output *apiv2beta1.PluginOutput, key string) string {
+	if output == nil || output.Entries == nil || key == "" {
+		return ""
+	}
+	entry, ok := output.Entries[key]
+	if !ok || entry == nil || entry.Value == nil {
+		return ""
+	}
+	return entry.Value.GetStringValue()
+}
+
+// GetParentRunID returns the root_run_id entry of output, or "" when absent.
+func GetParentRunID(output *apiv2beta1.PluginOutput) string {
+	return GetStringEntry(output, EntryRootRunID)
+}
+
+// SetPluginOutputState overwrites the state and state message of output.
+// A nil output is ignored.
+func SetPluginOutputState(output *apiv2beta1.PluginOutput, state apiv2beta1.PluginState, stateMessage string) {
+	if output == nil {
+		return
+	}
+	output.State = state
+	output.StateMessage = stateMessage
+}
+
+// maxSearchPages caps SearchRuns pagination to prevent infinite loops.
+const maxSearchPages = 100
+
+// maxNestingDepth caps recursive nested run traversal.
+const maxNestingDepth = 4
+
+// SyncParentAndNestedRuns updates the status of the MLflow parent run and then
+// of its nested runs, returning every failure as a human-readable message
+// (nil/empty when everything succeeded).
+//
+// In RunSyncModeTerminal the runs are moved to terminalStatus; in
+// RunSyncModeRetry they are moved to "RUNNING". Any other mode is rejected
+// before any MLflow call is made. Nested runs are only synced when
+// experimentID is non-empty.
+func SyncParentAndNestedRuns(ctx context.Context, requestCtx *commonmlflow.RequestContext, parentRunID, experimentID string, mode RunSyncMode, terminalStatus string, endTimeMs *int64) []string {
+	if requestCtx == nil || requestCtx.Client == nil {
+		return []string{"MLflow request context is required"}
+	}
+	if parentRunID == "" {
+		return []string{"MLflow parent run_id is required"}
+	}
+
+	var statusToApply, parentActionLabel string
+	switch mode {
+	case RunSyncModeTerminal:
+		// Terminal sync keeps the caller-provided terminal status.
+		statusToApply, parentActionLabel = terminalStatus, "update parent run status"
+	case RunSyncModeRetry:
+		statusToApply, parentActionLabel = "RUNNING", "reopen parent run"
+	default:
+		return []string{fmt.Sprintf("unsupported MLflow run sync mode %q", mode)}
+	}
+
+	var problems []string
+	// A failed parent update is recorded but does not stop the nested sync.
+	if updateErr := requestCtx.Client.UpdateRun(ctx, parentRunID, statusToApply, endTimeMs); updateErr != nil {
+		problems = append(problems, fmt.Sprintf("failed to %s: %v", parentActionLabel, updateErr))
+	}
+	if experimentID == "" {
+		return problems
+	}
+	// Recursively update all nested runs, starting at depth 0.
+	return append(problems, syncNestedRuns(ctx, requestCtx, parentRunID, experimentID, mode, statusToApply, endTimeMs, 0)...)
+}
+
+// syncNestedRuns searches for MLflow runs tagged with the given parentRunID and
+// updates their status. It recurses into each found run to handle deeper nesting
+// (e.g., parent → loop nested run → iteration nested run).
+func syncNestedRuns(ctx context.Context, requestCtx *commonmlflow.RequestContext, parentRunID, experimentID string, mode RunSyncMode, targetStatus string, endTimeMs *int64, depth int) []string {
+	if depth >= maxNestingDepth {
+		return []string{fmt.Sprintf("max nesting depth (%d) reached when syncing children of run %s", maxNestingDepth, parentRunID)}
+	}
+	action := "close nested run"
+	if mode == RunSyncModeRetry {
+		action = "reopen nested run"
+	}
+	var syncErrors []string
+	filter := fmt.Sprintf(`tags.%q = '%s'`, commonmlflow.TagNestedRunParentRunID, parentRunID)
+	pageToken := ""
+	for page := 0; ; page++ {
+		if page >= maxSearchPages {
+			// Reaching this point means the previous page still advertised a
+			// next page token. Report the truncation instead of silently
+			// leaving the remaining nested runs unsynced.
+			syncErrors = append(syncErrors, fmt.Sprintf("stopped searching nested runs of %s after %d pages; remaining nested runs were not synced", parentRunID, maxSearchPages))
+			break
+		}
+		searchResp, err := requestCtx.Client.SearchRuns(ctx, []string{experimentID}, filter, 1000, pageToken)
+		if err != nil {
+			syncErrors = append(syncErrors, fmt.Sprintf("failed to search nested runs of %s: %v", parentRunID, err))
+			break
+		}
+		for _, runPayload := range searchResp.Runs {
+			mlflowRun := &searchRunPayload{}
+			if err := json.Unmarshal(runPayload, mlflowRun); err != nil {
+				syncErrors = append(syncErrors, fmt.Sprintf("failed to decode nested run payload: %v", err))
+				continue
+			}
+			// Prefer run_id and fall back to run_uuid when run_id is empty.
+			nestedRunID := mlflowRun.Info.RunID
+			if nestedRunID == "" {
+				nestedRunID = mlflowRun.Info.RunUUID
+			}
+			// Skip runs without an ID, a run that is its own parent, and runs
+			// whose current status does not need syncing for this mode.
+			if nestedRunID == "" || nestedRunID == parentRunID || !shouldSyncNestedRun(mode, mlflowRun.Info.Status) {
+				continue
+			}
+			// Children are synced before the nested run itself is updated.
+			childErrors := syncNestedRuns(ctx, requestCtx, nestedRunID, experimentID, mode, targetStatus, endTimeMs, depth+1)
+			syncErrors = append(syncErrors, childErrors...)
+			if err := requestCtx.Client.UpdateRun(ctx, nestedRunID, targetStatus, endTimeMs); err != nil {
+				syncErrors = append(syncErrors, fmt.Sprintf("failed to %s %s: %v", action, nestedRunID, err))
+			}
+		}
+		if searchResp.NextPageToken == "" {
+			break
+		}
+		pageToken = searchResp.NextPageToken
+	}
+	return syncErrors
+}
+
+// buildPluginOutput assembles a PluginOutput with the given state and message.
+// Only non-empty string arguments become entries; the run URL entry is marked
+// with the URL render type.
+func buildPluginOutput(experimentID, experimentName, runID, runURL, endpoint string, state apiv2beta1.PluginState, stateMessage string) *apiv2beta1.PluginOutput {
+	entries := map[string]*apiv2beta1.MetadataValue{}
+	if experimentName != "" {
+		entries[EntryExperimentName] = &apiv2beta1.MetadataValue{Value: structpb.NewStringValue(experimentName)}
+	}
+	if experimentID != "" {
+		entries[EntryExperimentID] = &apiv2beta1.MetadataValue{Value: structpb.NewStringValue(experimentID)}
+	}
+	if runID != "" {
+		entries[EntryRootRunID] = &apiv2beta1.MetadataValue{Value: structpb.NewStringValue(runID)}
+	}
+	if runURL != "" {
+		entries[EntryRunURL] = &apiv2beta1.MetadataValue{
+			Value:      structpb.NewStringValue(runURL),
+			RenderType: apiv2beta1.MetadataValue_URL.Enum(),
+		}
+	}
+	if endpoint != "" {
+		entries[EntryEndpoint] = &apiv2beta1.MetadataValue{Value: structpb.NewStringValue(endpoint)}
+	}
+	return &apiv2beta1.PluginOutput{
+		Entries:      entries,
+		State:        state,
+		StateMessage: stateMessage,
+	}
+}
+
+// shouldSyncNestedRun reports whether a nested run with the given status
+// (compared case-insensitively) needs an update for the sync mode: terminal
+// sync touches runs that are not already FINISHED/FAILED/KILLED, retry sync
+// touches only FAILED/KILLED runs, and any other mode touches nothing.
+func shouldSyncNestedRun(mode RunSyncMode, status string) bool {
+	upperStatus := strings.ToUpper(status)
+	switch mode {
+	case RunSyncModeTerminal:
+		return upperStatus != "FINISHED" && upperStatus != "FAILED" && upperStatus != "KILLED"
+	case RunSyncModeRetry:
+		return upperStatus == "FAILED" || upperStatus == "KILLED"
+	default:
+		return false
+	}
+}
+
+// searchRunPayload is the subset of an MLflow runs/search result entry that
+// nested run syncing reads.
+type searchRunPayload struct {
+	Info struct {
+		RunID   string `json:"run_id"`
+		RunUUID string `json:"run_uuid"`
+		Status  string `json:"status"`
+	} `json:"info"`
+}
diff --git a/backend/src/apiserver/plugins/registry.go b/backend/src/apiserver/plugins/registry.go
new file mode 100644
index 00000000000..7ac61bb9ead
--- /dev/null
+++ b/backend/src/apiserver/plugins/registry.go
@@ -0,0 +1,67 @@
+package plugins
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/golang/glog"
+)
+
+// HandlerFactory describes a plugin that can be registered and instantiated on demand.
+type HandlerFactory interface {
+	// Name returns the unique identifier for this plugin factory.
+	Name() string
+	// IsEnabled reports whether the plugin should be activated in the current environment.
+	IsEnabled() bool
+	// Create constructs and returns a ready-to-use RunPluginHandler.
+	Create() (RunPluginHandler, error)
+}
+
+var (
+	registryMu sync.RWMutex
+	factories  []HandlerFactory
+)
+
+// RegisterHandlerFactory adds a HandlerFactory to the global registry.
+// Typically called from a plugin package's init() function.
+func RegisterHandlerFactory(factory HandlerFactory) {
+	registryMu.Lock()
+	defer registryMu.Unlock()
+	factories = append(factories, factory)
+}
+
+// RegisteredFactories returns a snapshot of all registered handler factories.
+func RegisteredFactories() []HandlerFactory {
+	registryMu.RLock()
+	defer registryMu.RUnlock()
+	result := make([]HandlerFactory, len(factories))
+	copy(result, factories)
+	return result
+}
+
+// ResetRegistry clears all registered factories. Intended for use in tests only.
+func ResetRegistry() { + registryMu.Lock() + defer registryMu.Unlock() + factories = nil +} + +func GetPluginDispatcher(kubeClients KubeClientProvider, runOutputStore RunPluginOutputStore) (RunPluginDispatcher, error) { + var handlers []RunPluginHandler + + for _, factory := range RegisteredFactories() { + if !factory.IsEnabled() { + continue + } + handler, err := factory.Create() + if err != nil { + return NoOpDispatcher{}, fmt.Errorf("failed to initialize %s plugin handler: %v", factory.Name(), err) + } + handlers = append(handlers, handler) + } + if len(handlers) == 0 { + glog.Infof("No plugin handlers enabled, returning no-op dispatcher") + return NoOpDispatcher{}, nil + } + return NewRunPluginDispatcherImpl(handlers, kubeClients, runOutputStore) +} diff --git a/backend/src/apiserver/plugins/registry_test.go b/backend/src/apiserver/plugins/registry_test.go new file mode 100644 index 00000000000..428a5e86eec --- /dev/null +++ b/backend/src/apiserver/plugins/registry_test.go @@ -0,0 +1,118 @@ +package plugins + +import ( + "fmt" + "testing" + + "github.com/kubeflow/pipelines/backend/src/apiserver/model" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/client-go/kubernetes" + fakeclientset "k8s.io/client-go/kubernetes/fake" +) + +var _ HandlerFactory = (*fakeFactory)(nil) + +type fakeFactory struct { + name string + enabled bool + handler RunPluginHandler + err error +} + +func (f *fakeFactory) Name() string { return f.name } +func (f *fakeFactory) IsEnabled() bool { return f.enabled } +func (f *fakeFactory) Create() (RunPluginHandler, error) { return f.handler, f.err } + +type fakeKubeClientProvider struct{} + +func (f *fakeKubeClientProvider) GetClientSet() kubernetes.Interface { + return fakeclientset.NewClientset() +} + +type fakeRunPluginOutputStore struct{} + +func (f *fakeRunPluginOutputStore) UpdateRunPluginsOutput(_ string, _ *model.LargeText) error { + return nil +} + +func resetRegistryForTest(t *testing.T) { + 
t.Helper() + ResetRegistry() + t.Cleanup(ResetRegistry) +} + +func TestRegisterHandlerFactory_AppearsInSnapshot(t *testing.T) { + resetRegistryForTest(t) + + factory := &fakeFactory{name: "A", enabled: true} + RegisterHandlerFactory(factory) + + registered := RegisteredFactories() + require.Len(t, registered, 1) + assert.Equal(t, "A", registered[0].Name()) +} + +func TestRegisteredFactories_SnapshotIsolation(t *testing.T) { + resetRegistryForTest(t) + + RegisterHandlerFactory(&fakeFactory{name: "A"}) + snapshot := RegisteredFactories() + snapshot[0] = &fakeFactory{name: "Mutated"} + + assert.Equal(t, "A", RegisteredFactories()[0].Name()) +} + +func TestResetRegistry_ClearsAll(t *testing.T) { + resetRegistryForTest(t) + + RegisterHandlerFactory(&fakeFactory{name: "A"}) + ResetRegistry() + + assert.Empty(t, RegisteredFactories()) +} + +func TestGetPluginDispatcher_NoFactories_ReturnsNoOp(t *testing.T) { + resetRegistryForTest(t) + + dispatcher, err := GetPluginDispatcher(&fakeKubeClientProvider{}, &fakeRunPluginOutputStore{}) + + require.NoError(t, err) + assert.IsType(t, NoOpDispatcher{}, dispatcher) +} + +func TestGetPluginDispatcher_AllDisabled_ReturnsNoOp(t *testing.T) { + resetRegistryForTest(t) + RegisterHandlerFactory(&fakeFactory{name: "A", enabled: false}) + + dispatcher, err := GetPluginDispatcher(&fakeKubeClientProvider{}, &fakeRunPluginOutputStore{}) + + require.NoError(t, err) + assert.IsType(t, NoOpDispatcher{}, dispatcher) +} + +func TestGetPluginDispatcher_OneEnabled_ReturnsImpl(t *testing.T) { + resetRegistryForTest(t) + handler := &fakeHandler{name: "A"} + RegisterHandlerFactory(&fakeFactory{name: "A", enabled: true, handler: handler}) + + dispatcher, err := GetPluginDispatcher(&fakeKubeClientProvider{}, &fakeRunPluginOutputStore{}) + + require.NoError(t, err) + assert.IsType(t, &RunPluginDispatcherImpl{}, dispatcher) +} + +func TestGetPluginDispatcher_CreateFails_ReturnsError(t *testing.T) { + resetRegistryForTest(t) + 
RegisterHandlerFactory(&fakeFactory{ + name: "Broken", + enabled: true, + err: fmt.Errorf("init failed"), + }) + + dispatcher, err := GetPluginDispatcher(&fakeKubeClientProvider{}, &fakeRunPluginOutputStore{}) + + require.Error(t, err) + assert.Contains(t, err.Error(), "Broken") + assert.Equal(t, dispatcher, NoOpDispatcher{}) +} diff --git a/backend/src/apiserver/resource/resource_manager.go b/backend/src/apiserver/resource/resource_manager.go index 3bd5aa77070..a513bcfc4e1 100644 --- a/backend/src/apiserver/resource/resource_manager.go +++ b/backend/src/apiserver/resource/resource_manager.go @@ -26,21 +26,22 @@ import ( "time" "unicode/utf8" - apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" - scheduledworkflow "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1" - "github.com/cenkalti/backoff" "github.com/golang/glog" + apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" "github.com/kubeflow/pipelines/backend/src/apiserver/archive" kfpauth "github.com/kubeflow/pipelines/backend/src/apiserver/auth" "github.com/kubeflow/pipelines/backend/src/apiserver/client" "github.com/kubeflow/pipelines/backend/src/apiserver/common" "github.com/kubeflow/pipelines/backend/src/apiserver/list" "github.com/kubeflow/pipelines/backend/src/apiserver/model" + apiserverPlugins "github.com/kubeflow/pipelines/backend/src/apiserver/plugins" + apiservermlflow "github.com/kubeflow/pipelines/backend/src/apiserver/plugins/mlflow" "github.com/kubeflow/pipelines/backend/src/apiserver/storage" "github.com/kubeflow/pipelines/backend/src/apiserver/template" exec "github.com/kubeflow/pipelines/backend/src/common" "github.com/kubeflow/pipelines/backend/src/common/util" + scheduledworkflow "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1" scheduledworkflowclient "github.com/kubeflow/pipelines/backend/src/crd/pkg/client/clientset/versioned/typed/scheduledworkflow/v1beta1" "github.com/pkg/errors" 
"github.com/prometheus/client_golang/prometheus" @@ -148,10 +149,11 @@ type ResourceManager struct { uuid util.UUIDGeneratorInterface authenticators []kfpauth.Authenticator options *ResourceManagerOptions + pluginDispatcher apiserverPlugins.RunPluginDispatcher } func NewResourceManager(clientManager ClientManagerInterface, options *ResourceManagerOptions) *ResourceManager { - return &ResourceManager{ + rm := &ResourceManager{ experimentStore: clientManager.ExperimentStore(), pipelineStore: clientManager.PipelineStore(), jobStore: clientManager.JobStore(), @@ -172,6 +174,12 @@ func NewResourceManager(clientManager ClientManagerInterface, options *ResourceM authenticators: clientManager.Authenticators(), options: options, } + dispatcher, err := apiserverPlugins.GetPluginDispatcher(rm.k8sCoreClient, rm.runStore) + if err != nil { + glog.Errorf("Failed to create plugin dispatcher: %v", err) + } + rm.pluginDispatcher = dispatcher + return rm } func (r *ResourceManager) getWorkflowClient(namespace string) util.ExecutionInterface { @@ -703,6 +711,34 @@ func (r *ResourceManager) CreateRun(ctx context.Context, run *model.Run) (*model executionSpec.SetOwnerReferences(swf) } + // Run plugin lifecycle hooks before workflow creation. + pendingRun := &apiserverPlugins.PendingRun{ + RunID: run.UUID, + DisplayName: run.DisplayName, + Namespace: k8sNamespace, + PipelineID: run.PipelineSpec.PipelineId, + PipelineVersionID: run.PipelineSpec.PipelineVersionId, + PluginsInput: (*string)(run.PluginsInputString), + } + if err := r.pluginDispatcher.OnBeforeRunCreation(ctx, pendingRun, executionSpec); err != nil { + return nil, err + } + // Copy plugin output back to the model. 
+ if pendingRun.PluginsOutput != nil { + lt := model.LargeText(*pendingRun.PluginsOutput) + run.PluginsOutputString = < + } + + runPersisted := false + + defer func() { + if !runPersisted { + if pr, prErr := apiservermlflow.ModelToPersistedRun(run, k8sNamespace); prErr == nil { + r.pluginDispatcher.OnRunEnd(ctx, pr) + } + } + }() + newExecSpec, err := r.getWorkflowClient(k8sNamespace).Create(ctx, executionSpec, v1.CreateOptions{}) if err != nil { if err, ok := err.(net.Error); ok && err.Timeout() { @@ -732,11 +768,14 @@ func (r *ResourceManager) CreateRun(ctx context.Context, run *model.Run) (*model run.RunDetails.ScheduledAtInSec = run.RunDetails.CreatedAtInSec } run.State = model.RuntimeStatePending + newRun, err := r.runStore.CreateRun(run) if err != nil { return nil, util.Wrap(err, "Failed to create a run") } + runPersisted = true + // Upon run creation, update owning experiment err = r.experimentStore.SetLastRunTimestamp(newRun) if err != nil { @@ -1079,8 +1118,26 @@ func (r *ResourceManager) RetryRun(ctx context.Context, runId string) error { } newExecSpec = newCreatedWorkflow } + // Notify plugins of retry + if run.PluginsOutputString != nil && *run.PluginsOutputString != "" { + if pr, prErr := apiservermlflow.ModelToPersistedRun(run, namespace); prErr == nil { + err = r.pluginDispatcher.OnRunRetry(ctx, pr) + if err != nil { + return util.NewInternalServerError(err, "Failed to notify plugins of retry for run %s", runId) + } + } + } + condition := string(newExecSpec.ExecutionStatus().Condition()) - err = r.runStore.UpdateRun(&model.Run{UUID: runId, RunDetails: model.RunDetails{Conditions: condition, FinishedAtInSec: 0, WorkflowRuntimeManifest: model.LargeText(newExecSpec.ToStringForStore()), State: model.RuntimeState(condition).ToV2()}}) + err = r.runStore.UpdateRun(&model.Run{ + UUID: runId, + RunDetails: model.RunDetails{ + Conditions: condition, + FinishedAtInSec: 0, + WorkflowRuntimeManifest: model.LargeText(newExecSpec.ToStringForStore()), + State: 
model.RuntimeState(condition).ToV2(), + }, + }) if err != nil { return util.NewInternalServerError(err, "Failed to retry run %s due to error updating entry", runId) } @@ -1236,6 +1293,10 @@ func (r *ResourceManager) CreateJob(ctx context.Context, job *model.Job) (*model var scheduledWorkflow *scheduledworkflow.ScheduledWorkflow var tmpl template.Template + // When plugins are enabled the SWF controller must call the CreateRun API + // so that per-run plugin logic executes. + pluginsEnabled := job.PluginsInputString != nil && *job.PluginsInputString != "" + // If the pipeline version or pipeline spec is provided, this means the user wants to pin to a specific pipeline. // Otherwise, always let the ScheduledWorkflow controller pick the latest. if job.PipelineVersionId != "" || job.PipelineSpecManifest != "" || job.WorkflowSpecManifest != "" { @@ -1247,9 +1308,15 @@ func (r *ResourceManager) CreateJob(ctx context.Context, job *model.Job) (*model return nil, util.NewInternalServerError(err, "Failed to create a recurring run with an invalid pipeline spec manifest") } - // TODO(gkcalat): consider changing the flow. Other resource UUIDs are assigned by their respective stores (DB). - // Convert modelJob into scheduledWorkflow. - scheduledWorkflow, err = tmpl.ScheduledWorkflow(job) + if pluginsEnabled { + // Plugin-enabled: create a lightweight SWF without inline workflow spec + // so the SWF controller calls the CreateRun API for per-run plugin logic. + scheduledWorkflow, err = template.NewGenericScheduledWorkflow(job) + } else { + // TODO(gkcalat): consider changing the flow. Other resource UUIDs are assigned by their respective stores (DB). + // Convert modelJob into scheduledWorkflow. 
+ scheduledWorkflow, err = tmpl.ScheduledWorkflow(job) + } if err != nil { return nil, util.Wrap(err, "Failed to create a recurring run during scheduled workflow creation") } @@ -1581,6 +1648,19 @@ func (r *ResourceManager) ReportWorkflowResource(ctx context.Context, execSpec u } } if execStatus.IsInFinalState() { + // Notify plugins of terminal state. If a plugin sync fails and + // needs retry, defer the persistedFinalState label so the + // persistence agent re-reports the workflow on its next cycle. + if run != nil && run.PluginsOutputString != nil && *run.PluginsOutputString != "" { + pr, prErr := apiservermlflow.ModelToPersistedRun(run, execSpec.ExecutionNamespace()) + if prErr != nil { + glog.Warningf("Failed to build PersistedRun for plugin sync on run %q: %v", run.UUID, prErr) + } else if !r.pluginDispatcher.OnRunEnd(ctx, pr) { + glog.Warningf("Plugin sync failed for run %q; deferring persistedFinalState label so persistence agent retries", run.UUID) + return nil, nil + } + } + err := addWorkflowLabel(ctx, r.getWorkflowClient(execSpec.ExecutionNamespace()), execSpec.ExecutionName(), util.LabelKeyWorkflowPersistedFinalState, "true") if err != nil { message := fmt.Sprintf("Failed to add PersistedFinalState label to workflow %s", execSpec.ExecutionName()) diff --git a/backend/src/apiserver/resource/resource_manager_test.go b/backend/src/apiserver/resource/resource_manager_test.go index 7417390ab07..294bc69a240 100644 --- a/backend/src/apiserver/resource/resource_manager_test.go +++ b/backend/src/apiserver/resource/resource_manager_test.go @@ -19,22 +19,27 @@ import ( "encoding/json" "fmt" "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" "strings" "testing" "time" "unicode/utf8" - apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" - "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" - "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo-workflows/v3/util/file" + 
apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" "github.com/kubeflow/pipelines/backend/src/apiserver/client" "github.com/kubeflow/pipelines/backend/src/apiserver/common" + "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" "github.com/kubeflow/pipelines/backend/src/apiserver/list" "github.com/kubeflow/pipelines/backend/src/apiserver/model" + apiservermlflow "github.com/kubeflow/pipelines/backend/src/apiserver/plugins/mlflow" "github.com/kubeflow/pipelines/backend/src/apiserver/storage" "github.com/kubeflow/pipelines/backend/src/apiserver/template" + "github.com/kubeflow/pipelines/backend/src/common/util" swfapi "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1" "github.com/pkg/errors" @@ -52,6 +57,51 @@ func initEnvVars() { proxy.InitializeConfigWithEmptyForTests() } +// setupTestSAToken writes a temp kubeconfig with the given bearer token and +// sets the KUBECONFIG env var so util.GetKubernetesConfig() picks it up. +func setupTestSAToken(t *testing.T, token string) { + t.Helper() + kubeconfig := fmt.Sprintf(`apiVersion: v1 +kind: Config +clusters: +- cluster: + server: https://localhost + name: test +contexts: +- context: + cluster: test + user: test + name: test +current-context: test +users: +- name: test + user: + token: %s +`, token) + p := filepath.Join(t.TempDir(), "kubeconfig") + require.NoError(t, os.WriteFile(p, []byte(kubeconfig), 0600)) + t.Setenv("KUBECONFIG", p) +} + +// setupMLflowViperConfig sets plugins.mlflow in Viper and restores the original +// value when the test completes. 
+func setupMLflowViperConfig(t *testing.T, endpoint string) { + t.Helper() + origConfig := viper.Get("plugins.mlflow") + hadConfig := viper.IsSet("plugins.mlflow") + viper.Set("plugins.mlflow", map[string]interface{}{ + "endpoint": endpoint, + "timeout": "10s", + }) + t.Cleanup(func() { + if hadConfig { + viper.Set("plugins.mlflow", origConfig) + } else { + viper.Set("plugins.mlflow", nil) + } + }) +} + type FakeBadObjectStore struct{} func (m *FakeBadObjectStore) GetPipelineKey(pipelineID string) string { @@ -2434,6 +2484,118 @@ func TestCreateRun_StoreRunMetadataError(t *testing.T) { assert.Contains(t, err.Error(), "database is closed") } +func TestCreateRun_WithMLflowPlugin(t *testing.T) { + // Set up a fake MLflow server that handles experiment lookup and run creation. + // Tags are passed inline in the CreateRun body (atomic tagging). + var ( + experimentGetCalled bool + runCreateCalled bool + createRunBody string + ) + mlflowServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/experiments/get-by-name": + experimentGetCalled = true + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment":{"experiment_id":"mlflow-exp-1","name":"Default"}}`)) + case "/api/2.0/mlflow/runs/create": + runCreateCalled = true + defer r.Body.Close() + body, _ := io.ReadAll(r.Body) + createRunBody = string(body) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"mlflow-parent-run-1"}}}`)) + default: + http.NotFound(w, r) + } + })) + defer mlflowServer.Close() + + setupTestSAToken(t, "test-sa-token") + setupMLflowViperConfig(t, mlflowServer.URL) + + store, manager, exp := initWithExperiment(t) + defer store.Close() + + // Build a run with plugins_input that triggers MLflow integration. 
+ pluginsInput := `{"plugins.mlflow":{"experiment_name":"Default"}}` + pluginsInputLT := model.LargeText(pluginsInput) + apiRun := &model.Run{ + DisplayName: "mlflow-test-run", + PipelineSpec: model.PipelineSpec{ + WorkflowSpecManifest: model.LargeText(testWorkflow.ToStringForStore()), + Parameters: "[{\"name\":\"param1\",\"value\":\"world\"}]", + }, + ExperimentId: exp.UUID, + RunDetails: model.RunDetails{ + PluginsInputString: &pluginsInputLT, + }, + } + + runDetail, err := manager.CreateRun(context.Background(), apiRun) + require.NoError(t, err) + require.NotNil(t, runDetail) + + // Verify MLflow API calls were made. + assert.True(t, experimentGetCalled, "MLflow experiment lookup should have been called") + assert.True(t, runCreateCalled, "MLflow run creation should have been called") + assert.Contains(t, createRunBody, "kfp.pipeline_run_id", "CreateRun body should contain KFP tags") + + // Verify plugins_output is persisted on the run. + storedRun, err := manager.GetRun(runDetail.UUID) + require.NoError(t, err) + require.NotNil(t, storedRun.PluginsOutputString, "PluginsOutputString should be set") + assert.Contains(t, string(*storedRun.PluginsOutputString), "mlflow-parent-run-1") + assert.Contains(t, string(*storedRun.PluginsOutputString), "mlflow-exp-1") + + // Parse and verify the plugin output structure. 
+ outputs, err := apiservermlflow.DeserializePluginsOutput(storedRun.PluginsOutputString) + require.NoError(t, err) + output := outputs[apiservermlflow.PluginName] + require.NotNil(t, output) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, output.State) + assert.Equal(t, "mlflow-exp-1", output.Entries[apiservermlflow.EntryExperimentID].Value.GetStringValue()) + assert.Equal(t, "mlflow-parent-run-1", output.Entries[apiservermlflow.EntryRootRunID].Value.GetStringValue()) + assert.Contains(t, output.Entries[apiservermlflow.EntryRunURL].Value.GetStringValue(), "mlflow-parent-run-1") + +} + +// TestCreateRun_NoMLflowConfig verifies that run creation succeeds without +// error when no MLflow plugin config is set at either the global or namespace +// level. The unconditional MLflow dispatcher must short-circuit cleanly. +func TestCreateRun_NoMLflowConfig(t *testing.T) { + // Ensure no global MLflow config. + origConfig := viper.Get("plugins.mlflow") + viper.Set("plugins.mlflow", nil) + t.Cleanup(func() { + viper.Set("plugins.mlflow", origConfig) + }) + + store, manager, exp := initWithExperiment(t) + defer store.Close() + + apiRun := &model.Run{ + DisplayName: "no-mlflow-run", + PipelineSpec: model.PipelineSpec{ + WorkflowSpecManifest: model.LargeText(testWorkflow.ToStringForStore()), + Parameters: "[{\"name\":\"param1\",\"value\":\"world\"}]", + }, + ExperimentId: exp.UUID, + } + + runDetail, err := manager.CreateRun(context.Background(), apiRun) + require.NoError(t, err) + require.NotNil(t, runDetail) + + // Verify plugins_output is not set (no plugin ran). 
+ storedRun, err := manager.GetRun(runDetail.UUID) + require.NoError(t, err) + assert.True(t, + storedRun.PluginsOutputString == nil || *storedRun.PluginsOutputString == "", + "PluginsOutputString should be empty when MLflow is not configured", + ) +} + func TestDeleteRun(t *testing.T) { store, manager, runDetail := initWithOneTimeRun(t) defer store.Close() @@ -2583,6 +2745,75 @@ func TestRetryRun(t *testing.T) { assert.Equal(t, actualRunDetail.RunDetails.State, model.RuntimeStateRunning) } +func TestRetryRun_ReopensMLflowParentAndFailedNestedRuns(t *testing.T) { + type updateCall struct { + RunID string + Status string + } + var updateCalls []updateCall + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/update": + defer r.Body.Close() + var payload struct { + RunID string `json:"run_id"` + Status string `json:"status"` + } + require.NoError(t, json.NewDecoder(r.Body).Decode(&payload)) + updateCalls = append(updateCalls, updateCall{RunID: payload.RunID, Status: payload.Status}) + _, _ = w.Write([]byte(`{}`)) + case "/api/2.0/mlflow/runs/search": + body, _ := io.ReadAll(r.Body) + r.Body.Close() + if strings.Contains(string(body), "parent-run-1") { + _, _ = w.Write([]byte(`{ + "runs": [ + {"info":{"run_id":"nested-failed","status":"FAILED"}}, + {"info":{"run_id":"nested-killed","status":"KILLED"}}, + {"info":{"run_id":"nested-finished","status":"FINISHED"}} + ] + }`)) + } else { + _, _ = w.Write([]byte(`{"runs":[]}`)) + } + default: + http.NotFound(w, r) + } + })) + defer server.Close() + + setupTestSAToken(t, "retry-token") + setupMLflowViperConfig(t, server.URL) + + store, manager, runDetail := initWithOneTimeFailedRun(t) + defer store.Close() + + runWithPluginOutput, err := manager.GetRun(runDetail.UUID) + require.NoError(t, err) + mlflowOutput := apiservermlflow.SuccessfulPluginOutput("exp-1", "exp-1", "parent-run-1", server.URL+"/runs/parent-run-1", server.URL) + lt, 
err := apiservermlflow.SerializePluginsOutput(map[string]*apiv2beta1.PluginOutput{apiservermlflow.PluginName: mlflowOutput}) + require.NoError(t, err) + runWithPluginOutput.PluginsOutputString = lt + require.NoError(t, manager.runStore.UpdateRun(runWithPluginOutput)) + + err = manager.RetryRun(context.Background(), runDetail.UUID) + require.NoError(t, err) + + assert.Contains(t, updateCalls, updateCall{RunID: "parent-run-1", Status: "RUNNING"}) + assert.Contains(t, updateCalls, updateCall{RunID: "nested-failed", Status: "RUNNING"}) + assert.Contains(t, updateCalls, updateCall{RunID: "nested-killed", Status: "RUNNING"}) + assert.NotContains(t, updateCalls, updateCall{RunID: "nested-finished", Status: "RUNNING"}) + + updatedRun, err := manager.GetRun(runDetail.UUID) + require.NoError(t, err) + updatedOutputs, err := apiservermlflow.DeserializePluginsOutput(updatedRun.PluginsOutputString) + require.NoError(t, err) + updatedOutput := updatedOutputs[apiservermlflow.PluginName] + require.NotNil(t, updatedOutput) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, updatedOutput.State) + assert.Equal(t, "", updatedOutput.StateMessage) +} + func TestRetryRun_RunNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() @@ -3440,6 +3671,70 @@ func TestReportWorkflowResource_WorkflowCompleted(t *testing.T) { assert.Equal(t, wf.ExecutionObjectMeta().Labels[util.LabelKeyWorkflowPersistedFinalState], "true") } +// TestReportWorkflow_WithMLflowOnRunEnd verifies that when a run has PluginsOutputString +// set, reporting a terminal workflow triggers the plugin dispatcher's +// OnRunEnd, which updates the plugin output state. +func TestReportWorkflow_WithMLflowOnRunEnd(t *testing.T) { + // Set a dummy MLflow config so the manager creates a real MLflow dispatcher, + // then clear it before the run lifecycle to simulate config being unavailable + // at OnRunEnd time. 
This verifies that OnRunEnd still fires and sets + // PLUGIN_FAILED because config is unavailable. + setupMLflowViperConfig(t, "http://dummy-mlflow:5000") + + store, manager, exp := initWithExperiment(t) + + // Now clear MLflow config to simulate it being unavailable at runtime. + viper.Set("plugins", nil) + t.Cleanup(func() { + viper.Set("plugins", nil) + }) + defer store.Close() + + // Pre-populate PluginsOutputString to simulate a prior OnBeforeRunCreation success. + pluginsOutputJSON := `{"mlflow":{"entries":{"experiment_id":{"value":"exp-1"},"root_run_id":{"value":"parent-run-1"}},"state":"PLUGIN_SUCCEEDED"}}` + pluginsOutput := model.LargeText(pluginsOutputJSON) + apiRun := &model.Run{ + DisplayName: "mlflow-run", + PipelineSpec: model.PipelineSpec{ + WorkflowSpecManifest: model.LargeText(testWorkflow.ToStringForStore()), + Parameters: "[{\"name\":\"param1\",\"value\":\"world\"}]", + }, + ExperimentId: exp.UUID, + RunDetails: model.RunDetails{ + PluginsOutputString: &pluginsOutput, + }, + } + run, err := manager.CreateRun(context.Background(), apiRun) + require.NoError(t, err) + + // Verify PluginsOutputString was persisted at creation time. + createdRun, err := manager.GetRun(run.UUID) + require.NoError(t, err) + require.NotNil(t, createdRun.PluginsOutputString) + assert.Contains(t, string(*createdRun.PluginsOutputString), "parent-run-1") + + // Report a terminal (failed) workflow. + workflow := util.NewWorkflow(&v1alpha1.Workflow{ + ObjectMeta: v1.ObjectMeta{ + Name: run.K8SName, + Namespace: "ns1", + UID: types.UID(run.UUID), + Labels: map[string]string{util.LabelKeyWorkflowRunId: run.UUID}, + }, + Status: v1alpha1.WorkflowStatus{Phase: v1alpha1.WorkflowFailed}, + }) + _, err = manager.ReportWorkflowResource(context.Background(), workflow) + require.NoError(t, err) + + // After terminal report, the plugin dispatcher's OnRunEnd should have fired. + // Without MLflow config in Viper, the handler sets PLUGIN_FAILED. 
+ updatedRun, err := manager.GetRun(run.UUID) + require.NoError(t, err) + require.NotNil(t, updatedRun.PluginsOutputString, "PluginsOutputString should be updated after terminal report") + assert.Contains(t, string(*updatedRun.PluginsOutputString), "PLUGIN_FAILED") + assert.Contains(t, string(*updatedRun.PluginsOutputString), "config unavailable") +} + func TestReportWorkflowResource_WorkflowCompleted_WorkflowNotFound(t *testing.T) { store, manager, run := initWithOneTimeRun(t) defer store.Close() diff --git a/backend/src/apiserver/server/api_converter.go b/backend/src/apiserver/server/api_converter.go index 8784159d15a..a33f6ea3882 100644 --- a/backend/src/apiserver/server/api_converter.go +++ b/backend/src/apiserver/server/api_converter.go @@ -17,8 +17,10 @@ package server import ( "encoding/json" "fmt" + "net/url" "sort" "strconv" + "strings" "time" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" @@ -31,10 +33,44 @@ import ( swapi "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1" "github.com/pkg/errors" "github.com/robfig/cron" + "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" ) +const ( + urlSchemeJavaScript = "javascript:" + urlSchemeData = "data:" + urlSchemeVBScript = "vbscript:" + + pluginErrInvalidLimitsConfig = "invalid plugin limits configuration" + pluginErrPluginsInputTooManyKeys = "number of plugins_input entries" + pluginErrPluginsInputNilEntry = "plugins_input[%q] must not be nil" + pluginErrPluginsInputInvalidValue = "plugins_input[%q] contains invalid nested value" + pluginErrPluginsInputNestingDepth = "plugins_input[%q] nesting depth exceeds maximum" + pluginErrPluginsInputEntrySize = "plugins_input[%q] size" + pluginErrPluginsInputTotalSize = "plugins_input total size" + pluginErrPluginsInputMarshalEntry = "marshal plugins_input[%q]: %w" + pluginErrPluginsInputMarshalMap = "marshal plugins_input 
map: %w" + + pluginErrPluginsOutputTooManyKeys = "number of plugins_output entries" + pluginErrPluginsOutputEntrySize = "plugins_output[%q] size" + pluginErrPluginsOutputTotalSize = "plugins_output total size" + pluginErrPluginsOutputMarshalEntry = "marshal plugins_output[%q]: %w" + pluginErrPluginsOutputMarshalMap = "marshal plugins_output map: %w" + pluginErrPluginsOutputNilMetadata = "plugins_output[%q].entries[%q] metadata must not be nil" + pluginErrPluginsOutputNilValue = "plugins_output[%q].entries[%q].value must not be nil" + pluginErrPluginsOutputInvalidValue = "plugins_output[%q].entries[%q] contains invalid nested value" + pluginErrPluginsOutputNestingDepth = "plugins_output[%q].entries[%q] nesting depth exceeds maximum" + + pluginErrStructValueNil = "struct value must not be nil" + pluginErrStructFieldNil = "struct field %q must not be nil" + pluginErrValueNil = "value must not be nil" + pluginErrValueKindUnset = "unsupported or unset value kind" + + pluginErrExceedsMaxBytes = " (%d bytes) exceeds maximum %d bytes" +) + // Converts API experiment to its internal representation. // Supports both v1beta1 abd v2beta1 API. 
func toModelExperiment(e interface{}) (*model.Experiment, error) { @@ -1179,6 +1215,8 @@ func toModelRun(r interface{}) (*model.Run, error) { var state model.RuntimeState var stateHistory []*model.RuntimeStatus var tasks []*model.Task + var pluginsInputStr, pluginsOutputStr *string + var err error switch r := r.(type) { case *apiv1beta1.Run: return toModelRun(&apiv1beta1.RunDetail{Run: r}) @@ -1258,6 +1296,23 @@ func toModelRun(r interface{}) (*model.Run, error) { } else { return nil, util.NewInternalServerError(err, "Failed to convert a API run detail to its internal representation due to error converting runtime state history") } + if pluginsInputStr, err = pluginsInputToJSON(apiRunV2.GetPluginsInput()); err != nil { + return nil, util.NewInternalServerError(err, "Failed to convert plugins_input to JSON") + } + pluginLimitsConfig, err := common.GetPluginLimitsConfig() + if err != nil { + return nil, util.NewInvalidInputError("Invalid plugins limits configuration: %v", err) + } + if err = validatePluginsInputLimits(apiRunV2.GetPluginsInput(), pluginLimitsConfig); err != nil { + return nil, util.NewInvalidInputError("Invalid plugins_input: %v", err) + } + if err = validatePluginsOutputWithLimits(apiRunV2.GetPluginsOutput(), pluginLimitsConfig); err != nil { + return nil, util.NewInvalidInputError("Invalid plugins_output: %v", err) + } + if pluginsOutputStr, err = pluginsOutputToJSON(apiRunV2.GetPluginsOutput()); err != nil { + return nil, util.NewInternalServerError(err, "Failed to convert plugins_output to JSON") + } + namespace = "" workflowSpec = "" // TODO(gkcalat): implement runtime details of a run logic based on the apiRunV2.RuDetails(). 
@@ -1361,6 +1416,8 @@ func toModelRun(r interface{}) (*model.Run, error) { PipelineRuntimeManifest: model.LargeText(runtimePipelineSpec), WorkflowRuntimeManifest: model.LargeText(runtimeWorkflowSpec), TaskDetails: tasks, + PluginsInputString: stringToLargeText(pluginsInputStr), + PluginsOutputString: stringToLargeText(pluginsOutputStr), }, } @@ -1545,7 +1602,39 @@ func toApiRun(r *model.Run) *apiv2beta1.Run { FinishedAt: timestamppb.New(time.Unix(r.RunDetails.FinishedAtInSec, 0)), RunDetails: apiRd, } - err := util.NewInvalidInputError("Failed to parse the pipeline source") + var err error + apiRunV2.PluginsInput, err = jsonToPluginsInput(largeTextToString(r.PluginsInputString)) + if err != nil { + return &apiv2beta1.Run{ + RunId: r.UUID, + ExperimentId: r.ExperimentId, + Error: util.ToRpcStatus(util.Wrap(err, "Failed to convert internal run representation to its API counterpart: invalid plugins_input")), + } + } + apiRunV2.PluginsOutput, err = jsonToPluginsOutput(largeTextToString(r.PluginsOutputString)) + if err != nil { + return &apiv2beta1.Run{ + RunId: r.UUID, + ExperimentId: r.ExperimentId, + Error: util.ToRpcStatus(util.Wrap(err, "Failed to convert internal run representation to its API counterpart: invalid plugins_output")), + } + } + pluginLimitsConfig, err := common.GetPluginLimitsConfig() + if err != nil { + return &apiv2beta1.Run{ + RunId: r.UUID, + ExperimentId: r.ExperimentId, + Error: util.ToRpcStatus(util.Wrap(err, "Failed to convert internal run representation to its API counterpart: invalid plugins_output")), + } + } + if err = validatePluginsOutputWithLimits(apiRunV2.PluginsOutput, pluginLimitsConfig); err != nil { + return &apiv2beta1.Run{ + RunId: r.UUID, + ExperimentId: r.ExperimentId, + Error: util.ToRpcStatus(util.Wrap(err, "Failed to convert internal run representation to its API counterpart: invalid plugins_output")), + } + } + err = util.NewInvalidInputError("Failed to parse the pipeline source") if r.PipelineSpec.PipelineVersionId != "" 
{ apiRunV2.PipelineSource = &apiv2beta1.Run_PipelineVersionReference{ PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ @@ -1870,6 +1959,7 @@ func toModelJob(j interface{}) (*model.Job, error) { var maxConcur, createTime, updateTime int64 var noCatchup, isEnabled bool var trigger *model.Trigger + var jobPluginsInputStr *string resRefs := make([]*model.ResourceReference, 0) switch apiJob := j.(type) { case *apiv1beta1.Job: @@ -1976,6 +2066,18 @@ func toModelJob(j interface{}) (*model.Job, error) { k8sName = jobName specParams = "" workflowSpec = "" + + jobPluginsInputStr, err = pluginsInputToJSON(apiJob.GetPluginsInput()) + if err != nil { + return nil, util.NewInternalServerError(err, "Failed to convert plugins_input to JSON") + } + pluginLimitsConfig, err := common.GetPluginLimitsConfig() + if err != nil { + return nil, util.NewInvalidInputError("Invalid plugins limits configuration: %v", err) + } + if err = validatePluginsInputLimits(apiJob.GetPluginsInput(), pluginLimitsConfig); err != nil { + return nil, util.NewInvalidInputError("Invalid plugins_input: %v", err) + } default: return nil, util.NewUnknownApiVersionError("RecurringRun", j) } @@ -2021,6 +2123,7 @@ func toModelJob(j interface{}) (*model.Job, error) { Conditions: status.ToString(), ExperimentId: experimentId, ResourceReferences: resRefs, + PluginsInputString: stringToLargeText(jobPluginsInputStr), Trigger: *trigger, PipelineSpec: model.PipelineSpec{ PipelineId: pipelineId, @@ -2256,6 +2359,14 @@ func toApiRecurringRun(j *model.Job) *apiv2beta1.RecurringRun { Namespace: j.Namespace, ExperimentId: j.ExperimentId, } + var err error + apiRecurringRunV2.PluginsInput, err = jsonToPluginsInput(largeTextToString(j.PluginsInputString)) + if err != nil { + return &apiv2beta1.RecurringRun{ + RecurringRunId: j.UUID, + Error: util.ToRpcStatus(util.Wrap(err, "Failed to convert recurring run's internal representation to its API counterpart: invalid plugins_input")), + } + } if 
j.PipelineSpec.PipelineId == "" && j.PipelineSpec.PipelineVersionId == "" { spec, err := YamlStringToPipelineSpecStruct(string(j.PipelineSpecManifest)) @@ -2508,3 +2619,328 @@ func toApiRuntimeStatuses(s []*model.RuntimeStatus) []*apiv2beta1.RuntimeStatus } return statuses } + +func largeTextToString(lt *model.LargeText) *string { + if lt == nil { + return nil + } + s := string(*lt) + return &s +} + +func stringToLargeText(s *string) *model.LargeText { + if s == nil || *s == "" { + return nil + } + lt := model.LargeText(*s) + return < +} + +func pluginsInputToJSON(pluginsInput map[string]*structpb.Struct) (*string, error) { + if len(pluginsInput) == 0 { + return nil, nil + } + raw := make(map[string]json.RawMessage, len(pluginsInput)) + for k, v := range pluginsInput { + b, err := protojson.Marshal(v) + if err != nil { + return nil, fmt.Errorf("marshal plugins_input[%q]: %w", k, err) + } + raw[k] = b + } + out, err := json.Marshal(raw) + if err != nil { + return nil, fmt.Errorf("marshal plugins_input map: %w", err) + } + s := string(out) + return &s, nil +} + +func jsonToPluginsInput(jsonStr *string) (map[string]*structpb.Struct, error) { + if jsonStr == nil || *jsonStr == "" { + return nil, nil + } + var raw map[string]json.RawMessage + if err := json.Unmarshal([]byte(*jsonStr), &raw); err != nil { + return nil, fmt.Errorf("unmarshal plugins_input: %w", err) + } + result := make(map[string]*structpb.Struct, len(raw)) + for k, v := range raw { + st := &structpb.Struct{} + if err := protojson.Unmarshal(v, st); err != nil { + return nil, fmt.Errorf("unmarshal plugins_input[%q]: %w", k, err) + } + result[k] = st + } + return result, nil +} + +func pluginsOutputToJSON(pluginsOutput map[string]*apiv2beta1.PluginOutput) (*string, error) { + if len(pluginsOutput) == 0 { + return nil, nil + } + raw := make(map[string]json.RawMessage, len(pluginsOutput)) + for k, v := range pluginsOutput { + b, err := protojson.Marshal(v) + if err != nil { + return nil, fmt.Errorf("marshal 
plugins_output[%q]: %w", k, err) + } + raw[k] = b + } + out, err := json.Marshal(raw) + if err != nil { + return nil, fmt.Errorf("marshal plugins_output map: %w", err) + } + s := string(out) + return &s, nil +} + +func validatePluginsOutput(pluginsOutput map[string]*apiv2beta1.PluginOutput) error { + limits, err := common.GetPluginLimitsConfig() + if err != nil { + return fmt.Errorf("%s: %w", pluginErrInvalidLimitsConfig, err) + } + return validatePluginsOutputWithLimits(pluginsOutput, limits) +} + +func validatePluginsOutputWithLimits(pluginsOutput map[string]*apiv2beta1.PluginOutput, limits common.PluginLimitsConfig) error { + if err := validatePluginsOutputLimits(pluginsOutput, limits); err != nil { + return err + } + for pluginKey, output := range pluginsOutput { + if output == nil { + continue + } + if err := validatePluginOutputEntries(pluginKey, output.Entries); err != nil { + return err + } + } + return nil +} + +func validatePluginsInputLimits(pluginsInput map[string]*structpb.Struct, limits common.PluginLimitsConfig) error { + if len(pluginsInput) > limits.MaxKeys { + return fmt.Errorf("%s (%d) exceeds maximum %d", pluginErrPluginsInputTooManyKeys, len(pluginsInput), limits.MaxKeys) + } + raw := make(map[string]json.RawMessage, len(pluginsInput)) + for pluginKey, pluginStruct := range pluginsInput { + if pluginStruct == nil { + return fmt.Errorf(pluginErrPluginsInputNilEntry, pluginKey) + } + depth, err := structDepth(pluginStruct) + if err != nil { + return fmt.Errorf(pluginErrPluginsInputInvalidValue+": %w", pluginKey, err) + } + if depth > limits.MaxNestingDepth { + return fmt.Errorf(pluginErrPluginsInputNestingDepth+" %d", pluginKey, limits.MaxNestingDepth) + } + pluginBytes, err := protojson.Marshal(pluginStruct) + if err != nil { + return fmt.Errorf(pluginErrPluginsInputMarshalEntry, pluginKey, err) + } + if len(pluginBytes) > limits.MaxPayloadBytes { + return fmt.Errorf(pluginErrPluginsInputEntrySize+pluginErrExceedsMaxBytes, pluginKey, 
len(pluginBytes), limits.MaxPayloadBytes) + } + raw[pluginKey] = pluginBytes + } + serialized, err := json.Marshal(raw) + if err != nil { + return fmt.Errorf(pluginErrPluginsInputMarshalMap, err) + } + if len(serialized) > limits.MaxTotalPayloadBytes { + return fmt.Errorf(pluginErrPluginsInputTotalSize+pluginErrExceedsMaxBytes, len(serialized), limits.MaxTotalPayloadBytes) + } + return nil +} + +func validatePluginsOutputLimits(pluginsOutput map[string]*apiv2beta1.PluginOutput, limits common.PluginLimitsConfig) error { + if len(pluginsOutput) > limits.MaxKeys { + return fmt.Errorf("%s (%d) exceeds maximum %d", pluginErrPluginsOutputTooManyKeys, len(pluginsOutput), limits.MaxKeys) + } + raw := make(map[string]json.RawMessage, len(pluginsOutput)) + for pluginKey, output := range pluginsOutput { + if err := validateSinglePluginOutputLimit(pluginKey, output, limits); err != nil { + return err + } + if output == nil { + continue + } + pluginBytes, err := protojson.Marshal(output) + if err != nil { + return fmt.Errorf(pluginErrPluginsOutputMarshalEntry, pluginKey, err) + } + if len(pluginBytes) > limits.MaxPayloadBytes { + return fmt.Errorf( + pluginErrPluginsOutputEntrySize+pluginErrExceedsMaxBytes, + pluginKey, + len(pluginBytes), + limits.MaxPayloadBytes, + ) + } + raw[pluginKey] = pluginBytes + } + serialized, err := json.Marshal(raw) + if err != nil { + return fmt.Errorf(pluginErrPluginsOutputMarshalMap, err) + } + if len(serialized) > limits.MaxTotalPayloadBytes { + return fmt.Errorf(pluginErrPluginsOutputTotalSize+pluginErrExceedsMaxBytes, len(serialized), limits.MaxTotalPayloadBytes) + } + return nil +} + +func validateSinglePluginOutputLimit( + pluginKey string, + output *apiv2beta1.PluginOutput, + limits common.PluginLimitsConfig, +) error { + if output == nil { + return nil + } + for entryKey, metadata := range output.Entries { + if metadata == nil { + return fmt.Errorf(pluginErrPluginsOutputNilMetadata, pluginKey, entryKey) + } + if metadata.Value == nil { + 
return fmt.Errorf(pluginErrPluginsOutputNilValue, pluginKey, entryKey) + } + depth, err := valueDepth(metadata.Value) + if err != nil { + return fmt.Errorf(pluginErrPluginsOutputInvalidValue+": %w", pluginKey, entryKey, err) + } + if depth > limits.MaxNestingDepth { + return fmt.Errorf(pluginErrPluginsOutputNestingDepth+" %d", pluginKey, entryKey, limits.MaxNestingDepth) + } + } + return nil +} + +func structDepth(s *structpb.Struct) (int, error) { + if s == nil { + return 0, fmt.Errorf(pluginErrStructValueNil) + } + maxDepth := 1 + for fieldKey, fieldValue := range s.Fields { + if fieldValue == nil { + return 0, fmt.Errorf(pluginErrStructFieldNil, fieldKey) + } + fieldDepth, err := valueDepth(fieldValue) + if err != nil { + return 0, err + } + currentDepth := 1 + fieldDepth + if currentDepth > maxDepth { + maxDepth = currentDepth + } + } + return maxDepth, nil +} + +func valueDepth(v *structpb.Value) (int, error) { + if v == nil { + return 0, fmt.Errorf(pluginErrValueNil) + } + switch kind := v.Kind.(type) { + case *structpb.Value_StructValue: + return structDepth(kind.StructValue) + case *structpb.Value_ListValue: + maxDepth := 1 + for _, item := range kind.ListValue.Values { + itemDepth, err := valueDepth(item) + if err != nil { + return 0, err + } + currentDepth := 1 + itemDepth + if currentDepth > maxDepth { + maxDepth = currentDepth + } + } + return maxDepth, nil + case *structpb.Value_NullValue, *structpb.Value_NumberValue, *structpb.Value_StringValue, *structpb.Value_BoolValue: + return 0, nil + default: + return 0, fmt.Errorf(pluginErrValueKindUnset) + } +} + +func validatePluginOutputEntries(pluginKey string, entries map[string]*apiv2beta1.MetadataValue) error { + for entryKey, metadata := range entries { + if metadata == nil || metadata.Value == nil { + continue + } + if metadata.GetRenderType() != apiv2beta1.MetadataValue_URL { + continue + } + if err := validateURLMetadataValue(pluginKey, entryKey, metadata); err != nil { + return err + } + } + return 
nil +} + +func validateURLMetadataValue(pluginKey string, entryKey string, metadata *apiv2beta1.MetadataValue) error { + urlValue, err := getURLMetadataString(pluginKey, entryKey, metadata) + if err != nil { + return err + } + lowerTrimmed := strings.ToLower(urlValue) + if hasDisallowedURLSchemePrefix(lowerTrimmed) { + return fmt.Errorf("plugins_output[%q].entries[%q] has disallowed URL scheme", pluginKey, entryKey) + } + parsed, err := url.Parse(urlValue) + if err != nil { + return fmt.Errorf("plugins_output[%q].entries[%q] has invalid URL: %w", pluginKey, entryKey, err) + } + if parsed.Scheme == "" || parsed.Host == "" { + return fmt.Errorf("plugins_output[%q].entries[%q] has invalid URL: missing scheme or host", pluginKey, entryKey) + } + if !isAllowedURLScheme(parsed.Scheme) { + return fmt.Errorf("plugins_output[%q].entries[%q] URL scheme must be http or https", pluginKey, entryKey) + } + return nil +} + +func getURLMetadataString(pluginKey string, entryKey string, metadata *apiv2beta1.MetadataValue) (string, error) { + stringValue, isStringValue := metadata.Value.Kind.(*structpb.Value_StringValue) + if !isStringValue { + return "", fmt.Errorf("plugins_output[%q].entries[%q] URL render_type requires string value", pluginKey, entryKey) + } + return strings.TrimSpace(stringValue.StringValue), nil +} + +func hasDisallowedURLSchemePrefix(urlValueLower string) bool { + for _, disallowedScheme := range []string{ + urlSchemeJavaScript, + urlSchemeData, + urlSchemeVBScript, + } { + if strings.HasPrefix(urlValueLower, disallowedScheme) { + return true + } + } + return false +} + +func isAllowedURLScheme(urlScheme string) bool { + lowerScheme := strings.ToLower(urlScheme) + return lowerScheme == "http" || lowerScheme == "https" +} + +func jsonToPluginsOutput(jsonStr *string) (map[string]*apiv2beta1.PluginOutput, error) { + if jsonStr == nil || *jsonStr == "" { + return nil, nil + } + var raw map[string]json.RawMessage + if err := json.Unmarshal([]byte(*jsonStr), &raw); 
err != nil { + return nil, fmt.Errorf("unmarshal plugins_output: %w", err) + } + result := make(map[string]*apiv2beta1.PluginOutput, len(raw)) + for k, v := range raw { + po := &apiv2beta1.PluginOutput{} + if err := protojson.Unmarshal(v, po); err != nil { + return nil, fmt.Errorf("unmarshal plugins_output[%q]: %w", k, err) + } + result[k] = po + } + return result, nil +} diff --git a/backend/src/apiserver/server/api_converter_test.go b/backend/src/apiserver/server/api_converter_test.go index 118586f8ff5..8da9078efaf 100644 --- a/backend/src/apiserver/server/api_converter_test.go +++ b/backend/src/apiserver/server/api_converter_test.go @@ -15,6 +15,7 @@ package server import ( + "fmt" "strings" "testing" "time" @@ -23,17 +24,75 @@ import ( "github.com/google/go-cmp/cmp" apiv1beta1 "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + "github.com/kubeflow/pipelines/backend/src/apiserver/common" "github.com/kubeflow/pipelines/backend/src/apiserver/model" "github.com/kubeflow/pipelines/backend/src/apiserver/template" "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/pkg/errors" + "github.com/spf13/viper" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "google.golang.org/genproto/googleapis/rpc/status" "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const testPluginsExperimentName = "my-exp" +const testPluginsRecurringExperimentName = "recurring-exp" +const testPluginsJobName = "test-job" +const testPluginsUnsafeJavaScriptURL = "javascript:alert(1)" +const testPluginsURLBase = "https://example.com/" + +func setPluginLimitsConfigForTest(t *testing.T, values map[string]string) { + t.Helper() + t.Cleanup(func() { + viper.Reset() + viper.Set(common.PipelineURLValidationEnabled, "false") + }) + viper.Reset() + 
viper.Set(common.PipelineURLValidationEnabled, "false") + for k, v := range values { + viper.Set(k, v) + } +} + +func strPtr(s string) *string { + return &s +} + +func testLargeTextPtr(s string) *model.LargeText { + lt := model.LargeText(s) + return &lt +} + +// createPluginInputMapWithNKeys builds n plugin input entries with a small valid payload. +func createPluginInputMapWithNKeys(n int) map[string]*structpb.Struct { + input := make(map[string]*structpb.Struct, n) + for i := range n { + input[fmt.Sprintf("plugin-%d", i)] = &structpb.Struct{ + Fields: map[string]*structpb.Value{"k": structpb.NewStringValue("ok")}, + } + } + return input +} + +func createPluginOutputMapWithNKeys(n int) map[string]*apiv2beta1.PluginOutput { + output := make(map[string]*apiv2beta1.PluginOutput, n) + for i := range n { + output[fmt.Sprintf("plugin-%d", i)] = &apiv2beta1.PluginOutput{ + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsURLBase), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + } + } + return output +} + func TestToModelExperiment(t *testing.T) { tests := []struct { name string @@ -4746,6 +4805,104 @@ func TestToApiRunStorageStateV1(t *testing.T) { } } +func TestPluginsInputToJSON(t *testing.T) { + t.Run("nil map returns nil", func(t *testing.T) { + got, err := pluginsInputToJSON(nil) + require.NoError(t, err) + assert.Nil(t, got, "nil input should produce nil *string, not empty string") + }) + + t.Run("empty map returns nil", func(t *testing.T) { + got, err := pluginsInputToJSON(map[string]*structpb.Struct{}) + require.NoError(t, err) + assert.Nil(t, got, "empty input should produce nil *string, not empty string") + }) + + t.Run("single key round-trips", func(t *testing.T) { + input := map[string]*structpb.Struct{ + "mlflow": {Fields: map[string]*structpb.Value{ + "experiment_name": structpb.NewStringValue(testPluginsExperimentName), + }}, + } + got, err 
:= pluginsInputToJSON(input) + require.NoError(t, err) + require.NotNil(t, got) + parsed, err := jsonToPluginsInput(got) + require.NoError(t, err) + require.Len(t, parsed, 1) + require.Contains(t, parsed, "mlflow") + assert.Equal(t, input["mlflow"].Fields, parsed["mlflow"].Fields) + }) + + t.Run("multiple keys round-trip", func(t *testing.T) { + input := map[string]*structpb.Struct{ + "mlflow": {Fields: map[string]*structpb.Value{ + "experiment_name": structpb.NewStringValue(testPluginsExperimentName), + }}, + "other": {Fields: map[string]*structpb.Value{ + "key": structpb.NewBoolValue(true), + }}, + } + got, err := pluginsInputToJSON(input) + require.NoError(t, err) + require.NotNil(t, got) + parsed, err := jsonToPluginsInput(got) + require.NoError(t, err) + require.Len(t, parsed, len(input)) + for k, v := range input { + require.Contains(t, parsed, k) + assert.Equal(t, v.Fields, parsed[k].Fields) + } + }) +} + +func TestJSONToPluginsInput(t *testing.T) { + tests := []struct { + name string + input *string + wantNil bool + wantErr bool + }{ + { + name: "nil pointer", + input: nil, + wantNil: true, + }, + { + name: "empty string", + input: strPtr(""), + wantNil: true, + }, + { + name: "valid JSON", + input: strPtr(`{"mlflow":{"experiment_name":"` + testPluginsExperimentName + `"}}`), + }, + { + name: "malformed JSON", + input: strPtr(`{not valid`), + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := jsonToPluginsInput(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + if tt.wantNil { + assert.Nil(t, got) + return + } + require.NotNil(t, got) + require.Len(t, got, 1) + require.Contains(t, got, "mlflow") + assert.Equal(t, testPluginsExperimentName, got["mlflow"].Fields["experiment_name"].GetStringValue()) + }) + } +} + func TestToApiExperimentStorageState(t *testing.T) { tests := []struct { name string @@ -4770,6 +4927,110 @@ func TestToApiExperimentStorageState(t 
*testing.T) { } } +func TestPluginsOutputToJSON(t *testing.T) { + t.Run("nil map returns nil", func(t *testing.T) { + got, err := pluginsOutputToJSON(nil) + require.NoError(t, err) + assert.Nil(t, got, "nil input should produce nil *string, not empty string") + }) + + t.Run("empty map returns nil", func(t *testing.T) { + got, err := pluginsOutputToJSON(map[string]*apiv2beta1.PluginOutput{}) + require.NoError(t, err) + assert.Nil(t, got, "empty input should produce nil *string, not empty string") + }) + + t.Run("with entries and state round-trips", func(t *testing.T) { + input := map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("https://mlflow.example.com/runs/abc"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + "experiment_id": { + Value: structpb.NewStringValue("42"), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_SUCCEEDED, + StateMessage: "MLflow run created", + }, + "other": { + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + StateMessage: "in progress", + }, + } + got, err := pluginsOutputToJSON(input) + require.NoError(t, err) + require.NotNil(t, got) + parsed, err := jsonToPluginsOutput(got) + require.NoError(t, err) + require.Len(t, parsed, len(input)) + for k, v := range input { + require.Contains(t, parsed, k) + assert.Equal(t, v.State, parsed[k].State) + assert.Equal(t, v.StateMessage, parsed[k].StateMessage) + require.Len(t, parsed[k].Entries, len(v.Entries)) + for ek, ev := range v.Entries { + require.Contains(t, parsed[k].Entries, ek) + assert.Equal(t, ev.Value.GetStringValue(), parsed[k].Entries[ek].Value.GetStringValue()) + assert.Equal(t, ev.RenderType, parsed[k].Entries[ek].RenderType) + } + } + }) +} + +func TestJSONToPluginsOutput(t *testing.T) { + tests := []struct { + name string + input *string + wantNil bool + wantErr bool + }{ + { + name: "nil pointer", + input: nil, + wantNil: true, + }, + { + name: "empty string", + input: 
strPtr(""), + wantNil: true, + }, + { + name: "malformed JSON", + input: strPtr(`{broken`), + wantErr: true, + }, + { + name: "valid JSON with enum fields", + input: strPtr(`{"mlflow":{"entries":{"run_url":{"value":"https://mlflow.example.com","renderType":"URL"}},"state":"PLUGIN_SUCCEEDED","stateMessage":"ok"}}`), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := jsonToPluginsOutput(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + if tt.wantNil { + assert.Nil(t, got) + return + } + require.NotNil(t, got) + require.Len(t, got, 1) + require.Contains(t, got, "mlflow") + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, got["mlflow"].State) + assert.Equal(t, "ok", got["mlflow"].StateMessage) + require.Len(t, got["mlflow"].Entries, 1) + require.Contains(t, got["mlflow"].Entries, "run_url") + assert.Equal(t, "https://mlflow.example.com", got["mlflow"].Entries["run_url"].Value.GetStringValue()) + }) + } +} + func TestToApiExperimentStorageStateV1(t *testing.T) { tests := []struct { name string @@ -4794,6 +5055,223 @@ func TestToApiExperimentStorageStateV1(t *testing.T) { } } +func TestValidatePluginsOutput(t *testing.T) { + tests := []struct { + name string + input map[string]*apiv2beta1.PluginOutput + wantErr bool + }{ + { + name: "nil map", + input: nil, + }, + { + name: "empty map", + input: map[string]*apiv2beta1.PluginOutput{}, + }, + { + name: "valid http URL content type", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("http://example.com/run/1"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + }, + { + name: "valid https URL content type", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("https://example.com/run/1"), + RenderType: 
apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + }, + { + name: "plain string without scheme", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_id": { + Value: structpb.NewStringValue("abc123"), + }, + }, + }, + }, + }, + { + name: "javascript scheme without URL content type is allowed", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsUnsafeJavaScriptURL), + }, + }, + }, + }, + }, + { + name: "data scheme without URL content type is allowed", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg=="), + }, + }, + }, + }, + }, + { + name: "vbscript scheme without URL content type is allowed", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("vbscript:msgbox(1)"), + }, + }, + }, + }, + }, + { + name: "url content type with ftp rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("ftp://example.com/run/1"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with malformed URL rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("http://%"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with empty string rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + 
Value: structpb.NewStringValue(""), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with whitespace-only string rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(" "), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with javascript rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsUnsafeJavaScriptURL), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with mixed-case javascript and leading spaces rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(" JaVaScRiPt:alert(1)"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "mixed valid and invalid entries", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_id": { + Value: structpb.NewStringValue("abc123"), + }, + "run_url": { + Value: structpb.NewStringValue(testPluginsUnsafeJavaScriptURL), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + { + name: "url content type with non-string value rejected", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewNumberValue(42), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validatePluginsOutput(tt.input) + if tt.wantErr { 
+ require.Error(t, err) + return + } + require.NoError(t, err) + }) + } +} + func TestToApiPipelineVersionsV1_Empty(t *testing.T) { result := toApiPipelineVersionsV1([]*model.PipelineVersion{}) assert.NotNil(t, result) @@ -4913,3 +5391,695 @@ func TestToPipelineSpecRuntimeConfig_InvalidJSON(t *testing.T) { // v1 parameter parsing, causing toPipelineSpecRuntimeConfig to return nil. assert.Nil(t, result) } + +func TestValidatePluginsInputLimits(t *testing.T) { + tooLongPayloadValue := strings.Repeat("a", 55000) + + tests := []struct { + name string + input map[string]*structpb.Struct + wantErrContains string + }{ + { + name: "nil map", + input: nil, + }, + { + name: "empty map", + input: map[string]*structpb.Struct{}, + }, + { + name: "accepts multiple small plugin input payloads", + input: map[string]*structpb.Struct{ + "plugin-0": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue("ok")}}, + "plugin-1": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue("ok")}}, + }, + }, + { + name: "rejects plugin input map with too many keys", + input: createPluginInputMapWithNKeys(common.DefaultPluginMaxKeys + 1), + wantErrContains: pluginErrPluginsInputTooManyKeys, + }, + { + name: "rejects plugin input entry exceeding per-plugin size", + input: map[string]*structpb.Struct{ + "plugin-0": { + Fields: map[string]*structpb.Value{ + "k": structpb.NewStringValue(strings.Repeat("a", common.DefaultPluginMaxPayloadBytes*2)), + }, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsInputEntrySize, "plugin-0"), + }, + { + name: "rejects plugin input map exceeding total payload size", + input: map[string]*structpb.Struct{ + "plugin-0": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue(tooLongPayloadValue)}}, + "plugin-1": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue(tooLongPayloadValue)}}, + "plugin-2": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue(tooLongPayloadValue)}}, + "plugin-3": {Fields: 
map[string]*structpb.Value{"k": structpb.NewStringValue(tooLongPayloadValue)}}, + "plugin-4": {Fields: map[string]*structpb.Value{"k": structpb.NewStringValue(tooLongPayloadValue)}}, + }, + wantErrContains: pluginErrPluginsInputTotalSize, + }, + { + name: "nesting too deep", + input: map[string]*structpb.Struct{ + "mlflow": makeDeepStruct(common.DefaultPluginMaxNestingDepth + 1), + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsInputNestingDepth, "mlflow"), + }, + { + name: "at configured boundaries", + input: map[string]*structpb.Struct{ + "mlflow": makeDeepStruct(common.DefaultPluginMaxNestingDepth), + }, + }, + { + name: "reject nil plugin struct entry", + input: map[string]*structpb.Struct{ + "mlflow": nil, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsInputNilEntry, "mlflow"), + }, + { + name: "reject unset value kind in plugins_input", + input: map[string]*structpb.Struct{ + "mlflow": { + Fields: map[string]*structpb.Value{ + "broken": {}, + }, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsInputInvalidValue, "mlflow"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsInputLimits(tt.input, limits) + if tt.wantErrContains != "" { + require.Error(t, err) + require.ErrorContains(t, err, tt.wantErrContains) + return + } + require.NoError(t, err) + }) + } +} + +func TestValidatePluginsOutputLimits(t *testing.T) { + tooLongPayloadValue := strings.Repeat("a", 55000) + + tests := []struct { + name string + input map[string]*apiv2beta1.PluginOutput + wantErrContains string + }{ + { + name: "allow nil plugin output entry", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": nil, + }, + }, + { + name: "accepts multiple small plugin output payloads", + input: map[string]*apiv2beta1.PluginOutput{ + "plugin-0": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": {Value: 
structpb.NewStringValue(testPluginsURLBase), RenderType: apiv2beta1.MetadataValue_URL.Enum()}, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + "plugin-1": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": {Value: structpb.NewStringValue(testPluginsURLBase), RenderType: apiv2beta1.MetadataValue_URL.Enum()}, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + }, + { + name: "rejects plugin output map with too many keys", + input: createPluginOutputMapWithNKeys(common.DefaultPluginMaxKeys + 1), + wantErrContains: pluginErrPluginsOutputTooManyKeys, + }, + { + name: "rejects plugin output entry exceeding per-plugin size", + input: map[string]*apiv2beta1.PluginOutput{ + "plugin-0": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsURLBase + strings.Repeat("a", common.DefaultPluginMaxPayloadBytes*2)), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsOutputEntrySize, "plugin-0"), + }, + { + name: "rejects plugin output map exceeding total payload size", + input: map[string]*apiv2beta1.PluginOutput{ + "plugin-0": {Entries: map[string]*apiv2beta1.MetadataValue{"run_url": {Value: structpb.NewStringValue(testPluginsURLBase + tooLongPayloadValue), RenderType: apiv2beta1.MetadataValue_URL.Enum()}}, State: apiv2beta1.PluginState_PLUGIN_RUNNING}, + "plugin-1": {Entries: map[string]*apiv2beta1.MetadataValue{"run_url": {Value: structpb.NewStringValue(testPluginsURLBase + tooLongPayloadValue), RenderType: apiv2beta1.MetadataValue_URL.Enum()}}, State: apiv2beta1.PluginState_PLUGIN_RUNNING}, + "plugin-2": {Entries: map[string]*apiv2beta1.MetadataValue{"run_url": {Value: structpb.NewStringValue(testPluginsURLBase + tooLongPayloadValue), RenderType: apiv2beta1.MetadataValue_URL.Enum()}}, State: apiv2beta1.PluginState_PLUGIN_RUNNING}, + "plugin-3": {Entries: 
map[string]*apiv2beta1.MetadataValue{"run_url": {Value: structpb.NewStringValue(testPluginsURLBase + tooLongPayloadValue), RenderType: apiv2beta1.MetadataValue_URL.Enum()}}, State: apiv2beta1.PluginState_PLUGIN_RUNNING}, + "plugin-4": {Entries: map[string]*apiv2beta1.MetadataValue{"run_url": {Value: structpb.NewStringValue(testPluginsURLBase + tooLongPayloadValue), RenderType: apiv2beta1.MetadataValue_URL.Enum()}}, State: apiv2beta1.PluginState_PLUGIN_RUNNING}, + }, + wantErrContains: pluginErrPluginsOutputTotalSize, + }, + { + name: "nested metadata value too deep", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "nested": { + Value: makeDeepValue(common.DefaultPluginMaxNestingDepth + 1), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsOutputNestingDepth, "mlflow", "nested"), + }, + { + name: "at configured boundaries", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "nested": { + Value: makeDeepValue(common.DefaultPluginMaxNestingDepth), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + }, + { + name: "reject nil metadata entry", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": nil, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsOutputNilMetadata, "mlflow", "run_url"), + }, + { + name: "reject nil metadata value", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: nil, + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsOutputNilValue, "mlflow", "run_url"), + }, + { + name: "reject unset value kind in plugins_output", + input: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + 
Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: &structpb.Value{}, + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + wantErrContains: fmt.Sprintf(pluginErrPluginsOutputInvalidValue, "mlflow", "run_url"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsOutputLimits(tt.input, limits) + if tt.wantErrContains != "" { + require.Error(t, err) + require.ErrorContains(t, err, tt.wantErrContains) + return + } + require.NoError(t, err) + }) + } +} + +func TestValidatePluginsInputLimitsUsesConfiguredOverrides(t *testing.T) { + input := map[string]*structpb.Struct{ + "mlflow": { + Fields: map[string]*structpb.Value{ + "k": structpb.NewStringValue("ok"), + }, + }, + "other": { + Fields: map[string]*structpb.Value{ + "k": structpb.NewStringValue("ok"), + }, + }, + } + + setPluginLimitsConfigForTest(t, map[string]string{ + common.PluginMaxKeys: "1", + }) + + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsInputLimits(input, limits) + require.Error(t, err) + assert.ErrorContains(t, err, "exceeds maximum 1") +} + +func TestValidatePluginsOutputLimitsUsesConfiguredOverrides(t *testing.T) { + output := map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsURLBase), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + } + + setPluginLimitsConfigForTest(t, map[string]string{ + common.PluginMaxPayloadBytes: "64", + }) + + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsOutputLimits(output, limits) + require.Error(t, err) + assert.ErrorContains(t, err, "exceeds maximum 64 bytes") +} + +func TestValidatePluginsInputLimitsUsesNestingDepthOverride(t *testing.T) { + 
input := map[string]*structpb.Struct{ + "mlflow": { + Fields: map[string]*structpb.Value{ + "nested": makeDeepValue(3), + }, + }, + } + + setPluginLimitsConfigForTest(t, map[string]string{ + common.PluginMaxNestingDepth: "2", + }) + + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsInputLimits(input, limits) + require.Error(t, err) + assert.ErrorContains(t, err, "nesting depth exceeds maximum 2") +} + +func TestValidatePluginsOutputLimitsUsesTotalPayloadOverride(t *testing.T) { + output := map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue("https://example.com/run1"), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + "other": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "status": { + Value: structpb.NewStringValue("https://example.com/run2"), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_SUCCEEDED, + }, + } + + setPluginLimitsConfigForTest(t, map[string]string{ + common.PluginMaxTotalPayloadBytes: "10", + common.PluginMaxPayloadBytes: "10", + }) + + limits, err := common.GetPluginLimitsConfig() + require.NoError(t, err) + err = validatePluginsOutputLimits(output, limits) + require.Error(t, err) + assert.ErrorContains(t, err, "exceeds maximum 10 bytes") +} + +func makeDeepStruct(depth int) *structpb.Struct { + current := structpb.NewStringValue("leaf") + for range depth { + current = structpb.NewStructValue(&structpb.Struct{ + Fields: map[string]*structpb.Value{"nested": current}, + }) + } + return current.GetStructValue() +} + +func makeDeepValue(depth int) *structpb.Value { + current := structpb.NewStringValue("leaf") + for range depth { + current = structpb.NewStructValue(&structpb.Struct{ + Fields: map[string]*structpb.Value{"nested": current}, + }) + } + return current +} + +func TestToModelRunPluginsFields(t *testing.T) { + pluginsInput := 
map[string]*structpb.Struct{ + "mlflow": {Fields: map[string]*structpb.Value{ + "experiment_name": structpb.NewStringValue(testPluginsExperimentName), + }}, + "other": {Fields: map[string]*structpb.Value{ + "key": structpb.NewBoolValue(true), + }}, + } + pluginsOutput := map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "root_run_id": {Value: structpb.NewStringValue("abc123")}, + }, + State: apiv2beta1.PluginState_PLUGIN_SUCCEEDED, + StateMessage: "ok", + }, + "other": { + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + StateMessage: "in progress", + }, + } + + t.Run("with plugins fields", func(t *testing.T) { + run := &apiv2beta1.Run{ + RunId: "run1", + DisplayName: "test", + PipelineSource: &apiv2beta1.Run_PipelineVersionReference{ + PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ + PipelineId: "p1", PipelineVersionId: "pv1", + }, + }, + PluginsInput: pluginsInput, + PluginsOutput: pluginsOutput, + } + got, err := toModelRun(run) + require.NoError(t, err) + require.NotNil(t, got.PluginsInputString) + require.NotNil(t, got.PluginsOutputString) + + parsedInput, err := jsonToPluginsInput(largeTextToString(got.PluginsInputString)) + require.NoError(t, err) + assert.Equal(t, testPluginsExperimentName, parsedInput["mlflow"].Fields["experiment_name"].GetStringValue()) + + parsedOutput, err := jsonToPluginsOutput(largeTextToString(got.PluginsOutputString)) + require.NoError(t, err) + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, parsedOutput["mlflow"].State) + assert.Equal(t, "abc123", parsedOutput["mlflow"].Entries["root_run_id"].Value.GetStringValue()) + }) + + t.Run("nil plugins fields", func(t *testing.T) { + apiRun := &apiv2beta1.Run{ + RunId: "run2", + DisplayName: "test-nil", + PipelineSource: &apiv2beta1.Run_PipelineVersionReference{ + PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ + PipelineId: "p1", PipelineVersionId: "pv1", + }, + }, + } + got, err := 
toModelRun(apiRun) + require.NoError(t, err) + assert.Nil(t, got.PluginsInputString) + assert.Nil(t, got.PluginsOutputString) + }) + + t.Run("invalid plugins output URL scheme returns error", func(t *testing.T) { + apiRun := &apiv2beta1.Run{ + RunId: "run3", + DisplayName: "test-invalid", + PipelineSource: &apiv2beta1.Run_PipelineVersionReference{ + PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ + PipelineId: "p1", PipelineVersionId: "pv1", + }, + }, + PluginsOutput: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsUnsafeJavaScriptURL), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + }, + }, + } + + _, err := toModelRun(apiRun) + require.Error(t, err) + }) + + t.Run("plugins_input exceeding limits returns error", func(t *testing.T) { + apiRun := &apiv2beta1.Run{ + RunId: "run4", + DisplayName: "test-too-large-input", + PipelineSource: &apiv2beta1.Run_PipelineVersionReference{ + PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ + PipelineId: "p1", PipelineVersionId: "pv1", + }, + }, + PluginsInput: map[string]*structpb.Struct{ + "mlflow": { + Fields: map[string]*structpb.Value{ + "blob": structpb.NewStringValue(strings.Repeat("a", common.DefaultPluginMaxPayloadBytes*2)), + }, + }, + }, + } + + _, err := toModelRun(apiRun) + require.Error(t, err) + }) + + t.Run("plugins_output exceeding limits returns error", func(t *testing.T) { + apiRun := &apiv2beta1.Run{ + RunId: "run5", + DisplayName: "test-too-large-output", + PipelineSource: &apiv2beta1.Run_PipelineVersionReference{ + PipelineVersionReference: &apiv2beta1.PipelineVersionReference{ + PipelineId: "p1", PipelineVersionId: "pv1", + }, + }, + PluginsOutput: map[string]*apiv2beta1.PluginOutput{ + "mlflow": { + Entries: map[string]*apiv2beta1.MetadataValue{ + "run_url": { + Value: structpb.NewStringValue(testPluginsURLBase + strings.Repeat("a", 
common.DefaultPluginMaxPayloadBytes*2)), + RenderType: apiv2beta1.MetadataValue_URL.Enum(), + }, + }, + State: apiv2beta1.PluginState_PLUGIN_RUNNING, + }, + }, + } + + _, err := toModelRun(apiRun) + require.Error(t, err) + }) +} + +func TestToApiRunPluginsFields(t *testing.T) { + inputJSON := `{"mlflow":{"experiment_name":"` + testPluginsExperimentName + `"},"other":{"key":true}}` + outputJSON := `{"mlflow":{"entries":{"root_run_id":{"value":"abc123"}},"state":"PLUGIN_SUCCEEDED","stateMessage":"ok"},"other":{"state":"PLUGIN_RUNNING","stateMessage":"in progress"}}` + + t.Run("with plugins fields", func(t *testing.T) { + modelRun := &model.Run{ + UUID: "run1", + DisplayName: "test", + PipelineSpec: model.PipelineSpec{ + PipelineVersionId: "pv1", + PipelineId: "p1", + }, + RunDetails: model.RunDetails{ + PluginsInputString: testLargeTextPtr(inputJSON), + PluginsOutputString: testLargeTextPtr(outputJSON), + }, + } + got := toApiRun(modelRun) + require.Len(t, got.PluginsInput, 2) + require.Contains(t, got.PluginsInput, "mlflow") + assert.Equal(t, testPluginsExperimentName, got.PluginsInput["mlflow"].Fields["experiment_name"].GetStringValue()) + require.Contains(t, got.PluginsInput, "other") + assert.Equal(t, true, got.PluginsInput["other"].Fields["key"].GetBoolValue()) + + require.Len(t, got.PluginsOutput, 2) + require.Contains(t, got.PluginsOutput, "mlflow") + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_SUCCEEDED, got.PluginsOutput["mlflow"].State) + assert.Equal(t, "abc123", got.PluginsOutput["mlflow"].Entries["root_run_id"].Value.GetStringValue()) + require.Contains(t, got.PluginsOutput, "other") + assert.Equal(t, apiv2beta1.PluginState_PLUGIN_RUNNING, got.PluginsOutput["other"].State) + }) + + t.Run("nil plugins fields", func(t *testing.T) { + modelRun := &model.Run{ + UUID: "run2", + DisplayName: "test-nil", + PipelineSpec: model.PipelineSpec{ + PipelineVersionId: "pv1", + PipelineId: "p1", + }, + RunDetails: model.RunDetails{}, + } + got := toApiRun(modelRun) + 
assert.Nil(t, got.PluginsInput) + assert.Nil(t, got.PluginsOutput) + }) + + t.Run("invalid plugins output URL in storage returns API error", func(t *testing.T) { + modelRun := &model.Run{ + UUID: "run3", + DisplayName: "test-invalid", + PipelineSpec: model.PipelineSpec{ + PipelineVersionId: "pv1", + PipelineId: "p1", + }, + RunDetails: model.RunDetails{ + PluginsOutputString: testLargeTextPtr(`{"mlflow":{"entries":{"run_url":{"value":"` + testPluginsUnsafeJavaScriptURL + `","renderType":"URL"}}}}`), + }, + } + got := toApiRun(modelRun) + require.NotNil(t, got.Error) + assert.Nil(t, got.PluginsOutput) + }) +} + +func TestToModelJobPluginsInput(t *testing.T) { + pluginsInput := map[string]*structpb.Struct{ + "mlflow": {Fields: map[string]*structpb.Value{ + "experiment_name": structpb.NewStringValue(testPluginsRecurringExperimentName), + }}, + "other": {Fields: map[string]*structpb.Value{ + "enabled": structpb.NewBoolValue(true), + }}, + } + + t.Run("with plugins_input", func(t *testing.T) { + apiJob := &apiv2beta1.RecurringRun{ + RecurringRunId: "job1", + DisplayName: testPluginsJobName, + MaxConcurrency: 1, + Mode: apiv2beta1.RecurringRun_ENABLE, + Trigger: &apiv2beta1.Trigger{ + Trigger: &apiv2beta1.Trigger_PeriodicSchedule{ + PeriodicSchedule: &apiv2beta1.PeriodicSchedule{IntervalSecond: 60}, + }, + }, + PluginsInput: pluginsInput, + } + got, err := toModelJob(apiJob) + require.NoError(t, err) + require.NotNil(t, got.PluginsInputString) + + parsedInput, err := jsonToPluginsInput(largeTextToString(got.PluginsInputString)) + require.NoError(t, err) + require.Len(t, parsedInput, 2) + require.Contains(t, parsedInput, "mlflow") + assert.Equal(t, testPluginsRecurringExperimentName, parsedInput["mlflow"].Fields["experiment_name"].GetStringValue()) + require.Contains(t, parsedInput, "other") + assert.Equal(t, true, parsedInput["other"].Fields["enabled"].GetBoolValue()) + }) + + t.Run("nil plugins_input", func(t *testing.T) { + apiJob := &apiv2beta1.RecurringRun{ + 
RecurringRunId: "job2", + DisplayName: "test-job-nil", + MaxConcurrency: 1, + Mode: apiv2beta1.RecurringRun_ENABLE, + Trigger: &apiv2beta1.Trigger{ + Trigger: &apiv2beta1.Trigger_PeriodicSchedule{ + PeriodicSchedule: &apiv2beta1.PeriodicSchedule{IntervalSecond: 60}, + }, + }, + } + got, err := toModelJob(apiJob) + require.NoError(t, err) + assert.Nil(t, got.PluginsInputString) + }) + + t.Run("plugins_input exceeding limits returns error", func(t *testing.T) { + apiJob := &apiv2beta1.RecurringRun{ + RecurringRunId: "job3", + DisplayName: "test-job-too-large", + MaxConcurrency: 1, + Mode: apiv2beta1.RecurringRun_ENABLE, + Trigger: &apiv2beta1.Trigger{ + Trigger: &apiv2beta1.Trigger_PeriodicSchedule{ + PeriodicSchedule: &apiv2beta1.PeriodicSchedule{IntervalSecond: 60}, + }, + }, + PluginsInput: map[string]*structpb.Struct{ + "mlflow": { + Fields: map[string]*structpb.Value{ + "blob": structpb.NewStringValue(strings.Repeat("a", common.DefaultPluginMaxPayloadBytes*2)), + }, + }, + }, + } + + _, err := toModelJob(apiJob) + require.Error(t, err) + }) +} + +func TestToApiRecurringRunPluginsInput(t *testing.T) { + inputJSON := `{"mlflow":{"experiment_name":"` + testPluginsRecurringExperimentName + `"},"other":{"enabled":true}}` + + t.Run("with plugins_input", func(t *testing.T) { + modelJob := &model.Job{ + UUID: "job1", + DisplayName: testPluginsJobName, + K8SName: testPluginsJobName, + Enabled: true, + Conditions: "ENABLED", + MaxConcurrency: 1, + PluginsInputString: testLargeTextPtr(inputJSON), + PipelineSpec: model.PipelineSpec{ + PipelineId: "p1", + PipelineVersionId: "pv1", + }, + } + got := toApiRecurringRun(modelJob) + require.Len(t, got.PluginsInput, 2) + require.Contains(t, got.PluginsInput, "mlflow") + assert.Equal(t, testPluginsRecurringExperimentName, got.PluginsInput["mlflow"].Fields["experiment_name"].GetStringValue()) + require.Contains(t, got.PluginsInput, "other") + assert.Equal(t, true, got.PluginsInput["other"].Fields["enabled"].GetBoolValue()) + }) + + 
t.Run("empty plugins_input", func(t *testing.T) { + modelJob := &model.Job{ + UUID: "job2", + DisplayName: "test-job-empty", + K8SName: "test-job-empty", + Enabled: true, + Conditions: "ENABLED", + MaxConcurrency: 1, + PipelineSpec: model.PipelineSpec{ + PipelineId: "p1", + PipelineVersionId: "pv1", + }, + } + got := toApiRecurringRun(modelJob) + assert.Nil(t, got.PluginsInput) + }) +} diff --git a/backend/src/apiserver/server/run_server.go b/backend/src/apiserver/server/run_server.go index 786b8eca079..9e29d68e410 100644 --- a/backend/src/apiserver/server/run_server.go +++ b/backend/src/apiserver/server/run_server.go @@ -16,6 +16,7 @@ package server import ( "context" + "fmt" "google.golang.org/protobuf/types/known/emptypb" @@ -515,6 +516,14 @@ func (s *RunServer) CreateRun(ctx context.Context, request *apiv2beta1.CreateRun createRunRequests.Inc() } + // plugins_output is server-owned; reject requests that attempt to set it. + if request.GetRun() != nil && request.GetRun().PluginsOutput != nil { + return nil, util.NewBadRequestError( + fmt.Errorf("plugins_output must not be set by the client"), + "plugins_output is server-owned and populated exclusively via plugin lifecycle hooks", + ) + } + modelRun, err := toModelRun(request.GetRun()) if err != nil { return nil, util.Wrap(err, "CreateJob(job.ToV2())Failed to create a run due to conversion error") diff --git a/backend/src/apiserver/storage/db_fake.go b/backend/src/apiserver/storage/db_fake.go index a192b355183..f306c3a9586 100644 --- a/backend/src/apiserver/storage/db_fake.go +++ b/backend/src/apiserver/storage/db_fake.go @@ -29,20 +29,7 @@ func NewFakeDB() (*DB, error) { return nil, util.Wrap(err, "Could not create the GORM database") } // Create tables - if err := dbInstance.AutoMigrate( - &model.Experiment{}, - &model.Job{}, - &model.Pipeline{}, - &model.PipelineVersion{}, - &model.PipelineTag{}, - &model.PipelineVersionTag{}, - &model.ResourceReference{}, - &model.Run{}, - &model.RunMetric{}, - &model.Task{}, 
- &model.DBStatus{}, - &model.DefaultExperiment{}, - ); err != nil { + if err := dbInstance.AutoMigrate(model.AllModels()...); err != nil { return nil, util.Wrap(err, "Failed to automigrate models") } diff --git a/backend/src/apiserver/storage/job_store.go b/backend/src/apiserver/storage/job_store.go index 55f0b4ed812..77462f65c88 100644 --- a/backend/src/apiserver/storage/job_store.go +++ b/backend/src/apiserver/storage/job_store.go @@ -54,6 +54,7 @@ var jobColumns = []string{ "PipelineRoot", "ExperimentUUID", "PipelineVersionId", + "PluginsInput", } type JobStoreInterface interface { @@ -233,7 +234,7 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { var cronScheduleStartTimeInSec, cronScheduleEndTimeInSec, createdAtInSec, periodicScheduleStartTimeInSec, periodicScheduleEndTimeInSec, intervalSecond, updatedAtInSec sql.NullInt64 var cron, resourceReferencesInString, runtimeParameters, pipelineRoot sql.NullString - var experimentId, pipelineVersionId sql.NullString + var experimentID, pipelineVersionID, pluginsInput sql.NullString var enabled, noCatchup bool var maxConcurrency int64 err := r.Scan( @@ -242,17 +243,17 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { &cronScheduleStartTimeInSec, &cronScheduleEndTimeInSec, &cron, &periodicScheduleStartTimeInSec, &periodicScheduleEndTimeInSec, &intervalSecond, &pipelineId, &pipelineName, &pipelineSpecManifest, &workflowSpecManifest, &parameters, - &conditions, &runtimeParameters, &pipelineRoot, &experimentId, - &pipelineVersionId, &resourceReferencesInString) + &conditions, &runtimeParameters, &pipelineRoot, &experimentID, + &pipelineVersionID, &pluginsInput, &resourceReferencesInString) if err != nil { return nil, err } resourceReferences, _ := parseResourceReferences(resourceReferencesInString) - expId := experimentId.String - pvId := pipelineVersionId.String + expID := experimentID.String + pvID := pipelineVersionID.String if len(resourceReferences) > 0 { - if expId == "" { - expId =
model.GetRefIdFromResourceReferences(resourceReferences, model.ExperimentResourceType) + if expID == "" { + expID = model.GetRefIdFromResourceReferences(resourceReferences, model.ExperimentResourceType) } if namespace == "" { namespace = model.GetRefIdFromResourceReferences(resourceReferences, model.NamespaceResourceType) @@ -260,8 +261,8 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { if pipelineId == "" { pipelineId = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineResourceType) } - if pvId == "" { - pvId = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineVersionResourceType) + if pvID == "" { + pvID = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineVersionResourceType) } } runtimeConfig := parseRuntimeConfig(runtimeParameters, pipelineRoot) @@ -271,10 +272,10 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { K8SName: name, Namespace: namespace, ServiceAccount: serviceAccount, - Description: string(description), + Description: description, Enabled: enabled, Conditions: conditions, - ExperimentId: expId, + ExperimentId: expID, MaxConcurrency: maxConcurrency, NoCatchup: noCatchup, // ResourceReferences: resourceReferences, @@ -292,7 +293,7 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { }, PipelineSpec: model.PipelineSpec{ PipelineId: pipelineId, - PipelineVersionId: pvId, + PipelineVersionId: pvID, PipelineName: pipelineName, PipelineSpecManifest: model.LargeText(pipelineSpecManifest), WorkflowSpecManifest: model.LargeText(workflowSpecManifest), @@ -302,6 +303,10 @@ func (s *JobStore) scanRows(r *sql.Rows) ([]*model.Job, error) { CreatedAtInSec: createdAtInSec.Int64, UpdatedAtInSec: updatedAtInSec.Int64, } + if pluginsInput.Valid { + lt := model.LargeText(pluginsInput.String) + job.PluginsInputString = &lt + } job = job.ToV2() jobs = append(jobs, job) } @@ -374,6 +379,7 @@ func (s *JobStore) CreateJob(j *model.Job) (*model.Job, error) {
"PipelineRoot": j.PipelineSpec.RuntimeConfig.PipelineRoot, "ExperimentUUID": j.ExperimentId, "PipelineVersionId": j.PipelineSpec.PipelineVersionId, + "PluginsInput": largeTextToNullableSQL(j.PluginsInputString), }).ToSql() if err != nil { return nil, util.NewInternalServerError(err, "Failed to create query to add job to job table: %v", diff --git a/backend/src/apiserver/storage/job_store_test.go b/backend/src/apiserver/storage/job_store_test.go index 0a89bd1dcb1..71538f45097 100644 --- a/backend/src/apiserver/storage/job_store_test.go +++ b/backend/src/apiserver/storage/job_store_test.go @@ -15,6 +15,7 @@ package storage import ( + "database/sql" "testing" "time" @@ -27,6 +28,7 @@ import ( "github.com/kubeflow/pipelines/backend/src/common/util" swfapi "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "google.golang.org/grpc/codes" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -971,3 +973,75 @@ func TestJobAPIFieldMap(t *testing.T) { assert.Contains(t, jobColumns, modelField) } } + +func TestCreateJobPluginsInput(t *testing.T) { + db := NewFakeDBOrFatal() + defer func() { require.NoError(t, db.Close()) }() + expStore := NewExperimentStore(db, util.NewFakeTimeForEpoch(), util.NewFakeUUIDGeneratorOrFatal(defaultFakeExpId, nil)) + experiment, err := expStore.CreateExperiment(&model.Experiment{Name: "exp1"}) + require.NoError(t, err) + pipelineStore := NewPipelineStore(db, util.NewFakeTimeForEpoch(), util.NewFakeUUIDGeneratorOrFatal(defaultFakeExpId, nil)) + pipeline, err := pipelineStore.CreatePipeline(&model.Pipeline{Name: "p1"}) + require.NoError(t, err) + jobStore := NewJobStore(db, util.NewFakeTimeForEpoch(), nil) + + t.Run("with data round-trips", func(t *testing.T) { + const jobUUID = "plugins-job-1" + job := &model.Job{ + UUID: jobUUID, + DisplayName: "plugins job", + K8SName: "plugins-job", + Namespace: "n1", + PipelineSpec: model.PipelineSpec{ + 
PipelineId: pipeline.UUID, + PipelineName: "p1", + }, + CreatedAtInSec: 1, + UpdatedAtInSec: 1, + Enabled: true, + ExperimentId: experiment.UUID, + PluginsInputString: testLargeTextPtr(`{"mlflow":{"experiment_name":"job-exp"}}`), + } + + created, err := jobStore.CreateJob(job.ToV1()) + require.NoError(t, err) + require.NotNil(t, created) + + got, err := jobStore.GetJob(jobUUID) + require.NoError(t, err) + require.NotNil(t, got.PluginsInputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"experiment_name":"job-exp"}}`), *got.PluginsInputString) + }) + + t.Run("nil writes NULL", func(t *testing.T) { + const jobUUID = "empty-plugins-job-1" + job := &model.Job{ + UUID: jobUUID, + DisplayName: "empty plugins job", + K8SName: "empty-plugins-job", + Namespace: "n1", + PipelineSpec: model.PipelineSpec{ + PipelineId: pipeline.UUID, + PipelineName: "p1", + }, + CreatedAtInSec: 1, + UpdatedAtInSec: 1, + Enabled: true, + ExperimentId: experiment.UUID, + } + + created, err := jobStore.CreateJob(job.ToV1()) + require.NoError(t, err) + require.NotNil(t, created) + + got, err := jobStore.GetJob(jobUUID) + require.NoError(t, err) + assert.Nil(t, got.PluginsInputString, "nil PluginsInputString should round-trip as nil") + + var pluginsInput sql.NullString + row := db.QueryRow("SELECT PluginsInput FROM jobs WHERE UUID = ?", jobUUID) + err = row.Scan(&pluginsInput) + require.NoError(t, err) + assert.False(t, pluginsInput.Valid, "PluginsInput column should be NULL, not empty string") + }) +} diff --git a/backend/src/apiserver/storage/run_store.go b/backend/src/apiserver/storage/run_store.go index d1aa167c8a1..10c2443b0ee 100644 --- a/backend/src/apiserver/storage/run_store.go +++ b/backend/src/apiserver/storage/run_store.go @@ -54,6 +54,8 @@ var runColumns = []string{ "JobUUID", "State", "StateHistory", + "PluginsInput", + "PluginsOutput", "PipelineContextId", "PipelineRunContextId", } @@ -81,6 +83,11 @@ type RunStoreInterface interface { // Note: only state, runtime manifest can 
be updated. Does not update dependent tasks. UpdateRun(run *model.Run) (err error) + // Updates only the PluginsOutput column for a run. Use this when plugin + // handlers need to persist output without touching core run fields (State, + // Conditions, etc.) to avoid redundant writes and potential clobbering. + UpdateRunPluginsOutput(runID string, pluginsOutput *model.LargeText) error + // Archives a run. ArchiveRun(runId string) error @@ -304,7 +311,7 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { pipelineName, pipelineSpecManifest, workflowSpecManifest, parameters, pipelineRuntimeManifest, workflowRuntimeManifest string var createdAtInSec, scheduledAtInSec, finishedAtInSec, pipelineContextId, pipelineRunContextId sql.NullInt64 - var metricsInString, resourceReferencesInString, tasksInString, runtimeParameters, pipelineRoot, jobId, state, stateHistory, pipelineVersionId sql.NullString + var metricsInString, resourceReferencesInString, tasksInString, runtimeParameters, pipelineRoot, jobID, state, stateHistory, pluginsInput, pluginsOutput, pipelineVersionID sql.NullString err := rows.Scan( &uuid, &experimentUUID, @@ -319,7 +326,7 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { &finishedAtInSec, &conditions, &pipelineId, - &pipelineVersionId, + &pipelineVersionID, &pipelineName, &pipelineSpecManifest, &workflowSpecManifest, @@ -328,9 +335,11 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { &pipelineRoot, &pipelineRuntimeManifest, &workflowRuntimeManifest, - &jobId, + &jobID, &state, &stateHistory, + &pluginsInput, + &pluginsOutput, &pipelineContextId, &pipelineRunContextId, &resourceReferencesInString, @@ -357,8 +366,8 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { if err != nil { return nil, util.NewInternalServerError(err, "Failed to parse task details") } - jId := jobId.String - pvId := pipelineVersionId.String + jID := jobID.String + pvID := 
pipelineVersionID.String if len(resourceReferences) > 0 { if experimentUUID == "" { experimentUUID = model.GetRefIdFromResourceReferences(resourceReferences, model.ExperimentResourceType) @@ -369,11 +378,11 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { if pipelineId == "" { pipelineId = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineResourceType) } - if pvId == "" { - pvId = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineVersionResourceType) + if pvID == "" { + pvID = model.GetRefIdFromResourceReferences(resourceReferences, model.PipelineVersionResourceType) } - if jId == "" { - jId = model.GetRefIdFromResourceReferences(resourceReferences, model.JobResourceType) + if jID == "" { + jID = model.GetRefIdFromResourceReferences(resourceReferences, model.JobResourceType) } } runtimeConfig := parseRuntimeConfig(runtimeParameters, pipelineRoot) @@ -389,8 +398,8 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { StorageState: model.StorageState(storageState), Namespace: namespace, ServiceAccount: serviceAccount, - Description: string(description), - RecurringRunId: jId, + Description: description, + RecurringRunId: jID, RunDetails: model.RunDetails{ CreatedAtInSec: createdAtInSec.Int64, ScheduledAtInSec: scheduledAtInSec.Int64, @@ -408,7 +417,7 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { ResourceReferences: resourceReferences, PipelineSpec: model.PipelineSpec{ PipelineId: pipelineId, - PipelineVersionId: pvId, + PipelineVersionId: pvID, PipelineName: pipelineName, PipelineSpecManifest: model.LargeText(pipelineSpecManifest), WorkflowSpecManifest: model.LargeText(workflowSpecManifest), @@ -416,6 +425,14 @@ func (s *RunStore) scanRowsToRuns(rows *sql.Rows) ([]*model.Run, error) { RuntimeConfig: runtimeConfig, }, } + if pluginsInput.Valid { + lt := model.LargeText(pluginsInput.String) + run.PluginsInputString = &lt + } + if pluginsOutput.Valid
{ + lt := model.LargeText(pluginsOutput.String) + run.PluginsOutputString = &lt + } run = run.ToV2() runs = append(runs, run) } @@ -517,6 +534,8 @@ func (s *RunStore) CreateRun(r *model.Run) (*model.Run, error) { "JobUUID": r.RecurringRunId, "State": r.RunDetails.State.ToString(), "StateHistory": stateHistoryString, + "PluginsInput": largeTextToNullableSQL(r.PluginsInputString), + "PluginsOutput": largeTextToNullableSQL(r.PluginsOutputString), }).ToSql() if err != nil { return nil, util.NewInternalServerError(err, "Failed to create query to store run to run table: '%v/%v", @@ -565,15 +584,24 @@ func (s *RunStore) UpdateRun(run *model.Run) error { if historyString, err := json.Marshal(run.RunDetails.StateHistory); err == nil { stateHistoryString = string(historyString) } + updateFields := sq.Eq{ + "Conditions": run.Conditions, + "State": run.State.ToString(), + "StateHistory": stateHistoryString, + "FinishedAtInSec": run.FinishedAtInSec, + "WorkflowRuntimeManifest": run.WorkflowRuntimeManifest, + } + // PluginsOutput is only updated when explicitly set by the caller (e.g. + // MLflow terminal sync, retry). A nil pointer means "leave unchanged" so + // that normal state-update callers don't accidentally overwrite it. + // Note: PluginsInput is intentionally omitted — it is immutable after + // run creation and never updated. + if run.PluginsOutputString != nil { + updateFields["PluginsOutput"] = largeTextToNullableSQL(run.PluginsOutputString) + } sql, args, err := sq. Update("run_details"). - SetMap(sq.Eq{ - "Conditions": run.Conditions, - "State": run.State.ToString(), - "StateHistory": stateHistoryString, - "FinishedAtInSec": run.FinishedAtInSec, - "WorkflowRuntimeManifest": run.WorkflowRuntimeManifest, - }). + SetMap(updateFields). Where(sq.Eq{"UUID": run.UUID}).
ToSql() if err != nil { @@ -607,6 +635,36 @@ func (s *RunStore) UpdateRun(run *model.Run) error { return nil } +// UpdateRunPluginsOutput updates only the PluginsOutput column for the given +// run, leaving all other columns untouched. This avoids redundant writes of +// core run fields (State, Conditions, WorkflowRuntimeManifest, etc.) when +// plugin handlers need to persist their output after the run state has already +// been committed. +func (s *RunStore) UpdateRunPluginsOutput(runID string, pluginsOutput *model.LargeText) error { + sql, args, err := sq. + Update("run_details"). + SetMap(sq.Eq{ + "PluginsOutput": largeTextToNullableSQL(pluginsOutput), + }). + Where(sq.Eq{"UUID": runID}). + ToSql() + if err != nil { + return util.NewInternalServerError(err, "Failed to create query to update plugins output for run %s", runID) + } + result, err := s.db.DB.Exec(sql, args...) + if err != nil { + return util.NewInternalServerError(err, "Failed to update plugins output for run %s", runID) + } + r, err := result.RowsAffected() + if err != nil { + return util.NewInternalServerError(err, "Failed to update plugins output for run %s", runID) + } + if r == 0 { + return util.Wrap(util.NewResourceNotFoundError("Run", runID), "Failed to update plugins output for run") + } + return nil +} + func (s *RunStore) ArchiveRun(runId string) error { sql, args, err := sq. Update("run_details"). 
diff --git a/backend/src/apiserver/storage/run_store_test.go b/backend/src/apiserver/storage/run_store_test.go index 1077cf9729e..1ec0c7dc1bf 100644 --- a/backend/src/apiserver/storage/run_store_test.go +++ b/backend/src/apiserver/storage/run_store_test.go @@ -27,6 +27,7 @@ import ( "github.com/kubeflow/pipelines/backend/src/apiserver/model" "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "google.golang.org/grpc/codes" "k8s.io/apimachinery/pkg/util/json" ) @@ -37,6 +38,11 @@ const ( defaultFakeRunIdThree = "123e4567-e89b-12d3-a456-426655440023" ) +func testLargeTextPtr(s string) *model.LargeText { + lt := model.LargeText(s) + return &lt +} + type RunMetricSorter []*model.RunMetric func (r RunMetricSorter) Len() int { return len(r) } @@ -1492,3 +1498,213 @@ func TestRunAPIFieldMap(t *testing.T) { assert.Contains(t, runColumns, modelField) } } + +func TestCreateRunWithPluginsFields(t *testing.T) { + const runUUID = "plugins-run-1" + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + run := &model.Run{ + UUID: runUUID, + ExperimentId: defaultFakeExpId, + K8SName: "plugins-run", + DisplayName: "plugins-run", + Namespace: "n1", + StorageState: model.StorageStateAvailable, + RunDetails: model.RunDetails{ + CreatedAtInSec: 100, + Conditions: "Running", + State: model.RuntimeStateRunning, + WorkflowRuntimeManifest: "workflow1", + PluginsInputString: testLargeTextPtr(`{"mlflow":{"experiment_name":"my-exp"}}`), + PluginsOutputString: testLargeTextPtr(`{"mlflow":{"entries":{"root_run_id":{"value":"abc123"}},"state":"PLUGIN_SUCCEEDED","stateMessage":"ok"}}`), + }, + } + _, err := runStore.CreateRun(run) + require.NoError(t, err) + + got, err := runStore.GetRun(runUUID) + require.NoError(t, err) + require.NotNil(t, got.PluginsInputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"experiment_name":"my-exp"}}`), *got.PluginsInputString) +
require.NotNil(t, got.PluginsOutputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"entries":{"root_run_id":{"value":"abc123"}},"state":"PLUGIN_SUCCEEDED","stateMessage":"ok"}}`), *got.PluginsOutputString) +} + +func TestCreateRunWithEmptyPluginsFieldsWritesNull(t *testing.T) { + const runUUID = "empty-plugins-1" + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + run := &model.Run{ + UUID: runUUID, + ExperimentId: defaultFakeExpId, + K8SName: "empty-plugins-run", + DisplayName: "empty-plugins-run", + Namespace: "n1", + StorageState: model.StorageStateAvailable, + RunDetails: model.RunDetails{ + CreatedAtInSec: 100, + Conditions: "Running", + State: model.RuntimeStateRunning, + WorkflowRuntimeManifest: "workflow1", + }, + } + _, err := runStore.CreateRun(run) + require.NoError(t, err) + + got, err := runStore.GetRun(runUUID) + require.NoError(t, err) + assert.Nil(t, got.PluginsInputString, "nil plugins_input should round-trip as nil") + assert.Nil(t, got.PluginsOutputString, "nil plugins_output should round-trip as nil") + + // Verify at the DB level that the columns are NULL, not empty strings. + // The read path only sets the field when sql.NullString.Valid is true, + // so a non-zero value here would mean '' was written instead of NULL. 
+ var pluginsInput, pluginsOutput sql.NullString + row := db.QueryRow("SELECT PluginsInput, PluginsOutput FROM run_details WHERE UUID = ?", runUUID) + err = row.Scan(&pluginsInput, &pluginsOutput) + require.NoError(t, err) + assert.False(t, pluginsInput.Valid, "PluginsInput column should be NULL, not empty string") + assert.False(t, pluginsOutput.Valid, "PluginsOutput column should be NULL, not empty string") +} + +func TestUpdateRunPreservesPluginsFields(t *testing.T) { + const runUUID = "preserve-plugins-1" + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + run := &model.Run{ + UUID: runUUID, + ExperimentId: defaultFakeExpId, + K8SName: "preserve-run", + DisplayName: "preserve-run", + Namespace: "n1", + StorageState: model.StorageStateAvailable, + RunDetails: model.RunDetails{ + CreatedAtInSec: 100, + Conditions: "Running", + State: model.RuntimeStateRunning, + WorkflowRuntimeManifest: "workflow1", + PluginsInputString: testLargeTextPtr(`{"mlflow":{"experiment_name":"preserved"}}`), + PluginsOutputString: testLargeTextPtr(`{"mlflow":{"state":"PLUGIN_RUNNING"}}`), + }, + } + _, err := runStore.CreateRun(run) + require.NoError(t, err) + + run.State = model.RuntimeStateSucceeded + run.Conditions = "Succeeded" + err = runStore.UpdateRun(run) + require.NoError(t, err) + + got, err := runStore.GetRun(runUUID) + require.NoError(t, err) + require.NotNil(t, got.PluginsInputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"experiment_name":"preserved"}}`), *got.PluginsInputString) + require.NotNil(t, got.PluginsOutputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"state":"PLUGIN_RUNNING"}}`), *got.PluginsOutputString) + assert.Equal(t, model.RuntimeStateSucceeded, got.State) +} + +func TestUpdateRunPluginsOutputOnly(t *testing.T) { + const runUUID = "plugins-output-only-1" + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + run := &model.Run{ + UUID: runUUID, + ExperimentId: 
defaultFakeExpId, + K8SName: "plugins-output-run", + DisplayName: "plugins-output-run", + Namespace: "n1", + StorageState: model.StorageStateAvailable, + RunDetails: model.RunDetails{ + CreatedAtInSec: 100, + Conditions: "Running", + State: model.RuntimeStateRunning, + WorkflowRuntimeManifest: "original-manifest", + PluginsInputString: testLargeTextPtr(`{"mlflow":{"experiment_name":"my-exp"}}`), + PluginsOutputString: testLargeTextPtr(`{"mlflow":{"state":"PLUGIN_RUNNING"}}`), + }, + } + _, err := runStore.CreateRun(run) + require.NoError(t, err) + + // Update only PluginsOutput — core fields must remain untouched. + updatedOutput := testLargeTextPtr(`{"mlflow":{"state":"PLUGIN_SUCCEEDED","stateMessage":""}}`) + err = runStore.UpdateRunPluginsOutput(runUUID, updatedOutput) + require.NoError(t, err) + + got, err := runStore.GetRun(runUUID) + require.NoError(t, err) + + // Core fields should be unchanged. + assert.Equal(t, model.RuntimeStateRunning, got.State) + assert.Equal(t, "Running", got.Conditions) + assert.Equal(t, model.LargeText("original-manifest"), got.WorkflowRuntimeManifest) + + // PluginsInput should be unchanged. + require.NotNil(t, got.PluginsInputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"experiment_name":"my-exp"}}`), *got.PluginsInputString) + + // PluginsOutput should be updated. 
+ require.NotNil(t, got.PluginsOutputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"state":"PLUGIN_SUCCEEDED","stateMessage":""}}`), *got.PluginsOutputString) +} + +func TestUpdateRunPluginsOutputNotFound(t *testing.T) { + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + output := testLargeTextPtr(`{"mlflow":{"state":"PLUGIN_FAILED"}}`) + err := runStore.UpdateRunPluginsOutput("non-existent-run-id", output) + require.Error(t, err) + assert.Contains(t, err.Error(), "not found") +} + +func TestListRunsReturnsPluginsFields(t *testing.T) { + const runName = "list-run" + db, runStore := initializeRunStore() + defer func() { require.NoError(t, db.Close()) }() + + run := &model.Run{ + UUID: "list-plugins-1", + ExperimentId: defaultFakeExpId, + K8SName: runName, + DisplayName: runName, + Namespace: "n1", + StorageState: model.StorageStateAvailable, + RunDetails: model.RunDetails{ + CreatedAtInSec: 200, + Conditions: "Running", + State: model.RuntimeStateRunning, + WorkflowRuntimeManifest: "workflow1", + PluginsInputString: testLargeTextPtr(`{"mlflow":{"experiment_name":"list-exp"}}`), + PluginsOutputString: testLargeTextPtr(`{"mlflow":{"state":"PLUGIN_RUNNING"}}`), + }, + } + _, err := runStore.CreateRun(run) + require.NoError(t, err) + + filterProto := &api.Filter{ + Predicates: []*api.Predicate{ + { + Key: "name", + Op: api.Predicate_EQUALS, + Value: &api.Predicate_StringValue{StringValue: runName}, + }, + }, + } + newFilter, err := filter.New(filterProto) + require.NoError(t, err) + opts, err := list.NewOptions(&model.Run{}, 10, "id", newFilter) + require.NoError(t, err) + runs, _, _, err := runStore.ListRuns(&model.FilterContext{}, opts) + require.NoError(t, err) + require.Len(t, runs, 1) + require.NotNil(t, runs[0].PluginsInputString) + assert.Equal(t, model.LargeText(`{"mlflow":{"experiment_name":"list-exp"}}`), *runs[0].PluginsInputString) + require.NotNil(t, runs[0].PluginsOutputString) + assert.Equal(t, 
model.LargeText(`{"mlflow":{"state":"PLUGIN_RUNNING"}}`), *runs[0].PluginsOutputString) +} diff --git a/backend/src/apiserver/storage/sql_null_util.go b/backend/src/apiserver/storage/sql_null_util.go index a1d3cd555d7..13e3fa175eb 100644 --- a/backend/src/apiserver/storage/sql_null_util.go +++ b/backend/src/apiserver/storage/sql_null_util.go @@ -17,6 +17,7 @@ package storage import ( "database/sql" + "github.com/kubeflow/pipelines/backend/src/apiserver/model" "github.com/kubeflow/pipelines/backend/src/common/util" ) @@ -53,3 +54,11 @@ func PointerToNullInt64(ip *int64) sql.NullInt64 { Valid: true, } } + +func largeTextToNullableSQL(lt *model.LargeText) *string { + if lt == nil || *lt == "" { + return nil + } + s := string(*lt) + return &s +} diff --git a/backend/src/apiserver/template/v2_template.go b/backend/src/apiserver/template/v2_template.go index 9b516922b22..d679932e0f4 100644 --- a/backend/src/apiserver/template/v2_template.go +++ b/backend/src/apiserver/template/v2_template.go @@ -32,6 +32,7 @@ import ( "github.com/kubeflow/pipelines/backend/src/v2/compiler/argocompiler" "google.golang.org/protobuf/encoding/protojson" goyaml "gopkg.in/yaml.v3" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/yaml" ) @@ -57,6 +58,11 @@ func NewGenericScheduledWorkflow(modelJob *model.Job) (*scheduledworkflow.Schedu return nil, util.Wrap(err, "converting model trigger to crd trigger failed") } + pluginsInput, err := modelPluginsInputToCRD(modelJob.PluginsInputString) + if err != nil { + return nil, util.Wrap(err, "Create job failed") + } + return &scheduledworkflow.ScheduledWorkflow{ TypeMeta: metav1.TypeMeta{ APIVersion: "kubeflow.org/v2beta1", @@ -73,6 +79,7 @@ func NewGenericScheduledWorkflow(modelJob *model.Job) (*scheduledworkflow.Schedu PipelineName: modelJob.PipelineName, PipelineVersionId: modelJob.PipelineVersionId, ServiceAccount: modelJob.ServiceAccount, + PluginsInput: 
pluginsInput, }, }, nil } @@ -510,3 +517,21 @@ func (t *V2Spec) validatePipelineJobInputs(job *pipelinespec.PipelineJob) error return nil } + +// modelPluginsInputToCRD converts the JSON-encoded plugins_input string from +// the model layer into the map[string]apiextensionsv1.JSON representation +// used by the ScheduledWorkflow CRD spec. +func modelPluginsInputToCRD(lt *model.LargeText) (map[string]apiextensionsv1.JSON, error) { + if lt == nil || *lt == "" { + return nil, nil + } + var raw map[string]json.RawMessage + if err := json.Unmarshal([]byte(*lt), &raw); err != nil { + return nil, fmt.Errorf("invalid plugins_input JSON: %w", err) + } + result := make(map[string]apiextensionsv1.JSON, len(raw)) + for k, v := range raw { + result[k] = apiextensionsv1.JSON{Raw: v} + } + return result, nil +} diff --git a/backend/src/common/plugins/config.go b/backend/src/common/plugins/config.go new file mode 100644 index 00000000000..e5b386ea808 --- /dev/null +++ b/backend/src/common/plugins/config.go @@ -0,0 +1,7 @@ +package plugins + +// TLSConfig holds TLS settings for a plugin endpoint. +type TLSConfig struct { + InsecureSkipVerify bool `json:"insecureSkipVerify,omitempty" mapstructure:"insecureSkipVerify"` + CABundlePath string `json:"caBundlePath,omitempty" mapstructure:"caBundlePath"` +} diff --git a/backend/src/common/plugins/mlflow/client.go b/backend/src/common/plugins/mlflow/client.go new file mode 100644 index 00000000000..652ead3f48a --- /dev/null +++ b/backend/src/common/plugins/mlflow/client.go @@ -0,0 +1,523 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mlflow provides a shared Go HTTP client for the MLflow REST API. +// It is used by the API server, driver, and launcher to create experiments, +// create/update/search runs, log-batch metrics/params, and set tags. +package mlflow + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/cenkalti/backoff" + "github.com/golang/glog" +) + +// Auth type constants. +const ( + AuthTypeKubernetes = "kubernetes" +) + +// Retry policy defaults. +const ( + DefaultRetryInitial time.Duration = 500 * time.Millisecond + DefaultRetryMax time.Duration = 10 * time.Second + DefaultRetryElapsed time.Duration = 60 * time.Second +) + +// MLflow REST API paths. +const ( + pathExperimentsCreate = "/api/2.0/mlflow/experiments/create" + pathExperimentsGet = "/api/2.0/mlflow/experiments/get" + pathExperimentsGetByName = "/api/2.0/mlflow/experiments/get-by-name" + pathRunsCreate = "/api/2.0/mlflow/runs/create" + pathRunsUpdate = "/api/2.0/mlflow/runs/update" + pathRunsSetTag = "/api/2.0/mlflow/runs/set-tag" + pathRunsSearch = "/api/2.0/mlflow/runs/search" + pathRunsLogBatch = "/api/2.0/mlflow/runs/log-batch" +) + +// Workspace header used when workspace-based multi-tenancy is enabled. +const workspaceHeader = "X-MLflow-Workspace" + +// ParentRunTagKey is a key used to store parent run ID as a Tag. +const ParentRunTagKey = "mlflow.parentRunId" + +// RetryPolicy configures the exponential backoff for retrying failed requests. 
+type RetryPolicy struct { + InitialInterval time.Duration + MaxInterval time.Duration + MaxElapsedTime time.Duration + Multiplier float64 +} + +// Config holds the configuration for creating a new Client. +type Config struct { + Endpoint string + HTTPClient *http.Client + BearerToken string + WorkspacesEnabled bool + Workspace string + Retry RetryPolicy +} + +// Param represents a single parameter key-value pair from a run, used in request to POST /api/2.0/mlflow/runs/log-batch +type Param struct { + Key string `json:"key"` + Value string `json:"value"` +} + +// Metric represents a scalar metric recorded during a run, used in request to POST /api/2.0/mlflow/runs/log-batch +type Metric struct { + Key string `json:"key"` + Value float64 `json:"value"` + Timestamp int64 `json:"timestamp"` + Step int64 `json:"step"` +} + +// Tag represents a key-value tag to set on an MLflow run. +type Tag struct { + Key string `json:"key"` + Value string `json:"value"` +} + +// LogBatchRequest represents the request to POST /api/2.0/mlflow/runs/log-batch +type LogBatchRequest struct { + RunID string `json:"run_id"` + Params []Param `json:"params"` + Metrics []Metric `json:"metrics"` + Tags []Tag `json:"tags"` +} + +// MLflowExperiment represents an MLflow experiment as returned by the REST API. +type MLflowExperiment struct { + ID string `json:"experiment_id"` + Name string `json:"name"` +} + +// SearchRunsResponse is the parsed response from POST /api/2.0/mlflow/runs/search. +type SearchRunsResponse struct { + Runs []json.RawMessage `json:"runs"` + NextPageToken string `json:"next_page_token"` +} + +// Client is a shared HTTP client for interacting with the MLflow REST API. +type Client struct { + endpoint *url.URL + httpClient *http.Client + bearerToken string + workspacesEnabled bool + workspace string +} + +// NewClient creates a new MLflow REST API client. 
+// The provided HTTPClient's transport is wrapped with a retryRoundTripper +// that retries transient (5xx / network) errors with exponential backoff. +func NewClient(cfg Config) (*Client, error) { + if cfg.Endpoint == "" { + return nil, fmt.Errorf("MLflow endpoint is required") + } + u, err := url.Parse(strings.TrimRight(cfg.Endpoint, "/")) + if err != nil { + return nil, fmt.Errorf("invalid MLflow endpoint %q: %w", cfg.Endpoint, err) + } + httpClient := cfg.HTTPClient + if httpClient == nil { + httpClient = &http.Client{Timeout: 30 * time.Second} + } + // Wrap the transport with retry logic. + base := httpClient.Transport + if base == nil { + base = http.DefaultTransport + } + httpClient.Transport = &retryRoundTripper{ + next: base, + retry: cfg.Retry, + } + return &Client{ + endpoint: u, + httpClient: httpClient, + bearerToken: cfg.BearerToken, + workspacesEnabled: cfg.WorkspacesEnabled, + workspace: cfg.Workspace, + }, nil +} + +// GetExperiment looks up an MLflowExperiment by ID. +func (c *Client) GetExperiment(ctx context.Context, experimentID string) (*MLflowExperiment, error) { + reqURL := c.buildURL(pathExperimentsGet) + q := reqURL.Query() + q.Set("experiment_id", experimentID) + reqURL.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL.String(), nil) + if err != nil { + return nil, fmt.Errorf("failed to create GetExperiment request: %w", err) + } + c.applyHeaders(req) + + respBody, err := c.do(req) + if err != nil { + return nil, err + } + var result struct { + Experiment MLflowExperiment `json:"experiment"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return nil, fmt.Errorf("failed to parse GetExperiment response: %w", err) + } + return &result.Experiment, nil +} + +// GetExperimentByName looks up an MLflowExperiment by name. 
+func (c *Client) GetExperimentByName(ctx context.Context, name string) (*MLflowExperiment, error) { + reqURL := c.buildURL(pathExperimentsGetByName) + q := reqURL.Query() + q.Set("experiment_name", name) + reqURL.RawQuery = q.Encode() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL.String(), nil) + if err != nil { + return nil, fmt.Errorf("failed to create GetExperimentByName request: %w", err) + } + c.applyHeaders(req) + + respBody, err := c.do(req) + if err != nil { + return nil, err + } + var result struct { + Experiment MLflowExperiment `json:"experiment"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return nil, fmt.Errorf("failed to parse GetExperimentByName response: %w", err) + } + return &result.Experiment, nil +} + +// CreateExperiment creates a new MLflow experiment. Returns the experiment ID. +func (c *Client) CreateExperiment(ctx context.Context, name string, description *string) (string, error) { + body := map[string]interface{}{ + "name": name, + } + if description != nil { + body["description"] = *description + } + respBody, err := c.postJSON(ctx, pathExperimentsCreate, body) + if err != nil { + return "", err + } + var result struct { + ExperimentID string `json:"experiment_id"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return "", fmt.Errorf("failed to parse CreateExperiment response: %w", err) + } + return result.ExperimentID, nil +} + +// CreateRun creates a new MLflow run under the given experiment. 
+func (c *Client) CreateRun(ctx context.Context, experimentID, runName string, tags []Tag) (string, error) { + glog.Infof("Creating MLflow run with experiment ID %s and name %s", experimentID, runName) + body := map[string]interface{}{ + "experiment_id": experimentID, + "run_name": runName, + "start_time": time.Now().UnixMilli(), + } + if len(tags) > 0 { + tagList := make([]map[string]string, len(tags)) + for i, t := range tags { + tagList[i] = map[string]string{"key": t.Key, "value": t.Value} + } + body["tags"] = tagList + } + glog.Infof("Creating MLflow run with body: %v", body) + respBody, err := c.postJSON(ctx, pathRunsCreate, body) + if err != nil { + return "", err + } + var result struct { + Run struct { + Info struct { + RunID string `json:"run_id"` + } `json:"info"` + } `json:"run"` + } + glog.Infof("Here") + + if err := json.Unmarshal(respBody, &result); err != nil { + return "", fmt.Errorf("failed to parse CreateRun response: %w", err) + } + return result.Run.Info.RunID, nil +} + +// UpdateRun updates the status of an MLflow run. +func (c *Client) UpdateRun(ctx context.Context, runID, status string, endTimeMs *int64) error { + body := map[string]interface{}{ + "run_id": runID, + "status": status, + } + if endTimeMs != nil { + body["end_time"] = *endTimeMs + } + _, err := c.postJSON(ctx, pathRunsUpdate, body) + return err +} + +// SetTag sets a single tag on an MLflow run. +func (c *Client) SetTag(ctx context.Context, runID, key, value string) error { + body := map[string]interface{}{ + "run_id": runID, + "key": key, + "value": value, + } + _, err := c.postJSON(ctx, pathRunsSetTag, body) + return err +} + +// SearchRuns searches for runs matching a filter expression. 
+func (c *Client) SearchRuns(ctx context.Context, experimentIDs []string, filter string, maxResults int, pageToken string) (*SearchRunsResponse, error) { + body := map[string]interface{}{ + "experiment_ids": experimentIDs, + "filter": filter, + "max_results": maxResults, + } + if pageToken != "" { + body["page_token"] = pageToken + } + respBody, err := c.postJSON(ctx, pathRunsSearch, body) + if err != nil { + return nil, err + } + var result SearchRunsResponse + if err := json.Unmarshal(respBody, &result); err != nil { + return nil, fmt.Errorf("failed to parse SearchRuns response: %w", err) + } + return &result, nil +} + +// LogBatch logs a batch of metrics, params, and tags to an MLflow run. +func (c *Client) LogBatch(ctx context.Context, request LogBatchRequest) error { + _, err := c.postJSON(ctx, pathRunsLogBatch, request) + if err != nil { + return err + } + return nil +} + +// APIError represents an error response from the MLflow REST API. +type APIError struct { + StatusCode int + ErrorCode string `json:"error_code"` + Message string `json:"message"` +} + +func (e *APIError) Error() string { + return fmt.Sprintf("MLflow API error (HTTP %d, %s): %s", e.StatusCode, e.ErrorCode, e.Message) +} + +// IsNotFoundError returns true if the error is an MLflow RESOURCE_DOES_NOT_EXIST error. +func IsNotFoundError(err error) bool { + apiErr, ok := err.(*APIError) + if !ok { + return false + } + return apiErr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" || apiErr.StatusCode == http.StatusNotFound +} + +// IsAlreadyExistsError returns true if the error is an MLflow RESOURCE_ALREADY_EXISTS error. 
+func IsAlreadyExistsError(err error) bool { + apiErr, ok := err.(*APIError) + if !ok { + return false + } + return apiErr.ErrorCode == "RESOURCE_ALREADY_EXISTS" +} + +func (c *Client) buildURL(path string) *url.URL { + u := *c.endpoint + u.Path = strings.TrimRight(u.Path, "/") + path + return &u +} + +func (c *Client) applyHeaders(req *http.Request) { + req.Header.Set("Content-Type", "application/json") + + if c.bearerToken != "" { + req.Header.Set("Authorization", "Bearer "+c.bearerToken) + } + + if c.workspacesEnabled && c.workspace != "" { + req.Header.Set(workspaceHeader, c.workspace) + } +} + +func (c *Client) postJSON(ctx context.Context, path string, body interface{}) ([]byte, error) { + bodyBytes, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("failed to marshal request body for %s: %w", path, err) + } + reqURL := c.buildURL(path) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL.String(), bytes.NewReader(bodyBytes)) + if err != nil { + return nil, fmt.Errorf("failed to create request for %s: %w", path, err) + } + c.applyHeaders(req) + return c.do(req) +} + +func (c *Client) do(req *http.Request) ([]byte, error) { + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("MLflow request %s %s failed: %w", req.Method, req.URL.Path, err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body from %s: %w", req.URL.Path, err) + } + + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return body, nil + } + + apiErr := &APIError{StatusCode: resp.StatusCode} + _ = json.Unmarshal(body, apiErr) + return nil, apiErr +} + +// noRetryPaths lists MLflow API paths that are non-idempotent. Retrying +// these after a timeout could create duplicate server-side state. 
+var noRetryPaths = []string{
+	pathRunsCreate,
+	pathExperimentsCreate,
+	pathRunsLogBatch,
+}
+
+// retryRoundTripper wraps an http.RoundTripper with exponential backoff retry
+// logic for transient (5xx and network) errors. Non-idempotent endpoints
+// listed in noRetryPaths are executed exactly once.
+type retryRoundTripper struct {
+	// next performs the actual HTTP exchange.
+	next http.RoundTripper
+	// retry overrides the backoff defaults; fields <= 0 are ignored.
+	retry RetryPolicy
+}
+
+// RoundTrip sends req through the wrapped transport.
+//
+// Requests whose path ends in one of noRetryPaths are sent exactly once. All
+// others are retried with exponential backoff on network errors and on 5xx
+// replies. Every reply below 500 (success, redirect or client error) is final
+// and is handed back to the caller with its body intact. Context cancellation
+// and deadline expiry stop the retries immediately.
+//
+// NOTE: when the retries are exhausted on 5xx replies, the last response has
+// already been drained and closed, so the caller receives an error rather
+// than that response.
+func (rt *retryRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	// Skip retries for non-idempotent endpoints to avoid duplicates.
+	for _, path := range noRetryPaths {
+		if strings.HasSuffix(req.URL.Path, path) {
+			return rt.next.RoundTrip(req)
+		}
+	}
+
+	// Ensure the request body can be replayed across retries.
+	if req.Body != nil && req.GetBody == nil {
+		bodyBytes, err := io.ReadAll(req.Body)
+		if err != nil {
+			// A RoundTripper must close the request body on errors as well.
+			req.Body.Close()
+			return nil, fmt.Errorf("failed to buffer request body for retry: %w", err)
+		}
+		req.Body.Close()
+		req.GetBody = func() (io.ReadCloser, error) {
+			return io.NopCloser(bytes.NewReader(bodyBytes)), nil
+		}
+		req.Body, _ = req.GetBody()
+	}
+
+	var resp *http.Response
+	operation := func() error {
+		if ctxErr := req.Context().Err(); ctxErr != nil {
+			return backoff.Permanent(ctxErr)
+		}
+		// Reset the request body for each attempt.
+		if req.GetBody != nil {
+			body, err := req.GetBody()
+			if err != nil {
+				return backoff.Permanent(fmt.Errorf("failed to reset request body: %w", err))
+			}
+			req.Body = body
+		}
+
+		var err error
+		resp, err = rt.next.RoundTrip(req)
+		if err != nil {
+			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+				return backoff.Permanent(err)
+			}
+			glog.Warningf("MLflow request %s %s failed (will retry): %v", req.Method, req.URL.Path, err)
+			return err
+		}
+
+		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
+			return nil
+		}
+
+		// Anything else below 500 (1xx, 3xx, 4xx) is final. Redirects in
+		// particular must reach http.Client untouched so it can follow them;
+		// retrying them here would turn every redirect into a failure.
+		if resp.StatusCode < http.StatusInternalServerError {
+			return backoff.Permanent(&roundTripError{statusCode: resp.StatusCode})
+		}
+
+		// 5xx — server error, retryable. Drain and close the body so the
+		// connection can be reused on the next attempt.
+		statusCode := resp.StatusCode
+		glog.Warningf("MLflow request %s %s returned %d (will retry)", req.Method, req.URL.Path, statusCode)
+		_, _ = io.Copy(io.Discard, resp.Body)
+		resp.Body.Close()
+		resp = nil
+		return fmt.Errorf("server returned %d", statusCode)
+	}
+
+	// Start from the library defaults and apply only the fields that are set.
+	b := backoff.NewExponentialBackOff()
+	if rt.retry.InitialInterval > 0 {
+		b.InitialInterval = rt.retry.InitialInterval
+	}
+	if rt.retry.MaxInterval > 0 {
+		b.MaxInterval = rt.retry.MaxInterval
+	}
+	if rt.retry.MaxElapsedTime > 0 {
+		b.MaxElapsedTime = rt.retry.MaxElapsedTime
+	}
+	if rt.retry.Multiplier > 0 {
+		b.Multiplier = rt.retry.Multiplier
+	}
+
+	retryBackoff := backoff.WithContext(b, req.Context())
+	if err := backoff.Retry(operation, retryBackoff); err != nil {
+		// If we still hold a response (final non-2xx status below 500), return
+		// it so the caller can inspect the status and parse the body.
+		if resp != nil {
+			return resp, nil
+		}
+		return nil, err
+	}
+	return resp, nil
+}
+
+// roundTripError is a sentinel used by retryRoundTripper to signal a permanent
+// (non-retryable) HTTP status to the backoff loop while preserving the response.
+type roundTripError struct {
+	statusCode int
+}
+
+// Error reports the non-retryable HTTP status.
+func (e *roundTripError) Error() string {
+	return fmt.Sprintf("non-retryable HTTP %d", e.statusCode)
+}
diff --git a/backend/src/common/plugins/mlflow/client_test.go b/backend/src/common/plugins/mlflow/client_test.go
new file mode 100644
index 00000000000..4c2276e4a15
--- /dev/null
+++ b/backend/src/common/plugins/mlflow/client_test.go
@@ -0,0 +1,707 @@
+// Copyright 2026 The Kubeflow Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mlflow + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewClient_EmptyEndpoint_ReturnsError(t *testing.T) { + _, err := NewClient(Config{Endpoint: ""}) + require.Error(t, err) + assert.Contains(t, err.Error(), "endpoint is required") +} + +func TestNewClient_InvalidEndpoint_ReturnsError(t *testing.T) { + _, err := NewClient(Config{Endpoint: "://bad"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid MLflow endpoint") +} + +func TestNewClient_ValidEndpoint_Success(t *testing.T) { + c, err := NewClient(Config{Endpoint: "http://mlflow.example.com"}) + require.NoError(t, err) + require.NotNil(t, c) + assert.Equal(t, "http", c.endpoint.Scheme) + assert.Equal(t, "mlflow.example.com", c.endpoint.Host) +} + +func TestNewClient_TrailingSlashTrimmed(t *testing.T) { + c, err := NewClient(Config{Endpoint: "http://mlflow.example.com/"}) + require.NoError(t, err) + assert.Equal(t, "", c.endpoint.Path) +} + +func TestNewClient_DefaultHTTPClient(t *testing.T) { + c, err := NewClient(Config{Endpoint: "http://mlflow.example.com"}) + require.NoError(t, err) + require.NotNil(t, c.httpClient) +} + +func TestNewClient_CustomHTTPClient(t *testing.T) { + custom := &http.Client{Timeout: 99 * time.Second} + c, err := NewClient(Config{Endpoint: "http://mlflow.example.com", HTTPClient: custom}) + require.NoError(t, err) + assert.Equal(t, custom, c.httpClient) +} + +func 
TestApplyHeaders_BearerToken(t *testing.T) { + c, _ := NewClient(Config{ + Endpoint: "http://mlflow.example.com", + BearerToken: "sa-token", + }) + req, _ := http.NewRequest("GET", "http://mlflow.example.com", nil) + c.applyHeaders(req) + assert.Equal(t, "Bearer sa-token", req.Header.Get("Authorization")) + assert.Equal(t, "application/json", req.Header.Get("Content-Type")) +} + +func TestApplyHeaders_NoBearerToken(t *testing.T) { + c, _ := NewClient(Config{ + Endpoint: "http://mlflow.example.com", + }) + req, _ := http.NewRequest("GET", "http://mlflow.example.com", nil) + c.applyHeaders(req) + assert.Empty(t, req.Header.Get("Authorization")) +} + +func TestApplyHeaders_WorkspaceHeader(t *testing.T) { + c, _ := NewClient(Config{ + Endpoint: "http://mlflow.example.com", + WorkspacesEnabled: true, + Workspace: "my-ns", + }) + req, _ := http.NewRequest("GET", "http://mlflow.example.com", nil) + c.applyHeaders(req) + assert.Equal(t, "my-ns", req.Header.Get(workspaceHeader)) +} + +func TestApplyHeaders_WorkspaceHeader_DisabledWhenFalse(t *testing.T) { + c, _ := NewClient(Config{ + Endpoint: "http://mlflow.example.com", + WorkspacesEnabled: false, + Workspace: "my-ns", + }) + req, _ := http.NewRequest("GET", "http://mlflow.example.com", nil) + c.applyHeaders(req) + assert.Empty(t, req.Header.Get(workspaceHeader)) +} + +func TestGetExperimentByName_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodGet, r.Method) + assert.Equal(t, pathExperimentsGetByName, r.URL.Path) + assert.Equal(t, "my-experiment", r.URL.Query().Get("experiment_name")) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment":{"experiment_id":"42","name":"my-experiment"}}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + exp, err := c.GetExperimentByName(context.Background(), "my-experiment") + require.NoError(t, err) + assert.Equal(t, "42", exp.ID) + assert.Equal(t, 
"my-experiment", exp.Name) +} + +func TestGetExperimentByName_NotFound(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`{"error_code":"RESOURCE_DOES_NOT_EXIST","message":"not found"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + _, err := c.GetExperimentByName(context.Background(), "missing") + require.Error(t, err) + assert.True(t, IsNotFoundError(err)) +} + +func TestCreateExperiment_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, http.MethodPost, r.Method) + assert.Equal(t, pathExperimentsCreate, r.URL.Path) + + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "test-exp", payload["name"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment_id":"99"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + id, err := c.CreateExperiment(context.Background(), "test-exp", nil) + require.NoError(t, err) + assert.Equal(t, "99", id) +} + +func TestCreateExperiment_WithDescription(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "test-exp", payload["name"]) + assert.Equal(t, "my description", payload["description"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"experiment_id":"100"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + desc := "my description" + id, err := c.CreateExperiment(context.Background(), "test-exp", &desc) + require.NoError(t, err) + assert.Equal(t, "100", id) +} + +func TestCreateExperiment_AlreadyExists(t *testing.T) { + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusConflict) + _, _ = w.Write([]byte(`{"error_code":"RESOURCE_ALREADY_EXISTS","message":"experiment already exists"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + _, err := c.CreateExperiment(context.Background(), "test-exp", nil) + require.Error(t, err) + assert.True(t, IsAlreadyExistsError(err)) +} + +func TestCreateRun_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, pathRunsCreate, r.URL.Path) + + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "exp-1", payload["experiment_id"]) + assert.Equal(t, "my-run", payload["run_name"]) + assert.NotNil(t, payload["start_time"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"run-abc"}}}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + runID, err := c.CreateRun(context.Background(), "exp-1", "my-run", nil) + require.NoError(t, err) + assert.Equal(t, "run-abc", runID) +} + +func TestCreateRun_WithTags(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + + tags, ok := payload["tags"].([]interface{}) + require.True(t, ok) + assert.Len(t, tags, 2) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"run-tagged"}}}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + tags := []Tag{ + {Key: "k1", Value: "v1"}, + {Key: "k2", Value: "v2"}, + } + runID, err := c.CreateRun(context.Background(), "exp-1", "tagged-run", tags) + require.NoError(t, err) + assert.Equal(t, "run-tagged", runID) +} + +func TestCreateRun_ServerError(t *testing.T) { + 
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error_code":"INVALID_PARAMETER_VALUE","message":"bad param"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + _, err := c.CreateRun(context.Background(), "exp-1", "fail-run", nil) + require.Error(t, err) + apiErr, ok := err.(*APIError) + require.True(t, ok) + assert.Equal(t, http.StatusBadRequest, apiErr.StatusCode) + assert.Equal(t, "INVALID_PARAMETER_VALUE", apiErr.ErrorCode) +} + +func TestUpdateRun_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, pathRunsUpdate, r.URL.Path) + + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "run-1", payload["run_id"]) + assert.Equal(t, "FINISHED", payload["status"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + err := c.UpdateRun(context.Background(), "run-1", "FINISHED", nil) + require.NoError(t, err) +} + +func TestUpdateRun_WithEndTime(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "run-1", payload["run_id"]) + assert.NotNil(t, payload["end_time"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + endTime := int64(1700000000000) + err := c.UpdateRun(context.Background(), "run-1", "FAILED", &endTime) + require.NoError(t, err) +} + +func TestSetTag_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, pathRunsSetTag, 
r.URL.Path) + + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "run-1", payload["run_id"]) + assert.Equal(t, "my-key", payload["key"]) + assert.Equal(t, "my-value", payload["value"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + err := c.SetTag(context.Background(), "run-1", "my-key", "my-value") + require.NoError(t, err) +} + +func TestSearchRuns_Success(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, pathRunsSearch, r.URL.Path) + + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + + ids := payload["experiment_ids"].([]interface{}) + assert.Len(t, ids, 1) + assert.Equal(t, "exp-1", ids[0]) + assert.Equal(t, "tags.status = 'RUNNING'", payload["filter"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"runs":[{"info":{"run_id":"r1"}},{"info":{"run_id":"r2"}}],"next_page_token":"tok2"}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + result, err := c.SearchRuns(context.Background(), []string{"exp-1"}, "tags.status = 'RUNNING'", 100, "") + require.NoError(t, err) + assert.Len(t, result.Runs, 2) + assert.Equal(t, "tok2", result.NextPageToken) +} + +func TestSearchRuns_EmptyResults(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"runs":[]}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + result, err := c.SearchRuns(context.Background(), []string{"exp-1"}, "", 10, "") + require.NoError(t, err) + assert.Empty(t, result.Runs) + assert.Empty(t, result.NextPageToken) +} + +func TestSearchRuns_WithPageToken(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + var payload map[string]interface{} + require.NoError(t, json.Unmarshal(body, &payload)) + assert.Equal(t, "page-2", payload["page_token"]) + + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"runs":[]}`)) + })) + defer server.Close() + + c := newTestClient(t, server.URL) + _, err := c.SearchRuns(context.Background(), []string{"exp-1"}, "", 10, "page-2") + require.NoError(t, err) +} + +func TestIsNotFoundError_True(t *testing.T) { + err := &APIError{StatusCode: 404, ErrorCode: "RESOURCE_DOES_NOT_EXIST", Message: "not found"} + assert.True(t, IsNotFoundError(err)) +} + +func TestIsNotFoundError_ByStatusCode(t *testing.T) { + err := &APIError{StatusCode: http.StatusNotFound, ErrorCode: "UNKNOWN"} + assert.True(t, IsNotFoundError(err)) +} + +func TestIsNotFoundError_False(t *testing.T) { + err := &APIError{StatusCode: 500, ErrorCode: "INTERNAL_ERROR"} + assert.False(t, IsNotFoundError(err)) +} + +func TestIsNotFoundError_NonAPIError(t *testing.T) { + assert.False(t, IsNotFoundError(assert.AnError)) +} + +func TestIsAlreadyExistsError_True(t *testing.T) { + err := &APIError{StatusCode: 409, ErrorCode: "RESOURCE_ALREADY_EXISTS"} + assert.True(t, IsAlreadyExistsError(err)) +} + +func TestIsAlreadyExistsError_False(t *testing.T) { + err := &APIError{StatusCode: 409, ErrorCode: "OTHER"} + assert.False(t, IsAlreadyExistsError(err)) +} + +func TestIsAlreadyExistsError_NonAPIError(t *testing.T) { + assert.False(t, IsAlreadyExistsError(assert.AnError)) +} + +func TestAPIError_ErrorString(t *testing.T) { + err := &APIError{StatusCode: 400, ErrorCode: "BAD_REQUEST", Message: "invalid param"} + assert.Equal(t, "MLflow API error (HTTP 400, BAD_REQUEST): invalid param", err.Error()) +} + +func TestDoWithRetry_RetriesOn5xx(t *testing.T) { + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := atomic.AddInt32(&callCount, 1) + if n <= 2 { 
+ w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error_code":"SERVICE_UNAVAILABLE","message":"retry later"}`)) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + })) + defer server.Close() + + // Use UpdateRun (idempotent) to verify that retryable endpoints are retried. + c := newTestClientWithFastRetry(t, server.URL) + err := c.UpdateRun(context.Background(), "run-1", "FINISHED", nil) + require.NoError(t, err) + assert.GreaterOrEqual(t, atomic.LoadInt32(&callCount), int32(3)) +} + +func TestDoWithRetry_NoRetryOn4xx(t *testing.T) { + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&callCount, 1) + w.WriteHeader(http.StatusBadRequest) + _, _ = w.Write([]byte(`{"error_code":"INVALID_PARAMETER_VALUE","message":"bad"}`)) + })) + defer server.Close() + + // Use SetTag (idempotent, retryable) to verify 4xx stops retries. + c := newTestClientWithFastRetry(t, server.URL) + err := c.SetTag(context.Background(), "run-1", "key", "val") + require.Error(t, err) + assert.Equal(t, int32(1), atomic.LoadInt32(&callCount)) +} + +func TestDoWithRetry_SkipsRetryForRunsCreate(t *testing.T) { + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&callCount, 1) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error_code":"SERVICE_UNAVAILABLE","message":"retry later"}`)) + })) + defer server.Close() + + c := newTestClientWithFastRetry(t, server.URL) + _, err := c.CreateRun(context.Background(), "exp-1", "test-run", nil) + require.Error(t, err) + // Non-idempotent endpoint: should be called exactly once, no retries. 
+ assert.Equal(t, int32(1), atomic.LoadInt32(&callCount)) +} + +func TestDoWithRetry_SkipsRetryForExperimentsCreate(t *testing.T) { + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&callCount, 1) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error_code":"SERVICE_UNAVAILABLE","message":"retry later"}`)) + })) + defer server.Close() + + c := newTestClientWithFastRetry(t, server.URL) + _, err := c.CreateExperiment(context.Background(), "test-exp", nil) + require.Error(t, err) + // Non-idempotent endpoint: should be called exactly once, no retries. + assert.Equal(t, int32(1), atomic.LoadInt32(&callCount)) +} + +func TestDoWithRetry_SkipsRetryForLogBatch(t *testing.T) { + var callCount int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&callCount, 1) + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte(`{"error_code":"SERVICE_UNAVAILABLE","message":"retry later"}`)) + })) + defer server.Close() + + c := newTestClientWithFastRetry(t, server.URL) + err := c.LogBatch(context.Background(), LogBatchRequest{RunID: "run-1"}) + require.Error(t, err) + assert.Equal(t, int32(1), atomic.LoadInt32(&callCount)) +} + +func TestBuildURL_AppendsPath(t *testing.T) { + c, _ := NewClient(Config{Endpoint: "http://mlflow.example.com/prefix"}) + u := c.buildURL("/api/2.0/mlflow/experiments/create") + assert.Equal(t, "http://mlflow.example.com/prefix/api/2.0/mlflow/experiments/create", u.String()) +} + +func TestBuildURL_TrimsTrailingSlash(t *testing.T) { + c, _ := NewClient(Config{Endpoint: "http://mlflow.example.com/prefix/"}) + u := c.buildURL("/api/2.0/mlflow/runs/create") + assert.Equal(t, "http://mlflow.example.com/prefix/api/2.0/mlflow/runs/create", u.String()) +} + +// ---- Helpers ---- + +func newTestClient(t *testing.T, endpoint string) *Client { + t.Helper() + c, err := 
NewClient(Config{ + Endpoint: endpoint, + Retry: RetryPolicy{ + InitialInterval: 1 * time.Millisecond, + MaxInterval: 5 * time.Millisecond, + MaxElapsedTime: 50 * time.Millisecond, + }, + }) + require.NoError(t, err) + return c +} + +func newTestClientWithFastRetry(t *testing.T, endpoint string) *Client { + t.Helper() + c, err := NewClient(Config{ + Endpoint: endpoint, + Retry: RetryPolicy{ + InitialInterval: 1 * time.Millisecond, + MaxInterval: 10 * time.Millisecond, + MaxElapsedTime: 2 * time.Second, + Multiplier: 1.5, + }, + }) + require.NoError(t, err) + return c +} + +// TestMLflowRuntimeConfig_JSONFieldAlignment verifies that the JSON keys produced by +// marshalling MLflowRuntimeConfig are exactly the KEP-defined keys. +func TestMLflowRuntimeConfig_JSONFieldAlignment(t *testing.T) { + cfg := MLflowRuntimeConfig{ + Endpoint: "http://mlflow:5000", + Workspace: "ns1", + WorkspacesEnabled: true, + ParentRunID: "parent-1", + ExperimentID: "exp-1", + AuthType: "kubernetes", + Timeout: "30s", + InsecureSkipVerify: true, + InjectUserEnvVars: true, + } + data, err := json.Marshal(cfg) + require.NoError(t, err) + + var raw map[string]interface{} + require.NoError(t, json.Unmarshal(data, &raw)) + + expectedKeys := []string{ + "endpoint", + "workspacesEnabled", + "workspace", + "parentRunId", + "experimentId", + "authType", + "timeout", + "insecureSkipVerify", + "injectUserEnvVars", + } + for _, key := range expectedKeys { + assert.Contains(t, raw, key, "MLflowRuntimeConfig JSON must contain KEP key %q", key) + } + assert.Len(t, raw, len(expectedKeys), "MLflowRuntimeConfig JSON must contain exactly the KEP-defined keys") +} + +// TestMLflowRuntimeConfig_OmitEmptyFields verifies that omitempty fields are excluded +// when their zero values are set.
+func TestMLflowRuntimeConfig_OmitEmptyFields(t *testing.T) { + cfg := MLflowRuntimeConfig{ + Endpoint: "http://mlflow:5000", + ParentRunID: "parent-1", + AuthType: "kubernetes", + } + data, err := json.Marshal(cfg) + require.NoError(t, err) + + var raw map[string]interface{} + require.NoError(t, json.Unmarshal(data, &raw)) + + // Fields with omitempty and zero values should be absent. + assert.NotContains(t, raw, "workspace", "workspace should be omitted when empty") + assert.NotContains(t, raw, "workspacesEnabled", "workspacesEnabled should be omitted when false") + assert.NotContains(t, raw, "timeout", "timeout should be omitted when empty") + assert.NotContains(t, raw, "insecureSkipVerify", "insecureSkipVerify should be omitted when false") + assert.NotContains(t, raw, "injectUserEnvVars", "injectUserEnvVars should be omitted when false") + + // Required fields should always be present. + assert.Contains(t, raw, "endpoint") + assert.Contains(t, raw, "parentRunId") + assert.Contains(t, raw, "experimentId") // present but empty string + assert.Contains(t, raw, "authType") +} + +// TestMLflowRuntimeConfig_RoundTrip verifies that marshalling and unmarshalling +// MLflowRuntimeConfig produces an identical struct. 
+func TestMLflowRuntimeConfig_RoundTrip(t *testing.T) {
+	original := MLflowRuntimeConfig{
+		Endpoint:           "http://mlflow:5000",
+		Workspace:          "workspace-1",
+		ParentRunID:        "parent-run-123",
+		ExperimentID:       "exp-456",
+		AuthType:           "kubernetes",
+		Timeout:            "15s",
+		InsecureSkipVerify: true,
+		InjectUserEnvVars:  true,
+	}
+	data, err := json.Marshal(original)
+	require.NoError(t, err)
+
+	var decoded MLflowRuntimeConfig
+	require.NoError(t, json.Unmarshal(data, &decoded))
+	assert.Equal(t, original, decoded)
+}
+
+func TestLogBatch_Success(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, pathRunsLogBatch, r.URL.Path)
+		// The handler runs on a server goroutine: use assert.* only, never require.* (it calls t.FailNow).
+		body, _ := io.ReadAll(r.Body)
+		var payload map[string]interface{}
+		assert.NoError(t, json.Unmarshal(body, &payload))
+		assert.Equal(t, "exp-1", payload["run_id"])
+		assert.Equal(t, testFormattedMetrics(), payload["metrics"])
+		assert.Equal(t, testFormattedParams(), payload["params"])
+		assert.Equal(t, testFormattedTags(), payload["tags"])
+
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer server.Close()
+
+	c := newTestClient(t, server.URL)
+
+	err := c.LogBatch(context.Background(), testLogBatchRequest())
+	require.NoError(t, err)
+}
+
+func TestLogBatch_ServerError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadRequest)
+		_, _ = w.Write([]byte(`{"error_code":"INVALID_PARAMETER_VALUE","message":"bad param"}`))
+	}))
+	defer server.Close()
+
+	c := newTestClient(t, server.URL)
+
+	err := c.LogBatch(context.Background(), testLogBatchRequest())
+	require.Error(t, err)
+	var apiErr *APIError // ErrorAs keeps the test valid even if the client wraps the API error
+	require.ErrorAs(t, err, &apiErr)
+	assert.Equal(t, http.StatusBadRequest, apiErr.StatusCode)
+	assert.Equal(t, "INVALID_PARAMETER_VALUE", apiErr.ErrorCode)
+}
+
+func testLogBatchRequest() LogBatchRequest {
+	return LogBatchRequest{
+		RunID: "exp-1",
+		Metrics: []Metric{
+			{Key: "CreateTimeSinceEpoch", Value: 1742826366000},
+		},
+		Params: []Param{
+			{Key: "input-parameter-key", Value: "input-parameter-value"},
+		},
+		Tags: []Tag{
+			{Key: "tag-key", Value: "tag-value"},
+		},
+	}
+}
+
+func testFormattedMetrics() []interface{} {
+	return []interface{}{
+		map[string]interface{}{
+			"key":       "CreateTimeSinceEpoch",
+			"step":      float64(0), // encoding/json decodes JSON numbers into interface{} as float64
+			"timestamp": float64(0),
+			"value":     float64(1742826366000),
+		},
+	}
+}
+
+func testFormattedParams() []interface{} {
+	return []interface{}{
+		map[string]interface{}{
+			"key":   "input-parameter-key",
+			"value": "input-parameter-value",
+		},
+	}
+}
+
+func testFormattedTags() []interface{} {
+	return []interface{}{
+		map[string]interface{}{
+			"key":   "tag-key",
+			"value": "tag-value",
+		},
+	}
+}
diff --git a/backend/src/common/plugins/mlflow/config.go b/backend/src/common/plugins/mlflow/config.go
new file mode 100644
index 00000000000..7a61253f914
--- /dev/null
+++ b/backend/src/common/plugins/mlflow/config.go
@@ -0,0 +1,188 @@
+// Copyright 2026 The Kubeflow Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mlflow
+
+import (
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+
+	commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins"
+	"github.com/kubeflow/pipelines/backend/src/common/util"
+)
+
+// EnvMLflowConfig is the single environment variable injected into Argo
+// Workflow templates by the API server.
+const EnvMLflowConfig = "KFP_MLFLOW_CONFIG"
+
+// TagNestedRunParentRunID is the MLflow tag key set on nested runs to link them to their parent run.
+const TagNestedRunParentRunID = "mlflow.parentRunId"
+
+// MLflowRuntimeConfig is the JSON payload marshalled into KFP_MLFLOW_CONFIG (see EnvMLflowConfig).
+type MLflowRuntimeConfig struct {
+	Endpoint           string                   `json:"endpoint"` // no omitempty: always serialized
+	WorkspacesEnabled  bool                     `json:"workspacesEnabled,omitempty"`
+	Workspace          string                   `json:"workspace,omitempty"`
+	ParentRunID        string                   `json:"parentRunId"`  // no omitempty: always serialized
+	ExperimentID       string                   `json:"experimentId"` // no omitempty: always serialized
+	AuthType           string                   `json:"authType"`     // no omitempty: always serialized
+	Timeout            string                   `json:"timeout,omitempty"`
+	InsecureSkipVerify bool                     `json:"insecureSkipVerify,omitempty"`
+	InjectUserEnvVars  bool                     `json:"injectUserEnvVars,omitempty"`
+	TLS                *commonplugins.TLSConfig `json:"tls,omitempty" mapstructure:"tls"`
+}
+
+// MLflowPluginConfig represents the MLflow plugin configuration (endpoint, timeout, TLS and plugin settings).
+type MLflowPluginConfig struct {
+	Endpoint string                   `json:"endpoint,omitempty" mapstructure:"endpoint"`
+	Timeout  string                   `json:"timeout,omitempty" mapstructure:"timeout"` // parsed with time.ParseDuration by BuildMLflowRequestContext
+	TLS      *commonplugins.TLSConfig `json:"tls,omitempty" mapstructure:"tls"`
+	Settings *MLflowPluginSettings    `json:"settings,omitempty" mapstructure:"settings"`
+}
+
+// MLflowCredentials holds the resolved authentication credentials for an MLflow endpoint.
+type MLflowCredentials struct {
+	AuthType    string
+	BearerToken string
+}
+
+// RequestContext holds a fully resolved MLflow connection: the parsed
+// endpoint URL, the shared MLflow Client, and workspace settings.
+type RequestContext struct {
+	BaseURL           *url.URL
+	Client            *Client
+	Workspace         string
+	WorkspacesEnabled bool
+}
+
+// MLflowPluginSettings contains MLflow-specific settings parsed from
+// MLflowPluginConfig.Settings.
+type MLflowPluginSettings struct {
+	WorkspacesEnabled     *bool   `json:"workspacesEnabled,omitempty"` // pointer: nil means "not set"
+	ExperimentDescription *string `json:"experimentDescription,omitempty"`
+	DefaultExperimentName string  `json:"defaultExperimentName,omitempty"`
+	KFPBaseURL            string  `json:"kfpBaseURL,omitempty"`
+	KFPRunURLPathTemplate string  `json:"kfpRunURLPathTemplate,omitempty"`
+	MLflowBaseURL         string  `json:"mlflowBaseURL,omitempty"`
+	MLflowUIPathPrefix    string  `json:"mlflowUIPathPrefix,omitempty"`
+	InjectUserEnvVars     *bool   `json:"injectUserEnvVars,omitempty"` // pointer: nil means "not set"
+}
+
+// BuildHTTPClient returns an http.Client with the given timeout; a non-nil tlsCfg sets InsecureSkipVerify and, if given, adds the CA bundle to the root pool.
+func BuildHTTPClient(timeout time.Duration, tlsCfg *commonplugins.TLSConfig) (*http.Client, error) {
+	transport := http.DefaultTransport.(*http.Transport).Clone() // NOTE(review): unchecked assertion; panics if http.DefaultTransport was replaced by another RoundTripper type
+	if tlsCfg != nil {
+		tlsConfig := &tls.Config{
+			InsecureSkipVerify: tlsCfg.InsecureSkipVerify,
+		}
+		if tlsCfg.CABundlePath != "" {
+			caBundle, err := os.ReadFile(tlsCfg.CABundlePath)
+			if err != nil {
+				return nil, fmt.Errorf("failed to read plugins.mlflow.tls.caBundlePath %q: %w", tlsCfg.CABundlePath, err)
+			}
+			certPool, err := x509.SystemCertPool() // start from the system roots so the bundle extends them
+			if err != nil {
+				certPool = x509.NewCertPool() // system pool unavailable: trust only the supplied bundle
+			}
+			if !certPool.AppendCertsFromPEM(caBundle) {
+				return nil, fmt.Errorf("plugins.mlflow.tls.caBundlePath %q did not contain valid PEM certificates", tlsCfg.CABundlePath)
+			}
+			tlsConfig.RootCAs = certPool
+		}
+		transport.TLSClientConfig = tlsConfig // set on the clone; http.DefaultTransport itself is not modified
+	}
+	return &http.Client{
+		Timeout:   timeout,
+		Transport: transport,
+	}, nil
+}
+
+// ResolveMLflowCredentials resolves the Kubernetes service account token used
+// to authenticate with the MLflow endpoint.
+func ResolveMLflowCredentials() (MLflowCredentials, error) {
+	restConfig, err := util.GetKubernetesConfig()
+	if err != nil {
+		return MLflowCredentials{}, util.NewInternalServerError(err, "failed to get Kubernetes config for MLflow auth")
+	}
+	token := restConfig.BearerToken // prefer a token carried directly in the REST config
+	if token == "" && restConfig.BearerTokenFile != "" { // otherwise fall back to the token file, when one is configured
+		tokenBytes, err := os.ReadFile(restConfig.BearerTokenFile)
+		if err != nil {
+			return MLflowCredentials{}, util.NewInternalServerError(err, "failed to read bearer token file %q for MLflow auth", restConfig.BearerTokenFile)
+		}
+		token = strings.TrimSpace(string(tokenBytes)) // strip surrounding whitespace such as a trailing newline
+	}
+	if token == "" { // neither source yielded a token
+		return MLflowCredentials{}, util.NewInvalidInputError("Kubernetes bearer token is empty for MLflow auth")
+	}
+	return MLflowCredentials{
+		AuthType:    AuthTypeKubernetes,
+		BearerToken: token,
+	}, nil
+}
+
+// BuildMLflowRequestContext is the shared core that validates the MLflowPluginConfig,
+// resolves credentials, builds the HTTP client and MLflow client, and returns
+// a ready-to-use RequestContext. The workspace and workspacesEnabled values
+// are caller-specific and passed in directly.
+func BuildMLflowRequestContext(pluginCfg MLflowPluginConfig, workspace string, workspacesEnabled bool) (*RequestContext, error) {
+	baseURL, err := url.Parse(pluginCfg.Endpoint)
+	if err != nil || baseURL.Scheme == "" || baseURL.Host == "" { // require an absolute URL with both scheme and host
+		return nil, util.NewInvalidInputError("invalid plugins.mlflow endpoint %q", pluginCfg.Endpoint)
+	}
+	timeout, err := time.ParseDuration(pluginCfg.Timeout) // an empty Timeout fails to parse and is rejected below
+	if err != nil {
+		return nil, util.NewInvalidInputError("invalid plugins.mlflow timeout %q: %v", pluginCfg.Timeout, err)
+	}
+	if timeout <= 0 {
+		return nil, util.NewInvalidInputError("plugins.mlflow timeout must be > 0")
+	}
+	authMaterial, err := ResolveMLflowCredentials()
+	if err != nil {
+		return nil, err
+	}
+	httpClient, err := BuildHTTPClient(timeout, pluginCfg.TLS)
+	if err != nil {
+		return nil, err
+	}
+	retrySettings := RetryPolicy{ // package-level retry defaults with a fixed 2.0 multiplier
+		InitialInterval: DefaultRetryInitial,
+		MaxInterval:     DefaultRetryMax,
+		MaxElapsedTime:  DefaultRetryElapsed,
+		Multiplier:      2.0,
+	}
+	sharedClient, err := NewClient(Config{
+		Endpoint:          pluginCfg.Endpoint,
+		HTTPClient:        httpClient,
+		BearerToken:       authMaterial.BearerToken,
+		WorkspacesEnabled: workspacesEnabled,
+		Workspace:         workspace,
+		Retry:             retrySettings,
+	})
+	if err != nil {
+		return nil, util.NewInvalidInputError("failed to build MLflow client: %v", err)
+	}
+	return &RequestContext{
+		BaseURL:           baseURL,
+		Workspace:         workspace,
+		WorkspacesEnabled: workspacesEnabled,
+		Client:            sharedClient,
+	}, nil
+}
diff --git a/backend/src/common/util/consts.go b/backend/src/common/util/consts.go
index c98d3d5ba04..6b31400b851 100644
--- a/backend/src/common/util/consts.go
+++ b/backend/src/common/util/consts.go
@@ -56,6 +56,11 @@ const (
 	AnnotationValueIstioSidecarInjectEnabled  = "true"
 	AnnotationValueIstioSidecarInjectDisabled = "false"
 
+	// AnnotationKeyRuntimeRole is set on compiled Argo Workflow templates to
+	// identify the logical role of the pod (driver, launcher, etc.). It is
+	// used by UpsertRuntimeEnvVars to target the right containers.
+	AnnotationKeyRuntimeRole = "pipelines.kubeflow.org/runtime-role"
+
 	// LabelKeyCacheEnabled is a workflow label key.
 	// It captures whether this step will be selected by cache service.
 	// To disable/enable cache for a single run, this label needs to be added in every step under a run.
diff --git a/backend/src/common/util/execution_spec.go b/backend/src/common/util/execution_spec.go
index 60733f77ff5..4a20ad37298 100644
--- a/backend/src/common/util/execution_spec.go
+++ b/backend/src/common/util/execution_spec.go
@@ -31,6 +31,15 @@ const (
 	Unknown ExecutionType = "Unknown"
 )
 
+// ExecutionRuntimeRole identifies a logical container role within an execution engine.
+type ExecutionRuntimeRole string
+
+const (
+	ExecutionRuntimeRoleDriver   ExecutionRuntimeRole = "driver"
+	ExecutionRuntimeRoleLauncher ExecutionRuntimeRole = "launcher"
+	ExecutionRuntimeRoleExecutor ExecutionRuntimeRole = "executor"
+)
+
 var (
 	executionType = ArgoWorkflow // an utility var to store current ExecutionType
 )
@@ -170,6 +179,10 @@ type ExecutionSpec interface {
 
 	// Set OwnerReferences from a ScheduledWorkflow
 	SetOwnerReferences(schedule *swfapi.ScheduledWorkflow)
+
+	// UpsertRuntimeEnvVars adds or replaces env vars on containers that
+	// implement the given runtime roles for this execution engine.
+	UpsertRuntimeEnvVars(envVars map[string]string, roles ...ExecutionRuntimeRole) error
 }
 
 // Convert YAML in bytes into ExecutionSpec instance
diff --git a/backend/src/common/util/workflow.go b/backend/src/common/util/workflow.go
index 375b19e217c..b5860834e48 100644
--- a/backend/src/common/util/workflow.go
+++ b/backend/src/common/util/workflow.go
@@ -39,6 +39,7 @@ import (
 	swfapi "github.com/kubeflow/pipelines/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1"
 	"github.com/pkg/errors"
 	log "github.com/sirupsen/logrus"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime/schema"
@@ -673,6 +674,54 @@ func (w *Workflow) SetLabelsToAllTemplates(key string, value string) {
 	}
 }
 
+// UpsertRuntimeEnvVars adds or replaces env vars on the container of every
+// template whose AnnotationKeyRuntimeRole annotation names one of roles.
+// Templates without a container or without the annotation are skipped, and
+// an empty envVars map or template list is a no-op. It always returns nil.
+func (w *Workflow) UpsertRuntimeEnvVars(envVars map[string]string, roles ...ExecutionRuntimeRole) error {
+	if len(envVars) == 0 || len(w.Spec.Templates) == 0 {
+		return nil
+	}
+	roleSet := make(map[ExecutionRuntimeRole]bool, len(roles)) // set of requested roles for O(1) lookup
+	for _, r := range roles {
+		roleSet[r] = true
+	}
+	envList := make([]corev1.EnvVar, 0, len(envVars))
+	for k, v := range envVars { // NOTE(review): map iteration order is random, so newly appended vars have no stable order
+		envList = append(envList, corev1.EnvVar{Name: k, Value: v})
+	}
+	for i := range w.Spec.Templates {
+		tmpl := &w.Spec.Templates[i] // take the address so the update lands in the spec, not in a copy
+		if tmpl.Container == nil {
+			continue
+		}
+
+		role := tmpl.Metadata.Annotations[AnnotationKeyRuntimeRole]
+		if role != "" && roleSet[ExecutionRuntimeRole(role)] {
+			tmpl.Container.Env = upsertEnvVars(tmpl.Container.Env, envList)
+		}
+	}
+	return nil
+}
+
+// upsertEnvVars merges toAdd into existing: env vars whose name already
+// exists are replaced; new names are appended.
+func upsertEnvVars(existing []corev1.EnvVar, toAdd []corev1.EnvVar) []corev1.EnvVar {
+	nameIndex := make(map[string]int, len(existing)) // env var name -> position in existing
+	for i, e := range existing {
+		nameIndex[e.Name] = i
+	}
+	for _, e := range toAdd {
+		if idx, ok := nameIndex[e.Name]; ok {
+			existing[idx] = e // replace in place, keeping the variable's original position
+		} else {
+			nameIndex[e.Name] = len(existing) // record the index the appended entry is about to get
+			existing = append(existing, e)
+		}
+	}
+	return existing
+}
+
 // SetOwnerReferences sets owner references on a Workflow.
 func (w *Workflow) SetOwnerReferences(schedule *swfapi.ScheduledWorkflow) {
 	w.OwnerReferences = []metav1.OwnerReference{
diff --git a/backend/src/common/util/workflow_test.go b/backend/src/common/util/workflow_test.go
index d7682589511..821146d6c6e 100644
--- a/backend/src/common/util/workflow_test.go
+++ b/backend/src/common/util/workflow_test.go
@@ -2626,4 +2626,156 @@ func TestWorkflowInformer_List(t *testing.T) {
 	result, err := wfi.List(&selector)
 	assert.Nil(t, err)
 	assert.Len(t, result, 1)
+}
+
+// ---------- UpsertRuntimeEnvVars tests ----------
+
+// Helper that builds a Workflow with the supplied templates.
+func workflowWithTemplates(templates ...workflowapi.Template) *Workflow {
+	return NewWorkflow(&workflowapi.Workflow{
+		Spec: workflowapi.WorkflowSpec{
+			Templates: templates,
+		},
+	})
+}
+
+// dagTemplate returns a DAG-only template (no container).
+func dagTemplate(name string) workflowapi.Template {
+	return workflowapi.Template{
+		Name: name,
+		DAG:  &workflowapi.DAGTemplate{},
+	}
+}
+
+func TestUpsertRuntimeEnvVars_EmptyInputs(t *testing.T) {
+	w := workflowWithTemplates(
+		annotatedTemplate("driver", ExecutionRuntimeRoleDriver),
+	)
+
+	// Empty env map — should be a no-op.
+	assert.NoError(t, w.UpsertRuntimeEnvVars(map[string]string{}, ExecutionRuntimeRoleDriver))
+	assert.Empty(t, w.Spec.Templates[0].Container.Env)
+
+	// Nil env map — should be a no-op.
+	assert.NoError(t, w.UpsertRuntimeEnvVars(nil, ExecutionRuntimeRoleDriver))
+	assert.Empty(t, w.Spec.Templates[0].Container.Env)
+
+	// No templates.
+	empty := NewWorkflow(&workflowapi.Workflow{})
+	assert.NoError(t, empty.UpsertRuntimeEnvVars(map[string]string{"K": "V"}, ExecutionRuntimeRoleDriver))
+}
+
+func TestUpsertRuntimeEnvVars_DAGTemplateSkipped(t *testing.T) {
+	w := workflowWithTemplates(dagTemplate("dag"))
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"K": "V"},
+		ExecutionRuntimeRoleDriver,
+		ExecutionRuntimeRoleLauncher,
+		ExecutionRuntimeRoleExecutor,
+	)
+	assert.NoError(t, err)
+	// DAG template has no container — nothing to modify.
+	assert.Nil(t, w.Spec.Templates[0].Container)
+}
+
+// --- Annotation-based role detection tests ---
+
+// annotatedTemplate returns a template with the runtime-role annotation set.
+func annotatedTemplate(name string, role ExecutionRuntimeRole, existingEnv ...corev1.EnvVar) workflowapi.Template {
+	return workflowapi.Template{
+		Name: name,
+		Metadata: workflowapi.Metadata{
+			Annotations: map[string]string{
+				AnnotationKeyRuntimeRole: string(role),
+			},
+		},
+		Container: &corev1.Container{
+			Image: "some-image:latest",
+			Env:   existingEnv,
+		},
+	}
+}
+
+func TestUpsertRuntimeEnvVars_Annotation_DriverRole(t *testing.T) {
+	w := workflowWithTemplates(
+		annotatedTemplate("driver", ExecutionRuntimeRoleDriver),
+		annotatedTemplate("launcher", ExecutionRuntimeRoleLauncher),
+	)
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"KEY": "val"},
+		ExecutionRuntimeRoleDriver,
+	)
+	assert.NoError(t, err)
+
+	assert.Equal(t, []corev1.EnvVar{{Name: "KEY", Value: "val"}},
+		w.Spec.Templates[0].Container.Env)
+	// Launcher should be untouched.
+	assert.Empty(t, w.Spec.Templates[1].Container.Env)
+}
+
+func TestUpsertRuntimeEnvVars_Annotation_LauncherRole(t *testing.T) {
+	w := workflowWithTemplates(
+		annotatedTemplate("driver", ExecutionRuntimeRoleDriver),
+		annotatedTemplate("launcher", ExecutionRuntimeRoleLauncher),
+	)
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"KEY": "val"},
+		ExecutionRuntimeRoleLauncher,
+	)
+	assert.NoError(t, err)
+
+	assert.Empty(t, w.Spec.Templates[0].Container.Env)
+	assert.Equal(t, []corev1.EnvVar{{Name: "KEY", Value: "val"}},
+		w.Spec.Templates[1].Container.Env)
+}
+
+func TestUpsertRuntimeEnvVars_Annotation_MultipleRoles(t *testing.T) {
+	w := workflowWithTemplates(
+		annotatedTemplate("driver", ExecutionRuntimeRoleDriver),
+		annotatedTemplate("launcher", ExecutionRuntimeRoleLauncher),
+		dagTemplate("dag"),
+	)
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"KEY": "val"},
+		ExecutionRuntimeRoleDriver,
+		ExecutionRuntimeRoleLauncher,
+	)
+	assert.NoError(t, err)
+
+	expected := []corev1.EnvVar{{Name: "KEY", Value: "val"}}
+	assert.Equal(t, expected, w.Spec.Templates[0].Container.Env)
+	assert.Equal(t, expected, w.Spec.Templates[1].Container.Env)
+	assert.Nil(t, w.Spec.Templates[2].Container)
+}
+
+func TestUpsertRuntimeEnvVars_Annotation_UpsertReplacesExisting(t *testing.T) {
+	w := workflowWithTemplates(
+		annotatedTemplate("driver", ExecutionRuntimeRoleDriver,
+			corev1.EnvVar{Name: "OLD", Value: "before"}),
+	)
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"OLD": "after", "NEW": "fresh"},
+		ExecutionRuntimeRoleDriver,
+	)
+	assert.NoError(t, err)
+
+	env := w.Spec.Templates[0].Container.Env
+	assert.Len(t, env, 2)
+	assert.Equal(t, corev1.EnvVar{Name: "OLD", Value: "after"}, env[0]) // replaced in place at its original index
+	assert.Equal(t, corev1.EnvVar{Name: "NEW", Value: "fresh"}, env[1]) // the only new name, so it is always appended last
+}
+
+func TestUpsertRuntimeEnvVars_Annotation_UnknownRoleIgnored(t *testing.T) {
+	// A template with an unknown annotation value should not match any role.
+	w := workflowWithTemplates(
+		annotatedTemplate("unknown", "some-other-role"),
+	)
+	err := w.UpsertRuntimeEnvVars(
+		map[string]string{"KEY": "val"},
+		ExecutionRuntimeRoleDriver,
+		ExecutionRuntimeRoleLauncher,
+		ExecutionRuntimeRoleExecutor,
+	)
+	assert.NoError(t, err)
+	assert.Empty(t, w.Spec.Templates[0].Container.Env)
 }
diff --git a/backend/src/crd/controller/scheduledworkflow/controller.go b/backend/src/crd/controller/scheduledworkflow/controller.go
index d1d139703a1..f7e83201a5c 100644
--- a/backend/src/crd/controller/scheduledworkflow/controller.go
+++ b/backend/src/crd/controller/scheduledworkflow/controller.go
@@ -38,8 +38,10 @@ import (
 	log "github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
 	"google.golang.org/grpc/metadata"
+	"google.golang.org/protobuf/encoding/protojson"
 	"google.golang.org/protobuf/types/known/structpb"
 	corev1 "k8s.io/api/core/v1"
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/runtime"
@@ -628,6 +630,16 @@ func (c *Controller) submitNewWorkflowIfNotAlreadySubmitted(
 		}
 	}
 
+	// Convert PluginsInput from the SWF spec (map[string]apiextensionsv1.JSON)
+	// to the protobuf map expected by the CreateRun request.
+	var pluginsInput map[string]*structpb.Struct
+	if len(swf.Spec.PluginsInput) > 0 { // stays nil when the SWF carries no plugin input
+		pluginsInput, err = crdPluginsInputToProto(swf.Spec.PluginsInput)
+		if err != nil {
+			return false, "", fmt.Errorf("failed to parse plugins_input from SWF spec: %w", err)
+		}
+	}
+
 	run, err := c.runClient.CreateRun(ctx, &api.CreateRunRequest{
 		ExperimentId: swf.Spec.ExperimentId,
 		Run: &api.Run{
@@ -635,6 +647,7 @@ func (c *Controller) submitNewWorkflowIfNotAlreadySubmitted(
 			DisplayName:    swf.NextResourceName(),
 			RecurringRunId: string(swf.UID),
 			RuntimeConfig:  runtimeConfig,
+			PluginsInput:   pluginsInput,
 			PipelineSource: &api.Run_PipelineVersionReference{
 				PipelineVersionReference: &api.PipelineVersionReference{
 					PipelineId: swf.Spec.PipelineId,
@@ -791,3 +804,17 @@ func hasV2ComponentMarker(podMetadata *workflowapi.Metadata) bool {
 func hasV2PipelineMarker(labels, annotations map[string]string) bool {
 	return labels[util.V2PipelineKey] == "true" || annotations[util.V2PipelineKey] == "true"
 }
+
+// crdPluginsInputToProto converts the CRD's map[string]apiextensionsv1.JSON into the
+// map[string]*structpb.Struct used by CreateRun; the first entry protojson cannot decode aborts the conversion.
+func crdPluginsInputToProto(input map[string]apiextensionsv1.JSON) (map[string]*structpb.Struct, error) {
+	result := make(map[string]*structpb.Struct, len(input))
+	for key, val := range input {
+		s := &structpb.Struct{}
+		if err := protojson.Unmarshal(val.Raw, s); err != nil {
+			return nil, fmt.Errorf("invalid plugins_input entry %q: %w", key, err)
+		}
+		result[key] = s
+	}
+	return result, nil
+}
diff --git a/backend/src/crd/controller/scheduledworkflow/controller_test.go b/backend/src/crd/controller/scheduledworkflow/controller_test.go
index 08933fe4a97..58e2b7fb074 100644
--- a/backend/src/crd/controller/scheduledworkflow/controller_test.go
+++ b/backend/src/crd/controller/scheduledworkflow/controller_test.go
@@ -27,6 +27,7 @@ import (
 	"github.com/spf13/viper"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/types"
@@ -443,3 +444,43 @@ func (f *fakeExecutionInformer) InformerFactoryStart(stopCh <-chan struct{}) {}
 var _ commonutil.ExecutionClient = &fakeExecutionClient{}
 var _ commonutil.ExecutionInterface = &fakeExecutionInterface{}
 var _ commonutil.ExecutionInformer = &fakeExecutionInformer{}
+
+func TestCrdPluginsInputToProto(t *testing.T) {
+	t.Run("valid single plugin", func(t *testing.T) {
+		input := map[string]apiextensionsv1.JSON{
+			"mlflow": {Raw: []byte(`{"experiment_name":"my-exp"}`)},
+		}
+		result, err := crdPluginsInputToProto(input)
+		require.NoError(t, err)
+		require.Len(t, result, 1)
+		require.Contains(t, result, "mlflow")
+		assert.Equal(t, "my-exp", result["mlflow"].Fields["experiment_name"].GetStringValue())
+	})
+
+	t.Run("valid multiple plugins", func(t *testing.T) {
+		input := map[string]apiextensionsv1.JSON{
+			"mlflow": {Raw: []byte(`{"experiment_name":"exp-1"}`)},
+			"other":  {Raw: []byte(`{"key":"val"}`)},
+		}
+		result, err := crdPluginsInputToProto(input)
+		require.NoError(t, err)
+		require.Len(t, result, 2)
+		assert.Contains(t, result, "mlflow")
+		assert.Contains(t, result, "other")
+	})
+
+	t.Run("empty map", func(t *testing.T) {
+		result, err := crdPluginsInputToProto(map[string]apiextensionsv1.JSON{})
+		require.NoError(t, err)
+		assert.Empty(t, result)
+	})
+
+	t.Run("invalid inner value", func(t *testing.T) {
+		input := map[string]apiextensionsv1.JSON{
+			"mlflow": {Raw: []byte(`"not-an-object"`)}, // a JSON string, not an object
+		}
+		_, err := crdPluginsInputToProto(input)
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "invalid plugins_input entry")
+	})
+}
diff --git a/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/types.go b/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/types.go
index d7f30d585e9..c525999bfcd 100644
--- a/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/types.go
+++ b/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/types.go
@@ -17,6 +17,7 @@ package v1beta1
 import (
 	"github.com/kubeflow/pipelines/backend/src/common"
 	corev1 "k8s.io/api/core/v1"
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 )
@@ -93,6 +94,11 @@ type ScheduledWorkflowSpec struct {
 
 	// ServiceAccount
 	ServiceAccount string `json:"serviceAccount,omitempty"`
 
+	// PluginsInput is passed through to CreateRun for each triggered run.
+	// Each key is a plugin name and the value is that plugin's raw JSON config.
+	// +optional
+	PluginsInput map[string]apiextensionsv1.JSON `json:"pluginsInput,omitempty"`
+
 	// TODO: support additional resource types: K8 jobs, etc.
 }
diff --git a/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/zz_generated.deepcopy.go b/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/zz_generated.deepcopy.go
index cbbd24d2e9c..b345fd42e5b 100644
--- a/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/zz_generated.deepcopy.go
+++ b/backend/src/crd/pkg/apis/scheduledworkflow/v1beta1/zz_generated.deepcopy.go
@@ -19,6 +19,7 @@
 package v1beta1
 
 import (
+	v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
@@ -189,6 +190,13 @@ func (in *ScheduledWorkflowSpec) DeepCopyInto(out *ScheduledWorkflowSpec) {
 		*out = new(WorkflowResource)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.PluginsInput != nil {
+		in, out := &in.PluginsInput, &out.PluginsInput
+		*out = make(map[string]v1.JSON, len(*in))
+		for key, val := range *in {
+			(*out)[key] = *val.DeepCopy()
+		}
+	}
 	return
 }
 
diff --git a/backend/src/v2/cmd/driver/main.go b/backend/src/v2/cmd/driver/main.go
index 428832e6bea..30681464e47 100644
--- a/backend/src/v2/cmd/driver/main.go
+++ b/backend/src/v2/cmd/driver/main.go
@@ -21,10 +21,12 @@ import (
 	"flag"
 	"fmt"
 
+	"github.com/spf13/viper"
 	"google.golang.org/protobuf/encoding/protojson"
 
 	"github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy"
 	"github.com/kubeflow/pipelines/backend/src/common/util"
+	"github.com/kubeflow/pipelines/backend/src/v2/common/plugins"
 
 	"os"
 	"path/filepath"
@@ -38,6 +40,8 @@ import (
 	"github.com/kubeflow/pipelines/backend/src/v2/driver"
 	"github.com/kubeflow/pipelines/backend/src/v2/metadata"
 	"github.com/kubeflow/pipelines/kubernetes_platform/go/kubernetesplatform"
+
+	_ "github.com/kubeflow/pipelines/backend/src/v2/common/plugins/all"
 )
 
 const (
@@ -102,6 +106,7 @@ var (
 
 func main() {
 	flag.Parse()
+	initConfig()
 
 	glog.Infof("Setting log level to: '%s'", *logLevel)
 	err := flag.Set("v", *logLevel)
@@ -212,6 +217,11 @@ func drive() (err error) {
 	if err != nil {
 		return err
 	}
+	// pluginDispatcher executes task-level plugin lifecycle hooks
+	
pluginDispatcher, err := plugins.GetPluginDispatcher()
+	if err != nil {
+		glog.Errorf("Failed to initialize plugin dispatcher: %v", err) // NOTE(review): only logged; the returned value is still passed to driver.Options below — confirm a nil dispatcher is tolerated
+	}
 	options := driver.Options{
 		PipelineName: *pipelineName,
 		RunID:        *runID,
@@ -234,6 +244,7 @@ func drive() (err error) {
 		MLPipelineTLSEnabled: *mlPipelineTLSEnabled,
 		MLMDTLSEnabled:       *metadataTLSEnabled,
 		CaCertPath:           *caCertPath,
+		PluginDispatcher:     pluginDispatcher,
 	}
 	var execution *driver.Execution
 	var driverErr error
@@ -382,3 +393,7 @@ func writeFile(path string, data []byte) (err error) {
 func newMlmdClient(mlmdServerAddress string, mlmdServerPort string, tlsCfg *tls.Config) (*metadata.Client, error) {
 	return metadata.NewClient(mlmdServerAddress, mlmdServerPort, tlsCfg)
 }
+
+func initConfig() {
+	viper.AutomaticEnv() // let viper lookups read from environment variables
+}
diff --git a/backend/src/v2/cmd/launcher-v2/main.go b/backend/src/v2/cmd/launcher-v2/main.go
index bcd37df3f9a..aa17c689cb5 100644
--- a/backend/src/v2/cmd/launcher-v2/main.go
+++ b/backend/src/v2/cmd/launcher-v2/main.go
@@ -24,6 +24,7 @@ import (
 	"github.com/kubeflow/pipelines/backend/src/v2/client_manager"
 	"github.com/kubeflow/pipelines/backend/src/v2/component"
 	"github.com/kubeflow/pipelines/backend/src/v2/config"
+	"github.com/spf13/viper"
 )
 
 // TODO: use https://github.com/spf13/cobra as a framework to create more complex CLI tools with subcommands.
@@ -147,4 +148,5 @@ func init() {
 	flag.Set("logtostderr", "true")
 	// Change the WARNING to INFO level for debugging.
 	flag.Set("stderrthreshold", "WARNING")
+	viper.AutomaticEnv() // let viper lookups read from environment variables
 }
diff --git a/backend/src/v2/common/plugins/all/all.go b/backend/src/v2/common/plugins/all/all.go
new file mode 100644
index 00000000000..6d3dd1ee225
--- /dev/null
+++ b/backend/src/v2/common/plugins/all/all.go
@@ -0,0 +1,7 @@
+// Package all imports all plugin packages to trigger factory registration via init().
+// Binaries that use plugins.GetPluginDispatcher should blank-import this package.
+package all + +import ( + _ "github.com/kubeflow/pipelines/backend/src/v2/common/plugins/mlflow" +) diff --git a/backend/src/v2/common/plugins/config.go b/backend/src/v2/common/plugins/config.go new file mode 100644 index 00000000000..805fc6b5f51 --- /dev/null +++ b/backend/src/v2/common/plugins/config.go @@ -0,0 +1,26 @@ +package plugins + +// TaskInfo contains Task-level information +type TaskInfo struct { + Name string `json:"name"` + RunEndTime int64 `json:"runEndTime"` + RunStatus string `json:"runStatus"` + ScalarMetrics map[string]float64 + Parameters map[string]interface{} + Tags map[string]string +} + +// UpdateTaskInfoWithMetadata updates the task's scalar metrics and parameters with the provided metadata maps. +func (t *TaskInfo) UpdateTaskInfoWithMetadata(kfpRunStatus string, metrics map[string]float64, params map[string]interface{}) { + t.RunStatus = kfpRunStatus + if metrics != nil { + t.ScalarMetrics = metrics + } + if params != nil { + t.Parameters = params + } +} + +func (t *TaskInfo) UpdateTaskInfoWithRunEndTime(runEndTime int64) { + t.RunEndTime = runEndTime +} diff --git a/backend/src/v2/common/plugins/dispatcher.go b/backend/src/v2/common/plugins/dispatcher.go new file mode 100644 index 00000000000..64a368b6355 --- /dev/null +++ b/backend/src/v2/common/plugins/dispatcher.go @@ -0,0 +1,151 @@ +package plugins + +import ( + "context" + "fmt" + "sort" + "time" + + "github.com/golang/glog" +) + +// TaskPluginDispatcher orchestrates plugin lifecycle hooks +type TaskPluginDispatcher interface { + // OnTaskStart is called when a task starts. + // The dispatcher reads taskInfo and pluginConfig and returns a TaskStartResult. + // Individual handler errors are best-effort (logged but non-blocking); the + // dispatcher continues with remaining handlers. Only handlers that started + // successfully will have OnTaskEnd invoked. 
+ OnTaskStart(ctx context.Context, taskInfo *TaskInfo) (*TaskStartResult, error) + + // OnTaskEnd is called when a task reaches a terminal state. Returns true if all plugin syncs succeeded. + OnTaskEnd(ctx context.Context, taskInfo *TaskInfo) error + + // RetrieveUserContainerEnvVars returns the user-specified environment variables to be set in the task user container. + RetrieveUserContainerEnvVars(taskInfo *TaskInfo) (envVars map[string]string, err error) + + // ApplyCustomProperties updates the custom properties for all registered task-level plugins. + ApplyCustomProperties(properties map[string]string) +} + +// NoOpDispatcher is a TaskPluginDispatcher that does nothing. +type NoOpDispatcher struct{} + +func (NoOpDispatcher) OnTaskStart(ctx context.Context, taskInfo *TaskInfo) (*TaskStartResult, error) { + return nil, nil +} +func (NoOpDispatcher) OnTaskEnd(ctx context.Context, taskInfo *TaskInfo) error { + return nil +} +func (NoOpDispatcher) RetrieveUserContainerEnvVars(taskInfo *TaskInfo) (envVars map[string]string, err error) { + return nil, nil +} +func (NoOpDispatcher) ApplyCustomProperties(properties map[string]string) { +} + +var _ TaskPluginDispatcher = NoOpDispatcher{} + +var _ TaskPluginDispatcher = (*TaskPluginDispatcherImpl)(nil) + +type TaskPluginDispatcherImpl struct { + handlers []TaskPluginHandler + startedHandlers map[string]bool +} + +func NewTaskPluginDispatcherImpl(handlers []TaskPluginHandler) (*TaskPluginDispatcherImpl, error) { + if handlers == nil || len(handlers) == 0 { + return nil, fmt.Errorf("NewTaskPluginDispatcherImpl requires non-nil slice containing minimum one handler") + } + sorted := make([]TaskPluginHandler, len(handlers)) + copy(sorted, handlers) + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].Name() < sorted[j].Name() + }) + return &TaskPluginDispatcherImpl{ + handlers: sorted, + }, nil +} + +func (t *TaskPluginDispatcherImpl) OnTaskStart(ctx context.Context, taskInfo *TaskInfo) (*TaskStartResult, error) { + 
if t == nil || taskInfo == nil { + return nil, fmt.Errorf("dispatcher and taskInfo must be non-nil") + } + + t.startedHandlers = make(map[string]bool) + handlerResults := map[string]TaskHandlerStartResult{} + customProperties := map[string]string{} + for _, handler := range t.handlers { + result, err := handler.OnTaskStart(ctx, taskInfo) + if err != nil { + glog.Errorf("failed to launch task-level %s handler: %v", handler.Name(), err) + continue + } + t.startedHandlers[handler.Name()] = true + handlerResults[handler.Name()] = result + for k, v := range handler.GenerateCustomProperties(result) { + customProperties[k] = v + } + } + return &TaskStartResult{ + Results: handlerResults, + CustomProperties: customProperties, + }, nil +} + +func (t *TaskPluginDispatcherImpl) OnTaskEnd(ctx context.Context, taskInfo *TaskInfo) error { + if t == nil || taskInfo == nil { + return fmt.Errorf("dispatcher and taskInfo must be non-nil") + } + + taskInfo.UpdateTaskInfoWithRunEndTime(time.Now().UnixMilli()) + + var taskEndFailures []string + for _, handler := range t.handlers { + if t.startedHandlers != nil && !t.startedHandlers[handler.Name()] { + glog.Infof("Skipping OnTaskEnd for handler %s (OnTaskStart did not succeed)", handler.Name()) + continue + } + err := handler.OnTaskEnd(ctx, taskInfo) + if err != nil { + glog.Errorf("Failed to complete task-level %s handler: %v", handler.Name(), err) + taskEndFailures = append(taskEndFailures, handler.Name()) + } + } + if len(taskEndFailures) > 0 { + return fmt.Errorf("failed to complete the following task-level plugin(s): %v", taskEndFailures) + } + return nil +} + +func (t *TaskPluginDispatcherImpl) RetrieveUserContainerEnvVars(taskInfo *TaskInfo) (injectVars map[string]string, err error) { + if t == nil { + return nil, fmt.Errorf("dispatcher must be non-nil") + } + + injectVars = make(map[string]string) + for _, handler := range t.handlers { + vars, err := handler.RetrieveUserContainerEnvVars() + if err != nil { + return nil, 
fmt.Errorf("failed to retrieve user container env vars for handler %s: %v", handler.Name(), err) + } + + for k, v := range vars { + if _, ok := injectVars[k]; !ok { + injectVars[k] = v + } else { + glog.Errorf("Key %s already present in container env vars. Key-value pair %s:%s will not be added.", k, k, v) + } + } + } + return injectVars, nil +} + +// ApplyCustomProperties forwards properties to every registered task-level handler. +// A handler error is only logged, and the remaining handlers are still updated; +// the method has no error return. A nil dispatcher is a no-op, matching the nil +// guards on the other dispatcher methods instead of panicking on t.handlers. +func (t *TaskPluginDispatcherImpl) ApplyCustomProperties(properties map[string]string) { + if t == nil { + return + } + for _, handler := range t.handlers { + if err := handler.ApplyCustomProperties(properties); err != nil { + glog.Errorf("failed to apply custom properties for handler %s: %v", handler.Name(), err) + } + } +} diff --git a/backend/src/v2/common/plugins/dispatcher_test.go b/backend/src/v2/common/plugins/dispatcher_test.go new file mode 100644 index 00000000000..be8dd66770b --- /dev/null +++ b/backend/src/v2/common/plugins/dispatcher_test.go @@ -0,0 +1,294 @@ +package plugins + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var _ TaskPluginHandler = (*fakeHandler)(nil) + +type fakeStartResult struct { + RunID string +} + +type fakeHandler struct { + name string + startResult TaskHandlerStartResult + startErr error + endErr error + envVars map[string]string + envErr error + customProps map[string]string +} + +func (f *fakeHandler) Name() string { return f.name } +func (f *fakeHandler) OnTaskStart(_ context.Context, _ *TaskInfo) (TaskHandlerStartResult, error) { + return f.startResult, f.startErr +} +func (f *fakeHandler) OnTaskEnd(_ context.Context, _ *TaskInfo) error { + return f.endErr +} +func (f *fakeHandler) RetrieveUserContainerEnvVars() (map[string]string, error) { + return f.envVars, f.envErr +} +func (f *fakeHandler) GenerateCustomProperties(_ TaskHandlerStartResult) map[string]string { + return f.customProps +} + +func (f *fakeHandler) 
ApplyCustomProperties(customProperties map[string]string) error { return nil } + +var taskInfoStart = &TaskInfo{ + Name: "test-task", +} + +var taskInfoEnd = &TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: map[string]float64{}, + Parameters: map[string]interface{}{}, +} + +func TestNewTaskPluginDispatcherImpl_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, err := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + require.NoError(t, err) + require.NotNil(t, dispatcher) + require.NotNil(t, dispatcher.handlers) + require.Len(t, dispatcher.handlers, 1) +} + +func TestNewTaskPluginDispatcherImpl_NilHandlers_Failure(t *testing.T) { + dispatcher, err := NewTaskPluginDispatcherImpl(nil) + + require.Nil(t, dispatcher) + require.Error(t, err) + assert.Equal(t, "NewTaskPluginDispatcherImpl requires non-nil slice containing minimum one handler", err.Error()) +} + +func TestNewTaskPluginDispatcherImpl_EmptyHandlers_Failure(t *testing.T) { + dispatcher, err := NewTaskPluginDispatcherImpl([]TaskPluginHandler{}) + + require.Nil(t, dispatcher) + require.Error(t, err) + assert.Equal(t, "NewTaskPluginDispatcherImpl requires non-nil slice containing minimum one handler", err.Error()) +} + +func TestOnTaskStart_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + startResult: &fakeStartResult{RunID: "fake-run-1"}, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "fake-run-1", result.Results["FakePlugin"].(*fakeStartResult).RunID) +} + +func TestOnTaskStart_MultipleHandlers_Success(t *testing.T) { + handler1 := &fakeHandler{ + name: "FakePluginA", + startResult: &fakeStartResult{RunID: "run-a"}, + } + handler2 := &fakeHandler{ + name: 
"FakePluginB", + startResult: &fakeStartResult{RunID: "run-b"}, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler1, handler2}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "run-a", result.Results["FakePluginA"].(*fakeStartResult).RunID) + assert.Equal(t, "run-b", result.Results["FakePluginB"].(*fakeStartResult).RunID) +} + +func TestOnTaskStart_NilDispatcher_Failure(t *testing.T) { + var dispatcher *TaskPluginDispatcherImpl + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.Error(t, err) + assert.Equal(t, "dispatcher and taskInfo must be non-nil", err.Error()) + require.Nil(t, result) +} + +func TestOnTaskStart_NilTaskInfo_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin", startResult: &fakeStartResult{RunID: "run-1"}} + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + result, err := dispatcher.OnTaskStart(context.Background(), nil) + + require.Error(t, err) + assert.Equal(t, "dispatcher and taskInfo must be non-nil", err.Error()) + require.Nil(t, result) +} + +func TestOnTaskStart_CustomPropertiesMerged(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + startResult: &fakeStartResult{RunID: "fake-run-1"}, + customProps: map[string]string{"plugins.fake.run_id": "fake-run-1"}, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "fake-run-1", result.CustomProperties["plugins.fake.run_id"]) +} + +func TestOnTaskStart_MultipleHandlers_CustomPropertiesMerged(t *testing.T) { + handler1 := &fakeHandler{ + name: "FakePluginA", + startResult: &fakeStartResult{RunID: "run-a"}, + customProps: map[string]string{"plugins.a.id": "run-a"}, + } + handler2 := 
&fakeHandler{ + name: "FakePluginB", + startResult: &fakeStartResult{RunID: "run-b"}, + customProps: map[string]string{"plugins.b.id": "run-b"}, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler1, handler2}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Equal(t, "run-a", result.CustomProperties["plugins.a.id"]) + assert.Equal(t, "run-b", result.CustomProperties["plugins.b.id"]) +} + +func TestOnTaskStart_HandlerFailure_NoCustomProperties(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + startErr: fmt.Errorf("plugin startup failed"), + customProps: map[string]string{"plugins.fake.id": "should-not-appear"}, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Empty(t, result.CustomProperties) +} + +func TestOnTaskStart_SingleHandler_HandlerFailure(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + startErr: fmt.Errorf("plugin startup failed"), + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + result, err := dispatcher.OnTaskStart(context.Background(), taskInfoStart) + + require.NoError(t, err) + require.NotNil(t, result) + assert.Empty(t, result.Results) +} + +func TestOnTaskEnd_SingleHandler_Success(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + err := dispatcher.OnTaskEnd(context.Background(), taskInfoEnd) + + require.NoError(t, err) +} + +func TestOnTaskEnd_NilDispatcher_Failure(t *testing.T) { + var dispatcher *TaskPluginDispatcherImpl + + err := dispatcher.OnTaskEnd(context.Background(), taskInfoEnd) + + require.Error(t, err) + assert.Equal(t, "dispatcher and taskInfo must be non-nil", err.Error()) +} + 
+func TestOnTaskEnd_NilTaskInfo_Failure(t *testing.T) { + handler := &fakeHandler{name: "FakePlugin"} + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + err := dispatcher.OnTaskEnd(context.Background(), nil) + + require.Error(t, err) + assert.Equal(t, "dispatcher and taskInfo must be non-nil", err.Error()) +} + +func TestOnTaskEnd_SingleHandler_HandlerFailure(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + endErr: fmt.Errorf("plugin shutdown failed"), + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + err := dispatcher.OnTaskEnd(context.Background(), taskInfoEnd) + + require.Error(t, err) + assert.Equal(t, "failed to complete the following task-level plugin(s): [FakePlugin]", err.Error()) +} + +func TestOnTaskEnd_HandlerAlwaysCalled(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + info := &TaskInfo{ + Name: "test-task", + } + + err := dispatcher.OnTaskEnd(context.Background(), info) + + require.NoError(t, err) +} + +func TestRetrieveUserContainerEnvVars_NilDispatcher_Failure(t *testing.T) { + var dispatcher *TaskPluginDispatcherImpl + + vars, err := dispatcher.RetrieveUserContainerEnvVars(taskInfoEnd) + + require.Error(t, err) + assert.Equal(t, "dispatcher must be non-nil", err.Error()) + require.Nil(t, vars) +} + +func TestRetrieveUserContainerEnvVars_Success(t *testing.T) { + expectedVars := map[string]string{ + "PLUGIN_RUN_ID": "fake-run-1", + } + handler := &fakeHandler{ + name: "FakePlugin", + envVars: expectedVars, + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + vars, err := dispatcher.RetrieveUserContainerEnvVars(taskInfoEnd) + + require.NoError(t, err) + assert.Equal(t, expectedVars, vars) +} + +func TestRetrieveUserContainerEnvVars_HandlerError_Propagated(t *testing.T) { + handler := &fakeHandler{ + name: "FakePlugin", + envErr: 
fmt.Errorf("env var retrieval failed"), + } + dispatcher, _ := NewTaskPluginDispatcherImpl([]TaskPluginHandler{handler}) + + vars, err := dispatcher.RetrieveUserContainerEnvVars(taskInfoEnd) + + require.Error(t, err) + assert.Contains(t, err.Error(), "env var retrieval failed") + require.Nil(t, vars) +} diff --git a/backend/src/v2/common/plugins/handler.go b/backend/src/v2/common/plugins/handler.go new file mode 100644 index 00000000000..287559337ea --- /dev/null +++ b/backend/src/v2/common/plugins/handler.go @@ -0,0 +1,38 @@ +package plugins + +import ( + "context" +) + +// TaskStartResult stores the handler-specific start results keyed by handler name. +type TaskStartResult struct { + Results map[string]TaskHandlerStartResult + CustomProperties map[string]string +} + +// TaskHandlerStartResult is implemented by each TaskPluginHandler to carry +// handler-specific state from OnTaskStart through to OnTaskEnd and +// RetrieveUserContainerEnvVars. Handlers type-assert to their own concrete +// implementation. +type TaskHandlerStartResult interface{} + +// TaskPluginHandler defines the generic task-level plugin lifecycle hooks +type TaskPluginHandler interface { + // Name returns the name of the plugin + Name() string + // OnTaskStart initializes task-level plugin execution for the specified task and returns execution results or an error. + OnTaskStart(ctx context.Context, taskInfo *TaskInfo) (TaskHandlerStartResult, error) + // OnTaskEnd updates task-level plugin execution for the specified task with metrics and parameters and completes plugin execution. + // Handlers recover per-task state (e.g. run IDs) from internal fields set + // during OnTaskStart or ApplyCustomProperties rather than from a start result parameter. + OnTaskEnd(ctx context.Context, taskInfo *TaskInfo) error + // RetrieveUserContainerEnvVars returns the user-specified environment variables to be set in the task user container. 
+ // Handlers recover per-task state from internal fields set during OnTaskStart or ApplyCustomProperties. + RetrieveUserContainerEnvVars() (injectVars map[string]string, err error) + // GenerateCustomProperties returns key-value pairs to persist as MLMD + // execution custom properties. The driver relays these generically without + // knowing which plugin produced them. + GenerateCustomProperties(startResult TaskHandlerStartResult) map[string]string + // ApplyCustomProperties applies properties represented by key-value pairs to the handler configuration. + ApplyCustomProperties(customProperties map[string]string) error +} diff --git a/backend/src/v2/common/plugins/mlflow/config.go b/backend/src/v2/common/plugins/mlflow/config.go new file mode 100644 index 00000000000..6f6a8122b4f --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/config.go @@ -0,0 +1,105 @@ +package mlflow + +import ( + "encoding/json" + "fmt" + "strings" + + commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/spf13/viper" +) + +const ( + mlflowRunID = "MLFLOW_RUN_ID" + kfpMLflowConfig = "KFP_MLFLOW_CONFIG" +) + +func GetStringConfig(configName string) string { + return viper.GetString(configName) +} + +func GetMLflowRunID() string { + return GetStringConfig(mlflowRunID) +} + +// ParseKfpMLflowRuntimeConfig parses the KFP_MLFLOW_CONFIG environment variable into an MLflowRuntimeConfig struct. +// Returns an error if the variable is not set, malformed, or contains an unsupported auth type. 
+func ParseKfpMLflowRuntimeConfig() (*commonmlflow.MLflowRuntimeConfig, error) { + var cfg commonmlflow.MLflowRuntimeConfig + runtimeCfg := GetStringConfig(kfpMLflowConfig) + if runtimeCfg == "" { + return nil, fmt.Errorf("KFP_MLFLOW_CONFIG env var not set") + } + if err := json.Unmarshal([]byte(runtimeCfg), &cfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal KFP_MLFLOW_CONFIG: %v", err) + } + if cfg.Workspace != "" { + cfg.WorkspacesEnabled = true + } + var missingFields []string + if cfg.Endpoint == "" { + missingFields = append(missingFields, "Endpoint") + } + if cfg.ParentRunID == "" { + missingFields = append(missingFields, "ParentRunID") + } + if cfg.ExperimentID == "" { + missingFields = append(missingFields, "ExperimentID") + } + if cfg.AuthType == "" { + missingFields = append(missingFields, "AuthType") + } + if cfg.Timeout == "" { + missingFields = append(missingFields, "Timeout") + } + if len(missingFields) > 0 { + return nil, fmt.Errorf("missing one or more of the following required fields in KFP_MLFLOW_CONFIG: %s", strings.Join(missingFields, ", ")) + } + if cfg.AuthType != "kubernetes" { + return nil, fmt.Errorf("unsupported auth type: %s", cfg.AuthType) + } + // Only InsecureSkipVerify is propagated from the API server. Driver/launcher CA trust is configured + // separately (e.g., cluster-wide trusted CA injection). + cfg.TLS = &commonplugins.TLSConfig{ + InsecureSkipVerify: cfg.InsecureSkipVerify, + } + return &cfg, nil +} + +// IsEnabled reports whether the env var for the MLflow runtime config is present, +// indicating the driver/launcher has opted in to MLflow integration. +func IsEnabled() bool { + return viper.IsSet(commonmlflow.EnvMLflowConfig) +} + +// BuildMLflowTaskRequestContext constructs a fully initialized RequestContext +// by delegating to the common BuildRequestContext with task-specific parameters. 
+func BuildMLflowTaskRequestContext(runtimeCfg commonmlflow.MLflowRuntimeConfig) (*commonmlflow.RequestContext, error) { + mlflowPluginSettings := &commonmlflow.MLflowPluginSettings{ + WorkspacesEnabled: &runtimeCfg.WorkspacesEnabled, + KFPBaseURL: runtimeCfg.Endpoint, + InjectUserEnvVars: &runtimeCfg.InjectUserEnvVars, + } + + pluginCfg := commonmlflow.MLflowPluginConfig{ + Endpoint: runtimeCfg.Endpoint, + Timeout: runtimeCfg.Timeout, + TLS: runtimeCfg.TLS, + Settings: mlflowPluginSettings, + } + return commonmlflow.BuildMLflowRequestContext(pluginCfg, runtimeCfg.Workspace, runtimeCfg.WorkspacesEnabled) +} + +// ExecutionStateToMLflowTerminalStatus converts a string representing an MLMD Execution_State to an MLflow +// terminal status. +func ExecutionStateToMLflowTerminalStatus(state string) string { + switch state { + case "COMPLETE", "CACHED": + return "FINISHED" + case "CANCELED": + return "KILLED" + default: + return "FAILED" + } +} diff --git a/backend/src/v2/common/plugins/mlflow/config_test.go b/backend/src/v2/common/plugins/mlflow/config_test.go new file mode 100644 index 00000000000..34924be2a68 --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/config_test.go @@ -0,0 +1,171 @@ +package mlflow + +import ( + "encoding/json" + "testing" + + "github.com/golang/glog" + commonplugins "github.com/kubeflow/pipelines/backend/src/common/plugins" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" +) + +func setRuntimeCfg(runtimeCfg commonmlflow.MLflowRuntimeConfig) { + data, err := json.Marshal(runtimeCfg) + if err != nil { + glog.Fatalf("Failed to marshal MLflow runtime config: %v", err) + } + viper.Set(commonmlflow.EnvMLflowConfig, string(data)) +} + +func TestExecutionStateToMLflowTerminalStatus(t *testing.T) { + assert.Equal(t, "FINISHED", ExecutionStateToMLflowTerminalStatus("COMPLETE")) + assert.Equal(t, "FINISHED", 
ExecutionStateToMLflowTerminalStatus("CACHED")) + assert.Equal(t, "KILLED", ExecutionStateToMLflowTerminalStatus("CANCELED")) + assert.Equal(t, "FAILED", ExecutionStateToMLflowTerminalStatus("FAILED")) + assert.Equal(t, "FAILED", ExecutionStateToMLflowTerminalStatus("UNKNOWN")) +} + +func TestParseKfpMLflowRuntimeConfig_Success(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + expectedCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + WorkspacesEnabled: false, + Workspace: "", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + InsecureSkipVerify: false, + InjectUserEnvVars: false, + TLS: &commonplugins.TLSConfig{ + InsecureSkipVerify: false, + }, + } + + setRuntimeCfg(cfg) + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.NotNil(t, runtimeCfg) + assert.Equal(t, expectedCfg, runtimeCfg) + assert.NoError(t, err) +} + +func TestParseKfpMLflowRuntimeConfig_NoEnvVar_Failure(t *testing.T) { + viper.Set(commonmlflow.EnvMLflowConfig, "") + + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Contains(t, err.Error(), "KFP_MLFLOW_CONFIG env var not set") + +} + +func TestParseKfpMLflowRuntimeConfig_InvalidEnvVar_Failure(t *testing.T) { + viper.Set(commonmlflow.EnvMLflowConfig, "invalid-formatting.") + + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Contains(t, err.Error(), "failed to unmarshal KFP_MLFLOW_CONFIG") +} + +func TestParseKfpMLflowRuntimeConfig_MissingEndpoint_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "invalid-auth-type", + Timeout: "10s", + } + setRuntimeCfg(cfg) + runtimeCfg, err := 
ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: Endpoint", err.Error()) +} + +func TestParseKfpMLflowRuntimeConfig_MissingParentRunId_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ExperimentID: "test-exp", + AuthType: "invalid-auth-type", + Timeout: "10s", + } + setRuntimeCfg(cfg) + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: ParentRunID", err.Error()) +} + +func TestParseKfpMLflowRuntimeConfig_MissingExperimentId_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + AuthType: "invalid-auth-type", + Timeout: "10s", + } + setRuntimeCfg(cfg) + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: ExperimentID", err.Error()) +} + +func TestParseKfpMLflowRuntimeConfig_MissingAuthType_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + Timeout: "10s", + } + setRuntimeCfg(cfg) + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: AuthType", err.Error()) +} + +func TestParseKfpMLflowRuntimeConfig_MissingTimeout_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + } + setRuntimeCfg(cfg) + runtimeCfg, err := 
ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: Timeout", err.Error()) +} + +func TestParseKfpMLflowRuntimeConfig_InvalidAuthType_Failure(t *testing.T) { + cfg := commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "invalid-auth-type", + Timeout: "10s", + } + setRuntimeCfg(cfg) + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + + assert.Nil(t, runtimeCfg) + assert.Error(t, err) + assert.Equal(t, "unsupported auth type: invalid-auth-type", err.Error()) +} diff --git a/backend/src/v2/common/plugins/mlflow/factory.go b/backend/src/v2/common/plugins/mlflow/factory.go new file mode 100644 index 00000000000..04494cd3841 --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/factory.go @@ -0,0 +1,27 @@ +package mlflow + +import ( + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" +) + +func init() { + plugins.RegisterHandlerFactory(&mlflowHandlerFactory{}) +} + +type mlflowHandlerFactory struct{} + +func (f *mlflowHandlerFactory) Name() string { + return "MLflow" +} + +func (f *mlflowHandlerFactory) IsEnabled() bool { + return IsEnabled() +} + +func (f *mlflowHandlerFactory) Create() (plugins.TaskPluginHandler, error) { + runtimeCfg, err := ParseKfpMLflowRuntimeConfig() + if err != nil { + return nil, err + } + return NewMLflowTaskHandler(runtimeCfg) +} diff --git a/backend/src/v2/common/plugins/mlflow/factory_test.go b/backend/src/v2/common/plugins/mlflow/factory_test.go new file mode 100644 index 00000000000..57bbf448831 --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/factory_test.go @@ -0,0 +1,114 @@ +package mlflow + +import ( + "testing" + + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" + "github.com/spf13/viper" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMlflowHandlerFactory_Name(t *testing.T) { + factory := &mlflowHandlerFactory{} + + assert.Equal(t, "MLflow", factory.Name()) +} + +func TestMlflowHandlerFactory_IsEnabled_ConfigSet(t *testing.T) { + setRuntimeCfg(commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + AuthType: "kubernetes", + }) + t.Cleanup(func() { viper.Set(commonmlflow.EnvMLflowConfig, "") }) + + factory := &mlflowHandlerFactory{} + + assert.True(t, factory.IsEnabled()) +} + +func TestMlflowHandlerFactory_IsEnabled_ConfigUnset(t *testing.T) { + viper.Reset() + + factory := &mlflowHandlerFactory{} + + assert.False(t, factory.IsEnabled()) +} + +func TestMlflowHandlerFactory_Create_Success(t *testing.T) { + setRuntimeCfg(commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "parent-run-1", + ExperimentID: "exp-1", + AuthType: "kubernetes", + Timeout: "10s", + }) + t.Cleanup(func() { viper.Set(commonmlflow.EnvMLflowConfig, "") }) + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.NoError(t, err) + require.NotNil(t, handler) + assert.Equal(t, "MLflow", handler.Name()) +} + +func TestMlflowHandlerFactory_Create_MissingConfig(t *testing.T) { + viper.Set(commonmlflow.EnvMLflowConfig, "") + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.Error(t, err) + assert.Nil(t, handler) + assert.Contains(t, err.Error(), "KFP_MLFLOW_CONFIG env var not set") +} + +func TestMLflowHandlerFactory_Create_MissingConfigField(t *testing.T) { + setRuntimeCfg(commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + }) + t.Cleanup(func() { viper.Set(commonmlflow.EnvMLflowConfig, "") }) + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.Error(t, err) + assert.Nil(t, handler) + assert.Contains(t, err.Error(), "missing one or more of the following required fields in KFP_MLFLOW_CONFIG: 
ParentRunID, ExperimentID, AuthType") + +} + +func TestMlflowHandlerFactory_Create_InvalidAuthType(t *testing.T) { + setRuntimeCfg(commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "parent-run-1", + ExperimentID: "exp-1", + AuthType: "oauth", + Timeout: "10s", + }) + t.Cleanup(func() { viper.Set(commonmlflow.EnvMLflowConfig, "") }) + + factory := &mlflowHandlerFactory{} + handler, err := factory.Create() + + require.Error(t, err) + assert.Nil(t, handler) + assert.Contains(t, err.Error(), "unsupported auth type: oauth") +} + +func TestInitRegistersFactory(t *testing.T) { + t.Cleanup(plugins.ResetRegistry) + + registered := plugins.RegisteredFactories() + + var found bool + for _, factory := range registered { + if factory.Name() == "MLflow" { + found = true + break + } + } + assert.True(t, found, "init() should register an MLflow factory in the global registry") +} diff --git a/backend/src/v2/common/plugins/mlflow/handler.go b/backend/src/v2/common/plugins/mlflow/handler.go new file mode 100644 index 00000000000..3c0664013c8 --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/handler.go @@ -0,0 +1,223 @@ +package mlflow + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/golang/glog" + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" +) + +var _ plugins.TaskPluginHandler = (*MLflowHandler)(nil) + +// MLflowStartResult carries handler-specific state from OnTaskStart through to +// OnTaskEnd and RetrieveUserContainerEnvVars for the MLflow plugin. +type MLflowStartResult struct { + RunID string +} + +// MLflowHandler Handler implements PluginHandler for the MLflow integration. +type MLflowHandler struct { + runtimeCfg *commonmlflow.MLflowRuntimeConfig + nestedRunID string +} + +// Name returns the name of the MLflowHandler plugin, which is "MLflow". 
+func (h *MLflowHandler) Name() string { + return "MLflow" +} + +// NewMLflowTaskHandler creates a new MLflow plugin handler with the given dependencies +// and plugin input. +func NewMLflowTaskHandler(cfg *commonmlflow.MLflowRuntimeConfig) (*MLflowHandler, error) { + if cfg == nil { + return nil, fmt.Errorf("cfg is nil") + } + if cfg.AuthType != commonmlflow.AuthTypeKubernetes { + return nil, fmt.Errorf("failed to parse MLflow runtime config: unsupported auth type: %s", cfg.AuthType) + } + return &MLflowHandler{ + runtimeCfg: cfg, + }, nil +} + +// OnTaskStart creates a nested MLflow run for the given task. +func (h *MLflowHandler) OnTaskStart(ctx context.Context, taskInfo *plugins.TaskInfo) (plugins.TaskHandlerStartResult, error) { + if h == nil || taskInfo == nil { + return nil, nil + } + if h.runtimeCfg == nil { + return nil, fmt.Errorf("MLflow runtime config is not set") + } + if h.runtimeCfg.ParentRunID == "" || h.runtimeCfg.ExperimentID == "" { + return nil, fmt.Errorf("ParentRunID and ExperimentID are both required to create nested MLflow run") + } + + requestCtx, err := BuildMLflowTaskRequestContext(*h.runtimeCfg) + if err != nil { + return nil, fmt.Errorf("failed to build MLflow request context: %v", err) + } + if requestCtx == nil || requestCtx.Client == nil { + return nil, fmt.Errorf("MLflow request context and client must be non-nil") + } + + parentRunTag := commonmlflow.Tag{Key: commonmlflow.ParentRunTagKey, Value: h.runtimeCfg.ParentRunID} + nestedRunID, err := requestCtx.Client.CreateRun(ctx, h.runtimeCfg.ExperimentID, taskInfo.Name, []commonmlflow.Tag{parentRunTag}) + if err != nil { + return nil, fmt.Errorf("failed to create task-level MLflow run: %v", err) + } + h.nestedRunID = nestedRunID + return &MLflowStartResult{RunID: nestedRunID}, nil +} + +// OnTaskEnd updates the nested MLflow run for the given task, along with its corresponding metrics and parameters. 
+// The run ID is resolved from h.nestedRunID, which is set either by OnTaskStart +// (driver path) or by ApplyCustomProperties (launcher path, recovered from MLMD). +// It returns an error when info, the handler, or its runtime config is nil, when +// the experiment ID or run ID is empty, when the request context cannot be built, +// or when the final UpdateRun call fails. +func (h *MLflowHandler) OnTaskEnd(ctx context.Context, info *plugins.TaskInfo) error { + if info == nil { + return fmt.Errorf("taskInfo is nil") + } + // Guard before the first h.runtimeCfg dereference so a nil handler or config + // yields an error, as in the sibling handler methods, rather than a panic. + if h == nil || h.runtimeCfg == nil { + return fmt.Errorf("MLflow plugin handler and runtime config must be non-nil") + } + if h.runtimeCfg.ExperimentID == "" { + return fmt.Errorf("experimentID is required to update nested MLflow run") + } + if h.nestedRunID == "" { + return fmt.Errorf("runID is required to update nested MLflow run") + } + resolvedRunID := h.nestedRunID + + requestCtx, err := BuildMLflowTaskRequestContext(*h.runtimeCfg) + if err != nil { + return fmt.Errorf("failed to build MLflow request context: %v", err) + } + if requestCtx == nil || requestCtx.Client == nil { + return fmt.Errorf("MLflow request context and client must be non-nil") + } + + // Record run data, if applicable, before updating MLflow run status + hasMetrics := len(info.ScalarMetrics) > 0 + hasParams := len(info.Parameters) > 0 + hasTags := len(info.Tags) > 0 + + if hasMetrics || hasParams || hasTags { + req := commonmlflow.LogBatchRequest{ + RunID: resolvedRunID, + Metrics: mapToMetrics(info.ScalarMetrics), + Params: mapToParams(info.Parameters), + Tags: mapToTags(info.Tags), + } + // A LogBatch failure is only logged, so the run status is still updated below. + err = requestCtx.Client.LogBatch(ctx, req) + if err != nil { + glog.Errorf("failed to log metrics and params to MLflow: %v", err) + } + } + + resolvedStatus := ExecutionStateToMLflowTerminalStatus(info.RunStatus) + err = requestCtx.Client.UpdateRun(ctx, resolvedRunID, resolvedStatus, new(info.RunEndTime)) + if err != nil { + return fmt.Errorf("failed to update MLflow run: %v", err) + } + + return nil +} + +// RetrieveUserContainerEnvVars retrieves environment variables to inject into user containers based on the MLflow runtime config. +// The run ID is resolved from h.nestedRunID, set during OnTaskStart. 
+func (h *MLflowHandler) RetrieveUserContainerEnvVars() (injectVars map[string]string, err error) {
+	if h == nil || h.runtimeCfg == nil {
+		return nil, fmt.Errorf("MLflow plugin handler and runtime config must be non-nil")
+	}
+	cfg := h.runtimeCfg
+	injectVars = make(map[string]string)
+
+	// Injection is opt-in: with the flag off an empty, non-nil map is returned.
+	if !cfg.InjectUserEnvVars {
+		return injectVars, nil
+	}
+
+	// Validate up front (run ID first, then auth type) so that no partially
+	// populated map is ever built before an error is returned.
+	if h.nestedRunID == "" {
+		return nil, fmt.Errorf("MLflow run ID is empty. Cannot inject MLFLOW_RUN_ID env var")
+	}
+	// Use the same constant the constructor validates against rather than a
+	// second copy of the literal.
+	if cfg.AuthType != commonmlflow.AuthTypeKubernetes {
+		return nil, fmt.Errorf("MLflow auth type %s is not supported", cfg.AuthType)
+	}
+
+	injectVars["MLFLOW_RUN_ID"] = h.nestedRunID
+	injectVars["MLFLOW_TRACKING_URI"] = cfg.Endpoint
+	injectVars["MLFLOW_EXPERIMENT_ID"] = cfg.ExperimentID
+
+	// With workspaces enabled the workspace name is exported as well and the
+	// auth value switches to its namespaced variant.
+	auth := "kubernetes"
+	if cfg.WorkspacesEnabled {
+		injectVars["MLFLOW_WORKSPACE"] = cfg.Workspace
+		auth = "kubernetes-namespaced"
+	}
+	injectVars["MLFLOW_TRACKING_AUTH"] = auth
+
+	return injectVars, nil
+}
+
+// GenerateCustomProperties returns the MLMD execution custom properties for
+// the MLflow plugin: a single entry mapping "plugins.mlflow.run_id" to the run
+// ID carried by result.
+//
+// It returns nil when result is nil, is not an *MLflowStartResult, is a nil
+// *MLflowStartResult wrapped in the interface, or carries an empty RunID.
+func (h *MLflowHandler) GenerateCustomProperties(result plugins.TaskHandlerStartResult) map[string]string {
+	if result == nil {
+		return nil
+	}
+	mlflowResult, ok := result.(*MLflowStartResult)
+	// An interface holding a typed nil pointer is itself non-nil and still
+	// satisfies the assertion, so the pointer must be checked before use.
+	if !ok || mlflowResult == nil || mlflowResult.RunID == "" {
+		return nil
+	}
+	return map[string]string{
+		"plugins.mlflow.run_id": mlflowResult.RunID,
+	}
+}
+
+// ApplyCustomProperties updates the MLflow handler runtime configuration with custom property values.
+func (h *MLflowHandler) ApplyCustomProperties(props map[string]string) error {
+	// Both the handler and its runtime config are written to below.
+	if h == nil || h.runtimeCfg == nil {
+		return fmt.Errorf("MLflow plugin handler and runtime config must be non-nil")
+	}
+	// Indexing a nil map is safe in Go and reports "not found", so a nil props
+	// map simply falls through to the no-op return.
+	if recoveredID, found := props["plugins.mlflow.run_id"]; found {
+		// NOTE(review): the recovered ID is stored as the nested run ID and also
+		// overwrites the config's ParentRunID; confirm the latter is intended.
+		h.nestedRunID = recoveredID
+		h.runtimeCfg.ParentRunID = recoveredID
+	}
+	return nil
+}
+
+// mapToMetrics turns a name -> value map into MLflow Metric structs, one per
+// entry. The result is always non-nil; its order follows Go map iteration and
+// is therefore unspecified.
+func mapToMetrics(metrics map[string]float64) []commonmlflow.Metric {
+	converted := make([]commonmlflow.Metric, len(metrics))
+	next := 0
+	for name, val := range metrics {
+		converted[next] = commonmlflow.Metric{Key: name, Value: val}
+		next++
+	}
+	return converted
+}
+
+// mapToTags turns a key -> value map into MLflow Tag structs, one per entry.
+// The result is always non-nil; its order follows Go map iteration and is
+// therefore unspecified.
+func mapToTags(tags map[string]string) []commonmlflow.Tag {
+	converted := make([]commonmlflow.Tag, len(tags))
+	next := 0
+	for tagKey, tagValue := range tags {
+		converted[next] = commonmlflow.Tag{Key: tagKey, Value: tagValue}
+		next++
+	}
+	return converted
+}
+
+// mapToParams converts a map of parameters into a slice of MLflow Param structs,
+// serializing values as JSON for the MLflow API.
+func mapToParams(params map[string]interface{}) []commonmlflow.Param { + paramsFmt := make([]commonmlflow.Param, 0, len(params)) + for key, value := range params { + serialized, err := json.Marshal(value) + if err != nil { + glog.Warningf("Failed to serialize param %q: %v", key, err) + continue + } + paramsFmt = append(paramsFmt, commonmlflow.Param{Key: key, Value: string(serialized)}) + } + return paramsFmt +} diff --git a/backend/src/v2/common/plugins/mlflow/handler_test.go b/backend/src/v2/common/plugins/mlflow/handler_test.go new file mode 100644 index 00000000000..9edecf7fda6 --- /dev/null +++ b/backend/src/v2/common/plugins/mlflow/handler_test.go @@ -0,0 +1,716 @@ +package mlflow + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + commonmlflow "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var taskInfoStart = &plugins.TaskInfo{ + Name: "test-task", +} + +var taskInfoEnd = &plugins.TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: map[string]float64{ + "test-metric": 0.5, + }, + Parameters: map[string]interface{}{ + "test-param": "test-value", + }, + Tags: map[string]string{ + "tag-key": "tag-value", + }, +} + +var taskStartResult = &MLflowStartResult{ + RunID: "test-run-id", +} + +var emptyTaskStartResult = &MLflowStartResult{} + +// setupTestEnvWithServer sets up a test MLflow server using the given HTTP handler and configures runtime settings for testing. +func setupTestEnvWithServer(t *testing.T, httpHandler http.Handler) string { + t.Helper() + setupSAToken(t) + server := httptest.NewServer(httpHandler) + t.Cleanup(server.Close) + //todo: can potentially remove below. 
+ setRuntimeCfg(commonmlflow.MLflowRuntimeConfig{ + Endpoint: server.URL, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + }) + return server.URL +} + +func setupSAToken(t *testing.T) func() { + t.Helper() + setupFakeKubernetesConfig(t, "test-sa-token") + return func() {} // cleanup handled by t.Cleanup in setupFakeKubernetesConfig +} + +// setupFakeKubernetesConfig writes a temp kubeconfig with the given bearer token +// and sets the KUBECONFIG env var so util.GetKubernetesConfig() picks it up. +func setupFakeKubernetesConfig(t *testing.T, token string) { + t.Helper() + kubeconfig := fmt.Sprintf(`apiVersion: v1 +kind: Config +clusters: +- cluster: + server: https://localhost + name: test +contexts: +- context: + cluster: test + user: test + name: test +current-context: test +users: +- name: test + user: + token: %s +`, token) + p := filepath.Join(t.TempDir(), "kubeconfig") + require.NoError(t, os.WriteFile(p, []byte(kubeconfig), 0600)) + t.Setenv("KUBECONFIG", p) +} + +// defaultMLflowHandlerFunc provides a default HTTP handler for testing MLflow server interactions. 
+func defaultMLflowHandlerFunc(t *testing.T) http.Handler { + t.Helper() + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/create": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"mlflow-run-1"}}}`)) + case "/api/2.0/mlflow/runs/update": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"mlflow-run-1"}}}`)) + case "/api/2.0/mlflow/runs/log-batch": + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + }) +} + +func TestNewMLflowTaskHandler_Success(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, err := NewMLflowTaskHandler(runtimeCfg) + + require.NoError(t, err) + require.NotNil(t, handler) +} + +func TestNewMLflowTaskHandler_EmptyRuntimeConfig_Failure(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{} + + handler, err := NewMLflowTaskHandler(runtimeCfg) + + require.Nil(t, handler) + require.Error(t, err) + assert.Equal(t, "failed to parse MLflow runtime config: unsupported auth type: ", err.Error()) +} + +func TestNewMLflowTaskHandler_NilRuntimeConfig_Failure(t *testing.T) { + var runtimeCfg *commonmlflow.MLflowRuntimeConfig + runtimeCfg = nil + + handler, err := NewMLflowTaskHandler(runtimeCfg) + + require.Nil(t, handler) + require.Error(t, err) + assert.Equal(t, "cfg is nil", err.Error()) +} + +func TestOnTaskStart_MissingParentRunID_Failure(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + nestedRunID, err := handler.OnTaskStart(context.Background(), taskInfoStart) + assert.Empty(t, 
nestedRunID) + + require.Error(t, err) + assert.Equal(t, "ParentRunID and ExperimentID are both required to create nested MLflow run", err.Error()) +} + +func TestOnTaskStart_MissingExperimentID_Failure(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + nestedRunID, err := handler.OnTaskStart(context.Background(), taskInfoStart) + assert.Empty(t, nestedRunID) + + require.Error(t, err) + assert.Equal(t, "ParentRunID and ExperimentID are both required to create nested MLflow run", err.Error()) +} + +func TestOnTaskStart_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + + nestedRunID, err := handler.OnTaskStart(context.Background(), taskInfoStart) + require.NoError(t, err) + require.NotEmpty(t, nestedRunID) + +} + +func TestOnTaskStart_MLflowFailure_ReturnsEmptyNestedRunID(t *testing.T) { + cleanup := setupSAToken(t) + defer cleanup() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error_code":"INTERNAL_ERROR","message":"server down"}`)) + })) + defer server.Close() + + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: server.URL, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + + nestedRunID, err := handler.OnTaskStart(context.Background(), taskInfoStart) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to create task-level MLflow run: ") + 
require.Empty(t, nestedRunID) +} + +func TestOnTaskEnd_NestedRunID_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + err := handler.OnTaskEnd(context.Background(), taskInfoEnd) + + require.NoError(t, err) +} + +func TestOnTaskEnd_NestedRunIDFromApplyCustomProperties_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + + err := handler.ApplyCustomProperties(map[string]string{"plugins.mlflow.run_id": "test-run-id"}) + require.NoError(t, err) + + var capturedUpdateBody map[string]interface{} + var capturedLogBatchBody map[string]interface{} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/2.0/mlflow/runs/update": + require.NoError(t, json.NewDecoder(r.Body).Decode(&capturedUpdateBody)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"run":{"info":{"run_id":"test-run-id"}}}`)) + case "/api/2.0/mlflow/runs/log-batch": + require.NoError(t, json.NewDecoder(r.Body).Decode(&capturedLogBatchBody)) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{}`)) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + handler.runtimeCfg.Endpoint = server.URL + err = handler.OnTaskEnd(context.Background(), taskInfoEnd) + + require.NoError(t, err) + assert.Equal(t, "test-run-id", capturedUpdateBody["run_id"]) + assert.Equal(t, "test-run-id", 
capturedLogBatchBody["run_id"]) + assert.Equal(t, testFormattedTags(), capturedLogBatchBody["tags"]) +} + +func TestOnTaskEnd_MissingRunID_Failure(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + err := handler.OnTaskEnd(context.Background(), taskInfoEnd) + + require.Error(t, err) + assert.Equal(t, "runID is required to update nested MLflow run", err.Error()) +} + +func TestOnTaskEnd_MissingExperimentID_Failure(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + err := handler.OnTaskEnd(context.Background(), taskInfoEnd) + + require.Error(t, err) + assert.Equal(t, "experimentID is required to update nested MLflow run", err.Error()) +} + +func TestOnTaskEnd_EmptyMetrics_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + info := &plugins.TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: map[string]float64{}, + Parameters: map[string]interface{}{ + "test-param": "test-value", + }, + } + + err := handler.OnTaskEnd(context.Background(), info) + + require.NoError(t, err) +} + +func TestOnTaskEnd_NilMetrics_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, 
defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + info := &plugins.TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: nil, + Parameters: map[string]interface{}{ + "test-param": "test-value", + }, + } + err := handler.OnTaskEnd(context.Background(), info) + + require.NoError(t, err) +} + +func TestOnTaskEnd_EmptyParams_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + info := &plugins.TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: map[string]float64{ + "test-metric": 0.5, + }, + Parameters: map[string]interface{}{}, + } + + err := handler.OnTaskEnd(context.Background(), info) + + require.NoError(t, err) +} + +func TestOnTaskEnd_NilParams_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + + info := &plugins.TaskInfo{ + Name: "test-task", + RunEndTime: int64(1714400000000), + RunStatus: "COMPLETED", + ScalarMetrics: map[string]float64{ + "test-metric": 0.5, + }, + Parameters: nil, + } + + err := handler.OnTaskEnd(context.Background(), info) + + require.NoError(t, err) 
+} + +func TestOnTaskEnd_Success(t *testing.T) { + serverUrl := setupTestEnvWithServer(t, defaultMLflowHandlerFunc(t)) + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: serverUrl, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + err := handler.OnTaskEnd(context.Background(), taskInfoEnd) + require.NoError(t, err) +} + +func TestOnTaskEnd_MLflowFailure_ReturnsError(t *testing.T) { + cleanup := setupSAToken(t) + defer cleanup() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error_code":"INTERNAL_ERROR","message":"server down"}`)) + })) + defer server.Close() + + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: server.URL, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + err := handler.OnTaskEnd(context.Background(), taskInfoEnd) + + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to update MLflow run") +} + +func TestRetrieveUserContainerEnvVars_WorkspacesEnabled_InjectVars_Success(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + Workspace: "test-workspace", + WorkspacesEnabled: true, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + InjectUserEnvVars: true, + } + + expectedEnvVars := map[string]string{ + "MLFLOW_RUN_ID": "test-run-id", + "MLFLOW_TRACKING_URI": "http://localhost", + "MLFLOW_EXPERIMENT_ID": "test-exp", + "MLFLOW_WORKSPACE": "test-workspace", + "MLFLOW_TRACKING_AUTH": "kubernetes-namespaced", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = 
"test-run-id" + envVars, err := handler.RetrieveUserContainerEnvVars() + + require.NoError(t, err) + assert.Equal(t, expectedEnvVars, envVars) +} + +func TestRetrieveUserContainerEnvVars_WorkspacesDisabled_InjectVars_Success(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + WorkspacesEnabled: false, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + InsecureSkipVerify: true, + InjectUserEnvVars: true, + } + + expectedEnvVars := map[string]string{ + "MLFLOW_RUN_ID": "test-run-id", + "MLFLOW_TRACKING_URI": "http://localhost", + "MLFLOW_EXPERIMENT_ID": "test-exp", + "MLFLOW_TRACKING_AUTH": "kubernetes", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + envVars, err := handler.RetrieveUserContainerEnvVars() + + require.NoError(t, err) + assert.Equal(t, expectedEnvVars, envVars) +} + +func TestRetrieveUserContainerEnvVars_EmptyRunID_Failure(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + Workspace: "test-workspace", + WorkspacesEnabled: true, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + InsecureSkipVerify: true, + InjectUserEnvVars: true, + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + _, err := handler.RetrieveUserContainerEnvVars() + + require.Error(t, err) + assert.Equal(t, "MLflow run ID is empty. 
Cannot inject MLFLOW_RUN_ID env var", err.Error()) +} + +func TestRetrieveUserContainerEnvVars_InjectVarsDisabled_Success(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + Workspace: "test-workspace", + WorkspacesEnabled: true, + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + InsecureSkipVerify: true, + InjectUserEnvVars: false, + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + handler.nestedRunID = "test-run-id" + envVars, err := handler.RetrieveUserContainerEnvVars() + + require.NoError(t, err) + assert.Empty(t, envVars) +} + +func TestRetrieveCustomProperties_WithRunID(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + props := handler.GenerateCustomProperties(taskStartResult) + + assert.Equal(t, map[string]string{"plugins.mlflow.run_id": "test-run-id"}, props) +} + +func TestRetrieveCustomProperties_EmptyRunID(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + props := handler.GenerateCustomProperties(emptyTaskStartResult) + + assert.Nil(t, props) +} + +func TestRetrieveCustomProperties_NilResult(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + props := handler.GenerateCustomProperties(nil) + + assert.Nil(t, props) +} + +func TestMapToParams_MapValue(t *testing.T) { + params := map[string]interface{}{ + 
"nested_map": map[string]interface{}{ + "key1": "value1", + "key2": 42, + }, + } + + result := mapToParams(params) + + require.Len(t, result, 1) + assert.Equal(t, "nested_map", result[0].Key) + + var parsed map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result[0].Value), &parsed)) + assert.Equal(t, "value1", parsed["key1"]) + assert.Equal(t, float64(42), parsed["key2"]) +} + +func TestMapToParams_ListValue(t *testing.T) { + params := map[string]interface{}{ + "tags": []interface{}{"tag1", "tag2", "tag3"}, + } + + result := mapToParams(params) + + require.Len(t, result, 1) + assert.Equal(t, "tags", result[0].Key) + + var parsed []interface{} + require.NoError(t, json.Unmarshal([]byte(result[0].Value), &parsed)) + assert.Equal(t, []interface{}{"tag1", "tag2", "tag3"}, parsed) +} + +func TestMapToParams_MixedValues(t *testing.T) { + params := map[string]interface{}{ + "string_param": "hello", + "int_param": 42, + "float_param": 3.14, + "bool_param": true, + "list_param": []interface{}{1, "two", 3.0}, + "map_param": map[string]interface{}{"nested": true}, + } + + result := mapToParams(params) + require.Len(t, result, len(params)) + + resultMap := make(map[string]string) + for _, p := range result { + resultMap[p.Key] = p.Value + } + + assert.Equal(t, `"hello"`, resultMap["string_param"]) + assert.Equal(t, "42", resultMap["int_param"]) + assert.Equal(t, "3.14", resultMap["float_param"]) + assert.Equal(t, "true", resultMap["bool_param"]) + + var listParsed []interface{} + require.NoError(t, json.Unmarshal([]byte(resultMap["list_param"]), &listParsed)) + assert.Len(t, listParsed, 3) + + var mapParsed map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(resultMap["map_param"]), &mapParsed)) + assert.Equal(t, true, mapParsed["nested"]) +} + +func TestMapToParams_NestedMapWithList(t *testing.T) { + params := map[string]interface{}{ + "config": map[string]interface{}{ + "layers": []interface{}{64, 128, 256}, + "learning_rate": 0.001, + 
"optimizer": "adam", + }, + } + + result := mapToParams(params) + + require.Len(t, result, 1) + assert.Equal(t, "config", result[0].Key) + + var parsed map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(result[0].Value), &parsed)) + assert.Equal(t, "adam", parsed["optimizer"]) + assert.Equal(t, 0.001, parsed["learning_rate"]) + assert.Equal(t, []interface{}{float64(64), float64(128), float64(256)}, parsed["layers"]) +} + +func TestApplyCustomProperties_Success(t *testing.T) { + runtimeCfg := &commonmlflow.MLflowRuntimeConfig{ + Endpoint: "http://localhost", + ParentRunID: "test-parent-run-id", + ExperimentID: "test-exp", + AuthType: "kubernetes", + Timeout: "10s", + } + + handler, _ := NewMLflowTaskHandler(runtimeCfg) + + testProps := map[string]string{ + "plugins.mlflow.run_id": "custom-prop-run-id", + } + err := handler.ApplyCustomProperties(testProps) + assert.NoError(t, err) + assert.Equal(t, "custom-prop-run-id", handler.runtimeCfg.ParentRunID) + assert.Equal(t, "custom-prop-run-id", handler.nestedRunID) +} + +func testFormattedTags() []interface{} { + return []interface{}{ + map[string]interface{}{ + "key": "tag-key", + "value": "tag-value", + }, + } +} diff --git a/backend/src/v2/common/plugins/registry.go b/backend/src/v2/common/plugins/registry.go new file mode 100644 index 00000000000..6c959394775 --- /dev/null +++ b/backend/src/v2/common/plugins/registry.go @@ -0,0 +1,73 @@ +package plugins + +import ( + "fmt" + "sync" + + "github.com/golang/glog" +) + +// HandlerFactory knows how to check whether a plugin is enabled and how to +// construct its TaskPluginHandler. Each plugin package registers a factory +// at init time via RegisterHandlerFactory. +type HandlerFactory interface { + // Name returns the unique identifier for this plugin factory. + Name() string + // IsEnabled reports whether the plugin should be activated in the current environment. + IsEnabled() bool + // Create constructs and returns a ready-to-use TaskPluginHandler. 
+ Create() (TaskPluginHandler, error) +} + +var ( + registryMu sync.RWMutex + factories []HandlerFactory +) + +// RegisterHandlerFactory adds a HandlerFactory to the global registry. +// Typically called from a plugin package's init() function. +func RegisterHandlerFactory(factory HandlerFactory) { + registryMu.Lock() + defer registryMu.Unlock() + factories = append(factories, factory) +} + +// RegisteredFactories returns a snapshot of all registered handler factories. +func RegisteredFactories() []HandlerFactory { + registryMu.RLock() + defer registryMu.RUnlock() + result := make([]HandlerFactory, len(factories)) + copy(result, factories) + return result +} + +// ResetRegistry clears all registered factories. Intended for use in tests only. +func ResetRegistry() { + registryMu.Lock() + defer registryMu.Unlock() + factories = nil +} + +// GetPluginDispatcher builds a TaskPluginDispatcher from all registered and +// enabled handler factories. Returns a NoOpDispatcher when no plugins are active. 
+func GetPluginDispatcher() (TaskPluginDispatcher, error) { + var handlers []TaskPluginHandler + + for _, factory := range RegisteredFactories() { + if !factory.IsEnabled() { + continue + } + handler, err := factory.Create() + if err != nil { + return NoOpDispatcher{}, fmt.Errorf("failed to initialize %s task plugin handler: %v", factory.Name(), err) + } + handlers = append(handlers, handler) + } + + if len(handlers) == 0 { + glog.Infof("No task-level plugins enabled, returning no-op dispatcher") + return NoOpDispatcher{}, nil + } + + return NewTaskPluginDispatcherImpl(handlers) +} diff --git a/backend/src/v2/common/plugins/registry_test.go b/backend/src/v2/common/plugins/registry_test.go new file mode 100644 index 00000000000..fdf264fe2f5 --- /dev/null +++ b/backend/src/v2/common/plugins/registry_test.go @@ -0,0 +1,103 @@ +package plugins + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var _ HandlerFactory = (*fakeFactory)(nil) + +type fakeFactory struct { + name string + enabled bool + handler TaskPluginHandler + err error +} + +func (f *fakeFactory) Name() string { return f.name } +func (f *fakeFactory) IsEnabled() bool { return f.enabled } +func (f *fakeFactory) Create() (TaskPluginHandler, error) { return f.handler, f.err } + +func resetRegistryForTest(t *testing.T) { + t.Helper() + ResetRegistry() + t.Cleanup(ResetRegistry) +} + +func TestRegisterHandlerFactory_AppearsInSnapshot(t *testing.T) { + resetRegistryForTest(t) + + factory := &fakeFactory{name: "A", enabled: true} + RegisterHandlerFactory(factory) + + registered := RegisteredFactories() + require.Len(t, registered, 1) + assert.Equal(t, "A", registered[0].Name()) +} + +func TestRegisteredFactories_SnapshotIsolation(t *testing.T) { + resetRegistryForTest(t) + + RegisterHandlerFactory(&fakeFactory{name: "A"}) + snapshot := RegisteredFactories() + snapshot[0] = &fakeFactory{name: "Mutated"} + + assert.Equal(t, "A", 
RegisteredFactories()[0].Name()) +} + +func TestResetRegistry_ClearsAll(t *testing.T) { + resetRegistryForTest(t) + + RegisterHandlerFactory(&fakeFactory{name: "A"}) + ResetRegistry() + + assert.Empty(t, RegisteredFactories()) +} + +func TestGetPluginDispatcher_NoFactories_ReturnsNoOp(t *testing.T) { + resetRegistryForTest(t) + + dispatcher, err := GetPluginDispatcher() + + require.NoError(t, err) + assert.IsType(t, NoOpDispatcher{}, dispatcher) +} + +func TestGetPluginDispatcher_AllDisabled_ReturnsNoOp(t *testing.T) { + resetRegistryForTest(t) + RegisterHandlerFactory(&fakeFactory{name: "A", enabled: false}) + + dispatcher, err := GetPluginDispatcher() + + require.NoError(t, err) + assert.IsType(t, NoOpDispatcher{}, dispatcher) +} + +func TestGetPluginDispatcher_OneEnabled_ReturnsImpl(t *testing.T) { + resetRegistryForTest(t) + handler := &fakeHandler{name: "A"} + RegisterHandlerFactory(&fakeFactory{name: "A", enabled: true, handler: handler}) + + dispatcher, err := GetPluginDispatcher() + + require.NoError(t, err) + assert.IsType(t, &TaskPluginDispatcherImpl{}, dispatcher) +} + +func TestGetPluginDispatcher_CreateFails_ReturnsError(t *testing.T) { + resetRegistryForTest(t) + RegisterHandlerFactory(&fakeFactory{ + name: "Broken", + enabled: true, + err: fmt.Errorf("init failed"), + }) + + dispatcher, err := GetPluginDispatcher() + + require.Error(t, err) + assert.Contains(t, err.Error(), "Broken") + assert.Equal(t, dispatcher, NoOpDispatcher{}) +} diff --git a/backend/src/v2/compiler/argocompiler/common.go b/backend/src/v2/compiler/argocompiler/common.go index ce167539ea8..fafeb02ec4f 100644 --- a/backend/src/v2/compiler/argocompiler/common.go +++ b/backend/src/v2/compiler/argocompiler/common.go @@ -18,6 +18,7 @@ import ( wfapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/golang/glog" "github.com/kubeflow/pipelines/backend/src/apiserver/common" + "github.com/kubeflow/pipelines/backend/src/common/util" 
"github.com/kubeflow/pipelines/backend/src/v2/component" k8score "k8s.io/api/core/v1" ) @@ -65,6 +66,15 @@ var retryIndexEnv = k8score.EnvVar{ Value: "{{retries}}", } +// setRuntimeRole stamps the template with an annotation declaring its logical +// execution role (e.g. "driver", "launcher"). +func setRuntimeRole(tmpl *wfapi.Template, role util.ExecutionRuntimeRole) { + if tmpl.Metadata.Annotations == nil { + tmpl.Metadata.Annotations = make(map[string]string) + } + tmpl.Metadata.Annotations[util.AnnotationKeyRuntimeRole] = string(role) +} + // ConfigureCustomCABundle adds CABundle environment variables and volume mounts if CABUNDLE_SECRET_NAME is set. func ConfigureCustomCABundle(tmpl *wfapi.Template) { caBundleSecretName := common.GetCaBundleSecretName() diff --git a/backend/src/v2/compiler/argocompiler/container.go b/backend/src/v2/compiler/argocompiler/container.go index ce4b89e4fac..1a6c9393def 100644 --- a/backend/src/v2/compiler/argocompiler/container.go +++ b/backend/src/v2/compiler/argocompiler/container.go @@ -32,6 +32,7 @@ import ( "github.com/golang/glog" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" "github.com/kubeflow/pipelines/backend/src/apiserver/common" + "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/kubeflow/pipelines/backend/src/v2/component" "github.com/kubeflow/pipelines/kubernetes_platform/go/kubernetesplatform" k8score "k8s.io/api/core/v1" @@ -274,6 +275,7 @@ func (c *workflowCompiler) addContainerDriverTemplate() string { Env: append(proxy.GetConfig().GetEnvVars(), commonEnvs...), }, } + setRuntimeRole(template, util.ExecutionRuntimeRoleDriver) applySecurityContextToTemplate(template) // If TLS is enabled (apiserver or metadata), add the custom CA bundle to the container driver template. 
if setCABundle { @@ -561,6 +563,7 @@ func (c *workflowCompiler) addContainerExecutorTemplate(task *pipelinespec.Pipel Env: commonEnvs, }, } + setRuntimeRole(executor, util.ExecutionRuntimeRoleLauncher) // If CABUNDLE_SECRET_NAME or CABUNDLE_CONFIGMAP_NAME is set, add the custom CA bundle to the executor. if common.GetCaBundleSecretName() != "" || common.GetCaBundleConfigMapName() != "" { ConfigureCustomCABundle(executor) diff --git a/backend/src/v2/compiler/argocompiler/dag.go b/backend/src/v2/compiler/argocompiler/dag.go index 658ea931fb2..9030fc31756 100644 --- a/backend/src/v2/compiler/argocompiler/dag.go +++ b/backend/src/v2/compiler/argocompiler/dag.go @@ -26,6 +26,7 @@ import ( wfapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" "github.com/kubeflow/pipelines/backend/src/apiserver/common" + "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/kubeflow/pipelines/backend/src/v2/compiler" k8score "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -643,6 +644,7 @@ func (c *workflowCompiler) addDAGDriverTemplate() string { Env: proxy.GetConfig().GetEnvVars(), }, } + setRuntimeRole(template, util.ExecutionRuntimeRoleDriver) applySecurityContextToTemplate(template) // If TLS is enabled (apiserver or metadata), add the custom CA bundle to the DAG driver template. 
if setCABundle { diff --git a/backend/src/v2/compiler/argocompiler/importer.go b/backend/src/v2/compiler/argocompiler/importer.go index 7c608ecaaa5..f54d367917f 100644 --- a/backend/src/v2/compiler/argocompiler/importer.go +++ b/backend/src/v2/compiler/argocompiler/importer.go @@ -21,6 +21,7 @@ import ( wfapi "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" "github.com/kubeflow/pipelines/backend/src/apiserver/common" + "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/kubeflow/pipelines/backend/src/v2/component" "github.com/kubeflow/pipelines/backend/src/v2/metadata" k8score "k8s.io/api/core/v1" @@ -148,6 +149,7 @@ func (c *workflowCompiler) addImporterTemplate(downloadToWorkspace bool) string Volumes: volumes, } + setRuntimeRole(importerTemplate, util.ExecutionRuntimeRoleLauncher) // If TLS is enabled (apiserver or metadata), add the custom CA bundle to the importer template. if setCABundle { ConfigureCustomCABundle(importerTemplate) diff --git a/backend/src/v2/component/launcher_v2.go b/backend/src/v2/component/launcher_v2.go index a7a2200acc9..59c18baa7cb 100644 --- a/backend/src/v2/component/launcher_v2.go +++ b/backend/src/v2/component/launcher_v2.go @@ -27,8 +27,11 @@ import ( "strings" "time" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "google.golang.org/protobuf/types/known/timestamppb" + _ "github.com/kubeflow/pipelines/backend/src/v2/common/plugins/all" + "github.com/golang/glog" api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" "github.com/kubeflow/pipelines/backend/src/v2/client_manager" @@ -185,6 +188,7 @@ func (l *LauncherV2) Execute(ctx context.Context) (err error) { var execution *metadata.Execution var executorOutput *pipelinespec.ExecutorOutput var outputArtifacts []*metadata.OutputArtifact + var dispatcher plugins.TaskPluginDispatcher status := pb.Execution_FAILED defer func() { if execution == nil { @@ -200,6 
+204,19 @@ func (l *LauncherV2) Execute(ctx context.Context) (err error) { } } glog.Infof("publish success.") + + if dispatcher != nil { + taskPluginInfo := &plugins.TaskInfo{ + RunStatus: status.String(), + ScalarMetrics: metadata.FormatScalarMetricArtifacts(outputArtifacts), + Parameters: metadata.FormatExecutionParameters(execution), + } + dispatchErr := dispatcher.OnTaskEnd(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("failed to dispatch task end: %v", dispatchErr) + } + } + // At the end of the current task, we check the statuses of all tasks in // the current DAG and update the DAG's status accordingly. dag, err := l.clientManager.MetadataClient().GetDAG(ctx, execution.GetExecution().CustomProperties["parent_dag_id"].GetIntValue()) @@ -217,6 +234,20 @@ func (l *LauncherV2) Execute(ctx context.Context) (err error) { if err != nil { return err } + + // Construct the plugin dispatcher after prePublish so we can hydrate + // handlers with plugin custom properties from the MLMD execution + // (written by the driver during CreateExecution). This reuses the + // execution already fetched by prePublish with zero additional queries. 
+ dispatcher, dispatchErr := plugins.GetPluginDispatcher() + if dispatchErr != nil { + glog.Errorf("Failed to get plugin dispatcher: %v", dispatchErr) + } else { + pluginProps := metadata.ExtractPluginCustomProperties(execution) + if pluginProps != nil { + dispatcher.ApplyCustomProperties(pluginProps) + } + } fingerPrint := execution.FingerPrint() storeSessionInfo, err := objectstore.GetSessionInfoFromString(execution.GetPipeline().GetStoreSessionInfo()) if err != nil { diff --git a/backend/src/v2/driver/cache.go b/backend/src/v2/driver/cache.go index 0419beeb3d8..3cae98f5cc3 100644 --- a/backend/src/v2/driver/cache.go +++ b/backend/src/v2/driver/cache.go @@ -77,6 +77,7 @@ func reuseCachedOutputs(ctx context.Context, executorInput *pipelinespec.Executo return executorOutput, outputArtifacts, nil } + // getFingerPrint generates a fingerprint for caching. The PVC names are included in the fingerprint since it's assumed // PVCs have side effects (e.g. files written for tasks later on in the run) on the execution. If the PVC names are // different, the execution shouldn't be reused for the cache. 
diff --git a/backend/src/v2/driver/container.go b/backend/src/v2/driver/container.go index 5f10ba31500..711004cfa8b 100644 --- a/backend/src/v2/driver/container.go +++ b/backend/src/v2/driver/container.go @@ -24,6 +24,7 @@ import ( "github.com/google/uuid" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" "github.com/kubeflow/pipelines/backend/src/v2/cacheutils" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "github.com/kubeflow/pipelines/backend/src/v2/expression" "github.com/kubeflow/pipelines/backend/src/v2/metadata" pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" @@ -177,6 +178,14 @@ func Container(ctx context.Context, opts Options, mlmd *metadata.Client, cacheCl ecfg.FingerPrint = fingerPrint } + taskPluginInfo := &plugins.TaskInfo{Name: opts.TaskName} + pluginStartResult, dispatchErr := opts.PluginDispatcher.OnTaskStart(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("Failed to dispatch task start: %v", dispatchErr) + } else if pluginStartResult != nil { + ecfg.PluginCustomProperties = pluginStartResult.CustomProperties + } + // TODO(Bobgy): change execution state to pending, because this is driver, execution hasn't started. 
createdExecution, err := mlmd.CreateExecution(ctx, pipeline, ecfg) if err != nil { @@ -206,7 +215,16 @@ func Container(ctx context.Context, opts Options, mlmd *metadata.Client, cacheCl execution.Cached = &cached if !opts.CacheDisabled { if opts.Task.GetCachingOptions().GetEnableCache() && ecfg.CachedMLMDExecutionID != "" { - executorOutput, outputArtifacts, err := reuseCachedOutputs(ctx, execution.ExecutorInput, mlmd, ecfg.CachedMLMDExecutionID) + var outputArtifacts []*metadata.OutputArtifact + var executorOutput *pipelinespec.ExecutorOutput + defer func() { + taskPluginInfo.UpdateTaskInfoWithMetadata("CACHED", metadata.FormatScalarMetricArtifacts(outputArtifacts), metadata.FormatExecutionParameters(createdExecution)) + dispatchErr = opts.PluginDispatcher.OnTaskEnd(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("failed to dispatch task end: %v", dispatchErr) + } + }() + executorOutput, outputArtifacts, err = reuseCachedOutputs(ctx, execution.ExecutorInput, mlmd, ecfg.CachedMLMDExecutionID) if err != nil { return execution, err } @@ -226,6 +244,11 @@ func Container(ctx context.Context, opts Options, mlmd *metadata.Client, cacheCl taskConfig := &TaskConfig{} + pluginEnvVars, err := opts.PluginDispatcher.RetrieveUserContainerEnvVars(taskPluginInfo) + if err != nil { + return execution, err + } + podSpec, err := initPodSpecPatch( opts.Container, opts.Component, @@ -245,6 +268,7 @@ func Container(ctx context.Context, opts Options, mlmd *metadata.Client, cacheCl opts.MLPipelineServerPort, opts.MLMDServerAddress, opts.MLMDServerPort, + pluginEnvVars, ) if err != nil { return execution, err diff --git a/backend/src/v2/driver/container_test.go b/backend/src/v2/driver/container_test.go index af21731480e..299c77152b6 100644 --- a/backend/src/v2/driver/container_test.go +++ b/backend/src/v2/driver/container_test.go @@ -20,6 +20,7 @@ import ( "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" 
"github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "github.com/kubeflow/pipelines/backend/src/v2/metadata" pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" "github.com/stretchr/testify/assert" @@ -253,6 +254,7 @@ func TestContainer_CreateExecutionGeneralFailure(t *testing.T) { Image: "python:3.11", Command: []string{"python", "main.py"}, }, + PluginDispatcher: plugins.NoOpDispatcher{}, }, mlmdClient, &mockCacheClient{}) require.NotNil(t, execution) @@ -309,6 +311,7 @@ func TestContainer_CreateExecutionSuccess(t *testing.T) { Image: "python:3.11", Command: []string{"python", "main.py"}, }, + PluginDispatcher: plugins.NoOpDispatcher{}, }, mlmdClient, &mockCacheClient{}) require.NotNil(t, execution) @@ -368,6 +371,7 @@ func TestContainer_CreateExecutionAlreadyExistsLookupReturnsNil(t *testing.T) { Image: "python:3.11", Command: []string{"python", "main.py"}, }, + PluginDispatcher: plugins.NoOpDispatcher{}, }, mlmdClient, &mockCacheClient{}) require.NotNil(t, execution) @@ -426,6 +430,7 @@ func TestContainer_CreateExecutionDoesNotExistGenericError(t *testing.T) { Image: "python:3.11", Command: []string{"python", "main.py"}, }, + PluginDispatcher: plugins.NoOpDispatcher{}, }, mlmdClient, &mockCacheClient{}) // In a successful recovery, we expect NO error to be returned from Container diff --git a/backend/src/v2/driver/dag.go b/backend/src/v2/driver/dag.go index 362fac66f7f..5b9a7752f70 100644 --- a/backend/src/v2/driver/dag.go +++ b/backend/src/v2/driver/dag.go @@ -21,8 +21,10 @@ import ( "github.com/golang/glog" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "github.com/kubeflow/pipelines/backend/src/v2/expression" "github.com/kubeflow/pipelines/backend/src/v2/metadata" + pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" 
"google.golang.org/protobuf/types/known/structpb" ) @@ -107,6 +109,42 @@ func DAG(ctx context.Context, opts Options, mlmd *metadata.Client) (execution *E ecfg.IterationIndex = iterationIndex ecfg.NotTriggered = !execution.WillTrigger() + // Dispatch a plugin task for each loop DAG driver, but not the loop's individual iteration DAG drivers. + var taskPluginInfo *plugins.TaskInfo + if opts.IterationIndex < 0 { + taskPluginInfo = &plugins.TaskInfo{Name: opts.TaskName} + pluginStartResult, dispatchErr := opts.PluginDispatcher.OnTaskStart(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("Failed to dispatch task start: %v", dispatchErr) + } else if pluginStartResult != nil { + ecfg.PluginCustomProperties = pluginStartResult.CustomProperties + } + } else { + // For iteration DAG drivers, propagate plugin custom properties from the + // parent loop DAG's MLMD execution so container drivers inside the + // iteration can recover them via ApplyCustomProperties. + pluginProps := metadata.ExtractPluginCustomProperties(dag.Execution) + if pluginProps != nil { + ecfg.PluginCustomProperties = pluginProps + } + } + + var createdExecution *metadata.Execution + defer func() { + if opts.IterationIndex < 0 { + status := pb.Execution_COMPLETE + if err != nil { + status = pb.Execution_FAILED + } + + taskPluginInfo.UpdateTaskInfoWithMetadata(status.String(), nil, metadata.FormatExecutionParameters(createdExecution)) + dispatchErr := opts.PluginDispatcher.OnTaskEnd(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("failed to dispatch task end: %v", dispatchErr) + } + } + }() + // Handle writing output parameters to MLMD. 
ecfg.OutputParameters = opts.Component.GetDag().GetOutputs().GetParameters() glog.V(4).Info("outputParameters: ", ecfg.OutputParameters) @@ -170,7 +208,7 @@ func DAG(ctx context.Context, opts Options, mlmd *metadata.Client) (execution *E glog.V(4).Infof("dag: %v", dag) // TODO(Bobgy): change execution state to pending, because this is driver, execution hasn't started. - createdExecution, err := mlmd.CreateExecution(ctx, pipeline, ecfg) + createdExecution, err = mlmd.CreateExecution(ctx, pipeline, ecfg) if err != nil { return execution, err } diff --git a/backend/src/v2/driver/driver.go b/backend/src/v2/driver/driver.go index 6d6188c2e61..d583f15175a 100644 --- a/backend/src/v2/driver/driver.go +++ b/backend/src/v2/driver/driver.go @@ -23,6 +23,7 @@ import ( "github.com/kubeflow/pipelines/backend/src/apiserver/config/proxy" "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "github.com/kubeflow/pipelines/backend/src/v2/component" "github.com/kubeflow/pipelines/backend/src/v2/metadata" "github.com/kubeflow/pipelines/kubernetes_platform/go/kubernetesplatform" @@ -92,6 +93,8 @@ type Options struct { PipelineJobScheduleTimeUTC string + PluginDispatcher plugins.TaskPluginDispatcher + // Admin-configured default runAsUser for user containers. Nil means not set. DefaultRunAsUser *int64 // Admin-configured default runAsGroup for user containers. Nil means not set. @@ -254,6 +257,7 @@ func initPodSpecPatch( mlPipelineServerPort string, mlmdServerAddress string, mlmdServerPort string, + pluginEnvVars map[string]string, ) (*k8score.PodSpec, error) { executorInputJSON, err := protojson.Marshal(executorInput) if err != nil { @@ -270,6 +274,11 @@ func initPodSpecPatch( userEnvVar = append(userEnvVar, k8score.EnvVar{Name: envVar.GetName(), Value: envVar.GetValue()}) } + // Append necessary env variables for task-level plugin(s). 
+ for envVar, val := range pluginEnvVars { + userEnvVar = append(userEnvVar, k8score.EnvVar{Name: envVar, Value: val}) + } + userEnvVar = append(userEnvVar, proxy.GetConfig().GetEnvVars()...) setOnTaskConfig, setOnPod := getTaskConfigOptions(componentSpec) diff --git a/backend/src/v2/driver/driver_test.go b/backend/src/v2/driver/driver_test.go index e0a1f9d0858..e6ca6667694 100644 --- a/backend/src/v2/driver/driver_test.go +++ b/backend/src/v2/driver/driver_test.go @@ -284,6 +284,7 @@ func Test_initPodSpecPatch_acceleratorConfig(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) if tt.wantErr { assert.Nil(t, podSpec) @@ -407,6 +408,7 @@ func Test_initPodSpecPatch_resource_placeholders(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) assert.Len(t, podSpec.Containers, 1) @@ -461,6 +463,7 @@ func Test_initPodSpecPatch_legacy_resources(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) assert.Len(t, podSpec.Containers, 1) @@ -517,6 +520,7 @@ func Test_initPodSpecPatch_modelcar_input_artifact(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -578,6 +582,7 @@ func Test_initPodSpecPatch_publishLogs(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) cmd := podSpec.Containers[0].Command @@ -709,6 +714,7 @@ func Test_initPodSpecPatch_resourceRequests(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) assert.NotEmpty(t, podSpec) @@ -771,6 +777,7 @@ func Test_initPodSpecPatch_TaskConfig_ForwardsResourcesOnly(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) assert.NotNil(t, podSpec) @@ -839,6 +846,7 @@ func 
Test_initPodSpecPatch_inputTaskFinalStatus(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) require.Nil(t, err) @@ -1043,6 +1051,7 @@ func Test_initPodSpecPatch_WorkspaceRequiresRunName(t *testing.T) { "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) require.NotNil(t, err) } @@ -1157,6 +1166,7 @@ func TestWorkspaceMount_PassthroughVolumes_CaptureOnly(t *testing.T) { podSpec, err := initPodSpecPatch( containerSpec, componentSpec, executorInput, 27, "test", "run", "my-run-name", "1", "false", "false", taskCfg, false, false, "", "ml-pipeline.kubeflow", "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -1200,6 +1210,7 @@ func TestWorkspaceMount_PassthroughVolumes_ApplyAndCapture(t *testing.T) { podSpec, err := initPodSpecPatch( containerSpec, componentSpec, executorInput, 27, "test", "run", "my-run-name", "1", "false", "false", taskCfg, false, false, "", "ml-pipeline.kubeflow", "8887", "metatadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) // Should mount workspace to pod and also capture to TaskConfig @@ -1270,6 +1281,7 @@ func TestWorkspaceMount_TriggeredByArtifactMetadata(t *testing.T) { podSpec, err := initPodSpecPatch( containerSpec, componentSpec, execInput, 27, "test", "run", "my-run-name", "1", "false", "false", taskCfg, false, false, "", "ml-pipeline.kubeflow", "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -1332,6 +1344,7 @@ func Test_initPodSpecPatch_TaskConfig_Env_Passthrough_CaptureOnly(t *testing.T) "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -1383,6 +1396,7 @@ func Test_initPodSpecPatch_TaskConfig_Resources_Passthrough_ApplyAndCapture(t *t "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) // 
Resources should be both on pod and in TaskConfig @@ -1465,6 +1479,7 @@ func Test_initPodSpecPatch_TaskConfig_Affinity_NodeSelector_Tolerations_Passthro "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -1568,6 +1583,7 @@ func Test_initPodSpecPatch_TaskConfig_Affinity_NodeSelector_Tolerations_ApplyAnd "8887", "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) @@ -1653,6 +1669,7 @@ func Test_initPodSpecPatch_mlPipelineServerConfig(t *testing.T) { customPort, "metadata-grpc-service.kubeflow.svc.local", "8080", + map[string]string{}, ) assert.Nil(t, err) assert.NotNil(t, podSpec) diff --git a/backend/src/v2/driver/k8s.go b/backend/src/v2/driver/k8s.go index 9716b7f11fc..6908115662e 100644 --- a/backend/src/v2/driver/k8s.go +++ b/backend/src/v2/driver/k8s.go @@ -26,6 +26,7 @@ import ( "github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec" "github.com/kubeflow/pipelines/backend/src/common/util" "github.com/kubeflow/pipelines/backend/src/v2/cacheutils" + "github.com/kubeflow/pipelines/backend/src/v2/common/plugins" "github.com/kubeflow/pipelines/backend/src/v2/component" "github.com/kubeflow/pipelines/backend/src/v2/config" "github.com/kubeflow/pipelines/backend/src/v2/metadata" @@ -76,6 +77,7 @@ func kubernetesPlatformOps( var createdExecution *metadata.Execution status := pb.Execution_FAILED var pvcName string + taskPluginInfo := &plugins.TaskInfo{Name: opts.TaskName} defer func() { // We publish the execution, no matter this operartion succeeds or not perr := publishDriverExecution(k8sClient, mlmd, ctx, createdExecution, outputParameters, nil, status) @@ -84,8 +86,20 @@ func kubernetesPlatformOps( } else if perr != nil { err = fmt.Errorf("failed to publish driver execution: %w", perr) } + taskPluginInfo.UpdateTaskInfoWithMetadata(status.String(), nil, nil) + dispatchErr := opts.PluginDispatcher.OnTaskEnd(ctx, taskPluginInfo) + if dispatchErr != nil { + 
glog.Errorf("failed to dispatch task end: %v", dispatchErr) + } + }() + pluginStartResult, dispatchErr := opts.PluginDispatcher.OnTaskStart(ctx, taskPluginInfo) + if dispatchErr != nil { + glog.Errorf("Failed to dispatch task start: %v", dispatchErr) + } else if pluginStartResult != nil { + ecfg.PluginCustomProperties = pluginStartResult.CustomProperties + } switch opts.Container.Image { case "argostub/createpvc": pvcName, createdExecution, status, err = createPVC(ctx, k8sClient, *execution, opts, cacheClient, mlmd, ecfg) diff --git a/backend/src/v2/driver/resolve.go b/backend/src/v2/driver/resolve.go index d1e687f483d..0d47f71d36c 100644 --- a/backend/src/v2/driver/resolve.go +++ b/backend/src/v2/driver/resolve.go @@ -123,6 +123,14 @@ func resolveInputs( if err != nil { return nil, err } + // Recover plugin custom properties from the parent DAG's MLMD execution + // (e.g. loop DAG → iteration DAG → container). This calls + // ApplyCustomProperties so that subsequent OnTaskStart uses the correct + // parent run ID. + pluginProps := metadata.ExtractPluginCustomProperties(dag.Execution) + if pluginProps != nil { + opts.PluginDispatcher.ApplyCustomProperties(pluginProps) + } inputArtifacts, err := mlmd.GetInputArtifactsByExecutionID(ctx, dag.Execution.GetID()) if err != nil { return nil, err diff --git a/backend/src/v2/metadata/client.go b/backend/src/v2/metadata/client.go index 201126668c6..88009639a02 100644 --- a/backend/src/v2/metadata/client.go +++ b/backend/src/v2/metadata/client.go @@ -215,6 +215,10 @@ type ExecutionConfig struct { // DAGExecution custom properties IterationCount *int // Number of iterations for an iterator DAG. TotalDagTasks *int // Number of tasks inside the DAG + + // PluginCustomProperties holds arbitrary key-value pairs contributed by + // task-level plugins (e.g. "plugins.mlflow.run_id"). + PluginCustomProperties map[string]string } // InputArtifact is a wrapper around an MLMD artifact used as component inputs. 
@@ -347,6 +351,26 @@ func (e *Execution) FingerPrint() string { return e.Execution.GetCustomProperties()[keyCacheFingerPrint].GetStringValue() } +const pluginCustomPropertyPrefix = "plugins." + +// ExtractPluginCustomProperties returns all MLMD execution custom properties +// that match the plugin naming convention (keys starting with "plugins."). +func ExtractPluginCustomProperties(execution *Execution) map[string]string { + if execution == nil || execution.Execution == nil { + return nil + } + result := map[string]string{} + for key, value := range execution.Execution.GetCustomProperties() { + if len(key) > len(pluginCustomPropertyPrefix) && key[:len(pluginCustomPropertyPrefix)] == pluginCustomPropertyPrefix { + result[key] = value.GetStringValue() + } + } + if len(result) == 0 { + return nil + } + return result +} + // GetTaskNameWithDagID appends the taskName with its parent dag id. This is // used to help avoid collisions when creating the taskMap for downstream input // resolution. 
@@ -704,6 +728,9 @@ func (c *Client) CreateExecution(ctx context.Context, pipeline *Pipeline, config if config.TotalDagTasks != nil { e.CustomProperties[keyTotalDagTasks] = intValue(int64(*config.TotalDagTasks)) } + for k, v := range config.PluginCustomProperties { + e.CustomProperties[k] = StringValue(v) + } req := &pb.PutExecutionRequest{ Execution: e, @@ -1440,3 +1467,41 @@ func (c *Client) getContextByID(ctx context.Context, id int64) (*pb.Context, err } return contexts[0], nil } + +func FormatExecutionParameters(execution *Execution) map[string]interface{} { + if execution == nil { + return nil + } + params := make(map[string]interface{}) + inputParams, _, err := execution.GetParameters() + if err != nil { + glog.Errorf("failed to retrieve task parameters: %v", err) + } else { + for key, value := range inputParams { + if value == nil { + params[key] = nil + continue + } + params[key] = value.AsInterface() + } + } + return params +} + +func FormatScalarMetricArtifacts(outputArtifacts []*OutputArtifact) map[string]float64 { + metrics := map[string]float64{} + if outputArtifacts != nil { + for _, artifact := range outputArtifacts { + if artifact.Artifact != nil && artifact.Artifact.GetType() == "system.Metrics" { + for customKey, customValue := range artifact.Artifact.CustomProperties { + // retrieve scalar metric artifact values. do not retrieve display_name or store_session_info. 
+ if customKey == "display_name" || customKey == "store_session_info" { + continue + } + metrics[customKey] = customValue.GetDoubleValue() + } + } + } + } + return metrics +} diff --git a/backend/src/v2/metadata/client_test.go b/backend/src/v2/metadata/client_test.go index a2c0d2c0168..cce49daef03 100644 --- a/backend/src/v2/metadata/client_test.go +++ b/backend/src/v2/metadata/client_test.go @@ -18,6 +18,7 @@ import ( "context" "crypto/tls" "fmt" + "maps" "reflect" "runtime/debug" "sync" @@ -25,6 +26,7 @@ import ( "unsafe" "github.com/kubeflow/pipelines/backend/src/v2/metadata/testutils" + "google.golang.org/protobuf/types/known/structpb" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -420,6 +422,75 @@ func Test_GetExecutionsByTypeAndName(t *testing.T) { }) } +func TestFormatOutputArtifacts(t *testing.T) { + var outputArtifacts = []*metadata.OutputArtifact{ + { + Name: "accuracy_metrics", + Artifact: &pb.Artifact{ + Type: proto.String("system.Metrics"), + CustomProperties: map[string]*pb.Value{ + "accuracy": {Value: &pb.Value_DoubleValue{DoubleValue: 0.95}}, + "display_name": {Value: &pb.Value_StringValue{StringValue: "accuracy_metrics"}}, + "store_session_info": {Value: &pb.Value_StringValue{StringValue: "session-abc-123"}}, + }, + }, + Schema: "title: kfp.Metrics\ntype: object", + }, + { + Name: "loss_metrics", + Artifact: &pb.Artifact{ + Type: proto.String("system.Metrics"), + CustomProperties: map[string]*pb.Value{ + "loss": {Value: &pb.Value_DoubleValue{DoubleValue: 0.05}}, + "display_name": {Value: &pb.Value_StringValue{StringValue: "loss_metrics"}}, + "store_session_info": {Value: &pb.Value_StringValue{StringValue: "session-abc-123"}}, + }, + }, + Schema: "title: kfp.Metrics\ntype: object", + }, + } + + expectedResult := map[string]float64{ + "accuracy": 0.95, + "loss": 0.05, + } + + result := metadata.FormatScalarMetricArtifacts(outputArtifacts) + + if !maps.Equal(expectedResult, result) { + t.Errorf("result differs from expected 
result. Expected: %v, Actual: %v", expectedResult, result) + } +} + +func TestFormatExecutionParameters(t *testing.T) { + var execution = metadata.NewExecution(&pb.Execution{ + LastKnownState: pb.Execution_COMPLETE.Enum(), + LastUpdateTimeSinceEpoch: proto.Int64(1714400000000), + CustomProperties: map[string]*pb.Value{ + "inputs": {Value: &pb.Value_StructValue{ + StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "param_one": structpb.NewStringValue("hello"), + "param_two": structpb.NewNumberValue(42), + }, + }, + }}, + }, + }) + + expectedResult := map[string]interface{}{ + "param_one": "hello", + "param_two": float64(42), + } + + result := metadata.FormatExecutionParameters(execution) + + if diff := cmp.Diff(expectedResult, result); diff != "" { + t.Errorf("result differs from expected (-want +got):\n%s", diff) + } + +} + func newLocalClientOrFatal(t *testing.T) *metadata.Client { t.Helper() client, err := metadata.NewClient("localhost", "8080", &tls.Config{}) diff --git a/backend/test/config/flags.go b/backend/test/config/flags.go index 32b56b52bac..fe826289718 100644 --- a/backend/test/config/flags.go +++ b/backend/test/config/flags.go @@ -59,3 +59,7 @@ var ( MinioEndpoint = flag.String("minioEndpoint", "localhost:9000", "MinIO endpoint (host:port)") MinioLogsPrefixFmt = flag.String("minioLogsPrefixFmt", "private-artifacts/%s", "Format string for logs prefix (use %s for workflow namespace)") ) + +var ( + MLflowEnabled = flag.Bool("mlflowEnabled", false, "Whether MLflow is deployed and available for integration tests") +) diff --git a/backend/test/constants/test_features.go b/backend/test/constants/test_features.go index b65d44e5e59..70f4c4e504e 100644 --- a/backend/test/constants/test_features.go +++ b/backend/test/constants/test_features.go @@ -47,4 +47,13 @@ const ( UpgradePreparation string = "UpgradePreparation" UpgradeVerification string = "UpgradeVerification" + + // MLflow - quality gate tag for MLflow integration testing + MLflow 
string = "MLflow" + // MLflowCore — primary MLflow E2E paths + MLflowCore string = "MLflowCore" + // MLflowParallelLoop — parallel-for pipeline and multi-level nested MLflow run assertions. + MLflowParallelLoop string = "MLflowParallelLoop" + // MLflowFailure — failed pipelines and RetryRun semantics with MLflow tracking. + MLflowFailure string = "MLflowFailure" ) diff --git a/backend/test/end2end/mlflow_e2e_test.go b/backend/test/end2end/mlflow_e2e_test.go new file mode 100644 index 00000000000..b1837e71858 --- /dev/null +++ b/backend/test/end2end/mlflow_e2e_test.go @@ -0,0 +1,680 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package end2end + +import ( + "fmt" + "path/filepath" + "strconv" + "time" + + upload_params "github.com/kubeflow/pipelines/backend/api/v2beta1/go_http_client/pipeline_upload_client/pipeline_upload_service" + "github.com/kubeflow/pipelines/backend/api/v2beta1/go_http_client/pipeline_upload_model" + "github.com/kubeflow/pipelines/backend/api/v2beta1/go_http_client/run_model" + . "github.com/kubeflow/pipelines/backend/test/constants" + e2e_utils "github.com/kubeflow/pipelines/backend/test/end2end/utils" + "github.com/kubeflow/pipelines/backend/test/logger" + "github.com/kubeflow/pipelines/backend/test/testutil" + apitests "github.com/kubeflow/pipelines/backend/test/v2/api" + + . "github.com/onsi/ginkgo/v2" + "github.com/onsi/ginkgo/v2/types" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("MLflow Integration >", Label(MLflow, FullRegression), func() { + var testContext *apitests.TestContext + var mlflowEndpoint string + + const singleTaskPipelineFile = "add_numbers.yaml" + const singleTaskPipelineDir = "valid/critical" + + const multiTaskPipelineFile = "producer_consumer_param_pipeline.yaml" + const multiTaskPipelineDir = "valid/critical" + + // ################## SETUP AND TEARDOWN ################## + + BeforeEach(func() { + e2e_utils.SkipIfMLflowDisabled() + mlflowEndpoint = e2e_utils.GetMLflowEndpoint() + + logger.Log("################### Setup before each MLflow test #####################") + testContext = &apitests.TestContext{ + TestStartTimeUTC: time.Now(), + } + randomName = strconv.FormatInt(time.Now().UnixNano(), 10) + testContext.Pipeline.UploadParams = upload_params.NewUploadPipelineParams() + testContext.Pipeline.PipelineGeneratedName = "mlflow-e2e-" + randomName + testContext.Pipeline.CreatedPipelines = make([]*pipeline_upload_model.V2beta1Pipeline, 0) + testContext.PipelineRun.CreatedRunIds = make([]string, 0) + }) + + ReportAfterEach(func(specReport types.SpecReport) { + if testContext == nil { + return + } + if specReport.Failed() && len(testContext.PipelineRun.CreatedRunIds) > 0 { + report, _ := testutil.BuildArchivedWorkflowLogsReport(k8Client, testContext.PipelineRun.CreatedRunIds) + AddReportEntry(testutil.ArchivedWorkflowLogsReportTitle, report) + } + + logger.Log("Deleting %d run(s)", len(testContext.PipelineRun.CreatedRunIds)) + for _, runID := range testContext.PipelineRun.CreatedRunIds { + runID := runID + testutil.TerminatePipelineRun(runClient, runID) + testutil.ArchivePipelineRun(runClient, runID) + testutil.DeletePipelineRun(runClient, runID) + } + logger.Log("Deleting %d experiment(s)", len(testContext.Experiment.CreatedExperimentIds)) + for _, expID := range testContext.Experiment.CreatedExperimentIds { + expID := expID + testutil.DeleteExperiment(experimentClient, 
expID) + } + logger.Log("Deleting %d pipeline(s)", len(testContext.Pipeline.CreatedPipelines)) + for _, pipeline := range testContext.Pipeline.CreatedPipelines { + testutil.DeletePipeline(pipelineClient, pipeline.PipelineID, true) + } + }) + + // ################## HELPER ################## + + // uploadPipeline uploads a pipeline from the given dir/file and returns + // the pipeline ID and version ID. + uploadPipeline := func(dir, file string) (string, string) { + pipelineFilePath := filepath.Join(testutil.GetPipelineFilesDir(), dir, file) + uploadedPipeline, err := testutil.UploadPipeline(pipelineUploadClient, pipelineFilePath, &testContext.Pipeline.PipelineGeneratedName, nil) + Expect(err).To(BeNil(), "Failed to upload pipeline %s", file) + testContext.Pipeline.CreatedPipelines = append(testContext.Pipeline.CreatedPipelines, uploadedPipeline) + version := testutil.GetLatestPipelineVersion(pipelineClient, &uploadedPipeline.PipelineID) + return uploadedPipeline.PipelineID, version.PipelineVersionID + } + + uploadSingleTaskPipeline := func() (string, string) { + return uploadPipeline(singleTaskPipelineDir, singleTaskPipelineFile) + } + + uploadMultiTaskPipeline := func() (string, string) { + return uploadPipeline(multiTaskPipelineDir, multiTaskPipelineFile) + } + + // ################## TESTS ################## + + Context("Single task pipeline with MLflow enabled >", Label(MLflowCore), func() { + It("Should populate plugins_output.mlflow with experiment_id, root_run_id, and state=PLUGIN_SUCCEEDED", func() { + pipelineID, versionID := uploadSingleTaskPipeline() + experimentName := fmt.Sprintf("mlflow-test-%s", randomName) + pluginsInput := e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), singleTaskPipelineDir, singleTaskPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, 
experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateSUCCEEDED), + "Pipeline run should succeed") + + // Verify KFP plugins_output + err := e2e_utils.VerifyPluginsOutput(updatedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + // Verify MLflow side + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + Expect(rootRunID).NotTo(BeEmpty(), "root_run_id should not be empty") + + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + Expect(mlflowExperimentID).NotTo(BeEmpty(), "experiment_id should not be empty") + + // Verify the MLflow experiment was created with the correct name + mlflowExp, err := e2e_utils.QueryMLflowExperimentByName(mlflowEndpoint, experimentName) + Expect(err).NotTo(HaveOccurred()) + Expect(mlflowExp.ID).To(Equal(mlflowExperimentID)) + + // Verify parent run status + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FINISHED") + Expect(err).NotTo(HaveOccurred()) + + // Verify KFP tags on parent run + err = e2e_utils.VerifyMLflowRunTags(mlflowEndpoint, rootRunID, mlflowExperimentID, map[string]string{ + "kfp.pipeline_run_id": updatedRun.RunID, + }) + Expect(err).NotTo(HaveOccurred()) + }) + + It("Should have nested MLflow run(s) with FINISHED status", func() { + pipelineID, versionID := uploadSingleTaskPipeline() + experimentName := fmt.Sprintf("mlflow-nested-%s", randomName) + pluginsInput := 
e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), singleTaskPipelineDir, singleTaskPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + + // Single-task pipeline should have exactly 1 nested run directly under the root + nestedRuns, err := e2e_utils.QueryNestedRuns(mlflowEndpoint, rootRunID, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + Expect(len(nestedRuns)).To(Equal(1), + "Should have exactly 1 nested MLflow run for a single-task pipeline") + + // Verify the nested run is FINISHED + nestedRun := nestedRuns[0] + Expect(nestedRun.Info.Status).To(Equal("FINISHED"), + "Nested MLflow run should be FINISHED") + + // Verify parent linkage tag is present on the nested run. + // Task-level run naming/tagging may vary by launcher/runtime path. 
+ var hasParentRunTag bool + for _, tag := range nestedRun.Data.Tags { + if tag.Key == "mlflow.parentRunId" { + hasParentRunTag = true + Expect(tag.Value).To(Equal(rootRunID), + "Nested MLflow run should reference the parent root run") + break + } + } + Expect(hasParentRunTag).To(BeTrue(), + "Nested MLflow run should have an mlflow.parentRunId tag") + }) + }) + + Context("Multi-task pipeline with MLflow >", Label(MLflowCore), func() { + // producer_consumer_param_pipeline.yaml has 2 executor tasks: producer → consumer + const expectedTaskCount = 2 + + It("Should create one parent run and one nested run per task, all FINISHED", func() { + pipelineID, versionID := uploadMultiTaskPipeline() + experimentName := fmt.Sprintf("mlflow-multi-%s", randomName) + pluginsInput := e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), multiTaskPipelineDir, multiTaskPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateSUCCEEDED), + "Multi-task pipeline run should succeed") + + // Verify KFP plugins_output + err := e2e_utils.VerifyPluginsOutput(updatedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + // Verify parent MLflow run + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + mlflowExperimentID, err := 
e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FINISHED") + Expect(err).NotTo(HaveOccurred()) + + // Verify there is one nested run per task + nestedCount, err := e2e_utils.CountNestedRuns(mlflowEndpoint, rootRunID, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + Expect(nestedCount).To(Equal(expectedTaskCount), + fmt.Sprintf("Should have exactly %d nested MLflow runs (one per task)", expectedTaskCount)) + + // Verify all nested runs are FINISHED + allRuns, err := e2e_utils.QueryMLflowRuns(mlflowEndpoint, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + for _, run := range allRuns { + Expect(run.Info.Status).To(Equal("FINISHED"), + fmt.Sprintf("MLflow run %s should be FINISHED", run.Info.RunID)) + } + }) + }) + + Context("Backward compatibility — no plugins_input >", Label(MLflowCore), func() { + It("Should succeed and populate plugins_output with defaults when no plugins_input is provided", func() { + pipelineID, versionID := uploadSingleTaskPipeline() + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), singleTaskPipelineDir, singleTaskPipelineFile), + ) + + // Create run without plugins_input + createdRun := e2e_utils.CreatePipelineRun( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateSUCCEEDED), + "Pipeline run should succeed even without plugins_input") + + err := 
e2e_utils.VerifyPluginsOutput(updatedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + // Verify the default MLflow experiment and parent run exist + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + Expect(rootRunID).NotTo(BeEmpty(), "root_run_id should be present even without plugins_input") + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FINISHED") + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Context("MLflow opt-out via disabled flag >", Label(MLflowCore), func() { + It("Should succeed with no MLflow output when plugins_input.mlflow.disabled=true", func() { + pipelineID, versionID := uploadSingleTaskPipeline() + pluginsInput := e2e_utils.BuildMLflowPluginsInputDisabled() + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), singleTaskPipelineDir, singleTaskPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateSUCCEEDED), + "Pipeline run should succeed when MLflow is disabled via plugins_input") + + // No MLflow output should be present + err := e2e_utils.VerifyNoPluginsOutput(updatedRun) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Context("Parallel-for pipeline with MLflow 
>", Label(MLflowParallelLoop), func() { + // parallel_for_after_dependency.yaml has a parallel-for loop with 3 iterations + // and 2 dependent tasks. + const parallelForPipelineFile = "parallel_for_after_dependency.yaml" + const parallelForPipelineDir = "valid/critical" + + It("Should create parent run with correct 2-level nesting hierarchy", func() { + pipelineID, versionID := uploadPipeline(parallelForPipelineDir, parallelForPipelineFile) + experimentName := fmt.Sprintf("mlflow-loop-%s", randomName) + pluginsInput := e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), parallelForPipelineDir, parallelForPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateSUCCEEDED), + "Parallel-for pipeline run should succeed") + + // Verify KFP plugins_output + err := e2e_utils.VerifyPluginsOutput(updatedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + // Verify parent MLflow run is FINISHED + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FINISHED") + Expect(err).NotTo(HaveOccurred()) + + // Direct 
children of root: 1 loop run + 2 dependent tasks = 3 + const expectedDirectChildren = 3 + directChildCount, err := e2e_utils.CountNestedRuns(mlflowEndpoint, rootRunID, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + Expect(directChildCount).To(Equal(expectedDirectChildren), + "Should have exactly 3 direct children of root (1 loop run + 2 dependent tasks)") + + // Find the loop nested run (has iterations as children). + // It's the one direct child that itself has nested runs. + allRuns, err := e2e_utils.QueryMLflowRuns(mlflowEndpoint, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + + var loopRunID string + for _, run := range allRuns { + if run.Info.RunID == rootRunID { + continue // skip parent + } + childCount, err := e2e_utils.CountNestedRuns(mlflowEndpoint, run.Info.RunID, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + if childCount > 0 { + loopRunID = run.Info.RunID + // The loop run should have exactly 3 iteration children + Expect(childCount).To(Equal(3), + "Loop nested run should have exactly 3 iteration children") + break + } + } + Expect(loopRunID).NotTo(BeEmpty(), + "Should find a loop nested run with iteration children") + + // Verify all MLflow runs in the experiment are FINISHED + for _, run := range allRuns { + Expect(run.Info.Status).To(Equal("FINISHED"), + fmt.Sprintf("MLflow run %s should be FINISHED", run.Info.RunID)) + } + + // Total runs in experiment: 1 parent + 3 direct + 3 iterations = 7 + Expect(len(allRuns)).To(Equal(7), + "Should have exactly 7 MLflow runs in the experiment") + }) + }) + + Context("Failed pipeline with MLflow >", Label(MLflowFailure), func() { + const failPipelineFile = "fail_v2.yaml" + const failPipelineDir = "valid/failing" + + It("Should mark parent and nested MLflow runs as FAILED", func() { + pipelineID, versionID := uploadPipeline(failPipelineDir, failPipelineFile) + experimentName := fmt.Sprintf("mlflow-fail-%s", randomName) + pluginsInput := 
e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), failPipelineDir, failPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateFAILED), + "Pipeline run should be FAILED") + + // Verify MLflow plugins_output still records the run + err := e2e_utils.VerifyPluginsOutput(updatedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + Expect(rootRunID).NotTo(BeEmpty(), "root_run_id should not be empty") + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + // The parent MLflow run should be FAILED because the KFP run failed + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FAILED") + Expect(err).NotTo(HaveOccurred()) + + // Verify any nested runs are also in a terminal state (FAILED) + allRuns, err := e2e_utils.QueryMLflowRuns(mlflowEndpoint, mlflowExperimentID) + Expect(err).NotTo(HaveOccurred()) + for _, run := range allRuns { + if run.Info.RunID != rootRunID { + // Nested runs for failed tasks should be FAILED + Expect(run.Info.Status).To(Equal("FAILED"), + fmt.Sprintf("Nested MLflow run %s should be FAILED", run.Info.RunID)) + } + } + }) + }) + + Context("Failed pipeline + RetryRun with 
MLflow >", Label(MLflowFailure), func() { + const failPipelineFile = "fail_v2.yaml" + const failPipelineDir = "valid/failing" + + It("Should reopen MLflow runs on retry and then reflect the retried status", func() { + pipelineID, versionID := uploadPipeline(failPipelineDir, failPipelineFile) + experimentName := fmt.Sprintf("mlflow-retry-%s", randomName) + pluginsInput := e2e_utils.BuildMLflowPluginsInput(experimentName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), failPipelineDir, failPipelineFile), + ) + + // Create the run and wait for it to FAIL + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(updatedRun.State).NotTo(BeNil()) + Expect(*updatedRun.State).To(Equal(run_model.V2beta1RuntimeStateFAILED), + "Pipeline run should initially be FAILED") + + rootRunID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + Expect(rootRunID).NotTo(BeEmpty(), "root_run_id should not be empty") + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + + // Retry the run + e2e_utils.RetryPipelineRun(runClient, createdRun.RunID) + + // Wait for the retried run to reach terminal state + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateFAILED, + }, &timeout) + + retriedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + Expect(retriedRun.State).NotTo(BeNil()) + 
Expect(*retriedRun.State).To(Equal(run_model.V2beta1RuntimeStateFAILED), + "Retried pipeline run should still be FAILED (fail_v2 always fails)") + + // Verify the MLflow parent run reflects the retry + err = e2e_utils.VerifyMLflowRunStatus(mlflowEndpoint, rootRunID, mlflowExperimentID, "FAILED") + Expect(err).NotTo(HaveOccurred()) + + // Verify plugins_output is still populated after retry + err = e2e_utils.VerifyPluginsOutput(retriedRun, run_model.V2beta1PluginStatePLUGINSUCCEEDED) + Expect(err).NotTo(HaveOccurred()) + + // Verify retry reused the existing parent run + retriedRootRunID, err := e2e_utils.GetPluginsOutputEntryValue(retriedRun, "root_run_id") + Expect(err).NotTo(HaveOccurred()) + Expect(retriedRootRunID).To(Equal(rootRunID), + "OnRunRetry should reopen the existing parent MLflow run, not create a new one") + }) + }) + + Context("Custom experiment name >", Label(MLflowCore), func() { + It("Should create MLflow experiment with the user-specified name", func() { + pipelineID, versionID := uploadSingleTaskPipeline() + customExpName := fmt.Sprintf("custom-exp-%s", randomName) + pluginsInput := e2e_utils.BuildMLflowPluginsInput(customExpName) + pipelineRuntimeInputs := testutil.GetPipelineRunTimeInputs( + filepath.Join(testutil.GetPipelineFilesDir(), singleTaskPipelineDir, singleTaskPipelineFile), + ) + + createdRun := e2e_utils.CreatePipelineRunWithPluginsInput( + runClient, testContext, &pipelineID, &versionID, experimentID, pipelineRuntimeInputs, pluginsInput, + ) + + timeout := time.Duration(maxPipelineWaitTime) + testutil.WaitForRunToBeInState(runClient, &createdRun.RunID, []run_model.V2beta1RuntimeState{ + run_model.V2beta1RuntimeStateSUCCEEDED, + }, &timeout) + + updatedRun := testutil.GetPipelineRun(runClient, &createdRun.RunID) + mlflowExperimentID, err := e2e_utils.GetPluginsOutputEntryValue(updatedRun, "experiment_id") + Expect(err).NotTo(HaveOccurred()) + + // Verify the MLflow experiment name matches + mlflowExp, err := 
e2e_utils.QueryMLflowExperimentByName(mlflowEndpoint, customExpName) + Expect(err).NotTo(HaveOccurred()) + Expect(mlflowExp.ID).To(Equal(mlflowExperimentID), + "MLflow experiment ID should match the one in plugins_output") + Expect(mlflowExp.Name).To(Equal(customExpName), + "MLflow experiment name should match the user-specified name") + }) + }) +}) + +// Additional scenarios from proposals/12862-mlflow-integration/MLflow-KFP-Integration-TestPlan*.md. +// Uses PDescribe the same way as pipeline_api_test.go. + +var _ = PDescribe("MLflow Integration > CreateRun and validation >", Label(MLflow, FullRegression), func() { + Context("Experiment and plugins_input >", func() { + It("Should use default admin experiment name when no plugins_input and assert run_url in plugins_output", func() { + }) + It("Should prefer experiment_id over experiment_name when both are set", func() { + }) + It("Should create two separate MLflow experiments and parent runs for two runs with different experiment_name (same pipeline)", func() { + }) + It("Should recover when concurrent creates race and MLflow reports experiment already exists", func() { + }) + }) + Context("API and workflow semantics >", func() { + It("Should reject CreateRun when client supplies plugins_output", func() { + }) + It("Should expose MLflow runtime JSON env on driver and launcher templates only after compile", func() { + }) + It("Should update MLflow parent to Failed when MLflow parent exists then workflow create or DB write fails", func() { + }) + }) + Context("Tags and deep links >", func() { + It("Should set MLflow tags for pipeline_id and pipeline_version_id when present", func() { + }) + It("Should produce usable KFP and MLflow deep links in tags and plugins_output", func() { + }) + It("Should honor experiment description omit, empty string, and custom values per defaults", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > Terminal state and sync >", Label(MLflow, FullRegression), func() { + 
Context("Pipeline outcomes >", func() { + It("Should map MLflow parent and nested runs when pipeline is Canceled", func() { + }) + }) + Context("Reliability >", func() { + It("Should update straggler nested runs with pagination for large fan-out", func() { + }) + It("Should persist latest plugins_output on the KFP run after terminal handling", func() { + }) + It("Should record KFP terminal state when MLflow sync returns errors", func() { + }) + It("Should handle terminal sync with corrupt or missing stored MLflow ids in plugin output", func() { + }) + It("Should retry MLflow HTTP only where appropriate", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > RetryRun >", Label(MLflow, FullRegression), func() { + Context("Edge cases >", func() { + It("Should have no MLflow side effects when retrying a failed run with no MLflow plugin output", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > CloneRun >", Label(MLflow, FullRegression), func() { + Context("Clone behavior >", func() { + It("Should inherit plugins_input.mlflow on clone and create a new MLflow parent for the cloned run", func() { + }) + It("Should use distinct MLflow run ids for clone vs original when both complete", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > Recurring runs >", Label(MLflow, FullRegression), func() { + Context("Scheduled jobs >", func() { + It("Should store plugins_input on recurring job and surface it on the scheduled workflow", func() { + }) + It("Should propagate plugins_input to runs created by each schedule trigger", func() { + }) + It("Should accept recurring job referencing pipeline version with plugins without inline workflow spec", func() { + }) + It("Should create one MLflow parent per schedule trigger", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > Negative and edge cases >", Label(MLflow, FullRegression), func() { + Context("Configuration and validation >", func() { + It("Should allow or clearly fail 
CreateRun when MLflow endpoint is unreachable at create", func() { + }) + It("Should reject malformed plugins_input.mlflow JSON or unknown fields with validation error", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > Compiled workflow injection >", Label(MLflow, FullRegression), func() { + Context("Templates and env >", func() { + It("Should inject MLflow env only on driver and launcher templates", func() { + }) + It("Should replace duplicate env keys consistently when template predefines the same name", func() { + }) + It("Should omit MLflow env when plugins.mlflow is not configured", func() { + }) + }) +}) + +var _ = PDescribe("MLflow Integration > Cluster and deployment >", Label(MLflow, FullRegression), func() { + Context("Lifecycle and platform >", func() { + It("Should leave existing behavior unchanged with no plugins.mlflow (upgrade path)", func() { + }) + It("Should respect per-namespace kfp-launcher overrides for MLflow in multi-tenant setups", func() { + }) + It("Should allow deleting KFP run while MLflow runs may remain", func() { + }) + It("Should succeed against MLflow served over HTTPS", func() { + }) + }) +}) diff --git a/backend/test/end2end/utils/mlflow_utils.go b/backend/test/end2end/utils/mlflow_utils.go new file mode 100644 index 00000000000..84f87d62df1 --- /dev/null +++ b/backend/test/end2end/utils/mlflow_utils.go @@ -0,0 +1,400 @@ +// Copyright 2026 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Package utils provides helpers shared across end-to-end tests. +package utils + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net/http" + "os" + "strings" + "time" + + runparams "github.com/kubeflow/pipelines/backend/api/v2beta1/go_http_client/run_client/run_service" + "github.com/kubeflow/pipelines/backend/api/v2beta1/go_http_client/run_model" + apiserver "github.com/kubeflow/pipelines/backend/src/common/client/api_server/v2" + mlflowclient "github.com/kubeflow/pipelines/backend/src/common/plugins/mlflow" + "github.com/kubeflow/pipelines/backend/test/config" + "github.com/kubeflow/pipelines/backend/test/logger" + apitests "github.com/kubeflow/pipelines/backend/test/v2/api" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" +) + +const ( + mlflowEndpointEnv = "MLFLOW_TRACKING_URI" + mlflowInsecureTLSEnv = "MLFLOW_TRACKING_INSECURE_TLS" + mlflowBearerTokenEnv = "MLFLOW_BEARER_TOKEN" + mlflowWorkspaceEnv = "MLFLOW_WORKSPACE" + //todo: potential fix + mlflowPluginKey = "mlflow" +) + +func getMLflowClient(endpoint string) (*mlflowclient.Client, error) { + insecure := strings.EqualFold(os.Getenv(mlflowInsecureTLSEnv), "true") + workspace := os.Getenv(mlflowWorkspaceEnv) + bearerToken := os.Getenv(mlflowBearerTokenEnv) + httpClient := &http.Client{ + Timeout: 30 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: insecure, //nolint:gosec + }, + }, + } + client, err := mlflowclient.NewClient(mlflowclient.Config{ + Endpoint: endpoint, + HTTPClient: httpClient, + BearerToken: bearerToken, + WorkspacesEnabled: workspace != "", + Workspace: workspace, + }) + if err != nil { + return nil, fmt.Errorf("failed to create MLflow client: %w", err) + } + if bearerToken != "" { + logger.Log("MLflow client initialized with bearer token auth") + } + if workspace != "" { + logger.Log("MLflow client 
initialized with workspace header: %s", workspace) + } + if insecure { + logger.Log("MLflow client initialized with InsecureSkipVerify=true") + } + return client, nil +} + +// RetryPipelineRun retries a failed/terminated KFP pipeline run. +func RetryPipelineRun(runClient *apiserver.RunClient, runID string) { + ginkgo.GinkgoHelper() + retryParams := runparams.NewRunServiceRetryRunParams() + retryParams.RunID = runID + err := runClient.Retry(retryParams) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), + fmt.Sprintf("Failed to retry run %s", runID)) + logger.Log("Retried Pipeline Run, runId=%s", runID) +} + +// SkipIfMLflowDisabled skips the current test if the mlflowEnabled flag is false. +func SkipIfMLflowDisabled() { + ginkgo.GinkgoHelper() + if !*config.MLflowEnabled { + ginkgo.Skip("MLflow is not enabled; skipping MLflow integration test") + } +} + +// GetMLflowEndpoint returns the MLflow tracking server endpoint from the +// MLFLOW_TRACKING_URI environment variable. +func GetMLflowEndpoint() string { + ginkgo.GinkgoHelper() + endpoint := os.Getenv(mlflowEndpointEnv) + gomega.Expect(endpoint).NotTo(gomega.BeEmpty(), + fmt.Sprintf("%s environment variable must be set for MLflow tests", mlflowEndpointEnv)) + return strings.TrimRight(endpoint, "/") +} + +// --- KFP Run helpers with plugins_input --- + +// CreatePipelineRunWithPluginsInput - Create a pipeline run with plugins_input. 
+func CreatePipelineRunWithPluginsInput( + runClient *apiserver.RunClient, + testContext *apitests.TestContext, + pipelineID *string, + pipelineVersionID *string, + experimentID *string, + inputParams map[string]interface{}, + pluginsInput map[string]interface{}, +) *run_model.V2beta1Run { + ginkgo.GinkgoHelper() + runName := fmt.Sprintf("MLflow E2e Test Run-%v", testContext.TestStartTimeUTC) + runDescription := fmt.Sprintf("MLflow run for %s", runName) + logger.Log("Create a pipeline run with plugins_input for pipeline id=%s versionId=%s", + *pipelineID, *pipelineVersionID) + + createRunRequest := &runparams.RunServiceCreateRunParams{ + ExperimentID: experimentID, + Run: CreatePipelineRunWithPluginsInputPayload( + runName, + runDescription, + pipelineID, + pipelineVersionID, + experimentID, + inputParams, + pluginsInput, + ), + } + createdRun, createRunError := runClient.Create(createRunRequest) + gomega.Expect(createRunError).NotTo(gomega.HaveOccurred(), + "Failed to create run with plugins_input for pipeline id="+*pipelineID) + testContext.PipelineRun.CreatedRunIds = append(testContext.PipelineRun.CreatedRunIds, createdRun.RunID) + logger.Log("Created Pipeline Run with plugins_input, runId=%s", createdRun.RunID) + return createdRun +} + +// CreatePipelineRunWithPluginsInputPayload - Create a pipeline run payload with plugins_input. 
+func CreatePipelineRunWithPluginsInputPayload( + runName string, + runDescription string, + pipelineID *string, + pipelineVersionID *string, + experimentID *string, + inputParams map[string]interface{}, + pluginsInput map[string]interface{}, +) *run_model.V2beta1Run { + run := CreatePipelineRunPayload( + runName, + runDescription, + pipelineID, + pipelineVersionID, + experimentID, + inputParams, + ) + run.PluginsInput = pluginsInput + return run +} + +func BuildMLflowPluginsInput(experimentName string) map[string]interface{} { + mlflowCfg := map[string]interface{}{} + if experimentName != "" { + mlflowCfg["experiment_name"] = experimentName + } + return map[string]interface{}{ + mlflowPluginKey: mlflowCfg, + } +} + +func BuildMLflowPluginsInputDisabled() map[string]interface{} { + return map[string]interface{}{ + mlflowPluginKey: map[string]interface{}{ + "disabled": true, + }, + } +} + +// --- KFP plugins_output verification --- + +// VerifyPluginsOutput asserts that the run's plugins_output contains a valid +// MLflow entry with experiment_id, root_run_id, and the expected plugin state. 
+func VerifyPluginsOutput(run *run_model.V2beta1Run, expectedState run_model.V2beta1PluginState) error {
+	ginkgo.GinkgoHelper()
+	// Guard against a nil run so callers get an error instead of a panic.
+	if run == nil {
+		return fmt.Errorf("run should not be nil")
+	}
+	if run.PluginsOutput == nil {
+		return fmt.Errorf("plugins_output should not be nil")
+	}
+	mlflowOutput, ok := run.PluginsOutput[mlflowPluginKey]
+	if !ok {
+		return fmt.Errorf("plugins_output should contain %q key", mlflowPluginKey)
+	}
+	if mlflowOutput.State == nil {
+		return fmt.Errorf("plugins_output.%s.state should not be nil", mlflowPluginKey)
+	}
+
+	if *mlflowOutput.State != expectedState {
+		return fmt.Errorf(
+			"plugins_output.%s.state should be %s, got %s",
+			mlflowPluginKey, expectedState, *mlflowOutput.State,
+		)
+	}
+
+	if mlflowOutput.Entries == nil {
+		return fmt.Errorf("plugins_output.%s.entries should not be nil", mlflowPluginKey)
+	}
+	// Only the presence of the two entries is checked, not their values.
+	if _, ok := mlflowOutput.Entries["experiment_id"]; !ok {
+		return fmt.Errorf("plugins_output.%s.entries should have %q", mlflowPluginKey, "experiment_id")
+	}
+	if _, ok := mlflowOutput.Entries["root_run_id"]; !ok {
+		return fmt.Errorf("plugins_output.%s.entries should have %q", mlflowPluginKey, "root_run_id")
+	}
+	return nil
+}
+
+// GetPluginsOutputEntryValue returns the string value stored under entryKey in
+// the MLflow entry of the run's plugins_output. It returns an error when the
+// run or its plugins_output is nil, when the MLflow key or the requested entry
+// is missing, or when the entry's value is not a string.
+func GetPluginsOutputEntryValue(run *run_model.V2beta1Run, entryKey string) (string, error) {
+	ginkgo.GinkgoHelper()
+	// Guard against a nil run so callers get an error instead of a panic.
+	if run == nil {
+		return "", fmt.Errorf("run should not be nil")
+	}
+	if run.PluginsOutput == nil {
+		return "", fmt.Errorf("plugins_output should not be nil")
+	}
+	mlflowOutput, ok := run.PluginsOutput[mlflowPluginKey]
+	if !ok {
+		return "", fmt.Errorf("plugins_output should contain %q key", mlflowPluginKey)
+	}
+	entry, ok := mlflowOutput.Entries[entryKey]
+	if !ok {
+		return "", fmt.Errorf("plugins_output.%s.entries should have %q", mlflowPluginKey, entryKey)
+	}
+	strVal, ok := entry.Value.(string)
+	if !ok {
+		return "", fmt.Errorf(
+			"plugins_output.%s.entries[%q].value should be a string",
+			mlflowPluginKey, entryKey,
+		)
+	}
+	return strVal, nil
+}
+
+// VerifyNoPluginsOutput returns nil when the run's plugins_output is nil or has
+// no MLflow entry, and an error when an MLflow entry is present or run is nil.
+func VerifyNoPluginsOutput(run *run_model.V2beta1Run) error {
+	ginkgo.GinkgoHelper()
+	// Guard against a nil run so callers get an error instead of a panic.
+	if run == nil {
+		return fmt.Errorf("run should not be nil")
+	}
+	if run.PluginsOutput == nil {
+		return nil
+	}
+	if _, ok := run.PluginsOutput[mlflowPluginKey]; ok {
+		return fmt.Errorf("plugins_output should not contain %q key when MLflow is disabled", mlflowPluginKey)
+	}
+	return nil
+}
+
+// MLflowRun is the subset of an MLflow run that searchMLflowRuns decodes from
+// each element of a runs/search response.
+type MLflowRun struct {
+	Info MLflowRunInfo `json:"info"`
+	Data MLflowRunData `json:"data"`
+}
+
+// MLflowRunInfo holds the "info" object of an MLflow run.
+type MLflowRunInfo struct {
+	RunID        string `json:"run_id"`
+	ExperimentID string `json:"experiment_id"`
+	Status       string `json:"status"`
+}
+
+// MLflowRunData holds the "data" object of an MLflow run; only tags are decoded.
+type MLflowRunData struct {
+	Tags []MLflowTag `json:"tags"`
+}
+
+// MLflowTag is a single key/value tag attached to an MLflow run.
+type MLflowTag struct {
+	Key   string `json:"key"`
+	Value string `json:"value"`
+}
+
+// QueryMLflowExperimentByName looks up an MLflow experiment by name through the
+// client built for endpoint. Client-construction errors are returned as-is;
+// lookup errors are wrapped with the experiment name.
+func QueryMLflowExperimentByName(endpoint, experimentName string) (*mlflowclient.MLflowExperiment, error) {
+	ginkgo.GinkgoHelper()
+	client, err := getMLflowClient(endpoint)
+	if err != nil {
+		return nil, err
+	}
+	experiment, err := client.GetExperimentByName(context.Background(), experimentName)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query MLflow experiment by name %q: %w", experimentName, err)
+	}
+	return experiment, nil
+}
+
+// QueryMLflowRuns returns every run of the given experiment, with no filter,
+// requesting up to 1000 results per page.
+func QueryMLflowRuns(endpoint, experimentID string) ([]MLflowRun, error) {
+	return searchMLflowRuns(endpoint, []string{experimentID}, "", 1000)
+}
+
+// QueryMLflowRunByID returns the run with the given runID from the given
+// experiment. It lists the experiment's runs and scans them for a matching
+// run ID. An empty experimentID, a search failure, or no matching run each
+// yield an error.
+func QueryMLflowRunByID(endpoint, runID, experimentID string) (*MLflowRun, error) {
+	ginkgo.GinkgoHelper()
+	if experimentID == "" {
+		return nil, fmt.Errorf("experimentID is required to query MLflow run %q", runID)
+	}
+	runs, err := searchMLflowRuns(endpoint, []string{experimentID}, "", 1000)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query MLflow run %q: %w", runID, err)
+	}
+	// Index into the slice so the returned pointer refers to the slice element
+	// rather than to the range variable.
+	for i := range runs {
+		if runs[i].Info.RunID == runID {
+			return &runs[i], nil
+		}
+	}
+	return nil, fmt.Errorf("MLflow run %q not found in experiment %q", runID, experimentID)
+}
+
+// VerifyMLflowRunStatus returns an error unless the MLflow run identified by
+// runID (within experimentID) has exactly expectedStatus. The error reports
+// both the expected and the actual status.
+func VerifyMLflowRunStatus(endpoint, runID, experimentID, expectedStatus string) error {
+	ginkgo.GinkgoHelper()
+	mlflowRun, err := QueryMLflowRunByID(endpoint, runID, experimentID)
+	if err != nil {
+		return err
+	}
+	if mlflowRun.Info.Status != expectedStatus {
+		return fmt.Errorf(
+			"MLflow run %s should have status %s, got %s",
+			runID, expectedStatus, mlflowRun.Info.Status,
+		)
+	}
+	return nil
+}
+
+// VerifyMLflowRunTags returns an error unless every key/value pair in
+// expectedTags is present on the MLflow run identified by runID (within
+// experimentID). Tags on the run that are not in expectedTags are ignored.
+// The error distinguishes a missing tag from a tag with a different value.
+func VerifyMLflowRunTags(endpoint, runID, experimentID string, expectedTags map[string]string) error {
+	ginkgo.GinkgoHelper()
+	mlflowRun, err := QueryMLflowRunByID(endpoint, runID, experimentID)
+	if err != nil {
+		return err
+	}
+	tagMap := make(map[string]string, len(mlflowRun.Data.Tags))
+	for _, tag := range mlflowRun.Data.Tags {
+		tagMap[tag.Key] = tag.Value
+	}
+	for key, expectedValue := range expectedTags {
+		actualValue, ok := tagMap[key]
+		if !ok {
+			return fmt.Errorf("MLflow run %s should have tag %s=%s, but the tag is missing", runID, key, expectedValue)
+		}
+		if actualValue != expectedValue {
+			return fmt.Errorf(
+				"MLflow run %s should have tag %s=%s, got %s=%s",
+				runID, key, expectedValue, key, actualValue,
+			)
+		}
+	}
+	return nil
+}
+
+// QueryNestedRuns returns the MLflow runs whose mlflow.parentRunId tag matches
+// the given parent run ID. Only the first experimentID argument is used; when
+// it is absent or empty the search is issued with no experiment IDs.
+func QueryNestedRuns(endpoint, parentRunID string, experimentID ...string) ([]MLflowRun, error) {
+	ginkgo.GinkgoHelper()
+	filter := fmt.Sprintf(`tags.mlflow.parentRunId = '%s'`, parentRunID)
+	if len(experimentID) > 0 && experimentID[0] != "" {
+		return searchMLflowRuns(endpoint, []string{experimentID[0]}, filter, 1000)
+	}
+	return searchMLflowRuns(endpoint, nil, filter, 1000)
+}
+
+// CountNestedRuns counts the number of MLflow runs that have a
+// mlflow.parentRunId tag matching the given parent run ID.
+func CountNestedRuns(endpoint, parentRunID string, experimentID ...string) (int, error) {
+	nestedRuns, err := QueryNestedRuns(endpoint, parentRunID, experimentID...)
+	if err != nil {
+		return 0, err
+	}
+	return len(nestedRuns), nil
+}
+
+// searchMLflowRuns calls SearchRuns on the MLflow client for endpoint and
+// follows NextPageToken until it is empty, decoding each raw run into an
+// MLflowRun. maxResults is passed to every page request. Any client, search,
+// or decode error aborts the whole query.
+func searchMLflowRuns(endpoint string, experimentIDs []string, filter string, maxResults int) ([]MLflowRun, error) {
+	ginkgo.GinkgoHelper()
+	client, err := getMLflowClient(endpoint)
+	if err != nil {
+		return nil, err
+	}
+	pageToken := ""
+	var allRuns []MLflowRun
+	for {
+		response, err := client.SearchRuns(
+			context.Background(),
+			experimentIDs,
+			filter,
+			maxResults,
+			pageToken,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("failed to search MLflow runs: %w", err)
+		}
+		for _, rawRun := range response.Runs {
+			var parsedRun MLflowRun
+			if unmarshalErr := json.Unmarshal(rawRun, &parsedRun); unmarshalErr != nil {
+				return nil, fmt.Errorf("failed to unmarshal MLflow runs/search response: %w", unmarshalErr)
+			}
+			allRuns = append(allRuns, parsedRun)
+		}
+		// An empty token marks the last page.
+		if response.NextPageToken == "" {
+			break
+		}
+		pageToken = response.NextPageToken
+	}
+	return allRuns, nil
+}
diff --git a/backend/test/proto_tests/testdata/generated-1791485/recurring_run.json b/backend/test/proto_tests/testdata/generated-1791485/recurring_run.json index b5ac3cbdb00..4d7b743393b 100644 --- a/backend/test/proto_tests/testdata/generated-1791485/recurring_run.json +++ b/backend/test/proto_tests/testdata/generated-1791485/recurring_run.json @@ -29,5 +29,6 @@ "error": null, "no_catchup": false, "namespace": "namespace1", - "experiment_id": "" + "experiment_id": "", + "plugins_input": {} } \ No newline at end of file diff --git a/backend/test/proto_tests/testdata/generated-1791485/run_completed.json b/backend/test/proto_tests/testdata/generated-1791485/run_completed.json index 1eb3d07da91..3cadf5f7464 100644 --- a/backend/test/proto_tests/testdata/generated-1791485/run_completed.json +++ b/backend/test/proto_tests/testdata/generated-1791485/run_completed.json @@ -23,5 +23,7 @@ "error": null, "run_details": null, "recurring_run_id": "recurring-schedule-001", - "state_history": [] + "state_history": [], + "plugins_input": 
{}, + "plugins_output": {} } \ No newline at end of file diff --git a/backend/test/proto_tests/testdata/generated-1791485/run_completed_with_spec.json b/backend/test/proto_tests/testdata/generated-1791485/run_completed_with_spec.json index 331453529d3..98df85173c3 100644 --- a/backend/test/proto_tests/testdata/generated-1791485/run_completed_with_spec.json +++ b/backend/test/proto_tests/testdata/generated-1791485/run_completed_with_spec.json @@ -70,5 +70,7 @@ "error": null, "run_details": null, "recurring_run_id": "recurring-schedule-001", - "state_history": [] + "state_history": [], + "plugins_input": {}, + "plugins_output": {} } \ No newline at end of file diff --git a/backend/test/proto_tests/testdata/generated-1791485/run_failed.json b/backend/test/proto_tests/testdata/generated-1791485/run_failed.json index 5dae9160864..b5cdb686642 100644 --- a/backend/test/proto_tests/testdata/generated-1791485/run_failed.json +++ b/backend/test/proto_tests/testdata/generated-1791485/run_failed.json @@ -17,5 +17,7 @@ }, "run_details": null, "recurring_run_id": "", - "state_history": [] + "state_history": [], + "plugins_input": {}, + "plugins_output": {} } \ No newline at end of file diff --git a/backend/test/v2/integration/run_api_test.go b/backend/test/v2/integration/run_api_test.go index de7cc6490a7..b6c31682a96 100644 --- a/backend/test/v2/integration/run_api_test.go +++ b/backend/test/v2/integration/run_api_test.go @@ -359,6 +359,8 @@ func (s *RunAPITestSuite) checkTerminatedRunDetail(t *testing.T, run *run_model. StorageState: run.StorageState, ServiceAccount: test.GetDefaultPipelineRunnerServiceAccount(*isKubeflowMode), PipelineSpec: run.PipelineSpec, + PluginsInput: run.PluginsInput, + PluginsOutput: run.PluginsOutput, ExperimentID: experimentID, PipelineVersionReference: &run_model.V2beta1PipelineVersionReference{ PipelineID: pipelineID, @@ -383,6 +385,8 @@ func (s *RunAPITestSuite) checkHelloWorldRunDetail(t *testing.T, run *run_model. 
StorageState: run.StorageState, ServiceAccount: test.GetDefaultPipelineRunnerServiceAccount(*isKubeflowMode), PipelineSpec: run.PipelineSpec, + PluginsInput: run.PluginsInput, + PluginsOutput: run.PluginsOutput, ExperimentID: experimentID, PipelineVersionReference: &run_model.V2beta1PipelineVersionReference{ PipelineID: pipelineID, @@ -419,6 +423,8 @@ func (s *RunAPITestSuite) checkArgParamsRunDetail(t *testing.T, run *run_model.V StorageState: run.StorageState, ServiceAccount: test.GetDefaultPipelineRunnerServiceAccount(*isKubeflowMode), PipelineSpec: run.PipelineSpec, + PluginsInput: run.PluginsInput, + PluginsOutput: run.PluginsOutput, RuntimeConfig: &run_model.V2beta1RuntimeConfig{ Parameters: map[string]interface{}{ "param1": "goodbye", diff --git a/go.mod b/go.mod index c810064e8cb..284584c4d63 100644 --- a/go.mod +++ b/go.mod @@ -81,6 +81,7 @@ require ( gorm.io/driver/postgres v1.6.0 gorm.io/driver/sqlite v1.6.0 gorm.io/gorm v1.30.1 + k8s.io/apiextensions-apiserver v0.35.2 ) require ( @@ -252,7 +253,6 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/apiextensions-apiserver v0.35.2 // indirect k8s.io/gengo/v2 v2.0.0-20250922181213-ec3ebc5fd46b // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml index 3807b02fb8d..9e8e58adb5c 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml @@ -323,6 +323,23 @@ rules: - update - patch - delete + - apiGroups: + - mlflow.kubeflow.org + resources: + - experiments + verbs: + - get + - list + - create + - update + - apiGroups: + - "" + resources: + - configmaps + resourceNames: + - kfp-launcher + 
verbs: + - get --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -400,6 +417,15 @@ rules: - jobs verbs: - '*' + - apiGroups: + - mlflow.kubeflow.org + resources: + - experiments + verbs: + - get + - list + - create + - update --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding diff --git a/manifests/kustomize/base/installs/multi-user/api-service/cluster-role.yaml b/manifests/kustomize/base/installs/multi-user/api-service/cluster-role.yaml index 14e6e415262..2336ffdce93 100644 --- a/manifests/kustomize/base/installs/multi-user/api-service/cluster-role.yaml +++ b/manifests/kustomize/base/installs/multi-user/api-service/cluster-role.yaml @@ -61,3 +61,11 @@ rules: - tokenreviews verbs: - create +- apiGroups: + - "" + resources: + - configmaps + resourceNames: + - kfp-launcher + verbs: + - get diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml index 3e5e450d4d1..bdb943053eb 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml @@ -297,4 +297,14 @@ spec: requests: cpu: 250m memory: 500Mi + # Trust bundle for plugins.mlflow.tls.caBundlePath (create ConfigMap mlflow-tracking-ca in the same namespace). 
+ volumeMounts: + - name: mlflow-tracking-ca + mountPath: /etc/mlflow-tracking-ca + readOnly: true + volumes: + - name: mlflow-tracking-ca + configMap: + name: mlflow-tracking-ca + optional: true serviceAccountName: ml-pipeline diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-role.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-role.yaml index 59d3926c0dd..c3535152947 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-role.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-role.yaml @@ -63,3 +63,20 @@ rules: - tokenreviews verbs: - create +- apiGroups: + - mlflow.kubeflow.org + resources: + - experiments + verbs: + - get + - list + - create + - update +- apiGroups: + - "" + resources: + - configmaps + resourceNames: + - kfp-launcher + verbs: + - get diff --git a/manifests/kustomize/base/pipeline/pipeline-runner-role.yaml b/manifests/kustomize/base/pipeline/pipeline-runner-role.yaml index eba0ee9f2d6..8214f1bdc33 100644 --- a/manifests/kustomize/base/pipeline/pipeline-runner-role.yaml +++ b/manifests/kustomize/base/pipeline/pipeline-runner-role.yaml @@ -85,3 +85,12 @@ rules: verbs: - create - patch +- apiGroups: + - mlflow.kubeflow.org + resources: + - experiments + verbs: + - get + - list + - create + - update diff --git a/test_data/compiled-workflows/add_numbers.yaml b/test_data/compiled-workflows/add_numbers.yaml index de6ffdefd65..542a9841381 100644 --- a/test_data/compiled-workflows/add_numbers.yaml +++ b/test_data/compiled-workflows/add_numbers.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: 
'{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/arguments_parameters.yaml b/test_data/compiled-workflows/arguments_parameters.yaml index 6e33503119a..37082ea23fb 100644 --- a/test_data/compiled-workflows/arguments_parameters.yaml +++ b/test_data/compiled-workflows/arguments_parameters.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/artifact_cache.yaml b/test_data/compiled-workflows/artifact_cache.yaml index 401b4d7d9d8..8ad848e2d3f 100644 --- a/test_data/compiled-workflows/artifact_cache.yaml +++ b/test_data/compiled-workflows/artifact_cache.yaml @@ -136,7 +136,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -243,7 +245,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -375,7 +379,9 @@ spec: name: 
iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/artifact_crust.yaml b/test_data/compiled-workflows/artifact_crust.yaml index 1e55ef9d2a9..c2352cbe185 100644 --- a/test_data/compiled-workflows/artifact_crust.yaml +++ b/test_data/compiled-workflows/artifact_crust.yaml @@ -136,7 +136,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -243,7 +245,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -375,7 +379,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/artifacts_complex.yaml b/test_data/compiled-workflows/artifacts_complex.yaml index 23f3c90176a..26579cd91ce 100644 --- a/test_data/compiled-workflows/artifacts_complex.yaml +++ b/test_data/compiled-workflows/artifacts_complex.yaml @@ -162,7 +162,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -269,7 +271,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -482,7 +486,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + 
annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/artifacts_simple.yaml b/test_data/compiled-workflows/artifacts_simple.yaml index 980c19b8ec0..b41c57e5319 100644 --- a/test_data/compiled-workflows/artifacts_simple.yaml +++ b/test_data/compiled-workflows/artifacts_simple.yaml @@ -147,7 +147,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -254,7 +256,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -386,7 +390,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/collected_artifacts.yaml b/test_data/compiled-workflows/collected_artifacts.yaml index 92272c9cd13..99fc2bc1926 100644 --- a/test_data/compiled-workflows/collected_artifacts.yaml +++ b/test_data/compiled-workflows/collected_artifacts.yaml @@ -228,7 +228,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -335,7 +337,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -467,7 +471,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/collected_parameters.yaml b/test_data/compiled-workflows/collected_parameters.yaml index 0dcc4c50b89..9c4669d0b0c 100644 --- a/test_data/compiled-workflows/collected_parameters.yaml +++ b/test_data/compiled-workflows/collected_parameters.yaml @@ -163,7 +163,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -270,7 +272,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -427,7 +431,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/component_with_metadata_fields.yaml b/test_data/compiled-workflows/component_with_metadata_fields.yaml index 8a379d4acaa..de559929dd8 100644 --- a/test_data/compiled-workflows/component_with_metadata_fields.yaml +++ b/test_data/compiled-workflows/component_with_metadata_fields.yaml @@ -132,7 +132,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -239,7 +241,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -371,7 +375,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/component_with_optional_inputs.yaml b/test_data/compiled-workflows/component_with_optional_inputs.yaml index f8e8d63a33b..2d552bffb2a 100644 --- a/test_data/compiled-workflows/component_with_optional_inputs.yaml +++ b/test_data/compiled-workflows/component_with_optional_inputs.yaml @@ -121,7 +121,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -228,7 +230,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -360,7 +364,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/component_with_pip_index_urls.yaml b/test_data/compiled-workflows/component_with_pip_index_urls.yaml index cb5e4cf00a7..9ca9af5805d 100644 --- a/test_data/compiled-workflows/component_with_pip_index_urls.yaml +++ b/test_data/compiled-workflows/component_with_pip_index_urls.yaml @@ -119,7 +119,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -226,7 +228,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -358,7 +362,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/component_with_pip_install.yaml b/test_data/compiled-workflows/component_with_pip_install.yaml index bc771212680..8d5e0d3d3f4 100644 --- a/test_data/compiled-workflows/component_with_pip_install.yaml +++ b/test_data/compiled-workflows/component_with_pip_install.yaml @@ -119,7 +119,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -226,7 +228,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -358,7 +362,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/component_with_pip_install_in_venv.yaml b/test_data/compiled-workflows/component_with_pip_install_in_venv.yaml index 4f6c6ef817b..9d049bdeabc 100644 --- a/test_data/compiled-workflows/component_with_pip_install_in_venv.yaml +++ b/test_data/compiled-workflows/component_with_pip_install_in_venv.yaml @@ -120,7 +120,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -227,7 +229,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -359,7 +363,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: 
{} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/components_with_optional_artifacts.yaml b/test_data/compiled-workflows/components_with_optional_artifacts.yaml index 304042d1a4c..5e089c8da2e 100644 --- a/test_data/compiled-workflows/components_with_optional_artifacts.yaml +++ b/test_data/compiled-workflows/components_with_optional_artifacts.yaml @@ -133,7 +133,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -240,7 +242,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -388,7 +392,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -473,7 +479,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/concat_message.yaml b/test_data/compiled-workflows/concat_message.yaml index 83347d58281..3c78fcd065e 100644 --- a/test_data/compiled-workflows/concat_message.yaml +++ b/test_data/compiled-workflows/concat_message.yaml @@ -118,7 +118,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -225,7 +227,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -357,7 +361,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/conditional_producer_and_consumers.yaml b/test_data/compiled-workflows/conditional_producer_and_consumers.yaml index eeef66b184b..47adf7cae91 100644 --- a/test_data/compiled-workflows/conditional_producer_and_consumers.yaml +++ b/test_data/compiled-workflows/conditional_producer_and_consumers.yaml @@ -139,7 +139,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -246,7 +248,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -410,7 +414,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_component_with_no_inputs.yaml b/test_data/compiled-workflows/container_component_with_no_inputs.yaml index 9c2081fe84e..5b1e103c35f 100644 --- a/test_data/compiled-workflows/container_component_with_no_inputs.yaml +++ b/test_data/compiled-workflows/container_component_with_no_inputs.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - 
name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_io.yaml b/test_data/compiled-workflows/container_io.yaml index d22e741c5c0..66413eefd5f 100644 --- a/test_data/compiled-workflows/container_io.yaml +++ b/test_data/compiled-workflows/container_io.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_no_input.yaml b/test_data/compiled-workflows/container_no_input.yaml index 923a04b6e85..267663c05d9 100644 --- a/test_data/compiled-workflows/container_no_input.yaml +++ b/test_data/compiled-workflows/container_no_input.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_with_artifact_output.yaml b/test_data/compiled-workflows/container_with_artifact_output.yaml index 4f858708083..07284e1187b 100644 --- a/test_data/compiled-workflows/container_with_artifact_output.yaml +++ b/test_data/compiled-workflows/container_with_artifact_output.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_with_concat_placeholder.yaml b/test_data/compiled-workflows/container_with_concat_placeholder.yaml index 89f38c22455..7c7c85853e3 100644 --- a/test_data/compiled-workflows/container_with_concat_placeholder.yaml +++ b/test_data/compiled-workflows/container_with_concat_placeholder.yaml @@ -109,7 +109,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -216,7 +218,9 @@ spec: inputs: parameters: - name: pod-spec-patch 
- metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -348,7 +352,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_with_if_placeholder.yaml b/test_data/compiled-workflows/container_with_if_placeholder.yaml index 0df3f24826e..1ab1a375515 100644 --- a/test_data/compiled-workflows/container_with_if_placeholder.yaml +++ b/test_data/compiled-workflows/container_with_if_placeholder.yaml @@ -111,7 +111,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -218,7 +220,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -350,7 +354,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/container_with_placeholder_in_fstring.yaml b/test_data/compiled-workflows/container_with_placeholder_in_fstring.yaml index 996046e0c58..5f83fe7aec4 100644 --- a/test_data/compiled-workflows/container_with_placeholder_in_fstring.yaml +++ b/test_data/compiled-workflows/container_with_placeholder_in_fstring.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 
+217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/containerized_python_component.yaml b/test_data/compiled-workflows/containerized_python_component.yaml index c4f3550b4ac..b9624a2de32 100644 --- a/test_data/compiled-workflows/containerized_python_component.yaml +++ b/test_data/compiled-workflows/containerized_python_component.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/create_pod_metadata_complex.yaml b/test_data/compiled-workflows/create_pod_metadata_complex.yaml index cbac80c4550..b418f1ede0c 100644 --- a/test_data/compiled-workflows/create_pod_metadata_complex.yaml +++ b/test_data/compiled-workflows/create_pod_metadata_complex.yaml @@ -161,7 +161,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: 
parameters: @@ -268,7 +270,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -403,6 +407,7 @@ spec: metadata: annotations: '{{inputs.parameters.pod-metadata-annotation-key}}': '{{inputs.parameters.pod-metadata-annotation-val}}' + pipelines.kubeflow.org/runtime-role: launcher labels: '{{inputs.parameters.pod-metadata-label-key-1}}': '{{inputs.parameters.pod-metadata-label-val-1}}' '{{inputs.parameters.pod-metadata-label-key-2}}': '{{inputs.parameters.pod-metadata-label-val-2}}' @@ -533,6 +538,7 @@ spec: annotations: '{{inputs.parameters.pod-metadata-annotation-key-1}}': '{{inputs.parameters.pod-metadata-annotation-val-1}}' '{{inputs.parameters.pod-metadata-annotation-key-2}}': '{{inputs.parameters.pod-metadata-annotation-val-2}}' + pipelines.kubeflow.org/runtime-role: launcher name: metadata-2-0-system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -736,7 +742,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/cross_loop_after_topology.yaml b/test_data/compiled-workflows/cross_loop_after_topology.yaml index a53b89b94a9..f3e1e5048ef 100644 --- a/test_data/compiled-workflows/cross_loop_after_topology.yaml +++ b/test_data/compiled-workflows/cross_loop_after_topology.yaml @@ -138,7 +138,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -245,7 +247,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: 
system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -409,7 +413,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/dict_input.yaml b/test_data/compiled-workflows/dict_input.yaml index 06d27d21937..2c233174dec 100644 --- a/test_data/compiled-workflows/dict_input.yaml +++ b/test_data/compiled-workflows/dict_input.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/embedded_artifact.yaml b/test_data/compiled-workflows/embedded_artifact.yaml index 5275df4d526..36e47c8bc1a 100644 --- a/test_data/compiled-workflows/embedded_artifact.yaml +++ b/test_data/compiled-workflows/embedded_artifact.yaml @@ -158,7 +158,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -265,7 +267,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -421,7 
+425,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/env-var.yaml b/test_data/compiled-workflows/env-var.yaml index 0177ac3b3ed..a92868c9fd3 100644 --- a/test_data/compiled-workflows/env-var.yaml +++ b/test_data/compiled-workflows/env-var.yaml @@ -119,7 +119,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -226,7 +228,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -358,7 +362,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/fail_v2.yaml b/test_data/compiled-workflows/fail_v2.yaml index 49cfe93539c..78fff1eea40 100644 --- a/test_data/compiled-workflows/fail_v2.yaml +++ b/test_data/compiled-workflows/fail_v2.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: 
driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/flip_coin.yaml b/test_data/compiled-workflows/flip_coin.yaml index be8efc3a924..db334c82fe2 100644 --- a/test_data/compiled-workflows/flip_coin.yaml +++ b/test_data/compiled-workflows/flip_coin.yaml @@ -174,7 +174,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -281,7 +283,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -449,7 +453,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/hello_world.yaml b/test_data/compiled-workflows/hello_world.yaml index 497c09c20b9..311355aa603 100644 --- a/test_data/compiled-workflows/hello_world.yaml +++ b/test_data/compiled-workflows/hello_world.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/identity.yaml 
b/test_data/compiled-workflows/identity.yaml index e35b003f95a..a82951037f1 100644 --- a/test_data/compiled-workflows/identity.yaml +++ b/test_data/compiled-workflows/identity.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/if_elif_else_complex.yaml b/test_data/compiled-workflows/if_elif_else_complex.yaml index 541abc3533f..14fd7b18ccb 100644 --- a/test_data/compiled-workflows/if_elif_else_complex.yaml +++ b/test_data/compiled-workflows/if_elif_else_complex.yaml @@ -208,7 +208,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -315,7 +317,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -448,7 +452,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/if_elif_else_with_oneof_parameters.yaml 
b/test_data/compiled-workflows/if_elif_else_with_oneof_parameters.yaml index eae6957a429..84473d623c4 100644 --- a/test_data/compiled-workflows/if_elif_else_with_oneof_parameters.yaml +++ b/test_data/compiled-workflows/if_elif_else_with_oneof_parameters.yaml @@ -163,7 +163,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -270,7 +272,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -468,7 +472,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/if_else_with_oneof_artifacts.yaml b/test_data/compiled-workflows/if_else_with_oneof_artifacts.yaml index d7e105ba593..d0558d922ff 100644 --- a/test_data/compiled-workflows/if_else_with_oneof_artifacts.yaml +++ b/test_data/compiled-workflows/if_else_with_oneof_artifacts.yaml @@ -154,7 +154,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -261,7 +263,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -425,7 +429,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/if_else_with_oneof_parameters.yaml b/test_data/compiled-workflows/if_else_with_oneof_parameters.yaml index bac42bafaf2..35c8ab8e1a0 100644 --- a/test_data/compiled-workflows/if_else_with_oneof_parameters.yaml +++ b/test_data/compiled-workflows/if_else_with_oneof_parameters.yaml @@ -141,7 +141,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -248,7 +250,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -414,7 +418,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/input_artifact.yaml b/test_data/compiled-workflows/input_artifact.yaml index f7f824e1002..33cbb3519b7 100644 --- a/test_data/compiled-workflows/input_artifact.yaml +++ b/test_data/compiled-workflows/input_artifact.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/iris_pipeline_compiled.yaml b/test_data/compiled-workflows/iris_pipeline_compiled.yaml index 5fbe07a89f1..c955b6af471 100644 --- a/test_data/compiled-workflows/iris_pipeline_compiled.yaml +++ b/test_data/compiled-workflows/iris_pipeline_compiled.yaml @@ -170,7 +170,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -277,7 +279,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -459,7 +463,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/lightweight_python_functions_pipeline.yaml b/test_data/compiled-workflows/lightweight_python_functions_pipeline.yaml index f3a185613bb..74a84187946 100644 --- a/test_data/compiled-workflows/lightweight_python_functions_pipeline.yaml +++ b/test_data/compiled-workflows/lightweight_python_functions_pipeline.yaml @@ -172,7 +172,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -279,7 +281,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -436,7 +440,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver 
outputs: parameters: diff --git a/test_data/compiled-workflows/lightweight_python_functions_with_outputs.yaml b/test_data/compiled-workflows/lightweight_python_functions_with_outputs.yaml index ec74f4ca372..378b49d57d9 100644 --- a/test_data/compiled-workflows/lightweight_python_functions_with_outputs.yaml +++ b/test_data/compiled-workflows/lightweight_python_functions_with_outputs.yaml @@ -164,7 +164,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -271,7 +273,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -477,7 +481,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/log_streaming_compiled.yaml b/test_data/compiled-workflows/log_streaming_compiled.yaml index ebb532ced27..737751e8a4b 100644 --- a/test_data/compiled-workflows/log_streaming_compiled.yaml +++ b/test_data/compiled-workflows/log_streaming_compiled.yaml @@ -121,7 +121,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -228,7 +230,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -360,7 +364,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/long-running.yaml b/test_data/compiled-workflows/long-running.yaml index f20fe9eb81e..ce53f920124 100644 --- a/test_data/compiled-workflows/long-running.yaml +++ b/test_data/compiled-workflows/long-running.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -372,7 +376,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/loop_consume_upstream.yaml b/test_data/compiled-workflows/loop_consume_upstream.yaml index dd511ef15d2..12e115ccc82 100644 --- a/test_data/compiled-workflows/loop_consume_upstream.yaml +++ b/test_data/compiled-workflows/loop_consume_upstream.yaml @@ -165,7 +165,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -272,7 +274,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -431,7 +435,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver 
outputs: parameters: diff --git a/test_data/compiled-workflows/metrics_visualization_v2.yaml b/test_data/compiled-workflows/metrics_visualization_v2.yaml index 908aee03a83..293551e8fdd 100644 --- a/test_data/compiled-workflows/metrics_visualization_v2.yaml +++ b/test_data/compiled-workflows/metrics_visualization_v2.yaml @@ -209,7 +209,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -316,7 +318,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -544,7 +548,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/missing_kubernetes_optional_inputs.yaml b/test_data/compiled-workflows/missing_kubernetes_optional_inputs.yaml index bdfccd231af..8b251c38439 100644 --- a/test_data/compiled-workflows/missing_kubernetes_optional_inputs.yaml +++ b/test_data/compiled-workflows/missing_kubernetes_optional_inputs.yaml @@ -120,7 +120,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -227,7 +229,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -362,7 +366,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver 
name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/mixed_parameters.yaml b/test_data/compiled-workflows/mixed_parameters.yaml index 759de56d366..1b1287d1283 100644 --- a/test_data/compiled-workflows/mixed_parameters.yaml +++ b/test_data/compiled-workflows/mixed_parameters.yaml @@ -134,7 +134,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -241,7 +243,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -373,7 +377,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/modelcar.yaml b/test_data/compiled-workflows/modelcar.yaml index 5f85063aec6..59c124e5013 100644 --- a/test_data/compiled-workflows/modelcar.yaml +++ b/test_data/compiled-workflows/modelcar.yaml @@ -142,7 +142,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -249,7 +251,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -333,7 +337,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -487,7 +493,9 @@ spec: name: iteration-index - 
default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/mounted_cabundle_configmap.yaml b/test_data/compiled-workflows/mounted_cabundle_configmap.yaml index af20a1ee82a..9a54c7b1690 100644 --- a/test_data/compiled-workflows/mounted_cabundle_configmap.yaml +++ b/test_data/compiled-workflows/mounted_cabundle_configmap.yaml @@ -113,7 +113,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -229,7 +231,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -372,7 +376,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/mounted_cabundle_secret.yaml b/test_data/compiled-workflows/mounted_cabundle_secret.yaml index 191b1a9e0e8..daefed05f51 100644 --- a/test_data/compiled-workflows/mounted_cabundle_secret.yaml +++ b/test_data/compiled-workflows/mounted_cabundle_secret.yaml @@ -113,7 +113,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -229,7 +231,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -372,7 +376,9 @@ spec: name: iteration-index - default: 
DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/multiple_artifacts_namedtuple.yaml b/test_data/compiled-workflows/multiple_artifacts_namedtuple.yaml index 0f7888c032e..c80f5ae77ad 100644 --- a/test_data/compiled-workflows/multiple_artifacts_namedtuple.yaml +++ b/test_data/compiled-workflows/multiple_artifacts_namedtuple.yaml @@ -138,7 +138,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -245,7 +247,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -377,7 +381,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/multiple_parameters_namedtuple.yaml b/test_data/compiled-workflows/multiple_parameters_namedtuple.yaml index 80250a01eb4..881e6f48d0d 100644 --- a/test_data/compiled-workflows/multiple_parameters_namedtuple.yaml +++ b/test_data/compiled-workflows/multiple_parameters_namedtuple.yaml @@ -137,7 +137,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -244,7 +246,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -376,7 +380,9 @@ spec: 
name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/nested_pipeline_opt_input_child_level_compiled.yaml b/test_data/compiled-workflows/nested_pipeline_opt_input_child_level_compiled.yaml index ea49f9f4ad1..95242618201 100644 --- a/test_data/compiled-workflows/nested_pipeline_opt_input_child_level_compiled.yaml +++ b/test_data/compiled-workflows/nested_pipeline_opt_input_child_level_compiled.yaml @@ -197,7 +197,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -304,7 +306,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -556,7 +560,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/nested_pipeline_opt_inputs_nil_compiled.yaml b/test_data/compiled-workflows/nested_pipeline_opt_inputs_nil_compiled.yaml index 0990d07101f..01ed9241182 100644 --- a/test_data/compiled-workflows/nested_pipeline_opt_inputs_nil_compiled.yaml +++ b/test_data/compiled-workflows/nested_pipeline_opt_inputs_nil_compiled.yaml @@ -148,7 +148,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -255,7 +257,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -435,7 +439,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/nested_pipeline_opt_inputs_parent_level_compiled.yaml b/test_data/compiled-workflows/nested_pipeline_opt_inputs_parent_level_compiled.yaml index ba852795df5..34cb9adee1f 100644 --- a/test_data/compiled-workflows/nested_pipeline_opt_inputs_parent_level_compiled.yaml +++ b/test_data/compiled-workflows/nested_pipeline_opt_inputs_parent_level_compiled.yaml @@ -200,7 +200,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -307,7 +309,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -567,7 +571,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/nested_return.yaml b/test_data/compiled-workflows/nested_return.yaml index afefd925131..9872013261f 100644 --- a/test_data/compiled-workflows/nested_return.yaml +++ b/test_data/compiled-workflows/nested_return.yaml @@ -118,7 +118,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -225,7 +227,9 @@ spec: inputs: parameters: - name: pod-spec-patch - 
metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -357,7 +361,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/nested_with_parameters.yaml b/test_data/compiled-workflows/nested_with_parameters.yaml index 033f1a45129..d12a5a1015d 100644 --- a/test_data/compiled-workflows/nested_with_parameters.yaml +++ b/test_data/compiled-workflows/nested_with_parameters.yaml @@ -150,7 +150,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -257,7 +259,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -438,7 +442,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/notebook_component_mixed.yaml b/test_data/compiled-workflows/notebook_component_mixed.yaml index f20b92ce2ec..f817f582f79 100644 --- a/test_data/compiled-workflows/notebook_component_mixed.yaml +++ b/test_data/compiled-workflows/notebook_component_mixed.yaml @@ -346,7 +346,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -453,7 +455,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + 
metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -635,7 +639,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/notebook_component_simple.yaml b/test_data/compiled-workflows/notebook_component_simple.yaml index 8635ad128f7..a1104894aa2 100644 --- a/test_data/compiled-workflows/notebook_component_simple.yaml +++ b/test_data/compiled-workflows/notebook_component_simple.yaml @@ -215,7 +215,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -322,7 +324,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -454,7 +458,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/output_metrics.yaml b/test_data/compiled-workflows/output_metrics.yaml index a62a2af6f91..fe294e9fd54 100644 --- a/test_data/compiled-workflows/output_metrics.yaml +++ b/test_data/compiled-workflows/output_metrics.yaml @@ -119,7 +119,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -226,7 +228,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -358,7 +362,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/parallel_for_after_dependency.yaml b/test_data/compiled-workflows/parallel_for_after_dependency.yaml index 5776ce6f02c..e624b2191b0 100644 --- a/test_data/compiled-workflows/parallel_for_after_dependency.yaml +++ b/test_data/compiled-workflows/parallel_for_after_dependency.yaml @@ -120,7 +120,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -227,7 +229,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -359,7 +363,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/parameter_cache.yaml b/test_data/compiled-workflows/parameter_cache.yaml index 0d874d1e1ab..860632ee60d 100644 --- a/test_data/compiled-workflows/parameter_cache.yaml +++ b/test_data/compiled-workflows/parameter_cache.yaml @@ -134,7 +134,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -241,7 +243,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -373,7 +377,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/parameter_oneof.yaml b/test_data/compiled-workflows/parameter_oneof.yaml index df9c5ac6f90..5cb71da2553 100644 --- a/test_data/compiled-workflows/parameter_oneof.yaml +++ b/test_data/compiled-workflows/parameter_oneof.yaml @@ -173,7 +173,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -280,7 +282,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -446,7 +450,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/parameters_complex.yaml b/test_data/compiled-workflows/parameters_complex.yaml index ba547b6933e..32512eaf97b 100644 --- a/test_data/compiled-workflows/parameters_complex.yaml +++ b/test_data/compiled-workflows/parameters_complex.yaml @@ -167,7 +167,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -274,7 +276,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl 
outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -406,7 +410,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/parameters_simple.yaml b/test_data/compiled-workflows/parameters_simple.yaml index 5598d7ca741..e168bbdf082 100644 --- a/test_data/compiled-workflows/parameters_simple.yaml +++ b/test_data/compiled-workflows/parameters_simple.yaml @@ -138,7 +138,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -245,7 +247,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -377,7 +381,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_as_exit_task.yaml b/test_data/compiled-workflows/pipeline_as_exit_task.yaml index c54aa3afcf3..d0886d74611 100644 --- a/test_data/compiled-workflows/pipeline_as_exit_task.yaml +++ b/test_data/compiled-workflows/pipeline_as_exit_task.yaml @@ -154,7 +154,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -261,7 +263,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: 
'{{inputs.parameters.pod-spec-patch}}' @@ -394,7 +398,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_in_pipeline.yaml b/test_data/compiled-workflows/pipeline_in_pipeline.yaml index 2042a6496cb..bcb4aefe2a6 100644 --- a/test_data/compiled-workflows/pipeline_in_pipeline.yaml +++ b/test_data/compiled-workflows/pipeline_in_pipeline.yaml @@ -123,7 +123,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -230,7 +232,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -387,7 +391,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_in_pipeline_complex.yaml b/test_data/compiled-workflows/pipeline_in_pipeline_complex.yaml index 81798ae1c4c..7345e8f6427 100644 --- a/test_data/compiled-workflows/pipeline_in_pipeline_complex.yaml +++ b/test_data/compiled-workflows/pipeline_in_pipeline_complex.yaml @@ -132,7 +132,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -239,7 +241,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: 
'{{inputs.parameters.pod-spec-patch}}' @@ -403,7 +407,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_in_pipeline_loaded_from_yaml.yaml b/test_data/compiled-workflows/pipeline_in_pipeline_loaded_from_yaml.yaml index 50a866f8c90..f2fd1e9a587 100644 --- a/test_data/compiled-workflows/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/test_data/compiled-workflows/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -139,7 +139,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -246,7 +248,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -403,7 +407,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_producer_consumer.yaml b/test_data/compiled-workflows/pipeline_producer_consumer.yaml index d8e486cc89b..6e29409e589 100644 --- a/test_data/compiled-workflows/pipeline_producer_consumer.yaml +++ b/test_data/compiled-workflows/pipeline_producer_consumer.yaml @@ -170,7 +170,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -277,7 +279,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: 
system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -409,7 +413,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_after.yaml b/test_data/compiled-workflows/pipeline_with_after.yaml index 3912ec5fcbb..ce9d9f15789 100644 --- a/test_data/compiled-workflows/pipeline_with_after.yaml +++ b/test_data/compiled-workflows/pipeline_with_after.yaml @@ -111,7 +111,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -218,7 +220,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -403,7 +407,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_artifact_custom_path.yaml b/test_data/compiled-workflows/pipeline_with_artifact_custom_path.yaml index 98939fb15d0..723ddf5320f 100644 --- a/test_data/compiled-workflows/pipeline_with_artifact_custom_path.yaml +++ b/test_data/compiled-workflows/pipeline_with_artifact_custom_path.yaml @@ -139,7 +139,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -246,7 +248,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher 
name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -403,7 +407,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_artifact_upload_download.yaml b/test_data/compiled-workflows/pipeline_with_artifact_upload_download.yaml index 3682568eb4c..829c08c69a9 100644 --- a/test_data/compiled-workflows/pipeline_with_artifact_upload_download.yaml +++ b/test_data/compiled-workflows/pipeline_with_artifact_upload_download.yaml @@ -138,7 +138,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -245,7 +247,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -402,7 +406,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_concat_placeholder.yaml b/test_data/compiled-workflows/pipeline_with_concat_placeholder.yaml index 17fa857645b..d124b47c747 100644 --- a/test_data/compiled-workflows/pipeline_with_concat_placeholder.yaml +++ b/test_data/compiled-workflows/pipeline_with_concat_placeholder.yaml @@ -110,7 +110,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -217,7 +219,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + 
metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -349,7 +353,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_condition.yaml b/test_data/compiled-workflows/pipeline_with_condition.yaml index b4bf744556d..45bcbf59381 100644 --- a/test_data/compiled-workflows/pipeline_with_condition.yaml +++ b/test_data/compiled-workflows/pipeline_with_condition.yaml @@ -136,7 +136,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -243,7 +245,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -424,7 +428,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_condition_dynamic_task_output_custom_training_job.yaml b/test_data/compiled-workflows/pipeline_with_condition_dynamic_task_output_custom_training_job.yaml index f8f8556a811..980f35f6cc7 100644 --- a/test_data/compiled-workflows/pipeline_with_condition_dynamic_task_output_custom_training_job.yaml +++ b/test_data/compiled-workflows/pipeline_with_condition_dynamic_task_output_custom_training_job.yaml @@ -233,7 +233,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver 
name: system-container-driver outputs: parameters: @@ -340,7 +342,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -472,7 +476,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_dynamic_importer_metadata.yaml b/test_data/compiled-workflows/pipeline_with_dynamic_importer_metadata.yaml index d561b789414..0bcfc72ad10 100644 --- a/test_data/compiled-workflows/pipeline_with_dynamic_importer_metadata.yaml +++ b/test_data/compiled-workflows/pipeline_with_dynamic_importer_metadata.yaml @@ -102,7 +102,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -194,7 +196,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -301,7 +305,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -457,7 +463,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_dynamic_task_output_custom_training_job.yaml b/test_data/compiled-workflows/pipeline_with_dynamic_task_output_custom_training_job.yaml index 
cf68933f943..b6c37bcc8fe 100644 --- a/test_data/compiled-workflows/pipeline_with_dynamic_task_output_custom_training_job.yaml +++ b/test_data/compiled-workflows/pipeline_with_dynamic_task_output_custom_training_job.yaml @@ -195,7 +195,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -302,7 +304,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -507,7 +511,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_env.yaml b/test_data/compiled-workflows/pipeline_with_env.yaml index fde5ffa35d5..2f904f5aeaa 100644 --- a/test_data/compiled-workflows/pipeline_with_env.yaml +++ b/test_data/compiled-workflows/pipeline_with_env.yaml @@ -123,7 +123,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -230,7 +232,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -386,7 +390,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_exit_handler.yaml 
b/test_data/compiled-workflows/pipeline_with_exit_handler.yaml index f7cbdb42a3f..85677d395d2 100644 --- a/test_data/compiled-workflows/pipeline_with_exit_handler.yaml +++ b/test_data/compiled-workflows/pipeline_with_exit_handler.yaml @@ -135,7 +135,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -242,7 +244,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -432,7 +436,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_google_artifact_type.yaml b/test_data/compiled-workflows/pipeline_with_google_artifact_type.yaml index aef085eeaf5..0ba696d2cca 100644 --- a/test_data/compiled-workflows/pipeline_with_google_artifact_type.yaml +++ b/test_data/compiled-workflows/pipeline_with_google_artifact_type.yaml @@ -120,7 +120,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -212,7 +214,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -319,7 +323,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -488,7 +494,9 @@ spec: name: 
iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_importer.yaml b/test_data/compiled-workflows/pipeline_with_importer.yaml index 2c1634e9548..952058a16c9 100644 --- a/test_data/compiled-workflows/pipeline_with_importer.yaml +++ b/test_data/compiled-workflows/pipeline_with_importer.yaml @@ -110,7 +110,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -202,7 +204,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -309,7 +313,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -454,7 +460,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_importer_and_gcpc_types.yaml b/test_data/compiled-workflows/pipeline_with_importer_and_gcpc_types.yaml index f230897198d..6666c29843d 100644 --- a/test_data/compiled-workflows/pipeline_with_importer_and_gcpc_types.yaml +++ b/test_data/compiled-workflows/pipeline_with_importer_and_gcpc_types.yaml @@ -112,7 +112,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -219,7 +221,9 @@ spec: 
inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -303,7 +307,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -433,7 +439,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_importer_workspace.yaml b/test_data/compiled-workflows/pipeline_with_importer_workspace.yaml index 81dd2fa7c49..fe7da416a71 100644 --- a/test_data/compiled-workflows/pipeline_with_importer_workspace.yaml +++ b/test_data/compiled-workflows/pipeline_with_importer_workspace.yaml @@ -180,7 +180,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer-workspace outputs: {} securityContext: @@ -276,7 +278,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -383,7 +387,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -565,7 +571,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/pipeline_with_input_status_state.yaml b/test_data/compiled-workflows/pipeline_with_input_status_state.yaml index 1143ae8d57b..f3e14606c34 100644 --- a/test_data/compiled-workflows/pipeline_with_input_status_state.yaml +++ b/test_data/compiled-workflows/pipeline_with_input_status_state.yaml @@ -135,7 +135,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -242,7 +244,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -406,7 +410,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_loops.yaml b/test_data/compiled-workflows/pipeline_with_loops.yaml index da73914e125..075f658b24a 100644 --- a/test_data/compiled-workflows/pipeline_with_loops.yaml +++ b/test_data/compiled-workflows/pipeline_with_loops.yaml @@ -151,7 +151,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -258,7 +260,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -550,7 +554,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/pipeline_with_loops_and_conditions.yaml b/test_data/compiled-workflows/pipeline_with_loops_and_conditions.yaml index 70bb6366691..ac9284639f7 100644 --- a/test_data/compiled-workflows/pipeline_with_loops_and_conditions.yaml +++ b/test_data/compiled-workflows/pipeline_with_loops_and_conditions.yaml @@ -197,7 +197,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -304,7 +306,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -500,7 +504,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_metadata_fields.yaml b/test_data/compiled-workflows/pipeline_with_metadata_fields.yaml index a470d72ca21..eb2390927c5 100644 --- a/test_data/compiled-workflows/pipeline_with_metadata_fields.yaml +++ b/test_data/compiled-workflows/pipeline_with_metadata_fields.yaml @@ -148,7 +148,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -255,7 +257,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -412,7 +416,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_metrics_outputs.yaml b/test_data/compiled-workflows/pipeline_with_metrics_outputs.yaml index 5154c2415be..99f06c5981e 100644 --- a/test_data/compiled-workflows/pipeline_with_metrics_outputs.yaml +++ b/test_data/compiled-workflows/pipeline_with_metrics_outputs.yaml @@ -122,7 +122,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -229,7 +231,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -361,7 +365,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_multiple_exit_handlers.yaml b/test_data/compiled-workflows/pipeline_with_multiple_exit_handlers.yaml index f73f7d0a3f9..f65d737e04f 100644 --- a/test_data/compiled-workflows/pipeline_with_multiple_exit_handlers.yaml +++ b/test_data/compiled-workflows/pipeline_with_multiple_exit_handlers.yaml @@ -141,7 +141,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -248,7 +250,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -568,7 +572,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: 
+ pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_nested_conditions.yaml b/test_data/compiled-workflows/pipeline_with_nested_conditions.yaml index 21b88b2684f..edee1b3a2d2 100644 --- a/test_data/compiled-workflows/pipeline_with_nested_conditions.yaml +++ b/test_data/compiled-workflows/pipeline_with_nested_conditions.yaml @@ -138,7 +138,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -245,7 +247,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -402,7 +406,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_nested_conditions_yaml.yaml b/test_data/compiled-workflows/pipeline_with_nested_conditions_yaml.yaml index 2d7853adffb..3788aeebfc0 100644 --- a/test_data/compiled-workflows/pipeline_with_nested_conditions_yaml.yaml +++ b/test_data/compiled-workflows/pipeline_with_nested_conditions_yaml.yaml @@ -150,7 +150,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -257,7 +259,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -425,7 +429,9 @@ spec: name: iteration-index - default: DAG 
name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_nested_loops.yaml b/test_data/compiled-workflows/pipeline_with_nested_loops.yaml index 57b1bfbf194..4e632145a26 100644 --- a/test_data/compiled-workflows/pipeline_with_nested_loops.yaml +++ b/test_data/compiled-workflows/pipeline_with_nested_loops.yaml @@ -128,7 +128,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -235,7 +237,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -367,7 +371,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_only_display_name.yaml b/test_data/compiled-workflows/pipeline_with_only_display_name.yaml index fec2b482f4f..1218055e557 100644 --- a/test_data/compiled-workflows/pipeline_with_only_display_name.yaml +++ b/test_data/compiled-workflows/pipeline_with_only_display_name.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -347,7 +351,9 @@ spec: name: 
iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_outputs.yaml b/test_data/compiled-workflows/pipeline_with_outputs.yaml index 099c71c3d2a..063b71280bf 100644 --- a/test_data/compiled-workflows/pipeline_with_outputs.yaml +++ b/test_data/compiled-workflows/pipeline_with_outputs.yaml @@ -124,7 +124,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -231,7 +233,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -388,7 +392,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_parallelfor_parallelism.yaml b/test_data/compiled-workflows/pipeline_with_parallelfor_parallelism.yaml index f3750fa4972..45401fc3de0 100644 --- a/test_data/compiled-workflows/pipeline_with_parallelfor_parallelism.yaml +++ b/test_data/compiled-workflows/pipeline_with_parallelfor_parallelism.yaml @@ -215,7 +215,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -322,7 +324,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ 
-454,7 +458,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_params_containing_format.yaml b/test_data/compiled-workflows/pipeline_with_params_containing_format.yaml index 2c29a52640a..50bfb10e63a 100644 --- a/test_data/compiled-workflows/pipeline_with_params_containing_format.yaml +++ b/test_data/compiled-workflows/pipeline_with_params_containing_format.yaml @@ -137,7 +137,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -244,7 +246,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -377,7 +381,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_placeholders.yaml b/test_data/compiled-workflows/pipeline_with_placeholders.yaml index 7cec62f4b53..1cb44357984 100644 --- a/test_data/compiled-workflows/pipeline_with_placeholders.yaml +++ b/test_data/compiled-workflows/pipeline_with_placeholders.yaml @@ -124,7 +124,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -231,7 +233,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} 
podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -363,7 +367,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_pod_metadata.yaml b/test_data/compiled-workflows/pipeline_with_pod_metadata.yaml index 47bed1a657a..b1cb2e13178 100644 --- a/test_data/compiled-workflows/pipeline_with_pod_metadata.yaml +++ b/test_data/compiled-workflows/pipeline_with_pod_metadata.yaml @@ -239,7 +239,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -346,7 +348,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -481,6 +485,7 @@ spec: metadata: annotations: '{{inputs.parameters.pod-metadata-annotation-key}}': '{{inputs.parameters.pod-metadata-annotation-val}}' + pipelines.kubeflow.org/runtime-role: launcher labels: '{{inputs.parameters.pod-metadata-label-key-1}}': '{{inputs.parameters.pod-metadata-label-val-1}}' '{{inputs.parameters.pod-metadata-label-key-2}}': '{{inputs.parameters.pod-metadata-label-val-2}}' @@ -653,6 +658,7 @@ spec: '{{inputs.parameters.pod-metadata-annotation-key-2}}': '{{inputs.parameters.pod-metadata-annotation-val-2}}' '{{inputs.parameters.pod-metadata-annotation-key-3}}': '{{inputs.parameters.pod-metadata-annotation-val-3}}' '{{inputs.parameters.pod-metadata-annotation-key-4}}': '{{inputs.parameters.pod-metadata-annotation-val-4}}' + pipelines.kubeflow.org/runtime-role: launcher labels: '{{inputs.parameters.pod-metadata-label-key-1}}': '{{inputs.parameters.pod-metadata-label-val-1}}' 
'{{inputs.parameters.pod-metadata-label-key-2}}': '{{inputs.parameters.pod-metadata-label-val-2}}' @@ -784,6 +790,7 @@ spec: annotations: '{{inputs.parameters.pod-metadata-annotation-key-1}}': '{{inputs.parameters.pod-metadata-annotation-val-1}}' '{{inputs.parameters.pod-metadata-annotation-key-2}}': '{{inputs.parameters.pod-metadata-annotation-val-2}}' + pipelines.kubeflow.org/runtime-role: launcher name: metadata-2-0-system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -916,6 +923,8 @@ spec: - name: pod-metadata-label-key-3 - name: pod-metadata-label-val-3 metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher labels: '{{inputs.parameters.pod-metadata-label-key-1}}': '{{inputs.parameters.pod-metadata-label-val-1}}' '{{inputs.parameters.pod-metadata-label-key-2}}': '{{inputs.parameters.pod-metadata-label-val-2}}' @@ -1217,7 +1226,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_retry.yaml b/test_data/compiled-workflows/pipeline_with_retry.yaml index 11f55866a23..aa6a4a5ed89 100644 --- a/test_data/compiled-workflows/pipeline_with_retry.yaml +++ b/test_data/compiled-workflows/pipeline_with_retry.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -246,7 +248,9 @@ spec: - name: retry-backoff-duration - name: retry-backoff-factor - name: retry-backoff-max-duration - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: retry-system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -392,7 +396,9 @@ spec: name: iteration-index - default: DAG name: driver-type - 
metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_reused_component.yaml b/test_data/compiled-workflows/pipeline_with_reused_component.yaml index 0ff9b9cedba..3fc8d39c3be 100644 --- a/test_data/compiled-workflows/pipeline_with_reused_component.yaml +++ b/test_data/compiled-workflows/pipeline_with_reused_component.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -406,7 +410,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_secret_as_env.yaml b/test_data/compiled-workflows/pipeline_with_secret_as_env.yaml index 7e2f8625519..3b3b80fa473 100644 --- a/test_data/compiled-workflows/pipeline_with_secret_as_env.yaml +++ b/test_data/compiled-workflows/pipeline_with_secret_as_env.yaml @@ -136,7 +136,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -243,7 +245,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -402,7 +406,9 @@ spec: name: iteration-index - default: DAG 
name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_secret_as_volume.yaml b/test_data/compiled-workflows/pipeline_with_secret_as_volume.yaml index eb023f22197..d24a6f57b4f 100644 --- a/test_data/compiled-workflows/pipeline_with_secret_as_volume.yaml +++ b/test_data/compiled-workflows/pipeline_with_secret_as_volume.yaml @@ -124,7 +124,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -231,7 +233,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -365,7 +369,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_string_machine_fields_pipeline_input.yaml b/test_data/compiled-workflows/pipeline_with_string_machine_fields_pipeline_input.yaml index 5ac3a9a795b..78a7e482f18 100644 --- a/test_data/compiled-workflows/pipeline_with_string_machine_fields_pipeline_input.yaml +++ b/test_data/compiled-workflows/pipeline_with_string_machine_fields_pipeline_input.yaml @@ -117,7 +117,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -224,7 +226,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} 
podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -356,7 +360,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_string_machine_fields_task_output.yaml b/test_data/compiled-workflows/pipeline_with_string_machine_fields_task_output.yaml index 86cee35bd46..ab6fccac0ce 100644 --- a/test_data/compiled-workflows/pipeline_with_string_machine_fields_task_output.yaml +++ b/test_data/compiled-workflows/pipeline_with_string_machine_fields_task_output.yaml @@ -169,7 +169,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -276,7 +278,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -506,7 +510,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_submit_request.yaml b/test_data/compiled-workflows/pipeline_with_submit_request.yaml index 64dfa4657af..2f37db2c468 100644 --- a/test_data/compiled-workflows/pipeline_with_submit_request.yaml +++ b/test_data/compiled-workflows/pipeline_with_submit_request.yaml @@ -128,7 +128,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -235,7 +237,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + 
annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -391,7 +395,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_task_final_status.yaml b/test_data/compiled-workflows/pipeline_with_task_final_status.yaml index 72a58330fd2..9e0d6a75b55 100644 --- a/test_data/compiled-workflows/pipeline_with_task_final_status.yaml +++ b/test_data/compiled-workflows/pipeline_with_task_final_status.yaml @@ -151,7 +151,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -258,7 +260,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -447,7 +451,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_task_final_status_yaml.yaml b/test_data/compiled-workflows/pipeline_with_task_final_status_yaml.yaml index e2ec3a77b22..d8a3f32504c 100644 --- a/test_data/compiled-workflows/pipeline_with_task_final_status_yaml.yaml +++ b/test_data/compiled-workflows/pipeline_with_task_final_status_yaml.yaml @@ -116,7 +116,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -223,7 +225,9 @@ spec: inputs: 
parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -387,7 +391,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_task_using_ignore_upstream_failure.yaml b/test_data/compiled-workflows/pipeline_with_task_using_ignore_upstream_failure.yaml index 1b4856bbf25..e6a9cbc526b 100644 --- a/test_data/compiled-workflows/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/test_data/compiled-workflows/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -131,7 +131,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -238,7 +240,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -409,7 +413,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_utils.yaml b/test_data/compiled-workflows/pipeline_with_utils.yaml index e7fb28ee7f9..7318b1a85d9 100644 --- a/test_data/compiled-workflows/pipeline_with_utils.yaml +++ b/test_data/compiled-workflows/pipeline_with_utils.yaml @@ -121,7 +121,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-container-driver outputs: parameters: @@ -228,7 +230,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -360,7 +364,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_various_io_types.yaml b/test_data/compiled-workflows/pipeline_with_various_io_types.yaml index 799eb7d5f2c..d063289fe0e 100644 --- a/test_data/compiled-workflows/pipeline_with_various_io_types.yaml +++ b/test_data/compiled-workflows/pipeline_with_various_io_types.yaml @@ -112,7 +112,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -219,7 +221,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -376,7 +380,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_volume.yaml b/test_data/compiled-workflows/pipeline_with_volume.yaml index 458742d257a..376f2d426fc 100644 --- a/test_data/compiled-workflows/pipeline_with_volume.yaml +++ b/test_data/compiled-workflows/pipeline_with_volume.yaml @@ -165,7 +165,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-container-driver outputs: parameters: @@ -272,7 +274,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -463,7 +467,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_volume_no_cache.yaml b/test_data/compiled-workflows/pipeline_with_volume_no_cache.yaml index 1218333a830..594ebe39949 100644 --- a/test_data/compiled-workflows/pipeline_with_volume_no_cache.yaml +++ b/test_data/compiled-workflows/pipeline_with_volume_no_cache.yaml @@ -165,7 +165,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -272,7 +274,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -463,7 +467,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pipeline_with_workspace.yaml b/test_data/compiled-workflows/pipeline_with_workspace.yaml index 34a3dd49915..b18db0f4afe 100644 --- a/test_data/compiled-workflows/pipeline_with_workspace.yaml +++ b/test_data/compiled-workflows/pipeline_with_workspace.yaml @@ -141,7 +141,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: 
system-container-driver outputs: parameters: @@ -248,7 +250,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -405,7 +409,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/placeholder_with_if_placeholder_none_input_value.yaml b/test_data/compiled-workflows/placeholder_with_if_placeholder_none_input_value.yaml index f2bb06458c2..620bfc5929d 100644 --- a/test_data/compiled-workflows/placeholder_with_if_placeholder_none_input_value.yaml +++ b/test_data/compiled-workflows/placeholder_with_if_placeholder_none_input_value.yaml @@ -111,7 +111,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -218,7 +220,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -350,7 +354,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/preprocess.yaml b/test_data/compiled-workflows/preprocess.yaml index 2165b01e6b0..690604c71f5 100644 --- a/test_data/compiled-workflows/preprocess.yaml +++ b/test_data/compiled-workflows/preprocess.yaml @@ -139,7 +139,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + 
pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -246,7 +248,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -378,7 +382,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/producer_consumer_param_pipeline.yaml b/test_data/compiled-workflows/producer_consumer_param_pipeline.yaml index ce71e5ff290..dcf4b149d5e 100644 --- a/test_data/compiled-workflows/producer_consumer_param_pipeline.yaml +++ b/test_data/compiled-workflows/producer_consumer_param_pipeline.yaml @@ -115,7 +115,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -222,7 +224,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -379,7 +383,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pvc_mount.yaml b/test_data/compiled-workflows/pvc_mount.yaml index 50fa4a99b05..4c6cc4eef82 100644 --- a/test_data/compiled-workflows/pvc_mount.yaml +++ b/test_data/compiled-workflows/pvc_mount.yaml @@ -135,7 +135,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver 
name: system-container-driver outputs: parameters: @@ -242,7 +244,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -403,7 +407,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pythonic_artifact_with_single_return.yaml b/test_data/compiled-workflows/pythonic_artifact_with_single_return.yaml index 4019446da75..d66d8c4fba2 100644 --- a/test_data/compiled-workflows/pythonic_artifact_with_single_return.yaml +++ b/test_data/compiled-workflows/pythonic_artifact_with_single_return.yaml @@ -105,7 +105,9 @@ spec: - name: component - name: importer - name: parent-dag-id - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-importer outputs: {} securityContext: @@ -197,7 +199,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -304,7 +308,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -449,7 +455,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pythonic_artifacts_test_pipeline.yaml b/test_data/compiled-workflows/pythonic_artifacts_test_pipeline.yaml index 8d1a56969ff..db4d75f32b4 100644 --- 
a/test_data/compiled-workflows/pythonic_artifacts_test_pipeline.yaml +++ b/test_data/compiled-workflows/pythonic_artifacts_test_pipeline.yaml @@ -136,7 +136,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -243,7 +245,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -400,7 +404,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pythonic_artifacts_with_list_of_artifacts.yaml b/test_data/compiled-workflows/pythonic_artifacts_with_list_of_artifacts.yaml index 7cbebbad713..eac996bbd77 100644 --- a/test_data/compiled-workflows/pythonic_artifacts_with_list_of_artifacts.yaml +++ b/test_data/compiled-workflows/pythonic_artifacts_with_list_of_artifacts.yaml @@ -137,7 +137,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -244,7 +246,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -376,7 +380,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/pythonic_artifacts_with_multiple_returns.yaml 
b/test_data/compiled-workflows/pythonic_artifacts_with_multiple_returns.yaml index 1952102bdc9..a60c864a7c1 100644 --- a/test_data/compiled-workflows/pythonic_artifacts_with_multiple_returns.yaml +++ b/test_data/compiled-workflows/pythonic_artifacts_with_multiple_returns.yaml @@ -142,7 +142,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -249,7 +251,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -381,7 +385,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/ray_integration_compiled.yaml b/test_data/compiled-workflows/ray_integration_compiled.yaml index d495579ff62..c28af08c28d 100644 --- a/test_data/compiled-workflows/ray_integration_compiled.yaml +++ b/test_data/compiled-workflows/ray_integration_compiled.yaml @@ -182,7 +182,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -289,7 +291,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -421,7 +425,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/run_as_user_cache_disabled.yaml b/test_data/compiled-workflows/run_as_user_cache_disabled.yaml index 2ed37911c49..bc8bafdf1f5 100644 --- a/test_data/compiled-workflows/run_as_user_cache_disabled.yaml +++ b/test_data/compiled-workflows/run_as_user_cache_disabled.yaml @@ -111,7 +111,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -220,7 +222,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -354,7 +358,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/run_as_user_cache_enabled.yaml b/test_data/compiled-workflows/run_as_user_cache_enabled.yaml index 2ed37911c49..bc8bafdf1f5 100644 --- a/test_data/compiled-workflows/run_as_user_cache_enabled.yaml +++ b/test_data/compiled-workflows/run_as_user_cache_enabled.yaml @@ -111,7 +111,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -220,7 +222,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -354,7 +358,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git 
a/test_data/compiled-workflows/sequential_v1.yaml b/test_data/compiled-workflows/sequential_v1.yaml index aa05a6bd410..5673cbf3cd5 100644 --- a/test_data/compiled-workflows/sequential_v1.yaml +++ b/test_data/compiled-workflows/sequential_v1.yaml @@ -108,7 +108,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -215,7 +217,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -371,7 +375,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/sequential_v2.yaml b/test_data/compiled-workflows/sequential_v2.yaml index d5dd44ca770..78450394897 100644 --- a/test_data/compiled-workflows/sequential_v2.yaml +++ b/test_data/compiled-workflows/sequential_v2.yaml @@ -112,7 +112,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -219,7 +221,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -376,7 +380,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/take_nap_compiled.yaml 
b/test_data/compiled-workflows/take_nap_compiled.yaml index 0f94165371c..f5fac71c3e3 100644 --- a/test_data/compiled-workflows/take_nap_compiled.yaml +++ b/test_data/compiled-workflows/take_nap_compiled.yaml @@ -133,7 +133,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -240,7 +242,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -397,7 +401,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/take_nap_pipeline_root_compiled.yaml b/test_data/compiled-workflows/take_nap_pipeline_root_compiled.yaml index 0f94165371c..f5fac71c3e3 100644 --- a/test_data/compiled-workflows/take_nap_pipeline_root_compiled.yaml +++ b/test_data/compiled-workflows/take_nap_pipeline_root_compiled.yaml @@ -133,7 +133,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -240,7 +242,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -397,7 +401,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/two_step_pipeline.yaml 
b/test_data/compiled-workflows/two_step_pipeline.yaml index f6b9c888276..0a4c1d50a82 100644 --- a/test_data/compiled-workflows/two_step_pipeline.yaml +++ b/test_data/compiled-workflows/two_step_pipeline.yaml @@ -113,7 +113,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -220,7 +222,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -377,7 +381,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/two_step_pipeline_containerized.yaml b/test_data/compiled-workflows/two_step_pipeline_containerized.yaml index 27f8ea4d398..628d23f692a 100644 --- a/test_data/compiled-workflows/two_step_pipeline_containerized.yaml +++ b/test_data/compiled-workflows/two_step_pipeline_containerized.yaml @@ -114,7 +114,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -221,7 +223,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -378,7 +382,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/upload_download_compiled.yaml 
b/test_data/compiled-workflows/upload_download_compiled.yaml index 3edca3d8542..db496ed8dc2 100644 --- a/test_data/compiled-workflows/upload_download_compiled.yaml +++ b/test_data/compiled-workflows/upload_download_compiled.yaml @@ -172,7 +172,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -279,7 +281,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -461,7 +465,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: diff --git a/test_data/compiled-workflows/xgboost_sample_pipeline.yaml b/test_data/compiled-workflows/xgboost_sample_pipeline.yaml index 4fcdd6e6a6b..9d95f03c3b2 100644 --- a/test_data/compiled-workflows/xgboost_sample_pipeline.yaml +++ b/test_data/compiled-workflows/xgboost_sample_pipeline.yaml @@ -376,7 +376,9 @@ spec: name: iteration-index - default: "" name: kubernetes-config - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-container-driver outputs: parameters: @@ -483,7 +485,9 @@ spec: inputs: parameters: - name: pod-spec-patch - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: launcher name: system-container-impl outputs: {} podSpecPatch: '{{inputs.parameters.pod-spec-patch}}' @@ -791,7 +795,9 @@ spec: name: iteration-index - default: DAG name: driver-type - metadata: {} + metadata: + annotations: + pipelines.kubeflow.org/runtime-role: driver name: system-dag-driver outputs: parameters: