Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
bb0b8ea
Central driver POC https://github.com/kubeflow/pipelines/pull/12023
Sep 22, 2025
934655c
- add pipeline flow details to contribution
Sep 24, 2025
7e77e29
- rebase branch
Sep 30, 2025
395c8c1
- remove redundant regenerate code
Sep 30, 2025
c2c0a49
- rebase master
Nov 13, 2025
037436f
- merge tests
Nov 17, 2025
38cbe18
- update branch
Dec 24, 2025
db3ad72
Fixes for review comments, part 1 (easy ones)
Jan 15, 2026
f9d0537
make driver-plugin image kustomizable
Jan 15, 2026
e536640
Mount certificate to executor-plugin based drivers
Feb 1, 2026
8e7a95d
merge
Feb 1, 2026
77d9f89
debug kfp-api-tls-cert creation and availability
Feb 4, 2026
d19d099
add necessary sa to profile namespaces
Feb 5, 2026
4d54eca
Fix failing driver task log capturing in e2e tests
Feb 5, 2026
5c7b636
- rework CA cert mounting
Feb 6, 2026
0534993
- add list events to e2e tests
Feb 6, 2026
1feb4c3
- create argo resources in end-user profiles
Feb 7, 2026
13d28a0
- remove debugging tls in tests
Feb 7, 2026
d1261b5
- add debug logs to e2e
Feb 7, 2026
9793c72
- add debug logs to e2e
Feb 7, 2026
40aa133
update from master
Feb 18, 2026
a630b38
update from master
Feb 18, 2026
77ead11
create executor-plugin SA, role per profile
Feb 21, 2026
d34b980
create executor-plugin SA, role per profile
Feb 21, 2026
a271f45
- create executor-plugin SA, role per profile
Feb 22, 2026
33d46e8
- make driver-plugin securityContext PodSecurity restricted compliant
Feb 23, 2026
64c5bda
- pass mlmdtls parameter to launcher cmd args
Feb 23, 2026
3ba6e0b
- debug e2e & upgrade
Feb 23, 2026
8a320dc
Merge commit 'origin/master~1' into central-driver-poc
Feb 24, 2026
ae3a5fa
Merge branch 'master' into central-driver-poc
Feb 24, 2026
22f9270
- fix after merge
Feb 24, 2026
804c668
- reduce agent CPU requests for testing cluster
Feb 24, 2026
91a5cd8
- remove debug logs
Feb 25, 2026
61a6c39
Merge branch 'master' into central-driver-poc
Mar 4, 2026
ca7eb10
- store driver logs into artifact store
Mar 8, 2026
b422c55
Store driver logs in artifact storage and display container driver lo…
Mar 9, 2026
aa1abdb
apply formatting to frontend
Mar 9, 2026
c70b855
fix linter errors
Mar 9, 2026
1ae6c97
fix tests
Mar 9, 2026
4754bf9
add additional rbac
Mar 9, 2026
54d186a
Merge branch 'master' into central-driver-poc
Mar 9, 2026
f437053
fix error message
Mar 9, 2026
f1bd7cc
print events
Mar 9, 2026
01b0755
remove debug logs
Mar 10, 2026
93fabba
Prevent log file race condition in for loop by adding per-request mil…
Mar 13, 2026
dc4f0ff
Merge branch 'master' into central-driver-poc
Mar 27, 2026
1716f32
Move driver logs code to React Query v5
Mar 27, 2026
1f023c5
fix linter errors
Mar 27, 2026
209ecc7
fix workflow-compiler tests after merge
Mar 27, 2026
33f2cad
skip unit test: envs now defined in manifests, verified in existed wo…
Mar 27, 2026
11d59f2
fix react driver logs query
Mar 28, 2026
ddf186d
add kfp envs
Mar 28, 2026
9118534
add roles necessary rbac
Mar 28, 2026
f75f873
Merge branch 'master' into central-driver-poc
Mar 28, 2026
a85d3de
add kfo envs to tests
Mar 29, 2026
70d33ac
add kfo envs to tests
Mar 29, 2026
2e5b30c
- fix plugin cm
Mar 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/resources/manifests/base/driver-plugin-cm-path.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: ml-pipeline-driver-agent
data:
sidecar.container: |
name: driver-plugin
image: kind-registry:5000/driver:latest
imagePullPolicy: IfNotPresent
env:
- name: LOG_ACCESS_KEY
valueFrom:
secretKeyRef:
name: mlpipeline-minio-artifact
key: accesskey
- name: LOG_SECRET_KEY
valueFrom:
secretKeyRef:
name: mlpipeline-minio-artifact
key: secretkey
ports:
- containerPort: 8080
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
securityContext:
runAsNonRoot: true
runAsUser: 65534
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
volumeMounts:
- name: var-run-argo
mountPath: /kfp/log
readOnly: false
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: cache-env.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: cache-env.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: ../../base/driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: ml-pipeline-driver-agent
data:
sidecar.container: |
name: driver-plugin
image: kind-registry:5000/driver:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 8080
env:
- name: LOG_ACCESS_KEY
valueFrom:
secretKeyRef:
name: mlpipeline-minio-artifact
key: accesskey
- name: LOG_SECRET_KEY
valueFrom:
secretKeyRef:
name: mlpipeline-minio-artifact
key: secretkey
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
securityContext:
runAsNonRoot: true
runAsUser: 65534
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault
volumeMounts:
- name: argo-workflows-agent-ca-certificates
mountPath: /kfp/certs
readOnly: true
- name: var-run-argo
mountPath: /kfp/log
readOnly: false
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ patches:
target:
kind: Deployment
name: ml-pipeline
- path: driver-plugin-cm-path.yaml
target:
kind: ConfigMap
name: ml-pipeline-driver-agent
- path: ../../base/grpc-specs.yaml
target:
kind: Deployment
Expand Down
18 changes: 17 additions & 1 deletion .github/resources/scripts/collect-logs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ function check_namespace {
return 0
}

# Prints diagnostics about Argo Workflows in a namespace: first the output of
# `kubectl describe wf`, then the namespace events whose involved object is a
# Workflow, ordered by creation timestamp.
#
# Arguments:
#   $1 - namespace to inspect
function describe_argo_workflows {
  local ns="$1"

  echo "===== Argo Workflows list ====="
  kubectl describe wf -n "${ns}"

  echo "===== Argo Workflows data ====="
  kubectl get events -n "${ns}" \
    --field-selector involvedObject.kind=Workflow \
    --sort-by='.metadata.creationTimestamp'

  echo "==============================="
}

function display_pod_info {
local NAMESPACE=$1

Expand All @@ -52,7 +61,13 @@ function display_pod_info {
kubectl describe pod "${POD_NAME}" -n "${NAMESPACE}" | grep -A 100 Events || echo "No events found for pod ${POD_NAME}."

echo "----- LOGS -----"
kubectl logs "${POD_NAME}" -n "${NAMESPACE}" || echo "No logs found for pod ${POD_NAME}."
if [[ "${POD_NAME}" == *-agent* ]]; then
kubectl logs "${POD_NAME}" -n "${NAMESPACE}" -c driver-plugin || \
echo "No logs found for pod ${POD_NAME}."
Comment on lines +64 to +66
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The container name 'driver-plugin' is hardcoded. Consider extracting this to a constant or configuration variable to ensure consistency across the codebase.

Copilot uses AI. Check for mistakes.
else
kubectl logs "${POD_NAME}" -n "${NAMESPACE}" || \
echo "No logs found for pod ${POD_NAME}."
fi

echo "==========================="
echo ""
Expand All @@ -64,6 +79,7 @@ function display_pod_info {

if check_namespace "$NS"; then
display_pod_info "$NS"
describe_argo_workflows "$NS"
else
exit 0
fi
3 changes: 2 additions & 1 deletion .github/resources/scripts/kfp-readiness/wait_for_pods.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def get_pod_statuses():
statuses = {}
for pod in pods.items:
pod_name = pod.metadata.name
if "system" not in pod_name:
# This filter is safe: 'ml-pipeline-persistenceagent-<guid>' will not be excluded and will be processed.
if not ("system" in pod_name or pod_name.endswith("-agent")):
pod_status = pod.status.phase
container_statuses = pod.status.container_statuses or []
ready = 0
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/api-server-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ jobs:
shell: bash
if: ${{ matrix.pod_to_pod_tls_enabled == 'true'}}
run: |
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
kubectl get secret argo-workflows-agent-ca-certificates -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> "$GITHUB_ENV"


Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ jobs:
shell: bash
if: ${{ matrix.pod_to_pod_tls_enabled == 'true'}}
run: |
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
kubectl get secret argo-workflows-agent-ca-certificates -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> "$GITHUB_ENV"
- name: Configure Input Variables
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/legacy-v2-api-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
shell: bash
if: ${{ matrix.pod_to_pod_tls_enabled == 'true' }}
run: |
kubectl get secret kfp-api-tls-cert -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
kubectl get secret argo-workflows-agent-ca-certificates -n kubeflow -o jsonpath='{.data.ca\.crt}' | base64 -d > "${{ github.workspace }}/ca.crt"
echo "CA_CERT_PATH=${{ github.workspace }}/ca.crt" >> "$GITHUB_ENV"

- name: Forward MLMD port
Expand Down
2 changes: 1 addition & 1 deletion backend/Dockerfile.driver
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN GO111MODULE=on go mod download

COPY . .

Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The build path changed from ./backend/src/v2/cmd/driver/*.go to ./backend/src/driver/*.go, but the PR description doesn't mention this restructuring. This is a significant change in code organization that should be documented.

Suggested change
# Note: driver build path was reorganized from ./backend/src/v2/cmd/driver/*.go
# to ./backend/src/driver/*.go as part of backend driver code restructuring.

Copilot uses AI. Check for mistakes.
RUN GO111MODULE=on CGO_ENABLED=0 GOOS=linux go build -tags netgo -gcflags="${GCFLAGS}" -ldflags '-extldflags "-static"' -o /bin/driver ./backend/src/v2/cmd/driver/*.go
RUN GO111MODULE=on CGO_ENABLED=0 GOOS=linux go build -tags netgo -gcflags="${GCFLAGS}" -ldflags '-extldflags "-static"' -o /bin/driver ./backend/src/driver/*.go

FROM alpine:3.21

Expand Down
73 changes: 73 additions & 0 deletions backend/src/common/util/context_logger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package util

import (
"context"
"fmt"
"io"
"os"

"github.com/sirupsen/logrus"
)

// CtxKey is the type of the keys this package uses to store values in a
// context.Context. Using a dedicated named type rather than a plain string
// keeps these keys distinct from string keys defined by other packages.
type CtxKey string

// contextLoggerKey is the context key under which the logger is stored.
const contextLoggerKey CtxKey = "driver_log_key"

// newFileLogger opens logFile with os.Create (creating it, or truncating it
// if it already exists) and returns a logrus logger that writes every entry
// to both standard output and that file using the text formatter.
//
// The returned io.Closer is the opened file; this function never closes it.
// If the file cannot be created, the error from os.Create is returned as-is
// and the other results are nil.
func newFileLogger(logFile string) (*logrus.Logger, io.Closer, error) {
	file, createErr := os.Create(logFile)
	if createErr != nil {
		return nil, nil, createErr
	}

	fileLogger := logrus.New()
	fileLogger.Formatter = &logrus.TextFormatter{}
	// Duplicate each entry to stdout and to the log file.
	fileLogger.Out = io.MultiWriter(os.Stdout, file)

	return fileLogger, file, nil
}

// WithExistingLogger returns a copy of ctx that carries the supplied logger
// under the package's logger key, so that GetLoggerFrom can retrieve it.
// It is intended for testing only.
func WithExistingLogger(ctx context.Context, logger *logrus.Logger) context.Context {
	withLogger := context.WithValue(ctx, contextLoggerKey, logger)
	return withLogger
}

// WithLogger creates a logger that writes to logFile (see newFileLogger) and
// returns a copy of ctx carrying that logger, together with an io.Closer for
// the underlying log file.
//
// An error is returned, with nil context and closer, when ctx is nil, when
// ctx already carries a logger, or when the log file cannot be created.
func WithLogger(ctx context.Context, logFile string) (context.Context, io.Closer, error) {
	// Cases are evaluated top to bottom, so GetLoggerFrom is only reached
	// with a non-nil ctx.
	switch {
	case ctx == nil:
		return nil, nil, fmt.Errorf(
			"error during creation of the logger for logId: %v. ctx can not be nil",
			logFile,
		)
	case GetLoggerFrom(ctx) != nil:
		return nil, nil, fmt.Errorf("logger already exists in context")
	}

	fileLogger, closer, err := newFileLogger(logFile)
	if err != nil {
		return nil, nil, fmt.Errorf(
			"error during creation of the logger for logId: %v details: %w",
			logFile,
			err,
		)
	}

	return context.WithValue(ctx, contextLoggerKey, fileLogger), closer, nil
}

// GetLoggerFrom returns the logger stored in ctx by WithLogger or
// WithExistingLogger.
//
// It returns nil when ctx is nil, when nothing is stored under the logger
// key, or when the stored value is not a *logrus.Logger.
func GetLoggerFrom(ctx context.Context) *logrus.Logger {
	// Calling Value on a nil Context interface panics. WithLogger treats a
	// nil ctx as an ordinary, reportable condition, so do the same here and
	// report "no logger" instead of crashing the caller.
	if ctx == nil {
		return nil
	}

	// The comma-ok assertion yields a nil *logrus.Logger both for a missing
	// value and for a value of an unexpected type.
	logger, _ := ctx.Value(contextLoggerKey).(*logrus.Logger)
	return logger
}
Loading
Loading