Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions frontend/server/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ function createUIServer(options: UIConfigs) {
authorizeFn,
options.auth.enabled,
options.auth.kubeflowUserIdHeader,
envoyServiceAddress,
);

/** Artifact */
Expand Down
62 changes: 62 additions & 0 deletions frontend/server/handlers/artifacts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ import { isAllowedDomain } from './domain-checker.js';
import { getK8sSecret } from '../k8s-helper.js';
import { CredentialBody } from 'google-auth-library';
import { AuthorizeFn } from '../helpers/auth.js';
import {
validateArtifactNamespace,
buildArtifactUri,
} from '../helpers/mlmd-client.js';
import {
AuthorizeRequestResources,
AuthorizeRequestVerb,
Expand Down Expand Up @@ -126,6 +130,7 @@ export function getArtifactsAuthMiddleware(
authorizeFn: AuthorizeFn,
authEnabled: boolean,
kubeflowUserIdHeader: string,
envoyAddress?: string,
): Handler {
return async (request: Request, response: Response, next: NextFunction) => {
if (!authEnabled) {
Expand Down Expand Up @@ -187,6 +192,63 @@ export function getArtifactsAuthMiddleware(
return;
}

// IDOR prevention: verify that the artifact actually belongs to the
// claimed namespace by checking MLMD ownership. Without this check,
// a user who has access to namespace A can change the namespace query
// parameter to A while keeping bucket/key pointing to namespace B's
// artifact — bypassing authorization when namespaces share S3 credentials.
// See https://github.com/kubeflow/pipelines/issues/9889
if (envoyAddress) {
// Extract artifact coordinates exactly the same way the downstream
// handler will. The handler uses query params for /artifacts/get and
// route params for /artifacts/:source/:bucket/*. We determine which
// by checking whether the path matches the download pattern. If both
// query params and path segments are present, prefer whichever source
// the handler will actually use — otherwise an attacker could supply
// benign query params that pass validation while the handler reads
// different values from path segments.
let effectiveSource = '';
let effectiveBucket = '';
let effectiveKey = '';

const downloadPathMatch = request.path.match(
/^(?:\/pipeline)?\/artifacts\/([^/]+)\/([^/]+)\/(.+)$/,
);
if (downloadPathMatch && downloadPathMatch[1] !== 'get') {
effectiveSource = decodeURIComponent(downloadPathMatch[1]);
effectiveBucket = decodeURIComponent(downloadPathMatch[2]);
effectiveKey = decodeURIComponent(downloadPathMatch[3]);
} else {
effectiveSource = (request.query.source as string) || '';
effectiveBucket = (request.query.bucket as string) || '';
effectiveKey = (request.query.key as string) || '';
}

if (effectiveSource && effectiveBucket && effectiveKey) {
const artifactUri = buildArtifactUri(effectiveSource, effectiveBucket, effectiveKey);
const validation = await validateArtifactNamespace(
envoyAddress,
artifactUri,
namespace,
);

if (!validation.valid) {
console.warn(
`[SECURITY] IDOR blocked: artifact namespace mismatch. ` +
`User: ${userId}, ` +
`Claimed namespace: ${namespace}, ` +
`Actual namespace: ${validation.actualNamespace}, ` +
`URI: ${artifactUri}, ` +
`Path: ${request.originalUrl}`,
);
response
.status(403)
.send('Artifact does not belong to the requested namespace');
return;
}
}
}

next();
};
}
Expand Down
275 changes: 275 additions & 0 deletions frontend/server/helpers/mlmd-client.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
// Copyright 2025 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { createRequire } from 'module';

const require = createRequire(import.meta.url);

// Load MLMD proto classes via CJS require (generated code uses google-protobuf CJS format).
// eslint-disable-next-line @typescript-eslint/no-var-requires
const servicePb = require('../../src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.js');
// eslint-disable-next-line @typescript-eslint/no-var-requires
const storePb = require('../../src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.js');

const PIPELINE_RUN_CONTEXT_TYPE = 'system.PipelineRun';
const NAMESPACE_PROPERTY_KEY = 'namespace';

const GRPC_WEB_CONTENT_TYPE = 'application/grpc-web+proto';
const GRPC_WEB_ACCEPT = 'application/grpc-web+proto';

function encodeGrpcWebRequest(serializedMessage: Uint8Array): Uint8Array {
const frame = new Uint8Array(5 + serializedMessage.length);
frame[0] = 0x00; // data frame
const view = new DataView(frame.buffer);
view.setUint32(1, serializedMessage.length, false); // big-endian length
frame.set(serializedMessage, 5);
return frame;
}

function decodeGrpcWebResponse(buffer: ArrayBuffer): Uint8Array {
const view = new DataView(buffer);
let dataPayload: Uint8Array | null = null;
let offset = 0;

while (offset + 5 <= buffer.byteLength) {
const frameType = view.getUint8(offset);
const frameLength = view.getUint32(offset + 1, false);

if (offset + 5 + frameLength > buffer.byteLength) {
throw new Error(
`gRPC-web frame at offset ${offset} claims length ${frameLength} ` +
`but only ${buffer.byteLength - offset - 5} bytes remain`,
);
}

if (frameType === 0x00) {
dataPayload = new Uint8Array(buffer, offset + 5, frameLength);
} else if (frameType === 0x80) {
const trailerBytes = new Uint8Array(buffer, offset + 5, frameLength);
const trailerText = new TextDecoder().decode(trailerBytes);
const statusMatch = trailerText.match(/grpc-status:\s*(\d+)/);
const messageMatch = trailerText.match(/grpc-message:\s*(.+)/);
const status = statusMatch ? parseInt(statusMatch[1], 10) : -1;
if (status !== 0) {
throw new Error(
`gRPC error status ${status}: ${messageMatch ? decodeURIComponent(messageMatch[1].trim()) : 'unknown'}`,
);
}
} else {
throw new Error(`Unexpected gRPC-web frame type: 0x${frameType.toString(16)}`);
}

offset += 5 + frameLength;
}

if (!dataPayload) {
throw new Error('gRPC-web response contained no data frame');
}
return dataPayload;
}

async function grpcWebCall(
envoyAddress: string,
method: string,
requestBytes: Uint8Array,
timeoutMs: number = 5000,
): Promise<Uint8Array> {
const url = `${envoyAddress}/ml_metadata.MetadataStoreService/${method}`;
const body = encodeGrpcWebRequest(requestBytes);

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeoutMs);

try {
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': GRPC_WEB_CONTENT_TYPE,
Accept: GRPC_WEB_ACCEPT,
'x-grpc-web': '1',
},
body,
signal: controller.signal,
});

if (!response.ok) {
throw new Error(`MLMD gRPC-web call ${method} failed: HTTP ${response.status}`);
}

const responseBuffer = await response.arrayBuffer();
return decodeGrpcWebResponse(responseBuffer);
} finally {
clearTimeout(timer);
}
}

interface ArtifactInfo {
id: number;
}

interface ContextNamespace {
contextType: string;
namespace: string | undefined;
}

function getArtifactsByUri(
envoyAddress: string,
uris: string[],
): Promise<ArtifactInfo[]> {
const request = new servicePb.GetArtifactsByURIRequest();
request.setUrisList(uris);

return grpcWebCall(envoyAddress, 'GetArtifactsByURI', request.serializeBinary()).then(
(responseBytes) => {
const response = servicePb.GetArtifactsByURIResponse.deserializeBinary(responseBytes);
const artifacts: ArtifactInfo[] = [];
for (const artifact of response.getArtifactsList()) {
artifacts.push({ id: artifact.getId() });
}
return artifacts;
},
);
}

function getContextsByArtifact(
envoyAddress: string,
artifactId: number,
): Promise<ContextNamespace[]> {
const request = new servicePb.GetContextsByArtifactRequest();
request.setArtifactId(artifactId);

return grpcWebCall(
envoyAddress,
'GetContextsByArtifact',
request.serializeBinary(),
).then((responseBytes) => {
const response = servicePb.GetContextsByArtifactResponse.deserializeBinary(responseBytes);
const contexts: ContextNamespace[] = [];
for (const context of response.getContextsList()) {
const contextType = context.getType();
let namespace: string | undefined;
const customProps = context.getCustomPropertiesMap();
if (customProps) {
const nsValue = customProps.get(NAMESPACE_PROPERTY_KEY);
if (nsValue) {
const valueCase = nsValue.getValueCase();
if (valueCase === storePb.Value.ValueCase.STRING_VALUE) {
namespace = nsValue.getStringValue();
}
}
}
contexts.push({ contextType, namespace });
}
return contexts;
});
}

export interface ValidationResult {
valid: boolean;
actualNamespace?: string;
reason?: string;
}

export async function validateArtifactNamespace(
envoyAddress: string,
artifactUri: string,
claimedNamespace: string,
): Promise<ValidationResult> {
// Step 1: Look up artifact by URI in MLMD
let artifacts: ArtifactInfo[];
try {
artifacts = await getArtifactsByUri(envoyAddress, [artifactUri]);
} catch (error) {
console.warn(
`[SECURITY] MLMD artifact lookup failed for URI "${artifactUri}", ` +
`allowing access (fail-open). Error: ${error}`,
);
return { valid: true, reason: 'mlmd-unavailable' };
}

if (artifacts.length === 0) {
console.warn(
`[SECURITY] Artifact not found in MLMD for URI "${artifactUri}", ` +
`allowing access (fail-open for legacy/external artifacts).`,
);
return { valid: true, reason: 'artifact-not-found' };
}

// Step 2: Look up contexts for all artifacts in parallel.
// Reject if ANY artifact has a PipelineRun context whose namespace
// differs from the claim. Track whether we found any namespace evidence
// at all — if none exists, fall through (legacy/external artifacts).
let contextResults: { artifactId: number; contexts: ContextNamespace[] | null }[];
try {
contextResults = await Promise.all(
artifacts.map(async (artifact) => {
try {
const contexts = await getContextsByArtifact(envoyAddress, artifact.id);
return { artifactId: artifact.id, contexts };
} catch (error) {
console.warn(
`[SECURITY] MLMD context lookup failed for artifact ${artifact.id}, ` +
`marking as unavailable. Error: ${error}`,
);
return { artifactId: artifact.id, contexts: null };
}
}),
);
} catch (error) {
console.warn(
`[SECURITY] MLMD batch context lookup failed, ` +
`allowing access (fail-open). Error: ${error}`,
);
return { valid: true, reason: 'mlmd-unavailable' };
}

let hasNamespaceEvidence = false;
for (const { contexts } of contextResults) {
if (contexts === null) {
return { valid: true, reason: 'mlmd-unavailable' };
}

for (const ctx of contexts) {
if (ctx.contextType !== PIPELINE_RUN_CONTEXT_TYPE) {
continue;
}
if (!ctx.namespace) {
continue;
}

hasNamespaceEvidence = true;
if (ctx.namespace !== claimedNamespace) {
return {
valid: false,
actualNamespace: ctx.namespace,
reason: 'namespace-mismatch',
};
}
}
}

if (!hasNamespaceEvidence) {
console.warn(
`[SECURITY] No PipelineRun namespace evidence found in MLMD for URI "${artifactUri}", ` +
`allowing access (no ownership data to validate against).`,
);
}

return { valid: true };
}

export function buildArtifactUri(source: string, bucket: string, key: string): string {
const scheme = source === 'gcs' ? 'gs' : source;
return `${scheme}://${bucket}/${key}`;
}
Loading
Loading