diff --git a/api/v1alpha1/inferenceservice_types.go b/api/v1alpha1/inferenceservice_types.go
index 3a483e2d..d73755e8 100644
--- a/api/v1alpha1/inferenceservice_types.go
+++ b/api/v1alpha1/inferenceservice_types.go
@@ -80,6 +80,27 @@ type SpeculativeDecodingSpec struct {
 	NDraftMax *int32 `json:"nDraftMax,omitempty"`
 }
 
+// ModelCacheSpec points this InferenceService's model cache at a user-managed
+// PVC instead of the operator's shared/perService cache PVC. The operator
+// mounts the claim and downloads weights into it through the same prep +
+// download init containers as the built-in cache, but it never creates,
+// mutates, or deletes the PVC object itself; the user owns it end-to-end.
+type ModelCacheSpec struct {
+	// ClaimName names a pre-existing PersistentVolumeClaim in the
+	// InferenceService's namespace to use as the writable model cache volume.
+	// Weights land under the usual <cacheKey>/ subdirectory of the claim, so
+	// RefreshPolicy and cache-key semantics are unchanged and multiple models
+	// can share one claim without colliding. The claim must already exist:
+	// when it is missing the InferenceService is marked Degraded rather than
+	// silently falling back to the shared cache. Ignored for pvc:// model
+	// sources (already staged, read-only, no download). Node alignment of
+	// RWO/local claims (via nodeSelector) is the user's responsibility.
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=253
+	// +optional
+	ClaimName string `json:"claimName,omitempty"`
+}
+
 type InferenceServiceSpec struct {
 	// ModelRef references the Model CR that contains the model to serve
 	// +kubebuilder:validation:Required
@@ -403,6 +424,14 @@ type InferenceServiceSpec struct {
 	// +optional
 	SkipModelInit *bool `json:"skipModelInit,omitempty"`
 
+	// ModelCache overrides where this InferenceService caches model weights:
+	// when claimName is set, the named user-owned PVC is mounted as the
+	// writable model cache (prep + download init containers run against it)
+	// instead of the operator's shared/perService cache PVC. When unset, the
+	// operator-global cache mode applies unchanged.
+	// +optional
+	ModelCache *ModelCacheSpec `json:"modelCache,omitempty"`
+
 	// PersonaPlexConfig holds configuration for the PersonaPlex (Moshi) runtime.
 	// Only used when Runtime is "personaplex".
 	// +optional
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index ebd96fed..b3216776 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -624,6 +624,11 @@ func (in *InferenceServiceSpec) DeepCopyInto(out *InferenceServiceSpec) {
 		*out = new(bool)
 		**out = **in
 	}
+	if in.ModelCache != nil {
+		in, out := &in.ModelCache, &out.ModelCache
+		*out = new(ModelCacheSpec)
+		**out = **in
+	}
 	if in.PersonaPlexConfig != nil {
 		in, out := &in.PersonaPlexConfig, &out.PersonaPlexConfig
 		*out = new(PersonaPlexConfig)
@@ -794,6 +799,21 @@ func (in *Model) DeepCopyObject() runtime.Object {
 	return nil
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelCacheSpec) DeepCopyInto(out *ModelCacheSpec) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCacheSpec.
+func (in *ModelCacheSpec) DeepCopy() *ModelCacheSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(ModelCacheSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ModelList) DeepCopyInto(out *ModelList) {
 	*out = *in
diff --git a/charts/llmkube/templates/crds/inferenceservices.yaml b/charts/llmkube/templates/crds/inferenceservices.yaml
index 13d64d59..178bc152 100644
--- a/charts/llmkube/templates/crds/inferenceservices.yaml
+++ b/charts/llmkube/templates/crds/inferenceservices.yaml
@@ -1490,6 +1490,29 @@ spec:
                 - embedding
                 - rerank
                 type: string
+              modelCache:
+                description: |-
+                  ModelCache overrides where this InferenceService caches model weights:
+                  when claimName is set, the named user-owned PVC is mounted as the
+                  writable model cache (prep + download init containers run against it)
+                  instead of the operator's shared/perService cache PVC. When unset, the
+                  operator-global cache mode applies unchanged.
+                properties:
+                  claimName:
+                    description: |-
+                      ClaimName names a pre-existing PersistentVolumeClaim in the
+                      InferenceService's namespace to use as the writable model cache volume.
+                      Weights land under the usual <cacheKey>/ subdirectory of the claim, so
+                      RefreshPolicy and cache-key semantics are unchanged and multiple models
+                      can share one claim without colliding. The claim must already exist:
+                      when it is missing the InferenceService is marked Degraded rather than
+                      silently falling back to the shared cache. Ignored for pvc:// model
+                      sources (already staged, read-only, no download). Node alignment of
+                      RWO/local claims (via nodeSelector) is the user's responsibility.
+                    maxLength: 253
+                    minLength: 1
+                    type: string
+                type: object
               modelRef:
                 description: ModelRef references the Model CR that contains the model
                   to serve
diff --git a/config/crd/bases/inference.llmkube.dev_inferenceservices.yaml b/config/crd/bases/inference.llmkube.dev_inferenceservices.yaml
index 71d62c29..03e01b34 100644
--- a/config/crd/bases/inference.llmkube.dev_inferenceservices.yaml
+++ b/config/crd/bases/inference.llmkube.dev_inferenceservices.yaml
@@ -1486,6 +1486,29 @@ spec:
                 - embedding
                 - rerank
                 type: string
+              modelCache:
+                description: |-
+                  ModelCache overrides where this InferenceService caches model weights:
+                  when claimName is set, the named user-owned PVC is mounted as the
+                  writable model cache (prep + download init containers run against it)
+                  instead of the operator's shared/perService cache PVC. When unset, the
+                  operator-global cache mode applies unchanged.
+                properties:
+                  claimName:
+                    description: |-
+                      ClaimName names a pre-existing PersistentVolumeClaim in the
+                      InferenceService's namespace to use as the writable model cache volume.
+                      Weights land under the usual <cacheKey>/ subdirectory of the claim, so
+                      RefreshPolicy and cache-key semantics are unchanged and multiple models
+                      can share one claim without colliding. The claim must already exist:
+                      when it is missing the InferenceService is marked Degraded rather than
+                      silently falling back to the shared cache. Ignored for pvc:// model
+                      sources (already staged, read-only, no download). Node alignment of
+                      RWO/local claims (via nodeSelector) is the user's responsibility.
+                    maxLength: 253
+                    minLength: 1
+                    type: string
+                type: object
               modelRef:
                 description: ModelRef references the Model CR that contains the model
                   to serve
diff --git a/docs/MODEL-CACHE.md b/docs/MODEL-CACHE.md
index cd857575..55b554a0 100644
--- a/docs/MODEL-CACHE.md
+++ b/docs/MODEL-CACHE.md
@@ -145,6 +145,46 @@ modelCache:
   accessMode: ReadWriteMany
 ```
 
+### Per-InferenceService Cache PVC (Bring Your Own)
+
+The cache backend above is an operator-global choice. To point a *single*
+InferenceService at its own pre-existing, user-owned PVC — for example a
+node-local volume for a large model pinned to one node, while everything else
+rides the shared cache — set `spec.modelCache.claimName`:
+
+```yaml
+apiVersion: inference.llmkube.dev/v1alpha1
+kind: InferenceService
+metadata:
+  name: llama-3.1-70b
+spec:
+  modelRef: llama-3.1-70b
+  modelCache:
+    claimName: my-model-cache   # pre-existing PVC in the same namespace
+```
+
+Behavior:
+
+- The named PVC becomes the writable model cache for this workload only: the
+  same `model-cache-prep` and `model-downloader` init containers run against
+  it, weights land under the usual `<cacheKey>/` subdirectory, and the serving
+  container mounts it read-only. `RefreshPolicy` and cache-key semantics are
+  unchanged, so multiple models can safely share one claim.
+- The operator **never creates or deletes** the claim — you own it end-to-end
+  (unlike `perService` mode, where the operator provisions and
+  garbage-collects `<isvc>-model-cache`). If the claim does not exist, the
+  InferenceService is marked `Degraded` with a `ModelCachePVCNotFound` event
+  instead of silently falling back to the shared cache.
+- `claimName` targets the download path, so it is ignored for pre-staged
+  `pvc://` model sources (mounted read-only, no download); a
+  `ModelCacheClaimIgnored` warning event is emitted if both are set.
+- Node alignment is your responsibility: for an RWO or node-local claim, use
+  `nodeSelector` so the pod lands where the PVC binds (a
+  `WaitForFirstConsumer` local class binds on the first consumer; a pre-bound
+  RWO PVC pins the pod).
+- `llmkube cache list` / `cache clear` inspect the shared cache only; they do
+  not see bring-your-own cache PVCs.
+
 ## CLI Commands
 
 ### List Cached Models
diff --git a/internal/controller/inferenceservice_controller.go b/internal/controller/inferenceservice_controller.go
index 41c1754c..16f4344e 100644
--- a/internal/controller/inferenceservice_controller.go
+++ b/internal/controller/inferenceservice_controller.go
@@ -18,6 +18,7 @@ package controller
 
 import (
 	"context"
+	"fmt"
 	"net/http"
 	"strings"
 	"time"
@@ -167,10 +168,21 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req
 	if effectiveModelCacheKey(model) != "" && r.ModelCachePath != "" {
 		if err := r.ensureModelCachePVC(ctx, inferenceService); err != nil {
 			log.Error(err, "Failed to ensure model cache PVC exists", "namespace", inferenceService.Namespace)
-			return r.updateStatusWithSchedulingInfo(ctx, inferenceService, PhaseFailed, modelReady, 0, desiredReplicas, "", "Failed to create model cache PVC", nil)
+			return r.updateStatusWithSchedulingInfo(ctx, inferenceService, PhaseFailed, modelReady, 0, desiredReplicas, "",
+				fmt.Sprintf("Failed to ensure model cache PVC: %v", err), nil)
 		}
 	}
 
+	// spec.modelCache.claimName targets the download path, so it is meaningless
+	// for a pre-staged pvc:// source (mounted read-only, no download). The
+	// claimName is ignored in that case; surface a Warning so the conflict is
+	// visible instead of silently dropped.
+	if r.Recorder != nil && userModelCacheClaimName(inferenceService) != "" && isPVCSource(model.Spec.Source) {
+		r.Recorder.Eventf(inferenceService, nil, corev1.EventTypeWarning, "ModelCacheClaimIgnored", "Reconcile",
+			"spec.modelCache.claimName is ignored: model source %q is a pre-staged pvc:// volume (read-only, no download)",
+			model.Spec.Source)
+	}
+
 	isMetal := isMetalModel(model)
 
 	if r.Recorder != nil && needsOffloadMemoryWarning(inferenceService) {
diff --git a/internal/controller/inferenceservice_storage_test.go b/internal/controller/inferenceservice_storage_test.go
index 16683235..be206771 100644
--- a/internal/controller/inferenceservice_storage_test.go
+++ b/internal/controller/inferenceservice_storage_test.go
@@ -23,6 +23,7 @@ import (
 	. "github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/types"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -767,6 +768,166 @@ var _ = Describe("buildCachedStorageConfig cache mode selection (#728)", func()
 	})
 })
 
+var _ = Describe("buildCachedStorageConfig user claimName override (#928)", func() {
+	model := &inferencev1alpha1.Model{
+		Spec:   inferencev1alpha1.ModelSpec{Source: "https://example.com/model.gguf"},
+		Status: inferencev1alpha1.ModelStatus{CacheKey: "abc123def456"},
+	}
+	isvcWithClaim := func() *inferencev1alpha1.InferenceService {
+		return &inferencev1alpha1.InferenceService{
+			ObjectMeta: metav1.ObjectMeta{Name: "byo-isvc"},
+			Spec: inferencev1alpha1.InferenceServiceSpec{
+				ModelCache: &inferencev1alpha1.ModelCacheSpec{ClaimName: "my-model-cache"},
+			},
+		}
+	}
+
+	It("mounts the user PVC instead of the shared PVC in shared mode", func() {
+		config := buildCachedStorageConfig(model, isvcWithClaim(), ModelCacheModeShared, "", "curl:8.18.0", 102)
+		Expect(config.volumes[0].PersistentVolumeClaim.ClaimName).To(Equal("my-model-cache"))
+	})
+
+	It("mounts the user PVC instead of the per-isvc PVC in perService mode", func() {
+		config := buildCachedStorageConfig(model, isvcWithClaim(), ModelCacheModePerService, "", "curl:8.18.0", 102)
+		Expect(config.volumes[0].PersistentVolumeClaim.ClaimName).To(Equal("my-model-cache"))
+	})
+
+	It("keeps the cache layout and init containers identical to the built-in cache path", func() {
+		config := buildCachedStorageConfig(model, isvcWithClaim(), "", "", "curl:8.18.0", 102) // empty mode resolves to the shared default
+
+		// Weights still land under <cacheKey>/ (the Status.CacheKey set above), not the PVC root.
+		Expect(config.modelPath).To(Equal("/models/abc123def456/model.gguf"))
+		// Same prep + downloader init containers, in that order; the downloader mounts read-write.
+		Expect(config.initContainers).To(HaveLen(2))
+		Expect(config.initContainers[0].Name).To(Equal("model-cache-prep"))
+		Expect(config.initContainers[1].Name).To(Equal("model-downloader"))
+		Expect(config.initContainers[1].VolumeMounts[0].ReadOnly).To(BeFalse())
+		// The main container mounts the user PVC at /models, read-only.
+		Expect(config.volumeMounts[0].MountPath).To(Equal("/models"))
+		Expect(config.volumeMounts[0].ReadOnly).To(BeTrue())
+	})
+
+	It("uses the user PVC for multi-file staged models too", func() {
+		staged := &inferencev1alpha1.Model{
+			ObjectMeta: metav1.ObjectMeta{Name: "staged", Namespace: "default"},
+			Spec: inferencev1alpha1.ModelSpec{
+				Source: "hf://org/repo-GGUF",
+				Files:  []string{"model-Q4_K_M.gguf"},
+			},
+		}
+		config := buildCachedStorageConfig(staged, isvcWithClaim(), "", "", "curl:8.18.0", 102)
+		Expect(config.volumes[0].PersistentVolumeClaim.ClaimName).To(Equal("my-model-cache"))
+	})
+
+	It("does not affect an InferenceService without modelCache (shared PVC as before)", func() {
+		isvc := &inferencev1alpha1.InferenceService{ObjectMeta: metav1.ObjectMeta{Name: "plain-isvc"}}
+		config := buildCachedStorageConfig(model, isvc, ModelCacheModeShared, "", "curl:8.18.0", 102)
+		Expect(config.volumes[0].PersistentVolumeClaim.ClaimName).To(Equal(ModelCachePVCName))
+	})
+})
+
+var _ = Describe("ensureModelCachePVC (user claimName, #928)", func() {
+	var reconciler *InferenceServiceReconciler
+	var isvc *inferencev1alpha1.InferenceService
+	const userClaim = "byo-model-cache"
+
+	forceDeletePVC := func(name string) {
+		ctx := context.Background()
+		pvc := &corev1.PersistentVolumeClaim{}
+		key := types.NamespacedName{Name: name, Namespace: "default"}
+		if err := k8sClient.Get(ctx, key, pvc); err != nil {
+			return // nothing to delete (or Get failed); cleanup is best-effort
+		}
+		if len(pvc.Finalizers) > 0 { // drop finalizers first so the delete below can complete
+			pvc.Finalizers = nil
+			_ = k8sClient.Update(ctx, pvc)
+		}
+		_ = k8sClient.Delete(ctx, pvc)
+		Eventually(func() bool {
+			return errors.IsNotFound(k8sClient.Get(ctx, key, &corev1.PersistentVolumeClaim{}))
+		}, "5s", "100ms").Should(BeTrue())
+	}
+
+	createUserPVC := func() {
+		pvc := &corev1.PersistentVolumeClaim{
+			ObjectMeta: metav1.ObjectMeta{Name: userClaim, Namespace: "default"},
+			Spec: corev1.PersistentVolumeClaimSpec{
+				AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
+				Resources: corev1.VolumeResourceRequirements{
+					Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("10Gi")},
+				},
+			},
+		}
+		Expect(k8sClient.Create(context.Background(), pvc)).To(Succeed())
+	}
+
+	BeforeEach(func() {
+		forceDeletePVC(ModelCachePVCName)
+		forceDeletePVC(userClaim)
+		reconciler = &InferenceServiceReconciler{
+			Client:         k8sClient,
+			Scheme:         k8sClient.Scheme(),
+			ModelCacheMode: ModelCacheModeShared,
+		}
+		isvc = &inferencev1alpha1.InferenceService{
+			ObjectMeta: metav1.ObjectMeta{Name: "byo-cache-isvc", Namespace: "default"},
+			Spec: inferencev1alpha1.InferenceServiceSpec{
+				ModelRef:   "some-model",
+				ModelCache: &inferencev1alpha1.ModelCacheSpec{ClaimName: userClaim},
+			},
+		}
+	})
+
+	AfterEach(func() {
+		forceDeletePVC(ModelCachePVCName)
+		forceDeletePVC(userClaim)
+	})
+
+	It("succeeds without creating any operator PVC when the user PVC exists", func() {
+		createUserPVC()
+		Expect(reconciler.ensureModelCachePVC(context.Background(), isvc)).To(Succeed())
+
+		// Neither the shared PVC nor a per-isvc <isvc>-model-cache PVC is created.
+		shared := &corev1.PersistentVolumeClaim{}
+		err := k8sClient.Get(context.Background(), types.NamespacedName{Name: ModelCachePVCName, Namespace: "default"}, shared)
+		Expect(errors.IsNotFound(err)).To(BeTrue())
+		perISVC := &corev1.PersistentVolumeClaim{}
+		err = k8sClient.Get(context.Background(), types.NamespacedName{Name: isvc.Name + "-model-cache", Namespace: "default"}, perISVC)
+		Expect(errors.IsNotFound(err)).To(BeTrue())
+	})
+
+	It("never adopts or mutates the user PVC (no owner refs, no operator labels)", func() {
+		createUserPVC()
+		Expect(reconciler.ensureModelCachePVC(context.Background(), isvc)).To(Succeed())
+
+		pvc := &corev1.PersistentVolumeClaim{}
+		Expect(k8sClient.Get(context.Background(), types.NamespacedName{Name: userClaim, Namespace: "default"}, pvc)).To(Succeed())
+		Expect(pvc.OwnerReferences).To(BeEmpty())
+		Expect(pvc.Labels).NotTo(HaveKey("app.kubernetes.io/managed-by"))
+	})
+
+	It("does not create the user PVC and errors clearly when it is missing", func() {
+		err := reconciler.ensureModelCachePVC(context.Background(), isvc)
+		Expect(err).To(HaveOccurred())
+		Expect(err.Error()).To(ContainSubstring(userClaim))
+		Expect(err.Error()).To(ContainSubstring("spec.modelCache.claimName"))
+
+		pvc := &corev1.PersistentVolumeClaim{}
+		getErr := k8sClient.Get(context.Background(), types.NamespacedName{Name: userClaim, Namespace: "default"}, pvc)
+		Expect(errors.IsNotFound(getErr)).To(BeTrue())
+	})
+
+	It("overrides perService mode as well (no <isvc>-model-cache created)", func() {
+		reconciler.ModelCacheMode = ModelCacheModePerService // BeforeEach sets shared; switch modes for this spec
+		createUserPVC()
+		Expect(reconciler.ensureModelCachePVC(context.Background(), isvc)).To(Succeed())
+
+		perISVC := &corev1.PersistentVolumeClaim{}
+		err := k8sClient.Get(context.Background(), types.NamespacedName{Name: isvc.Name + "-model-cache", Namespace: "default"}, perISVC)
+		Expect(errors.IsNotFound(err)).To(BeTrue())
+	})
+})
+
 var _ = Describe("resolveCacheMode", func() {
 	It("maps an empty mode to the shared default", func() {
 		Expect(resolveCacheMode("")).To(Equal(ModelCacheModeShared))
diff --git a/internal/controller/model_storage.go b/internal/controller/model_storage.go
index 0f59e195..c887c4d4 100644
--- a/internal/controller/model_storage.go
+++ b/internal/controller/model_storage.go
@@ -70,12 +70,28 @@ func resolveCacheMode(mode string) string {
 	return ModelCacheModeShared
 }
 
+// userModelCacheClaimName reports the bring-your-own cache PVC requested via
+// spec.modelCache.claimName. It yields "" for a nil InferenceService or one
+// without a modelCache block, i.e. when the operator-global mode applies.
+func userModelCacheClaimName(isvc *inferencev1alpha1.InferenceService) string {
+	if isvc != nil && isvc.Spec.ModelCache != nil {
+		return isvc.Spec.ModelCache.ClaimName
+	}
+	return ""
+}
+
 // modelCachePVCName returns the name of the model cache PVC for the given mode.
-// In shared mode (the default, and the resolution of an empty mode) this is the
-// single cluster-wide PVC; in perService mode it is the per-InferenceService PVC
-// "<isvc>-model-cache". A nil isvc (unit tests that exercise the builder
-// directly) falls back to the shared name.
+// A per-InferenceService spec.modelCache.claimName override (#928) wins over
+// the operator-global mode: that user-owned PVC becomes the cache volume for
+// this workload only. Otherwise, in shared mode (the default, and the
+// resolution of an empty mode) this is the single cluster-wide PVC; in
+// perService mode it is the per-InferenceService PVC "<isvc>-model-cache". A
+// nil isvc (unit tests that exercise the builder directly) falls back to the
+// shared name.
 func modelCachePVCName(isvc *inferencev1alpha1.InferenceService, mode string) string {
+	if claim := userModelCacheClaimName(isvc); claim != "" {
+		return claim
+	}
 	if resolveCacheMode(mode) == ModelCacheModeShared || isvc == nil {
 		return ModelCachePVCName
 	}
@@ -644,6 +660,30 @@ func buildEmptyDirStorageConfig(model *inferencev1alpha1.Model, isvc *inferencev
 func (r *InferenceServiceReconciler) ensureModelCachePVC(ctx context.Context, isvc *inferencev1alpha1.InferenceService) error {
 	log := logf.FromContext(ctx)
 
+	// Bring-your-own cache PVC (#928): spec.modelCache.claimName names a
+	// user-owned claim, so the operator never creates, mutates, or deletes
+	// it — it only verifies the claim exists. A missing claim is surfaced as
+	// an error (-> Degraded condition + event) rather than silently falling
+	// back to the shared cache.
+	if claim := userModelCacheClaimName(isvc); claim != "" {
+		pvc := &corev1.PersistentVolumeClaim{}
+		err := r.Get(ctx, types.NamespacedName{Name: claim, Namespace: isvc.Namespace}, pvc)
+		if err == nil {
+			return nil
+		}
+		if apierrors.IsNotFound(err) {
+			if r.Recorder != nil {
+				r.Recorder.Eventf(isvc, nil, corev1.EventTypeWarning, "ModelCachePVCNotFound", "Reconcile",
+					"spec.modelCache.claimName %q does not exist in namespace %q; create the PVC or remove the field",
+					claim, isvc.Namespace)
+			}
+			return fmt.Errorf(
+				"model cache PVC %q (spec.modelCache.claimName) not found in namespace %q: the claim is user-owned and must be created before use",
+				claim, isvc.Namespace)
+		}
+		return fmt.Errorf("failed to check user model cache PVC %q: %w", claim, err)
+	}
+
 	shared := resolveCacheMode(r.ModelCacheMode) == ModelCacheModeShared
 	namespace := isvc.Namespace
 	pvcName := modelCachePVCName(isvc, r.ModelCacheMode)