From a23bb4b99f1b03c0cdc497acfd6417ce7a465e59 Mon Sep 17 00:00:00 2001 From: Leoyzen Date: Tue, 3 Mar 2026 09:53:50 +0800 Subject: [PATCH] fix(kserve): support minReplicas=0 for scale-to-zero and prevent reset on update This commit fixes two issues with minReplicas configuration in KServe: 1. Helm template condition fix: - Changed template condition from 'if .Values.minReplicas' to 'if ge (int .Values.minReplicas) 0' - This ensures minReplicas=0 is correctly rendered instead of being ignored (since 0 is falsy in Helm) 2. Update command default value fix: - Changed default value of --min-replicas flag from 1 to -1 in update_serving_kserve.go - Update logic only applies when args.MinReplicas >= 0 - Now updating other fields won't reset minReplicas to 1 Files changed: - charts/kserve/templates/inferenceservice.yaml - charts/kserve/values.yaml - pkg/argsbuilder/update_serving_kserve.go Fixes scale-to-zero (minReplicas=0) functionality for KServe inference services. Signed-off-by: Leoyzen --- charts/kserve/templates/inferenceservice.yaml | 4 ++-- charts/kserve/values.yaml | 2 +- pkg/argsbuilder/update_serving_kserve.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/kserve/templates/inferenceservice.yaml b/charts/kserve/templates/inferenceservice.yaml index 5394466a5..17afd30f9 100644 --- a/charts/kserve/templates/inferenceservice.yaml +++ b/charts/kserve/templates/inferenceservice.yaml @@ -29,7 +29,7 @@ spec: predictor: {{- include "setAffinityFunction" . | nindent 4 }} {{- if eq (include "kserve.isCustomMetrics" .) "false" }} - {{- if .Values.minReplicas }} + {{- if ge (int .Values.minReplicas) 0 }} minReplicas: {{ .Values.minReplicas }} {{- end }} {{- if .Values.maxReplicas }} @@ -347,7 +347,7 @@ spec: apiVersion: apps/v1 kind: Deployment name: {{ .Release.Name }}-predictor - {{- if .Values.minReplicas }} + {{- if ge (int .Values.minReplicas) 0 }} minReplicas: {{ .Values.minReplicas }} {{- end }} {{- if .Values.maxReplicas }} diff --git a/charts/kserve/values.yaml b/charts/kserve/values.yaml index aa3e376a7..9fc0eed3d 100644 --- a/charts/kserve/values.yaml +++ b/charts/kserve/values.yaml @@ -2,4 +2,4 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. -replicaCount: 1 \ No newline at end of file +replicaCount: 1 diff --git a/pkg/argsbuilder/update_serving_kserve.go b/pkg/argsbuilder/update_serving_kserve.go index eb8e84d85..a87e32458 100644 --- a/pkg/argsbuilder/update_serving_kserve.go +++ b/pkg/argsbuilder/update_serving_kserve.go @@ -79,7 +79,7 @@ func (s *UpdateKServeArgsBuilder) AddCommandFlags(command *cobra.Command) { command.Flags().StringVar(&s.args.ProtocolVersion, "protocol-version", "", "the protocol version to use by the predictor (i.e. v1 or v2 or grpc-v1 or grpc-v2)") // ComponentExtension defines the deployment configuration for a given InferenceService component - command.Flags().IntVar(&s.args.MinReplicas, "min-replicas", 1, "minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero") + command.Flags().IntVar(&s.args.MinReplicas, "min-replicas", -1, "minimum number of replicas, defaults to 1 but can be set to 0 to enable scale-to-zero. use -1 to keep existing value on update") command.Flags().IntVar(&s.args.MaxReplicas, "max-replicas", 0, "maximum number of replicas for autoscaling") command.Flags().IntVar(&s.args.ScaleTarget, "scale-target", 0, "specifies the integer target value of the metric type the Autoscaler watches for") command.Flags().StringVar(&s.args.ScaleMetric, "scale-metric", "", "the scaling metric type watched by autoscaler. possible values are concurrency, rps, cpu, memory. concurrency, rps are supported via KPA")