diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index 6e466aef8..dd1dfa29f 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -935,6 +935,21 @@ type DCGMExporterServiceConfig struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:text" Type corev1.ServiceType `json:"type,omitempty"` + // NodePort allows setting a custom port number for the NodePort service. + // Must be within the range configured in the Kubernetes API server (usually 30000-32767). + // +kubebuilder:validation:Minimum=30000 + // +kubebuilder:validation:Maximum=32767 + // +optional + NodePort *int32 `json:"nodePort,omitempty"` + + // ExternalTrafficPolicy controls how external traffic is handled by the Kubernetes service. + // Acceptable values are "Cluster" and "Local". This setting only applies when service type is NodePort or LoadBalancer. + // +kubebuilder:validation:Enum=Cluster;Local + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="External Traffic Policy" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:select:Cluster,Local" + ExternalTrafficPolicy *corev1.ServiceExternalTrafficPolicyType `json:"externalTrafficPolicy,omitempty"` + // InternalTrafficPolicy describes how nodes distribute service traffic they receive on the ClusterIP. // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Internal Traffic Policy for the DCGM Exporter K8s Service" diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index 1735b0699..93b9d7d4f 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -276,6 +276,16 @@ func (in *DCGMExporterMetricsConfig) DeepCopy() *DCGMExporterMetricsConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DCGMExporterServiceConfig) DeepCopyInto(out *DCGMExporterServiceConfig) { *out = *in + if in.NodePort != nil { + in, out := &in.NodePort, &out.NodePort + *out = new(int32) + **out = **in + } + if in.ExternalTrafficPolicy != nil { + in, out := &in.ExternalTrafficPolicy, &out.ExternalTrafficPolicy + *out = new(corev1.ServiceExternalTrafficPolicyType) + **out = **in + } if in.InternalTrafficPolicy != nil { in, out := &in.InternalTrafficPolicy, &out.InternalTrafficPolicy *out = new(corev1.ServiceInternalTrafficPolicy) diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index 032be5190..d5f06899d 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -385,10 +385,25 @@ spec: description: 'Optional: Service configuration for NVIDIA DCGM Exporter' properties: + externalTrafficPolicy: + description: |- + Controls how external traffic is routed to the service. + Valid values are "Cluster" (default) and "Local". Applies to NodePort and LoadBalancer service types only. + enum: + - Cluster + - Local + type: string internalTrafficPolicy: description: InternalTrafficPolicy describes how nodes distribute service traffic they receive on the ClusterIP. type: string + nodePort: + description: |- + Custom port number to expose for the NodePort service. + Must be between 30000 and 32767. + minimum: 30000 + maximum: 32767 + type: integer type: description: Type represents the ServiceType which describes ingress methods for a service diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index 032be5190..d5f06899d 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -385,10 +385,25 @@ spec: description: 'Optional: Service configuration for NVIDIA DCGM Exporter' properties: + externalTrafficPolicy: + description: |- + Controls how external traffic is routed to the service. + Valid values are "Cluster" (default) and "Local". Applies to NodePort and LoadBalancer service types only. + enum: + - Cluster + - Local + type: string internalTrafficPolicy: description: InternalTrafficPolicy describes how nodes distribute service traffic they receive on the ClusterIP. type: string + nodePort: + description: |- + Custom port number to expose for the NodePort service. + Must be between 30000 and 32767. + minimum: 30000 + maximum: 32767 + type: integer type: description: Type represents the ServiceType which describes ingress methods for a service diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 30b52e81a..8ad717df5 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -995,9 +995,17 @@ func TransformDCGMExporterService(obj *corev1.Service, config *gpuv1.ClusterPoli obj.Spec.Type = serviceConfig.Type } + if serviceConfig.NodePort != nil && len(obj.Spec.Ports) > 0 { + obj.Spec.Ports[0].NodePort = *serviceConfig.NodePort + } + if serviceConfig.InternalTrafficPolicy != nil { obj.Spec.InternalTrafficPolicy = serviceConfig.InternalTrafficPolicy } + + if serviceConfig.ExternalTrafficPolicy != nil { + obj.Spec.ExternalTrafficPolicy = *serviceConfig.ExternalTrafficPolicy + } } return nil } diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index 032be5190..d5f06899d 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -385,10 +385,25 @@ spec: description: 'Optional: Service configuration for NVIDIA DCGM Exporter' properties: + externalTrafficPolicy: + description: |- + Controls how external traffic is routed to the service. + Valid values are "Cluster" (default) and "Local". Applies to NodePort and LoadBalancer service types only. + enum: + - Cluster + - Local + type: string internalTrafficPolicy: description: InternalTrafficPolicy describes how nodes distribute service traffic they receive on the ClusterIP. type: string + nodePort: + description: |- + Custom port number to expose for the NodePort service. + Must be between 30000 and 32767. + minimum: 30000 + maximum: 32767 + type: integer type: description: Type represents the ServiceType which describes ingress methods for a service diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index 5f4b64c07..a8e313fb9 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -324,6 +324,9 @@ dcgmExporter: value: "/etc/dcgm-exporter/dcp-metrics-included.csv" resources: {} service: + type: ClusterIP + nodePort: null + externalTrafficPolicy: Cluster internalTrafficPolicy: Cluster serviceMonitor: enabled: false