From 9a42ca2d05b1735c26962e5143f89d83cfe8d238 Mon Sep 17 00:00:00 2001
From: Shay Goldstein
Date: Wed, 18 Feb 2026 15:22:44 +0200
Subject: [PATCH] [Kafka] Add optional ingress TCP proxy for Kafka external access

Add an alternative to NodePort for external Kafka connectivity that provides
the same behavior as the old Bitnami Kafka setup.

Changes:
- Add kafka-ingress-tcp.yaml template that creates tcp-services ConfigMap for ingress-nginx
- Add kafka.ingress.* values (disabled by default)
  When enabled, external clients can connect via ingress on port 9094 -> kafka.{namespace}.{cluster}.domain:9094
- This option is disabled by default. Users can choose between:
  - NodePort (default): port 30094
  - Ingress TCP proxy: port 9094

[CEML-651](https://iguazio.atlassian.net/browse/CEML-651)
---
 charts/mlrun-ce/Chart.yaml                    |   2 +-
 .../templates/kafka/kafka-ingress-tcp.yaml    |  82 ++++++++
 charts/mlrun-ce/values.yaml                   | 179 +++++++++++++++++-
 3 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 charts/mlrun-ce/templates/kafka/kafka-ingress-tcp.yaml

diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index 6296d177..eb248c10 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.11.0-rc.11
+version: 0.11.0-rc.12
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png
diff --git a/charts/mlrun-ce/templates/kafka/kafka-ingress-tcp.yaml b/charts/mlrun-ce/templates/kafka/kafka-ingress-tcp.yaml
new file mode 100644
index 00000000..76e4f2b4
--- /dev/null
+++ b/charts/mlrun-ce/templates/kafka/kafka-ingress-tcp.yaml
@@ -0,0 +1,82 @@
+{{- if and .Values.kafka.enabled .Values.kafka.ingress.enabled }}
+{{- /*
+  This template manages the ingress-nginx tcp-services ConfigMap entry for Kafka.
+  It MERGES with existing ConfigMap data (using Helm lookup), so other TCP service
+  entries are preserved. Multiple TCP services can coexist in the same ConfigMap.
+
+  IMPORTANT: TCP proxy routing is PORT-BASED ONLY (no hostname/SNI routing).
+  Each Kafka installation must use a unique externalPort cluster-wide.
+  The hostname does NOT provide isolation - it just points to the ingress controller.
+
+  Behavior:
+  - Clients connect to ingress:9094
+  - Traffic is proxied: ingress:9094 → kafka-bootstrap:9092 (internal listener)
+  - Works for in-cluster clients (hairpin NAT) accessing via ingress hostname
+
+  NOTE: We intentionally proxy to the INTERNAL listener (9092), NOT the external/nodeport
+  listener (9094). This is correct because:
+  - ingress-nginx TCP proxy is Layer 4 (transparent) - it forwards raw TCP bytes
+  - The internal listener is plaintext, which is what L4 proxy needs
+  - Kafka metadata returns internal DNS names that resolve correctly for in-cluster clients
+  - This matches how the old Bitnami Kafka setup worked for in-cluster traffic
+
+  LIMITATION FOR EXTERNAL CLIENTS:
+  - Kafka returns broker metadata with internal DNS names
+  - External clients may fail after bootstrap if they cannot resolve internal DNS
+  - For true external access, use NodePort listener with advertisedHost configured
+  - Single-broker setups may work externally since bootstrap = broker
+
+  Requirements:
+  - ingress-nginx must be installed BEFORE enabling this option
+  - The ingress-nginx controller args must include:
+    --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
+*/ -}}
+{{- $configMapName := .Values.kafka.ingress.tcpConfigMapName | default "tcp-services" }}
+{{- $ingressNamespace := .Values.kafka.ingress.ingressNamespace | default "ingress-nginx" }}
+{{- $externalPort := .Values.kafka.ingress.externalPort | default 9094 | toString }}
+{{- $kafkaService := printf "%s/%s-kafka-bootstrap:9092" .Release.Namespace .Values.kafka.name }}
+{{- /* Verify ingress namespace exists before proceeding */ -}}
+{{- $ingressNs := lookup "v1" "Namespace" "" $ingressNamespace }}
+{{- if not $ingressNs }}
+{{- fail (printf "kafka.ingress.enabled is true but Helm could not verify that namespace '%s' exists. This may mean the namespace is missing, that Helm is rendering without cluster access (helm template or client-side --dry-run, where lookup always returns an empty result), or that your current RBAC permissions do not allow reading namespaces. Please ensure ingress-nginx is installed, that kafka.ingress.ingressNamespace is set correctly, that the chart is rendered against a live cluster, and that you have sufficient permissions." $ingressNamespace) }}
+{{- end }}
+{{- /* Lookup existing ConfigMap and merge with our entry */ -}}
+{{- $existingCM := lookup "v1" "ConfigMap" $ingressNamespace $configMapName }}
+{{- $existingData := dict }}
+{{- if $existingCM }}
+{{- $existingData = $existingCM.data | default dict }}
+{{- end }}
+{{- /* Prevent silent overrides: fail if port is already used by a different service */ -}}
+{{- if hasKey $existingData $externalPort }}
+{{- $existingTarget := index $existingData $externalPort }}
+{{- if ne $existingTarget $kafkaService }}
+{{- fail (printf "kafka.ingress.externalPort %s is already mapped in ConfigMap %s/%s to '%s' for another service/namespace. The ingress-nginx tcp-services ConfigMap is shared cluster-wide, so a given external port can only be used by one TCP service across all namespaces. Either reuse the existing mapping or choose a different kafka.ingress.externalPort value." $externalPort $ingressNamespace $configMapName $existingTarget) }}
+{{- end }}
+{{- end }}
+{{- /* Merge existing data so other TCP service entries are preserved */ -}}
+{{- $newData := merge (dict $externalPort $kafkaService) $existingData }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ $configMapName }}
+  namespace: {{ $ingressNamespace }}
+  labels:
+    app.kubernetes.io/name: kafka
+    app.kubernetes.io/component: ingress-tcp
+    app.kubernetes.io/managed-by: Helm
+  annotations:
+    # Using hooks so this ConfigMap is NOT tracked by Helm release ownership.
+    # This prevents Helm from deleting it on uninstall (which would break other TCP services).
+    # NOTE: No hook-delete-policy is set, so Helm 3's default (before-hook-creation) applies: the
+    # existing ConfigMap is deleted and recreated on each run; the render-time lookup+merge above carries its entries over.
+    helm.sh/hook: post-install,post-upgrade
+    helm.sh/hook-weight: "10"
+data:
+  {{- /*
+    Format: "externalPort": "namespace/service:port"
+    Merged with existing ConfigMap entries to preserve other TCP services
+  */ -}}
+  {{- range $port, $target := $newData }}
+  {{ $port | quote }}: {{ $target | quote }}
+  {{- end }}
+{{- end }}
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index bed3e5b0..667fca6d 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -595,6 +595,159 @@ kafka:
   # Name for the bootstrap service alias (only used if enabled is true)
   name: kafka-stream
 
+  # ============================================================================
+  # INGRESS TCP PROXY CONFIGURATION
+  # ============================================================================
+  # Creates a ConfigMap entry for ingress-nginx to proxy external Kafka traffic.
+  # This MERGES with existing tcp-services ConfigMap entries, preserving other TCP services.
+  #
+  # This provides similar behavior to the old Bitnami Kafka setup:
+  # - Clients connect to the ingress controller on port 9094
+  # - Traffic is proxied through ingress: ingress:9094 → kafka-bootstrap:9092
+  # - Works from inside pods (with hairpin NAT) accessing via the ingress hostname
+  #
+  # ============================================================================
+  # IMPORTANT LIMITATIONS
+  # ============================================================================
+  #
+  # 1. PORT-BASED ROUTING ONLY (no hostname isolation)
+  #    The ingress-nginx TCP proxy routes by PORT only, not by hostname/SNI.
+  #    The hostname (e.g., kafka.{namespace}.{cluster}.domain) is just DNS pointing to
+  #    the ingress controller - it does NOT provide namespace isolation.
+  #    Each Kafka installation MUST use a unique externalPort cluster-wide.
+  #
+  # 2. EXTERNAL CLIENT LIMITATION
+  #    This proxy forwards to the internal Kafka listener (9092). Kafka returns
+  #    broker metadata with INTERNAL DNS names (e.g., kafka-0.kafka-brokers.mlrun).
+  #    - IN-CLUSTER CLIENTS: Work correctly - internal DNS resolves properly
+  #    - EXTERNAL CLIENTS: May fail after bootstrap if they cannot resolve
+  #      internal DNS names. For true external access, consider:
+  #      - Using NodePort listener with advertisedHost configured, OR
+  #      - Configuring Kafka to advertise the ingress hostname/port
+  #    - SINGLE-BROKER SETUPS: External clients may work since bootstrap = broker
+  #
+  # 3. PRIMARY USE CASE
+  #    This feature is primarily designed for in-cluster clients that need to
+  #    access Kafka via the ingress hostname (hairpin NAT scenario), matching
+  #    how the old Bitnami Kafka setup worked.
+  #
+  # NOTE: This works ALONGSIDE the NodePort external listener (both can be enabled).
+  # The ingress TCP proxy forwards to the internal listener (9092), which is correct
+  # for in-cluster clients because the internal listener is plaintext and Kafka
+  # metadata returns addresses that resolve correctly via internal DNS.
+  #
+  # ============================================================================
+  # PREREQUISITES - ingress-nginx must be configured for TCP services
+  # ============================================================================
+  #
+  # This Helm chart ONLY creates the tcp-services ConfigMap entry. You must also
+  # configure ingress-nginx to expose the TCP port. There are three requirements:
+  #
+  # 1. CONTROLLER ARG: Add --tcp-services-configmap argument
+  #    The ingress-nginx controller must be started with:
+  #      --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services
+  #
+  #    If using the ingress-nginx Helm chart, add to values.yaml:
+  #      controller:
+  #        extraArgs:
+  #          tcp-services-configmap: "$(POD_NAMESPACE)/tcp-services"
+  #
+  #    Or patch an existing deployment:
+  #      kubectl patch deployment ingress-nginx-controller -n ingress-nginx \
+  #        --type='json' -p='[{"op":"add","path":"/spec/template/spec/containers/0/args/-",
+  #        "value":"--tcp-services-configmap=$(POD_NAMESPACE)/tcp-services"}]'
+  #
+  # 2. CONTAINER PORT: The controller pod must expose port 9094
+  #    If using the ingress-nginx Helm chart, add to values.yaml:
+  #      controller:
+  #        containerPort:
+  #          kafka-tcp: 9094
+  #
+  #    Or patch an existing deployment:
+  #      kubectl patch deployment ingress-nginx-controller -n ingress-nginx \
+  #        --type='json' -p='[{"op":"add","path":"/spec/template/spec/containers/0/ports/-",
+  #        "value":{"containerPort":9094,"name":"kafka-tcp","protocol":"TCP"}}]'
+  #
+  # 3. SERVICE PORT: The ingress-nginx Service must expose port 9094
+  #    If using the ingress-nginx Helm chart, add to values.yaml:
+  #      tcp:
+  #        9094: "{release-namespace}/kafka-stream-kafka-bootstrap:9092"
+  #    (This also creates the tcp-services ConfigMap entry, making this chart's
+  #    ConfigMap creation redundant but harmless due to merge behavior)
+  #
+  #    Or patch an existing service:
+  #      kubectl patch svc ingress-nginx-controller -n ingress-nginx \
+  #        --type='json' -p='[{"op":"add","path":"/spec/ports/-",
+  #        "value":{"name":"kafka-tcp","port":9094,"targetPort":9094,"protocol":"TCP"}}]'
+  #
+  # ============================================================================
+  # COMPLETE INGRESS-NGINX HELM VALUES EXAMPLE
+  # ============================================================================
+  # If installing ingress-nginx via Helm, use these values for Kafka TCP support:
+  #
+  #   controller:
+  #     extraArgs:
+  #       tcp-services-configmap: "$(POD_NAMESPACE)/tcp-services"
+  #     containerPort:
+  #       kafka-tcp: 9094
+  #   tcp:
+  #     9094: "mlrun/kafka-stream-kafka-bootstrap:9092"
+  #
+  # ============================================================================
+  # VERIFICATION STEPS
+  # ============================================================================
+  #
+  # 1. Verify controller has tcp-services-configmap arg:
+  #      kubectl get deployment ingress-nginx-controller -n ingress-nginx \
+  #        -o jsonpath='{.spec.template.spec.containers[0].args}' | grep tcp-services
+  #
+  # 2. Verify controller pod exposes port 9094:
+  #      kubectl get deployment ingress-nginx-controller -n ingress-nginx \
+  #        -o jsonpath='{.spec.template.spec.containers[0].ports[*].containerPort}'
+  #      # Should include: 9094
+  #
+  # 3. Verify service exposes port 9094:
+  #      kubectl get svc ingress-nginx-controller -n ingress-nginx \
+  #        -o jsonpath='{.spec.ports[*].port}'
+  #      # Should include: 9094
+  #
+  # 4. Verify tcp-services ConfigMap has the Kafka entry:
+  #      kubectl get configmap tcp-services -n ingress-nginx -o yaml
+  #      # Should show: "9094": "{release-namespace}/kafka-stream-kafka-bootstrap:9092"
+  #
+  # 5. Verify nginx is listening on port 9094:
+  #      POD=$(kubectl get pods -n ingress-nginx -l app.kubernetes.io/component=controller \
+  #        -o jsonpath='{.items[0].metadata.name}')
+  #      kubectl exec -n ingress-nginx $POD -- cat /etc/nginx/nginx.conf | grep "listen.*9094"
+  #
+  # 6. Test TCP connectivity (from outside the release namespace):
+  #      kubectl run test --image=alpine --rm -it --restart=Never -n ingress-nginx \
+  #        -- sh -c "apk add netcat-openbsd && nc -zv ingress-nginx-controller 9094"
+  #
+  # ============================================================================
+  # PORT UNIQUENESS (TCP proxy routes by PORT, not hostname)
+  # ============================================================================
+  # IMPORTANT: Each externalPort can only be used by ONE Kafka installation cluster-wide.
+  # The ingress-nginx TCP proxy routes traffic based on PORT only - hostnames like
+  # kafka.ns1.cluster.domain and kafka.ns2.cluster.domain do NOT provide isolation.
+  # They are just DNS aliases pointing to the same ingress controller.
+  #
+  # If deploying multiple Kafka instances across namespaces, each must use a unique
+  # externalPort (e.g., 9094, 9095, 9096). The default port 9094 can only serve one namespace.
+  # The chart will fail with an error if you try to use a port already mapped to another service.
+  #
+  # ============================================================================
+  ingress:
+    # Enable ingress TCP proxy (disabled by default)
+    enabled: false
+    # External port exposed by ingress-nginx (must be unique cluster-wide)
+    # If another namespace already uses 9094, choose a different port (e.g., 9095)
+    externalPort: 9094
+    # Namespace where ingress-nginx is installed
+    ingressNamespace: ingress-nginx
+    # Name of the TCP services ConfigMap (default: tcp-services)
+    tcpConfigMapName: tcp-services
+
   replicas: 1
 
   listeners:
@@ -606,10 +759,32 @@ kafka:
       port: 9093
       type: internal
       tls: false
-    - name: internal
+    # External listener for direct NodePort access (not used by ingress TCP proxy).
+    # The ingress TCP proxy (kafka.ingress) routes to the internal 'client' listener (9092),
+    # while this NodePort listener (9094/30094) is for clients connecting directly to nodes.
+    - name: external
       port: 9094
-      type: internal
+      type: nodeport
       tls: false
+      configuration:
+        # NOTE: The bootstrap nodePort (30094) pins only the *bootstrap* service.
+        # Strimzi also creates per-broker NodePort services. For single-replica
+        # deployments (replicas=1), only the bootstrap port is needed for initial
+        # connection. For multi-replica deployments, you may need to either:
+        #   - Open additional auto-assigned broker NodePorts, or
+        #   - Explicitly configure brokers[].nodePort per broker, or
+        #   - Use the ingress TCP proxy option instead (kafka.ingress.enabled)
+        bootstrap:
+          nodePort: 30094
+        # For direct NodePort-based external access, set advertisedHost per broker:
+        # brokers:
+        #   - broker: 0
+        #     advertisedHost: kafka.{namespace}.{cluster}.lab.iguazeng.com
+        #     nodePort: 30095  # Optional: pin broker's nodePort (must be unique)
+        #
+        # NOTE: When using ingress TCP proxy (kafka.ingress.enabled=true), in-cluster clients
+        # connect via the ingress endpoint (kafka.{ingress-domain}:9094) and do NOT need
+        # advertisedHost configured here. External clients remain subject to the limitation documented under kafka.ingress.
 
   storage:
     type: persistent-claim