diff --git a/.claude/agents/vshnforgejo-expert.md b/.claude/agents/vshnforgejo-expert.md index 8b26896e7b..7f78828f0d 100644 --- a/.claude/agents/vshnforgejo-expert.md +++ b/.claude/agents/vshnforgejo-expert.md @@ -65,6 +65,8 @@ The mutating webhook is **service-agnostic** — it handles any XListenerSet, no - Per-gateway listener capacity limit (e.g., 100 per gateway) - New listeners go to least-loaded gateway with capacity - Denies creation if all gateways are full +- **AllowedGateways filtering**: XListenerSets can carry an `appcat.vshn.io/allowed-gateways` label (comma-separated gateway names) to restrict which gateways they may be placed on. Empty label = all gateways eligible. +- `GatewayKey` includes `AllowedGateways` field from the label, but sharding compares only Namespace+Name when checking current gateway capacity (since the gateway list entries don't carry the label) ### Controller Configuration ``` diff --git a/.claude/skills/appcat-architecture/SKILL.md b/.claude/skills/appcat-architecture/SKILL.md index 5fb4d1a4dc..49845d4b01 100644 --- a/.claude/skills/appcat-architecture/SKILL.md +++ b/.claude/skills/appcat-architecture/SKILL.md @@ -102,6 +102,8 @@ Services needing TCP exposure (e.g., Forgejo SSH) use a shared two-layer system: 1. **Composition function layer** (`pkg/comp-functions/functions/common/tcproute/`): `AddTCPRoute()` creates XListenerSet + TCPRoute + NetworkPolicy via provider-kubernetes. Any service can call this with a `TCPRouteConfig`. 2. **Webhook layer** (`pkg/controller/webhooks/tcpgateway/`): service-agnostic mutating webhook allocates ports (via Leases) and shards across Gateways for any XListenerSet. +**Gateway sharding** supports an `appcat.vshn.io/allowed-gateways` label on XListenerSets (comma-separated gateway names) to restrict placement. Empty/missing label = all gateways eligible. The sharding logic compares gateways by Namespace+Name only (ignoring the AllowedGateways metadata field) when checking current gateway capacity. + When adding TCP routing to a new service: use `tcproute.AddTCPRoute()` in the composition function. No webhook changes needed — the existing `tcpgateway/` handler covers all XListenerSets. ## Testing diff --git a/pkg/comp-functions/functions/common/tcproute/tcproute.go b/pkg/comp-functions/functions/common/tcproute/tcproute.go index 19d1516389..3aafc82c73 100644 --- a/pkg/comp-functions/functions/common/tcproute/tcproute.go +++ b/pkg/comp-functions/functions/common/tcproute/tcproute.go @@ -4,10 +4,12 @@ import ( "encoding/json" "fmt" "sort" + "strings" xfnproto "github.com/crossplane/function-sdk-go/proto/v1" "github.com/vshn/appcat/v4/pkg/common/utils" "github.com/vshn/appcat/v4/pkg/comp-functions/runtime" + "github.com/vshn/appcat/v4/pkg/controller/webhooks/tcpgateway" corev1 "k8s.io/api/core/v1" netv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -78,7 +80,8 @@ func createXListenerSet(svc *runtime.ServiceRuntime, cfg TCPRouteConfig, gateway xls.SetName(cfg.ResourceName) xls.SetNamespace(cfg.InstanceNamespace) xls.SetLabels(map[string]string{ - runtime.TCPGatewayLabel: "true", + runtime.TCPGatewayLabel: "true", + tcpgateway.AllowedLabelName: getAllowedGateways(svc, cfg.GatewaysConfigKey), }) err := unstructured.SetNestedMap(xls.Object, map[string]any{ @@ -261,25 +264,43 @@ func lookupDomain(svc *runtime.ServiceRuntime, configKey, gatewayName string) st } func defaultGatewayName(svc *runtime.ServiceRuntime, configKey string) string { + mapping := getRawGateways(svc, configKey) + + names := make([]string, 0, len(mapping)) + for name := range mapping { + names = append(names, name) + } + sort.Strings(names) + + if len(names) == 0 { + return "" + } + return names[0] +} + +func getRawGateways(svc *runtime.ServiceRuntime, configKey string) map[string]string { raw, ok := svc.Config.Data[configKey] if !ok || raw == "" { - return "" + return map[string]string{} } mapping := map[string]string{} if err := json.Unmarshal([]byte(raw), &mapping); err != nil { svc.Log.Error(err, "failed to parse gateways config", "key", configKey) - return "" + return mapping } + return mapping +} + +func getAllowedGateways(svc *runtime.ServiceRuntime, configKey string) string { + mapping := getRawGateways(svc, configKey) + names := make([]string, 0, len(mapping)) for name := range mapping { names = append(names, name) } sort.Strings(names) - if len(names) == 0 { - return "" - } - return names[0] + return strings.Join(names, ",") } diff --git a/pkg/controller/webhooks/tcpgateway/allocator.go b/pkg/controller/webhooks/tcpgateway/allocator.go index d690aed730..2419f5d4d2 100644 --- a/pkg/controller/webhooks/tcpgateway/allocator.go +++ b/pkg/controller/webhooks/tcpgateway/allocator.go @@ -31,6 +31,10 @@ var xListenerSetGVK = schema.GroupVersionKind{ type GatewayKey struct { Namespace string Name string + // AllowedGateways contains a comma separated list of allowed gateways. + // We're not using a slice here, because we won't be able to use + // GatewayKey as a map key then. + AllowedGateways string } // PortAllocator allocates unique TCP ports by scanning existing XListenerSet diff --git a/pkg/controller/webhooks/tcpgateway/handler.go b/pkg/controller/webhooks/tcpgateway/handler.go index 66e1382cee..c4b76c1357 100644 --- a/pkg/controller/webhooks/tcpgateway/handler.go +++ b/pkg/controller/webhooks/tcpgateway/handler.go @@ -66,6 +66,8 @@ func (h *XListenerSetHandler) Handle(ctx context.Context, req admission.Request) metadata, _ := obj["metadata"].(map[string]any) name, _ := metadata["name"].(string) namespace, _ := metadata["namespace"].(string) + labelsRaw, _ := metadata["labels"].(map[string]any) + allowedGateways, _ := labelsRaw[AllowedLabelName].(string) l := h.log.WithValues("name", name, "namespace", namespace) @@ -95,8 +97,9 @@ func (h *XListenerSetHandler) Handle(ctx context.Context, req admission.Request) } currentRef := GatewayKey{ - Namespace: parentNs, - Name: parentName, + Namespace: parentNs, + Name: parentName, + AllowedGateways: allowedGateways, } listeners, _ := spec["listeners"].([]any) diff --git a/pkg/controller/webhooks/tcpgateway/sharding.go b/pkg/controller/webhooks/tcpgateway/sharding.go index 43e1290ee4..2c50ebcce8 100644 --- a/pkg/controller/webhooks/tcpgateway/sharding.go +++ b/pkg/controller/webhooks/tcpgateway/sharding.go @@ -3,6 +3,11 @@ package tcpgateway import ( "fmt" "slices" + "strings" +) + +const ( + AllowedLabelName = "appcat.vshn.io/allowed-gateways" ) // GatewaySharding selects the best Gateway for new XListenerSets based on @@ -26,17 +31,25 @@ func NewGatewaySharding(gateways []GatewayKey, capacity int) *GatewaySharding { // Otherwise it picks the gateway with the fewest listeners that still has capacity. // Returns an error if all gateways are full. func (gs *GatewaySharding) SelectGateway(currentRef GatewayKey, newListenerCount int, listenerCounts map[GatewayKey]int) (GatewayKey, bool, error) { - if slices.Contains(gs.gateways, currentRef) && listenerCounts[currentRef]+newListenerCount <= gs.capacity { + currentBase := GatewayKey{Namespace: currentRef.Namespace, Name: currentRef.Name} + if slices.Contains(gs.gateways, currentBase) && listenerCounts[currentBase]+newListenerCount <= gs.capacity { return currentRef, false, nil } + var allowedGWs []string + if currentRef.AllowedGateways != "" { + allowedGWs = strings.Split(currentRef.AllowedGateways, ",") + } + best := GatewayKey{} bestCount := gs.capacity + 1 for _, gw := range gs.gateways { - count := listenerCounts[gw] - if count+newListenerCount <= gs.capacity && count < bestCount { - best = gw - bestCount = count + if len(allowedGWs) == 0 || slices.Contains(allowedGWs, gw.Name) { + count := listenerCounts[gw] + if count+newListenerCount <= gs.capacity && count < bestCount { + best = gw + bestCount = count + } } } diff --git a/pkg/controller/webhooks/tcpgateway/sharding_test.go b/pkg/controller/webhooks/tcpgateway/sharding_test.go index 59bd8fb1c1..f33e290e0c 100644 --- a/pkg/controller/webhooks/tcpgateway/sharding_test.go +++ b/pkg/controller/webhooks/tcpgateway/sharding_test.go @@ -156,6 +156,86 @@ func TestSelectGateway_UnknownCurrentRef_Reassigns(t *testing.T) { assert.Equal(t, GatewayKey{Namespace: "gw-ns", Name: "gw-1"}, selected, "should pick least-loaded known gateway") } +func TestSelectGateway_AllowedGateways_FiltersCorrectly(t *testing.T) { + gs := newTestSharding([]GatewayKey{ + {Namespace: "gw-ns", Name: "gw-1"}, + {Namespace: "gw-ns", Name: "gw-2"}, + {Namespace: "gw-ns", Name: "gw-3"}, + }, 10) + + // gw-1 is full, gw-2 has room but is NOT allowed, gw-3 has room and IS allowed + current := GatewayKey{Namespace: "gw-ns", Name: "gw-1", AllowedGateways: "gw-1,gw-3"} + counts := map[GatewayKey]int{ + {Namespace: "gw-ns", Name: "gw-1"}: 10, + {Namespace: "gw-ns", Name: "gw-2"}: 1, + {Namespace: "gw-ns", Name: "gw-3"}: 3, + } + + selected, changed, err := gs.SelectGateway(current, 1, counts) + require.NoError(t, err) + assert.True(t, changed) + assert.Equal(t, GatewayKey{Namespace: "gw-ns", Name: "gw-3"}, selected) +} + +func TestSelectGateway_AllowedGateways_AllFull(t *testing.T) { + gs := newTestSharding([]GatewayKey{ + {Namespace: "gw-ns", Name: "gw-1"}, + {Namespace: "gw-ns", Name: "gw-2"}, + {Namespace: "gw-ns", Name: "gw-3"}, + }, 5) + + // Only gw-1 and gw-2 allowed, both full. gw-3 has room but not allowed. + current := GatewayKey{Namespace: "gw-ns", Name: "gw-1", AllowedGateways: "gw-1,gw-2"} + counts := map[GatewayKey]int{ + {Namespace: "gw-ns", Name: "gw-1"}: 5, + {Namespace: "gw-ns", Name: "gw-2"}: 5, + {Namespace: "gw-ns", Name: "gw-3"}: 1, + } + + _, _, err := gs.SelectGateway(current, 1, counts) + require.Error(t, err) + assert.Contains(t, err.Error(), "all gateways are full") +} + +func TestSelectGateway_AllowedGateways_Empty_AllowsAll(t *testing.T) { + gs := newTestSharding([]GatewayKey{ + {Namespace: "gw-ns", Name: "gw-1"}, + {Namespace: "gw-ns", Name: "gw-2"}, + {Namespace: "gw-ns", Name: "gw-3"}, + }, 10) + + // Empty AllowedGateways = all gateways eligible, should pick least-loaded + current := GatewayKey{Namespace: "gw-ns", Name: "gw-1", AllowedGateways: ""} + counts := map[GatewayKey]int{ + {Namespace: "gw-ns", Name: "gw-1"}: 10, + {Namespace: "gw-ns", Name: "gw-2"}: 7, + {Namespace: "gw-ns", Name: "gw-3"}: 2, + } + + selected, changed, err := gs.SelectGateway(current, 1, counts) + require.NoError(t, err) + assert.True(t, changed) + assert.Equal(t, GatewayKey{Namespace: "gw-ns", Name: "gw-3"}, selected) +} + +func TestSelectGateway_AllowedGateways_CurrentUnderCapacity(t *testing.T) { + gs := newTestSharding([]GatewayKey{ + {Namespace: "gw-ns", Name: "gw-1"}, + {Namespace: "gw-ns", Name: "gw-2"}, + }, 10) + + // Current gateway has room — AllowedGateways shouldn't matter, no reassignment needed + current := GatewayKey{Namespace: "gw-ns", Name: "gw-1", AllowedGateways: "gw-2"} + counts := map[GatewayKey]int{ + {Namespace: "gw-ns", Name: "gw-1"}: 3, + } + + selected, changed, err := gs.SelectGateway(current, 1, counts) + require.NoError(t, err) + assert.False(t, changed) + assert.Equal(t, current, selected) +} + func TestCountListenersPerGateway(t *testing.T) { scheme := runtime.NewScheme() scheme.AddKnownTypeWithName(