diff --git a/cmd/compiler/addnode.go b/cmd/compiler/addnode.go
new file mode 100644
index 0000000..98d885c
--- /dev/null
+++ b/cmd/compiler/addnode.go
@@ -0,0 +1,210 @@
+// addnode.go implements the compiler addnode subcommand for generating a new
+// MachineConfig CR for a node being added to an existing cluster.
+//
+// Usage:
+//
+//	compiler addnode --cluster <name> --hostname <hostname> --ip <ip> --role <role>
+//	                 [--order <n>] [--existing-cr <path>] --output <dir>
+//
+// When --existing-cr is given, the machine and cluster config sections are
+// copied from the specified MachineConfig CR with identity fields overridden.
+// When absent, a skeleton CR is emitted with empty machine and cluster stubs.
+//
+// conductor-schema.md §9. platform-schema.md §9.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	corev1 "k8s.io/api/core/v1"
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/yaml"
+
+	platformv1alpha1 "github.com/ontai-dev/platform/api/v1alpha1"
+)
+
+// addnodeHelp is the hand-written help text that runAddNodeSubcommand prints to stderr as usage.
+const addnodeHelp = `Usage: compiler addnode --cluster <name> --hostname <hostname> --ip <ip> --role <role> --output <dir>
+                       [--order <n>] [--existing-cr <path>]
+
+Generate a MachineConfig CR for a node being added to an existing cluster.
+The output CR is placed in the --output directory as seam-mc-{cluster}-{hostname}.yaml.
+
+Flags:
+  --cluster      Cluster name (matches the TalosCluster CR name and seam-tenant-{cluster} namespace).
+  --hostname     Node hostname. The cluster-name prefix is stripped automatically if present,
+                 so both "cp4" and "ccs-dev-cp4" produce seam-mc-{cluster}-cp4.
+  --ip           Node IP address reachable on Talos API port 50000.
+  --role         Node role: controlplane or worker. (init is reserved for compiler bootstrap.)
+  --order        Upgrade sequence order (default: 1). init=0, controlplane=1..N, worker=N+1..M.
+  --existing-cr  Path to an existing MachineConfig CR YAML. When provided, spec.machine and
+                 spec.cluster are copied from the existing CR and identity fields are overridden
+                 with the flags above. Use to clone an existing node config for a new peer.
+  --output       Output directory for the generated MachineConfig CR YAML (required).
+
+When --existing-cr is absent, a skeleton CR is emitted with empty machine and cluster sections.
+Populate those sections with the Talos v1alpha1 machineconfig content before applying.
+
+Compile-only: compiler addnode never applies resources. Human review and GitOps apply required.
+`
+
+// compileAddNode writes a MachineConfig CR for a node joining an existing cluster.
+// clusterName names the TalosCluster; hostname may carry the "{cluster}-" prefix,
+// which is stripped before the CR name is built. role must be "controlplane" or
+// "worker" ("init" is rejected as reserved for compiler bootstrap). order is stored
+// in spec.order. A non-empty existingCRPath supplies spec.machine and spec.cluster.
+// The CR is written to output as seam-mc-{cluster}-{bareHostname}.yaml.
+func compileAddNode(clusterName, hostname, ip, role string, order int32, existingCRPath, output string) error {
+	switch {
+	case clusterName == "":
+		return fmt.Errorf("--cluster is required")
+	case hostname == "":
+		return fmt.Errorf("--hostname is required")
+	case ip == "":
+		return fmt.Errorf("--ip is required")
+	}
+
+	// Map the CLI role onto the CR role; anything but controlplane/worker is an error.
+	mcRole := platformv1alpha1.MachineConfigRoleControlPlane
+	switch role {
+	case "controlplane":
+	case "worker":
+		mcRole = platformv1alpha1.MachineConfigRoleWorker
+	case "init":
+		return fmt.Errorf("role=init is reserved for compiler bootstrap; use controlplane or worker")
+	default:
+		return fmt.Errorf("--role must be controlplane or worker, got %q", role)
+	}
+	if output == "" {
+		return fmt.Errorf("--output is required")
+	}
+
+	// With --existing-cr the config body is cloned; otherwise both sections stay nil.
+	var machineJSON, clusterJSON *apiextensionsv1.JSON
+	if existingCRPath != "" {
+		var err error
+		machineJSON, clusterJSON, err = loadMachineClusterFromCR(existingCRPath)
+		if err != nil {
+			return fmt.Errorf("read existing CR %q: %w", existingCRPath, err)
+		}
+	}
+
+	// The bare hostname keeps seam-mc-{cluster}-{hostname} from doubling the prefix.
+	bare := stripClusterPrefix(clusterName, hostname)
+	name := "seam-mc-" + clusterName + "-" + bare
+
+	meta := metav1.ObjectMeta{
+		Name:      name,
+		Namespace: "seam-tenant-" + clusterName,
+		Labels: map[string]string{
+			"ontai.dev/cluster":    clusterName,
+			"ontai.dev/node":       hostname,
+			"ontai.dev/node-role":  role,
+			"ontai.dev/managed-by": "compiler",
+		},
+	}
+	spec := platformv1alpha1.MachineConfigSpec{
+		Role:         mcRole,
+		Order:        order,
+		ClusterRef:   corev1.LocalObjectReference{Name: clusterName},
+		NodeIP:       ip,
+		NodeHostname: bare,
+		Machine:      machineJSON,
+		Cluster:      clusterJSON,
+	}
+	mc := platformv1alpha1.MachineConfig{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "platform.ontai.dev/v1alpha1",
+			Kind:       "MachineConfig",
+		},
+		ObjectMeta: meta,
+		Spec:       spec,
+	}
+
+	body, err := yaml.Marshal(mc)
+	if err != nil {
+		return fmt.Errorf("marshal MachineConfig CR: %w", err)
+	}
+
+	// Only a skeleton (no cloned body) gets the explanatory comment header.
+	header := ""
+	if existingCRPath == "" {
+		header = "# MachineConfig CR skeleton generated by compiler addnode.\n" +
+			"# Populate spec.machine and spec.cluster with the Talos v1alpha1\n" +
+			"# machineconfig sections for this node before applying.\n" +
+			"# Refer to: https://www.talos.dev/latest/reference/configuration/\n"
+	}
+
+	if err := os.MkdirAll(output, 0755); err != nil {
+		return fmt.Errorf("create output directory %q: %w", output, err)
+	}
+	outPath := filepath.Join(output, name+".yaml")
+	if err := os.WriteFile(outPath, append([]byte(header), body...), 0644); err != nil {
+		return fmt.Errorf("write MachineConfig CR %q: %w", outPath, err)
+	}
+	return nil
+}
+
+// stripClusterPrefix returns hostname with a leading "{clusterName}-" removed,
+// so ("ccs-dev", "ccs-dev-cp4") and ("ccs-dev", "cp4") both yield "cp4".
+// A hostname that is nothing but the prefix is returned unchanged, so a
+// non-empty hostname never collapses to an empty string.
+func stripClusterPrefix(clusterName, hostname string) string {
+	cut := len(clusterName) + 1
+	if len(hostname) <= cut || hostname[:cut] != clusterName+"-" {
+		return hostname
+	}
+	return hostname[cut:]
+}
+
+// loadMachineClusterFromCR parses the MachineConfig CR YAML at path and hands back
+// its spec.machine and spec.cluster sections, in that order. addnode uses it to
+// clone the config body of an existing peer node; absent sections come back nil.
+func loadMachineClusterFromCR(path string) (*apiextensionsv1.JSON, *apiextensionsv1.JSON, error) {
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		return nil, nil, fmt.Errorf("read file: %w", err)
+	}
+	type specBody struct {
+		Machine *apiextensionsv1.JSON `json:"machine" yaml:"machine"`
+		Cluster *apiextensionsv1.JSON `json:"cluster" yaml:"cluster"`
+	}
+	var doc struct {
+		Spec specBody `json:"spec" yaml:"spec"`
+	}
+	if err := yaml.Unmarshal(raw, &doc); err != nil {
+		return nil, nil, fmt.Errorf("parse MachineConfig CR: %w", err)
+	}
+	return doc.Spec.Machine, doc.Spec.Cluster, nil
+}
+
+// runAddNodeSubcommand parses the addnode flags, rejects an --order that overflows int32, and calls compileAddNode.
+func runAddNodeSubcommand(args []string) {
+	fs := flag.NewFlagSet("addnode", flag.ExitOnError)
+	cluster := fs.String("cluster", "", "Cluster name (required)")
+	hostname := fs.String("hostname", "", "Node hostname (required)")
+	ip := fs.String("ip", "", "Node IP address (required)")
+	role := fs.String("role", "", "Node role: controlplane or worker (required)")
+	order := fs.Int("order", 1, "Upgrade sequence order (default: 1)")
+	existingCR := fs.String("existing-cr", "", "Path to existing MachineConfig CR to clone machine/cluster sections from")
+	output := fs.String("output", "", "Output directory (required)")
+	fs.Usage = func() {
+		fmt.Fprint(os.Stderr, addnodeHelp)
+		fs.PrintDefaults()
+	}
+	if err := fs.Parse(args); err != nil {
+		fmt.Fprintf(os.Stderr, "compiler addnode: flag error: %v\n", err)
+		os.Exit(1)
+	} else if int(int32(*order)) != *order { // compileAddNode takes int32: refuse a value the conversion would wrap
+		fmt.Fprintf(os.Stderr, "compiler addnode: --order %d does not fit in int32\n", *order)
+		os.Exit(1)
+	}
+	if err := compileAddNode(*cluster, *hostname, *ip, *role, int32(*order), *existingCR, *output); err != nil {
+		fmt.Fprintf(os.Stderr, "compiler addnode: %v\n", err)
+		os.Exit(1)
+	}
+}
diff --git a/cmd/compiler/addnode_test.go b/cmd/compiler/addnode_test.go
new file mode 100644
index 0000000..9fdfefe
--- /dev/null
+++ b/cmd/compiler/addnode_test.go
@@ -0,0 +1,191 @@
+// addnode_test.go tests the compiler addnode subcommand.
+// Covers skeleton generation (no --existing-cr) and CR cloning
+// (--existing-cr copies machine/cluster sections, overrides identity fields).
+// All tests are fully offline -- no cluster connectivity.
+package main
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"sigs.k8s.io/yaml"
+)
+
+// TestAddNode_SkeletonOutput checks the no --existing-cr path: the written CR must
+// carry the expected type, metadata, and identity fields plus the skeleton header.
+func TestAddNode_SkeletonOutput(t *testing.T) {
+	outDir := t.TempDir()
+	if err := compileAddNode("ccs-dev", "cp4", "10.20.0.14", "controlplane", 3, "", outDir); err != nil {
+		t.Fatalf("compileAddNode error: %v", err)
+	}
+
+	raw, err := os.ReadFile(filepath.Join(outDir, "seam-mc-ccs-dev-cp4.yaml"))
+	if err != nil {
+		t.Fatalf("output file not found: %v", err)
+	}
+	// The last entry is the skeleton header comment; the rest are CR fields.
+	for _, want := range []string{
+		"apiVersion: platform.ontai.dev/v1alpha1",
+		"kind: MachineConfig",
+		"name: seam-mc-ccs-dev-cp4",
+		"namespace: seam-tenant-ccs-dev",
+		"role: controlplane",
+		"nodeIP: 10.20.0.14",
+		"nodeHostname: cp4",
+		"ontai.dev/cluster: ccs-dev",
+		"# MachineConfig CR skeleton generated by compiler addnode.",
+	} {
+		assertContainsStr(t, string(raw), want)
+	}
+}
+
+// TestAddNode_SkeletonStripsClusterPrefix verifies that a hostname already containing
+// the cluster-name prefix is not doubled in the output CR name.
+func TestAddNode_SkeletonStripsClusterPrefix(t *testing.T) {
+	outDir := t.TempDir()
+	// hostname includes cluster prefix -- should produce seam-mc-ccs-dev-cp4, not seam-mc-ccs-dev-ccs-dev-cp4.
+	err := compileAddNode("ccs-dev", "ccs-dev-cp4", "10.20.0.14", "controlplane", 3, "", outDir)
+	if err != nil {
+		t.Fatalf("compileAddNode error: %v", err)
+	}
+
+	// A missing file is fatal: the doubled-prefix check must not pass on empty data.
+	data, err := os.ReadFile(filepath.Join(outDir, "seam-mc-ccs-dev-cp4.yaml"))
+	if err != nil {
+		t.Fatalf("expected output file seam-mc-ccs-dev-cp4.yaml not found: %v", err)
+	}
+	if strings.Contains(string(data), "ccs-dev-ccs-dev") {
+		t.Errorf("hostname prefix was doubled in output: %s", string(data))
+	}
+}
+
+// TestAddNode_OrderField checks that the order and role arguments reach the written CR.
+func TestAddNode_OrderField(t *testing.T) {
+	dir := t.TempDir()
+	if err := compileAddNode("ccs-dev", "wk1", "10.20.0.20", "worker", 5, "", dir); err != nil {
+		t.Fatalf("compileAddNode error: %v", err)
+	}
+	raw, _ := os.ReadFile(filepath.Join(dir, "seam-mc-ccs-dev-wk1.yaml"))
+	assertContainsStr(t, string(raw), "order: 5")
+	assertContainsStr(t, string(raw), "role: worker")
+}
+
+// TestAddNode_InitRoleFails verifies that role=init is rejected with an error that mentions "init".
+func TestAddNode_InitRoleFails(t *testing.T) {
+	err := compileAddNode("ccs-dev", "cp1", "10.20.0.11", "init", 0, "", t.TempDir())
+	if err == nil {
+		t.Fatal("expected error for role=init; got nil") // Fatal, not Error: err.Error() below would panic on nil
+	}
+	if !strings.Contains(err.Error(), "init") {
+		t.Errorf("error %q should mention 'init'", err.Error())
+	}
+}
+
+// TestAddNode_InvalidRoleFails checks that a role outside the accepted set
+// ("not-a-role" here) makes compileAddNode return an error.
+func TestAddNode_InvalidRoleFails(t *testing.T) {
+	if err := compileAddNode("ccs-dev", "cp4", "10.20.0.14", "not-a-role", 1, "", t.TempDir()); err == nil {
+		t.Error("expected error for unknown role; got nil")
+	}
+}
+
+// TestAddNode_MissingClusterFails checks that compileAddNode returns an error
+// when the cluster name argument is the empty string.
+func TestAddNode_MissingClusterFails(t *testing.T) {
+	if err := compileAddNode("", "cp4", "10.20.0.14", "controlplane", 1, "", t.TempDir()); err == nil {
+		t.Error("expected error for missing cluster; got nil")
+	}
+}
+
+// TestAddNode_MissingIPFails checks that compileAddNode returns an error
+// when the node IP argument is the empty string.
+func TestAddNode_MissingIPFails(t *testing.T) {
+	if err := compileAddNode("ccs-dev", "cp4", "", "controlplane", 1, "", t.TempDir()); err == nil {
+		t.Error("expected error for missing ip; got nil")
+	}
+}
+
+// TestAddNode_ExistingCR_CopiesMachineCluster checks the --existing-cr path: the
+// output CR carries spec.machine and spec.cluster while every identity field
+// matches the arguments passed to compileAddNode.
+func TestAddNode_ExistingCR_CopiesMachineCluster(t *testing.T) {
+	// A bootstrap run supplies a real MachineConfig CR to clone from.
+	srcDir := t.TempDir()
+	if err := compileBootstrap(writeInputFile(t, bootstrapInputYAML), srcDir, "", ""); err != nil {
+		t.Fatalf("compileBootstrap error: %v", err)
+	}
+
+	// node1's MachineConfig CR acts as the existing-cr template.
+	srcCR := filepath.Join(srcDir, "seam-mc-ccs-mgmt-node1.yaml")
+	if _, err := os.Stat(srcCR); err != nil {
+		t.Fatalf("existing CR not found: %v", err)
+	}
+
+	outDir := t.TempDir()
+	if err := compileAddNode("ccs-mgmt", "node4", "10.20.0.14", "controlplane", 3, srcCR, outDir); err != nil {
+		t.Fatalf("compileAddNode with existing-cr error: %v", err)
+	}
+	raw, err := os.ReadFile(filepath.Join(outDir, "seam-mc-ccs-mgmt-node4.yaml"))
+	if err != nil {
+		t.Fatalf("output file not found: %v", err)
+	}
+	content := string(raw)
+
+	// Identity fields must come from the arguments.
+	for _, want := range []string{
+		"name: seam-mc-ccs-mgmt-node4",
+		"namespace: seam-tenant-ccs-mgmt",
+		"nodeIP: 10.20.0.14",
+		"nodeHostname: node4",
+		"role: controlplane",
+		"order: 3",
+	} {
+		assertContainsStr(t, content, want)
+	}
+
+	// Both config sections must be present in the parsed output.
+	var cr map[string]interface{}
+	if err := yaml.Unmarshal(raw, &cr); err != nil {
+		t.Fatalf("parse output CR: %v", err)
+	}
+	spec, ok := cr["spec"].(map[string]interface{})
+	if !ok || spec == nil {
+		t.Fatal("output CR has no spec")
+	}
+	for _, section := range []string{"machine", "cluster"} {
+		if spec[section] == nil {
+			t.Errorf("spec.%s should be populated from existing CR", section)
+		}
+	}
+
+	// A cloned CR is not a skeleton, so the skeleton header must be absent.
+	if strings.Contains(content, "skeleton generated by compiler addnode") {
+		t.Error("cloned CR must not contain skeleton header comment")
+	}
+}
+
+// TestAddNode_ExistingCR_MissingFileFails checks that compileAddNode returns an
+// error when the --existing-cr path points at a file that does not exist
+// ("/nonexistent/cr.yaml" here).
+func TestAddNode_ExistingCR_MissingFileFails(t *testing.T) {
+	if err := compileAddNode("ccs-dev", "cp4", "10.20.0.14", "controlplane", 1, "/nonexistent/cr.yaml", t.TempDir()); err == nil {
+		t.Error("expected error for missing existing-cr path; got nil")
+	}
+}
+
+// TestAddNode_NamingConvention checks that the output file follows the
+// seam-mc-{cluster}-{hostname}.yaml naming convention. platform-schema.md §9.
+func TestAddNode_NamingConvention(t *testing.T) {
+	dir := t.TempDir()
+	err := compileAddNode("my-cluster", "worker99", "10.10.0.99", "worker", 10, "", dir)
+	if err != nil {
+		t.Fatalf("compileAddNode error: %v", err)
+	}
+	if _, statErr := os.Stat(filepath.Join(dir, "seam-mc-my-cluster-worker99.yaml")); statErr != nil {
+		t.Errorf("expected output file seam-mc-my-cluster-worker99.yaml not found: %v", statErr)
+	}
+}
+
+// assertContainsStr is defined in compile_bootstrap_test.go.
diff --git a/cmd/compiler/compile.go b/cmd/compiler/compile.go
index 4324193..de5cb96 100644
--- a/cmd/compiler/compile.go
+++ b/cmd/compiler/compile.go
@@ -6,6 +6,7 @@ package main
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"log/slog"
 	"os"
@@ -14,7 +15,7 @@ import (
 	"time"
 
 	corev1 "k8s.io/api/core/v1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/yaml"
 
@@ -71,17 +72,20 @@ func ciliumPrerequisitesPatch() string {
 // the provided registry mirrors into machine.registries.mirrors. The http:// prefix
 // on endpoints is preserved exactly — no TLS config is added.
 func buildRegistryMirrorsPatch(mirrors []RegistryMirror) (string, error) {
+	// json: tags are required: sigs.k8s.io/yaml marshals via encoding/json, so
+	// without json: tags the field names default to PascalCase (Machine, Registries)
+	// which won't merge into the lowercase Talos machineconfig keys.
 	type mirrorSpec struct {
-		Endpoints []string `yaml:"endpoints"`
+		Endpoints []string `json:"endpoints" yaml:"endpoints"`
 	}
 	type registriesSpec struct {
-		Mirrors map[string]mirrorSpec `yaml:"mirrors"`
+		Mirrors map[string]mirrorSpec `json:"mirrors" yaml:"mirrors"`
 	}
 	type machineSpec struct {
-		Registries registriesSpec `yaml:"registries"`
+		Registries registriesSpec `json:"registries" yaml:"registries"`
 	}
 	type patchSpec struct {
-		Machine machineSpec `yaml:"machine"`
+		Machine machineSpec `json:"machine" yaml:"machine"`
 	}
 
 	mirrorMap := make(map[string]mirrorSpec, len(mirrors))
@@ -171,9 +175,9 @@ func extractCAFromMachineConfig(machineConfigBytes []byte) (*secrets.Bundle, err
 }
 
 // BootstrapNode declares a single Talos node for management cluster bootstrap.
-// Each node maps to one Talos machine configuration and one Kubernetes Secret.
+// Each node maps to one Talos machine configuration and one MachineConfig CR.
 type BootstrapNode struct {
-	// Hostname is the node's hostname. Used as the node name in Secret naming
+	// Hostname is the node's hostname. Used as the bare hostname in naming
 	// convention seam-mc-{cluster}-{hostname}. platform-schema.md §9.
 	Hostname string `yaml:"hostname"`
 
@@ -184,6 +188,11 @@ type BootstrapNode struct {
 	// "controlplane" (additional control plane nodes), or "worker".
 	// Exactly one node must have role "init".
 	Role string `yaml:"role"`
+
+	// MAC is the node's primary NIC MAC address. Informational only --
+	// not used by the compiler or operator. Stored for admin reference.
+	// +optional
+	MAC string `yaml:"mac,omitempty"`
 }
 
 // BootstrapSection holds management cluster bootstrap configuration.
@@ -744,25 +753,14 @@ func validateBootstrapInput(b *BootstrapSection) error {
 
 // compileBootstrap implements the bootstrap subcommand.
 //
-// Reads a ClusterInput spec (with a bootstrap section declaring node IPs, roles,
-// and Talos version) and produces three output artifacts in --output:
-//   - seam-mc-{cluster}-{hostname}.yaml — Kubernetes Secret YAML per node
-//     containing the Talos machine configuration. platform-schema.md §9.
-//   - {cluster-name}.yaml — TalosCluster CR with mode=bootstrap, capi.enabled=false.
-//   - bootstrap-sequence.yaml — documents the apply order.
+// Bootstrap mode: generates MachineConfig CRs (one per node), namespace manifest,
+// TalosCluster CR, and bootstrap-sequence.yaml. Uses Talos machinery for PKI.
 //
-// kubeconfigPath is the optional path to a kubeconfig file, used only when
-// in.ImportExistingCluster=true. Pass empty string to use the standard resolution
-// chain (KUBECONFIG env → ~/.kube/config).
+// Import mode: generates namespace manifest, talosconfig Secret (if resolvable),
+// TalosCluster CR, and bootstrap-sequence.yaml. MachineConfig CRs are NOT
+// generated -- admin provides them via compiler addnode or hand-authored. CP-INV-004.
 //
-// When importExistingCluster=true, Compiler connects to the cluster Kubernetes API
-// via kubeconfig, reads the init-node machine config Secret from seam-system, parses
-// it, and derives the secrets bundle from existing CA material so new configs are
-// signed with the same PKI. Fails fast if the kubeconfig is unreachable or the
-// Secret or its machineconfig.yaml field is missing.
-//
-// Uses the Talos machinery library to generate machine configurations.
-// No cluster connection is required in the default (fresh PKI) path.
+// kubeconfigPath is not used; retained for CLI flag compatibility.
 // conductor-schema.md §9.
 func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) error {
 	in, err := readClusterInput(input)
@@ -797,6 +795,11 @@ func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) err
 		controlPlaneEndpoint = ep
 	}
 
+	tcMode := platformv1alpha1.TalosClusterModeBootstrap
+	if in.Mode == "import" || in.ImportExistingCluster {
+		tcMode = platformv1alpha1.TalosClusterModeImport
+	}
+
 	// Resolve kubernetesVersion: explicit > support matrix.
 	kubernetesVersion := b.KubernetesVersion
 	if kubernetesVersion == "" {
@@ -808,8 +811,10 @@ func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) err
 	}
 
 	// Resolve installDisk: explicit > extracted from machineConfigPaths > default.
+	// Only needed for bootstrap mode (generate.NewInput); skip extraction in import mode
+	// to avoid requiring machineConfigPaths files that admin provides separately.
 	installDisk := b.InstallDisk
-	if installDisk == "" {
+	if installDisk == "" && tcMode == platformv1alpha1.TalosClusterModeBootstrap {
 		extracted, err := extractFromInitNode(in.MachineConfigPaths, b.Nodes,
 			func(mcBytes []byte) (string, error) {
 				return extractInstallDiskFromMachineConfig(mcBytes), nil
@@ -843,106 +848,6 @@ func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) err
 		}
 	}
 
-	// Resolve the secrets bundle. When importExistingCluster=true, extract PKI from
-	// an existing cluster. Two paths are available:
-	//
-	//   machineConfigPaths non-empty — local file path (pre-Seam clusters):
-	//     Read the init node entry from the map, load the raw machine config file,
-	//     and extract CA material via extractCAFromMachineConfig.
-	//
-	//   machineConfigPaths absent — Kubernetes API path (Seam clusters):
-	//     Connect to the cluster API via kubeconfig, read the seam-mc-{cluster}-{init}
-	//     Secret from seam-system, extract machineconfig.yaml, and extract CA material.
-	//
-	// Both paths share extractCAFromMachineConfig for the final CA extraction step.
-	var secretsBundle *secrets.Bundle
-	if in.Mode == "import" || in.ImportExistingCluster {
-		// Find the init node hostname (guaranteed present by validateBootstrapInput).
-		var initHostname string
-		for _, n := range b.Nodes {
-			if n.Role == "init" {
-				initHostname = n.Hostname
-				break
-			}
-		}
-
-		if len(in.MachineConfigPaths) > 0 {
-			// Local file path: read CA from user-provided machine config file.
-			// Only the init node entry is required; the same bundle is used for all nodes.
-			mcPath, ok := in.MachineConfigPaths[initHostname]
-			if !ok {
-				return fmt.Errorf("importExistingCluster: machineConfigPaths is non-empty but init node %q has no entry", initHostname)
-			}
-			mcBytes, err := os.ReadFile(mcPath)
-			if err != nil {
-				return fmt.Errorf("importExistingCluster: read machineconfig for init node %q from %q: %w", initHostname, mcPath, err)
-			}
-			secretsBundle, err = extractCAFromMachineConfig(mcBytes)
-			if err != nil {
-				return fmt.Errorf("importExistingCluster: extract CA from local file %q: %w", mcPath, err)
-			}
-		} else {
-			// Kubernetes API path: read CA from seam-mc Secret in seam-system.
-			resolvedKubeconfig := resolveKubeconfigPath(kubeconfigPath)
-			k8sClient, err := buildK8sClient(resolvedKubeconfig)
-			if err != nil {
-				return fmt.Errorf("importExistingCluster: connect to cluster via kubeconfig %q: %w", resolvedKubeconfig, err)
-			}
-
-			// Strip cluster-name prefix from hostname: Talos node names carry the
-			// cluster prefix (e.g. "ccs-mgmt-cp1" for cluster "ccs-mgmt"), so the
-			// Secret name would double the prefix without this strip. C-32.
-			hostname := strings.TrimPrefix(initHostname, in.Name+"-")
-			secretName := "seam-mc-" + in.Name + "-" + hostname
-			mcSecret, err := k8sClient.CoreV1().Secrets("seam-system").Get(
-				context.Background(), secretName, metav1.GetOptions{},
-			)
-			if err != nil {
-				if apierrors.IsNotFound(err) {
-					// seam-mc Secret absent — cluster was not bootstrapped via Seam.
-					// Fall through to the talosconfig-only path: emit only the
-					// talosconfig Secret and TalosCluster CR. No machineconfig
-					// generation, no PKI extraction. C-32 Bug 2.
-					return compileImportTalosconfigSecret(in, output, talosconfigPath)
-				}
-				return fmt.Errorf("importExistingCluster: read secret %q from seam-system: %w", secretName, err)
-			}
-
-			mcBytes, ok := mcSecret.Data["machineconfig.yaml"]
-			if !ok {
-				return fmt.Errorf("importExistingCluster: secret %q is missing machineconfig.yaml field", secretName)
-			}
-
-			secretsBundle, err = extractCAFromMachineConfig(mcBytes)
-			if err != nil {
-				return fmt.Errorf("importExistingCluster: extract CA from secret %q: %w", secretName, err)
-			}
-		}
-	} else {
-		secretsBundle, err = secrets.NewBundle(
-			secrets.NewFixedClock(time.Now()),
-			versionContract,
-		)
-		if err != nil {
-			return fmt.Errorf("generate secrets bundle: %w", err)
-		}
-	}
-
-	// Build the generate input with cluster-wide settings.
-	genInput, err := generate.NewInput(
-		in.Name,
-		controlPlaneEndpoint,
-		kubernetesVersion,
-		generate.WithVersionContract(versionContract),
-		generate.WithSecretsBundle(secretsBundle),
-		generate.WithInstallDisk(installDisk),
-		generate.WithInstallImage(installerImage),
-		generate.WithEndpointList(cpIPs),
-	)
-	if err != nil {
-		return fmt.Errorf("build generate input: %w", err)
-	}
-
 	if err := os.MkdirAll(output, 0755); err != nil {
 		return fmt.Errorf("create output directory: %w", err)
 	}
@@ -952,111 +857,135 @@ func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) err
 		ns = "seam-system"
 	}
 
-	// Build the ordered patch list:
-	//   1. CiliumPrerequisites (built-in, applied first)
-	//   2. RegistryMirrors (injected next)
-	//   3. User Patches (applied last, in order)
-	var patches []string
-	if in.CiliumPrerequisites {
-		patches = append(patches, ciliumPrerequisitesPatch())
-	}
-	if len(in.RegistryMirrors) > 0 {
-		mirrorPatch, err := buildRegistryMirrorsPatch(in.RegistryMirrors)
+	// MachineConfig CRs are generated for bootstrap mode only.
+	// Import mode: admin provides MachineConfig CRs (via compiler addnode or hand-authored).
+	// platform-schema.md §9, CP-INV-004.
+	var crNames []string
+	if tcMode == platformv1alpha1.TalosClusterModeBootstrap {
+		secretsBundle, err := secrets.NewBundle(
+			secrets.NewFixedClock(time.Now()),
+			versionContract,
+		)
 		if err != nil {
-			return fmt.Errorf("build registry mirrors patch: %w", err)
+			return fmt.Errorf("generate secrets bundle: %w", err)
 		}
-		patches = append(patches, mirrorPatch)
-	}
-	patches = append(patches, in.Patches...)
 
-	// Generate machine configuration for each node and write as a Secret.
-	var secretNames []string
-	for _, node := range b.Nodes {
-		machineType, err := nodeRoleToMachineType(node.Role)
+		genInput, err := generate.NewInput(
+			in.Name,
+			controlPlaneEndpoint,
+			kubernetesVersion,
+			generate.WithVersionContract(versionContract),
+			generate.WithSecretsBundle(secretsBundle),
+			generate.WithInstallDisk(installDisk),
+			generate.WithInstallImage(installerImage),
+			generate.WithEndpointList(cpIPs),
+		)
 		if err != nil {
-			return fmt.Errorf("node %q: %w", node.Hostname, err)
+			return fmt.Errorf("build generate input: %w", err)
 		}
 
-		cfg, err := genInput.Config(machineType)
-		if err != nil {
-			return fmt.Errorf("generate config for node %q: %w", node.Hostname, err)
+		// Build the ordered patch list:
+		//   1. CiliumPrerequisites (built-in, applied first)
+		//   2. RegistryMirrors (injected next)
+		//   3. User Patches (applied last, in order)
+		var patches []string
+		if in.CiliumPrerequisites {
+			patches = append(patches, ciliumPrerequisitesPatch())
 		}
+		if len(in.RegistryMirrors) > 0 {
+			mirrorPatch, err := buildRegistryMirrorsPatch(in.RegistryMirrors)
+			if err != nil {
+				return fmt.Errorf("build registry mirrors patch: %w", err)
+			}
+			patches = append(patches, mirrorPatch)
+		}
+		patches = append(patches, in.Patches...)
 
-		cfgBytes, err := cfg.Bytes()
-		if err != nil {
-			return fmt.Errorf("marshal config for node %q: %w", node.Hostname, err)
+		// Pre-compute upgrade order for each node.
+		// init=0, controlplane nodes=1..N in declaration order, workers=N+1..M.
+		cpIdx := int32(0)
+		workerIdx := int32(0)
+		cpCount := int32(0)
+		for _, n := range b.Nodes {
+			if n.Role == "controlplane" {
+				cpCount++
+			}
+		}
+		nodeOrder := make(map[string]int32, len(b.Nodes))
+		for _, n := range b.Nodes {
+			switch n.Role {
+			case "init":
+				nodeOrder[n.Hostname] = 0
+			case "controlplane":
+				cpIdx++
+				nodeOrder[n.Hostname] = cpIdx
+			case "worker":
+				nodeOrder[n.Hostname] = cpCount + 1 + workerIdx
+				workerIdx++
+			}
 		}
 
-		// Apply all patches in order (CiliumPrerequisites → RegistryMirrors → user Patches).
-		for i, patch := range patches {
-			cfgBytes, err = applyYAMLPatch(cfgBytes, patch)
+		for _, node := range b.Nodes {
+			machineType, err := nodeRoleToMachineType(node.Role)
 			if err != nil {
-				return fmt.Errorf("apply patch %d to node %q: %w", i, node.Hostname, err)
+				return fmt.Errorf("node %q: %w", node.Hostname, err)
 			}
-		}
 
-		// Strip cluster-name prefix from hostname before constructing the secret
-		// name so the prefix is not doubled (e.g. ccs-mgmt-cp1 → cp1). C-32.
-		// Machine config secrets always live in seam-tenant-{cluster}, not in the
-		// TalosCluster CR namespace (seam-system). Platform reads them from there.
-		bareHostname := strings.TrimPrefix(node.Hostname, in.Name+"-")
-		secretName := "seam-mc-" + in.Name + "-" + bareHostname
-		secret := corev1.Secret{
-			TypeMeta: metav1.TypeMeta{
-				APIVersion: "v1",
-				Kind:       "Secret",
-			},
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      secretName,
-				Namespace: "seam-tenant-" + in.Name,
-				Labels: map[string]string{
-					"ontai.dev/cluster":    in.Name,
-					"ontai.dev/node":       node.Hostname,
-					"ontai.dev/node-role":  node.Role,
-					"ontai.dev/managed-by": "compiler",
-				},
-			},
-			Type: corev1.SecretTypeOpaque,
-			StringData: map[string]string{
-				"machineconfig.yaml": string(cfgBytes),
-			},
-		}
+			cfg, err := genInput.Config(machineType)
+			if err != nil {
+				return fmt.Errorf("generate config for node %q: %w", node.Hostname, err)
+			}
 
-		if err := writeCRYAML(output, secretName, secret); err != nil {
-			return fmt.Errorf("write machineconfig secret for node %q: %w", node.Hostname, err)
+			cfgBytes, err := cfg.Bytes()
+			if err != nil {
+				return fmt.Errorf("marshal config for node %q: %w", node.Hostname, err)
+			}
+
+			// Apply all patches in order (CiliumPrerequisites → RegistryMirrors → user Patches).
+			for i, patch := range patches {
+				cfgBytes, err = applyYAMLPatch(cfgBytes, patch)
+				if err != nil {
+					return fmt.Errorf("apply patch %d to node %q: %w", i, node.Hostname, err)
+				}
+			}
+
+			// Strip cluster-name prefix from hostname before constructing the CR
+			// name so the prefix is not doubled (e.g. ccs-mgmt-cp1 → cp1). C-32.
+			// MachineConfig CRs always live in seam-tenant-{cluster}.
+			bareHostname := strings.TrimPrefix(node.Hostname, in.Name+"-")
+			mc, err := buildMachineConfigCR(node, cfgBytes, in.Name, bareHostname, nodeOrder[node.Hostname])
+			if err != nil {
+				return fmt.Errorf("build MachineConfig CR for node %q: %w", node.Hostname, err)
+			}
+			crName := mc.Name
+			if err := writeCRYAML(output, crName, mc); err != nil {
+				return fmt.Errorf("write MachineConfig CR for node %q: %w", node.Hostname, err)
+			}
+			crNames = append(crNames, crName+".yaml")
 		}
-		secretNames = append(secretNames, secretName+".yaml")
 	}
 
-	// C-35: When importExistingCluster=true, also emit the talosconfig Secret so
-	// Platform can generate the kubeconfig via ensureKubeconfigSecret. Applies to
-	// both the machineConfigPaths path (local file PKI) and the Kubernetes API path
-	// (Seam clusters). Failure is a warning -- the operator can apply manually.
-	// Also emit the seam-tenant namespace manifest so the admin can apply it before
-	// the Secrets (which live in seam-tenant-{cluster}). platform-schema.md §9.
-	if in.Mode == "import" || in.ImportExistingCluster {
-		nsFile, err := writeSeamTenantNamespaceManifest(in.Name, output)
-		if err != nil {
-			return err
-		}
-		secretNames = append([]string{nsFile}, secretNames...)
+	// Namespace manifest: always emitted. MachineConfig CRs (bootstrap) and
+	// talosconfig Secrets (import) both live in seam-tenant-{cluster}.
+	nsFile, err := writeSeamTenantNamespaceManifest(in.Name, output)
+	if err != nil {
+		return err
+	}
+	allResources := append([]string{nsFile}, crNames...)
+
+	// Import mode: also emit the talosconfig Secret so Platform can generate the
+	// kubeconfig via ensureKubeconfigSecret. Failure is a warning -- the operator
+	// can apply manually.
+	if tcMode == platformv1alpha1.TalosClusterModeImport {
 		if tcfgFile, err := writeTalosconfigSecret(in, talosconfigPath, output); err != nil {
 			return err
 		} else if tcfgFile != "" {
-			secretNames = append(secretNames, tcfgFile)
+			allResources = append(allResources, tcfgFile)
 		}
 	}
 
-	// Fix 1: importExistingCluster=true always emits mode=import. The
-	// machineConfigPaths field only controls where PKI is read from, not the
-	// cluster lifecycle mode. A re-imported cluster is always mode=import.
-	tcMode := platformv1alpha1.TalosClusterModeBootstrap
-	if in.Mode == "import" || in.ImportExistingCluster {
-		tcMode = platformv1alpha1.TalosClusterModeImport
-	}
-
 	// Produce TalosCluster CR. ontai.dev/owns-runnerconfig signals Platform to add
-	// a finalizer and clean up the RunnerConfig in ont-system on deletion. Bug 3.
+	// a finalizer and clean up the RunnerConfig in ont-system on deletion.
 	//
 	// Role is set when: (a) import path -- clusterRole defaults empty to management;
 	// (b) bootstrap path with explicit role field (e.g. role: tenant in fixture).
@@ -1093,7 +1022,79 @@ func compileBootstrap(input, output, kubeconfigPath, talosconfigPath string) err
 	}
 
 	// Produce bootstrap-sequence.yaml documenting the apply order.
-	return writeBootstrapSequence(output, in.Name, secretNames, tcMode)
+	return writeBootstrapSequence(output, in.Name, allResources, tcMode)
+}
+
+// buildMachineConfigCR converts a generated Talos machine config YAML (cfgBytes)
+// into a MachineConfig CR named seam-mc-{clusterName}-{bareHostname} in namespace
+// seam-tenant-{clusterName}. The top-level machine and cluster sections are stored
+// as raw JSON in spec.machine and spec.cluster (nil when the section is absent or
+// null) so the CR remains Talos-version-agnostic. Returns an error if cfgBytes
+// cannot be parsed or a section cannot be JSON-encoded. bareHostname must be the
+// hostname with any cluster-name prefix stripped (e.g. "cp1" for "ccs-mgmt-cp1").
+func buildMachineConfigCR(node BootstrapNode, cfgBytes []byte, clusterName, bareHostname string, order int32) (platformv1alpha1.MachineConfig, error) {
+	var rawMap map[string]interface{}
+	if err := yaml.Unmarshal(cfgBytes, &rawMap); err != nil {
+		return platformv1alpha1.MachineConfig{}, fmt.Errorf("parse machineconfig for node %q: %w", node.Hostname, err)
+	}
+
+	toJSON := func(key string) (*apiextensionsv1.JSON, error) { // encodes one top-level section of rawMap
+		v, ok := rawMap[key]
+		if !ok || v == nil { // section absent or YAML null: leave the CR field unset
+			return nil, nil
+		}
+		b, err := json.Marshal(v)
+		if err != nil {
+			return nil, fmt.Errorf("json-encode %q section: %w", key, err)
+		}
+		return &apiextensionsv1.JSON{Raw: b}, nil
+	}
+
+	machineJSON, err := toJSON("machine")
+	if err != nil {
+		return platformv1alpha1.MachineConfig{}, err
+	}
+	clusterJSON, err := toJSON("cluster")
+	if err != nil {
+		return platformv1alpha1.MachineConfig{}, err
+	}
+
+	role := platformv1alpha1.MachineConfigRoleControlPlane // default for every node.Role other than "init"/"worker"
+	switch node.Role {
+	case "init":
+		role = platformv1alpha1.MachineConfigRoleInit
+	case "worker":
+		role = platformv1alpha1.MachineConfigRoleWorker
+	}
+
+	crName := "seam-mc-" + clusterName + "-" + bareHostname
+	return platformv1alpha1.MachineConfig{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "platform.ontai.dev/v1alpha1",
+			Kind:       "MachineConfig",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      crName,
+			Namespace: "seam-tenant-" + clusterName,
+			Labels: map[string]string{
+				"ontai.dev/cluster":    clusterName,
+				"ontai.dev/node":       node.Hostname, // hostname as given in the input, not bareHostname
+				"ontai.dev/node-role":  node.Role,     // raw input role; can differ from spec.role when unrecognized
+				"ontai.dev/managed-by": "compiler",
+			},
+		},
+		Spec: platformv1alpha1.MachineConfigSpec{
+			Role:  role,
+			Order: order,
+			ClusterRef: corev1.LocalObjectReference{
+				Name: clusterName,
+			},
+			NodeIP:       node.IP,
+			NodeHostname: bareHostname,
+			Machine:      machineJSON,
+			Cluster:      clusterJSON,
+		},
+	}, nil
 }
 
 // nodeRoleToMachineType converts a bootstrap node role to the Talos machine.Type.
@@ -1193,18 +1194,18 @@ type BootstrapSequence struct {
 //
 // C-36: previously used kind: BootstrapSequence (not a valid CRD). platform-schema.md §9.
 func writeBootstrapSequence(output, clusterName string, secretFiles []string, mode platformv1alpha1.TalosClusterMode) error {
-	step1Desc := "Apply Talos machineconfig Secrets — one per node. " +
-		"Apply ALL before the TalosCluster CR."
+	step1Desc := "Apply seam-tenant namespace manifest AND MachineConfig CRs (one per node) " +
+		"in seam-tenant-" + clusterName + ". Apply ALL before the TalosCluster CR."
 	step2Desc := "Apply TalosCluster CR with mode=bootstrap and capi.enabled=false. " +
 		"Platform's TalosClusterReconciler watches this CR and submits the bootstrap Conductor Job."
 
 	if mode == platformv1alpha1.TalosClusterModeImport {
-		step1Desc = "Apply ALL Secrets: machineconfig Secrets (one per node) AND the talosconfig Secret " +
-			"(seam-mc-" + clusterName + "-talosconfig.yaml). " +
-			"The talosconfig Secret is required for Platform to generate the kubeconfig. " +
-			"Apply ALL before TalosCluster CR."
+		step1Desc = "Apply seam-tenant namespace manifest AND the talosconfig Secret " +
+			"(seam-mc-" + clusterName + "-talosconfig.yaml) in seam-tenant-" + clusterName + ". " +
+			"Admin must separately apply MachineConfig CRs (via compiler addnode or hand-authored) " +
+			"before applying the TalosCluster CR."
 		step2Desc = "Apply TalosCluster CR with mode=import. " +
-			"Apply AFTER all Secrets in step 1 are present in the cluster — " +
+			"Apply AFTER namespace, talosconfig Secret, and MachineConfig CRs are present — " +
 			"Platform reads the talosconfig Secret during TalosCluster reconciliation " +
 			"to generate and store the cluster kubeconfig."
 	}
diff --git a/cmd/compiler/compile_bootstrap_features_test.go b/cmd/compiler/compile_bootstrap_features_test.go
index a57f129..1a79946 100644
--- a/cmd/compiler/compile_bootstrap_features_test.go
+++ b/cmd/compiler/compile_bootstrap_features_test.go
@@ -673,10 +673,11 @@ bootstrap:
 
 // ── ImportExistingCluster ─────────────────────────────────────────────────────
 
-// TestBootstrap_ImportExistingCluster_MissingKubeconfigReturnsError verifies that
-// importExistingCluster: true with a non-existent kubeconfig path returns an error
-// rather than silently generating fresh PKI material.
-func TestBootstrap_ImportExistingCluster_MissingKubeconfigReturnsError(t *testing.T) {
+// TestBootstrap_ImportExistingCluster_Succeeds verifies that importExistingCluster:true
+// with all required fields present succeeds. The kubeconfig API fallback path was
+// removed when the MachineConfig CRD migration eliminated PKI extraction from
+// the cluster. import mode no longer connects to any external API.
+func TestBootstrap_ImportExistingCluster_Succeeds(t *testing.T) {
 	input := `
 name: test-cluster
 namespace: seam-system
@@ -696,10 +697,16 @@ bootstrap:
       role: init
 `
 	inputPath := writeInputFile(t, input)
-	// Pass a kubeconfig path that does not exist — connection must fail with an error.
-	err := compileBootstrap(inputPath, t.TempDir(), "/nonexistent/kubeconfig.yaml", "")
-	if err == nil {
-		t.Fatal("expected error for missing kubeconfig; got nil")
+	outDir := t.TempDir()
+	if err := compileBootstrap(inputPath, outDir, "", ""); err != nil {
+		t.Fatalf("expected importExistingCluster=true to succeed; got: %v", err)
+	}
+	// importExistingCluster=true → tcMode=Import → no MachineConfig CRs emitted.
+	if _, err := os.Stat(filepath.Join(outDir, "seam-mc-test-cluster-cp1.yaml")); err == nil {
+		t.Error("import mode must not emit MachineConfig CRs")
+	}
+	if _, err := os.Stat(filepath.Join(outDir, "test-cluster.yaml")); err != nil {
+		t.Errorf("TalosCluster CR not found: %v", err)
 	}
 }
 
diff --git a/cmd/compiler/compile_bootstrap_import_test.go b/cmd/compiler/compile_bootstrap_import_test.go
index 7eec389..9e0bef5 100644
--- a/cmd/compiler/compile_bootstrap_import_test.go
+++ b/cmd/compiler/compile_bootstrap_import_test.go
@@ -14,12 +14,17 @@ import (
 )
 
 // generateMachineConfigFile produces a valid Talos init-node machine config YAML
-// file for use in import-path tests. It runs compileBootstrap with fresh PKI to
-// generate a seam-mc Secret, extracts the machineconfig.yaml field, writes it to
-// a temp file, and returns the path.
+// file for use in import-path tests (machineConfigPaths). It runs compileBootstrap
+// with fresh PKI to generate a MachineConfig CR, extracts spec.machine and
+// spec.cluster, reconstructs a Talos machineconfig YAML, and writes it to a temp
+// file. This file is used as a machineConfigPaths entry for endpoint/disk extraction.
 func generateMachineConfigFile(t *testing.T, clusterName, hostname string) string {
 	t.Helper()
 
+	// Strip the cluster-name prefix from hostname to match the CR name.
+	// compileBootstrap uses TrimPrefix(hostname, clusterName+"-") for the bare name.
+	bareHostname := strings.TrimPrefix(hostname, clusterName+"-")
+
 	input := fmt.Sprintf(`
 name: %s
 namespace: seam-system
@@ -45,31 +50,44 @@ bootstrap:
 		t.Fatalf("generateMachineConfigFile: compileBootstrap failed: %v", err)
 	}
 
-	// Read the Secret YAML produced for the init node.
-	secretPath := filepath.Join(outDir, fmt.Sprintf("seam-mc-%s-%s.yaml", clusterName, hostname))
-	secretData, err := os.ReadFile(secretPath)
+	// Read the MachineConfig CR YAML produced for the init node.
+	crPath := filepath.Join(outDir, fmt.Sprintf("seam-mc-%s-%s.yaml", clusterName, bareHostname))
+	crData, err := os.ReadFile(crPath)
 	if err != nil {
-		t.Fatalf("generateMachineConfigFile: read secret YAML: %v", err)
+		t.Fatalf("generateMachineConfigFile: read MachineConfig CR YAML: %v", err)
 	}
 
-	// Extract machineconfig.yaml from the Secret's stringData field.
-	var secretObj struct {
-		StringData map[string]string `yaml:"stringData"`
+	// Parse spec.machine and spec.cluster from the CR, then reconstruct the
+	// full Talos machineconfig YAML (used by extractEndpointFromMachineConfig,
+	// extractInstallDiskFromMachineConfig, and extractCAFromMachineConfig).
+	var crObj struct {
+		Spec struct {
+			Machine interface{} `yaml:"machine"`
+			Cluster interface{} `yaml:"cluster"`
+		} `yaml:"spec"`
 	}
-	if err := yaml.Unmarshal(secretData, &secretObj); err != nil {
-		t.Fatalf("generateMachineConfigFile: parse secret YAML: %v", err)
+	if err := yaml.Unmarshal(crData, &crObj); err != nil {
+		t.Fatalf("generateMachineConfigFile: parse MachineConfig CR: %v", err)
 	}
-	mcYAML, ok := secretObj.StringData["machineconfig.yaml"]
-	if !ok {
-		t.Fatal("generateMachineConfigFile: secret missing machineconfig.yaml field")
+	if crObj.Spec.Machine == nil {
+		t.Fatal("generateMachineConfigFile: spec.machine is nil in generated CR")
 	}
 
-	// Write the raw machine config YAML to a dedicated temp file.
+	reconstructed := map[string]interface{}{
+		"machine": crObj.Spec.Machine,
+		"cluster": crObj.Spec.Cluster,
+	}
+	mcYAML, err := yaml.Marshal(reconstructed)
+	if err != nil {
+		t.Fatalf("generateMachineConfigFile: marshal reconstructed machineconfig: %v", err)
+	}
+
+	// Write the reconstructed machineconfig YAML to a temp file.
 	f, err := os.CreateTemp(t.TempDir(), "mc-*.yaml")
 	if err != nil {
 		t.Fatalf("generateMachineConfigFile: create temp file: %v", err)
 	}
-	if _, err := f.WriteString(mcYAML); err != nil {
+	if _, err := f.Write(mcYAML); err != nil {
 		t.Fatalf("generateMachineConfigFile: write machine config: %v", err)
 	}
 	f.Close()
@@ -80,11 +98,11 @@ bootstrap:
 
 // TestBootstrap_ImportExistingCluster_LocalFilePath verifies that when
 // importExistingCluster=true and machineConfigPaths is non-empty, Compiler
-// reads CA material from the local machine config file and successfully generates
-// all output artifacts (machine config Secrets, TalosCluster CR, bootstrap-sequence).
-// This path is used for clusters bootstrapped before Seam.
+// successfully generates all output artifacts for import mode.
+// Import mode emits: namespace manifest, TalosCluster CR, bootstrap-sequence.
+// MachineConfig CRs are NOT emitted for import mode -- admin provides them
+// (via compiler addnode or hand-authored). CP-INV-004.
 func TestBootstrap_ImportExistingCluster_LocalFilePath(t *testing.T) {
-	// Generate a real init-node machine config file from a fresh PKI bundle.
 	mcPath := generateMachineConfigFile(t, "import-cluster", "cp1")
 
 	input := fmt.Sprintf(`
@@ -118,11 +136,9 @@ bootstrap:
 		t.Fatalf("compileBootstrap (local file path) error: %v", err)
 	}
 
-	// All expected output files must be present.
+	// Import mode output: namespace + TalosCluster + bootstrap-sequence only.
 	for _, name := range []string{
 		"seam-tenant-namespace.yaml",
-		"seam-mc-import-cluster-cp1.yaml",
-		"seam-mc-import-cluster-wk1.yaml",
 		"import-cluster.yaml",
 		"bootstrap-sequence.yaml",
 	} {
@@ -130,11 +146,23 @@ bootstrap:
 			t.Errorf("expected output file %q not found: %v", name, err)
 		}
 	}
+
+	// MachineConfig CRs are admin-provided; compiler must not generate them.
+	for _, name := range []string{
+		"seam-mc-import-cluster-cp1.yaml",
+		"seam-mc-import-cluster-wk1.yaml",
+	} {
+		if _, err := os.Stat(filepath.Join(outDir, name)); err == nil {
+			t.Errorf("import mode must not generate MachineConfig CR %q; admin provides these", name)
+		}
+	}
 }
 
 // TestBootstrap_ImportExistingCluster_LocalFileMissingReturnsError verifies that
-// when machineConfigPaths is non-empty but the referenced file does not exist,
-// Compiler returns an error rather than panicking or silently producing output.
+// when machineConfigPaths references a nonexistent file and controlPlaneEndpoint
+// is absent (forcing endpoint extraction from the file), Compiler returns an error.
+// When endpoint and disk are explicit, the file is not read; this test omits only
+// the endpoint (installDisk stays set), which forces the compiler to read the file.
 func TestBootstrap_ImportExistingCluster_LocalFileMissingReturnsError(t *testing.T) {
 	input := `
 name: import-cluster
@@ -147,7 +175,6 @@ importExistingCluster: true
 machineConfigPaths:
   cp1: /nonexistent/machineconfig.yaml
 bootstrap:
-  controlPlaneEndpoint: "https://10.0.0.10:6443"
   talosVersion: "v1.7.0"
   kubernetesVersion: "1.30.0"
   installDisk: "/dev/sda"
@@ -164,9 +191,9 @@ bootstrap:
 }
 
 // TestBootstrap_ImportExistingCluster_InitNodeAbsentFromMapReturnsError verifies
-// that when machineConfigPaths is non-empty but the init node hostname is absent
-// from the map, Compiler returns an error. The init node entry is required for
-// CA extraction; omitting it is a configuration error.
+// that when machineConfigPaths is non-empty but contains no entry for any
+// control-plane node, and controlPlaneEndpoint is absent (requiring extraction),
+// Compiler returns an error about the missing endpoint.
 func TestBootstrap_ImportExistingCluster_InitNodeAbsentFromMapReturnsError(t *testing.T) {
 	input := `
 name: import-cluster
@@ -179,7 +206,6 @@ importExistingCluster: true
 machineConfigPaths:
   worker1: /some/path/worker.yaml
 bootstrap:
-  controlPlaneEndpoint: "https://10.0.0.10:6443"
   talosVersion: "v1.7.0"
   kubernetesVersion: "1.30.0"
   installDisk: "/dev/sda"
@@ -191,10 +217,10 @@ bootstrap:
 	inputPath := writeInputFile(t, input)
 	err := compileBootstrap(inputPath, t.TempDir(), "", "")
 	if err == nil {
-		t.Fatal("expected error when init node hostname absent from machineConfigPaths; got nil")
+		t.Fatal("expected error when no control-plane node in machineConfigPaths and endpoint absent; got nil")
 	}
-	if !containsStr(err.Error(), "cp1") {
-		t.Errorf("error message should mention the missing hostname %q; got: %v", "cp1", err)
+	if !containsStr(err.Error(), "controlPlaneEndpoint") {
+		t.Errorf("error should mention controlPlaneEndpoint; got: %v", err)
 	}
 }
 
@@ -247,11 +273,11 @@ bootstrap:
 	assertContainsStr(t, content, "ontai.dev/cluster: my-cluster")
 }
 
-// TestBootstrap_BootstrapMode_DoesNotEmitSeamTenantNamespaceManifest verifies that
-// compileBootstrap in mode=bootstrap (importExistingCluster=false) does NOT emit
-// seam-tenant-namespace.yaml. Platform creates the namespace for bootstrap/CAPI clusters.
-// Governor ruling 2026-04-21.
-func TestBootstrap_BootstrapMode_DoesNotEmitSeamTenantNamespaceManifest(t *testing.T) {
+// TestBootstrap_BootstrapMode_EmitsSeamTenantNamespaceManifest verifies that
+// compileBootstrap in mode=bootstrap emits seam-tenant-namespace.yaml.
+// Compiler creates namespaces for all modes; platform no longer creates
+// seam-tenant-{cluster} namespaces. CP-INV-004 amended 2026-05-31.
+func TestBootstrap_BootstrapMode_EmitsSeamTenantNamespaceManifest(t *testing.T) {
 	input := `
 name: fresh-cluster
 namespace: seam-system
@@ -277,9 +303,11 @@ bootstrap:
 	}
 
 	nsPath := filepath.Join(outDir, "seam-tenant-namespace.yaml")
-	if _, err := os.Stat(nsPath); err == nil {
-		t.Error("seam-tenant-namespace.yaml must not be emitted for mode=bootstrap")
+	nsData, err := os.ReadFile(nsPath)
+	if err != nil {
+		t.Fatalf("seam-tenant-namespace.yaml must be emitted for mode=bootstrap: %v", err)
 	}
+	assertContainsStr(t, string(nsData), "name: seam-tenant-fresh-cluster")
 }
 
 // TestBootstrap_ImportMode_NamespaceNameIsSeamTenantNotTenant verifies that the
@@ -328,11 +356,12 @@ bootstrap:
 
 // ── Kubernetes API fallback (machineConfigPaths absent) ───────────────────────
 
-// TestBootstrap_ImportExistingCluster_KubeconfigFallback verifies that when
-// importExistingCluster=true and machineConfigPaths is absent, Compiler falls
-// back to the Kubernetes API path and returns an error when the kubeconfig
-// is unreachable. This is the existing Seam-cluster import path.
-func TestBootstrap_ImportExistingCluster_KubeconfigFallback(t *testing.T) {
+// TestBootstrap_ImportMode_NoMachineConfigPaths_Succeeds verifies that import mode
+// with no machineConfigPaths (and explicit endpoint and disk) succeeds and emits
+// namespace + TalosCluster + bootstrap-sequence. The kubeconfig API fallback was
+// removed when the MachineConfig CRD migration eliminated PKI extraction.
+// Admin provides MachineConfig CRs separately. CP-INV-004.
+func TestBootstrap_ImportMode_NoMachineConfigPaths_Succeeds(t *testing.T) {
 	input := `
 name: import-cluster
 namespace: seam-system
@@ -352,10 +381,14 @@ bootstrap:
       role: init
 `
 	inputPath := writeInputFile(t, input)
-	// Pass a non-existent kubeconfig — the API path must fail with an error.
-	err := compileBootstrap(inputPath, t.TempDir(), "/nonexistent/kubeconfig.yaml", "")
-	if err == nil {
-		t.Fatal("expected error for missing kubeconfig in API fallback path; got nil")
+	outDir := t.TempDir()
+	if err := compileBootstrap(inputPath, outDir, "", ""); err != nil {
+		t.Fatalf("expected import mode to succeed without machineConfigPaths; got: %v", err)
+	}
+	for _, name := range []string{"seam-tenant-namespace.yaml", "import-cluster.yaml", "bootstrap-sequence.yaml"} {
+		if _, err := os.Stat(filepath.Join(outDir, name)); err != nil {
+			t.Errorf("expected output file %q not found: %v", name, err)
+		}
 	}
 }
 
diff --git a/cmd/compiler/compile_bootstrap_test.go b/cmd/compiler/compile_bootstrap_test.go
index 35444aa..618fd2c 100644
--- a/cmd/compiler/compile_bootstrap_test.go
+++ b/cmd/compiler/compile_bootstrap_test.go
@@ -52,8 +52,9 @@ func TestBootstrap_ProducesExpectedOutputFiles(t *testing.T) {
 		t.Fatalf("compileBootstrap error: %v", err)
 	}
 
-	// Expect 3 node Secrets + 1 TalosCluster + 1 bootstrap-sequence.
+	// Expect: namespace manifest + 3 MachineConfig CRs + TalosCluster CR + bootstrap-sequence.
 	expectedFiles := []string{
+		"seam-tenant-namespace.yaml",
 		"seam-mc-ccs-mgmt-node1.yaml",
 		"seam-mc-ccs-mgmt-node2.yaml",
 		"seam-mc-ccs-mgmt-node3.yaml",
@@ -68,10 +69,11 @@ func TestBootstrap_ProducesExpectedOutputFiles(t *testing.T) {
 	}
 }
 
-// TestBootstrap_SecretHasCorrectStructure verifies that the generated machineconfig
-// Secret for the init node has the required Kubernetes Secret fields.
+// TestBootstrap_MachineConfigCRHasCorrectStructure verifies that the generated
+// MachineConfig CR for the init node has the required fields.
 // platform-schema.md §9: naming convention seam-mc-{cluster}-{hostname}.
-func TestBootstrap_SecretHasCorrectStructure(t *testing.T) {
+// Phase 3a: MachineConfig CRD replaces machineconfig Secrets for bootstrap output.
+func TestBootstrap_MachineConfigCRHasCorrectStructure(t *testing.T) {
 	outDir := t.TempDir()
 	inputPath := writeInputFile(t, bootstrapInputYAML)
 
@@ -81,15 +83,17 @@ func TestBootstrap_SecretHasCorrectStructure(t *testing.T) {
 
 	data, err := os.ReadFile(filepath.Join(outDir, "seam-mc-ccs-mgmt-node1.yaml"))
 	if err != nil {
-		t.Fatalf("read Secret YAML: %v", err)
+		t.Fatalf("read MachineConfig CR YAML: %v", err)
 	}
 	content := string(data)
 
-	assertContainsStr(t, content, "apiVersion: v1")
-	assertContainsStr(t, content, "kind: Secret")
+	assertContainsStr(t, content, "apiVersion: platform.ontai.dev/v1alpha1")
+	assertContainsStr(t, content, "kind: MachineConfig")
 	assertContainsStr(t, content, "name: seam-mc-ccs-mgmt-node1")
 	assertContainsStr(t, content, "namespace: seam-tenant-ccs-mgmt")
-	assertContainsStr(t, content, "machineconfig.yaml:")
+	assertContainsStr(t, content, "role: init")
+	assertContainsStr(t, content, "nodeHostname: node1")
+	assertContainsStr(t, content, "nodeIP: 10.20.0.11")
 	assertContainsStr(t, content, "ontai.dev/cluster: ccs-mgmt")
 }
 
@@ -316,12 +320,9 @@ bootstrap:
 		t.Fatalf("compileBootstrap error: %v", err)
 	}
 
-	// The Secret YAML should contain the default installer image reference.
+	// The MachineConfig CR's spec.machine should contain the default installer image.
 	data, _ := os.ReadFile(filepath.Join(outDir, "seam-mc-ccs-mgmt-node1.yaml"))
-	content := string(data)
-	assertContainsStr(t, content, "machineconfig.yaml:")
-	// machineconfig.yaml should contain the default installer image.
-	assertContainsStr(t, content, "ghcr.io/siderolabs/installer:v1.7.0")
+	assertContainsStr(t, string(data), "ghcr.io/siderolabs/installer:v1.7.0")
 }
 
 // WS2 — Bootstrap malformed input validation tests.
diff --git a/cmd/compiler/compile_enable.go b/cmd/compiler/compile_enable.go
index 27a6ec8..14c8b2b 100644
--- a/cmd/compiler/compile_enable.go
+++ b/cmd/compiler/compile_enable.go
@@ -938,9 +938,12 @@ func writeBootstrapRBACPolicy(dir string) error {
 }
 
 // writeBootstrapPermissionSets writes guardian-permissionsets.yaml to dir.
-// Emits ONLY management-maximum, the Layer 1 fleet ceiling (CS-INV-008).
-// Per-operator PermissionSets are not emitted. All Seam operator RBACProfiles
-// reference management-maximum directly. guardian-schema.md §6, §19.
+// Emits two Layer 1 PermissionSets:
+//   - management-maximum: the fleet ceiling; all Seam operator RBACProfiles reference it.
+//   - extensions-maximum: the extension ceiling; covers CRDs for all ONT-managed extension
+//     operators (EXT-1 through EXT-10). RECON-CMN2.
+//
+// Per-operator PermissionSets are not emitted. guardian-schema.md §6, §19, CS-INV-008.
 func writeBootstrapPermissionSets(dir string) error {
 	// rule builds a single permission rule map.
 	rule := func(apiGroups, resources, verbs []string) map[string]interface{} {
@@ -975,14 +978,37 @@ func writeBootstrapPermissionSets(dir string) error {
 				rule([]string{"*"}, []string{"*"}, allVerbs),
 			},
 		},
+		{
+			// extensions-maximum: Layer 1 extension ceiling. Covers CRDs for all ten
+			// ONT-managed extension operator categories (EXT-1 through EXT-10). Extension
+			// RBACProfiles in seam-tenant-* namespaces reference cluster-maximum (the
+			// per-cluster copy) for permission enforcement; this PermissionSet declares the
+			// fleet-level CRD-group boundary for governance audits. RECON-CMN2.
+			name: "extensions-maximum",
+			labels: map[string]string{
+				"ontai.dev/managed-by":          "compiler",
+				"ontai.dev/permission-set-type": "bootstrap",
+				"ontai.dev/policy-type":         "management",
+			},
+			description: "Extension permission ceiling -- CRDs for ONT-managed extension operators",
+			permissions: []map[string]interface{}{
+				rule([]string{"external-secrets.io"}, []string{"externalsecrets", "secretstores", "clustersecretstores"}, allVerbs),
+				rule([]string{"kyverno.io"}, []string{"clusterpolicies", "policies", "policyreports", "clusterpolicyreports"}, allVerbs),
+				rule([]string{"aquasecurity.github.io"}, []string{"vulnerabilityreports", "configauditreports", "clustervulnerabilityreports"}, allVerbs),
+				rule([]string{"velero.io"}, []string{"backups", "backupstoragelocations", "restores", "schedules", "volumesnapshotlocations"}, allVerbs),
+				rule([]string{"cost.grafana.com"}, []string{"*"}, allVerbs),
+				rule([]string{"monitoring.coreos.com"}, []string{"servicemonitors", "prometheusrules", "podmonitors"}, allVerbs),
+				rule([]string{"apiextensions.crossplane.io", "pkg.crossplane.io"}, []string{"*"}, allVerbs),
+			},
+		},
 	}
 
 	var buf bytes.Buffer
 	buf.WriteString("# Bootstrap PermissionSet CRs\n")
 	buf.WriteString("# Generated by: compiler enable (phase 1 guardian-bootstrap)\n")
-	buf.WriteString("# management-maximum is the Layer 1 fleet ceiling (guardian-schema.md §19 Layer 1).\n")
-	buf.WriteString("# CS-INV-008: exactly one PermissionSet at Layer 1. All Seam operator RBACProfiles\n")
-	buf.WriteString("# reference management-maximum directly. No per-operator PermissionSets are emitted.\n")
+	buf.WriteString("# management-maximum: Layer 1 fleet ceiling (guardian-schema.md §19 Layer 1).\n")
+	buf.WriteString("# extensions-maximum: Layer 1 extension ceiling for ONT-managed extension operators.\n")
+	buf.WriteString("# CS-INV-008. No per-operator PermissionSets are emitted.\n")
 
 	for _, s := range sets {
 		spec := map[string]interface{}{
@@ -1653,6 +1679,7 @@ func writePhase3PlatformDispatcher(output string, ops []operatorSpec) error {
 		"platform-dispatcher-deployments.yaml",
 		"platform-dispatcher-metrics-services.yaml",
 		"seam-service.yaml",
+		"seam-declaring-principal-webhook.yaml",
 		"seam-lineage-webhooks.yaml",
 	}
 
@@ -1664,6 +1691,8 @@ func writePhase3PlatformDispatcher(output string, ops []operatorSpec) error {
 			"provisioned=true (kubectl get rbacprofiles -n seam-system). " +
 			"These operators must be operational before Conductor's RBACProfile " +
 			"can be provisioned in phase 4. " +
+			"Verify seam MutatingWebhookConfiguration is registered: " +
+			"kubectl get mutatingwebhookconfigurations | grep seam-root-declaration. " +
 			"Verify seam ValidatingWebhookConfigurations are registered: " +
 			"kubectl get validatingwebhookconfigurations | grep seam-lineage.",
 		ApplyOrder: files,
@@ -1727,6 +1756,14 @@ func writePhase3PlatformDispatcher(output string, ops []operatorSpec) error {
 		return err
 	}
 
+	// seam-declaring-principal-webhook.yaml — MutatingWebhookConfiguration that stamps
+	// infrastructure.ontai.dev/declaring-principal on TalosCluster and PackDelivery at
+	// CREATE time. Required for LineageController to populate declaringPrincipal on
+	// LineageRecord with the actual requesting principal.
+	if err := writeSeamDeclaringPrincipalWebhook(dir, seamNamespace); err != nil {
+		return err
+	}
+
 	// seam-lineage-webhooks.yaml — three ValidatingWebhookConfigurations for LineageRecord
 	// governance: immutability (Decision 1), authorship gate (Decision 3), domainRef (Decision 2).
 	if err := writeSeamWebhooks(dir); err != nil {
@@ -1736,6 +1773,82 @@ func writePhase3PlatformDispatcher(output string, ops []operatorSpec) error {
 	return nil
 }
 
+// writeSeamDeclaringPrincipalWebhook writes seam-declaring-principal-webhook.yaml to dir.
+// The file holds one MutatingWebhookConfiguration (seam-root-declaration-principal) that
+// routes CREATE of seam.ontai.dev/v1alpha1 talosclusters and packdeliveries to the "seam"
+// Service in seamNamespace; per the design, the webhook server stamps
+// infrastructure.ontai.dev/declaring-principal from AdmissionReview.UserInfo.Username.
+// Without this webhook, the LineageController falls back to "system:unknown" for
+// declaringPrincipal on every LineageRecord, making ownership tracing impossible.
+// caBundle is injected by cert-manager via the cert-manager.io/inject-ca-from annotation.
+// Returns an error if the manifest cannot be marshaled or the file cannot be written.
+func writeSeamDeclaringPrincipalWebhook(dir, seamNamespace string) error {
+	injectAnnotation := seamNamespace + "/seam-webhook-cert" // "<namespace>/<name>" value for inject-ca-from
+
+	mwc := map[string]interface{}{
+		"apiVersion": "admissionregistration.k8s.io/v1",
+		"kind":       "MutatingWebhookConfiguration",
+		"metadata": map[string]interface{}{
+			"name": "seam-root-declaration-principal",
+			"annotations": map[string]string{
+				"ontai.dev/managed-by":           "compiler",
+				"cert-manager.io/inject-ca-from": injectAnnotation,
+			},
+		},
+		"webhooks": []map[string]interface{}{
+			{
+				"name":                    "mutate-root-declaration-declaring-principal.seam.ontai.dev",
+				"admissionReviewVersions": []string{"v1"},
+				"sideEffects":             "None",
+				"failurePolicy":           "Fail", // matching CREATE requests are rejected if the webhook call errors
+				"rules": []map[string]interface{}{
+					{
+						"apiGroups":   []string{"seam.ontai.dev"},
+						"apiVersions": []string{"v1alpha1"},
+						"operations":  []string{"CREATE"},
+						"resources":   []string{"talosclusters", "packdeliveries"},
+						"scope":       "Namespaced",
+					},
+				},
+				"namespaceSelector": map[string]interface{}{
+					"matchExpressions": []map[string]interface{}{
+						{
+							"key":      "seam.ontai.dev/webhook-mode",
+							"operator": "NotIn", // namespaces lacking the label also match, so only "exempt" opts out
+							"values":   []string{"exempt"},
+						},
+					},
+				},
+				"clientConfig": map[string]interface{}{
+					"service": map[string]interface{}{
+						"name":      "seam",
+						"namespace": seamNamespace,
+						"path":      "/mutate-root-declaration-declaring-principal",
+						"port":      443,
+					},
+				},
+			},
+		},
+	}
+
+	data, err := yaml.Marshal(mwc)
+	if err != nil {
+		return fmt.Errorf("marshal seam declaring-principal MutatingWebhookConfiguration: %w", err)
+	}
+
+	var buf bytes.Buffer
+	buf.WriteString("# seam Root Declaration Declaring Principal MutatingWebhookConfiguration\n")
+	buf.WriteString("# Generated by: compiler enable (phase 3 platform-dispatcher)\n")
+	buf.WriteString("# Stamps infrastructure.ontai.dev/declaring-principal on TalosCluster and\n")
+	buf.WriteString("# PackDelivery at CREATE time from AdmissionReview.UserInfo.Username.\n")
+	buf.WriteString("# Required for LineageController declaringPrincipal traceability.\n")
+	buf.WriteString("# caBundle injected by cert-manager CA injector.\n")
+	buf.WriteString("---\n")
+	buf.Write(data)
+
+	return os.WriteFile(filepath.Join(dir, "seam-declaring-principal-webhook.yaml"), buf.Bytes(), 0644)
+}
+
 // writePlatformExecutorRoleFile emits a Role and RoleBinding in ont-system for the
 // platform-executor SA. Conductor executor Jobs run as this SA and write
 // InfrastructureTalosClusterOperationResult CRs to POD_NAMESPACE (ont-system).
@@ -1949,11 +2062,20 @@ func writePhase5PostBootstrap(output string, operators []operatorSpec, clusterNa
 	files := []string{
 		"leaderelection.yaml",
 	}
-	// pack-deploy-queue.yaml and dispatcher-runner.yaml require Kueue and seam-tenant-{name}
-	// namespaces, which exist only on the management cluster (INV-003).
-	if clusterName != "" && clusterRole != "tenant" {
+	if clusterName != "" {
+		// pack-deploy-queue.yaml and dispatcher-runner.yaml are required for both
+		// management and tenant cluster enable bundles. These resources live in
+		// seam-tenant-{clusterName} on the management cluster and must be applied to
+		// the management cluster (not the tenant cluster). The enable script is
+		// responsible for routing these files to the correct kubectl context.
 		files = append(files, "pack-deploy-queue.yaml", "dispatcher-runner.yaml")
 	}
+	if clusterName != "" && clusterRole != "tenant" {
+		// watchdog-queue.yaml is management-cluster-only: the conductor watchdog
+		// submits remediation Jobs in ont-system, which only exists on ccs-mgmt.
+		// conductor-schema.md §6 RuntimeDrift remediation.
+		files = append(files, "watchdog-queue.yaml")
+	}
 
 	meta := phaseMeta{
 		Phase: "post-bootstrap",
@@ -1971,19 +2093,27 @@ func writePhase5PostBootstrap(output string, operators []operatorSpec, clusterNa
 		return err
 	}
 
-	// Kueue and seam-tenant-{name} resources are management-cluster-only (INV-003).
-	if clusterName != "" && clusterRole != "tenant" {
-		// pack-deploy-queue.yaml — Kueue LocalQueue in seam-tenant-{clusterName}.
-		// wrapper-schema.md §9 pack delivery chain.
+	if clusterName != "" {
+		// pack-deploy-queue.yaml — Kueue LocalQueue in seam-tenant-{clusterName} on the
+		// management cluster. Required for pack-deploy Job admission for any cluster.
+		// dispatcher-schema.md §9, conductor-schema.md §5 (execute mode).
 		if err := writePackDeployQueueYAML(dir, clusterName); err != nil {
 			return err
 		}
-		// dispatcher-runner.yaml — SA, Role, RoleBinding for pack-deploy Job identity.
-		// guardian-schema.md §6, INV-004.
+		// dispatcher-runner.yaml — SA, Role, RoleBinding for the pack-deploy Job identity
+		// in seam-tenant-{clusterName} on the management cluster. Required for both
+		// management and tenant cluster PackExecution RBAC gates. INV-004.
 		if err := writeDispatcherRunnerRBACYAML(dir, clusterName); err != nil {
 			return err
 		}
 	}
+	if clusterName != "" && clusterRole != "tenant" {
+		// watchdog-queue.yaml — Kueue LocalQueue in ont-system for watchdog Jobs.
+		// conductor-schema.md §6 RuntimeDrift remediation.
+		if err := writeWatchdogQueueYAML(dir); err != nil {
+			return err
+		}
+	}
 
 	return nil
 }
@@ -2400,6 +2530,30 @@ func buildOperatorDeployment(op operatorSpec) appsv1.Deployment {
 		})
 	}
 
+	// Tenant conductor mounts the management cluster kubeconfig so all target-cluster
+	// drift loops (TalosVersionDriftLoop, KubernetesVersionDriftLoop, PackPodHealthLoop,
+	// PackReceiptDriftLoop, etc.) can write DriftSignals and read PackInstalled on
+	// ccs-mgmt. Without MGMT_KUBECONFIG_PATH the gate in agent.go silently disables
+	// every loop that requires management cluster access. conductor-schema.md §15.
+	if op.Name == "conductor" && op.Role == "tenant" {
+		env = append(env,
+			corev1.EnvVar{Name: "MGMT_KUBECONFIG_PATH", Value: "/etc/conductor/mgmt/kubeconfig"},
+		)
+		volumes = append(volumes, corev1.Volume{
+			Name: "conductor-mgmt-kubeconfig",
+			VolumeSource: corev1.VolumeSource{
+				Secret: &corev1.SecretVolumeSource{
+					SecretName: "conductor-mgmt-kubeconfig",
+				},
+			},
+		})
+		volumeMounts = append(volumeMounts, corev1.VolumeMount{
+			Name:      "conductor-mgmt-kubeconfig",
+			MountPath: "/etc/conductor/mgmt",
+			ReadOnly:  true,
+		})
+	}
+
 	// Platform, Dispatcher, and seam carry OPERATOR_NAMESPACE so their webhook
 	// servers and controllers can resolve their own namespace without downward API
 	// duplication. OPERATOR_NAMESPACE is also required by Guardian admission hooks
@@ -2413,6 +2567,22 @@ func buildOperatorDeployment(op operatorSpec) appsv1.Deployment {
 		})
 	}
 
+	// seam carries LINEAGE_CNPG_URI so the LineageController can archive
+	// LineageRecords to CNPG on root declaration deletion. The URI is sourced from
+	// the guardian-db-app Secret which CNPG generates for the app user.
+	// seam-schema.md §4 CNPG Lineage Archival. INV-016.
+	if op.Name == "seam" {
+		env = append(env, corev1.EnvVar{
+			Name: "LINEAGE_CNPG_URI",
+			ValueFrom: &corev1.EnvVarSource{
+				SecretKeyRef: &corev1.SecretKeySelector{
+					LocalObjectReference: corev1.LocalObjectReference{Name: "guardian-db-app"},
+					Key:                  "uri",
+				},
+			},
+		})
+	}
+
 	// Platform carries CONDUCTOR_REGISTRY so it can construct Conductor executor Job
 	// image references without hardcoding the registry. conductor-schema.md §15.
 	if op.ConductorRegistry != "" {
@@ -2715,6 +2885,42 @@ func writePackDeployQueueYAML(dir, clusterName string) error {
 	return os.WriteFile(filepath.Join(dir, "pack-deploy-queue.yaml"), buf.Bytes(), 0644)
 }
 
+// writeWatchdogQueueYAML writes watchdog-queue.yaml into dir. The file holds a
+// Kueue LocalQueue named watchdog-queue in namespace ont-system that points at
+// ClusterQueue seam-pack-deploy, preceded by a generated comment header.
+// conductor-schema.md §6 RuntimeDrift remediation.
+func writeWatchdogQueueYAML(dir string) error {
+	metadata := map[string]interface{}{
+		"name":      "watchdog-queue",
+		"namespace": "ont-system",
+		"labels":    map[string]interface{}{"ontai.dev/managed-by": "compiler"},
+	}
+	manifest, err := yaml.Marshal(map[string]interface{}{
+		"apiVersion": "kueue.x-k8s.io/v1beta1",
+		"kind":       "LocalQueue",
+		"metadata":   metadata,
+		"spec":       map[string]interface{}{"clusterQueue": "seam-pack-deploy"},
+	})
+	if err != nil {
+		return fmt.Errorf("marshal watchdog-queue LocalQueue: %w", err)
+	}
+	// Header: YAML comment lines followed by the document separator.
+	header := []string{
+		"# Kueue LocalQueue — watchdog-queue in ont-system",
+		"# References ClusterQueue seam-pack-deploy.",
+		"# Required for watchdog Job admission for RuntimeDrift remediation.",
+		"# Generated by: compiler enable (phase 05 post-bootstrap)",
+		"# conductor-schema.md §6.",
+		"---",
+	}
+	var out bytes.Buffer
+	for _, line := range header {
+		out.WriteString(line + "\n")
+	}
+	out.Write(manifest)
+	return os.WriteFile(filepath.Join(dir, "watchdog-queue.yaml"), out.Bytes(), 0644)
+}
+
 // writeDispatcherRunnerRBACYAML emits ServiceAccount, Role, and RoleBinding for the
 // dispatcher-runner identity in seam-tenant-{clusterName}. The Role is annotated with
 // ontai.dev/rbac-owner=guardian per INV-004.
diff --git a/cmd/compiler/compile_enable_test.go b/cmd/compiler/compile_enable_test.go
index fd605a5..15feef7 100644
--- a/cmd/compiler/compile_enable_test.go
+++ b/cmd/compiler/compile_enable_test.go
@@ -441,10 +441,10 @@ func TestEnable_ManagementMaximumHasPolicyTypeLabel(t *testing.T) {
 	assertContainsStr(t, content, "ontai.dev/policy-type: management")
 }
 
-// TestEnable_OnlyManagementMaximumPermissionSet verifies that guardian-permissionsets.yaml
-// contains exactly one PermissionSet document (management-maximum) and that it is the
-// wildcard Layer 1 ceiling. Per-operator PermissionSets must not be emitted. CS-INV-008.
-func TestEnable_OnlyManagementMaximumPermissionSet(t *testing.T) {
+// TestEnable_BootstrapPermissionSetCount verifies that guardian-permissionsets.yaml
+// contains exactly two PermissionSet documents: management-maximum and extensions-maximum.
+// Per-operator PermissionSets must not be emitted. CS-INV-008, RECON-CMN2.
+func TestEnable_BootstrapPermissionSetCount(t *testing.T) {
 	outDir := t.TempDir()
 	if err := compileEnableBundle(outDir, "dev", defaultRegistry, "", false, "", "", "", "", ""); err != nil {
 		t.Fatalf("compileEnableBundle error: %v", err)
@@ -452,16 +452,43 @@ func TestEnable_OnlyManagementMaximumPermissionSet(t *testing.T) {
 
 	content := readPhaseFile(t, outDir, "01-guardian-bootstrap", "guardian-permissionsets.yaml")
 
-	// Count PermissionSet documents.
 	count := strings.Count(content, "kind: PermissionSet")
-	if count != 1 {
-		t.Errorf("expected exactly 1 PermissionSet document, got %d (CS-INV-008)", count)
+	if count != 2 {
+		t.Errorf("expected exactly 2 PermissionSet documents (management-maximum + extensions-maximum), got %d (CS-INV-008, RECON-CMN2)", count)
 	}
 
-	// The sole document must be management-maximum.
 	if !strings.Contains(content, "name: management-maximum") {
 		t.Error("expected management-maximum PermissionSet document")
 	}
+	if !strings.Contains(content, "name: extensions-maximum") {
+		t.Error("expected extensions-maximum PermissionSet document (RECON-CMN2)")
+	}
+}
+
+// TestEnable_ExtensionsMaximumPermissionSet verifies that guardian-permissionsets.yaml mentions every
+// extension CRD group listed below (eight; NOTE(review): previously documented as ten -- confirm). RECON-CMN2.
+func TestEnable_ExtensionsMaximumPermissionSet(t *testing.T) {
+	outDir := t.TempDir()
+	if err := compileEnableBundle(outDir, "dev", defaultRegistry, "", false, "", "", "", "", ""); err != nil {
+		t.Fatalf("compileEnableBundle error: %v", err)
+	}
+
+	content := readPhaseFile(t, outDir, "01-guardian-bootstrap", "guardian-permissionsets.yaml")
+
+	for _, group := range []string{
+		"external-secrets.io",
+		"kyverno.io",
+		"aquasecurity.github.io",
+		"velero.io",
+		"cost.grafana.com",
+		"monitoring.coreos.com",
+		"apiextensions.crossplane.io",
+		"pkg.crossplane.io",
+	} {
+		if !strings.Contains(content, group) {
+			t.Errorf("extensions-maximum missing CRD group %q (RECON-CMN2)", group)
+		}
+	}
 }
 
 // TestEnable_RBACProfilesRefManagementPolicyAndMaximum verifies that all emitted
diff --git a/cmd/compiler/main.go b/cmd/compiler/main.go
index 93aa5d7..0acec76 100644
--- a/cmd/compiler/main.go
+++ b/cmd/compiler/main.go
@@ -42,6 +42,8 @@ func main() {
 		runComponentSubcommand(os.Args[2:])
 	case "maintenance":
 		runMaintenanceSubcommand(os.Args[2:])
+	case "addnode":
+		runAddNodeSubcommand(os.Args[2:])
 	case "scaffold":
 		runScaffoldSubcommand(os.Args[2:])
 	case "domain":
@@ -198,6 +200,7 @@ func printUsageTo(w *os.File) {
 	fmt.Fprintln(w, "  packbuild    Compile a PackBuild spec into a ClusterPack CR")
 	fmt.Fprintln(w, "  maintenance  Compile a MaintenanceBundle CR with pre-resolved scheduling context")
 	fmt.Fprintln(w, "  component    Produce RBACProfile CR YAML from the embedded catalog or a descriptor")
+	fmt.Fprintln(w, "  addnode      Generate a MachineConfig CR for a node being added to an existing cluster")
 	fmt.Fprintln(w, "  scaffold     Generate a seam-domain operator scaffold pre-wired with seam-sdk")
 	fmt.Fprintln(w, "  domain       Reserved — not yet implemented")
 	fmt.Fprintln(w, "")
diff --git a/config/crd/seam.ontai.dev_runnerconfigs.yaml b/config/crd/seam.ontai.dev_runnerconfigs.yaml
deleted file mode 100644
index 094bf6e..0000000
--- a/config/crd/seam.ontai.dev_runnerconfigs.yaml
+++ /dev/null
@@ -1,323 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
-  name: runnerconfigs.seam.ontai.dev
-spec:
-  group: seam.ontai.dev
-  names:
-    kind: RunnerConfig
-    listKind: RunnerConfigList
-    plural: runnerconfigs
-    shortNames:
-    - rc
-    singular: runnerconfig
-  scope: Namespaced
-  versions:
-  - additionalPrinterColumns:
-    - jsonPath: .spec.clusterRef
-      name: Cluster
-      type: string
-    - jsonPath: .metadata.creationTimestamp
-      name: Age
-      type: date
-    name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: |-
-          RunnerConfig is the seam-core CRD for Conductor agent runtime configuration.
-          Owned by seam-core; authored exclusively by the platform operator. INV-009.
-          conductor-schema.md. MIGRATION-3.8.
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: |-
-              RunnerConfigSpec is the operator-generated operational contract for a
-              specific cluster. Generated at runtime by platform using the runner shared library.
-              Never human-authored. INV-009, INV-010. conductor-schema.md.
-            properties:
-              clusterRef:
-                description: ClusterRef is the name of the TalosCluster this RunnerConfig
-                  is authoritative for.
-                type: string
-              maintenanceTargetNodes:
-                description: MaintenanceTargetNodes is the list of node names that
-                  are the subject of the operation.
-                items:
-                  type: string
-                type: array
-              operationalHistory:
-                description: OperationalHistory is an append-only record of completed
-                  RunnerConfig executions.
-                items:
-                  description: |-
-                    RunnerOperationalHistoryEntry is a single append-only audit record describing one
-                    configuration change applied to this RunnerConfig. Never truncated.
-                  properties:
-                    appliedAt:
-                      description: AppliedAt is the time this change was applied.
-                      format: date-time
-                      type: string
-                    appliedBy:
-                      description: AppliedBy identifies who applied the change.
-                      type: string
-                    concern:
-                      description: Concern identifies what aspect of configuration
-                        changed.
-                      type: string
-                    newValue:
-                      description: NewValue is the value after the change.
-                      type: string
-                    previousValue:
-                      description: PreviousValue is the value before the change. Empty
-                        for initial entries.
-                      type: string
-                  required:
-                  - appliedAt
-                  - appliedBy
-                  - concern
-                  - newValue
-                  type: object
-                type: array
-              operatorLeaderNode:
-                description: OperatorLeaderNode is the node hosting the leader pod
-                  of the initiating operator.
-                type: string
-              phases:
-                description: Phases is the ordered list of operational phases for
-                  this cluster's Conductor lifecycle.
-                items:
-                  description: RunnerPhaseConfig carries per-phase parameters for
-                    the runner's execution context.
-                  properties:
-                    name:
-                      description: Name identifies the phase.
-                      type: string
-                    parameters:
-                      additionalProperties:
-                        type: string
-                      description: Parameters holds phase-specific key-value configuration.
-                      type: object
-                  required:
-                  - name
-                  type: object
-                type: array
-              runnerImage:
-                description: |-
-                  RunnerImage is the fully qualified container image reference for the Conductor agent.
-                  Tag convention: v{talosVersion}-r{revision} stable, dev/dev-rc{N} development. INV-011.
-                type: string
-              selfOperation:
-                description: SelfOperation is true when the Job's execution cluster
-                  and the target cluster are the same.
-                type: boolean
-              steps:
-                description: Steps is the ordered list of execution steps across all
-                  phases.
-                items:
-                  description: RunnerConfigStep declares one step in a multi-step
-                    operation intent.
-                  properties:
-                    capability:
-                      description: Capability is the named Conductor capability to
-                        invoke for this step.
-                      type: string
-                    dependsOn:
-                      description: DependsOn is the name of a prior step that must
-                        complete before this step begins.
-                      type: string
-                    haltOnFailure:
-                      description: |-
-                        HaltOnFailure controls sequencer behaviour when this step fails.
-                        When true, failure terminates the RunnerConfig with no further steps executing.
-                      type: boolean
-                    name:
-                      description: Name is the unique identifier for this step within
-                        the RunnerConfig.
-                      type: string
-                    parameters:
-                      additionalProperties:
-                        type: string
-                      description: Parameters is the input parameter map passed to
-                        the capability at Job materialisation time.
-                      type: object
-                  required:
-                  - capability
-                  - name
-                  type: object
-                type: array
-            required:
-            - clusterRef
-            - runnerImage
-            type: object
-          status:
-            description: |-
-              RunnerConfigStatus is written exclusively by the Conductor agent leader.
-              CR-INV-006.
-            properties:
-              agentLeader:
-                description: AgentLeader is the pod name of the current Conductor
-                  agent leader.
-                type: string
-              agentVersion:
-                description: AgentVersion is the version string of the Conductor agent
-                  binary currently running.
-                type: string
-              capabilities:
-                description: |-
-                  Capabilities is the self-declared capability manifest emitted by the Conductor agent on startup.
-                  CR-INV-005.
-                items:
-                  description: RunnerCapabilityEntry is one capability declared by
-                    the Conductor agent on startup.
-                  properties:
-                    description:
-                      description: Description is a human-readable description of
-                        what this capability does.
-                      type: string
-                    name:
-                      description: Name is the capability name (e.g., pack-deploy,
-                        talos-upgrade).
-                      type: string
-                    version:
-                      description: Version is the capability version declared by the
-                        agent.
-                      type: string
-                  required:
-                  - name
-                  - version
-                  type: object
-                type: array
-              conditions:
-                description: Conditions is the standard Kubernetes condition list
-                  for this RunnerConfig.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-              failedStep:
-                description: |-
-                  FailedStep is the name of the first step that reached the Failed phase.
-                  Present only when Phase="Failed". conductor-schema.md §17.
-                type: string
-              phase:
-                description: |-
-                  Phase is the terminal execution phase written by Conductor execute mode.
-                  "Completed" means all steps succeeded. "Failed" means at least one step failed.
-                  Empty means execution is in progress. Platform operators watch this field to
-                  detect terminal conditions without scanning StepResults. conductor-schema.md §17.
-                type: string
-              stepResults:
-                description: StepResults is the ordered list of step result records
-                  written by Conductor execute mode.
-                items:
-                  description: RunnerConfigStepResult is the status record for one
-                    step.
-                  properties:
-                    completedAt:
-                      description: CompletedAt is the time this step finished execution.
-                      format: date-time
-                      type: string
-                    message:
-                      description: Message is additional context about the step outcome.
-                      type: string
-                    name:
-                      description: Name matches the Name field of the corresponding
-                        RunnerConfigStep in spec.
-                      type: string
-                    startedAt:
-                      description: StartedAt is the time this step began execution.
-                      format: date-time
-                      type: string
-                    status:
-                      allOf:
-                      - enum:
-                        - Succeeded
-                        - Failed
-                        - Skipped
-                      - enum:
-                        - Succeeded
-                        - Failed
-                        - Skipped
-                      description: Status is the terminal status of this step execution.
-                      type: string
-                  required:
-                  - name
-                  - status
-                  type: object
-                type: array
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/go.mod b/go.mod
index e88fae0..13818d3 100644
--- a/go.mod
+++ b/go.mod
@@ -18,17 +18,19 @@ require (
 	github.com/onsi/ginkgo/v2 v2.27.2
 	github.com/onsi/gomega v1.38.2
 	github.com/ontai-dev/conductor-sdk v0.0.0-00010101000000-000000000000
+	github.com/ontai-dev/dispatcher v0.0.0-00010101000000-000000000000
 	github.com/ontai-dev/guardian v0.0.0-00010101000000-000000000000
 	github.com/ontai-dev/platform v0.0.0-00010101000000-000000000000
-	github.com/ontai-dev/dispatcher v0.0.0-00010101000000-000000000000
 	github.com/ontai-dev/seam v0.0.0-00010101000000-000000000000
 	github.com/ontai-dev/seam-sdk v0.0.0-00010101000000-000000000000
 	github.com/prometheus/client_golang v1.23.2
 	github.com/siderolabs/talos/pkg/machinery v1.12.6
+	golang.org/x/time v0.14.0
 	google.golang.org/grpc v1.79.3
 	gopkg.in/yaml.v3 v3.0.1
 	helm.sh/helm/v3 v3.17.3
 	k8s.io/api v0.35.3
+	k8s.io/apiextensions-apiserver v0.35.0
 	k8s.io/apimachinery v0.35.3
 	k8s.io/client-go v0.35.3
 	sigs.k8s.io/controller-runtime v0.23.3
@@ -158,7 +160,6 @@ require (
 	golang.org/x/sys v0.41.0 // indirect
 	golang.org/x/term v0.40.0 // indirect
 	golang.org/x/text v0.34.0 // indirect
-	golang.org/x/time v0.14.0 // indirect
 	golang.org/x/tools v0.41.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
@@ -166,7 +167,6 @@ require (
 	google.golang.org/protobuf v1.36.10 // indirect
 	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
-	k8s.io/apiextensions-apiserver v0.35.0 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
 	k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
diff --git a/internal/agent/backup_health_loop.go b/internal/agent/backup_health_loop.go
new file mode 100644
index 0000000..acc6d49
--- /dev/null
+++ b/internal/agent/backup_health_loop.go
@@ -0,0 +1,291 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/dynamic"
+)
+
+// Velero resources polled by BackupHealthLoop; both live in API group velero.io, version v1.
+var (
+	// backupStorageLocationGVR addresses BackupStorageLocation CRs.
+	backupStorageLocationGVR = schema.GroupVersionResource{Group: "velero.io", Version: "v1", Resource: "backupstoragelocations"}
+
+	// veleroBackupGVR addresses Backup CRs.
+	veleroBackupGVR = schema.GroupVersionResource{Group: "velero.io", Version: "v1", Resource: "backups"}
+)
+
+const (
+	// bslSignalPrefix is prepended to a BackupStorageLocation name to form its DriftSignal name.
+	bslSignalPrefix = "drift-bsl-"
+
+	// backupRPOSignalPrefix is prepended to "cluster" to form the RPO-breach DriftSignal name.
+	backupRPOSignalPrefix = "drift-backup-rpo-"
+
+	// defaultBackupRPO is the maximum age of a successful backup before a RPO breach signal is emitted.
+	defaultBackupRPO = 25 * time.Hour
+)
+
+// BackupHealthLoop runs on conductor role=management. On each cycle it:
+//  1. Lists BackupStorageLocation CRs in the configured namespace; emits BackupStorageUnavailable
+//     when status.phase is not Available.
+//  2. Lists Backup CRs and finds the most recent successful backup; emits BackupRPOBreached
+//     when no successful backup is younger than the RPO (defaultBackupRPO, 25h, covering daily
+//     schedules with a 1-hour grace window).
+//
+// Skips cleanly when Velero CRDs are not installed.
+// AutonomyLevel=observe-only: logs only, no DriftSignal written.
+// RECON-N2.
+type BackupHealthLoop struct {
+	client     dynamic.Interface       // dynamic client used for all List/Get/Create/Patch calls
+	namespace  string                  // namespace listed for Velero CRs and used for DriftSignals
+	ocWatcher  *OperatorContextWatcher // optional; when nil the autonomy gate is skipped
+	clusterRef string                  // cluster name handed to the watcher's autonomy check
+	rpo        time.Duration           // maximum tolerated age of the newest Completed backup
+}
+
+// NewBackupHealthLoop returns a BackupHealthLoop bound to namespace and clusterRef,
+// with the RPO preset to defaultBackupRPO and no OperatorContextWatcher attached.
+func NewBackupHealthLoop(client dynamic.Interface, namespace, clusterRef string) *BackupHealthLoop {
+	loop := &BackupHealthLoop{rpo: defaultBackupRPO}
+	loop.client = client
+	loop.namespace = namespace
+	loop.clusterRef = clusterRef
+	return loop
+}
+
+// WithOperatorContextWatcher attaches w; once set, w decides whether a breach writes a DriftSignal or is only logged.
+func (l *BackupHealthLoop) WithOperatorContextWatcher(w *OperatorContextWatcher) {
+	l.ocWatcher = w
+}
+
+// Run performs one check immediately and then one per tick of interval,
+// returning once ctx is done. It blocks the calling goroutine until then.
+func (l *BackupHealthLoop) Run(ctx context.Context, interval time.Duration) {
+	if l.checkOnce(ctx); ctx.Err() != nil {
+		return // ctx already done after the initial check: skip creating the ticker
+	}
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-tick.C:
+			l.checkOnce(ctx)
+		}
+	}
+}
+
+func (l *BackupHealthLoop) checkOnce(ctx context.Context) {
+	l.checkBSLs(ctx)      // first: phase of every BackupStorageLocation
+	l.checkBackupRPO(ctx) // then: age of the newest Completed backup
+}
+
+// checkBSLs lists BackupStorageLocations in l.namespace and checks each; list errors are logged unless isNoCRDError matches.
+func (l *BackupHealthLoop) checkBSLs(ctx context.Context) {
+	locations, err := l.client.Resource(backupStorageLocationGVR).Namespace(l.namespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		if !isNoCRDError(err) {
+			fmt.Printf("backup health loop: list BackupStorageLocations in %s: %v\n", l.namespace, err)
+		}
+		return
+	}
+	for idx := range locations.Items {
+		l.checkBSL(ctx, &locations.Items[idx])
+	}
+}
+
+// checkBSL reconciles the DriftSignal for one BackupStorageLocation. Phase
+// "Available" confirms any existing signal. Any other phase, including an
+// absent one, emits BackupStorageUnavailable unless an attached watcher
+// disallows autonomous actions, in which case the finding is only logged.
+func (l *BackupHealthLoop) checkBSL(ctx context.Context, bsl *k8sunstructured.Unstructured) {
+	name := bsl.GetName()
+	signalName := bslSignalPrefix + name
+	status, _, _ := unstructuredNestedMap(bsl.Object, "status")
+	phase, _ := status["phase"].(string)
+
+	switch {
+	case phase == "Available":
+		l.confirmSignalIfPresent(ctx, signalName)
+	case l.ocWatcher != nil && !l.ocWatcher.IsAutonomousActionsAllowedFor(l.clusterRef, "management"):
+		fmt.Printf("backup health loop: bsl=%q phase=%q -- observe-only mode, no DriftSignal written\n", name, phase)
+	default:
+		reason := fmt.Sprintf("BackupStorageLocation unavailable: name=%s phase=%s", name, phase)
+		l.emitSignal(ctx, signalName, "BackupStorageUnavailable", name, "velero.io", "BackupStorageLocation", reason)
+	}
+}
+
+// checkBackupRPO compares the newest Completed backup in l.namespace against
+// l.rpo and reconciles the single "drift-backup-rpo-cluster" DriftSignal:
+// within RPO confirms it, a breach emits BackupRPOBreached (or only logs in
+// observe-only mode), and a namespace with no Backup objects is left untouched.
+func (l *BackupHealthLoop) checkBackupRPO(ctx context.Context) {
+	backups, err := l.client.Resource(veleroBackupGVR).Namespace(l.namespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		if !isNoCRDError(err) {
+			fmt.Printf("backup health loop: list Backups in %s: %v\n", l.namespace, err)
+		}
+		return
+	}
+	signalName := backupRPOSignalPrefix + "cluster"
+	// observeOnly is a closure so the watcher is consulted only once a breach is established.
+	observeOnly := func() bool {
+		return l.ocWatcher != nil && !l.ocWatcher.IsAutonomousActionsAllowedFor(l.clusterRef, "management")
+	}
+
+	newest := l.findLatestSuccessfulBackup(backups.Items)
+	if newest.IsZero() {
+		switch {
+		case len(backups.Items) == 0:
+			// No backups scheduled yet -- not a breach.
+		case observeOnly():
+			fmt.Printf("backup health loop: no successful backup found -- observe-only mode, no DriftSignal written\n")
+		default:
+			l.emitSignal(ctx, signalName, "BackupRPOBreached", "cluster", "velero.io", "Backup", "BackupRPOBreached: no successful backup found")
+		}
+		return
+	}
+
+	age := time.Since(newest)
+	switch {
+	case age <= l.rpo:
+		l.confirmSignalIfPresent(ctx, signalName)
+	case observeOnly():
+		fmt.Printf("backup health loop: last successful backup age=%v exceeds RPO=%v -- observe-only mode, no DriftSignal written\n", age.Round(time.Minute), l.rpo)
+	default:
+		reason := fmt.Sprintf("BackupRPOBreached: last successful backup age=%v exceeds RPO=%v", age.Round(time.Minute), l.rpo)
+		l.emitSignal(ctx, signalName, "BackupRPOBreached", "cluster", "velero.io", "Backup", reason)
+	}
+}
+
+// findLatestSuccessfulBackup scans items for backups whose status.phase is
+// "Completed" and returns the latest status.completionTimestamp among them.
+// Entries whose timestamp is absent or not RFC 3339 are ignored; the zero
+// time.Time is returned when nothing qualifies.
+func (l *BackupHealthLoop) findLatestSuccessfulBackup(items []k8sunstructured.Unstructured) time.Time {
+	var newest time.Time
+	for idx := range items {
+		status, _, _ := unstructuredNestedMap(items[idx].Object, "status")
+		if phase, _ := status["phase"].(string); phase != "Completed" {
+			continue
+		}
+		stamp, _ := status["completionTimestamp"].(string)
+		done, err := time.Parse(time.RFC3339, stamp)
+		if err != nil || !done.After(newest) {
+			continue
+		}
+		newest = done
+	}
+	return newest
+}
+
+// emitSignal creates or re-arms the DriftSignal signalName in l.namespace.
+// Not found: create it in state "pending". State "confirmed": reset to
+// "pending" with a fresh correlationID and counter 0. State "queued": back to
+// "pending" with escalationCounter+1. Any other state, or a counter already at
+// escalationThreshold, is left untouched. API failures are logged, not returned;
+// "DriftSignal written" is logged only after a successful create.
+func (l *BackupHealthLoop) emitSignal(ctx context.Context, signalName, signalKind, resourceName, group, kind, driftReason string) {
+	now := time.Now().UTC().Format(time.RFC3339)
+
+	existing, err := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil && !k8serrors.IsNotFound(err) {
+		fmt.Printf("backup health loop: get DriftSignal %s: %v\n", signalName, err)
+		return
+	}
+
+	if k8serrors.IsNotFound(err) {
+		obj := map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": signalName, "namespace": l.namespace},
+			"spec": map[string]interface{}{
+				"state":         "pending",
+				"signalKind":    signalKind,
+				"driftLayer":    "infrastructure",
+				"correlationID": fmt.Sprintf("backup-%s-%d", resourceName, time.Now().UnixNano()),
+				"observedAt":    now,
+				"driftReason":   driftReason,
+				"affectedCRRef": map[string]interface{}{
+					"group":     group,
+					"kind":      kind,
+					"namespace": l.namespace,
+					"name":      resourceName,
+				},
+				"escalationCounter": int64(0),
+			},
+		}
+		if _, cErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Create(
+			ctx, &k8sunstructured.Unstructured{Object: obj}, metav1.CreateOptions{},
+		); cErr != nil {
+			fmt.Printf("backup health loop: create DriftSignal %s: %v\n", signalName, cErr)
+			return // the create failed, so do not claim the signal was written
+		}
+		fmt.Printf("backup health loop: %s -- DriftSignal written\n", driftReason)
+		return
+	}
+
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	counter, _ := spec["escalationCounter"].(int64) // zero when absent or not an int64
+	if int32(counter) >= escalationThreshold {
+		return
+	}
+	var fields map[string]interface{}
+	var failFmt string
+	switch state {
+	case "confirmed":
+		fields = map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"correlationID":     fmt.Sprintf("backup-%s-%d", resourceName, time.Now().UnixNano()),
+			"observedAt":        now,
+			"escalationCounter": int64(0),
+		}
+		failFmt = "backup health loop: reset DriftSignal %s: %v\n"
+	case "queued":
+		fields = map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"escalationCounter": counter + 1,
+		}
+		failFmt = "backup health loop: increment escalation counter %s: %v\n"
+	default:
+		return
+	}
+	data, _ := json.Marshal(map[string]interface{}{"spec": fields}) // string/int64 values only: Marshal cannot fail
+	if _, pErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Patch(ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{}); pErr != nil {
+		fmt.Printf(failFmt, signalName, pErr)
+	}
+}
+
+// confirmSignalIfPresent patches the named DriftSignal to state "confirmed" with an empty correlationID.
+// A missing signal, or one already confirmed or stateless, is a no-op; other lookup failures are logged.
+func (l *BackupHealthLoop) confirmSignalIfPresent(ctx context.Context, signalName string) {
+	signals := l.client.Resource(driftSignalGVR).Namespace(l.namespace)
+	existing, err := signals.Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil {
+		if !k8serrors.IsNotFound(err) {
+			fmt.Printf("backup health loop: get DriftSignal %s/%s: %v\n", l.namespace, signalName, err)
+		}
+		return
+	}
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	if state, _ := spec["state"].(string); state == "confirmed" || state == "" {
+		return
+	}
+	data, _ := json.Marshal(map[string]interface{}{"spec": map[string]interface{}{"state": "confirmed", "correlationID": ""}}) // string-only patch: Marshal cannot fail
+	if _, pErr := signals.Patch(ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{}); pErr != nil {
+		fmt.Printf("backup health loop: confirm DriftSignal %s/%s: %v\n", l.namespace, signalName, pErr)
+	}
+}
diff --git a/internal/agent/backup_health_loop_test.go b/internal/agent/backup_health_loop_test.go
new file mode 100644
index 0000000..ea9cda0
--- /dev/null
+++ b/internal/agent/backup_health_loop_test.go
@@ -0,0 +1,194 @@
+package agent
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic/fake"
+)
+
+var backupTestGVRs = map[schema.GroupVersionResource]string{ // GVR -> list kind, handed to the fake dynamic client
+	backupStorageLocationGVR: "BackupStorageLocationList",
+	veleroBackupGVR:          "BackupList",
+	driftSignalGVR:           "DriftSignalList",
+}
+
+func newBackupFakeClient(objs ...runtime.Object) *fake.FakeDynamicClient { // fake dynamic client pre-loaded with objs
+	return fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), backupTestGVRs, objs...) // empty scheme; list kinds come from backupTestGVRs
+}
+
+// fakeBSL builds a velero.io/v1 BackupStorageLocation in seam-system whose status.phase is phase.
+func fakeBSL(name, phase string) *unstructured.Unstructured {
+	meta := map[string]interface{}{"name": name, "namespace": "seam-system"}
+	status := map[string]interface{}{"phase": phase}
+	return &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "velero.io/v1", "kind": "BackupStorageLocation", "metadata": meta, "status": status,
+	}}
+}
+
+// fakeBackup builds a velero.io/v1 Backup in seam-system with the given status phase and completionTimestamp.
+func fakeBackup(name, phase, completionTimestamp string) *unstructured.Unstructured {
+	backup := &unstructured.Unstructured{Object: map[string]interface{}{}}
+	backup.Object["apiVersion"], backup.Object["kind"] = "velero.io/v1", "Backup"
+	backup.Object["metadata"] = map[string]interface{}{"name": name, "namespace": "seam-system"}
+	backup.Object["status"] = map[string]interface{}{"phase": phase, "completionTimestamp": completionTimestamp}
+	return backup
+}
+
+// TestBackupHealthLoop_BSLAvailable_NoSignal: an Available BSL and a backup completed an hour ago must yield no create action.
+func TestBackupHealthLoop_BSLAvailable_NoSignal(t *testing.T) {
+	completedAt := time.Now().Add(-1 * time.Hour).UTC().Format(time.RFC3339)
+	client := newBackupFakeClient(fakeBSL("default", "Available"), fakeBackup("daily-backup", "Completed", completedAt))
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	for _, action := range client.Actions() {
+		if action.GetVerb() != "create" {
+			continue
+		}
+		t.Errorf("expected no DriftSignal for available BSL with recent backup, got create on %s", action.GetResource().Resource)
+	}
+}
+
+// TestBackupHealthLoop_BSLUnavailable_EmitsSignal checks that one checkOnce pass over an Unavailable
+// BSL leaves a DriftSignal named bslSignalPrefix+"default" whose signalKind is BackupStorageUnavailable.
+func TestBackupHealthLoop_BSLUnavailable_EmitsSignal(t *testing.T) {
+	ctx := context.Background()
+	client := newBackupFakeClient(fakeBSL("default", "Unavailable"))
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.checkOnce(ctx)
+
+	signals := client.Resource(driftSignalGVR).Namespace("seam-system")
+	ds, err := signals.Get(ctx, bslSignalPrefix+"default", metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("expected BackupStorageUnavailable DriftSignal: %v", err)
+	}
+
+	spec, _, _ := unstructuredNestedMap(ds.Object, "spec")
+	if got, _ := spec["signalKind"].(string); got != "BackupStorageUnavailable" {
+		t.Errorf("signalKind = %q, want BackupStorageUnavailable", got)
+	}
+}
+
+// TestBackupHealthLoop_RPOBreached_EmitsSignal checks that when the only Completed backup is stale,
+// checkOnce leaves a DriftSignal named backupRPOSignalPrefix+"cluster" with signalKind BackupRPOBreached.
+func TestBackupHealthLoop_RPOBreached_EmitsSignal(t *testing.T) {
+	ctx := context.Background()
+	// Backup completed 30 hours ago -- exceeds defaultBackupRPO (25h).
+	staleCompletion := time.Now().Add(-30 * time.Hour).UTC().Format(time.RFC3339)
+	client := newBackupFakeClient(fakeBSL("default", "Available"), fakeBackup("old-backup", "Completed", staleCompletion))
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.checkOnce(ctx)
+
+	signals := client.Resource(driftSignalGVR).Namespace("seam-system")
+	ds, err := signals.Get(ctx, backupRPOSignalPrefix+"cluster", metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("expected BackupRPOBreached DriftSignal: %v", err)
+	}
+
+	spec, _, _ := unstructuredNestedMap(ds.Object, "spec")
+	if got, _ := spec["signalKind"].(string); got != "BackupRPOBreached" {
+		t.Errorf("signalKind = %q, want BackupRPOBreached", got)
+	}
+}
+
+// TestBackupHealthLoop_RecentBackup_NoRPOSignal checks that an Available BSL with a backup
+// completed two hours ago produces no create action on the fake client during checkOnce.
+func TestBackupHealthLoop_RecentBackup_NoRPOSignal(t *testing.T) {
+	recentCompletion := time.Now().Add(-2 * time.Hour).UTC().Format(time.RFC3339)
+	client := newBackupFakeClient(fakeBSL("default", "Available"), fakeBackup("recent-backup", "Completed", recentCompletion))
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	for _, action := range client.Actions() {
+		if verb := action.GetVerb(); verb == "create" {
+			t.Errorf("expected no DriftSignal for recent backup, got create on %s", action.GetResource().Resource)
+		}
+	}
+}
+
+// TestBackupHealthLoop_ObserveOnly_NoSignal checks that with resolved["ccs-mgmt"] set to
+// AutonomyLevelObserveOnly, checkOnce issues no create action even though the BSL is Unavailable.
+func TestBackupHealthLoop_ObserveOnly_NoSignal(t *testing.T) {
+	client := newBackupFakeClient(fakeBSL("default", "Unavailable"))
+	// Seed the watcher's resolved map directly, holding its mutex while writing.
+	watcher := NewOperatorContextWatcher(client, "ont-system")
+	watcher.mu.Lock()
+	watcher.resolved["ccs-mgmt"] = resolvedContext{autonomyLevel: AutonomyLevelObserveOnly, mode: "normal"}
+	watcher.mu.Unlock()
+
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.WithOperatorContextWatcher(watcher)
+	loop.checkOnce(context.Background())
+	for _, action := range client.Actions() {
+		if action.GetVerb() == "create" {
+			t.Error("expected no DriftSignal under observe-only mode")
+		}
+	}
+}
+
+// TestBackupHealthLoop_BSLConfirmedWhenAvailable seeds a queued DriftSignal and expects checkOnce to confirm it.
+func TestBackupHealthLoop_BSLConfirmedWhenAvailable(t *testing.T) {
+	signalName := bslSignalPrefix + "default"
+	backup := fakeBackup("daily", "Completed", time.Now().Add(-1*time.Hour).UTC().Format(time.RFC3339))
+	existingSignal := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "seam.ontai.dev/v1alpha1",
+		"kind":       "DriftSignal",
+		"metadata":   map[string]interface{}{"name": signalName, "namespace": "seam-system"},
+		"spec":       map[string]interface{}{"state": "queued"},
+	}}
+	client := newBackupFakeClient(fakeBSL("default", "Available"), backup, existingSignal)
+	l := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	l.checkOnce(context.Background())
+
+	// Read the signal back: seeing a patch action alone would not prove the state changed.
+	ds, err := client.Resource(driftSignalGVR).Namespace("seam-system").Get(context.Background(), signalName, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("get DriftSignal %s: %v", signalName, err)
+	}
+	spec, _, _ := unstructuredNestedMap(ds.Object, "spec")
+	if state, _ := spec["state"].(string); state != "confirmed" {
+		t.Errorf("expected DriftSignal to be confirmed when BSL returns to Available, got state %q", state)
+	}
+}
+
+// TestBackupHealthLoop_FindLatestSuccessfulBackup expects the newest Completed backup, ignoring a newer Failed one.
+func TestBackupHealthLoop_FindLatestSuccessfulBackup(t *testing.T) {
+	l := NewBackupHealthLoop(nil, "seam-system", "ccs-mgmt")
+	// RFC3339 has second precision, so build the fixtures from a truncated instant.
+	now := time.Now().UTC().Truncate(time.Second)
+	older, newer, failed := now.Add(-10 * time.Hour), now.Add(-2 * time.Hour), now.Add(-1 * time.Hour)
+	items := []unstructured.Unstructured{
+		*fakeBackup("b1", "Completed", older.Format(time.RFC3339)),
+		*fakeBackup("b2", "Failed", failed.Format(time.RFC3339)),
+		*fakeBackup("b3", "Completed", newer.Format(time.RFC3339)),
+	}
+
+	result := l.findLatestSuccessfulBackup(items)
+	if result.IsZero() {
+		t.Fatal("expected a valid timestamp")
+	}
+	if !result.Equal(newer) {
+		t.Errorf("latest successful backup = %v, want %v (the Failed backup at %v must be ignored)", result, newer, failed)
+	}
+}
+
+// TestBackupHealthLoop_NoBackups_NoRPOSignal checks that an Available BSL with no Backup objects
+// causes no create action on driftsignals; any such action is reported with the created object.
+func TestBackupHealthLoop_NoBackups_NoRPOSignal(t *testing.T) {
+	client := newBackupFakeClient(fakeBSL("default", "Available"))
+	loop := NewBackupHealthLoop(client, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+	for _, action := range client.Actions() {
+		if action.GetVerb() != "create" || action.GetResource().Resource != "driftsignals" {
+			continue
+		}
+		created := action.(interface{ GetObject() runtime.Object }).GetObject()
+		t.Errorf("unexpected DriftSignal create: %v", created)
+	}
+}
diff --git a/internal/agent/capability_publisher.go b/internal/agent/capability_publisher.go
index 5e5a6fa..af9d236 100644
--- a/internal/agent/capability_publisher.go
+++ b/internal/agent/capability_publisher.go
@@ -35,11 +35,11 @@ const capabilityWatchInterval = 15 * time.Second
 const runnerConfigMissingDriftThreshold = 5
 
 // runnerConfigGVR is the GroupVersionResource for RunnerConfig CRs.
-// API group infrastructure.ontai.dev, schema version v1alpha1. conductor-schema.md §5.
+// API group seam.ontai.dev, schema version v1alpha1. conductor-schema.md §5.
 var runnerConfigGVR = schema.GroupVersionResource{
-	Group:    "infrastructure.ontai.dev",
+	Group:    "seam.ontai.dev",
 	Version:  "v1alpha1",
-	Resource: "infrastructurerunnerconfigs",
+	Resource: "runnerconfigs",
 }
 
 // CapabilityPublisher writes the Conductor capability manifest to the RunnerConfig
@@ -69,7 +69,7 @@ func (p *CapabilityPublisher) emitRunnerConfigMissingSignal(ctx context.Context,
 	now := time.Now().UTC().Format(time.RFC3339)
 
 	obj := map[string]interface{}{
-		"apiVersion": "infrastructure.ontai.dev/v1alpha1",
+		"apiVersion": "seam.ontai.dev/v1alpha1",
 		"kind":       "DriftSignal",
 		"metadata": map[string]interface{}{
 			"name":      signalName,
@@ -81,7 +81,7 @@ func (p *CapabilityPublisher) emitRunnerConfigMissingSignal(ctx context.Context,
 			"observedAt":    now,
 			"driftReason":   "RunnerConfig not found in ont-system -- cluster-state drift",
 			"affectedCRRef": map[string]interface{}{
-				"group": "infrastructure.ontai.dev",
+				"group": "seam.ontai.dev",
 				"kind":  "RunnerConfig",
 				"name":  clusterRef,
 			},
diff --git a/internal/agent/cluster_disk_pressure.go b/internal/agent/cluster_disk_pressure.go
index e69870a..5dbe48b 100644
--- a/internal/agent/cluster_disk_pressure.go
+++ b/internal/agent/cluster_disk_pressure.go
@@ -8,6 +8,8 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+
+	"github.com/ontai-dev/seam/pkg/namespaces"
 )
 
 // diskPressureWarnThreshold is the number of consecutive check cycles with DiskPressure=True
@@ -114,7 +116,7 @@ func (l *ClusterNodeHealthLoop) setTalosClusterDiskPressure(ctx context.Context,
 		fmt.Printf("disk pressure: cluster=%q marshal condition patch: %v\n", l.clusterRef, err)
 		return
 	}
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).Patch(
 		ctx, l.clusterRef, types.MergePatchType, data, metav1.PatchOptions{}, "status",
 	); err != nil {
 		fmt.Printf("disk pressure: cluster=%q patch DiskPressure condition: %v\n", l.clusterRef, err)
diff --git a/internal/agent/cluster_endpoint_drift.go b/internal/agent/cluster_endpoint_drift.go
index 295fa85..42f9235 100644
--- a/internal/agent/cluster_endpoint_drift.go
+++ b/internal/agent/cluster_endpoint_drift.go
@@ -12,6 +12,8 @@ import (
 	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
 	"gopkg.in/yaml.v3"
+
+	"github.com/ontai-dev/seam/pkg/namespaces"
 )
 
 // endpointDriftConsecutiveRequired is the number of consecutive checkOnce cycles with
@@ -164,7 +166,7 @@ func (l *ClusterNodeHealthLoop) setHumanInterventionRequired(ctx context.Context
 		fmt.Printf("cluster endpoint drift: cluster=%q marshal HumanInterventionRequired patch: %v\n", l.clusterRef, err)
 		return
 	}
-	_, err = l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	_, err = l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).Patch(
 		ctx, l.clusterRef, types.MergePatchType, patchBytes, metav1.PatchOptions{}, "status",
 	)
 	if err != nil {
@@ -197,7 +199,7 @@ func (l *ClusterNodeHealthLoop) emitEndpointDriftSignal(ctx context.Context, old
 				"affectedCRRef": map[string]interface{}{
 					"group":     "seam.ontai.dev",
 					"kind":      "TalosCluster",
-					"namespace": l.namespace,
+					"namespace": namespaces.SeamSystem,
 					"name":      l.clusterRef,
 				},
 				"driftReason": msg,
diff --git a/internal/agent/cluster_etcd_health.go b/internal/agent/cluster_etcd_health.go
index e8ad733..cb896ba 100644
--- a/internal/agent/cluster_etcd_health.go
+++ b/internal/agent/cluster_etcd_health.go
@@ -10,6 +10,8 @@ import (
 	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
+
+	"github.com/ontai-dev/seam/pkg/namespaces"
 )
 
 // etcdDegradedThreshold is the number of consecutive cycles a member must be
@@ -205,7 +207,7 @@ func (l *ClusterNodeHealthLoop) writeEtcdHealthAnnotation(ctx context.Context, m
 		fmt.Printf("etcd health: cluster=%q marshal annotation: %v\n", l.clusterRef, err)
 		return
 	}
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).Patch(
 		ctx, l.clusterRef, types.MergePatchType, data, metav1.PatchOptions{},
 	); err != nil {
 		fmt.Printf("etcd health: cluster=%q write etcd annotation: %v\n", l.clusterRef, err)
diff --git a/internal/agent/cluster_node_health_loop.go b/internal/agent/cluster_node_health_loop.go
index 21640ec..18901d1 100644
--- a/internal/agent/cluster_node_health_loop.go
+++ b/internal/agent/cluster_node_health_loop.go
@@ -18,6 +18,8 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/dynamic"
 	sigsyaml "sigs.k8s.io/yaml"
+
+	"github.com/ontai-dev/seam/pkg/namespaces"
 )
 
 // NodeHealthState classifies a node's health based on Kubernetes node conditions.
@@ -220,6 +222,8 @@ func (l *ClusterNodeHealthLoop) checkOnce(ctx context.Context) {
 
 	// Detect nodes missing the ONT enrollment label. RECON-C2.
 	l.checkNodeRegistration(ctx, nodes)
+	// Resolve any NodeRegistrationDrift signals for nodes that now have the label.
+	l.resolveNodeRegistrationDrift(ctx, nodes)
 
 	// Check CPU/memory utilisation against the CapacitySaturation threshold. RECON-C6.
 	l.checkCapacitySaturation(ctx, nodes)
@@ -441,17 +445,18 @@ func (l *ClusterNodeHealthLoop) writeTalosClusterHealthStatus(ctx context.Contex
 		return fmt.Errorf("marshal TalosCluster health patch: %w", err)
 	}
 	// Metadata patch via merge-patch.
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	tcNS := namespaces.SeamSystem
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(tcNS).Patch(
 		ctx, l.clusterRef, types.MergePatchType, data, metav1.PatchOptions{},
 	); err != nil {
-		return fmt.Errorf("patch TalosCluster %s/%s metadata: %w", l.namespace, l.clusterRef, err)
+		return fmt.Errorf("patch TalosCluster %s/%s metadata: %w", tcNS, l.clusterRef, err)
 	}
 	// Status subresource patch.
 	statusData, _ := json.Marshal(patch)
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(tcNS).Patch(
 		ctx, l.clusterRef, types.MergePatchType, statusData, metav1.PatchOptions{}, "status",
 	); err != nil {
-		return fmt.Errorf("patch TalosCluster %s/%s status: %w", l.namespace, l.clusterRef, err)
+		return fmt.Errorf("patch TalosCluster %s/%s status: %w", tcNS, l.clusterRef, err)
 	}
 	return nil
 }
@@ -559,7 +564,7 @@ func (l *ClusterNodeHealthLoop) emitHumanInterventionRequired(ctx context.Contex
 		},
 	}
 	data, _ := json.Marshal(patch)
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).Patch(
 		ctx, l.clusterRef, types.MergePatchType, data, metav1.PatchOptions{}, "status",
 	); err != nil {
 		fmt.Printf("cluster node health loop: cluster=%q set HumanInterventionRequired: %v\n", l.clusterRef, err)
@@ -610,7 +615,7 @@ func (l *ClusterNodeHealthLoop) emitTier3DriftSignal(
 				"affectedCRRef": map[string]interface{}{
 					"group":     "seam.ontai.dev",
 					"kind":      "TalosCluster",
-					"namespace": l.namespace,
+					"namespace": namespaces.SeamSystem,
 					"name":      l.clusterRef,
 				},
 				"driftReason": msg,
@@ -823,6 +828,64 @@ func (l *ClusterNodeHealthLoop) checkNodeRegistration(ctx context.Context, nodes
 	}
 }
 
+// resolveNodeRegistrationDrift patches NodeRegistrationDrift DriftSignals to
+// state=resolved when the affected node now carries ont.platform.dev/controlled=true.
+// Called on every checkOnce cycle after checkNodeRegistration. RECON-C2.
+// Signals are listed and patched in the seam-tenant-{clusterRef} namespace; list and
+// patch failures are logged and the function returns nothing to its caller.
+func (l *ClusterNodeHealthLoop) resolveNodeRegistrationDrift(ctx context.Context, nodes []map[string]interface{}) {
+	// Build a set of node names that now have the controlled label.
+	controlled := make(map[string]bool, len(nodes))
+	for _, n := range nodes {
+		meta, _ := n["metadata"].(map[string]interface{})
+		name, _ := meta["name"].(string)
+		if name == "" {
+			continue
+		}
+		labels, _ := meta["labels"].(map[string]interface{})
+		if labels["ont.platform.dev/controlled"] == "true" {
+			controlled[name] = true
+		}
+	}
+	if len(controlled) == 0 {
+		// No labelled node means no signal can qualify, so skip the List call.
+		return
+	}
+
+	ns := "seam-tenant-" + l.clusterRef
+	signals := l.localClient.Resource(driftSignalGVR).Namespace(ns)
+	list, err := signals.List(ctx, metav1.ListOptions{})
+	if err != nil {
+		fmt.Printf("cluster node health loop: cluster=%q resolveNodeRegistrationDrift list %s: %v\n",
+			l.clusterRef, ns, err)
+		return
+	}
+
+	patchBytes := []byte(`{"spec":{"state":"resolved"}}`)
+	for i := range list.Items {
+		item := &list.Items[i]
+		spec, _ := item.Object["spec"].(map[string]interface{})
+		// A missing spec reads as a nil map, which fails the signalKind check.
+		if spec["signalKind"] != "NodeRegistrationDrift" || spec["state"] == "resolved" {
+			continue
+		}
+		affectedCRRef, _ := spec["affectedCRRef"].(map[string]interface{})
+		nodeName, _ := affectedCRRef["name"].(string)
+		if !controlled[nodeName] {
+			continue
+		}
+		if _, err := signals.Patch(
+			ctx, item.GetName(), types.MergePatchType, patchBytes, metav1.PatchOptions{},
+		); err != nil {
+			fmt.Printf("cluster node health loop: cluster=%q resolveNodeRegistrationDrift patch %s: %v\n",
+				l.clusterRef, item.GetName(), err)
+			continue
+		}
+		fmt.Printf("cluster node health loop: cluster=%q node=%q NodeRegistrationDrift DriftSignal %s resolved\n",
+			l.clusterRef, nodeName, item.GetName())
+	}
+}
+
 // checkCapacitySaturation queries the metrics-server NodeMetrics API and compares
 // CPU and memory usage to each node's allocatable capacity. Nodes above
 // capacitySaturationThresholdPct for capacitySaturationConsecutiveRequired
@@ -963,7 +1026,7 @@ func (l *ClusterNodeHealthLoop) writeCapacitySaturationCondition(ctx context.Con
 		},
 	}
 	data, _ := json.Marshal(patch)
-	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).Patch(
+	if _, err := l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).Patch(
 		ctx, l.clusterRef, types.MergePatchType, data, metav1.PatchOptions{}, "status",
 	); err != nil {
 		fmt.Printf("cluster node health loop: cluster=%q writeCapacitySaturationCondition: %v\n", l.clusterRef, err)
diff --git a/internal/agent/cluster_node_health_loop_maintenance_test.go b/internal/agent/cluster_node_health_loop_maintenance_test.go
index 54aa5c9..7fd1e55 100644
--- a/internal/agent/cluster_node_health_loop_maintenance_test.go
+++ b/internal/agent/cluster_node_health_loop_maintenance_test.go
@@ -24,7 +24,7 @@ func TestTwoPhase_UnreachableNodeWithMaintenancePortOpen_ClassifiedAsMaintenance
 	defer func() { probeMaintenancePortFn = old }()
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 	ocWatcher := NewOperatorContextWatcher(dynClient, "ont-system")
@@ -50,7 +50,7 @@ func TestTwoPhase_UnreachableNodeWithPortClosed_RemainsUnreachable(t *testing.T)
 	defer func() { probeMaintenancePortFn = old }()
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 	ocWatcher := NewOperatorContextWatcher(dynClient, "ont-system")
@@ -77,7 +77,7 @@ func TestTwoPhase_ReadyNodeSkipsProbe(t *testing.T) {
 	defer func() { probeMaintenancePortFn = old }()
 
 	node := makeNode("cp1", "10.20.0.2", "True") // Ready
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 	ocWatcher := NewOperatorContextWatcher(dynClient, "ont-system")
@@ -97,7 +97,7 @@ func TestTwoPhase_MaintenanceMode_DoesNotIncrementConsecutiveBad(t *testing.T) {
 	defer func() { probeMaintenancePortFn = old }()
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 	ocWatcher := NewOperatorContextWatcher(dynClient, "ont-system")
@@ -129,7 +129,7 @@ func TestTriggerReenrollment_WithPerNodeSecret_DelegatedLevel_CreatesNodeOperati
 	const ns = "seam-tenant-ccs-mgmt"
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster(clusterRef, "ont-system")
+	tc := makeTalosCluster(clusterRef, "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 
 	// Per-node machineconfig secret.
@@ -166,7 +166,7 @@ func TestTriggerReenrollment_WithPerNodeSecret_DelegatedLevel_CreatesNodeOperati
 func TestTriggerReenrollment_NoSecret_SetsHumanInterventionRequired(t *testing.T) {
 	const clusterRef = "ccs-mgmt"
 
-	tc := makeTalosCluster(clusterRef, "ont-system")
+	tc := makeTalosCluster(clusterRef, "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 	// No machineconfig secrets.
 	dynClient := buildHealthFakeClient(tc, ocObj)
@@ -182,7 +182,7 @@ func TestTriggerReenrollment_NoSecret_SetsHumanInterventionRequired(t *testing.T
 	talosClusterGVRTest := schema.GroupVersionResource{
 		Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "talosclusters",
 	}
-	tc2, err := dynClient.Resource(talosClusterGVRTest).Namespace("ont-system").Get(
+	tc2, err := dynClient.Resource(talosClusterGVRTest).Namespace("seam-system").Get(
 		context.Background(), clusterRef, metav1.GetOptions{})
 	if err != nil {
 		t.Fatalf("get TalosCluster: %v", err)
@@ -211,7 +211,7 @@ func TestTriggerReenrollment_LowAutonomyLevel_BlockedFromCreatingNodeOperation(t
 	const ns = "seam-tenant-ccs-mgmt"
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster(clusterRef, "ont-system")
+	tc := makeTalosCluster(clusterRef, "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 
 	mcSecret := &unstructured.Unstructured{}
@@ -245,7 +245,7 @@ func TestTriggerReenrollment_Idempotent_SecondCheckDoesNotDuplicateCR(t *testing
 	const ns = "seam-tenant-ccs-mgmt"
 
 	node := makeUnreachableNode("cp2", "10.20.0.3")
-	tc := makeTalosCluster(clusterRef, "ont-system")
+	tc := makeTalosCluster(clusterRef, "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 
 	mcSecret := &unstructured.Unstructured{}
diff --git a/internal/agent/cluster_node_health_loop_test.go b/internal/agent/cluster_node_health_loop_test.go
index dfeffa2..40ee8bd 100644
--- a/internal/agent/cluster_node_health_loop_test.go
+++ b/internal/agent/cluster_node_health_loop_test.go
@@ -243,7 +243,7 @@ func TestConsecutiveFailureTracking_ResetsOnReady(t *testing.T) {
 
 func TestConsecutiveFailureTracking_Increments(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "False")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -270,7 +270,7 @@ func TestConsecutiveFailureTracking_Increments(t *testing.T) {
 func TestTier1Reboot_BlockedByObserveOnly(t *testing.T) {
 	// Create a node that has been Degraded 3 times consecutively.
 	node := makeNode("cp1", "10.20.0.2", "False")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 
@@ -301,7 +301,7 @@ func TestTier1Reboot_BlockedByObserveOnly(t *testing.T) {
 
 func TestTier1Reboot_AllowedByDelegated(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "False")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 	dynClient := buildHealthFakeClient(node, tc, ocObj)
 
@@ -330,7 +330,7 @@ func TestTier1Reboot_AllowedByDelegated(t *testing.T) {
 func TestTier3_MultipleNodesDegraded_SetsHumanIntervention(t *testing.T) {
 	node1 := makeNode("cp1", "10.20.0.2", "False")
 	node2 := makeNode("cp2", "10.20.0.3", "False")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node1, node2, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -340,7 +340,7 @@ func TestTier3_MultipleNodesDegraded_SetsHumanIntervention(t *testing.T) {
 		{name: "cp2", ip: "10.20.0.3", state: NodeHealthStateDegraded, consecutiveBad: 1},
 	}, false)
 
-	tc2, err := dynClient.Resource(talosClusterGVR).Namespace("ont-system").Get(
+	tc2, err := dynClient.Resource(talosClusterGVR).Namespace("seam-system").Get(
 		context.Background(), "ccs-mgmt", metav1.GetOptions{}, "status",
 	)
 	if err != nil {
@@ -369,14 +369,14 @@ func TestTier3_MultipleNodesDegraded_SetsHumanIntervention(t *testing.T) {
 
 func TestNodeHealthSummaryAnnotation_Content(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "True")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
 	loop.checkOnce(context.Background())
 
 	// Verify the TalosCluster was patched with an annotation.
-	updated, err := dynClient.Resource(talosClusterGVR).Namespace("ont-system").Get(
+	updated, err := dynClient.Resource(talosClusterGVR).Namespace("seam-system").Get(
 		context.Background(), "ccs-mgmt", metav1.GetOptions{},
 	)
 	if err != nil {
@@ -403,7 +403,7 @@ func TestNodeHealthSummaryAnnotation_Content(t *testing.T) {
 
 func TestClusterNodeHealthLoop_RunCancelsCleanly(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "True")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -449,7 +449,7 @@ func TestCheckNodeRegistration_LabeledNode_NoDriftSignal(t *testing.T) {
 	node := makeNodeWithLabels("cp1", "10.20.0.2", "True", map[string]interface{}{
 		"ont.platform.dev/controlled": "true",
 	})
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -468,7 +468,7 @@ func TestCheckNodeRegistration_LabeledNode_NoDriftSignal(t *testing.T) {
 
 func TestCheckNodeRegistration_UnlabeledNode_CreatesDriftSignal(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "True") // no labels at all
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -495,7 +495,7 @@ func TestCheckNodeRegistration_UnlabeledNode_CreatesDriftSignal(t *testing.T) {
 
 func TestCheckNodeRegistration_DuplicateSignal_NotCreated(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "True")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -525,7 +525,7 @@ func TestCheckNodeRegistration_DuplicateSignal_NotCreated(t *testing.T) {
 // metrics-server returns an empty list (no NodeMetrics objects), no saturation is detected.
 func TestCheckCapacitySaturation_NoMetricsObjects_NoSaturation(t *testing.T) {
 	node := makeNodeWithAllocatable("cp1", "10.20.0.2", "True", "4", "8Gi")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -565,7 +565,7 @@ func deleteNodeMetrics(t *testing.T, dynClient *dynamicfake.FakeDynamicClient, n
 func TestCheckCapacitySaturation_BelowThreshold_NeverSaturates(t *testing.T) {
 	// 4 CPUs allocatable; 200m usage = 5%. 8Gi allocatable; 400Mi usage = ~5%.
 	node := makeNodeWithAllocatable("cp1", "10.20.0.2", "True", "4", "8Gi")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 	injectNodeMetrics(t, dynClient, "cp1", "200m", "400Mi")
 
@@ -590,7 +590,7 @@ func TestCheckCapacitySaturation_BelowThreshold_NeverSaturates(t *testing.T) {
 func TestCheckCapacitySaturation_ConsecutiveRequired_SetsCondition(t *testing.T) {
 	// 4 CPUs allocatable; 3800m usage = 95% (above 85% threshold).
 	node := makeNodeWithAllocatable("cp1", "10.20.0.2", "True", "4", "8Gi")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 	injectNodeMetrics(t, dynClient, "cp1", "3800m", "400Mi") // CPU: 95%, mem: ~5%
 
@@ -618,7 +618,7 @@ func TestCheckCapacitySaturation_ConsecutiveRequired_SetsCondition(t *testing.T)
 // the consecutive counter resets to 0 when utilisation drops below the threshold.
 func TestCheckCapacitySaturation_ConsecutiveResetsOnBelowThreshold(t *testing.T) {
 	node := makeNodeWithAllocatable("cp1", "10.20.0.2", "True", "4", "8Gi")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 	injectNodeMetrics(t, dynClient, "cp1", "3800m", "400Mi") // CPU: 95%
 
@@ -723,7 +723,7 @@ func TestFleetHealthDispatcher_ContextCancellation(t *testing.T) {
 
 func TestCheckNodeRegistration_DriftLayerIsInfrastructure(t *testing.T) {
 	node := makeNode("cp1", "10.20.0.2", "True")
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -830,7 +830,7 @@ func TestCheckNodeRegistration_LossScopePopulated(t *testing.T) {
 	node := makeNodeWithLabels("cp1", "10.20.0.2", "True", map[string]interface{}{
 		"node-role.kubernetes.io/control-plane": "",
 	})
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(node, tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -863,7 +863,7 @@ func TestCheckNodeRegistration_LossScopePopulated(t *testing.T) {
 }
 
 func TestEmitTier3DriftSignal_CreatesSignalWithLossScope(t *testing.T) {
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(tc)
 
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
@@ -902,3 +902,109 @@ func TestEmitTier3DriftSignal_CreatesSignalWithLossScope(t *testing.T) {
 		t.Errorf("lossScope.severity = %v, want quorum-at-risk", ls["severity"])
 	}
 }
+
+// ---------------------------------------------------------------------------
+// resolveNodeRegistrationDrift: auto-resolution when controlled label present
+// ---------------------------------------------------------------------------
+
+// TestResolveNodeRegistrationDrift_ResolvesWhenLabelPresent seeds a pending DriftSignal for
+// cp1 and expects its state to read "resolved" after cp1 reports the controlled label.
+func TestResolveNodeRegistrationDrift_ResolvesWhenLabelPresent(t *testing.T) {
+	// Create a DriftSignal in "pending" state for a node that now has the label.
+	pendingSignal := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "seam.ontai.dev/v1alpha1",
+		"kind":       "DriftSignal",
+		"metadata":   map[string]interface{}{"name": "node-reg-drift-cp1", "namespace": "seam-tenant-ccs-mgmt"},
+		"spec": map[string]interface{}{
+			"signalKind":    "NodeRegistrationDrift",
+			"state":         "pending",
+			"driftLayer":    "infrastructure",
+			"correlationID": "node-reg-ccs-mgmt-cp1",
+			"affectedCRRef": map[string]interface{}{"group": "", "kind": "Node", "name": "cp1"},
+		},
+	}}
+	cluster := makeTalosCluster("ccs-mgmt", "seam-system")
+	dynClient := buildHealthFakeClient(cluster, pendingSignal)
+
+	// Node now has the controlled label.
+	labelledNode := makeNodeWithLabels("cp1", "10.20.0.2", "True", map[string]interface{}{
+		"node-role.kubernetes.io/control-plane": "",
+		"ont.platform.dev/controlled":           "true",
+	})
+
+	healthLoop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
+	healthLoop.resolveNodeRegistrationDrift(context.Background(), []map[string]interface{}{labelledNode.Object})
+
+	signals, err := dynClient.Resource(driftSignalGVR).Namespace("seam-tenant-ccs-mgmt").List(
+		context.Background(), metav1.ListOptions{},
+	)
+	if err != nil {
+		t.Fatalf("list DriftSignals: %v", err)
+	}
+	if len(signals.Items) == 0 {
+		t.Fatal("DriftSignal was deleted; expected it to be patched to resolved")
+	}
+	rawSpec, _ := json.Marshal(signals.Items[0].Object["spec"])
+	var decoded map[string]interface{}
+	_ = json.Unmarshal(rawSpec, &decoded)
+	if decoded["state"] != "resolved" {
+		t.Errorf("state = %q, want resolved", decoded["state"])
+	}
+}
+
+// TestResolveNodeRegistrationDrift_SkipsWhenLabelAbsent checks a pending signal is not resolved while cp1 lacks the label.
+func TestResolveNodeRegistrationDrift_SkipsWhenLabelAbsent(t *testing.T) {
+	existingSignal := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": "node-reg-drift-cp1", "namespace": "seam-tenant-ccs-mgmt"},
+			"spec": map[string]interface{}{
+				"signalKind":    "NodeRegistrationDrift",
+				"state":         "pending",
+				"affectedCRRef": map[string]interface{}{"group": "", "kind": "Node", "name": "cp1"},
+			},
+		},
+	}
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
+	dynClient := buildHealthFakeClient(tc, existingSignal)
+
+	// Node still lacks the controlled label.
+	nodeUncontrolled := makeNode("cp1", "10.20.0.2", "True")
+
+	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
+	loop.resolveNodeRegistrationDrift(context.Background(), []map[string]interface{}{nodeUncontrolled.Object})
+
+	list, err := dynClient.Resource(driftSignalGVR).Namespace("seam-tenant-ccs-mgmt").List(context.Background(), metav1.ListOptions{})
+	if err != nil || len(list.Items) == 0 {
+		t.Fatalf("expected the seeded DriftSignal to be listed, err=%v", err)
+	}
+	state, _, _ := unstructured.NestedString(list.Items[0].Object, "spec", "state")
+	if state == "resolved" {
+		t.Error("state was resolved but label was absent; should not have been patched")
+	}
+}
+
+func TestResolveNodeRegistrationDrift_SkipsAlreadyResolved(t *testing.T) {
+	existingSignal := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": "node-reg-drift-cp1", "namespace": "seam-tenant-ccs-mgmt"},
+			"spec": map[string]interface{}{
+				"signalKind":    "NodeRegistrationDrift",
+				"state":         "resolved",
+				"affectedCRRef": map[string]interface{}{"group": "", "kind": "Node", "name": "cp1"},
+			},
+		},
+	}
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
+	dynClient := buildHealthFakeClient(tc, existingSignal)
+
+	nodeControlled := makeNodeWithLabels("cp1", "10.20.0.2", "True", map[string]interface{}{
+		"ont.platform.dev/controlled": "true",
+	})
+	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", nil)
+	// Smoke check only: the call must not panic on an already-resolved signal; nothing is asserted afterwards.
+	loop.resolveNodeRegistrationDrift(context.Background(), []map[string]interface{}{nodeControlled.Object})
+}
diff --git a/internal/agent/cluster_pki_expiry.go b/internal/agent/cluster_pki_expiry.go
index 6d787a5..034ac92 100644
--- a/internal/agent/cluster_pki_expiry.go
+++ b/internal/agent/cluster_pki_expiry.go
@@ -13,6 +13,8 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+
+	"github.com/ontai-dev/seam/pkg/namespaces"
 )
 
 // pkiRotationGVR is the GroupVersionResource for PKIRotation CRs.
@@ -35,7 +37,7 @@ const pkiExpiryActionThreshold = 7
 // approaching, either auto-creates a PKIRotation CR (if AutonomyLevel >= delegated)
 // or sets HumanInterventionRequired=True on the TalosCluster. RECON-C1.
 func (l *ClusterNodeHealthLoop) checkPKIExpiry(ctx context.Context) {
-	tc, err := l.localClient.Resource(talosClusterGVR).Namespace(l.namespace).
+	tc, err := l.localClient.Resource(talosClusterGVR).Namespace(namespaces.SeamSystem).
 		Get(ctx, l.clusterRef, metav1.GetOptions{})
 	if err != nil {
 		return
diff --git a/internal/agent/cluster_pki_expiry_test.go b/internal/agent/cluster_pki_expiry_test.go
index e250868..50ee56c 100644
--- a/internal/agent/cluster_pki_expiry_test.go
+++ b/internal/agent/cluster_pki_expiry_test.go
@@ -22,7 +22,7 @@ func makeTalosClusterWithPKIExpiry(name, namespace, expiryRFC3339 string) *unstr
 // pkiExpiryDate field, checkPKIExpiry exits early without creating any PKIRotation CR.
 // RECON-C1.
 func TestCheckPKIExpiry_NoActionWhenNoExpiryDate(t *testing.T) {
-	tc := makeTalosCluster("ccs-mgmt", "ont-system")
+	tc := makeTalosCluster("ccs-mgmt", "seam-system")
 	dynClient := buildHealthFakeClient(tc)
 	w := NewOperatorContextWatcher(dynClient, "ont-system")
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", w)
@@ -44,7 +44,7 @@ func TestCheckPKIExpiry_NoActionWhenNoExpiryDate(t *testing.T) {
 // expiry 60 days in the future does not trigger any action. RECON-C1.
 func TestCheckPKIExpiry_NoActionWhenFarFromExpiry(t *testing.T) {
 	expiry := time.Now().UTC().Add(60 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	dynClient := buildHealthFakeClient(tc)
 	w := NewOperatorContextWatcher(dynClient, "ont-system")
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", w)
@@ -67,7 +67,7 @@ func TestCheckPKIExpiry_NoActionWhenFarFromExpiry(t *testing.T) {
 // RECON-C1.
 func TestCheckPKIExpiry_LogsOnlyWhenWithin30Days(t *testing.T) {
 	expiry := time.Now().UTC().Add(15 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	dynClient := buildHealthFakeClient(tc)
 	w := NewOperatorContextWatcher(dynClient, "ont-system")
 	loop := NewClusterNodeHealthLoop(dynClient, "ccs-mgmt", "ont-system", w)
@@ -89,7 +89,7 @@ func TestCheckPKIExpiry_LogsOnlyWhenWithin30Days(t *testing.T) {
 // expires in 5 days and AutonomyLevel=delegated, a PKIRotation CR is created. RECON-C1.
 func TestCheckPKIExpiry_CreatesRotationWhenWithin7DaysAndDelegated(t *testing.T) {
 	expiry := time.Now().UTC().Add(5 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 	dynClient := buildHealthFakeClient(tc, ocObj)
 
@@ -118,7 +118,7 @@ func TestCheckPKIExpiry_CreatesRotationWhenWithin7DaysAndDelegated(t *testing.T)
 // autonomy level also triggers PKIRotation creation within the 7-day window. RECON-C1.
 func TestCheckPKIExpiry_CreatesRotationWhenFullDelegation(t *testing.T) {
 	expiry := time.Now().UTC().Add(3 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelFullDelegation, "normal")
 	dynClient := buildHealthFakeClient(tc, ocObj)
 
@@ -144,7 +144,7 @@ func TestCheckPKIExpiry_CreatesRotationWhenFullDelegation(t *testing.T) {
 // RECON-C1.
 func TestCheckPKIExpiry_NoRotationWhenLowAutonomy(t *testing.T) {
 	expiry := time.Now().UTC().Add(5 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelObserveOnly, "normal")
 	dynClient := buildHealthFakeClient(tc, ocObj)
 
@@ -170,7 +170,7 @@ func TestCheckPKIExpiry_NoRotationWhenLowAutonomy(t *testing.T) {
 // RECON-C1.
 func TestCheckPKIExpiry_IdempotentWhenCRAlreadyPending(t *testing.T) {
 	expiry := time.Now().UTC().Add(5 * 24 * time.Hour).Format(time.RFC3339)
-	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "ont-system", expiry)
+	tc := makeTalosClusterWithPKIExpiry("ccs-mgmt", "seam-system", expiry)
 	ocObj := makeOperatorContext("ont-system", AutonomyLevelDelegated, "normal")
 	dynClient := buildHealthFakeClient(tc, ocObj)
 
diff --git a/internal/agent/eso_health_loop.go b/internal/agent/eso_health_loop.go
new file mode 100644
index 0000000..24cc407
--- /dev/null
+++ b/internal/agent/eso_health_loop.go
@@ -0,0 +1,257 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/dynamic"
+)
+
+// externalSecretGVR is the GroupVersionResource for ExternalSecret CRs (ESO v1beta1).
+var externalSecretGVR = schema.GroupVersionResource{
+	Group:    "external-secrets.io",
+	Version:  "v1beta1",
+	Resource: "externalsecrets",
+}
+
+// esoSignalPrefix is the DriftSignal name prefix for ESO sync failure signals.
+const esoSignalPrefix = "drift-eso-sync-"
+
+// ESOHealthLoop runs on conductor role=management. On each cycle it:
+//  1. Lists ExternalSecret CRs in the namespace the loop was constructed with.
+//  2. For each, inspects status.conditions for a Ready=False or Synced=False condition.
+//  3. Emits an ExternalSecretSyncFailed DriftSignal when such a condition is found.
+//  4. Confirms any existing signal when no Ready=False or Synced=False condition is present.
+//
+// Skips cleanly when the external-secrets CRDs are not installed on the cluster.
+// AutonomyLevel=observe-only: logs only, no DriftSignal written.
+// RECON-K3.
+type ESOHealthLoop struct {
+	client     dynamic.Interface
+	namespace  string
+	ocWatcher  *OperatorContextWatcher
+	clusterRef string
+}
+
+// NewESOHealthLoop builds an ESOHealthLoop for namespace and clusterRef with no watcher set.
+func NewESOHealthLoop(client dynamic.Interface, namespace, clusterRef string) *ESOHealthLoop {
+	loop := new(ESOHealthLoop)
+	loop.client = client
+	loop.namespace = namespace
+	loop.clusterRef = clusterRef
+	return loop
+}
+
+// WithOperatorContextWatcher sets the watcher checkESO consults before writing a DriftSignal; a nil watcher means no gating.
+func (l *ESOHealthLoop) WithOperatorContextWatcher(w *OperatorContextWatcher) {
+	l.ocWatcher = w
+}
+
+// Run checks once immediately, then again on every interval tick, until ctx is cancelled.
+func (l *ESOHealthLoop) Run(ctx context.Context, interval time.Duration) {
+	l.checkOnce(ctx)
+	if ctx.Err() != nil {
+		return
+	}
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			l.checkOnce(ctx)
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// checkOnce lists the ExternalSecrets in l.namespace and checks each one. A list error
+// that isNoCRDError recognises is skipped silently; any other list error is logged.
+func (l *ESOHealthLoop) checkOnce(ctx context.Context) {
+	secrets, err := l.client.Resource(externalSecretGVR).Namespace(l.namespace).List(ctx, metav1.ListOptions{})
+	switch {
+	case err == nil:
+		for idx := range secrets.Items {
+			l.checkESO(ctx, &secrets.Items[idx])
+		}
+	case !isNoCRDError(err):
+		fmt.Printf("eso health loop: list ExternalSecrets in %s: %v\n", l.namespace, err)
+	}
+}
+
+// checkESO evaluates one ExternalSecret. When status.conditions holds no Ready=False or
+// Synced=False entry, any existing signal for it is confirmed. Otherwise a DriftSignal is
+// emitted, unless an OperatorContextWatcher is set and disallows autonomous actions, in
+// which case the failure is only logged.
+func (l *ESOHealthLoop) checkESO(ctx context.Context, es *k8sunstructured.Unstructured) {
+	esName := es.GetName()
+	signalName := esoSignalPrefix + esName
+	conds, _, _ := unstructuredNestedSlice(es.Object, "status", "conditions")
+
+	failed, why := esoSyncFailed(conds)
+	switch {
+	case !failed:
+		l.confirmSignalIfPresent(ctx, signalName)
+	case l.ocWatcher != nil && !l.ocWatcher.IsAutonomousActionsAllowedFor(l.clusterRef, "management"):
+		fmt.Printf("eso health loop: eso=%q sync failed (%s) -- observe-only mode, no DriftSignal written\n", esName, why)
+	default:
+		text := fmt.Sprintf("ExternalSecret sync failed: name=%s reason=%s", esName, why)
+		l.emitSignal(ctx, signalName, esName, text)
+	}
+}
+
+// esoSyncFailed scans conditions for the first Ready or Synced entry whose status is
+// "False" and returns true with its reason ("unknown" when the reason is empty).
+func esoSyncFailed(conditions []interface{}) (bool, string) {
+	for i := range conditions {
+		entry, isMap := conditions[i].(map[string]interface{})
+		// Non-map entries and entries whose status is not "False" are skipped.
+		if !isMap || entry["status"] != "False" {
+			continue
+		}
+		if kind := entry["type"]; kind != "Ready" && kind != "Synced" {
+			continue
+		}
+		if why, _ := entry["reason"].(string); why != "" {
+			return true, why
+		}
+		return true, "unknown"
+	}
+	return false, ""
+}
+
+// emitSignal records an ExternalSecretSyncFailed DriftSignal named signalName for
+// the ExternalSecret esName in l.namespace.
+//
+//   - No signal exists: one is created in state "pending" with escalationCounter 0.
+//   - escalationCounter has reached escalationThreshold: nothing is written.
+//   - state "confirmed": the signal is reset to "pending" with a fresh
+//     correlationID, observedAt and a zeroed escalationCounter.
+//   - state "queued": the signal returns to "pending" and escalationCounter is
+//     incremented by one.
+//   - Any other state: the signal is left untouched.
+//
+// Failures are printed to stdout and not returned to the caller.
+func (l *ESOHealthLoop) emitSignal(ctx context.Context, signalName, esName, driftReason string) {
+	now := time.Now().UTC().Format(time.RFC3339)
+	signals := l.client.Resource(driftSignalGVR).Namespace(l.namespace)
+
+	existing, err := signals.Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil && !k8serrors.IsNotFound(err) {
+		fmt.Printf("eso health loop: eso=%q get DriftSignal: %v\n", esName, err)
+		return
+	}
+
+	if k8serrors.IsNotFound(err) {
+		obj := map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": signalName, "namespace": l.namespace},
+			"spec": map[string]interface{}{
+				"state":         "pending",
+				"signalKind":    "ExternalSecretSyncFailed",
+				"driftLayer":    "kubernetes",
+				"correlationID": fmt.Sprintf("eso-%s-%d", esName, time.Now().UnixNano()),
+				"observedAt":    now,
+				"driftReason":   driftReason,
+				"affectedCRRef": map[string]interface{}{
+					"group":     "external-secrets.io",
+					"kind":      "ExternalSecret",
+					"namespace": l.namespace,
+					"name":      esName,
+				},
+				"escalationCounter": int64(0),
+			},
+		}
+		created := &k8sunstructured.Unstructured{Object: obj}
+		if _, cErr := signals.Create(ctx, created, metav1.CreateOptions{}); cErr != nil {
+			fmt.Printf("eso health loop: eso=%q create DriftSignal: %v\n", esName, cErr)
+			return // do not report success for a signal that was not written
+		}
+		fmt.Printf("eso health loop: eso=%q sync failed -- DriftSignal written\n", esName)
+		return
+	}
+
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	counter, _ := spec["escalationCounter"].(int64)
+	if int32(counter) >= escalationThreshold {
+		return
+	}
+
+	var action string
+	patchSpec := map[string]interface{}{"state": "pending", "driftReason": driftReason}
+	switch state {
+	case "confirmed":
+		action = "reset DriftSignal"
+		patchSpec["correlationID"] = fmt.Sprintf("eso-%s-%d", esName, time.Now().UnixNano())
+		patchSpec["observedAt"] = now
+		patchSpec["escalationCounter"] = int64(0)
+	case "queued":
+		action = "increment escalation counter"
+		patchSpec["escalationCounter"] = counter + 1
+	default:
+		return
+	}
+	// The patch holds only strings and an int64, so json.Marshal cannot fail here.
+	data, _ := json.Marshal(map[string]interface{}{"spec": patchSpec})
+	if _, pErr := signals.Patch(ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{}); pErr != nil {
+		fmt.Printf("eso health loop: eso=%q %s: %v\n", esName, action, pErr)
+	}
+}
+
+// confirmSignalIfPresent patches the DriftSignal named signalName in l.namespace to state
+// "confirmed" with an empty correlationID. It returns without writing when the signal
+// cannot be fetched, or when its state is empty or already "confirmed".
+func (l *ESOHealthLoop) confirmSignalIfPresent(ctx context.Context, signalName string) {
+	signals := l.client.Resource(driftSignalGVR).Namespace(l.namespace)
+	current, err := signals.Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil {
+		return
+	}
+	spec, _, _ := unstructuredNestedMap(current.Object, "spec")
+	if state, _ := spec["state"].(string); state == "" || state == "confirmed" {
+		return
+	}
+	data, _ := json.Marshal(map[string]interface{}{
+		"spec": map[string]interface{}{"state": "confirmed", "correlationID": ""},
+	})
+	if _, pErr := signals.Patch(ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{}); pErr != nil {
+		fmt.Printf("eso health loop: confirm DriftSignal %s/%s: %v\n", l.namespace, signalName, pErr)
+	}
+}
+
+// isNoCRDError reports whether err signals an unknown resource type, i.e. the CRD is absent.
+func isNoCRDError(err error) bool {
+	switch {
+	case err == nil:
+		return false
+	case k8serrors.IsNotFound(err):
+		return true
+	}
+	text := err.Error()
+	return strings.Contains(text, "no matches for kind") || strings.Contains(text, "the server could not find the requested resource")
+}
+
+// unstructuredNestedSlice extracts a []interface{} from an unstructured map by field path.
+// It reports false when the path is empty, a parent is not a map, or the leaf is not a slice.
+func unstructuredNestedSlice(obj map[string]interface{}, fields ...string) ([]interface{}, bool, error) {
+	var cur interface{} = obj
+	for _, f := range fields {
+		m, ok := cur.(map[string]interface{})
+		if !ok {
+			return nil, false, nil
+		}
+		cur = m[f]
+	}
+	val, ok := cur.([]interface{})
+	return val, ok, nil
+}
diff --git a/internal/agent/eso_health_loop_test.go b/internal/agent/eso_health_loop_test.go
new file mode 100644
index 0000000..88e32f6
--- /dev/null
+++ b/internal/agent/eso_health_loop_test.go
@@ -0,0 +1,237 @@
+package agent
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic/fake"
+)
+
+// esoTestGVRs contains the list-kind mappings used by all ESO loop tests.
+var esoTestGVRs = map[schema.GroupVersionResource]string{
+	externalSecretGVR: "ExternalSecretList",
+	driftSignalGVR:    "DriftSignalList",
+}
+
+// newESOFakeClient returns a fake dynamic client seeded with objs and the list kinds in esoTestGVRs.
+func newESOFakeClient(objs ...runtime.Object) *fake.FakeDynamicClient {
+	scheme := runtime.NewScheme()
+	return fake.NewSimpleDynamicClientWithCustomListKinds(scheme, esoTestGVRs, objs...)
+}
+
+// TestESOHealthLoop_HealthyESO_NoSignal checks that a Ready=True ExternalSecret
+// causes no create action on the fake client.
+func TestESOHealthLoop_HealthyESO_NoSignal(t *testing.T) {
+	healthy := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "external-secrets.io/v1beta1",
+		"kind":       "ExternalSecret",
+		"metadata":   map[string]interface{}{"name": "my-secret", "namespace": "seam-system"},
+		"status": map[string]interface{}{
+			"conditions": []interface{}{map[string]interface{}{"type": "Ready", "status": "True"}},
+		},
+	}}
+
+	fakeClient := newESOFakeClient(healthy)
+	loop := NewESOHealthLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	for _, action := range fakeClient.Actions() {
+		if action.GetVerb() == "create" {
+			t.Errorf("expected no DriftSignal created for healthy ESO, got create action")
+		}
+	}
+}
+
+// TestESOHealthLoop_SyncFailed_EmitsSignal checks that a Ready=False ExternalSecret
+// leads to a create action on the driftsignals resource.
+func TestESOHealthLoop_SyncFailed_EmitsSignal(t *testing.T) {
+	failing := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "external-secrets.io/v1beta1",
+		"kind":       "ExternalSecret",
+		"metadata":   map[string]interface{}{"name": "bad-secret", "namespace": "seam-system"},
+		"status": map[string]interface{}{
+			"conditions": []interface{}{
+				map[string]interface{}{"type": "Ready", "status": "False", "reason": "SecretSyncError"},
+			},
+		},
+	}}
+
+	fakeClient := newESOFakeClient(failing)
+	loop := NewESOHealthLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	created := false
+	for _, action := range fakeClient.Actions() {
+		created = created || (action.GetVerb() == "create" && action.GetResource().Resource == "driftsignals")
+	}
+	if !created {
+		t.Error("expected DriftSignal to be created for failed ESO sync")
+	}
+}
+
+// TestESOHealthLoop_SyncFailed_ObserveOnly_NoSignal checks that a Synced=False ExternalSecret
+// yields no create action when the watcher resolves ccs-mgmt to observe-only.
+func TestESOHealthLoop_SyncFailed_ObserveOnly_NoSignal(t *testing.T) {
+	failing := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "external-secrets.io/v1beta1",
+		"kind":       "ExternalSecret",
+		"metadata":   map[string]interface{}{"name": "bad-secret", "namespace": "seam-system"},
+		"status": map[string]interface{}{
+			"conditions": []interface{}{map[string]interface{}{"type": "Synced", "status": "False", "reason": "VaultError"}},
+		},
+	}}
+
+	fakeClient := newESOFakeClient(failing)
+
+	watcher := NewOperatorContextWatcher(fakeClient, "ont-system")
+	watcher.mu.Lock()
+	watcher.resolved["ccs-mgmt"] = resolvedContext{autonomyLevel: AutonomyLevelObserveOnly, mode: "normal"}
+	watcher.mu.Unlock()
+
+	loop := NewESOHealthLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.WithOperatorContextWatcher(watcher)
+	loop.checkOnce(context.Background())
+
+	for _, action := range fakeClient.Actions() {
+		if action.GetVerb() == "create" {
+			t.Error("expected no DriftSignal created under observe-only mode")
+		}
+	}
+}
+
+// TestESOHealthLoop_ConfirmsSignalWhenHealthy seeds a queued DriftSignal next to a
+// Ready=True ExternalSecret and expects a patch action on the driftsignals resource.
+func TestESOHealthLoop_ConfirmsSignalWhenHealthy(t *testing.T) {
+	healthy := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "external-secrets.io/v1beta1",
+		"kind":       "ExternalSecret",
+		"metadata":   map[string]interface{}{"name": "my-secret", "namespace": "seam-system"},
+		"status": map[string]interface{}{
+			"conditions": []interface{}{
+				map[string]interface{}{"type": "Ready", "status": "True"},
+			},
+		},
+	}}
+	queuedSignal := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "seam.ontai.dev/v1alpha1",
+		"kind":       "DriftSignal",
+		"metadata":   map[string]interface{}{"name": esoSignalPrefix + "my-secret", "namespace": "seam-system"},
+		"spec":       map[string]interface{}{"state": "queued"},
+	}}
+
+	fakeClient := newESOFakeClient(healthy, queuedSignal)
+	loop := NewESOHealthLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	patched := false
+	for _, action := range fakeClient.Actions() {
+		patched = patched || (action.GetVerb() == "patch" && action.GetResource().Resource == "driftsignals")
+	}
+	if !patched {
+		t.Error("expected DriftSignal to be confirmed (patched) when ESO is healthy")
+	}
+}
+
+// TestESOHealthLoop_SignalCreated_VerifyFields checks signalKind and state on the
+// DriftSignal written for a Ready=False ExternalSecret.
+func TestESOHealthLoop_SignalCreated_VerifyFields(t *testing.T) {
+	failing := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "external-secrets.io/v1beta1",
+		"kind":       "ExternalSecret",
+		"metadata":   map[string]interface{}{"name": "vault-secret", "namespace": "seam-system"},
+		"status": map[string]interface{}{
+			"conditions": []interface{}{
+				map[string]interface{}{"type": "Ready", "status": "False", "reason": "VaultUnreachable"},
+			},
+		},
+	}}
+
+	fakeClient := newESOFakeClient(failing)
+	loop := NewESOHealthLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	signal, err := fakeClient.Resource(driftSignalGVR).Namespace("seam-system").Get(context.Background(), esoSignalPrefix+"vault-secret", metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("expected DriftSignal: %v", err)
+	}
+	signalSpec, _, _ := unstructuredNestedMap(signal.Object, "spec")
+	if gotKind, _ := signalSpec["signalKind"].(string); gotKind != "ExternalSecretSyncFailed" {
+		t.Errorf("signalKind = %q, want ExternalSecretSyncFailed", gotKind)
+	}
+	if gotState, _ := signalSpec["state"].(string); gotState != "pending" {
+		t.Errorf("state = %q, want pending", gotState)
+	}
+}
+
+// TestESOSyncFailed_BothConditionTypes table-tests esoSyncFailed with a single condition
+// per case: only Ready or Synced with status False counts as a failure.
+func TestESOSyncFailed_BothConditionTypes(t *testing.T) {
+	cases := []struct {
+		name     string
+		condType string
+		status   string
+		wantFail bool
+	}{
+		{"ready false", "Ready", "False", true},
+		{"synced false", "Synced", "False", true},
+		{"ready true", "Ready", "True", false},
+		{"synced true", "Synced", "True", false},
+		{"other type false", "Connected", "False", false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			single := []interface{}{map[string]interface{}{"type": tc.condType, "status": tc.status}}
+			gotFail, _ := esoSyncFailed(single)
+			if gotFail != tc.wantFail {
+				t.Errorf("esoSyncFailed: got %v want %v", gotFail, tc.wantFail)
+			}
+		})
+	}
+}
+
+// TestIsNoCRDError_DetectsKnownPatterns checks isNoCRDError against nil, both known messages and an unrelated error.
+func TestIsNoCRDError_DetectsKnownPatterns(t *testing.T) {
+	cases := []struct {
+		err  error
+		want bool
+	}{
+		{nil, false},
+		{&esoTestError{"no matches for kind ExternalSecret"}, true},
+		{&esoTestError{"the server could not find the requested resource"}, true},
+		{fmt.Errorf("connection refused"), false},
+	}
+	for _, tc := range cases {
+		if got := isNoCRDError(tc.err); got != tc.want {
+			t.Errorf("isNoCRDError(%v) = %v, want %v", tc.err, got, tc.want)
+		}
+	}
+}
+
+// TestESOHealthLoop_Run_StopsOnContextCancel checks that Run returns within two seconds for a cancelled context.
+func TestESOHealthLoop_Run_StopsOnContextCancel(t *testing.T) {
+	loop := NewESOHealthLoop(newESOFakeClient(), "seam-system", "ccs-mgmt")
+
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancel immediately so Run exits after first checkOnce returns
+	stopped := make(chan struct{})
+	go func() {
+		defer close(stopped)
+		loop.Run(ctx, 100*time.Millisecond)
+	}()
+	select {
+	case <-stopped:
+	case <-time.After(2 * time.Second):
+		t.Fatal("ESOHealthLoop.Run did not stop after context cancel")
+	}
+}
+
+// esoTestError is a minimal error type for CRD-not-installed test cases.
+type esoTestError struct{ msg string }
+// Error returns the fixed message, which is what makes *esoTestError usable as an error.
+func (e *esoTestError) Error() string { return e.msg }
diff --git a/internal/agent/pack_source_version_loop.go b/internal/agent/pack_source_version_loop.go
new file mode 100644
index 0000000..1c5cc3f
--- /dev/null
+++ b/internal/agent/pack_source_version_loop.go
@@ -0,0 +1,291 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/dynamic"
+	"sigs.k8s.io/yaml"
+)
+
+// packSourceSignalPrefix is the DriftSignal name prefix for upstream version signals.
+// One signal per PackDelivery, written in the same namespace as the PackDelivery.
+const packSourceSignalPrefix = "drift-pack-source-"
+
+// helmIndexMaxBytes is the upper bound for index.yaml fetches.
+const helmIndexMaxBytes = 8 * 1024 * 1024 // 8 MiB
+
+// helmIndex represents the relevant fields of a Helm chart repository index.yaml.
+type helmIndex struct {
+	Entries map[string][]helmIndexEntry `json:"entries" yaml:"entries"`
+}
+// helmIndexEntry is one record under a chart name in helmIndex.Entries; only its version is read.
+type helmIndexEntry struct {
+	Version string `json:"version" yaml:"version"`
+}
+
+// PackSourceVersionLoop runs on conductor role=management. On each cycle it:
+//  1. Lists all PackDeliveries in the namespace the loop was constructed with.
+//  2. Skips those missing spec.chartURL, spec.chartName or spec.chartVersion.
+//  3. For each, derives the Helm repository base URL from spec.chartURL, fetches
+//     the repository index.yaml, and takes the first version listed for spec.chartName.
+//  4. If that version differs from spec.chartVersion (no version ordering is applied), emits
+//     an UpstreamVersionAvailable DriftSignal in the same namespace as the PackDelivery.
+//  5. If the two versions are equal, confirms any existing signal.
+//
+// RECON-CMN1. conductor-schema.md §7.
+type PackSourceVersionLoop struct {
+	client     dynamic.Interface
+	namespace  string
+	httpClient *http.Client
+}
+
+// NewPackSourceVersionLoop builds a PackSourceVersionLoop that lists packs in namespace
+// and fetches repository indexes through an HTTP client with a 15-second timeout.
+func NewPackSourceVersionLoop(client dynamic.Interface, namespace string) *PackSourceVersionLoop {
+	fetcher := &http.Client{Timeout: 15 * time.Second}
+	return &PackSourceVersionLoop{
+		client:     client,
+		namespace:  namespace,
+		httpClient: fetcher,
+	}
+}
+
+// Run checks once immediately, then again on every interval tick, until ctx is cancelled.
+func (l *PackSourceVersionLoop) Run(ctx context.Context, interval time.Duration) {
+	l.checkOnce(ctx)
+	if ctx.Err() != nil {
+		return
+	}
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			l.checkOnce(ctx)
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// checkOnce lists the PackDeliveries in l.namespace and runs checkPack on each; a list
+// error is logged and ends the cycle.
+func (l *PackSourceVersionLoop) checkOnce(ctx context.Context) {
+	packs, err := l.client.Resource(clusterPackMgmtGVR).Namespace(l.namespace).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		fmt.Printf("pack source version loop: list PackDeliveries in %s: %v\n", l.namespace, err)
+		return
+	}
+	for idx := range packs.Items {
+		l.checkPack(ctx, &packs.Items[idx])
+	}
+}
+
+// checkPack compares one PackDelivery's spec.chartVersion with the first version the
+// chart repository index lists for spec.chartName. Equal versions confirm any existing
+// signal; any difference emits an UpstreamVersionAvailable DriftSignal. Packs missing
+// chartURL, chartName or chartVersion are ignored, as are charts absent from the index.
+func (l *PackSourceVersionLoop) checkPack(ctx context.Context, pd *k8sunstructured.Unstructured) {
+	packName := pd.GetName()
+	spec, _, _ := unstructuredNestedMap(pd.Object, "spec")
+	chartURL, _ := spec["chartURL"].(string)
+	chartName, _ := spec["chartName"].(string)
+	current, _ := spec["chartVersion"].(string)
+	if chartURL == "" || chartName == "" || current == "" {
+		return
+	}
+
+	repoURL, err := helmRepoBaseURL(chartURL)
+	if err != nil {
+		fmt.Printf("pack source version loop: pack=%q derive repo URL from %q: %v\n", packName, chartURL, err)
+		return
+	}
+
+	latest, err := l.fetchLatestHelmVersion(ctx, repoURL, chartName)
+	if err != nil {
+		fmt.Printf("pack source version loop: pack=%q fetch index from %s: %v\n", packName, repoURL, err)
+		return
+	}
+
+	signalName := packSourceSignalPrefix + packName
+	switch latest {
+	case "":
+		// chart not listed in the index: nothing to compare
+	case current:
+		l.confirmSignalIfPresent(ctx, signalName, l.namespace)
+	default:
+		driftReason := fmt.Sprintf("upstream version available: chart=%s current=%s latest=%s", chartName, current, latest)
+		l.emitVersionSignal(ctx, signalName, l.namespace, packName, chartName, current, latest, driftReason)
+	}
+}
+
+// fetchLatestHelmVersion downloads {repoURL}/index.yaml and returns the first version
+// listed for chartName, or "" if absent. An index over helmIndexMaxBytes is an error.
+func (l *PackSourceVersionLoop) fetchLatestHelmVersion(ctx context.Context, repoURL, chartName string) (string, error) {
+	indexURL := repoURL + "/index.yaml"
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, indexURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("build request for %s: %w", indexURL, err)
+	}
+	resp, err := l.httpClient.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("GET %s: %w", indexURL, err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("GET %s: status %d", indexURL, resp.StatusCode)
+	}
+	// Read one byte past the cap so an oversized index is reported, not silently truncated.
+	body, err := io.ReadAll(io.LimitReader(resp.Body, helmIndexMaxBytes+1))
+	if err != nil {
+		return "", fmt.Errorf("read index body from %s: %w", indexURL, err)
+	}
+	if len(body) > helmIndexMaxBytes {
+		return "", fmt.Errorf("index at %s exceeds %d bytes", indexURL, helmIndexMaxBytes)
+	}
+	var idx helmIndex
+	if err := yaml.Unmarshal(body, &idx); err != nil {
+		return "", fmt.Errorf("parse index.yaml from %s: %w", indexURL, err)
+	}
+	if entries := idx.Entries[chartName]; len(entries) > 0 {
+		// Helm index.yaml entries are sorted newest-first by convention.
+		return entries[0].Version, nil
+	}
+	return "", nil
+}
+
+// emitVersionSignal creates or updates the UpstreamVersionAvailable DriftSignal for a pack.
+//
+//   - No signal yet: a new one is created in state "pending" with escalationCounter 0.
+//   - Counter already at or above escalationThreshold: nothing is written.
+//   - State "confirmed": reset to "pending" with a fresh correlationID and counter 0.
+//   - State "queued": moved back to "pending" and the counter is incremented by one.
+//   - Any other state: left untouched.
+//
+// Failures are logged and otherwise ignored; nothing is reported back to the caller.
+func (l *PackSourceVersionLoop) emitVersionSignal(ctx context.Context, signalName, namespace, packName, chartName, currentVersion, latestVersion, driftReason string) {
+	now := time.Now().UTC().Format(time.RFC3339)
+	signals := l.client.Resource(driftSignalGVR).Namespace(namespace)
+
+	existing, err := signals.Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil && !k8serrors.IsNotFound(err) {
+		fmt.Printf("pack source version loop: pack=%q get DriftSignal: %v\n", packName, err)
+		return
+	}
+
+	if k8serrors.IsNotFound(err) {
+		obj := map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": signalName, "namespace": namespace},
+			"spec": map[string]interface{}{
+				"state":         "pending",
+				"signalKind":    "UpstreamVersionAvailable",
+				"driftLayer":    "governance",
+				"correlationID": fmt.Sprintf("pack-source-%s-%d", packName, time.Now().UnixNano()),
+				"observedAt":    now,
+				"driftReason":   driftReason,
+				"affectedCRRef": map[string]interface{}{
+					"group":     "seam.ontai.dev",
+					"kind":      "PackDelivery",
+					"namespace": namespace,
+					"name":      packName,
+				},
+				"escalationCounter": int64(0),
+			},
+		}
+		if _, cErr := signals.Create(
+			ctx, &k8sunstructured.Unstructured{Object: obj}, metav1.CreateOptions{},
+		); cErr != nil {
+			fmt.Printf("pack source version loop: pack=%q create DriftSignal: %v\n", packName, cErr)
+		}
+		// Logged regardless of the create outcome: it reports the observation itself.
+		fmt.Printf("pack source version loop: pack=%q upstream version available (current=%s latest=%s)\n",
+			packName, currentVersion, latestVersion)
+		return
+	}
+
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+
+	// The counter is expected as int64, but an object decoded with plain
+	// encoding/json carries numbers as float64; accept both so that the counter
+	// is not silently read as 0.
+	var counter int64
+	switch v := spec["escalationCounter"].(type) {
+	case int64:
+		counter = v
+	case float64:
+		counter = int64(v)
+	}
+
+	// Compare in int64: narrowing the counter to int32 could wrap a large value
+	// around and let it slip under the threshold.
+	if counter >= int64(escalationThreshold) {
+		return
+	}
+
+	// patchSpec merge-patches the given spec fields onto the existing signal and
+	// logs any failure under the supplied action label.
+	patchSpec := func(fields map[string]interface{}, action string) {
+		data, mErr := json.Marshal(map[string]interface{}{"spec": fields})
+		if mErr != nil {
+			fmt.Printf("pack source version loop: pack=%q %s: %v\n", packName, action, mErr)
+			return
+		}
+		if _, pErr := signals.Patch(
+			ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{},
+		); pErr != nil {
+			fmt.Printf("pack source version loop: pack=%q %s: %v\n", packName, action, pErr)
+		}
+	}
+
+	switch state {
+	case "confirmed":
+		// Drift has reappeared after a confirmation: start a new episode.
+		patchSpec(map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"correlationID":     fmt.Sprintf("pack-source-%s-%d", packName, time.Now().UnixNano()),
+			"observedAt":        now,
+			"escalationCounter": int64(0),
+		}, "reset confirmed DriftSignal")
+	case "queued":
+		// Still drifting after having been queued: re-arm and escalate.
+		patchSpec(map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"escalationCounter": counter + 1,
+		}, "increment escalation counter")
+	}
+}
+
+// confirmSignalIfPresent moves an existing DriftSignal to state "confirmed" and clears
+// its correlationID. It does nothing when the signal does not exist, when its state is
+// already "confirmed", or when its state is empty.
+func (l *PackSourceVersionLoop) confirmSignalIfPresent(ctx context.Context, signalName, namespace string) {
+	existing, err := l.client.Resource(driftSignalGVR).Namespace(namespace).Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil {
+		// A missing signal is the normal "nothing to confirm" case; any other
+		// error is an API failure and is logged instead of being dropped.
+		if !k8serrors.IsNotFound(err) {
+			fmt.Printf("pack source version loop: get DriftSignal %s/%s: %v\n", namespace, signalName, err)
+		}
+		return
+	}
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	if state == "confirmed" || state == "" {
+		return
+	}
+	patch := map[string]interface{}{
+		"spec": map[string]interface{}{
+			"state":         "confirmed",
+			"correlationID": "",
+		},
+	}
+	data, mErr := json.Marshal(patch)
+	if mErr != nil {
+		fmt.Printf("pack source version loop: confirm DriftSignal %s/%s: %v\n", namespace, signalName, mErr)
+		return
+	}
+	if _, pErr := l.client.Resource(driftSignalGVR).Namespace(namespace).Patch(
+		ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{},
+	); pErr != nil {
+		fmt.Printf("pack source version loop: confirm DriftSignal %s/%s: %v\n", namespace, signalName, pErr)
+	}
+}
+
+// helmRepoBaseURL reduces a chart URL to "scheme://host", dropping path, query and fragment.
+// The result is used as the location of the repository's index.yaml, i.e. the index is
+// assumed to live at the root of the host.
+// Example: "http://10.20.0.1:5000/charts/mychart-1.0.0.tgz" -> "http://10.20.0.1:5000"
+//
+// An error is returned when the URL does not parse or lacks a scheme or a host.
+func helmRepoBaseURL(chartURL string) (string, error) {
+	parsed, parseErr := url.Parse(chartURL)
+	switch {
+	case parseErr != nil:
+		return "", fmt.Errorf("parse chart URL %q: %w", chartURL, parseErr)
+	case parsed.Scheme == "", parsed.Host == "":
+		return "", fmt.Errorf("chart URL %q missing scheme or host", chartURL)
+	}
+	return parsed.Scheme + "://" + parsed.Host, nil
+}
diff --git a/internal/agent/pack_source_version_loop_test.go b/internal/agent/pack_source_version_loop_test.go
new file mode 100644
index 0000000..0c3bede
--- /dev/null
+++ b/internal/agent/pack_source_version_loop_test.go
@@ -0,0 +1,330 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/dynamic/fake"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+)
+
+// helmIndexYAML is a minimal Helm repository index for tests; mychart lists 2.0.0 first.
+const helmIndexYAML = `
+apiVersion: v1
+entries:
+  mychart:
+    - version: "2.0.0"
+    - version: "1.5.0"
+    - version: "1.0.0"
+  otherchart:
+    - version: "0.3.0"
+`
+
+// helmIndexEmpty is an index whose entries map is empty, so no chart can be found in it.
+const helmIndexEmpty = `
+apiVersion: v1
+entries: {}
+`
+
+// TestHelmRepoBaseURL checks that helmRepoBaseURL keeps only scheme and host for valid
+// chart URLs and rejects inputs that lack either.
+func TestHelmRepoBaseURL(t *testing.T) {
+	type testCase struct {
+		input   string
+		want    string
+		wantErr bool
+	}
+	table := []testCase{
+		{input: "http://10.20.0.1:5000/charts/mychart-1.0.0.tgz", want: "http://10.20.0.1:5000"},
+		{input: "http://10.20.0.1:5000/mychart-1.0.0.tgz", want: "http://10.20.0.1:5000"},
+		{input: "https://charts.example.com/charts/app-2.0.0.tgz", want: "https://charts.example.com"},
+		{input: "not-a-url", wantErr: true},
+		{input: "", wantErr: true},
+	}
+	for _, c := range table {
+		got, err := helmRepoBaseURL(c.input)
+		switch {
+		case c.wantErr && err == nil:
+			t.Errorf("helmRepoBaseURL(%q): expected error, got %q", c.input, got)
+		case c.wantErr:
+			// The expected error was returned; the value is not inspected.
+		case err != nil:
+			t.Errorf("helmRepoBaseURL(%q): unexpected error: %v", c.input, err)
+		case got != c.want:
+			t.Errorf("helmRepoBaseURL(%q) = %q, want %q", c.input, got, c.want)
+		}
+	}
+}
+
+// TestFetchLatestHelmVersion serves helmIndexYAML at /index.yaml and checks the version
+// returned for a chart that is listed and for one that is not.
+func TestFetchLatestHelmVersion(t *testing.T) {
+	serveIndex := func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/index.yaml" {
+			w.Header().Set("Content-Type", "application/x-yaml")
+			_, _ = w.Write([]byte(helmIndexYAML))
+			return
+		}
+		http.NotFound(w, r)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(serveIndex))
+	defer srv.Close()
+
+	loop := NewPackSourceVersionLoop(nil, "seam-system")
+	loop.httpClient = srv.Client()
+
+	t.Run("KnownChart", func(t *testing.T) {
+		version, err := loop.fetchLatestHelmVersion(context.Background(), srv.URL, "mychart")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if version != "2.0.0" {
+			t.Errorf("fetchLatestHelmVersion(mychart) = %q, want %q", version, "2.0.0")
+		}
+	})
+
+	t.Run("UnknownChart", func(t *testing.T) {
+		version, err := loop.fetchLatestHelmVersion(context.Background(), srv.URL, "notexist")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if version != "" {
+			t.Errorf("fetchLatestHelmVersion(notexist) = %q, want empty", version)
+		}
+	})
+}
+
+// TestFetchLatestHelmVersion_HTTPError checks that a 500 response from the index
+// server comes back as an error.
+func TestFetchLatestHelmVersion_HTTPError(t *testing.T) {
+	alwaysFail := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		http.Error(w, "internal server error", http.StatusInternalServerError)
+	})
+	srv := httptest.NewServer(alwaysFail)
+	defer srv.Close()
+
+	loop := NewPackSourceVersionLoop(nil, "seam-system")
+	loop.httpClient = srv.Client()
+
+	if _, err := loop.fetchLatestHelmVersion(context.Background(), srv.URL, "mychart"); err == nil {
+		t.Error("expected error on HTTP 500, got nil")
+	}
+}
+
+// TestFetchLatestHelmVersion_EmptyIndex checks that an index with no entries yields
+// an empty version and no error.
+func TestFetchLatestHelmVersion_EmptyIndex(t *testing.T) {
+	emptyIndex := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/x-yaml")
+		_, _ = w.Write([]byte(helmIndexEmpty))
+	})
+	srv := httptest.NewServer(emptyIndex)
+	defer srv.Close()
+
+	loop := NewPackSourceVersionLoop(nil, "seam-system")
+	loop.httpClient = srv.Client()
+
+	version, err := loop.fetchLatestHelmVersion(context.Background(), srv.URL, "mychart")
+	switch {
+	case err != nil:
+		t.Fatalf("unexpected error: %v", err)
+	case version != "":
+		t.Errorf("fetchLatestHelmVersion on empty index = %q, want empty", version)
+	}
+}
+
+// newFakePackDelivery returns an unstructured seam.ontai.dev/v1alpha1 PackDelivery with
+// the given identity and the three Helm chart fields set in spec.
+func newFakePackDelivery(name, namespace, chartURL, chartName, chartVersion string) *unstructured.Unstructured {
+	meta := map[string]interface{}{
+		"name":      name,
+		"namespace": namespace,
+	}
+	spec := map[string]interface{}{
+		"chartURL":     chartURL,
+		"chartName":    chartName,
+		"chartVersion": chartVersion,
+	}
+
+	pd := &unstructured.Unstructured{Object: map[string]interface{}{}}
+	pd.Object["apiVersion"] = "seam.ontai.dev/v1alpha1"
+	pd.Object["kind"] = "PackDelivery"
+	pd.Object["metadata"] = meta
+	pd.Object["spec"] = spec
+	return pd
+}
+
+// newFakeDynamicClient wraps fake.NewSimpleDynamicClient with the given scheme and seed objects.
+func newFakeDynamicClient(scheme *runtime.Scheme, objs ...runtime.Object) *fake.FakeDynamicClient {
+	return fake.NewSimpleDynamicClient(scheme, objs...) // no custom list kinds are registered here
+}
+
+// TestCheckOnce_EmitsDriftSignalOnNewerVersion runs one cycle against a pack pinned at
+// 1.0.0 while the index offers 2.0.0, and expects a pending UpstreamVersionAvailable
+// DriftSignal to exist afterwards.
+func TestCheckOnce_EmitsDriftSignalOnNewerVersion(t *testing.T) {
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/index.yaml") {
+			http.NotFound(w, r)
+			return
+		}
+		_, _ = w.Write([]byte(helmIndexYAML)) // latest: 2.0.0
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	const ns = "seam-system"
+
+	// The fake client needs list kinds for every resource the loop touches.
+	packGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "packdeliveries"}
+	signalGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "driftsignals"}
+	listKinds := map[schema.GroupVersionResource]string{
+		packGVR:   "PackDeliveryList",
+		signalGVR: "DriftSignalList",
+	}
+
+	pack := newFakePackDelivery("mypack", ns, srv.URL+"/charts/mychart-1.0.0.tgz", "mychart", "1.0.0")
+	client := fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds, pack)
+
+	loop := NewPackSourceVersionLoop(client, ns)
+	loop.httpClient = srv.Client()
+	loop.checkOnce(context.Background())
+
+	created, err := client.Resource(signalGVR).Namespace(ns).Get(
+		context.Background(), packSourceSignalPrefix+"mypack", metav1.GetOptions{},
+	)
+	if err != nil {
+		t.Fatalf("expected DriftSignal to be created, got error: %v", err)
+	}
+
+	spec, _, _ := unstructuredNestedMap(created.Object, "spec")
+	if state, _ := spec["state"].(string); state != "pending" {
+		t.Errorf("DriftSignal state = %q, want %q", state, "pending")
+	}
+	if signalKind, _ := spec["signalKind"].(string); signalKind != "UpstreamVersionAvailable" {
+		t.Errorf("DriftSignal signalKind = %q, want UpstreamVersionAvailable", signalKind)
+	}
+}
+
+// TestCheckOnce_NoSignalWhenVersionCurrent runs one cycle against a pack already pinned
+// at the index's first version and expects that no DriftSignal can be fetched afterwards.
+func TestCheckOnce_NoSignalWhenVersionCurrent(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		_, _ = w.Write([]byte(helmIndexYAML)) // latest: 2.0.0
+	}))
+	defer srv.Close()
+
+	const ns = "seam-system"
+
+	packGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "packdeliveries"}
+	signalGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "driftsignals"}
+	listKinds := map[schema.GroupVersionResource]string{
+		packGVR:   "PackDeliveryList",
+		signalGVR: "DriftSignalList",
+	}
+
+	// Pack already at latest version.
+	pack := newFakePackDelivery("mypack", ns, srv.URL+"/charts/mychart-2.0.0.tgz", "mychart", "2.0.0")
+	client := fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds, pack)
+
+	loop := NewPackSourceVersionLoop(client, ns)
+	loop.httpClient = srv.Client()
+	loop.checkOnce(context.Background())
+
+	if _, err := client.Resource(signalGVR).Namespace(ns).Get(
+		context.Background(), packSourceSignalPrefix+"mypack", metav1.GetOptions{},
+	); err == nil {
+		t.Error("expected no DriftSignal when version is current, but one was created")
+	}
+}
+
+// TestCheckOnce_SkipsNonHelmPacks runs one cycle against a pack whose Helm fields are
+// all empty and expects that no DriftSignal can be fetched afterwards.
+func TestCheckOnce_SkipsNonHelmPacks(t *testing.T) {
+	const ns = "seam-system"
+
+	packGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "packdeliveries"}
+	signalGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "driftsignals"}
+	listKinds := map[schema.GroupVersionResource]string{
+		packGVR:   "PackDeliveryList",
+		signalGVR: "DriftSignalList",
+	}
+
+	pack := newFakePackDelivery("rawpack", ns, "", "", "") // no chartURL
+	client := fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds, pack)
+
+	// No HTTP client is injected: the cycle should complete without panicking
+	// or creating any DriftSignal.
+	loop := NewPackSourceVersionLoop(client, ns)
+	loop.checkOnce(context.Background())
+
+	if _, err := client.Resource(signalGVR).Namespace(ns).Get(
+		context.Background(), packSourceSignalPrefix+"rawpack", metav1.GetOptions{},
+	); err == nil {
+		t.Error("expected no DriftSignal for non-Helm pack, but one was created")
+	}
+}
+
+// TestCheckOnce_ConfirmsExistingSignalWhenVersionCurrent verifies that checkOnce
+// advances an existing queued DriftSignal to confirmed when the pack is at the
+// latest version.
+func TestCheckOnce_ConfirmsExistingSignalWhenVersionCurrent(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(helmIndexYAML)) // latest: 2.0.0
+	}))
+	defer srv.Close()
+
+	scheme := runtime.NewScheme()
+	pd := newFakePackDelivery("mypack", "seam-system", srv.URL+"/charts/mychart-2.0.0.tgz", "mychart", "2.0.0")
+
+	signalName := packSourceSignalPrefix + "mypack"
+	existingSignal := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata": map[string]interface{}{
+				"name":      signalName,
+				"namespace": "seam-system",
+			},
+			"spec": map[string]interface{}{
+				"state":      "queued",
+				"signalKind": "UpstreamVersionAvailable",
+			},
+		},
+	}
+	// Serialize and deserialize to ensure the raw JSON format that the fake client returns.
+	// A failure here would invalidate the fixture, so it aborts the test instead of
+	// being ignored.
+	rawBytes, err := json.Marshal(existingSignal.Object)
+	if err != nil {
+		t.Fatalf("marshal existing DriftSignal fixture: %v", err)
+	}
+	if err := json.Unmarshal(rawBytes, &existingSignal.Object); err != nil {
+		t.Fatalf("unmarshal existing DriftSignal fixture: %v", err)
+	}
+
+	pdGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "packdeliveries"}
+	dsGVR := schema.GroupVersionResource{Group: "seam.ontai.dev", Version: "v1alpha1", Resource: "driftsignals"}
+
+	client := fake.NewSimpleDynamicClientWithCustomListKinds(scheme,
+		map[schema.GroupVersionResource]string{
+			pdGVR: "PackDeliveryList",
+			dsGVR: "DriftSignalList",
+		},
+		pd, existingSignal,
+	)
+
+	loop := NewPackSourceVersionLoop(client, "seam-system")
+	loop.httpClient = srv.Client()
+
+	loop.checkOnce(context.Background())
+
+	ds, err := client.Resource(dsGVR).Namespace("seam-system").Get(context.Background(), signalName, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("expected DriftSignal to exist: %v", err)
+	}
+	spec, _, _ := unstructuredNestedMap(ds.Object, "spec")
+	state, _ := spec["state"].(string)
+	if state != "confirmed" {
+		t.Errorf("DriftSignal state = %q, want confirmed", state)
+	}
+}
diff --git a/internal/agent/packinstance_pull_loop.go b/internal/agent/packinstance_pull_loop.go
index c841c7d..1f62fb2 100644
--- a/internal/agent/packinstance_pull_loop.go
+++ b/internal/agent/packinstance_pull_loop.go
@@ -356,7 +356,7 @@ func (l *PackInstancePullLoop) upsertPackReceipt(
 	receipt := &unstructured.Unstructured{
 		Object: map[string]interface{}{
 			"apiVersion": "seam.ontai.dev/v1alpha1",
-			"kind":       "InfrastructurePackReceipt",
+			"kind":       "PackReceipt",
 			"metadata": map[string]interface{}{
 				"name":      receiptName,
 				"namespace": l.namespace,
diff --git a/internal/agent/policy_report_drift_loop.go b/internal/agent/policy_report_drift_loop.go
new file mode 100644
index 0000000..b54d66a
--- /dev/null
+++ b/internal/agent/policy_report_drift_loop.go
@@ -0,0 +1,249 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/dynamic"
+)
+
+// clusterPolicyReportGVR identifies wgpolicyk8s.io/v1alpha2 ClusterPolicyReport CRs (Kyverno).
+var clusterPolicyReportGVR = schema.GroupVersionResource{
+	Group:    "wgpolicyk8s.io",
+	Version:  "v1alpha2",
+	Resource: "clusterpolicyreports",
+}
+
+// policyReportGVR identifies wgpolicyk8s.io/v1alpha2 namespaced PolicyReport CRs (Kyverno).
+var policyReportGVR = schema.GroupVersionResource{
+	Group:    "wgpolicyk8s.io",
+	Version:  "v1alpha2",
+	Resource: "policyreports",
+}
+
+// policyReportSignalPrefix starts the name of every DriftSignal written by PolicyReportDriftLoop.
+const policyReportSignalPrefix = "drift-policy-"
+
+// PolicyReportDriftLoop runs on conductor role=management. On each cycle it:
+//  1. Lists all ClusterPolicyReport CRs, plus the PolicyReport CRs in its own namespace.
+//  2. For each report with at least one fail result, emits a KyvernoPolicyViolation DriftSignal.
+//  3. Confirms any existing signal when the report has no fail results.
+//
+// Skips cleanly when Kyverno CRDs are not installed.
+// AutonomyLevel=observe-only: violations are logged only, no DriftSignal is emitted.
+// RECON-L2.
+type PolicyReportDriftLoop struct {
+	client     dynamic.Interface       // lists reports and reads/writes DriftSignals
+	namespace  string                  // namespace of the PolicyReports and of the DriftSignals
+	ocWatcher  *OperatorContextWatcher // optional; nil means no autonomy gating
+	clusterRef string                  // cluster name passed to the autonomy check
+}
+
+// NewPolicyReportDriftLoop returns a loop bound to the given client, namespace and
+// cluster reference. No OperatorContextWatcher is attached; use
+// WithOperatorContextWatcher to add one.
+func NewPolicyReportDriftLoop(client dynamic.Interface, namespace, clusterRef string) *PolicyReportDriftLoop {
+	loop := new(PolicyReportDriftLoop)
+	loop.client, loop.namespace, loop.clusterRef = client, namespace, clusterRef
+	return loop
+}
+
+// WithOperatorContextWatcher sets the watcher consulted before a DriftSignal is emitted.
+func (l *PolicyReportDriftLoop) WithOperatorContextWatcher(w *OperatorContextWatcher) {
+	l.ocWatcher = w // a nil watcher disables the autonomy check in checkReport
+}
+
+// Run performs one check immediately and then one per interval until ctx is cancelled.
+// interval must be positive: it is passed to time.NewTicker, which panics otherwise.
+func (l *PolicyReportDriftLoop) Run(ctx context.Context, interval time.Duration) {
+	l.checkOnce(ctx)
+	if ctx.Err() != nil {
+		// Cancelled during the first check; do not start the ticker at all.
+		return
+	}
+
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+
+	done := ctx.Done()
+	for {
+		select {
+		case <-tick.C:
+			l.checkOnce(ctx)
+		case <-done:
+			return
+		}
+	}
+}
+
+// checkOnce runs a single cycle: it lists every ClusterPolicyReport and the
+// PolicyReports in l.namespace and passes each one to checkReport.
+//
+// The two lists are handled independently, so a failure to list one kind does not
+// prevent the other kind from being checked in the same cycle. Errors recognised by
+// isNoCRDError are skipped silently; other list errors are logged.
+func (l *PolicyReportDriftLoop) checkOnce(ctx context.Context) {
+	// scan lists one resource and checks every item in it; what is the label
+	// used in the log line when the list call fails.
+	scan := func(ri dynamic.ResourceInterface, what string, cluster bool) {
+		list, err := ri.List(ctx, metav1.ListOptions{})
+		if err != nil {
+			if !isNoCRDError(err) {
+				fmt.Printf("policy report drift loop: list %s: %v\n", what, err)
+			}
+			return
+		}
+		for i := range list.Items {
+			l.checkReport(ctx, &list.Items[i], cluster)
+		}
+	}
+
+	scan(l.client.Resource(clusterPolicyReportGVR), "ClusterPolicyReports", true)
+	scan(l.client.Resource(policyReportGVR).Namespace(l.namespace), "PolicyReports in "+l.namespace, false)
+}
+
+// checkReport evaluates one report. With no fail results it confirms any existing
+// signal; with failures it emits a signal unless the attached watcher disallows
+// autonomous actions for the cluster, in which case it only logs.
+func (l *PolicyReportDriftLoop) checkReport(ctx context.Context, report *k8sunstructured.Unstructured, cluster bool) {
+	reportName := report.GetName()
+
+	// Cluster-scoped reports get an extra "cluster-" segment in the signal name.
+	signalName := policyReportSignalPrefix + reportName
+	if cluster {
+		signalName = policyReportSignalPrefix + "cluster-" + reportName
+	}
+
+	fails, failedPolicies := policyReportFailures(report.Object)
+	switch {
+	case fails == 0:
+		l.confirmSignalIfPresent(ctx, signalName)
+	case l.ocWatcher != nil && !l.ocWatcher.IsAutonomousActionsAllowedFor(l.clusterRef, "management"):
+		fmt.Printf("policy report drift loop: report=%q has %d fail(s) -- observe-only mode, no DriftSignal written\n", reportName, fails)
+	default:
+		reason := fmt.Sprintf("Kyverno policy violations: report=%s failCount=%d policies=%v", reportName, fails, failedPolicies)
+		l.emitSignal(ctx, signalName, reportName, reason)
+	}
+}
+
+// policyReportFailures scans the top-level "results" list of a report object and returns
+// how many entries have result "fail", together with the policy names of those entries.
+// Entries that are not maps are ignored; a failing entry without a string "policy" is
+// counted but contributes no name.
+func policyReportFailures(obj map[string]interface{}) (int, []string) {
+	var (
+		failed   int
+		policies []string
+	)
+	items, _, _ := unstructuredNestedSlice(obj, "results")
+	for i := range items {
+		row, isMap := items[i].(map[string]interface{})
+		if !isMap {
+			continue
+		}
+		if outcome, _ := row["result"].(string); outcome != "fail" {
+			continue
+		}
+		failed++
+		if policyName, isStr := row["policy"].(string); isStr {
+			policies = append(policies, policyName)
+		}
+	}
+	return failed, policies
+}
+
+// emitSignal creates or updates the KyvernoPolicyViolation DriftSignal for a report.
+//
+//   - No signal yet: a new one is created in state "pending" with escalationCounter 0.
+//   - Counter already at or above escalationThreshold: nothing is written.
+//   - State "confirmed": reset to "pending" with a fresh correlationID and counter 0.
+//   - State "queued": moved back to "pending" and the counter is incremented by one.
+//   - Any other state: left untouched.
+//
+// Failures are logged and otherwise ignored; nothing is reported back to the caller.
+func (l *PolicyReportDriftLoop) emitSignal(ctx context.Context, signalName, reportName, driftReason string) {
+	now := time.Now().UTC().Format(time.RFC3339)
+	signals := l.client.Resource(driftSignalGVR).Namespace(l.namespace)
+
+	existing, err := signals.Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil && !k8serrors.IsNotFound(err) {
+		fmt.Printf("policy report drift loop: report=%q get DriftSignal: %v\n", reportName, err)
+		return
+	}
+
+	if k8serrors.IsNotFound(err) {
+		obj := map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": signalName, "namespace": l.namespace},
+			"spec": map[string]interface{}{
+				"state":         "pending",
+				"signalKind":    "KyvernoPolicyViolation",
+				"driftLayer":    "kubernetes",
+				"correlationID": fmt.Sprintf("policy-%s-%d", reportName, time.Now().UnixNano()),
+				"observedAt":    now,
+				"driftReason":   driftReason,
+				// NOTE(review): kind is "PolicyReport" for cluster-scoped reports
+				// too; this function is not told which kind it was given.
+				"affectedCRRef": map[string]interface{}{
+					"group": "wgpolicyk8s.io",
+					"kind":  "PolicyReport",
+					"name":  reportName,
+				},
+				"escalationCounter": int64(0),
+			},
+		}
+		if _, cErr := signals.Create(
+			ctx, &k8sunstructured.Unstructured{Object: obj}, metav1.CreateOptions{},
+		); cErr != nil {
+			fmt.Printf("policy report drift loop: report=%q create DriftSignal: %v\n", reportName, cErr)
+			// Do not fall through to the "written" message: nothing was written.
+			return
+		}
+		fmt.Printf("policy report drift loop: report=%q Kyverno policy violations -- DriftSignal written\n", reportName)
+		return
+	}
+
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+
+	// The counter is expected as int64, but an object decoded with plain
+	// encoding/json carries numbers as float64; accept both so that the counter
+	// is not silently read as 0.
+	var counter int64
+	switch v := spec["escalationCounter"].(type) {
+	case int64:
+		counter = v
+	case float64:
+		counter = int64(v)
+	}
+
+	// Compare in int64: narrowing the counter to int32 could wrap a large value
+	// around and let it slip under the threshold.
+	if counter >= int64(escalationThreshold) {
+		return
+	}
+
+	// patchSpec merge-patches the given spec fields onto the existing signal and
+	// logs any failure under the supplied action label.
+	patchSpec := func(fields map[string]interface{}, action string) {
+		data, mErr := json.Marshal(map[string]interface{}{"spec": fields})
+		if mErr != nil {
+			fmt.Printf("policy report drift loop: report=%q %s: %v\n", reportName, action, mErr)
+			return
+		}
+		if _, pErr := signals.Patch(
+			ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{},
+		); pErr != nil {
+			fmt.Printf("policy report drift loop: report=%q %s: %v\n", reportName, action, pErr)
+		}
+	}
+
+	switch state {
+	case "confirmed":
+		// Violations have reappeared after a confirmation: start a new episode.
+		patchSpec(map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"correlationID":     fmt.Sprintf("policy-%s-%d", reportName, time.Now().UnixNano()),
+			"observedAt":        now,
+			"escalationCounter": int64(0),
+		}, "reset DriftSignal")
+	case "queued":
+		// Still failing after having been queued: re-arm and escalate.
+		patchSpec(map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"escalationCounter": counter + 1,
+		}, "increment escalation counter")
+	}
+}
+
+// confirmSignalIfPresent moves an existing DriftSignal in l.namespace to state
+// "confirmed" and clears its correlationID. It does nothing when the signal does not
+// exist, when its state is already "confirmed", or when its state is empty.
+func (l *PolicyReportDriftLoop) confirmSignalIfPresent(ctx context.Context, signalName string) {
+	existing, err := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil {
+		// A missing signal is the normal "nothing to confirm" case; any other
+		// error is an API failure and is logged instead of being dropped.
+		if !k8serrors.IsNotFound(err) {
+			fmt.Printf("policy report drift loop: get DriftSignal %s/%s: %v\n", l.namespace, signalName, err)
+		}
+		return
+	}
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	if state == "confirmed" || state == "" {
+		return
+	}
+	patch := map[string]interface{}{
+		"spec": map[string]interface{}{"state": "confirmed", "correlationID": ""},
+	}
+	data, mErr := json.Marshal(patch)
+	if mErr != nil {
+		fmt.Printf("policy report drift loop: confirm DriftSignal %s/%s: %v\n", l.namespace, signalName, mErr)
+		return
+	}
+	if _, pErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Patch(
+		ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{},
+	); pErr != nil {
+		fmt.Printf("policy report drift loop: confirm DriftSignal %s/%s: %v\n", l.namespace, signalName, pErr)
+	}
+}
diff --git a/internal/agent/policy_report_drift_loop_test.go b/internal/agent/policy_report_drift_loop_test.go
new file mode 100644
index 0000000..4ec4a87
--- /dev/null
+++ b/internal/agent/policy_report_drift_loop_test.go
@@ -0,0 +1,169 @@
+package agent
+
+import (
+	"context"
+	"testing"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic/fake"
+)
+
+var policyTestGVRs = map[schema.GroupVersionResource]string{ // list kinds handed to the fake dynamic client
+	clusterPolicyReportGVR: "ClusterPolicyReportList",
+	policyReportGVR:        "PolicyReportList",
+	driftSignalGVR:         "DriftSignalList",
+}
+
+// newPolicyFakeClient returns a fake dynamic client seeded with objs, using an empty
+// scheme and the list kinds from policyTestGVRs.
+func newPolicyFakeClient(objs ...runtime.Object) *fake.FakeDynamicClient {
+	emptyScheme := runtime.NewScheme()
+	return fake.NewSimpleDynamicClientWithCustomListKinds(emptyScheme, policyTestGVRs, objs...)
+}
+
+// TestPolicyReportDriftLoop_NoViolations_NoSignal runs one cycle over a cluster report
+// whose only result is "pass" and expects no create action on the fake client.
+func TestPolicyReportDriftLoop_NoViolations_NoSignal(t *testing.T) {
+	passing := map[string]interface{}{"result": "pass", "policy": "require-labels"}
+	report := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "wgpolicyk8s.io/v1alpha2",
+		"kind":       "ClusterPolicyReport",
+		"metadata":   map[string]interface{}{"name": "cluster-report"},
+		"results":    []interface{}{passing},
+	}}
+
+	client := newPolicyFakeClient(report)
+	NewPolicyReportDriftLoop(client, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	for _, action := range client.Actions() {
+		if action.GetVerb() != "create" {
+			continue
+		}
+		t.Error("expected no DriftSignal for passing policy report")
+	}
+}
+
+// TestPolicyReportDriftLoop_ClusterReportFail_EmitsSignal runs one cycle over a cluster
+// report containing a "fail" result and expects a create action on driftsignals.
+func TestPolicyReportDriftLoop_ClusterReportFail_EmitsSignal(t *testing.T) {
+	results := []interface{}{
+		map[string]interface{}{"result": "fail", "policy": "require-psa"},
+		map[string]interface{}{"result": "pass", "policy": "require-labels"},
+	}
+	report := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "wgpolicyk8s.io/v1alpha2",
+		"kind":       "ClusterPolicyReport",
+		"metadata":   map[string]interface{}{"name": "cluster-report"},
+		"results":    results,
+	}}
+
+	client := newPolicyFakeClient(report)
+	NewPolicyReportDriftLoop(client, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	sawCreate := false
+	for _, action := range client.Actions() {
+		if action.GetVerb() == "create" && action.GetResource().Resource == "driftsignals" {
+			sawCreate = true
+		}
+	}
+	if !sawCreate {
+		t.Error("expected DriftSignal created for ClusterPolicyReport with fail results")
+	}
+}
+
+// TestPolicyReportDriftLoop_SignalFields runs one cycle over a failing cluster report
+// and checks the name and signalKind of the resulting DriftSignal.
+func TestPolicyReportDriftLoop_SignalFields(t *testing.T) {
+	const reportName = "my-report"
+	report := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "wgpolicyk8s.io/v1alpha2",
+		"kind":       "ClusterPolicyReport",
+		"metadata":   map[string]interface{}{"name": reportName},
+		"results": []interface{}{
+			map[string]interface{}{"result": "fail", "policy": "no-privileged"},
+		},
+	}}
+
+	client := newPolicyFakeClient(report)
+	NewPolicyReportDriftLoop(client, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	// Cluster-scoped reports are named with an extra "cluster-" segment.
+	wantName := policyReportSignalPrefix + "cluster-" + reportName
+	signal, err := client.Resource(driftSignalGVR).Namespace("seam-system").Get(
+		context.Background(), wantName, metav1.GetOptions{},
+	)
+	if err != nil {
+		t.Fatalf("expected DriftSignal: %v", err)
+	}
+
+	spec, _, _ := unstructuredNestedMap(signal.Object, "spec")
+	kind, _ := spec["signalKind"].(string)
+	if kind != "KyvernoPolicyViolation" {
+		t.Errorf("signalKind = %q, want KyvernoPolicyViolation", kind)
+	}
+}
+
+// TestPolicyReportDriftLoop_ObserveOnly_NoSignal runs one cycle over a failing cluster
+// report with a watcher that resolves the cluster to observe-only, and expects no
+// create action on the fake client.
+func TestPolicyReportDriftLoop_ObserveOnly_NoSignal(t *testing.T) {
+	report := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "wgpolicyk8s.io/v1alpha2",
+		"kind":       "ClusterPolicyReport",
+		"metadata":   map[string]interface{}{"name": "cluster-report"},
+		"results": []interface{}{
+			map[string]interface{}{"result": "fail", "policy": "require-psa"},
+		},
+	}}
+	client := newPolicyFakeClient(report)
+
+	// Seed the watcher's resolved cache directly so that ccs-mgmt is observe-only.
+	watcher := NewOperatorContextWatcher(client, "ont-system")
+	watcher.mu.Lock()
+	watcher.resolved["ccs-mgmt"] = resolvedContext{autonomyLevel: AutonomyLevelObserveOnly, mode: "normal"}
+	watcher.mu.Unlock()
+
+	loop := NewPolicyReportDriftLoop(client, "seam-system", "ccs-mgmt")
+	loop.WithOperatorContextWatcher(watcher)
+	loop.checkOnce(context.Background())
+
+	for _, action := range client.Actions() {
+		if action.GetVerb() != "create" {
+			continue
+		}
+		t.Error("expected no DriftSignal under observe-only mode")
+	}
+}
+
+// TestPolicyReportDriftLoop_ConfirmsSignalWhenClean seeds a queued DriftSignal for a
+// cluster report with no results and expects one cycle to patch that signal.
+func TestPolicyReportDriftLoop_ConfirmsSignalWhenClean(t *testing.T) {
+	const reportName = "clean-report"
+	signalName := policyReportSignalPrefix + "cluster-" + reportName
+
+	report := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "wgpolicyk8s.io/v1alpha2",
+		"kind":       "ClusterPolicyReport",
+		"metadata":   map[string]interface{}{"name": reportName},
+		"results":    []interface{}{},
+	}}
+	queued := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "seam.ontai.dev/v1alpha1",
+		"kind":       "DriftSignal",
+		"metadata":   map[string]interface{}{"name": signalName, "namespace": "seam-system"},
+		"spec":       map[string]interface{}{"state": "queued"},
+	}}
+
+	client := newPolicyFakeClient(report, queued)
+	NewPolicyReportDriftLoop(client, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	sawPatch := false
+	for _, action := range client.Actions() {
+		if action.GetVerb() == "patch" && action.GetResource().Resource == "driftsignals" {
+			sawPatch = true
+		}
+	}
+	if !sawPatch {
+		t.Error("expected DriftSignal to be confirmed when report has no violations")
+	}
+}
+
+// TestPolicyReportFailures_CountsCorrectly feeds two failing results and one passing
+// result to policyReportFailures and checks the count and the number of names returned.
+func TestPolicyReportFailures_CountsCorrectly(t *testing.T) {
+	results := []interface{}{
+		map[string]interface{}{"result": "fail", "policy": "pol-a"},
+		map[string]interface{}{"result": "pass", "policy": "pol-b"},
+		map[string]interface{}{"result": "fail", "policy": "pol-c"},
+	}
+
+	failCount, names := policyReportFailures(map[string]interface{}{"results": results})
+
+	if failCount != 2 {
+		t.Errorf("failCount = %d, want 2", failCount)
+	}
+	if got := len(names); got != 2 {
+		t.Errorf("policies len = %d, want 2", got)
+	}
+}
diff --git a/internal/agent/runtime_drift_handler.go b/internal/agent/runtime_drift_handler.go
index 260fe28..e159108 100644
--- a/internal/agent/runtime_drift_handler.go
+++ b/internal/agent/runtime_drift_handler.go
@@ -13,6 +13,8 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/dynamic"
+
+	"github.com/ontai-dev/conductor-sdk/runnerlib"
 )
 
 // packLogGVR is the GroupVersionResource for PackLog CRs (dispatcher).
@@ -212,14 +214,21 @@ func (h *RuntimeDriftHandler) reconcileRuntimeDrift(
 			h.markApprovalActed(ctx, approval.GetName(), packInstalledNS)
 		}
 
-		// 4. Submit remediation Job (Job scheduling via Kueue placeholder).
-		// The actual Kueue Job submission is handled by the remediation capability
-		// executor. Here we increment the attempt count in PackLog and advance the
-		// signal to state=queued.
+		// 4. Submit a Kueue watchdog Job that runs the appropriate remediation
+		// capability against the target cluster. The capability is chosen from
+		// the failureReason via watchdogCapabilityForFailureReason.
+		capability := watchdogCapabilityForFailureReason(failureReason)
+		executeImage := h.resolveExecuteImage(ctx, clusterName)
+		jobErr := h.submitWatchdogJob(ctx, clusterName, packInstalledName, capability, failureReason, executeImage)
+		if jobErr != nil {
+			fmt.Printf("runtime drift handler: cluster=%q signal=%q Job submit failed: %v\n",
+				clusterName, signalName, jobErr)
+			return
+		}
 		h.incrementPackLogAttempts(ctx, packLogName, packInstalledNS, failureReason, currentAttempts+1)
 		h.advanceSignalState(ctx, tenantNS, signalName, "queued")
-		fmt.Printf("runtime drift handler: cluster=%q signal=%q remediation attempt %d submitted\n",
-			clusterName, signalName, currentAttempts+1)
+		fmt.Printf("runtime drift handler: cluster=%q signal=%q capability=%q attempt %d queued\n",
+			clusterName, signalName, capability, currentAttempts+1)
 		return
 	}
 
@@ -535,3 +544,147 @@ func (h *RuntimeDriftHandler) markApprovalActed(ctx context.Context, approvalNam
 			namespace, approvalName, pErr)
 	}
 }
+
+// watchdogCapabilityForFailureReason picks the watchdog capability that should
+// remediate a DriftSignal with the given failureReason. Any reason without a
+// dedicated capability (CrashLoopBackOff included) resolves to pod-restart.
+func watchdogCapabilityForFailureReason(failureReason string) string {
+	selected := runnerlib.CapabilityPodRestart
+	switch failureReason {
+	case "OOMKilled":
+		selected = runnerlib.CapabilityResourcePatch
+	case "ImagePullBackOff", "ErrImagePull":
+		selected = runnerlib.CapabilityCredentialRefresh
+	case "FailedMount", "MultiAttachError":
+		selected = runnerlib.CapabilityForceVolumeDetach
+	}
+	return selected
+}
+
+// watchdogExecuteImageFallback is the execute image used whenever no usable
+// image can be resolved from the cluster's RunnerConfig.
+const watchdogExecuteImageFallback = "10.20.0.1:5000/ontai-dev/conductor-execute:dev"
+
+// resolveExecuteImage looks up the RunnerConfig named clusterRef in h.namespace
+// and returns its spec.runnerImage. A failed Get or an empty/missing field
+// yields watchdogExecuteImageFallback instead.
+func (h *RuntimeDriftHandler) resolveExecuteImage(ctx context.Context, clusterRef string) string {
+	runnerCfg, getErr := h.client.Resource(runnerConfigGVR).Namespace(h.namespace).Get(ctx, clusterRef, metav1.GetOptions{})
+	if getErr == nil {
+		if image, _, _ := unstructured.NestedString(runnerCfg.Object, "spec", "runnerImage"); image != "" {
+			return image
+		}
+	}
+	return watchdogExecuteImageFallback
+}
+
+// watchdogJobGVR is the GroupVersionResource for batch/v1 Jobs.
+var watchdogJobGVR = schema.GroupVersionResource{Group: "batch", Version: "v1", Resource: "jobs"}
+
+// submitWatchdogJob creates a batch/v1 Job in h.namespace that runs the given watchdog
+// capability against clusterRef. The Job is labelled for the Kueue queue "watchdog-queue",
+// never retries (backoffLimit=0), is removed 600s after finishing, and mounts the tenant
+// kubeconfig Secret seam-mc-{clusterRef}-kubeconfig (subPath "value") read-only.
+// packInstalledName and failureReason reach the container as env vars. Returns nil on
+// success or the Create error wrapped with the Job name.
+// conductor-schema.md §6, wrapper-schema.md §9.
+func (h *RuntimeDriftHandler) submitWatchdogJob(
+	ctx context.Context,
+	clusterRef, packInstalledName, capability, failureReason, executeImage string,
+) error {
+	// A Job name may not exceed 63 characters: trim the descriptive prefix and keep
+	// the timestamp suffix that distinguishes successive attempts.
+	suffix := fmt.Sprintf("-%d", time.Now().Unix())
+	prefix := "watchdog-" + sanitizeLabel(capability) + "-" + sanitizeLabel(clusterRef)
+	if limit := 63 - len(suffix); len(prefix) > limit {
+		prefix = prefix[:limit]
+	}
+	jobName := prefix + suffix
+
+	job := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"apiVersion": "batch/v1",
+			"kind":       "Job",
+			"metadata": map[string]interface{}{
+				"name":      jobName,
+				"namespace": h.namespace,
+				"labels": map[string]interface{}{
+					"kueue.x-k8s.io/queue-name":     "watchdog-queue",
+					"ontai.dev/watchdog-capability": capability,
+					"ontai.dev/cluster-ref":         clusterRef,
+				},
+			},
+			"spec": map[string]interface{}{
+				"ttlSecondsAfterFinished": int64(600),
+				"completions":             int64(1),
+				"backoffLimit":            int64(0),
+				"template": map[string]interface{}{
+					"spec": map[string]interface{}{
+						"serviceAccountName": "conductor",
+						"restartPolicy":      "Never",
+						"securityContext": map[string]interface{}{
+							"runAsNonRoot":   true,
+							"seccompProfile": map[string]interface{}{"type": "RuntimeDefault"},
+						},
+						"volumes": []interface{}{
+							map[string]interface{}{
+								"name":   "kubeconfig",
+								"secret": map[string]interface{}{"secretName": "seam-mc-" + clusterRef + "-kubeconfig"},
+							},
+						},
+						"containers": []interface{}{
+							map[string]interface{}{
+								"name":            "conductor",
+								"image":           executeImage,
+								"imagePullPolicy": "Always",
+								"env": []interface{}{
+									map[string]interface{}{"name": "CAPABILITY", "value": capability},
+									map[string]interface{}{"name": "CLUSTER_REF", "value": clusterRef},
+									map[string]interface{}{"name": "POD_NAMESPACE", "value": h.namespace},
+									map[string]interface{}{"name": "PACK_INSTALLED_NAME", "value": packInstalledName},
+									map[string]interface{}{"name": "FAILURE_REASON", "value": failureReason},
+								},
+								"volumeMounts": []interface{}{
+									map[string]interface{}{
+										"name":      "kubeconfig",
+										"mountPath": "/var/run/secrets/kubeconfig",
+										"subPath":   "value",
+										"readOnly":  true,
+									},
+								},
+								"securityContext": map[string]interface{}{
+									"allowPrivilegeEscalation": false,
+									"capabilities":             map[string]interface{}{"drop": []interface{}{"ALL"}},
+									"runAsNonRoot":             true,
+									"seccompProfile":           map[string]interface{}{"type": "RuntimeDefault"},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	_, err := h.client.Resource(watchdogJobGVR).Namespace(h.namespace).Create(ctx, job, metav1.CreateOptions{})
+	if err != nil {
+		return fmt.Errorf("create watchdog Job %s/%s: %w", h.namespace, jobName, err)
+	}
+	return nil
+}
+
+// sanitizeLabel reduces s to the characters used for a Job name segment: ASCII
+// lowercase letters, digits and hyphens. Uppercase ASCII letters are lowercased,
+// every other byte is dropped, and the result is capped at 30 bytes.
+func sanitizeLabel(s string) string {
+	kept := make([]byte, 0, len(s))
+	for _, b := range []byte(s) {
+		if b >= 'A' && b <= 'Z' {
+			b += 'a' - 'A' // fold ASCII upper case to lower case
+		}
+		if valid := (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') || b == '-'; valid && len(kept) < 30 {
+			kept = append(kept, b)
+		}
+	}
+	return string(kept)
+}
diff --git a/internal/agent/runtime_drift_handler_test.go b/internal/agent/runtime_drift_handler_test.go
index 05dc505..a5440bb 100644
--- a/internal/agent/runtime_drift_handler_test.go
+++ b/internal/agent/runtime_drift_handler_test.go
@@ -160,3 +160,98 @@ func TestRuntimeDriftHandler_SkipsGovernanceDrift(t *testing.T) {
 	// If this panics, the guard is missing and the test fails.
 	_ = h
 }
+
+// TestWatchdogCapabilityForFailureReason checks the failure-reason to capability
+// mapping: every dedicated reason resolves to its capability, and anything else
+// (including the empty string) resolves to pod-restart.
+func TestWatchdogCapabilityForFailureReason(t *testing.T) {
+	type mapping struct{ reason, want string }
+	table := []mapping{
+		{reason: "OOMKilled", want: "resource-patch"},
+		{reason: "ImagePullBackOff", want: "credential-refresh"},
+		{reason: "ErrImagePull", want: "credential-refresh"},
+		{reason: "FailedMount", want: "force-volume-detach"},
+		{reason: "MultiAttachError", want: "force-volume-detach"},
+		{reason: "CrashLoopBackOff", want: "pod-restart"},
+		{reason: "Unknown", want: "pod-restart"},
+		{reason: "", want: "pod-restart"},
+	}
+
+	for i := range table {
+		entry := table[i]
+		got := watchdogCapabilityForFailureReason(entry.reason)
+		if got != entry.want {
+			t.Errorf("watchdogCapabilityForFailureReason(%q) = %q, want %q", entry.reason, got, entry.want)
+		}
+	}
+}
+
+// setupJobScheme extends the approval test scheme with unstructured registrations
+// for batch/v1 Job and seam.ontai.dev/v1alpha1 RunnerConfig, each together with
+// its "<Kind>List" list kind.
+func setupJobScheme() *runtime.Scheme {
+	scheme := setupApprovalScheme()
+	// register adds kind and its list kind, in that order, as unstructured types
+	// under the given group and version.
+	register := func(group, version, kind string) {
+		gv := schema.GroupVersion{Group: group, Version: version}
+		scheme.AddKnownTypeWithName(gv.WithKind(kind), &unstructured.Unstructured{})
+		scheme.AddKnownTypeWithName(gv.WithKind(kind+"List"), &unstructured.UnstructuredList{})
+	}
+	register("batch", "v1", "Job")
+	register("seam.ontai.dev", "v1alpha1", "RunnerConfig")
+
+	return scheme
+}
+
+// TestSubmitWatchdogJob_CreatesJobInOntSystem submits one watchdog Job through a
+// fake dynamic client and checks that exactly one Job lands in ont-system with
+// the Kueue queue label and a CAPABILITY=pod-restart env var on its first
+// container.
+func TestSubmitWatchdogJob_CreatesJobInOntSystem(t *testing.T) {
+	const ns = "ont-system"
+	ctx := context.Background()
+	fakeClient := fake.NewSimpleDynamicClient(setupJobScheme())
+	handler := NewRuntimeDriftHandler(fakeClient, ns)
+
+	submitErr := handler.submitWatchdogJob(ctx, "ccs-dev", "nginx", "pod-restart", "CrashLoopBackOff",
+		"10.20.0.1:5000/ontai-dev/conductor-execute:dev")
+	if submitErr != nil {
+		t.Fatalf("submitWatchdogJob returned unexpected error: %v", submitErr)
+	}
+
+	jobs, listErr := fakeClient.Resource(schema.GroupVersionResource{Group: "batch", Version: "v1", Resource: "jobs"}).
+		Namespace(ns).List(ctx, metav1.ListOptions{})
+	if listErr != nil {
+		t.Fatalf("list Jobs: %v", listErr)
+	}
+	if len(jobs.Items) != 1 {
+		t.Fatalf("expected 1 Job, got %d", len(jobs.Items))
+	}
+	created := jobs.Items[0]
+
+	// Kueue queue label.
+	if queueName := created.GetLabels()["kueue.x-k8s.io/queue-name"]; queueName != "watchdog-queue" {
+		t.Errorf("expected queue label watchdog-queue, got %q", queueName)
+	}
+	// Namespace.
+	if gotNS := created.GetNamespace(); gotNS != ns {
+		t.Errorf("expected namespace ont-system, got %q", gotNS)
+	}
+
+	// CAPABILITY env var on the first container.
+	containers, _, _ := unstructured.NestedSlice(created.Object, "spec", "template", "spec", "containers")
+	if len(containers) == 0 {
+		t.Fatal("expected at least 1 container in Job spec")
+	}
+	first, _ := containers[0].(map[string]interface{})
+	envVars, _, _ := unstructured.NestedSlice(first, "env")
+	hasCapability := false
+	for i := 0; i < len(envVars) && !hasCapability; i++ {
+		entry, _ := envVars[i].(map[string]interface{})
+		hasCapability = entry["name"] == "CAPABILITY" && entry["value"] == "pod-restart"
+	}
+	if !hasCapability {
+		t.Errorf("CAPABILITY=pod-restart env var not found in Job container; env: %v", envVars)
+	}
+}
diff --git a/internal/agent/vulnerability_drift_loop.go b/internal/agent/vulnerability_drift_loop.go
new file mode 100644
index 0000000..4105a6e
--- /dev/null
+++ b/internal/agent/vulnerability_drift_loop.go
@@ -0,0 +1,248 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sunstructured "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/dynamic"
+)
+
+// vulnerabilityReportGVR is the GroupVersionResource for VulnerabilityReport CRs (Trivy Operator).
+var vulnerabilityReportGVR = schema.GroupVersionResource{
+	Group:    "aquasecurity.github.io",
+	Version:  "v1alpha1",
+	Resource: "vulnerabilityreports",
+}
+
+// vulnerabilitySignalPrefix is the DriftSignal name prefix for Trivy vulnerability signals.
+const vulnerabilitySignalPrefix = "drift-vuln-"
+
+// criticalSeverity is the severity string matched exactly when counting entries of report.vulnerabilities.
+const criticalSeverity = "CRITICAL"
+
+// VulnerabilityDriftLoop runs on conductor role=management. On each cycle it:
+//  1. Lists VulnerabilityReport CRs in the configured namespace.
+//  2. For each report with at least one CRITICAL vulnerability, emits a
+//     VulnerableImageDetected DriftSignal.
+//  3. Confirms any existing signal when the report has no CRITICAL vulnerabilities.
+//
+// Skips cleanly when Trivy Operator CRDs are not installed.
+// AutonomyLevel=observe-only: logs only, no DriftSignal written.
+// RECON-M2.
+type VulnerabilityDriftLoop struct {
+	client     dynamic.Interface       // dynamic client used for reports and DriftSignals
+	namespace  string                  // namespace of the listed reports and the emitted signals
+	ocWatcher  *OperatorContextWatcher // optional; nil disables autonomy-level gating
+	clusterRef string                  // cluster name passed to the autonomy-level check
+}
+
+// NewVulnerabilityDriftLoop returns a VulnerabilityDriftLoop bound to namespace and
+// clusterRef, with no OperatorContextWatcher attached.
+func NewVulnerabilityDriftLoop(client dynamic.Interface, namespace, clusterRef string) *VulnerabilityDriftLoop {
+	loop := &VulnerabilityDriftLoop{client: client}
+	loop.namespace = namespace
+	loop.clusterRef = clusterRef
+	return loop
+}
+
+// WithOperatorContextWatcher sets the OperatorContextWatcher that checkReport consults before emitting a DriftSignal.
+func (l *VulnerabilityDriftLoop) WithOperatorContextWatcher(w *OperatorContextWatcher) {
+	l.ocWatcher = w
+}
+
+// Run performs one check immediately and then one per interval tick, returning
+// as soon as ctx is cancelled.
+func (l *VulnerabilityDriftLoop) Run(ctx context.Context, interval time.Duration) {
+	if l.checkOnce(ctx); ctx.Err() != nil {
+		return
+	}
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			l.checkOnce(ctx)
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// checkOnce lists VulnerabilityReports in l.namespace and checks each; list errors are logged unless isNoCRDError matches.
+func (l *VulnerabilityDriftLoop) checkOnce(ctx context.Context) {
+	reports, listErr := l.client.Resource(vulnerabilityReportGVR).Namespace(l.namespace).List(ctx, metav1.ListOptions{})
+	if listErr != nil {
+		if !isNoCRDError(listErr) {
+			fmt.Printf("vulnerability drift loop: list VulnerabilityReports in %s: %v\n", l.namespace, listErr)
+		}
+		return
+	}
+	for idx := range reports.Items {
+		l.checkReport(ctx, &reports.Items[idx])
+	}
+}
+
+// checkReport evaluates one VulnerabilityReport. With no CRITICAL findings it
+// confirms any existing DriftSignal for the report; otherwise it emits a signal
+// unless the OperatorContextWatcher disallows autonomous actions, in which case
+// the finding is only logged.
+func (l *VulnerabilityDriftLoop) checkReport(ctx context.Context, report *k8sunstructured.Unstructured) {
+	reportName := report.GetName()
+	signalName := vulnerabilitySignalPrefix + reportName
+	criticals, image := vulnerabilityCriticalCount(report.Object)
+	switch {
+	case criticals == 0:
+		l.confirmSignalIfPresent(ctx, signalName)
+	case l.ocWatcher != nil && !l.ocWatcher.IsAutonomousActionsAllowedFor(l.clusterRef, "management"):
+		fmt.Printf("vulnerability drift loop: report=%q has %d CRITICAL vuln(s) -- observe-only mode, no DriftSignal written\n", reportName, criticals)
+	default:
+		reason := fmt.Sprintf("vulnerable image detected: report=%s image=%s criticalCount=%d", reportName, image, criticals)
+		l.emitSignal(ctx, signalName, reportName, image, reason)
+	}
+}
+
+// vulnerabilityCriticalCount returns the CRITICAL vulnerability count and the image
+// reference (repository[:tag]) of a VulnerabilityReport. summary.criticalCount may be
+// int64 (API-decoded objects) or float64; with no summary, report.vulnerabilities is scanned.
+func vulnerabilityCriticalCount(obj map[string]interface{}) (int, string) {
+	imageRef := ""
+	if artifact, _, _ := unstructuredNestedMap(obj, "report", "artifact"); len(artifact) > 0 {
+		imageRef, _ = artifact["repository"].(string)
+		if tag, _ := artifact["tag"].(string); tag != "" {
+			imageRef += ":" + tag
+		}
+	}
+	summary, _, _ := unstructuredNestedMap(obj, "report", "summary")
+	if len(summary) == 0 {
+		return vulnerabilityCriticalCountFromSlice(obj, imageRef)
+	}
+	count, _ := summary["criticalCount"].(int64)
+	if asFloat, ok := summary["criticalCount"].(float64); ok {
+		count = int64(asFloat)
+	}
+	return int(count), imageRef
+}
+
+// vulnerabilityCriticalCountFromSlice counts the report.vulnerabilities entries
+// whose severity equals criticalSeverity and returns that count together with
+// imageRef, which is passed through unchanged.
+func vulnerabilityCriticalCountFromSlice(obj map[string]interface{}, imageRef string) (int, string) {
+	entries, _, _ := unstructuredNestedSlice(obj, "report", "vulnerabilities")
+	critical := 0
+	for idx := range entries {
+		// A non-map entry yields a nil map here, whose severity lookup never matches.
+		entry, _ := entries[idx].(map[string]interface{})
+		if sev, _ := entry["severity"].(string); sev == criticalSeverity {
+			critical++
+		}
+	}
+	return critical, imageRef
+}
+
+// emitSignal creates or refreshes the DriftSignal signalName in l.namespace for a report
+// that has CRITICAL vulnerabilities. A missing signal is created in state=pending. An
+// existing signal whose escalationCounter reached escalationThreshold is left untouched;
+// otherwise a confirmed signal is reset to pending with a fresh correlationID and a zeroed
+// counter, and a queued signal returns to pending with the counter incremented. Errors are
+// logged, and the "DriftSignal written" line is printed only after a successful Create.
+func (l *VulnerabilityDriftLoop) emitSignal(ctx context.Context, signalName, reportName, imageRef, driftReason string) {
+	now := time.Now().UTC().Format(time.RFC3339)
+	existing, err := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil && !k8serrors.IsNotFound(err) {
+		fmt.Printf("vulnerability drift loop: report=%q get DriftSignal: %v\n", reportName, err)
+		return
+	}
+	if k8serrors.IsNotFound(err) {
+		obj := map[string]interface{}{
+			"apiVersion": "seam.ontai.dev/v1alpha1",
+			"kind":       "DriftSignal",
+			"metadata":   map[string]interface{}{"name": signalName, "namespace": l.namespace},
+			"spec": map[string]interface{}{
+				"state":         "pending",
+				"signalKind":    "VulnerableImageDetected",
+				"driftLayer":    "kubernetes",
+				"correlationID": fmt.Sprintf("vuln-%s-%d", reportName, time.Now().UnixNano()),
+				"observedAt":    now,
+				"driftReason":   driftReason,
+				"affectedCRRef": map[string]interface{}{
+					"group":     "aquasecurity.github.io",
+					"kind":      "VulnerabilityReport",
+					"namespace": l.namespace,
+					"name":      reportName,
+				},
+				"escalationCounter": int64(0),
+			},
+		}
+		if _, cErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Create(
+			ctx, &k8sunstructured.Unstructured{Object: obj}, metav1.CreateOptions{},
+		); cErr != nil {
+			fmt.Printf("vulnerability drift loop: report=%q create DriftSignal: %v\n", reportName, cErr)
+			return
+		}
+		fmt.Printf("vulnerability drift loop: report=%q CRITICAL vuln(s) in %s -- DriftSignal written\n", reportName, imageRef)
+		return
+	}
+
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	counter, _ := spec["escalationCounter"].(int64)
+	if int32(counter) >= escalationThreshold {
+		return
+	}
+	var patchSpec map[string]interface{}
+	var action string
+	switch state {
+	case "confirmed":
+		action = "reset DriftSignal"
+		patchSpec = map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"correlationID":     fmt.Sprintf("vuln-%s-%d", reportName, time.Now().UnixNano()),
+			"observedAt":        now,
+			"escalationCounter": int64(0),
+		}
+	case "queued":
+		action = "increment escalation counter"
+		patchSpec = map[string]interface{}{
+			"state":             "pending",
+			"driftReason":       driftReason,
+			"escalationCounter": counter + 1,
+		}
+	default:
+		return
+	}
+	data, _ := json.Marshal(map[string]interface{}{"spec": patchSpec})
+	if _, pErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Patch(
+		ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{},
+	); pErr != nil {
+		fmt.Printf("vulnerability drift loop: report=%q %s: %v\n", reportName, action, pErr)
+	}
+}
+
+// confirmSignalIfPresent patches DriftSignal signalName to state=confirmed with an empty correlationID. It does
+// nothing when the signal is missing, already confirmed or stateless; other lookup errors are logged.
+func (l *VulnerabilityDriftLoop) confirmSignalIfPresent(ctx context.Context, signalName string) {
+	existing, err := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Get(ctx, signalName, metav1.GetOptions{})
+	if err != nil {
+		if !k8serrors.IsNotFound(err) {
+			fmt.Printf("vulnerability drift loop: get DriftSignal %s/%s: %v\n", l.namespace, signalName, err)
+		}
+		return
+	}
+	spec, _, _ := unstructuredNestedMap(existing.Object, "spec")
+	state, _ := spec["state"].(string)
+	if state == "confirmed" || state == "" {
+		return
+	}
+	data, _ := json.Marshal(map[string]interface{}{"spec": map[string]interface{}{"state": "confirmed", "correlationID": ""}})
+	if _, pErr := l.client.Resource(driftSignalGVR).Namespace(l.namespace).Patch(ctx, signalName, types.MergePatchType, data, metav1.PatchOptions{}); pErr != nil {
+		fmt.Printf("vulnerability drift loop: confirm DriftSignal %s/%s: %v\n", l.namespace, signalName, pErr)
+	}
+}
diff --git a/internal/agent/vulnerability_drift_loop_test.go b/internal/agent/vulnerability_drift_loop_test.go
new file mode 100644
index 0000000..c81a6c8
--- /dev/null
+++ b/internal/agent/vulnerability_drift_loop_test.go
@@ -0,0 +1,162 @@
+package agent
+
+import (
+	"context"
+	"testing"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/dynamic/fake"
+)
+
+var vulnTestGVRs = map[schema.GroupVersionResource]string{ // GVR -> list kind handed to the fake dynamic client
+	vulnerabilityReportGVR: "VulnerabilityReportList",
+	driftSignalGVR:         "DriftSignalList",
+}
+
+func newVulnFakeClient(objs ...runtime.Object) *fake.FakeDynamicClient { // fake client on an empty scheme, preloaded with objs
+	return fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), vulnTestGVRs, objs...)
+}
+
+// fakeVulnReport returns a seam-system VulnerabilityReport for nginx:1.25.0 whose summary.criticalCount is critCount (float64).
+func fakeVulnReport(name string, critCount int) *unstructured.Unstructured {
+	meta := map[string]interface{}{"name": name, "namespace": "seam-system"}
+	artifact := map[string]interface{}{"repository": "nginx", "tag": "1.25.0"}
+	summary := map[string]interface{}{"criticalCount": float64(critCount)}
+	return &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "aquasecurity.github.io/v1alpha1",
+		"kind":       "VulnerabilityReport",
+		"metadata":   meta,
+		"report":     map[string]interface{}{"artifact": artifact, "summary": summary},
+	}}
+}
+
+// A report without CRITICAL findings must not cause any create action on the
+// fake client.
+func TestVulnerabilityDriftLoop_NoCritical_NoSignal(t *testing.T) {
+	fakeClient := newVulnFakeClient(fakeVulnReport("safe-report", 0))
+	NewVulnerabilityDriftLoop(fakeClient, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	for _, action := range fakeClient.Actions() {
+		if action.GetVerb() != "create" {
+			continue
+		}
+		t.Error("expected no DriftSignal for report with zero critical vulnerabilities")
+	}
+}
+
+// A report with CRITICAL findings must produce a create action on driftsignals.
+func TestVulnerabilityDriftLoop_CriticalFound_EmitsSignal(t *testing.T) {
+	fakeClient := newVulnFakeClient(fakeVulnReport("vuln-report", 3))
+	loop := NewVulnerabilityDriftLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.checkOnce(context.Background())
+
+	signalCreated := false
+	for _, action := range fakeClient.Actions() {
+		isCreate := action.GetVerb() == "create"
+		if isCreate && action.GetResource().Resource == "driftsignals" {
+			signalCreated = true
+		}
+	}
+	if !signalCreated {
+		t.Error("expected DriftSignal created for report with critical vulnerabilities")
+	}
+}
+
+// The emitted DriftSignal must be named after the report and carry
+// signalKind=VulnerableImageDetected and state=pending.
+func TestVulnerabilityDriftLoop_SignalFields(t *testing.T) {
+	ctx := context.Background()
+	fakeClient := newVulnFakeClient(fakeVulnReport("my-vuln-report", 2))
+	NewVulnerabilityDriftLoop(fakeClient, "seam-system", "ccs-mgmt").checkOnce(ctx)
+
+	wantName := vulnerabilitySignalPrefix + "my-vuln-report"
+	signal, getErr := fakeClient.Resource(driftSignalGVR).Namespace("seam-system").Get(ctx, wantName, metav1.GetOptions{})
+	if getErr != nil {
+		t.Fatalf("expected DriftSignal: %v", getErr)
+	}
+	signalSpec, _, _ := unstructuredNestedMap(signal.Object, "spec")
+	gotKind, _ := signalSpec["signalKind"].(string)
+	if gotKind != "VulnerableImageDetected" {
+		t.Errorf("signalKind = %q, want VulnerableImageDetected", gotKind)
+	}
+	gotState, _ := signalSpec["state"].(string)
+	if gotState != "pending" {
+		t.Errorf("state = %q, want pending", gotState)
+	}
+}
+
+// In observe-only mode a report with CRITICAL findings must not create anything.
+func TestVulnerabilityDriftLoop_ObserveOnly_NoSignal(t *testing.T) {
+	fakeClient := newVulnFakeClient(fakeVulnReport("vuln-report", 5))
+
+	// Seed the watcher's resolved state directly so ccs-mgmt is observe-only.
+	watcher := NewOperatorContextWatcher(fakeClient, "ont-system")
+	watcher.mu.Lock()
+	watcher.resolved["ccs-mgmt"] = resolvedContext{autonomyLevel: AutonomyLevelObserveOnly, mode: "normal"}
+	watcher.mu.Unlock()
+
+	loop := NewVulnerabilityDriftLoop(fakeClient, "seam-system", "ccs-mgmt")
+	loop.WithOperatorContextWatcher(watcher)
+	loop.checkOnce(context.Background())
+	for _, action := range fakeClient.Actions() {
+		if action.GetVerb() == "create" {
+			t.Error("expected no DriftSignal under observe-only mode")
+		}
+	}
+}
+
+// A queued DriftSignal must be patched once its report has no CRITICAL findings
+// left.
+func TestVulnerabilityDriftLoop_ConfirmsSignalWhenClean(t *testing.T) {
+	queuedSignal := &unstructured.Unstructured{Object: map[string]interface{}{
+		"apiVersion": "seam.ontai.dev/v1alpha1",
+		"kind":       "DriftSignal",
+		"metadata":   map[string]interface{}{"name": vulnerabilitySignalPrefix + "fixed-report", "namespace": "seam-system"},
+		"spec":       map[string]interface{}{"state": "queued"},
+	}}
+	fakeClient := newVulnFakeClient(fakeVulnReport("fixed-report", 0), queuedSignal)
+	NewVulnerabilityDriftLoop(fakeClient, "seam-system", "ccs-mgmt").checkOnce(context.Background())
+
+	patchSeen := false
+	for _, action := range fakeClient.Actions() {
+		if action.GetVerb() != "patch" || action.GetResource().Resource != "driftsignals" {
+			continue
+		}
+		patchSeen = true
+	}
+	if !patchSeen {
+		t.Error("expected DriftSignal to be confirmed when no critical vulns remain")
+	}
+}
+
+// When report.summary carries criticalCount, that value is returned as the
+// count.
+func TestVulnerabilityCriticalCount_FromSummary(t *testing.T) {
+	summary := map[string]interface{}{"criticalCount": float64(7)}
+	report := map[string]interface{}{"summary": summary}
+
+	got, _ := vulnerabilityCriticalCount(map[string]interface{}{"report": report})
+	if want := 7; got != want {
+		t.Errorf("criticalCount = %d, want 7", got)
+	}
+}
+
+// Without a summary the count comes from the report.vulnerabilities entries
+// whose severity is CRITICAL.
+func TestVulnerabilityCriticalCount_FromSlice(t *testing.T) {
+	var vulns []interface{}
+	for _, sev := range []string{"CRITICAL", "HIGH", "CRITICAL", "MEDIUM"} {
+		vulns = append(vulns, map[string]interface{}{"severity": sev})
+	}
+	obj := map[string]interface{}{
+		"report": map[string]interface{}{"vulnerabilities": vulns},
+	}
+
+	got, _ := vulnerabilityCriticalCount(obj)
+	if got != 2 {
+		t.Errorf("criticalCount from slice = %d, want 2", got)
+	}
+}
diff --git a/internal/capability/adapters.go b/internal/capability/adapters.go
index 9abd046..d327824 100644
--- a/internal/capability/adapters.go
+++ b/internal/capability/adapters.go
@@ -287,6 +287,12 @@ func (a *TalosClientAdapter) Reboot(ctx context.Context) error {
 	return a.inner.Reboot(ctx)
 }
 
+// RebootPowercycle reboots the node in hardware powercycle mode by calling the inner client's Reboot with
+// talos_client.WithPowerCycle. Required after Talos upgrade staging so that BIOS/UEFI re-initialises cleanly.
+func (a *TalosClientAdapter) RebootPowercycle(ctx context.Context) error {
+	return a.inner.Reboot(ctx, talos_client.WithPowerCycle)
+}
+
 // Reset performs a factory reset of the node. reboot is always false;
 // the caller controls any subsequent reboot via a separate Reboot capability.
 func (a *TalosClientAdapter) Reset(ctx context.Context, graceful bool) error {
diff --git a/internal/capability/clients.go b/internal/capability/clients.go
index a993866..d432ae5 100644
--- a/internal/capability/clients.go
+++ b/internal/capability/clients.go
@@ -30,6 +30,11 @@ type TalosNodeClient interface {
 	// Reboot reboots the node.
 	Reboot(ctx context.Context) error
 
+	// RebootPowercycle reboots the node using hardware powercycle mode (power off then
+	// power on). Required for Talos upgrade to ensure BIOS/UEFI re-initialises cleanly.
+	// Distinct from Reboot (OS-level restart) to allow test stubs to record the mode.
+	RebootPowercycle(ctx context.Context) error
+
 	// Reset performs a factory reset of the node. graceful=true drains workloads first.
 	Reset(ctx context.Context, graceful bool) error
 
diff --git a/internal/capability/platform_cluster_test.go b/internal/capability/platform_cluster_test.go
index 2062d1b..423c872 100644
--- a/internal/capability/platform_cluster_test.go
+++ b/internal/capability/platform_cluster_test.go
@@ -28,7 +28,8 @@ type stubBootstrapTalosClient struct {
 func (s *stubBootstrapTalosClient) Bootstrap(_ context.Context) error                              { return nil }
 func (s *stubBootstrapTalosClient) ApplyConfiguration(_ context.Context, _ []byte, _ string) error { return nil }
 func (s *stubBootstrapTalosClient) Upgrade(_ context.Context, _ string, _ bool) error              { return nil }
-func (s *stubBootstrapTalosClient) Reboot(_ context.Context) error                                 { return nil }
+func (s *stubBootstrapTalosClient) Reboot(_ context.Context) error          { return nil }
+func (s *stubBootstrapTalosClient) RebootPowercycle(_ context.Context) error { return nil }
 func (s *stubBootstrapTalosClient) Reset(_ context.Context, _ bool) error                         { return nil }
 func (s *stubBootstrapTalosClient) EtcdSnapshot(_ context.Context, _ io.Writer) error             { return nil }
 func (s *stubBootstrapTalosClient) EtcdRecover(_ context.Context, _ io.Reader) error              { return nil }
diff --git a/internal/capability/platform_machineconfig_constants.go b/internal/capability/platform_machineconfig_constants.go
new file mode 100644
index 0000000..0a3b2a8
--- /dev/null
+++ b/internal/capability/platform_machineconfig_constants.go
@@ -0,0 +1,9 @@
+package capability
+
+// platform_machineconfig_constants.go -- shared constants for machineconfig Secret keys.
+// Used by reenrollment and scale-up capabilities that still manage the legacy per-node
+// Secret model until those capabilities are migrated to MachineConfig CRs.
+
+// machineConfigSyncDataKey is the primary Secret data key that holds the raw Talos
+// machineconfig YAML. Used by node-reenrollment and node-scale-up capabilities.
+const machineConfigSyncDataKey = "machineconfig"
diff --git a/internal/capability/platform_machineconfig_sync.go b/internal/capability/platform_machineconfig_sync.go
index 640d79c..fb54c9c 100644
--- a/internal/capability/platform_machineconfig_sync.go
+++ b/internal/capability/platform_machineconfig_sync.go
@@ -2,9 +2,10 @@ package capability
 
 // platform_machineconfig_sync.go -- machineconfig-sync named capability.
 //
-// Reads the canonical machineconfig from the source-of-truth Secret in
-// seam-tenant-{clusterRef}, injects the ONT node label, and applies the config
-// to each node in the target cluster via the Talos machine API.
+// Reads the canonical machineconfig from the source-of-truth MachineConfig CR in
+// seam-tenant-{clusterRef}, reconstructs full Talos YAML from spec.machine and
+// spec.cluster, injects the ONT node label, and applies the config to the target
+// node via the Talos machine API.
 //
 // Named Conductor capability: machineconfig-sync.
 // conductor-schema.md §6, platform-schema.md §15, RECON-A5.
@@ -13,36 +14,44 @@ package capability
 // in execute mode. Never imported or called from agent mode.
 
 import (
-	"bytes"
-	"compress/gzip"
 	"context"
 	"fmt"
-	"io"
 	"os"
-	"strings"
 	"time"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 	sigsyaml "sigs.k8s.io/yaml"
 
 	"github.com/ontai-dev/conductor-sdk/runnerlib"
 )
 
-// machineConfigCompressionLabel mirrors LabelMachineConfigCompression in platform.
-const machineConfigCompressionLabel = "platform.ontai.dev/compression"
+// machineConfigGVR is the GroupVersionResource for MachineConfig CRs.
+// platform.ontai.dev/v1alpha1/machineconfigs -- platform-schema.md §9.
+var machineConfigGVR = schema.GroupVersionResource{
+	Group:    "platform.ontai.dev",
+	Version:  "v1alpha1",
+	Resource: "machineconfigs",
+}
 
 // envMCSyncNodeClass is the env var key injected by MachineConfigSyncReconciler.
 // Must match envMCNodeClass in platform/internal/controller/machineconfigsync_reconciler.go.
 const envMCSyncNodeClass = "MC_NODE_CLASS"
 
-// machineConfigSyncSecretNamespace returns the namespace that holds the source-of-truth Secret.
-func machineConfigSyncSecretNamespace(clusterRef string) string {
+// envMCSyncNodeIP is the env var key injected when MachineConfigSync.spec.nodeRef is set.
+// When present, the capability applies the machineconfig to only this specific node IP.
+// Must match envMCNodeIP in platform/internal/controller/machineconfigsync_reconciler.go.
+// PLT-BUG-3-ARCH.
+const envMCSyncNodeIP = "MC_NODE_IP"
+
+// machineConfigSyncCRNamespace returns the namespace holding the MachineConfig CR.
+func machineConfigSyncCRNamespace(clusterRef string) string {
 	return "seam-tenant-" + clusterRef
 }
 
-// machineConfigSyncSecretName returns the canonical Secret name for a given cluster and class.
-// Mirrors MachineConfigSecretName in platform/internal/controller/machineconfig_labels.go.
-func machineConfigSyncSecretName(clusterRef, nodeClass string) string {
+// machineConfigSyncCRName returns the MachineConfig CR name for a given cluster and nodeClass.
+// Mirrors MachineConfigCRName in platform/internal/controller/machineconfig_labels.go.
+func machineConfigSyncCRName(clusterRef, nodeClass string) string {
 	return "seam-mc-" + clusterRef + "-" + nodeClass
 }
 
@@ -51,19 +60,15 @@ func machineConfigSyncSecretName(clusterRef, nodeClass string) string {
 // Mirrors MachineConfigNodeLabel in platform/internal/controller/machineconfig_labels.go.
 const ontControlledLabel = "ont.platform.dev/controlled"
 
-// machineConfigSyncDataKey is the Secret data key that holds the raw Talos machineconfig YAML.
-// Mirrors MachineConfigDataKey in platform/internal/controller/machineconfig_labels.go.
-const machineConfigSyncDataKey = "machineconfig"
-
 // machineConfigSyncHandler implements the machineconfig-sync named capability.
 type machineConfigSyncHandler struct{}
 
 func (h *machineConfigSyncHandler) Execute(ctx context.Context, params ExecuteParams) (runnerlib.OperationResultSpec, error) {
 	now := time.Now().UTC()
 
-	if params.TalosClient == nil || params.KubeClient == nil {
+	if params.TalosClient == nil || params.DynamicClient == nil {
 		return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ValidationFailure,
-			"machineconfig-sync requires TalosClient and KubeClient"), nil
+			"machineconfig-sync requires TalosClient and DynamicClient"), nil
 	}
 
 	nodeClass := os.Getenv(envMCSyncNodeClass)
@@ -72,31 +77,30 @@ func (h *machineConfigSyncHandler) Execute(ctx context.Context, params ExecutePa
 			"machineconfig-sync: MC_NODE_CLASS env var is required but not set"), nil
 	}
 
+	// MC_NODE_IP is set by the MachineConfigSync reconciler when spec.nodeRef is
+	// non-empty. When present, apply to only this specific node. PLT-BUG-3-ARCH.
+	nodeIP := os.Getenv(envMCSyncNodeIP)
+
 	clusterRef := params.ClusterRef
-	secretNS := machineConfigSyncSecretNamespace(clusterRef)
-	secretName := machineConfigSyncSecretName(clusterRef, nodeClass)
+	crNS := machineConfigSyncCRNamespace(clusterRef)
+	crName := machineConfigSyncCRName(clusterRef, nodeClass)
 
-	// Read the source-of-truth machineconfig Secret.
-	secret, err := params.KubeClient.CoreV1().Secrets(secretNS).Get(ctx, secretName, metav1.GetOptions{})
+	// Read the source-of-truth MachineConfig CR via the management cluster DynamicClient.
+	// MachineConfig CRs live in seam-tenant-{clusterRef} on the management cluster.
+	mcObj, err := params.DynamicClient.Resource(machineConfigGVR).Namespace(crNS).
+		Get(ctx, crName, metav1.GetOptions{})
 	if err != nil {
 		return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ExecutionFailure,
-			fmt.Sprintf("get MachineConfig Secret %s/%s: %v", secretNS, secretName, err)), nil
+			fmt.Sprintf("get MachineConfig CR %s/%s: %v", crNS, crName, err)), nil
 	}
 
-	mcBytes := secret.Data[machineConfigSyncDataKey]
-	if len(mcBytes) == 0 {
+	// Reconstruct full Talos YAML from spec.machine and spec.cluster sections.
+	// Both sections arrive as unstructured values on the CR object; they are placed
+	// under one top-level document and marshaled to Talos v1alpha1 machineconfig YAML.
+	mcBytes, err := reconstructMachineConfigYAML(mcObj.Object)
+	if err != nil {
 		return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ValidationFailure,
-			fmt.Sprintf("MachineConfig Secret %s/%s has no data key %q", secretNS, secretName, machineConfigSyncDataKey)), nil
-	}
-
-	// Decompress if the secret was stored with gzip compression. RECON-F5.
-	if secret.Labels[machineConfigCompressionLabel] == "gzip" {
-		decompressed, dcErr := decompressMachineConfig(mcBytes)
-		if dcErr != nil {
-			return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ValidationFailure,
-				fmt.Sprintf("decompress machineconfig from %s/%s: %v", secretNS, secretName, dcErr)), nil
-		}
-		mcBytes = decompressed
+			fmt.Sprintf("reconstruct machineconfig YAML from CR %s/%s: %v", crNS, crName, err)), nil
 	}
 
 	// Inject the ONT controlled node label into the machineconfig.
@@ -116,9 +120,12 @@ func (h *machineConfigSyncHandler) Execute(ctx context.Context, params ExecutePa
 			fmt.Sprintf("merged machineconfig is not valid YAML: %v", err)), nil
 	}
 
-	// Enumerate nodes from talosconfig; fall back to single-context when absent.
+	// When MC_NODE_IP is set (PLT-BUG-3-ARCH per-node MCS), apply to only that
+	// specific node. Skip talosconfig endpoint enumeration.
 	var nodeIPs []string
-	if params.TalosconfigPath != "" {
+	if nodeIP != "" {
+		nodeIPs = []string{nodeIP}
+	} else if params.TalosconfigPath != "" {
 		ips, epErr := EndpointsFromTalosconfig(params.TalosconfigPath)
 		if epErr != nil {
 			return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ExecutionFailure,
@@ -127,28 +134,20 @@ func (h *machineConfigSyncHandler) Execute(ctx context.Context, params ExecutePa
 		nodeIPs = ips
 	}
 
-	// singleNodeClass is true when nodeClass is already a per-node class (e.g. "node-cp1").
-	// In that mode, the base secret IS the per-node config; skip per-node patch lookup.
-	singleNodeClass := strings.HasPrefix(nodeClass, "node-")
-
 	var steps []runnerlib.StepResult
 	if len(nodeIPs) > 0 {
-		for _, nodeIP := range nodeIPs {
-			nodeConfig := modifiedConfig
-			if !singleNodeClass {
-				nodeConfig = perNodePatchConfig(ctx, params, secretNS, clusterRef, nodeIP, modifiedConfig)
-			}
+		for _, ip := range nodeIPs {
 			stepStart := time.Now().UTC()
-			if err := params.TalosClient.ApplyConfiguration(NodeContext(ctx, nodeIP), nodeConfig, "no-reboot"); err != nil {
+			if err := params.TalosClient.ApplyConfiguration(NodeContext(ctx, ip), modifiedConfig, "no-reboot"); err != nil {
 				return failureResult(runnerlib.CapabilityMachineConfigSync, now, runnerlib.ExecutionFailure,
-					fmt.Sprintf("ApplyConfiguration on %s: %v", nodeIP, err)), nil
+					fmt.Sprintf("ApplyConfiguration on %s: %v", ip, err)), nil
 			}
 			steps = append(steps, runnerlib.StepResult{
-				Name:        "machineconfig-sync-" + nodeIP,
+				Name:        "machineconfig-sync-" + ip,
 				Status:      runnerlib.ResultSucceeded,
 				StartedAt:   stepStart,
 				CompletedAt: time.Now().UTC(),
-				Message:     fmt.Sprintf("machineconfig applied to %s (nodeClass=%s)", nodeIP, nodeClass),
+				Message:     fmt.Sprintf("machineconfig applied to %s (nodeClass=%s)", ip, nodeClass),
 			})
 		}
 	} else {
@@ -175,67 +174,33 @@ func (h *machineConfigSyncHandler) Execute(ctx context.Context, params ExecutePa
 	}, nil
 }
 
-// decompressMachineConfig gunzips gzip-compressed machineconfig bytes. RECON-F5.
-func decompressMachineConfig(compressed []byte) ([]byte, error) {
-	r, err := gzip.NewReader(bytes.NewReader(compressed))
-	if err != nil {
-		return nil, fmt.Errorf("gzip.NewReader: %w", err)
-	}
-	defer r.Close()
-	out, err := io.ReadAll(r)
-	if err != nil {
-		return nil, fmt.Errorf("read decompressed: %w", err)
-	}
-	return out, nil
-}
-
-// perNodePatchConfig looks up any per-node patch secret for the Kubernetes node
-// whose InternalIP matches nodeIP, then merges it with baseConfig. The ONT controlled
-// label is always re-injected after merging so it cannot be overridden by a patch.
-// Returns baseConfig unchanged when no per-node secret exists or any step fails. RECON-A8.
-func perNodePatchConfig(ctx context.Context, params ExecuteParams, secretNS, clusterRef, nodeIP string, baseConfig []byte) []byte {
-	nodeList, err := params.KubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
-	if err != nil {
-		return baseConfig
-	}
-
-	var hostname string
-	for i := range nodeList.Items {
-		node := &nodeList.Items[i]
-		for _, addr := range node.Status.Addresses {
-			if string(addr.Type) == "InternalIP" && addr.Address == nodeIP {
-				hostname = node.Name
-				break
-			}
-		}
-		if hostname != "" {
-			break
-		}
+// reconstructMachineConfigYAML builds a Talos v1alpha1 machineconfig YAML document
+// from a MachineConfig CR's unstructured object. spec.machine and spec.cluster are
+// copied under a single top-level map that also sets version, debug, and persist.
+//
+// Returns an error when spec is missing, neither section is present, or marshaling fails.
+func reconstructMachineConfigYAML(obj map[string]interface{}) ([]byte, error) {
+	spec, _ := obj["spec"].(map[string]interface{})
+	if spec == nil {
+		return nil, fmt.Errorf("MachineConfig CR has no spec")
 	}
-	if hostname == "" {
-		return baseConfig
+	combined := map[string]interface{}{
+		"version": "v1alpha1",
+		"debug":   false,
+		"persist": true,
 	}
-
-	patchSecretName := machineConfigSyncSecretName(clusterRef, "node-"+hostname)
-	patchSecret, pErr := params.KubeClient.CoreV1().Secrets(secretNS).Get(ctx, patchSecretName, metav1.GetOptions{})
-	if pErr != nil {
-		return baseConfig
+	if m := spec["machine"]; m != nil {
+		combined["machine"] = m
 	}
-	patchBytes := patchSecret.Data[machineConfigSyncDataKey]
-	if len(patchBytes) == 0 {
-		return baseConfig
+	if c := spec["cluster"]; c != nil {
+		combined["cluster"] = c
 	}
-
-	merged, mergeErr := mergeYAMLPatch(baseConfig, patchBytes)
-	if mergeErr != nil {
-		return baseConfig
+	if spec["machine"] == nil && spec["cluster"] == nil {
+		return nil, fmt.Errorf("MachineConfig CR spec has neither machine nor cluster section")
 	}
-
-	// Re-inject the ONT controlled label: it must never be overridden by a per-node patch.
-	labelPatch := []byte(fmt.Sprintf(`{"machine":{"nodeLabels":{%q:"true"}}}`, ontControlledLabel))
-	result, lErr := mergeYAMLPatch(merged, labelPatch)
-	if lErr != nil {
-		return merged
+	data, err := sigsyaml.Marshal(combined)
+	if err != nil {
+		return nil, fmt.Errorf("marshal reconstructed machineconfig: %w", err)
 	}
-	return result
+	return data, nil
 }
diff --git a/internal/capability/platform_machineconfig_sync_test.go b/internal/capability/platform_machineconfig_sync_test.go
index 16ada2b..87af176 100644
--- a/internal/capability/platform_machineconfig_sync_test.go
+++ b/internal/capability/platform_machineconfig_sync_test.go
@@ -1,25 +1,24 @@
 package capability
 
 import (
-	"bytes"
-	"compress/gzip"
 	"context"
-	"fmt"
 	"io"
 	"os"
 	"testing"
 
-	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	fakedyn "k8s.io/client-go/dynamic/fake"
 
 	"github.com/ontai-dev/conductor-sdk/runnerlib"
 )
 
 // stubApplyTalosClient records ApplyConfiguration calls and exposes applied bytes.
 type stubApplyTalosClient struct {
-	applied    [][]byte
-	applyErr   error
+	applied  [][]byte
+	applyErr error
 }
 
 func (s *stubApplyTalosClient) Bootstrap(_ context.Context) error { return nil }
@@ -28,45 +27,106 @@ func (s *stubApplyTalosClient) ApplyConfiguration(_ context.Context, cfg []byte,
 	return s.applyErr
 }
 func (s *stubApplyTalosClient) Upgrade(_ context.Context, _ string, _ bool) error { return nil }
-func (s *stubApplyTalosClient) Reboot(_ context.Context) error                     { return nil }
+func (s *stubApplyTalosClient) Reboot(_ context.Context) error             { return nil }
+func (s *stubApplyTalosClient) RebootPowercycle(_ context.Context) error   { return nil }
 func (s *stubApplyTalosClient) Reset(_ context.Context, _ bool) error              { return nil }
 func (s *stubApplyTalosClient) EtcdSnapshot(_ context.Context, _ io.Writer) error  { return nil }
-func (s *stubApplyTalosClient) EtcdRecover(_ context.Context, _ io.Reader) error    { return nil }
-func (s *stubApplyTalosClient) EtcdDefragment(_ context.Context) error              { return nil }
-func (s *stubApplyTalosClient) GetMachineConfig(_ context.Context) ([]byte, error)  { return nil, nil }
-func (s *stubApplyTalosClient) Kubeconfig(_ context.Context) ([]byte, error)        { return nil, nil }
-func (s *stubApplyTalosClient) Nodes() []string                                     { return nil }
-func (s *stubApplyTalosClient) Rollback(_ context.Context) error                    { return nil }
-func (s *stubApplyTalosClient) WipeDisk(_ context.Context) error                    { return nil }
-func (s *stubApplyTalosClient) Health(_ context.Context) error                      { return nil }
-func (s *stubApplyTalosClient) Close() error                                        { return nil }
-
-// mcSyncTestSecret builds a Kubernetes Secret for the machineconfig-sync capability tests.
-func mcSyncTestSecret(clusterRef, nodeClass string, content []byte) *corev1.Secret {
-	return &corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "seam-mc-" + clusterRef + "-" + nodeClass,
-			Namespace: "seam-tenant-" + clusterRef,
-		},
-		Data: map[string][]byte{
-			"machineconfig": content,
-		},
+func (s *stubApplyTalosClient) EtcdRecover(_ context.Context, _ io.Reader) error   { return nil }
+func (s *stubApplyTalosClient) EtcdDefragment(_ context.Context) error             { return nil }
+func (s *stubApplyTalosClient) GetMachineConfig(_ context.Context) ([]byte, error) { return nil, nil }
+func (s *stubApplyTalosClient) Kubeconfig(_ context.Context) ([]byte, error)       { return nil, nil }
+func (s *stubApplyTalosClient) Nodes() []string                                    { return nil }
+func (s *stubApplyTalosClient) Rollback(_ context.Context) error                   { return nil }
+func (s *stubApplyTalosClient) WipeDisk(_ context.Context) error                   { return nil }
+func (s *stubApplyTalosClient) Health(_ context.Context) error                     { return nil }
+func (s *stubApplyTalosClient) Close() error                                       { return nil }
+
+// buildMCSyncScheme returns a runtime.Scheme with MachineConfig and MachineConfigList registered.
+func buildMCSyncScheme() *runtime.Scheme {
+	scheme := runtime.NewScheme()
+	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
+		Group: "platform.ontai.dev", Version: "v1alpha1", Kind: "MachineConfig",
+	}, &unstructured.Unstructured{})
+	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
+		Group: "platform.ontai.dev", Version: "v1alpha1", Kind: "MachineConfigList",
+	}, &unstructured.UnstructuredList{})
+	return scheme
+}
+
+// buildMCSyncDynClient returns a fake DynamicClient with one MachineConfig CR seeded.
+// machineSection and clusterSection are the spec.machine and spec.cluster content
+// stored as unstructured Go maps (matching the CR's unstructured representation).
+func buildMCSyncDynClient(clusterRef, nodeClass string, machineSection, clusterSection map[string]interface{}) *fakedyn.FakeDynamicClient {
+	cr := &unstructured.Unstructured{}
+	cr.SetGroupVersionKind(schema.GroupVersionKind{
+		Group: "platform.ontai.dev", Version: "v1alpha1", Kind: "MachineConfig",
+	})
+	cr.SetName(machineConfigSyncCRName(clusterRef, nodeClass))
+	cr.SetNamespace(machineConfigSyncCRNamespace(clusterRef))
+
+	spec := map[string]interface{}{
+		"role":         "controlplane",
+		"order":        int64(1),
+		"clusterRef":   map[string]interface{}{"name": clusterRef},
+		"nodeIP":       "10.20.0.11",
+		"nodeHostname": nodeClass,
+	}
+	if machineSection != nil {
+		spec["machine"] = machineSection
+	}
+	if clusterSection != nil {
+		spec["cluster"] = clusterSection
+	}
+	cr.Object["spec"] = spec
+
+	return fakedyn.NewSimpleDynamicClient(buildMCSyncScheme(), cr)
+}
+
+// writeFakeTalosconfig writes a minimal talosconfig YAML to a temp file and returns
+// its path. The config uses ctx.nodes so EndpointsFromTalosconfig returns nodeIPs directly.
+func writeFakeTalosconfig(t *testing.T, nodeIPs []string) string {
+	t.Helper()
+	var nodesYAML string
+	for _, ip := range nodeIPs {
+		nodesYAML += "    - " + ip + "\n"
 	}
+	content := "context: default\ncontexts:\n  default:\n    endpoints: []\n    nodes:\n" + nodesYAML
+	f, err := os.CreateTemp(t.TempDir(), "talosconfig-*.yaml")
+	if err != nil {
+		t.Fatalf("create temp talosconfig: %v", err)
+	}
+	if _, err := f.WriteString(content); err != nil {
+		t.Fatalf("write talosconfig: %v", err)
+	}
+	_ = f.Close()
+	return f.Name()
+}
+
+// containsString is a simple string containment check for test use only.
+func containsString(s, sub string) bool {
+	return len(s) >= len(sub) && (s == sub || len(sub) == 0 ||
+		func() bool {
+			for i := 0; i <= len(s)-len(sub); i++ {
+				if s[i:i+len(sub)] == sub {
+					return true
+				}
+			}
+			return false
+		}())
 }
 
 // TestMachineConfigSyncHandler_MissingEnvVar verifies that a ValidationFailure is
 // returned when MC_NODE_CLASS is absent from the environment.
 func TestMachineConfigSyncHandler_MissingEnvVar(t *testing.T) {
-	// Ensure MC_NODE_CLASS is not set.
 	t.Setenv(envMCSyncNodeClass, "")
 
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
-		Capability:  runnerlib.CapabilityMachineConfigSync,
-		ClusterRef:  "ccs-mgmt",
+		Capability: runnerlib.CapabilityMachineConfigSync,
+		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
-			TalosClient: &stubApplyTalosClient{},
-			KubeClient:  fake.NewSimpleClientset(),
+			TalosClient:   &stubApplyTalosClient{},
+			DynamicClient: buildMCSyncDynClient("ccs-mgmt", "", nil, nil),
 		},
 	})
 	if err != nil {
@@ -81,9 +141,9 @@ func TestMachineConfigSyncHandler_MissingEnvVar(t *testing.T) {
 }
 
 // TestMachineConfigSyncHandler_NilClients verifies that a ValidationFailure is
-// returned when TalosClient or KubeClient is nil.
+// returned when TalosClient or DynamicClient is nil.
 func TestMachineConfigSyncHandler_NilClients(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
+	t.Setenv(envMCSyncNodeClass, "cp1")
 
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
@@ -101,18 +161,18 @@ func TestMachineConfigSyncHandler_NilClients(t *testing.T) {
 	}
 }
 
-// TestMachineConfigSyncHandler_SecretNotFound verifies that a ExecutionFailure is
-// returned when the machineconfig Secret is absent from the cluster.
-func TestMachineConfigSyncHandler_SecretNotFound(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
+// TestMachineConfigSyncHandler_CRNotFound verifies that an ExecutionFailure is
+// returned when the MachineConfig CR is absent from the management cluster.
+func TestMachineConfigSyncHandler_CRNotFound(t *testing.T) {
+	t.Setenv(envMCSyncNodeClass, "cp1")
 
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
 		Capability: runnerlib.CapabilityMachineConfigSync,
 		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
-			TalosClient: &stubApplyTalosClient{},
-			KubeClient:  fake.NewSimpleClientset(), // no secret
+			TalosClient:   &stubApplyTalosClient{},
+			DynamicClient: fakedyn.NewSimpleDynamicClient(buildMCSyncScheme()), // no CR seeded
 		},
 	})
 	if err != nil {
@@ -121,16 +181,24 @@ func TestMachineConfigSyncHandler_SecretNotFound(t *testing.T) {
 	if result.Status != runnerlib.ResultFailed {
 		t.Errorf("expected ResultFailed, got %q", result.Status)
 	}
+	if result.FailureReason == nil || result.FailureReason.Category != runnerlib.ExecutionFailure {
+		t.Errorf("expected ExecutionFailure for missing CR, got %v", result.FailureReason)
+	}
 }
 
 // TestMachineConfigSyncHandler_AppliesAndInjectsLabel verifies that the handler
-// applies the machineconfig to the Talos node and injects the ONT controlled label.
+// reads the MachineConfig CR, reconstructs Talos YAML, applies it, and injects
+// the ONT controlled node label.
 func TestMachineConfigSyncHandler_AppliesAndInjectsLabel(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
+	t.Setenv(envMCSyncNodeClass, "cp1")
+	t.Setenv(envMCSyncNodeIP, "10.20.0.11")
+	defer t.Setenv(envMCSyncNodeIP, "")
 
-	machineConfigContent := []byte("machine:\n  type: controlplane\n  nodeLabels: {}\n")
-	secret := mcSyncTestSecret("ccs-mgmt", "controlplane", machineConfigContent)
-	kubeClient := fake.NewSimpleClientset(secret)
+	machineSection := map[string]interface{}{
+		"type":       "controlplane",
+		"nodeLabels": map[string]interface{}{},
+	}
+	dynClient := buildMCSyncDynClient("ccs-mgmt", "cp1", machineSection, nil)
 	talosClient := &stubApplyTalosClient{}
 
 	handler := &machineConfigSyncHandler{}
@@ -138,8 +206,8 @@ func TestMachineConfigSyncHandler_AppliesAndInjectsLabel(t *testing.T) {
 		Capability: runnerlib.CapabilityMachineConfigSync,
 		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
-			TalosClient: talosClient,
-			KubeClient:  kubeClient,
+			TalosClient:   talosClient,
+			DynamicClient: dynClient,
 		},
 	})
 	if err != nil {
@@ -152,79 +220,32 @@ func TestMachineConfigSyncHandler_AppliesAndInjectsLabel(t *testing.T) {
 		}
 	}
 
-	// Verify ApplyConfiguration was called once.
 	if len(talosClient.applied) != 1 {
 		t.Fatalf("expected 1 ApplyConfiguration call, got %d", len(talosClient.applied))
 	}
-
-	// Verify the applied config contains the ONT label.
 	applied := string(talosClient.applied[0])
 	if !containsString(applied, ontControlledLabel) {
 		t.Errorf("applied config does not contain node label %q:\n%s", ontControlledLabel, applied)
 	}
-
 	if len(result.Steps) != 1 {
 		t.Errorf("expected 1 step result, got %d", len(result.Steps))
 	}
 }
 
-// containsString is a simple string containment check for test use only.
-func containsString(s, sub string) bool {
-	return len(s) >= len(sub) && (s == sub || len(sub) == 0 ||
-		func() bool {
-			for i := 0; i <= len(s)-len(sub); i++ {
-				if s[i:i+len(sub)] == sub {
-					return true
-				}
-			}
-			return false
-		}())
-}
+// TestMachineConfigSyncHandler_ReconstructsBothSections verifies that spec.machine
+// and spec.cluster are both present in the reconstructed Talos YAML.
+func TestMachineConfigSyncHandler_ReconstructsBothSections(t *testing.T) {
+	t.Setenv(envMCSyncNodeClass, "cp1")
+	t.Setenv(envMCSyncNodeIP, "10.20.0.11")
+	defer t.Setenv(envMCSyncNodeIP, "")
 
-// writeFakeTalosconfig writes a minimal talosconfig YAML to a temp file and returns
-// its path. The config uses ctx.nodes so EndpointsFromTalosconfig returns nodeIPs directly.
-func writeFakeTalosconfig(t *testing.T, nodeIPs []string) string {
-	t.Helper()
-	var nodesYAML string
-	for _, ip := range nodeIPs {
-		nodesYAML += fmt.Sprintf("    - %s\n", ip)
-	}
-	content := fmt.Sprintf("context: default\ncontexts:\n  default:\n    endpoints: []\n    nodes:\n%s", nodesYAML)
-	f, err := os.CreateTemp(t.TempDir(), "talosconfig-*.yaml")
-	if err != nil {
-		t.Fatalf("create temp talosconfig: %v", err)
+	machineSection := map[string]interface{}{
+		"type": "controlplane",
 	}
-	if _, err := f.WriteString(content); err != nil {
-		t.Fatalf("write talosconfig: %v", err)
+	clusterSection := map[string]interface{}{
+		"clusterName": "ccs-mgmt",
 	}
-	_ = f.Close()
-	return f.Name()
-}
-
-// TestMachineConfigSyncHandler_DecompressesGzipSecret verifies that when the machineconfig
-// secret is gzip-compressed (compression label present), the capability decompresses it
-// before applying. RECON-F5.
-func TestMachineConfigSyncHandler_DecompressesGzipSecret(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
-
-	rawContent := []byte("machine:\n  type: controlplane\n")
-	var buf bytes.Buffer
-	w := mustGzipWriter(t, &buf)
-	_, _ = w.Write(rawContent)
-	_ = w.Close()
-	compressed := buf.Bytes()
-
-	secret := &corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "seam-mc-ccs-mgmt-controlplane",
-			Namespace: "seam-tenant-ccs-mgmt",
-			Labels: map[string]string{
-				"platform.ontai.dev/compression": "gzip",
-			},
-		},
-		Data: map[string][]byte{"machineconfig": compressed},
-	}
-	kubeClient := fake.NewSimpleClientset(secret)
+	dynClient := buildMCSyncDynClient("ccs-mgmt", "cp1", machineSection, clusterSection)
 	talosClient := &stubApplyTalosClient{}
 
 	handler := &machineConfigSyncHandler{}
@@ -232,8 +253,8 @@ func TestMachineConfigSyncHandler_DecompressesGzipSecret(t *testing.T) {
 		Capability: runnerlib.CapabilityMachineConfigSync,
 		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
-			TalosClient: talosClient,
-			KubeClient:  kubeClient,
+			TalosClient:   talosClient,
+			DynamicClient: dynClient,
 		},
 	})
 	if err != nil {
@@ -243,57 +264,38 @@ func TestMachineConfigSyncHandler_DecompressesGzipSecret(t *testing.T) {
 		t.Fatalf("expected ResultSucceeded, got %q; reason: %v", result.Status, result.FailureReason)
 	}
 	if len(talosClient.applied) != 1 {
-		t.Fatalf("expected 1 apply call, got %d", len(talosClient.applied))
+		t.Fatalf("expected 1 ApplyConfiguration call, got %d", len(talosClient.applied))
 	}
 	applied := string(talosClient.applied[0])
-	if !containsString(applied, "type: controlplane") {
-		t.Errorf("decompressed content not present in applied config:\n%s", applied)
+	if !containsString(applied, "machine:") {
+		t.Errorf("applied config missing machine section:\n%s", applied)
 	}
-}
-
-// mustGzipWriter returns a gzip.Writer writing to w. Fatals if creation fails.
-func mustGzipWriter(t *testing.T, w *bytes.Buffer) *gzip.Writer {
-	t.Helper()
-	gw := gzip.NewWriter(w)
-	return gw
-}
-
-// buildMCSyncNode returns a minimal Kubernetes Node object with the given name and InternalIP.
-func buildMCSyncNode(name, ip string) *corev1.Node {
-	return &corev1.Node{
-		ObjectMeta: metav1.ObjectMeta{Name: name},
-		Status: corev1.NodeStatus{
-			Addresses: []corev1.NodeAddress{
-				{Type: corev1.NodeInternalIP, Address: ip},
-			},
-		},
+	if !containsString(applied, "cluster:") {
+		t.Errorf("applied config missing cluster section:\n%s", applied)
 	}
 }
 
-// TestMachineConfigSyncHandler_PerNodePatchMerged verifies that when a per-node patch
-// secret exists for a node, its content is merged into the base class config. RECON-A8.
-func TestMachineConfigSyncHandler_PerNodePatchMerged(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
-
-	baseContent := []byte("machine:\n  type: controlplane\n")
-	patchContent := []byte("machine:\n  network:\n    hostname: cp1\n")
-
-	secret := mcSyncTestSecret("ccs-mgmt", "controlplane", baseContent)
-	patchSecret := mcSyncTestSecret("ccs-mgmt", "node-cp1", patchContent)
-	node := buildMCSyncNode("cp1", "10.20.0.2")
-	kubeClient := fake.NewSimpleClientset(secret, patchSecret, node)
+// TestMachineConfigSyncHandler_NodeIPTargetsSingleNode verifies that when MC_NODE_IP
+// is set, the capability applies to only that one node and skips talosconfig enumeration.
+// PLT-BUG-3-ARCH.
+func TestMachineConfigSyncHandler_NodeIPTargetsSingleNode(t *testing.T) {
+	t.Setenv(envMCSyncNodeClass, "cp1")
+	t.Setenv(envMCSyncNodeIP, "10.20.0.11")
+	defer t.Setenv(envMCSyncNodeIP, "")
+
+	machineSection := map[string]interface{}{"type": "controlplane"}
+	dynClient := buildMCSyncDynClient("ccs-dev", "cp1", machineSection, nil)
+	// Provide a talosconfig with multiple nodes -- only MC_NODE_IP should be targeted.
+	talosconfigPath := writeFakeTalosconfig(t, []string{"10.20.0.11", "10.20.0.12", "10.20.0.13"})
 	talosClient := &stubApplyTalosClient{}
 
-	// Provide a fake talosconfig so the handler enumerates nodeIPs.
-	talosconfigPath := writeFakeTalosconfig(t, []string{"10.20.0.2"})
-
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
 		Capability: runnerlib.CapabilityMachineConfigSync,
-		ClusterRef: "ccs-mgmt",
+		ClusterRef: "ccs-dev",
 		ExecuteClients: ExecuteClients{
 			TalosClient:     talosClient,
-			KubeClient:      kubeClient,
+			DynamicClient:   dynClient,
 			TalosconfigPath: talosconfigPath,
 		},
 	})
@@ -303,38 +305,28 @@ func TestMachineConfigSyncHandler_PerNodePatchMerged(t *testing.T) {
 	if result.Status != runnerlib.ResultSucceeded {
 		t.Fatalf("expected ResultSucceeded, got %q; reason: %v", result.Status, result.FailureReason)
 	}
+	// Must apply to exactly 1 node (not 3).
 	if len(talosClient.applied) != 1 {
-		t.Fatalf("expected 1 apply call, got %d", len(talosClient.applied))
+		t.Fatalf("expected 1 ApplyConfiguration call (MC_NODE_IP single-target), got %d", len(talosClient.applied))
 	}
-	applied := string(talosClient.applied[0])
-	if !containsString(applied, "hostname: cp1") {
-		t.Errorf("per-node patch hostname not merged into applied config:\n%s", applied)
+	if len(result.Steps) != 1 {
+		t.Fatalf("expected 1 step result, got %d", len(result.Steps))
 	}
-	if !containsString(applied, ontControlledLabel) {
-		t.Errorf("ONT controlled label missing from merged config:\n%s", applied)
+	if !containsString(result.Steps[0].Message, "10.20.0.11") {
+		t.Errorf("step message must reference nodeIP 10.20.0.11, got %q", result.Steps[0].Message)
 	}
 }
 
-// TestMachineConfigSyncHandler_PerNodePatchPreservesONTLabel verifies that a per-node
-// patch cannot override the ontControlledLabel (protected field). RECON-A8.
-func TestMachineConfigSyncHandler_PerNodePatchPreservesONTLabel(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
-
-	baseContent := []byte("machine:\n  type: controlplane\n")
-	// Patch explicitly tries to remove/override the ONT label.
-	patchContent := []byte(`machine:
-  nodeLabels:
-    ont.platform.dev/controlled: "false"
-    custom-key: custom-val
-`)
-
-	secret := mcSyncTestSecret("ccs-mgmt", "controlplane", baseContent)
-	patchSecret := mcSyncTestSecret("ccs-mgmt", "node-cp2", patchContent)
-	node := buildMCSyncNode("cp2", "10.20.0.3")
-	kubeClient := fake.NewSimpleClientset(secret, patchSecret, node)
-	talosClient := &stubApplyTalosClient{}
+// TestMachineConfigSyncHandler_TalosconfigMultipleNodes verifies that when MC_NODE_IP
+// is not set but a talosconfig with multiple nodes is provided, the capability applies
+// to all enumerated nodes.
+func TestMachineConfigSyncHandler_TalosconfigMultipleNodes(t *testing.T) {
+	t.Setenv(envMCSyncNodeClass, "cp1")
 
-	talosconfigPath := writeFakeTalosconfig(t, []string{"10.20.0.3"})
+	machineSection := map[string]interface{}{"type": "controlplane"}
+	dynClient := buildMCSyncDynClient("ccs-mgmt", "cp1", machineSection, nil)
+	talosconfigPath := writeFakeTalosconfig(t, []string{"10.20.0.11", "10.20.0.12"})
+	talosClient := &stubApplyTalosClient{}
 
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
@@ -342,7 +334,7 @@ func TestMachineConfigSyncHandler_PerNodePatchPreservesONTLabel(t *testing.T) {
 		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
 			TalosClient:     talosClient,
-			KubeClient:      kubeClient,
+			DynamicClient:   dynClient,
 			TalosconfigPath: talosconfigPath,
 		},
 	})
@@ -352,90 +344,98 @@ func TestMachineConfigSyncHandler_PerNodePatchPreservesONTLabel(t *testing.T) {
 	if result.Status != runnerlib.ResultSucceeded {
 		t.Fatalf("expected ResultSucceeded, got %q; reason: %v", result.Status, result.FailureReason)
 	}
-	if len(talosClient.applied) != 1 {
-		t.Fatalf("expected 1 apply call, got %d", len(talosClient.applied))
-	}
-	applied := string(talosClient.applied[0])
-	// ONT label must be "true" (re-injected after merge).
-	if !containsString(applied, `ont.platform.dev/controlled: "true"`) {
-		t.Errorf("ONT controlled label not protected; applied config:\n%s", applied)
-	}
-	// Per-node patch content should also be present.
-	if !containsString(applied, "custom-key") {
-		t.Errorf("per-node patch custom label missing from merged config:\n%s", applied)
+	if len(talosClient.applied) != 2 {
+		t.Errorf("expected 2 ApplyConfiguration calls (one per node), got %d", len(talosClient.applied))
 	}
 }
 
-// TestMachineConfigSyncHandler_SingleNodeClass verifies that when nodeClass starts with
-// "node-", no additional per-node patch lookup is performed and the base config is applied
-// directly. RECON-A8.
-func TestMachineConfigSyncHandler_SingleNodeClass(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "node-cp1")
-
-	nodeContent := []byte("machine:\n  type: controlplane\n  network:\n    hostname: cp1\n")
-	secret := mcSyncTestSecret("ccs-mgmt", "node-cp1", nodeContent)
-	kubeClient := fake.NewSimpleClientset(secret)
-	talosClient := &stubApplyTalosClient{}
+// TestMachineConfigSyncHandler_CREmptySpecFails verifies that a ValidationFailure
+// is returned when the MachineConfig CR spec has neither machine nor cluster section.
+func TestMachineConfigSyncHandler_CREmptySpecFails(t *testing.T) {
+	t.Setenv(envMCSyncNodeClass, "cp1")
+	t.Setenv(envMCSyncNodeIP, "10.20.0.11")
+	defer t.Setenv(envMCSyncNodeIP, "")
+
+	// Seed a CR with no machine/cluster sections.
+	cr := &unstructured.Unstructured{}
+	cr.SetGroupVersionKind(schema.GroupVersionKind{
+		Group: "platform.ontai.dev", Version: "v1alpha1", Kind: "MachineConfig",
+	})
+	cr.SetName("seam-mc-ccs-mgmt-cp1")
+	cr.SetNamespace("seam-tenant-ccs-mgmt")
+	cr.Object["spec"] = map[string]interface{}{
+		"role":  "controlplane",
+		"order": int64(1),
+	}
+	dynClient := fakedyn.NewSimpleDynamicClient(buildMCSyncScheme(), cr)
 
 	handler := &machineConfigSyncHandler{}
 	result, err := handler.Execute(context.Background(), ExecuteParams{
 		Capability: runnerlib.CapabilityMachineConfigSync,
 		ClusterRef: "ccs-mgmt",
 		ExecuteClients: ExecuteClients{
-			TalosClient: talosClient,
-			KubeClient:  kubeClient,
+			TalosClient:   &stubApplyTalosClient{},
+			DynamicClient: dynClient,
 		},
 	})
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
-	if result.Status != runnerlib.ResultSucceeded {
-		t.Fatalf("expected ResultSucceeded, got %q; reason: %v", result.Status, result.FailureReason)
-	}
-	if len(talosClient.applied) != 1 {
-		t.Fatalf("expected 1 apply call, got %d", len(talosClient.applied))
+	if result.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed for empty CR spec, got %q", result.Status)
 	}
-	applied := string(talosClient.applied[0])
-	if !containsString(applied, ontControlledLabel) {
-		t.Errorf("ONT controlled label missing in single-node-class apply:\n%s", applied)
+	if result.FailureReason == nil || result.FailureReason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure for empty CR spec, got %v", result.FailureReason)
 	}
 }
 
-// TestMachineConfigSyncHandler_NoPatchSecretFallsBackToBase verifies that when no
-// per-node patch secret exists, the base class config is applied unchanged. RECON-A8.
-func TestMachineConfigSyncHandler_NoPatchSecretFallsBackToBase(t *testing.T) {
-	t.Setenv(envMCSyncNodeClass, "controlplane")
-
-	baseContent := []byte("machine:\n  type: controlplane\n")
-	secret := mcSyncTestSecret("ccs-mgmt", "controlplane", baseContent)
-	node := buildMCSyncNode("cp3", "10.20.0.4")
-	// No per-node patch secret in the fake client.
-	kubeClient := fake.NewSimpleClientset(secret, node)
-	talosClient := &stubApplyTalosClient{}
-
-	talosconfigPath := writeFakeTalosconfig(t, []string{"10.20.0.4"})
-
-	handler := &machineConfigSyncHandler{}
-	result, err := handler.Execute(context.Background(), ExecuteParams{
-		Capability: runnerlib.CapabilityMachineConfigSync,
-		ClusterRef: "ccs-mgmt",
-		ExecuteClients: ExecuteClients{
-			TalosClient:     talosClient,
-			KubeClient:      kubeClient,
-			TalosconfigPath: talosconfigPath,
+// TestReconstructMachineConfigYAML_BothSections verifies YAML reconstruction from a CR
+// with both machine and cluster sections.
+func TestReconstructMachineConfigYAML_BothSections(t *testing.T) {
+	obj := map[string]interface{}{
+		"spec": map[string]interface{}{
+			"machine": map[string]interface{}{
+				"type": "controlplane",
+			},
+			"cluster": map[string]interface{}{
+				"clusterName": "ccs-mgmt",
+			},
 		},
-	})
+	}
+	out, err := reconstructMachineConfigYAML(obj)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
-	if result.Status != runnerlib.ResultSucceeded {
-		t.Fatalf("expected ResultSucceeded, got %q; reason: %v", result.Status, result.FailureReason)
+	s := string(out)
+	if !containsString(s, "machine:") {
+		t.Errorf("output missing machine section:\n%s", s)
 	}
-	if len(talosClient.applied) != 1 {
-		t.Fatalf("expected 1 apply call, got %d", len(talosClient.applied))
+	if !containsString(s, "cluster:") {
+		t.Errorf("output missing cluster section:\n%s", s)
 	}
-	applied := string(talosClient.applied[0])
-	if !containsString(applied, ontControlledLabel) {
-		t.Errorf("ONT label missing in fallback apply:\n%s", applied)
+}
+
+// TestReconstructMachineConfigYAML_MissingSpec checks that an object without a
+// "spec" key is rejected with a non-nil error.
+func TestReconstructMachineConfigYAML_MissingSpec(t *testing.T) {
+	if _, err := reconstructMachineConfigYAML(map[string]interface{}{}); err == nil {
+		t.Error("expected error for missing spec, got nil")
+	}
+}
+
+// TestReconstructMachineConfigYAML_EmptySections verifies an error when neither
+// machine nor cluster is present in spec.
+func TestReconstructMachineConfigYAML_EmptySections(t *testing.T) {
+	obj := map[string]interface{}{
+		"spec": map[string]interface{}{
+			"role": "controlplane",
+		},
+	}
+	_, err := reconstructMachineConfigYAML(obj)
+	if err == nil {
+		t.Error("expected error for spec with no machine/cluster, got nil")
 	}
 }
+
+// Blank-identifier reference to metav1.GetOptions; it keeps the metav1 import in use in this file.
+var _ = metav1.GetOptions{}
diff --git a/internal/capability/platform_machineconfig_test.go b/internal/capability/platform_machineconfig_test.go
index 66b2719..3c717d8 100644
--- a/internal/capability/platform_machineconfig_test.go
+++ b/internal/capability/platform_machineconfig_test.go
@@ -23,7 +23,8 @@ type stubTalosClientMC struct {
 func (s *stubTalosClientMC) Bootstrap(_ context.Context) error                              { return nil }
 func (s *stubTalosClientMC) ApplyConfiguration(_ context.Context, _ []byte, _ string) error { return nil }
 func (s *stubTalosClientMC) Upgrade(_ context.Context, _ string, _ bool) error              { return nil }
-func (s *stubTalosClientMC) Reboot(_ context.Context) error                                 { return nil }
+func (s *stubTalosClientMC) Reboot(_ context.Context) error          { return nil }
+func (s *stubTalosClientMC) RebootPowercycle(_ context.Context) error { return nil }
 func (s *stubTalosClientMC) Reset(_ context.Context, _ bool) error                          { return nil }
 func (s *stubTalosClientMC) EtcdSnapshot(_ context.Context, _ io.Writer) error              { return nil }
 func (s *stubTalosClientMC) EtcdRecover(_ context.Context, _ io.Reader) error               { return nil }
@@ -302,7 +303,8 @@ func (s *stubTalosClientRestore) ApplyConfiguration(_ context.Context, _ []byte,
 	return s.applyErr
 }
 func (s *stubTalosClientRestore) Upgrade(_ context.Context, _ string, _ bool) error { return nil }
-func (s *stubTalosClientRestore) Reboot(_ context.Context) error                    { return nil }
+func (s *stubTalosClientRestore) Reboot(_ context.Context) error             { return nil }
+func (s *stubTalosClientRestore) RebootPowercycle(_ context.Context) error   { return nil }
 func (s *stubTalosClientRestore) Reset(_ context.Context, _ bool) error             { return nil }
 func (s *stubTalosClientRestore) EtcdSnapshot(_ context.Context, _ io.Writer) error { return nil }
 func (s *stubTalosClientRestore) EtcdRecover(_ context.Context, _ io.Reader) error  { return nil }
diff --git a/internal/capability/platform_node_scaleup_test.go b/internal/capability/platform_node_scaleup_test.go
index cb1cabf..7e3d02e 100644
--- a/internal/capability/platform_node_scaleup_test.go
+++ b/internal/capability/platform_node_scaleup_test.go
@@ -37,7 +37,8 @@ func (s *stubEnrollTalosClient) ApplyConfiguration(_ context.Context, cfg []byte
 	return s.applyErr
 }
 func (s *stubEnrollTalosClient) Upgrade(_ context.Context, _ string, _ bool) error { return nil }
-func (s *stubEnrollTalosClient) Reboot(_ context.Context) error                    { return nil }
+func (s *stubEnrollTalosClient) Reboot(_ context.Context) error             { return nil }
+func (s *stubEnrollTalosClient) RebootPowercycle(_ context.Context) error   { return nil }
 func (s *stubEnrollTalosClient) Reset(_ context.Context, _ bool) error             { return nil }
 func (s *stubEnrollTalosClient) EtcdSnapshot(_ context.Context, _ io.Writer) error { return nil }
 func (s *stubEnrollTalosClient) EtcdRecover(_ context.Context, _ io.Reader) error  { return nil }
diff --git a/internal/capability/platform_postop_test.go b/internal/capability/platform_postop_test.go
index 332a003..26432ce 100644
--- a/internal/capability/platform_postop_test.go
+++ b/internal/capability/platform_postop_test.go
@@ -18,7 +18,8 @@ type mockTalosPostOp struct {
 func (m *mockTalosPostOp) Bootstrap(ctx context.Context) error                           { return nil }
 func (m *mockTalosPostOp) ApplyConfiguration(_ context.Context, _ []byte, _ string) error { return nil }
 func (m *mockTalosPostOp) Upgrade(_ context.Context, _ string, _ bool) error             { return nil }
-func (m *mockTalosPostOp) Reboot(_ context.Context) error                                { return nil }
+func (m *mockTalosPostOp) Reboot(_ context.Context) error          { return nil }
+func (m *mockTalosPostOp) RebootPowercycle(_ context.Context) error { return nil }
 func (m *mockTalosPostOp) Reset(_ context.Context, _ bool) error                         { return nil }
 func (m *mockTalosPostOp) EtcdSnapshot(_ context.Context, _ io.Writer) error             { return nil }
 func (m *mockTalosPostOp) EtcdRecover(_ context.Context, _ io.Reader) error              { return nil }
diff --git a/internal/capability/platform_upgrade.go b/internal/capability/platform_upgrade.go
index 9a94507..fb3cd0a 100644
--- a/internal/capability/platform_upgrade.go
+++ b/internal/capability/platform_upgrade.go
@@ -9,6 +9,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"log/slog"
+	"sort"
 	"time"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -116,6 +117,40 @@ func clearUpgradeProgress(ctx context.Context, dynClient dynamic.Interface, ns,
 	}
 }
 
+// nodesFromMachineConfigCRs lists the MachineConfig CRs in namespace ns and returns their
+// spec.nodeIP values sorted by spec.order ascending (lower order upgrades first); CRs with
+// no spec or nodeIP are skipped. The sort is stable, so CRs sharing an order keep their List
+// position. Returns nil on list error or when no CRs exist so the caller can fall back.
+func nodesFromMachineConfigCRs(ctx context.Context, dynClient dynamic.Interface, ns string) []string {
+	crList, err := dynClient.Resource(machineConfigGVR).Namespace(ns).List(ctx, metav1.ListOptions{})
+	if err != nil || len(crList.Items) == 0 {
+		return nil
+	}
+	type nodeEntry struct {
+		order  int64
+		nodeIP string
+	}
+	entries := make([]nodeEntry, 0, len(crList.Items))
+	for _, item := range crList.Items {
+		spec, _ := item.Object["spec"].(map[string]interface{})
+		if spec == nil {
+			continue
+		}
+		ip, _, _ := unstructuredString(spec, "nodeIP")
+		if ip == "" {
+			continue
+		}
+		order, _ := spec["order"].(int64) // a missing or non-int64 order sorts as 0
+		entries = append(entries, nodeEntry{order: order, nodeIP: ip})
+	}
+	sort.SliceStable(entries, func(i, j int) bool { return entries[i].order < entries[j].order })
+	ips := make([]string, len(entries))
+	for i, e := range entries {
+		ips[i] = e.nodeIP
+	}
+	return ips
+}
+
 // talosUpgradeHandler implements the talos-upgrade named capability.
 // Performs a rolling sequential upgrade of all nodes: each node is upgraded
 // with stage=false (immediate reboot), then we wait for it to return healthy
@@ -130,13 +165,18 @@ func (h *talosUpgradeHandler) Execute(ctx context.Context, params ExecuteParams)
 			"talos-upgrade requires TalosClient and DynamicClient"), nil
 	}
 
-	nodes := params.TalosClient.Nodes()
+	ns := tenantNamespace(params.ClusterRef)
+
+	// Use MachineConfig CRs sorted by spec.order as the canonical node iteration order.
+	// Falls back to talosconfig endpoint enumeration when no CRs are present.
+	nodes := nodesFromMachineConfigCRs(ctx, params.DynamicClient, ns)
+	if len(nodes) == 0 {
+		nodes = params.TalosClient.Nodes()
+	}
 	if len(nodes) == 0 {
 		return failureResult(runnerlib.CapabilityTalosUpgrade, now, runnerlib.ValidationFailure,
-			"talos-upgrade: no nodes available from talosconfig"), nil
+			"talos-upgrade: no nodes available from MachineConfig CRs or talosconfig"), nil
 	}
-
-	ns := tenantNamespace(params.ClusterRef)
 	crList, err := params.DynamicClient.Resource(upgradePolicyGVR).Namespace(ns).
 		List(ctx, metav1.ListOptions{})
 	if err != nil {
@@ -220,11 +260,11 @@ func (h *talosUpgradeHandler) Execute(ctx context.Context, params ExecuteParams)
 				fmt.Sprintf("stage upgrade node %s to %s: %v", nodeIP, upgradeImage, uErr)), nil
 		}
 
-		if rErr := params.TalosClient.Reboot(nodeCtx); rErr != nil {
-			slog.Info("talos-upgrade: forced reboot failed",
+		if rErr := params.TalosClient.RebootPowercycle(nodeCtx); rErr != nil {
+			slog.Info("talos-upgrade: powercycle reboot failed",
 				slog.String("node", nodeIP), slog.String("error", rErr.Error()))
 			return failureResult(runnerlib.CapabilityTalosUpgrade, now, runnerlib.ExecutionFailure,
-				fmt.Sprintf("reboot node %s after staging upgrade to %s: %v", nodeIP, upgradeImage, rErr)), nil
+				fmt.Sprintf("powercycle reboot node %s after staging upgrade to %s: %v", nodeIP, upgradeImage, rErr)), nil
 		}
 
 		slog.Info("talos-upgrade: upgrade staged and reboot forced, waiting for node reboot",
diff --git a/internal/capability/platform_upgrade_test.go b/internal/capability/platform_upgrade_test.go
index c3b227a..c6d7be3 100644
--- a/internal/capability/platform_upgrade_test.go
+++ b/internal/capability/platform_upgrade_test.go
@@ -2,6 +2,7 @@ package capability
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"testing"
 
@@ -11,20 +12,25 @@ import (
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	fakedyn "k8s.io/client-go/dynamic/fake"
 	"k8s.io/client-go/kubernetes/fake"
+
+	"github.com/ontai-dev/conductor-sdk/runnerlib"
 )
 
 // stubUpgradeTalosClient is a TalosNodeClient stub for upgrade tests.
 // Health returns healthErr on every call (nil = node is healthy).
+// powercycleCalled records whether RebootPowercycle was called (for Phase 4b test assertions).
 type stubUpgradeTalosClient struct {
-	nodes      []string
-	healthErr  error
-	upgradeErr error
+	nodes            []string
+	healthErr        error
+	upgradeErr       error
+	powercycleCalled bool
 }
 
 func (s *stubUpgradeTalosClient) Bootstrap(_ context.Context) error                              { return nil }
 func (s *stubUpgradeTalosClient) ApplyConfiguration(_ context.Context, _ []byte, _ string) error { return nil }
 func (s *stubUpgradeTalosClient) Upgrade(_ context.Context, _ string, _ bool) error              { return s.upgradeErr }
 func (s *stubUpgradeTalosClient) Reboot(_ context.Context) error                                 { return nil }
+func (s *stubUpgradeTalosClient) RebootPowercycle(_ context.Context) error                       { s.powercycleCalled = true; return nil }
 func (s *stubUpgradeTalosClient) Reset(_ context.Context, _ bool) error                         { return nil }
 func (s *stubUpgradeTalosClient) EtcdSnapshot(_ context.Context, _ io.Writer) error             { return nil }
 func (s *stubUpgradeTalosClient) EtcdRecover(_ context.Context, _ io.Reader) error              { return nil }
@@ -270,3 +276,131 @@ func TestWaitForNodeReboot_KubeNotReadyReturnsError(t *testing.T) {
 		t.Error("expected error when Kubernetes node remains NotReady, got nil")
 	}
 }
+
+// ── nodesFromMachineConfigCRs ────────────────────────────────────────────────
+
+// buildUpgradeDynClientWithMachineConfigs returns a fake DynamicClient whose scheme knows
+// the UpgradePolicy and MachineConfig kinds (and their List kinds) and which is seeded
+// with one UpgradePolicy CR plus one MachineConfig CR per entry of nodes. Each entry is
+// an (ip, order) pair copied into spec.nodeIP and spec.order. All objects are placed in
+// the seam-tenant-{clusterRef} namespace.
+func buildUpgradeDynClientWithMachineConfigs(clusterRef, policyName, upgradeType, targetVersion string, nodes []struct{ ip string; order int64 }) *fakedyn.FakeDynamicClient {
+	gvk := func(kind string) schema.GroupVersionKind {
+		return schema.GroupVersionKind{Group: "platform.ontai.dev", Version: "v1alpha1", Kind: kind}
+	}
+
+	// Each kind is registered together with its List kind.
+	scheme := runtime.NewScheme()
+	for _, kind := range []string{"UpgradePolicy", "MachineConfig"} {
+		scheme.AddKnownTypeWithName(gvk(kind), &unstructured.Unstructured{})
+		scheme.AddKnownTypeWithName(gvk(kind+"List"), &unstructured.UnstructuredList{})
+	}
+
+	tenantNS := "seam-tenant-" + clusterRef
+
+	// The spec key holding the target version depends on the upgrade type.
+	versionField := "targetTalosVersion"
+	if upgradeType == "kubernetes" {
+		versionField = "targetKubernetesVersion"
+	}
+
+	policy := &unstructured.Unstructured{}
+	policy.SetGroupVersionKind(gvk("UpgradePolicy"))
+	policy.SetName(policyName)
+	policy.SetNamespace(tenantNS)
+	policy.Object["spec"] = map[string]interface{}{
+		"upgradeType": upgradeType,
+		versionField:  targetVersion,
+	}
+
+	// The policy goes first, followed by one MachineConfig per node entry.
+	seeded := make([]runtime.Object, 0, len(nodes)+1)
+	seeded = append(seeded, policy)
+	for idx, node := range nodes {
+		mc := &unstructured.Unstructured{}
+		mc.SetGroupVersionKind(gvk("MachineConfig"))
+		mc.SetName(fmt.Sprintf("seam-mc-%s-node%d", clusterRef, idx))
+		mc.SetNamespace(tenantNS)
+		mc.Object["spec"] = map[string]interface{}{
+			"nodeIP": node.ip,
+			"order":  node.order,
+		}
+		seeded = append(seeded, mc)
+	}
+
+	return fakedyn.NewSimpleDynamicClient(scheme, seeded...)
+}
+
+// TestNodesFromMachineConfigCRs_SortsAscendingByOrder seeds MachineConfig CRs whose
+// spec.order values are shuffled and checks that nodesFromMachineConfigCRs hands the
+// nodeIPs back in ascending spec.order. Phase 4b: spec.order drives upgrade order.
+func TestNodesFromMachineConfigCRs_SortsAscendingByOrder(t *testing.T) {
+	// Seeded as order 2, 0, 1; the expected result is order 0, 1, 2.
+	seed := []struct{ ip string; order int64 }{
+		{ip: "10.20.0.12", order: 2},
+		{ip: "10.20.0.10", order: 0},
+		{ip: "10.20.0.11", order: 1},
+	}
+	dyn := buildUpgradeDynClientWithMachineConfigs("ccs-dev", "up-mc", "talos", "v1.10.0", seed)
+
+	want := []string{"10.20.0.10", "10.20.0.11", "10.20.0.12"}
+	got := nodesFromMachineConfigCRs(context.Background(), dyn, "seam-tenant-ccs-dev")
+	if len(got) != len(want) {
+		t.Fatalf("got %d nodes, want %d: %v", len(got), len(want), got)
+	}
+	for idx := range want {
+		if got[idx] != want[idx] {
+			t.Errorf("node[%d]: got %q, want %q", idx, got[idx], want[idx])
+		}
+	}
+}
+
+// TestNodesFromMachineConfigCRs_EmptyWhenNoCRs checks that an empty result comes back
+// when the namespace holds no MachineConfig CRs, which lets the caller fall back.
+func TestNodesFromMachineConfigCRs_EmptyWhenNoCRs(t *testing.T) {
+	mcKind := func(kind string) schema.GroupVersionKind {
+		return schema.GroupVersionKind{Group: "platform.ontai.dev", Version: "v1alpha1", Kind: kind}
+	}
+	// Only the types are registered; no objects are seeded into the fake client.
+	scheme := runtime.NewScheme()
+	scheme.AddKnownTypeWithName(mcKind("MachineConfig"), &unstructured.Unstructured{})
+	scheme.AddKnownTypeWithName(mcKind("MachineConfigList"), &unstructured.UnstructuredList{})
+	emptyClient := fakedyn.NewSimpleDynamicClient(scheme)
+
+	ips := nodesFromMachineConfigCRs(context.Background(), emptyClient, "seam-tenant-ccs-dev")
+	if len(ips) > 0 {
+		t.Errorf("expected nil/empty, got %v", ips)
+	}
+}
+
+// TestTalosUpgrade_PowercycleCalledAfterStage runs the talos-upgrade handler against a
+// single node and checks that the handler finishes with ResultSucceeded and that the
+// stub's RebootPowercycle was invoked along the way.
+// Phase 4b: the handler reboots via RebootPowercycle after staging the upgrade.
+func TestTalosUpgrade_PowercycleCalledAfterStage(t *testing.T) {
+	defer setRebootTimeouts()()
+
+	// The same single node is known to the Talos stub and to the MachineConfig CRs.
+	const nodeIP = "10.20.0.10"
+	stub := &stubUpgradeTalosClient{nodes: []string{nodeIP}}
+	seed := []struct{ ip string; order int64 }{{ip: nodeIP, order: 0}}
+	clients := ExecuteClients{
+		TalosClient:   stub,
+		DynamicClient: buildUpgradeDynClientWithMachineConfigs("ccs-dev", "up-pow", "talos", "v1.10.0", seed),
+	}
+
+	result, err := (&talosUpgradeHandler{}).Execute(context.Background(), ExecuteParams{
+		ClusterRef:     "ccs-dev",
+		ExecuteClients: clients,
+	})
+	if err != nil {
+		t.Fatalf("Execute error: %v", err)
+	}
+	if result.Status != runnerlib.ResultSucceeded {
+		t.Errorf("expected ResultSucceeded, got %q: %v", result.Status, result.Steps)
+	}
+	// The stub flips powercycleCalled only inside RebootPowercycle.
+	if !stub.powercycleCalled {
+		t.Error("expected RebootPowercycle to be called; it was not")
+	}
+}
diff --git a/internal/capability/stubs.go b/internal/capability/stubs.go
index 826a229..2d31a06 100644
--- a/internal/capability/stubs.go
+++ b/internal/capability/stubs.go
@@ -42,6 +42,13 @@ func RegisterAll(reg *Registry) {
 	// Guardian capabilities -- RBAC plane.
 	reg.Register(runnerlib.CapabilityRBACProvision, &rbacProvisionHandler{})
 
+	// Watchdog capabilities -- runtime failure remediation. Triggered by
+	// RuntimeDrift DriftSignals on the management cluster. conductor-schema.md §6.
+	reg.Register(runnerlib.CapabilityPodRestart, &podRestartHandler{})
+	reg.Register(runnerlib.CapabilityResourcePatch, &resourcePatchHandler{})
+	reg.Register(runnerlib.CapabilityForceVolumeDetach, &forceVolumeDetachHandler{})
+	reg.Register(runnerlib.CapabilityCredentialRefresh, &credentialRefreshHandler{})
+
 	// Note: CapabilityPackCompile is NOT registered here. pack-compile is a
 	// Compiler compile-mode invocation only -- it never runs as a Conductor Job.
 	// Registering it here would be a schema violation. conductor-schema.md §6.
diff --git a/internal/capability/watchdog.go b/internal/capability/watchdog.go
new file mode 100644
index 0000000..2fab1fe
--- /dev/null
+++ b/internal/capability/watchdog.go
@@ -0,0 +1,355 @@
+// Package capability — Conductor Watchdog remediation capability implementations.
+// pod-restart, resource-patch, force-volume-detach, credential-refresh.
+// Triggered by RuntimeDrift DriftSignals on the management cluster.
+// conductor-schema.md §6, conductor-sdk runnerlib/constants.go.
+package capability
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
+
+	"github.com/ontai-dev/conductor-sdk/runnerlib"
+)
+
+const (
+	// packNameLabel is the label key that scopes watchdog operations to one pack.
+	// Set by the pack-deploy handler on pod template specs at deploy time.
+	packNameLabel = "seam.ontai.dev/pack-name"
+
+	// podNamespaceAnnotation is stamped on Jobs by the execute runner so watchdog
+	// handlers can find the namespace where the failing pod lives.
+	podNamespaceAnnotation = "conductor.ontai.dev/pod-namespace"
+)
+
+var (
+	// volumeAttachmentGVR identifies storage.k8s.io/v1 VolumeAttachment.
+	volumeAttachmentGVR = schema.GroupVersionResource{Group: "storage.k8s.io", Version: "v1", Resource: "volumeattachments"}
+
+	// podGVR identifies core/v1 Pod.
+	podGVR = schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}
+)
+
+// ---------------------------------------------------------------------------
+// pod-restart
+// ---------------------------------------------------------------------------
+
+// podRestartHandler implements the pod-restart named capability: it deletes all pods
+// bearing the seam.ontai.dev/pack-name={PackInstalledName} label so their controller can
+// recreate them (CrashLoopBackOff remediation). conductor-schema.md §6, CapabilityPodRestart.
+type podRestartHandler struct{}
+
+// Execute lists the pack's pods across all namespaces and deletes them one by one, stopping
+// at the first list or delete error. The error return is always nil; failures are in the result.
+func (h *podRestartHandler) Execute(ctx context.Context, params ExecuteParams) (runnerlib.OperationResultSpec, error) {
+	now := time.Now().UTC()
+	switch {
+	case params.TenantKubeClient == nil:
+		return failureResult(runnerlib.CapabilityPodRestart, now, runnerlib.ValidationFailure,
+			"pod-restart requires TenantKubeClient"), nil
+	case params.PackInstalledName == "":
+		return failureResult(runnerlib.CapabilityPodRestart, now, runnerlib.ValidationFailure,
+			"pod-restart requires PackInstalledName"), nil
+	}
+
+	core := params.TenantKubeClient.CoreV1()
+	selector := packNameLabel + "=" + params.PackInstalledName
+	matched, err := core.Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{LabelSelector: selector})
+	if err != nil {
+		return failureResult(runnerlib.CapabilityPodRestart, now, runnerlib.ExecutionFailure,
+			fmt.Sprintf("list pods with label %s on cluster %s: %v", selector, params.ClusterRef, err)), nil
+	}
+
+	stepStart := time.Now().UTC()
+	var deleted int
+	for i := range matched.Items {
+		ns, name := matched.Items[i].Namespace, matched.Items[i].Name
+		if delErr := core.Pods(ns).Delete(ctx, name, metav1.DeleteOptions{}); delErr != nil {
+			return failureResult(runnerlib.CapabilityPodRestart, now, runnerlib.ExecutionFailure,
+				fmt.Sprintf("delete pod %s/%s on cluster %s: %v", ns, name, params.ClusterRef, delErr)), nil
+		}
+		deleted++
+		params.Log().Info("pod-restart: deleted pod", "cluster", params.ClusterRef, "pod", ns+"/"+name)
+	}
+
+	return runnerlib.OperationResultSpec{
+		Capability:  runnerlib.CapabilityPodRestart,
+		Status:      runnerlib.ResultSucceeded,
+		StartedAt:   now,
+		CompletedAt: time.Now().UTC(),
+		Artifacts:   []runnerlib.ArtifactRef{},
+		Steps: []runnerlib.StepResult{{
+			Name:        "delete-pods",
+			Status:      runnerlib.ResultSucceeded,
+			StartedAt:   stepStart,
+			CompletedAt: time.Now().UTC(),
+			Message:     fmt.Sprintf("%d pod(s) deleted from pack %s on cluster %s", deleted, params.PackInstalledName, params.ClusterRef),
+		}},
+	}, nil
+}
+
+// ---------------------------------------------------------------------------
+// resource-patch
+// ---------------------------------------------------------------------------
+
+// resourcePatchHandler implements the resource-patch named capability.
+// It stamps the kubectl.kubernetes.io/restartedAt pod-template annotation on every
+// Deployment and StatefulSet labelled seam.ontai.dev/pack-name={PackInstalledName},
+// which triggers a rollout restart of those workloads. Used for OOMKilled failures.
+// conductor-schema.md §6, conductor-sdk CapabilityResourcePatch.
+type resourcePatchHandler struct{}
+
+// Execute lists the pack's Deployments, then its StatefulSets, across all namespaces and
+// merge-patches each one with the restart annotation, stopping at the first list or patch
+// error. The error return is always nil; failures are carried in the returned result.
+func (h *resourcePatchHandler) Execute(ctx context.Context, params ExecuteParams) (runnerlib.OperationResultSpec, error) {
+	now := time.Now().UTC()
+
+	switch {
+	case params.TenantDynamicClient == nil:
+		return failureResult(runnerlib.CapabilityResourcePatch, now, runnerlib.ValidationFailure,
+			"resource-patch requires TenantDynamicClient"), nil
+	case params.PackInstalledName == "":
+		return failureResult(runnerlib.CapabilityResourcePatch, now, runnerlib.ValidationFailure,
+			"resource-patch requires PackInstalledName"), nil
+	}
+
+	selector := packNameLabel + "=" + params.PackInstalledName
+	// One merge-patch document serves every workload: it sets the restart annotation
+	// under spec.template.metadata.annotations to a single shared timestamp.
+	patch := fmt.Sprintf(
+		`{"spec":{"template":{"metadata":{"annotations":{"kubectl.kubernetes.io/restartedAt":%q}}}}}`,
+		time.Now().UTC().Format(time.RFC3339),
+	)
+
+	stepStart := time.Now().UTC()
+	patched := 0
+	for _, gvr := range []schema.GroupVersionResource{deploymentGVR, statefulSetGVR} {
+		workloads := params.TenantDynamicClient.Resource(gvr)
+		list, err := workloads.Namespace("").List(ctx, metav1.ListOptions{LabelSelector: selector})
+		if err != nil {
+			return failureResult(runnerlib.CapabilityResourcePatch, now, runnerlib.ExecutionFailure,
+				fmt.Sprintf("list %s with label %s on cluster %s: %v", gvr.Resource, selector, params.ClusterRef, err)), nil
+		}
+		for i := range list.Items {
+			ns, name := list.Items[i].GetNamespace(), list.Items[i].GetName()
+			_, patchErr := workloads.Namespace(ns).Patch(
+				ctx, name, types.MergePatchType, []byte(patch), metav1.PatchOptions{},
+			)
+			if patchErr != nil {
+				return failureResult(runnerlib.CapabilityResourcePatch, now, runnerlib.ExecutionFailure,
+					fmt.Sprintf("patch %s %s/%s on cluster %s: %v", gvr.Resource, ns, name, params.ClusterRef, patchErr)), nil
+			}
+			patched++
+			params.Log().Info("resource-patch: rollout restart triggered",
+				"cluster", params.ClusterRef, "resource", gvr.Resource, "name", ns+"/"+name)
+		}
+	}
+
+	return runnerlib.OperationResultSpec{
+		Capability:  runnerlib.CapabilityResourcePatch,
+		Status:      runnerlib.ResultSucceeded,
+		StartedAt:   now,
+		CompletedAt: time.Now().UTC(),
+		Artifacts:   []runnerlib.ArtifactRef{},
+		Steps: []runnerlib.StepResult{{
+			Name:        "rollout-restart",
+			Status:      runnerlib.ResultSucceeded,
+			StartedAt:   stepStart,
+			CompletedAt: time.Now().UTC(),
+			Message:     fmt.Sprintf("%d workload(s) patched for rollout restart on cluster %s", patched, params.ClusterRef),
+		}},
+	}, nil
+}
+
+// ---------------------------------------------------------------------------
+// force-volume-detach
+// ---------------------------------------------------------------------------
+
+// forceVolumeDetachHandler implements the force-volume-detach named capability.
+// It deletes the VolumeAttachment objects that reference PVs bound to the pack's
+// PVCs so the volumes can be re-attached on a healthy node. Intended for
+// FailedMount and MultiAttachError failures.
+// conductor-schema.md §6, conductor-sdk CapabilityForceVolumeDetach.
+type forceVolumeDetachHandler struct{}
+
+// Execute lists the PVCs carrying the pack-name label, collects the PV names
+// they are bound to, and deletes every VolumeAttachment whose
+// spec.source.persistentVolumeName is in that set.
+//
+// A missing TenantDynamicClient or PackInstalledName produces a
+// ValidationFailure result; list and delete errors produce an ExecutionFailure
+// result, and the first failing delete stops the run. The returned error is
+// always nil: failures are reported through the result.
+func (h *forceVolumeDetachHandler) Execute(ctx context.Context, params ExecuteParams) (runnerlib.OperationResultSpec, error) {
+	opStart := time.Now().UTC()
+
+	switch {
+	case params.TenantDynamicClient == nil:
+		return failureResult(runnerlib.CapabilityForceVolumeDetach, opStart, runnerlib.ValidationFailure,
+			"force-volume-detach requires TenantDynamicClient"), nil
+	case params.PackInstalledName == "":
+		return failureResult(runnerlib.CapabilityForceVolumeDetach, opStart, runnerlib.ValidationFailure,
+			"force-volume-detach requires PackInstalledName"), nil
+	}
+
+	// succeeded builds the single-step success result shared by the no-op path
+	// and the normal completion path.
+	succeeded := func(stepStart time.Time, message string) (runnerlib.OperationResultSpec, error) {
+		return runnerlib.OperationResultSpec{
+			Capability:  runnerlib.CapabilityForceVolumeDetach,
+			Status:      runnerlib.ResultSucceeded,
+			StartedAt:   opStart,
+			CompletedAt: time.Now().UTC(),
+			Artifacts:   []runnerlib.ArtifactRef{},
+			Steps: []runnerlib.StepResult{{
+				Name:        "delete-volume-attachments",
+				Status:      runnerlib.ResultSucceeded,
+				StartedAt:   stepStart,
+				CompletedAt: time.Now().UTC(),
+				Message:     message,
+			}},
+		}, nil
+	}
+
+	dyn := params.TenantDynamicClient
+	labelSelector := packNameLabel + "=" + params.PackInstalledName
+
+	// Step 1: find the pack's PVCs; the empty namespace queries across namespaces.
+	claims, err := dyn.Resource(pvcGVR).Namespace("").List(ctx, metav1.ListOptions{
+		LabelSelector: labelSelector,
+	})
+	if err != nil {
+		return failureResult(runnerlib.CapabilityForceVolumeDetach, opStart, runnerlib.ExecutionFailure,
+			fmt.Sprintf("list PVCs with label %s on cluster %s: %v", labelSelector, params.ClusterRef, err)), nil
+	}
+
+	// Step 2: collect the PV names those PVCs are bound to. PVCs without a
+	// spec.volumeName (not yet bound) contribute nothing.
+	boundPVs := make(map[string]struct{}, len(claims.Items))
+	for i := range claims.Items {
+		claimSpec, _, _ := unstructuredNestedMap(claims.Items[i].Object, "spec")
+		name, _ := claimSpec["volumeName"].(string)
+		if name == "" {
+			continue
+		}
+		boundPVs[name] = struct{}{}
+	}
+	if len(boundPVs) == 0 {
+		// Nothing to detach: report success with the operation start as step start.
+		return succeeded(opStart,
+			fmt.Sprintf("no PVs bound to pack %s on cluster %s", params.PackInstalledName, params.ClusterRef))
+	}
+
+	// Step 3: walk all VolumeAttachments and delete the ones pointing at our PVs.
+	stepStart := time.Now().UTC()
+	attachments, err := dyn.Resource(volumeAttachmentGVR).List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return failureResult(runnerlib.CapabilityForceVolumeDetach, opStart, runnerlib.ExecutionFailure,
+			fmt.Sprintf("list VolumeAttachments on cluster %s: %v", params.ClusterRef, err)), nil
+	}
+
+	removed := 0
+	for i := range attachments.Items {
+		attachment := &attachments.Items[i]
+		attachSpec, _, _ := unstructuredNestedMap(attachment.Object, "spec")
+		source, _ := attachSpec["source"].(map[string]interface{})
+		pvName, _ := source["persistentVolumeName"].(string)
+		if _, bound := boundPVs[pvName]; !bound {
+			continue
+		}
+		vaName := attachment.GetName()
+		if err := dyn.Resource(volumeAttachmentGVR).Delete(ctx, vaName, metav1.DeleteOptions{}); err != nil {
+			return failureResult(runnerlib.CapabilityForceVolumeDetach, opStart, runnerlib.ExecutionFailure,
+				fmt.Sprintf("delete VolumeAttachment %s on cluster %s: %v", vaName, params.ClusterRef, err)), nil
+		}
+		removed++
+		params.Log().Info("force-volume-detach: deleted VolumeAttachment",
+			"cluster", params.ClusterRef, "volumeAttachment", vaName, "pv", pvName)
+	}
+
+	return succeeded(stepStart,
+		fmt.Sprintf("%d VolumeAttachment(s) deleted for pack %s on cluster %s", removed, params.PackInstalledName, params.ClusterRef))
+}
+
+// ---------------------------------------------------------------------------
+// credential-refresh
+// ---------------------------------------------------------------------------
+
+// credentialRefreshHandler implements the credential-refresh named capability.
+// It deletes pods carrying the seam.ontai.dev/pack-name label that are stuck on
+// an image pull, so the replacement pods retry the pull with up-to-date
+// imagePullSecret credentials. Intended for ImagePullBackOff failures where the
+// imagePullSecret has been refreshed out-of-band (e.g., by a secret rotation
+// operator).
+// conductor-schema.md §6, conductor-sdk CapabilityCredentialRefresh.
+type credentialRefreshHandler struct{}
+
+// Execute lists the pack's pods in all namespaces and deletes those for which
+// hasPullFailure reports true; every other pod is left untouched.
+//
+// A missing TenantKubeClient or PackInstalledName produces a ValidationFailure
+// result; list and delete errors produce an ExecutionFailure result, and the
+// first failing delete stops the run. The returned error is always nil.
+func (h *credentialRefreshHandler) Execute(ctx context.Context, params ExecuteParams) (runnerlib.OperationResultSpec, error) {
+	opStart := time.Now().UTC()
+
+	switch {
+	case params.TenantKubeClient == nil:
+		return failureResult(runnerlib.CapabilityCredentialRefresh, opStart, runnerlib.ValidationFailure,
+			"credential-refresh requires TenantKubeClient"), nil
+	case params.PackInstalledName == "":
+		return failureResult(runnerlib.CapabilityCredentialRefresh, opStart, runnerlib.ValidationFailure,
+			"credential-refresh requires PackInstalledName"), nil
+	}
+
+	kube := params.TenantKubeClient
+	labelSelector := packNameLabel + "=" + params.PackInstalledName
+
+	pods, err := kube.CoreV1().Pods("").List(ctx, metav1.ListOptions{
+		LabelSelector: labelSelector,
+	})
+	if err != nil {
+		return failureResult(runnerlib.CapabilityCredentialRefresh, opStart, runnerlib.ExecutionFailure,
+			fmt.Sprintf("list pods with label %s on cluster %s: %v", labelSelector, params.ClusterRef, err)), nil
+	}
+
+	// Only pods waiting in ImagePullBackOff or ErrImagePull are deleted.
+	stepStart := time.Now().UTC()
+	removed := 0
+	for i := range pods.Items {
+		candidate := &pods.Items[i]
+		if !hasPullFailure(*candidate) {
+			continue
+		}
+		if err := kube.CoreV1().Pods(candidate.Namespace).Delete(ctx, candidate.Name, metav1.DeleteOptions{}); err != nil {
+			return failureResult(runnerlib.CapabilityCredentialRefresh, opStart, runnerlib.ExecutionFailure,
+				fmt.Sprintf("delete pod %s/%s on cluster %s: %v", candidate.Namespace, candidate.Name, params.ClusterRef, err)), nil
+		}
+		removed++
+		podKey := candidate.Namespace + "/" + candidate.Name
+		params.Log().Info("credential-refresh: deleted pod for image pull retry",
+			"cluster", params.ClusterRef, "pod", podKey)
+	}
+
+	step := runnerlib.StepResult{
+		Name:      "delete-image-pull-failed-pods",
+		Status:    runnerlib.ResultSucceeded,
+		StartedAt: stepStart,
+	}
+	result := runnerlib.OperationResultSpec{
+		Capability:  runnerlib.CapabilityCredentialRefresh,
+		Status:      runnerlib.ResultSucceeded,
+		StartedAt:   opStart,
+		CompletedAt: time.Now().UTC(),
+		Artifacts:   []runnerlib.ArtifactRef{},
+	}
+	step.CompletedAt = time.Now().UTC()
+	step.Message = fmt.Sprintf("%d pod(s) deleted for image pull retry on cluster %s", removed, params.ClusterRef)
+	result.Steps = []runnerlib.StepResult{step}
+	return result, nil
+}
+
+// hasPullFailure reports whether any regular or init container of pod is
+// waiting with reason ImagePullBackOff or ErrImagePull, i.e. the pod needs a
+// fresh image pull attempt.
+func hasPullFailure(pod corev1.Pod) bool {
+	statusGroups := [][]corev1.ContainerStatus{
+		pod.Status.ContainerStatuses,
+		pod.Status.InitContainerStatuses,
+	}
+	for _, group := range statusGroups {
+		for i := range group {
+			waiting := group[i].State.Waiting
+			if waiting == nil {
+				continue
+			}
+			if waiting.Reason == "ImagePullBackOff" || waiting.Reason == "ErrImagePull" {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/internal/capability/watchdog_test.go b/internal/capability/watchdog_test.go
new file mode 100644
index 0000000..66ecf98
--- /dev/null
+++ b/internal/capability/watchdog_test.go
@@ -0,0 +1,322 @@
+package capability
+
+import (
+	"context"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	dynfake "k8s.io/client-go/dynamic/fake"
+	kubefake "k8s.io/client-go/kubernetes/fake"
+
+	"github.com/ontai-dev/conductor-sdk/runnerlib"
+)
+
+// setupWatchdogScheme builds a runtime.Scheme that knows just the GVKs the
+// watchdog handler tests touch. For each resource both the item kind and its
+// "<Kind>List" companion are registered, all at version v1.
+func setupWatchdogScheme() *runtime.Scheme {
+	scheme := runtime.NewScheme()
+	resources := []struct{ group, kind string }{
+		{"storage.k8s.io", "VolumeAttachment"},
+		{"", "PersistentVolumeClaim"},
+		{"apps", "Deployment"},
+		{"apps", "StatefulSet"},
+	}
+	for _, res := range resources {
+		for _, kind := range []string{res.kind, res.kind + "List"} {
+			gvk := schema.GroupVersionKind{Group: res.group, Version: "v1", Kind: kind}
+			scheme.AddKnownTypeWithName(gvk, &runtime.Unknown{})
+		}
+	}
+	return scheme
+}
+
+// TestPodRestartHandler_NilClient checks that Execute with no TenantKubeClient
+// reports a failed result categorized as ValidationFailure instead of panicking.
+func TestPodRestartHandler_NilClient(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityPodRestart,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+	}
+	res, err := (&podRestartHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestPodRestartHandler_NilPackInstalledName checks that Execute with an empty
+// PackInstalledName (client present) reports a ValidationFailure.
+func TestPodRestartHandler_NilPackInstalledName(t *testing.T) {
+	params := ExecuteParams{
+		Capability: runnerlib.CapabilityPodRestart,
+		ClusterRef: "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantKubeClient: kubefake.NewSimpleClientset(),
+		},
+	}
+	res, err := (&podRestartHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestPodRestartHandler_DeletesPods seeds the fake clientset with one pod
+// labelled for pack "nginx" and checks that Execute succeeds and the pod can no
+// longer be fetched afterwards.
+func TestPodRestartHandler_DeletesPods(t *testing.T) {
+	ctx := context.Background()
+	meta := metav1.ObjectMeta{
+		Name:      "nginx-abc",
+		Namespace: "default",
+		Labels:    map[string]string{packNameLabel: "nginx"},
+	}
+	client := kubefake.NewSimpleClientset(&corev1.Pod{ObjectMeta: meta})
+
+	res, err := (&podRestartHandler{}).Execute(ctx, ExecuteParams{
+		Capability:        runnerlib.CapabilityPodRestart,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+		ExecuteClients:    ExecuteClients{TenantKubeClient: client},
+	})
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultSucceeded {
+		t.Errorf("expected ResultSucceeded, got %q; reason %+v", res.Status, res.FailureReason)
+	}
+
+	// A successful Get means the pod survived the restart.
+	if _, getErr := client.CoreV1().Pods("default").Get(ctx, "nginx-abc", metav1.GetOptions{}); getErr == nil {
+		t.Error("expected pod to be deleted but it still exists")
+	}
+}
+
+// TestResourcePatchHandler_NilClient checks that Execute with no
+// TenantDynamicClient reports a failed result categorized as ValidationFailure
+// instead of panicking.
+func TestResourcePatchHandler_NilClient(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityResourcePatch,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+	}
+	res, err := (&resourcePatchHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestResourcePatchHandler_NilPackInstalledName checks that Execute with an
+// empty PackInstalledName (client present) reports a ValidationFailure.
+func TestResourcePatchHandler_NilPackInstalledName(t *testing.T) {
+	params := ExecuteParams{
+		Capability: runnerlib.CapabilityResourcePatch,
+		ClusterRef: "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantDynamicClient: dynfake.NewSimpleDynamicClient(setupWatchdogScheme()),
+		},
+	}
+	res, err := (&resourcePatchHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestResourcePatchHandler_EmptyCluster runs Execute against a fake dynamic
+// client that holds no objects and expects a Succeeded result: having nothing
+// to patch is a valid no-op.
+func TestResourcePatchHandler_EmptyCluster(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityResourcePatch,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantDynamicClient: dynfake.NewSimpleDynamicClient(setupWatchdogScheme()),
+		},
+	}
+	res, err := (&resourcePatchHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultSucceeded {
+		t.Errorf("expected ResultSucceeded for empty cluster, got %q; reason %+v", res.Status, res.FailureReason)
+	}
+}
+
+// TestForceVolumeDetachHandler_NilClient checks that Execute with no
+// TenantDynamicClient reports a failed result categorized as ValidationFailure
+// instead of panicking.
+func TestForceVolumeDetachHandler_NilClient(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityForceVolumeDetach,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+	}
+	res, err := (&forceVolumeDetachHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestForceVolumeDetachHandler_NilPackInstalledName checks that Execute with an
+// empty PackInstalledName (client present) reports a ValidationFailure.
+func TestForceVolumeDetachHandler_NilPackInstalledName(t *testing.T) {
+	params := ExecuteParams{
+		Capability: runnerlib.CapabilityForceVolumeDetach,
+		ClusterRef: "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantDynamicClient: dynfake.NewSimpleDynamicClient(setupWatchdogScheme()),
+		},
+	}
+	res, err := (&forceVolumeDetachHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestForceVolumeDetachHandler_NoPVCs runs Execute against a fake dynamic client
+// that holds no PVCs and expects a Succeeded result: nothing to detach is a
+// valid no-op.
+func TestForceVolumeDetachHandler_NoPVCs(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityForceVolumeDetach,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantDynamicClient: dynfake.NewSimpleDynamicClient(setupWatchdogScheme()),
+		},
+	}
+	res, err := (&forceVolumeDetachHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultSucceeded {
+		t.Errorf("expected ResultSucceeded for no PVCs, got %q; reason %+v", res.Status, res.FailureReason)
+	}
+}
+
+// TestCredentialRefreshHandler_NilClient checks that Execute with no
+// TenantKubeClient reports a failed result categorized as ValidationFailure
+// instead of panicking.
+func TestCredentialRefreshHandler_NilClient(t *testing.T) {
+	params := ExecuteParams{
+		Capability:        runnerlib.CapabilityCredentialRefresh,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+	}
+	res, err := (&credentialRefreshHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestCredentialRefreshHandler_NilPackInstalledName checks that Execute with an
+// empty PackInstalledName (client present) reports a ValidationFailure.
+func TestCredentialRefreshHandler_NilPackInstalledName(t *testing.T) {
+	params := ExecuteParams{
+		Capability: runnerlib.CapabilityCredentialRefresh,
+		ClusterRef: "ccs-dev",
+		ExecuteClients: ExecuteClients{
+			TenantKubeClient: kubefake.NewSimpleClientset(),
+		},
+	}
+	res, err := (&credentialRefreshHandler{}).Execute(context.Background(), params)
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultFailed {
+		t.Errorf("expected ResultFailed, got %q", res.Status)
+	}
+	if reason := res.FailureReason; reason == nil || reason.Category != runnerlib.ValidationFailure {
+		t.Errorf("expected ValidationFailure, got %+v", reason)
+	}
+}
+
+// TestCredentialRefreshHandler_DeletesPullFailedPods seeds two pods of pack
+// "nginx" (one waiting in ImagePullBackOff, one running) and checks that
+// Execute deletes only the pull-failed pod.
+func TestCredentialRefreshHandler_DeletesPullFailedPods(t *testing.T) {
+	ctx := context.Background()
+
+	// packPod builds a pack-labelled pod in "default" with a single container
+	// status in the given state.
+	packPod := func(name string, state corev1.ContainerState) *corev1.Pod {
+		return &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      name,
+				Namespace: "default",
+				Labels:    map[string]string{packNameLabel: "nginx"},
+			},
+			Status: corev1.PodStatus{
+				ContainerStatuses: []corev1.ContainerStatus{{State: state}},
+			},
+		}
+	}
+	client := kubefake.NewSimpleClientset(
+		packPod("nginx-pull-fail", corev1.ContainerState{
+			Waiting: &corev1.ContainerStateWaiting{Reason: "ImagePullBackOff"},
+		}),
+		packPod("nginx-running", corev1.ContainerState{
+			Running: &corev1.ContainerStateRunning{},
+		}),
+	)
+
+	res, err := (&credentialRefreshHandler{}).Execute(ctx, ExecuteParams{
+		Capability:        runnerlib.CapabilityCredentialRefresh,
+		PackInstalledName: "nginx",
+		ClusterRef:        "ccs-dev",
+		ExecuteClients:    ExecuteClients{TenantKubeClient: client},
+	})
+	if err != nil {
+		t.Fatalf("Execute returned unexpected error: %v", err)
+	}
+	if res.Status != runnerlib.ResultSucceeded {
+		t.Errorf("expected ResultSucceeded, got %q; reason %+v", res.Status, res.FailureReason)
+	}
+
+	pods := client.CoreV1().Pods("default")
+	// The pull-failed pod must be gone.
+	if _, getErr := pods.Get(ctx, "nginx-pull-fail", metav1.GetOptions{}); getErr == nil {
+		t.Error("expected pull-fail pod to be deleted but it still exists")
+	}
+	// The running pod must still be there.
+	if _, getErr := pods.Get(ctx, "nginx-running", metav1.GetOptions{}); getErr != nil {
+		t.Errorf("expected running pod to be preserved but got: %v", getErr)
+	}
+}
diff --git a/internal/capability/wrapper.go b/internal/capability/wrapper.go
index cf2ee6b..1065268 100644
--- a/internal/capability/wrapper.go
+++ b/internal/capability/wrapper.go
@@ -1019,9 +1019,10 @@ func ensureNamespaces(ctx context.Context, dynClient dynamic.Interface, manifest
 	for ns := range needed {
 		nsJSON := []byte(fmt.Sprintf(
 			`{"apiVersion":"v1","kind":"Namespace","metadata":{"name":%q}}`, ns))
+		forceNS := true
 		_, err := dynClient.Resource(namespaceGVR).Patch(
 			ctx, ns, types.ApplyPatchType, nsJSON,
-			metav1.PatchOptions{FieldManager: "conductor-pack-deploy"},
+			metav1.PatchOptions{FieldManager: "conductor-pack-deploy", Force: &forceNS},
 		)
 		if err != nil {
 			return created, fmt.Errorf("pre-create namespace %q: %w", ns, err)
@@ -1103,18 +1104,23 @@ func stageForKind(kind string) string {
 // ---------------------------------------------------------------------------
 
 // applyParsedManifest applies m to the cluster via server-side apply.
+// Force is set to true on both the namespaced and the cluster-scoped patch so
+// conductor-pack-deploy takes field ownership from any prior manager (e.g.
+// kubectl) without returning a conflict error.
 func applyParsedManifest(ctx context.Context, dynClient dynamic.Interface, m parsedManifest) error {
 	gvr := gvrFromAPIVersionKind(m.apiVersion, m.kind)
+	// PatchOptions.Force is a pointer field, hence the addressable local.
+	force := true
 	if m.namespace != "" {
 		_, err := dynClient.Resource(gvr).Namespace(m.namespace).
 			Patch(ctx, m.name, types.ApplyPatchType, m.jsonData, metav1.PatchOptions{
 				FieldManager: "conductor-pack-deploy",
+				Force:        &force,
 			})
 		return err
 	}
 	_, err := dynClient.Resource(gvr).
 		Patch(ctx, m.name, types.ApplyPatchType, m.jsonData, metav1.PatchOptions{
 			FieldManager: "conductor-pack-deploy",
+			Force:        &force,
 		})
 	return err
 }
diff --git a/internal/federation/metrics.go b/internal/federation/metrics.go
new file mode 100644
index 0000000..33e11c2
--- /dev/null
+++ b/internal/federation/metrics.go
@@ -0,0 +1,29 @@
+package federation
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
+)
+
+// metricActiveStreams is the gauge for the number of live streams currently
+// accepted by FederationServer. ADR-F6 D4.
+var metricActiveStreams = prometheus.NewGauge(prometheus.GaugeOpts{
+	Name: "conductor_federation_stream_active_count",
+	Help: "Current number of live streams accepted by FederationServer.",
+})
+
+// metricReconnectsTotal is the per-cluster counter of reconnect events; the
+// "cluster_id" label carries the tenant cluster ID. ADR-F6 D4.
+var metricReconnectsTotal = prometheus.NewCounterVec(
+	prometheus.CounterOpts{
+		Name: "conductor_federation_stream_reconnects_total",
+		Help: "Total number of stream reconnect events observed, labeled by cluster ID.",
+	},
+	[]string{"cluster_id"},
+)
+
+// init registers the federation metrics with the controller-runtime metrics
+// registry. MustRegister panics if a collector cannot be registered.
+func init() {
+	for _, collector := range []prometheus.Collector{metricActiveStreams, metricReconnectsTotal} {
+		ctrlmetrics.Registry.MustRegister(collector)
+	}
+}
diff --git a/internal/federation/server.go b/internal/federation/server.go
index 6fa4daf..8f78ecb 100644
--- a/internal/federation/server.go
+++ b/internal/federation/server.go
@@ -7,8 +7,10 @@ import (
 	"fmt"
 	"net"
 	"sync"
+	"sync/atomic"
 	"time"
 
+	"golang.org/x/time/rate"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/credentials"
@@ -47,6 +49,19 @@ type clusterStatus struct {
 	missedHeartbeats int
 }
 
+// FederationServerOptions configures the stream admission limits for FederationServer.
+// Zero values disable the corresponding limit. newFederationServer installs a
+// limit only when the value is positive, so negative values disable it as well.
+// ADR-F6.
+type FederationServerOptions struct {
+	// MaxConcurrentStreams is the maximum number of simultaneous active streams.
+	// When reached, new connections receive codes.ResourceExhausted.
+	// 0 means unlimited (no semaphore). The [1, 1000] range and the default of
+	// 50 are applied by ParseFederationMaxStreams when the value is read from
+	// the environment; this struct performs no range validation itself.
+	MaxConcurrentStreams int
+
+	// AdmissionRate is the token-bucket refill rate in tokens per second.
+	// Burst capacity is 2x this value. 0 means unlimited. The default of 5 is
+	// applied by ParseFederationAdmissionRate when the value is read from the
+	// environment.
+	AdmissionRate int
+}
+
 // FederationServer is the management-side federation gRPC server.
 // It listens on the federation port with mutual TLS, extracts cluster IDs
 // from client certificate SANs, and maintains the bidirectional stream with
@@ -63,31 +78,82 @@ type FederationServer struct {
 	mu sync.RWMutex
 	// connectedClusters maps clusterID → stream status for heartbeat tracking.
 	connectedClusters map[string]*clusterStatus
+
+	// semaphore limits concurrent active streams. nil = unlimited. ADR-F6 D1.
+	semaphore chan struct{}
+
+	// admissionLimiter rate-limits new stream accepts. nil = unlimited. ADR-F6 D2.
+	admissionLimiter *rate.Limiter
+
+	// activeCount is the live stream count, kept in sync with the semaphore. ADR-F6 D4.
+	activeCount atomic.Int64
 }
 
 // NewFederationServer constructs a FederationServer from certificate paths.
 // The server does not start until Start is called.
 // conductor-schema.md §18.
-func NewFederationServer(caCertPath, serverCertPath, serverKeyPath string, kubeClient kubernetes.Interface) (*FederationServer, error) {
+func NewFederationServer(caCertPath, serverCertPath, serverKeyPath string, kubeClient kubernetes.Interface, opts FederationServerOptions) (*FederationServer, error) {
 	tlsCfg, err := BuildServerTLSConfig(caCertPath, serverCertPath, serverKeyPath)
 	if err != nil {
 		return nil, fmt.Errorf("federation server TLS config: %w", err)
 	}
-	return &FederationServer{
-		tlsCfg:            tlsCfg,
-		kubeClient:        kubeClient,
-		connectedClusters: make(map[string]*clusterStatus),
-	}, nil
+	return newFederationServer(tlsCfg, kubeClient, opts), nil
 }
 
 // NewFederationServerFromTLS constructs a FederationServer from an already-built
 // tls.Config. Used in tests to inject a test TLS config directly.
-func NewFederationServerFromTLS(tlsCfg *tls.Config, kubeClient kubernetes.Interface) *FederationServer {
-	return &FederationServer{
+func NewFederationServerFromTLS(tlsCfg *tls.Config, kubeClient kubernetes.Interface, opts FederationServerOptions) *FederationServer {
+	return newFederationServer(tlsCfg, kubeClient, opts)
+}
+
+// newFederationServer is the shared constructor behind NewFederationServer and
+// NewFederationServerFromTLS. It wires the TLS config and kube client, then
+// applies opts: a positive MaxConcurrentStreams creates the stream semaphore
+// with that capacity, and a positive AdmissionRate creates a token-bucket
+// limiter refilling at that rate with a burst of twice the rate. Non-positive
+// values leave the corresponding field nil, which federationStream treats as
+// "no limit".
+func newFederationServer(tlsCfg *tls.Config, kubeClient kubernetes.Interface, opts FederationServerOptions) *FederationServer {
+	s := &FederationServer{
 		tlsCfg:            tlsCfg,
 		kubeClient:        kubeClient,
 		connectedClusters: make(map[string]*clusterStatus),
 	}
+	if opts.MaxConcurrentStreams > 0 {
+		// Buffered channel used as a counting semaphore: one slot per stream.
+		s.semaphore = make(chan struct{}, opts.MaxConcurrentStreams)
+	}
+	if opts.AdmissionRate > 0 {
+		s.admissionLimiter = rate.NewLimiter(rate.Limit(opts.AdmissionRate), 2*opts.AdmissionRate)
+	}
+	return s
+}
+
+// ActiveStreamCount reports how many streams are active right now, read
+// atomically from the server's live-stream counter. ADR-F6 D4.
+func (s *FederationServer) ActiveStreamCount() int64 {
+	active := s.activeCount.Load()
+	return active
+}
+
+// ParseFederationMaxStreams parses the FEDERATION_MAX_CONCURRENT_STREAMS env value.
+//
+// It returns 50 (the default) when v is empty. It also returns 50, after
+// printing a warning to stdout, when v is not a whole integer in [1, 1000];
+// this includes values with trailing text such as "50abc". The function never
+// returns 0, so a limit obtained from this parser is always enforced.
+// ADR-F6 D1.
+func ParseFederationMaxStreams(v string) int {
+	const defaultMaxStreams = 50
+	if v == "" {
+		return defaultMaxStreams
+	}
+	var (
+		n        int
+		trailing string
+	)
+	// Scanning "%d%s" yields exactly one item only when nothing but an integer
+	// is present; a plain "%d" would silently accept "50abc" as 50. The scan
+	// error is ignored on purpose: a clean integer always ends in an
+	// end-of-input error for the unmatched %s, so the item count is the signal.
+	parsed, _ := fmt.Sscanf(v, "%d%s", &n, &trailing)
+	if parsed != 1 || n <= 0 || n > 1000 {
+		fmt.Printf("federation server: invalid FEDERATION_MAX_CONCURRENT_STREAMS %q (must be 1-1000) — using default 50\n", v)
+		return defaultMaxStreams
+	}
+	return n
+}
+
+// ParseFederationAdmissionRate parses the FEDERATION_ADMISSION_RATE env value.
+//
+// It returns 5 (the default) when v is empty. It also returns 5, after
+// printing a warning to stdout, when v is not a whole integer greater than
+// zero; this includes values with trailing text such as "5x". The function
+// never returns 0, so rate limiting cannot be disabled through this parser.
+// ADR-F6 D2.
+func ParseFederationAdmissionRate(v string) int {
+	const defaultAdmissionRate = 5
+	if v == "" {
+		return defaultAdmissionRate
+	}
+	var (
+		n        int
+		trailing string
+	)
+	// Scanning "%d%s" yields exactly one item only when nothing but an integer
+	// is present; a plain "%d" would silently accept "5x" as 5. The scan error
+	// is ignored on purpose: a clean integer always ends in an end-of-input
+	// error for the unmatched %s, so the item count is the signal.
+	parsed, _ := fmt.Sscanf(v, "%d%s", &n, &trailing)
+	if parsed != 1 || n <= 0 {
+		fmt.Printf("federation server: invalid FEDERATION_ADMISSION_RATE %q (must be >0) — using default 5\n", v)
+		return defaultAdmissionRate
+	}
+	return n
 }
 
 // Start begins listening on addr and serves the federation gRPC stream with mutual TLS.
@@ -138,12 +204,39 @@ func (s *FederationServer) ConnectedClusterIDs() []string {
 // federationStream handles a single bidirectional stream from a connected tenant.
 // It implements the grpc.ServerStream interface handler for the FederationService/Stream method.
 func (s *FederationServer) federationStream(stream grpc.ServerStream) error {
+	// D2: admission rate-limit check before semaphore acquisition. ADR-F6.
+	if s.admissionLimiter != nil && !s.admissionLimiter.Allow() {
+		return status.Errorf(codes.ResourceExhausted, "federation server: admission rate limit exceeded")
+	}
+
+	// D1: semaphore -- reject when max concurrent streams reached. ADR-F6.
+	if s.semaphore != nil {
+		select {
+		case s.semaphore <- struct{}{}:
+			// slot acquired
+		default:
+			return status.Errorf(codes.ResourceExhausted, "federation server: max concurrent stream limit reached")
+		}
+		defer func() { <-s.semaphore }()
+	}
+
+	// Track active count and update the Prometheus gauge. ADR-F6 D4.
+	s.activeCount.Add(1)
+	metricActiveStreams.Inc()
+	defer func() {
+		s.activeCount.Add(-1)
+		metricActiveStreams.Dec()
+	}()
+
 	// Extract cluster ID from the peer TLS certificate SAN.
 	clusterID, err := s.clusterIDFromStream(stream)
 	if err != nil {
 		return status.Errorf(codes.Unauthenticated, "cluster ID extraction: %v", err)
 	}
 
+	// Count this as a reconnect event (every stream accept = one connection). ADR-F6 D4.
+	metricReconnectsTotal.WithLabelValues(clusterID).Inc()
+
 	// Register this cluster as connected.
 	cs := &clusterStatus{}
 	s.mu.Lock()
diff --git a/internal/federation/server_pool_test.go b/internal/federation/server_pool_test.go
new file mode 100644
index 0000000..8816c6a
--- /dev/null
+++ b/internal/federation/server_pool_test.go
@@ -0,0 +1,155 @@
+package federation
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+// acquireSlot exercises the server's semaphore directly, bypassing TLS cert
+// extraction. It never blocks. On success it returns a release func that frees
+// the slot and ok=true; when the semaphore is full it returns (nil, false).
+// A server without a semaphore always succeeds with a no-op release.
+func acquireSlot(s *FederationServer) (release func(), ok bool) {
+	sem := s.semaphore
+	if sem == nil {
+		return func() {}, true
+	}
+	select {
+	case sem <- struct{}{}:
+		return func() { <-sem }, true
+	default:
+	}
+	return nil, false
+}
+
+// TestFederationServer_RejectsWhenLimitReached fills both slots of a server
+// built with MaxConcurrentStreams=2 and checks that a third acquisition is
+// refused while the first two are still held. ADR-F6 D1.
+func TestFederationServer_RejectsWhenLimitReached(t *testing.T) {
+	s := newFederationServer(nil, nil, FederationServerOptions{MaxConcurrentStreams: 2})
+
+	// Occupy the full capacity; the slots are released when the test returns.
+	releaseFirst, okFirst := acquireSlot(s)
+	if !okFirst {
+		t.Fatal("expected slot 1 to be acquired")
+	}
+	defer releaseFirst()
+
+	releaseSecond, okSecond := acquireSlot(s)
+	if !okSecond {
+		t.Fatal("expected slot 2 to be acquired")
+	}
+	defer releaseSecond()
+
+	// With no free slot left, one more attempt has to fail.
+	if _, okThird := acquireSlot(s); okThird {
+		t.Error("expected slot 3 to be rejected (limit=2 reached)")
+	}
+}
+
+// TestFederationServer_AdmitsUpToLimit starts two goroutines against a server
+// built with MaxConcurrentStreams=2 and checks that both obtain a semaphore
+// slot. ADR-F6 D1.
+func TestFederationServer_AdmitsUpToLimit(t *testing.T) {
+	s := newFederationServer(nil, nil, FederationServerOptions{MaxConcurrentStreams: 2})
+
+	const streams = 2
+	// One token is sent per admitted goroutine; the buffer makes sends non-blocking.
+	admissions := make(chan struct{}, streams)
+
+	var wg sync.WaitGroup
+	wg.Add(streams)
+	for i := 0; i < streams; i++ {
+		go func() {
+			defer wg.Done()
+			release, ok := acquireSlot(s)
+			if !ok {
+				return
+			}
+			defer release()
+			admissions <- struct{}{}
+			// Hold the slot briefly.
+			time.Sleep(10 * time.Millisecond)
+		}()
+	}
+	wg.Wait()
+
+	if admitted := len(admissions); admitted != 2 {
+		t.Errorf("expected 2 admitted streams, got %d", admitted)
+	}
+}
+
+// TestActiveStreamCount_DecreasesOnDisconnect replays by hand the bookkeeping
+// federationStream performs (semaphore slot, activeCount, gauge) and checks
+// that ActiveStreamCount goes 0 -> 1 -> 0 across connect and disconnect.
+// ADR-F6 D4.
+func TestActiveStreamCount_DecreasesOnDisconnect(t *testing.T) {
+	s := newFederationServer(nil, nil, FederationServerOptions{MaxConcurrentStreams: 5})
+
+	if before := s.ActiveStreamCount(); before != 0 {
+		t.Fatalf("expected ActiveStreamCount=0 before any stream, got %d", before)
+	}
+
+	// Connect: take a slot, then bump the counter and the gauge.
+	release, acquired := acquireSlot(s)
+	if !acquired {
+		t.Fatal("expected slot to be acquired")
+	}
+	s.activeCount.Add(1)
+	metricActiveStreams.Inc()
+
+	if during := s.ActiveStreamCount(); during != 1 {
+		t.Errorf("expected ActiveStreamCount=1 after connect, got %d", during)
+	}
+
+	// Disconnect: undo the bookkeeping, then free the slot.
+	s.activeCount.Add(-1)
+	metricActiveStreams.Dec()
+	release()
+
+	if after := s.ActiveStreamCount(); after != 0 {
+		t.Errorf("expected ActiveStreamCount=0 after disconnect, got %d", after)
+	}
+}
+
+// TestParseFederationMaxStreams verifies the env var parser. ADR-F6 D1.
+func TestParseFederationMaxStreams(t *testing.T) {
+	cases := []struct {
+		input string
+		want  int
+	}{
+		{"", 50},
+		{"10", 10},
+		{"1000", 1000},
+		{"0", 50},    // out of range: default
+		{"1001", 50}, // out of range: default
+		{"bad", 50},  // invalid: default
+		{"-5", 50},   // negative: default
+	}
+	for _, tc := range cases {
+		if got := ParseFederationMaxStreams(tc.input); got != tc.want {
+			t.Errorf("ParseFederationMaxStreams(%q) = %d, want %d", tc.input, got, tc.want)
+		}
+	}
+}
+
+// TestParseFederationAdmissionRate verifies the env var parser. ADR-F6 D2.
+func TestParseFederationAdmissionRate(t *testing.T) {
+	cases := []struct {
+		input string
+		want  int
+	}{
+		{"", 5},
+		{"10", 10},
+		{"1", 1},
+		{"0", 5},   // zero invalid: default
+		{"-1", 5},  // negative: default
+		{"bad", 5}, // invalid: default
+	}
+	for _, tc := range cases {
+		if got := ParseFederationAdmissionRate(tc.input); got != tc.want {
+			t.Errorf("ParseFederationAdmissionRate(%q) = %d, want %d", tc.input, got, tc.want)
+		}
+	}
+}
diff --git a/internal/identity/identity.go b/internal/identity/identity.go
index 1a5caff..3c84ab0 100644
--- a/internal/identity/identity.go
+++ b/internal/identity/identity.go
@@ -54,7 +54,7 @@ func EnsureSeamMembership(ctx context.Context, c client.Client) error {
 		Spec: seamv1alpha1.SeamMembershipSpec{
 			AppIdentityRef:    id.OperatorName(),
 			DomainIdentityRef: id.OperatorName(),
-			PrincipalRef:      "system:serviceaccount:seam-system:" + id.OperatorName(),
+			PrincipalRef:      "system:serviceaccount:ont-system:" + id.OperatorName(),
 			Tier:              "infrastructure",
 		},
 	}
diff --git a/internal/identity/identity_test.go b/internal/identity/identity_test.go
index 81dbf8f..02e4144 100644
--- a/internal/identity/identity_test.go
+++ b/internal/identity/identity_test.go
@@ -92,6 +92,10 @@ func TestEnsureSeamMembership_Creates(t *testing.T) {
 	if sm.Spec.Tier != "infrastructure" {
 		t.Errorf("Tier = %q, want %q", sm.Spec.Tier, "infrastructure")
 	}
+	wantPrincipal := "system:serviceaccount:ont-system:conductor"
+	if sm.Spec.PrincipalRef != wantPrincipal {
+		t.Errorf("PrincipalRef = %q, want %q (conductor runs in ont-system, not seam-system)", sm.Spec.PrincipalRef, wantPrincipal)
+	}
 }
 
 func TestEnsureSeamMembership_Idempotent(t *testing.T) {
diff --git a/internal/kernel/agent.go b/internal/kernel/agent.go
index 19d766a..dc7b2f3 100644
--- a/internal/kernel/agent.go
+++ b/internal/kernel/agent.go
@@ -333,12 +333,75 @@ func RunAgent(goCtx context.Context, execCtx config.ExecutionContext, client kub
 			execCtx.ClusterRef)
 	}
 
+	// PackSourceVersionLoop — role=management only. Polls Helm chart repository
+	// index.yaml for each Helm-backed PackDelivery in seam-system and emits an
+	// UpstreamVersionAvailable DriftSignal when a newer chart version is found.
+	// RECON-CMN1.
+	var packSourceVersionLoop *agent.PackSourceVersionLoop
+	if role == RoleManagement {
+		packSourceVersionLoop = agent.NewPackSourceVersionLoop(dynamicClient, ns)
+		fmt.Printf("conductor agent: cluster=%q pack source version loop enabled (management role)\n",
+			execCtx.ClusterRef)
+	}
+
+	// ESOHealthLoop — role=management only. Polls ExternalSecret CRs in seam-system
+	// and emits ExternalSecretSyncFailed DriftSignals on sync errors.
+	// Skips cleanly when ESO CRDs are not installed. RECON-K3.
+	var esoHealthLoop *agent.ESOHealthLoop
+	if role == RoleManagement {
+		esoHealthLoop = agent.NewESOHealthLoop(dynamicClient, ns, execCtx.ClusterRef)
+		fmt.Printf("conductor agent: cluster=%q ESO health loop enabled (management role)\n",
+			execCtx.ClusterRef)
+	}
+
+	// PolicyReportDriftLoop — role=management only. Polls Kyverno PolicyReport and
+	// ClusterPolicyReport CRs and emits KyvernoPolicyViolation DriftSignals on failures.
+	// Skips cleanly when Kyverno CRDs are not installed. RECON-L2.
+	var policyReportDriftLoop *agent.PolicyReportDriftLoop
+	if role == RoleManagement {
+		policyReportDriftLoop = agent.NewPolicyReportDriftLoop(dynamicClient, ns, execCtx.ClusterRef)
+		fmt.Printf("conductor agent: cluster=%q policy report drift loop enabled (management role)\n",
+			execCtx.ClusterRef)
+	}
+
+	// VulnerabilityDriftLoop — role=management only. Polls Trivy Operator
+	// VulnerabilityReport CRs and emits VulnerableImageDetected DriftSignals for
+	// CRITICAL severity findings. Skips cleanly when Trivy CRDs not installed. RECON-M2.
+	var vulnerabilityDriftLoop *agent.VulnerabilityDriftLoop
+	if role == RoleManagement {
+		vulnerabilityDriftLoop = agent.NewVulnerabilityDriftLoop(dynamicClient, ns, execCtx.ClusterRef)
+		fmt.Printf("conductor agent: cluster=%q vulnerability drift loop enabled (management role)\n",
+			execCtx.ClusterRef)
+	}
+
+	// BackupHealthLoop — role=management only. Polls Velero BackupStorageLocation and
+	// Backup CRs; emits BackupStorageUnavailable and BackupRPOBreached DriftSignals.
+	// Skips cleanly when Velero CRDs are not installed. RECON-N2.
+	var backupHealthLoop *agent.BackupHealthLoop
+	if role == RoleManagement {
+		backupHealthLoop = agent.NewBackupHealthLoop(dynamicClient, ns, execCtx.ClusterRef)
+		fmt.Printf("conductor agent: cluster=%q backup health loop enabled (management role)\n",
+			execCtx.ClusterRef)
+	}
+
 	if runtimeDriftHandler != nil {
 		runtimeDriftHandler.WithOperatorContextWatcher(ocWatcher)
 	}
 	if packPodHealthLoop != nil {
 		packPodHealthLoop.WithOperatorContextWatcher(ocWatcher)
 	}
+	if esoHealthLoop != nil {
+		esoHealthLoop.WithOperatorContextWatcher(ocWatcher)
+	}
+	if policyReportDriftLoop != nil {
+		policyReportDriftLoop.WithOperatorContextWatcher(ocWatcher)
+	}
+	if vulnerabilityDriftLoop != nil {
+		vulnerabilityDriftLoop.WithOperatorContextWatcher(ocWatcher)
+	}
+	if backupHealthLoop != nil {
+		backupHealthLoop.WithOperatorContextWatcher(ocWatcher)
+	}
 
 	// Phase 3b — Start the federation channel listener/client.
 	// Management Conductor: start FederationServer when FEDERATION_CA_CERT_PATH,
@@ -354,7 +417,11 @@ func RunAgent(goCtx context.Context, execCtx config.ExecutionContext, client kub
 
 	if fedCACertPath != "" && fedServerCertPath != "" && fedServerKeyPath != "" {
 		// Management Conductor: start the federation server.
-		fedServer, fedErr := federation.NewFederationServer(fedCACertPath, fedServerCertPath, fedServerKeyPath, nil)
+		fedOpts := federation.FederationServerOptions{
+			MaxConcurrentStreams: federation.ParseFederationMaxStreams(os.Getenv("FEDERATION_MAX_CONCURRENT_STREAMS")),
+			AdmissionRate:        federation.ParseFederationAdmissionRate(os.Getenv("FEDERATION_ADMISSION_RATE")),
+		}
+		fedServer, fedErr := federation.NewFederationServer(fedCACertPath, fedServerCertPath, fedServerKeyPath, nil, fedOpts)
 		if fedErr != nil {
 			return fmt.Errorf("conductor agent: build federation server: %w", fedErr)
 		}
@@ -453,7 +520,7 @@ func RunAgent(goCtx context.Context, execCtx config.ExecutionContext, client kub
 		"", // identity: resolved from hostname inside RunLeaderElection
 		agent.LeaderCallbacks{
 			OnStartedLeading: func(leaderCtx context.Context) {
-				onLeaderStart(leaderCtx, execCtx.ClusterRef, ns, manifest, publisher, reconciler, signingLoop, snapshotPullLoop, packInstancePullLoop, packReceiptDriftLoop, rbacProfilePullLoop, rbacPolicyPullLoop, driftSignalHandler, talosVersionDriftLoop, kubernetesVersionDriftLoop, packPodHealthLoop, runtimeDriftHandler, ocWatcher, clusterNodeHealthLoop, dynamicClient)
+				onLeaderStart(leaderCtx, execCtx.ClusterRef, ns, manifest, publisher, reconciler, signingLoop, snapshotPullLoop, packInstancePullLoop, packReceiptDriftLoop, rbacProfilePullLoop, rbacPolicyPullLoop, driftSignalHandler, talosVersionDriftLoop, kubernetesVersionDriftLoop, packPodHealthLoop, runtimeDriftHandler, ocWatcher, clusterNodeHealthLoop, packSourceVersionLoop, esoHealthLoop, policyReportDriftLoop, vulnerabilityDriftLoop, backupHealthLoop, dynamicClient)
 			},
 			OnStoppedLeading: func() {
 				fmt.Printf("conductor agent: cluster=%q lost leadership — entering standby\n",
@@ -490,6 +557,11 @@ func onLeaderStart(
 	runtimeDriftHandler *agent.RuntimeDriftHandler,
 	ocWatcher *agent.OperatorContextWatcher,
 	clusterNodeHealthLoop *agent.ClusterNodeHealthLoop,
+	packSourceVersionLoop *agent.PackSourceVersionLoop,
+	esoHealthLoop *agent.ESOHealthLoop,
+	policyReportDriftLoop *agent.PolicyReportDriftLoop,
+	vulnerabilityDriftLoop *agent.VulnerabilityDriftLoop,
+	backupHealthLoop *agent.BackupHealthLoop,
 	dynamicClient dynamic.Interface,
 ) {
 	// Publish capability manifest to RunnerConfig status with background retry.
@@ -625,6 +697,42 @@ func onLeaderStart(
 		go clusterNodeHealthLoop.Run(leaderCtx, reconcileInterval)
 	}
 
+	// Start PackSourceVersionLoop (management cluster only). Polls Helm chart repository
+	// index.yaml for each Helm-backed PackDelivery in the management namespace and emits
+	// UpstreamVersionAvailable DriftSignals. RECON-CMN1.
+	const packVersionInterval = 6 * time.Hour
+	if packSourceVersionLoop != nil {
+		go packSourceVersionLoop.Run(leaderCtx, packVersionInterval)
+	}
+
+	// Start ESOHealthLoop (management cluster only). Polls ExternalSecret CRs for sync
+	// failures and emits ExternalSecretSyncFailed DriftSignals. Skips when ESO CRDs absent.
+	// RECON-K3.
+	if esoHealthLoop != nil {
+		go esoHealthLoop.Run(leaderCtx, reconcileInterval)
+	}
+
+	// Start PolicyReportDriftLoop (management cluster only). Polls Kyverno PolicyReport and
+	// ClusterPolicyReport CRs and emits KyvernoPolicyViolation DriftSignals. Skips when
+	// Kyverno CRDs absent. RECON-L2.
+	if policyReportDriftLoop != nil {
+		go policyReportDriftLoop.Run(leaderCtx, reconcileInterval)
+	}
+
+	// Start VulnerabilityDriftLoop (management cluster only). Polls Trivy Operator
+	// VulnerabilityReport CRs and emits VulnerableImageDetected DriftSignals for CRITICAL
+	// findings. Skips when Trivy CRDs absent. RECON-M2.
+	if vulnerabilityDriftLoop != nil {
+		go vulnerabilityDriftLoop.Run(leaderCtx, reconcileInterval)
+	}
+
+	// Start BackupHealthLoop (management cluster only). Polls Velero BackupStorageLocation
+	// and Backup CRs; emits BackupStorageUnavailable and BackupRPOBreached DriftSignals.
+	// Skips when Velero CRDs absent. RECON-N2.
+	if backupHealthLoop != nil {
+		go backupHealthLoop.Run(leaderCtx, reconcileInterval)
+	}
+
 	// Mark InfrastructureTalosCluster Ready=True (tenant clusters only).
 	// snapshotPullLoop non-nil indicates role=tenant. Conductor signals readiness
 	// to management once leadership is established. guardian-schema.md §3.
diff --git a/test/e2e/backup_health_loop_test.go b/test/e2e/backup_health_loop_test.go
new file mode 100644
index 0000000..c8bf20c
--- /dev/null
+++ b/test/e2e/backup_health_loop_test.go
@@ -0,0 +1,41 @@
+package e2e_test
+
+// backup_health_loop_test.go -- live cluster verification that BackupHealthLoop
+// correctly detects Velero BSL unavailability and RPO breaches, emitting
+// BackupStorageUnavailable and BackupRPOBreached DriftSignals.
+//
+// Pre-conditions:
+//   - MGMT_KUBECONFIG set; ccs-mgmt fully onboarded with Guardian operational.
+//   - Velero PackDelivery deployed to seam-system (RECON-N1 closed).
+//   - At least one BackupStorageLocation present in seam-system.
+//   - Conductor agent role=management running with BackupHealthLoop enabled.
+//
+// What this test verifies (RECON-N2):
+//   - BackupHealthLoop creates a BackupStorageUnavailable DriftSignal in seam-system
+//     when a BackupStorageLocation is not in phase=Available.
+//   - BackupHealthLoop creates a BackupRPOBreached DriftSignal when no successful
+//     backup is younger than 25 hours.
+//   - DriftSignal spec.signalKind == "BackupStorageUnavailable" or "BackupRPOBreached".
+//   - After the BSL returns to Available and a recent backup completes, the loop
+//     confirms both DriftSignals (state=confirmed).
+//   - Under AutonomyLevel=observe-only, no DriftSignal is created.
+
+import (
+	"testing"
+)
+
+func TestBackupHealthLoop_BSLUnavailableEmitsDriftSignal(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and Velero deployed with a degraded BackupStorageLocation with RECON-N1 and RECON-N2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestBackupHealthLoop_RPOBreachedEmitsDriftSignal(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and Velero deployed with no successful backup within RPO window with RECON-N1 and RECON-N2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestBackupHealthLoop_ConfirmsSignalAfterRecovery(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and existing BackupStorageUnavailable or BackupRPOBreached DriftSignal with RECON-N2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestBackupHealthLoop_ObserveOnly_NoSignalCreated(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and OperatorContext with autonomyLevel=observe-only with RECON-N2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
diff --git a/test/e2e/eso_health_loop_test.go b/test/e2e/eso_health_loop_test.go
new file mode 100644
index 0000000..a702ea2
--- /dev/null
+++ b/test/e2e/eso_health_loop_test.go
@@ -0,0 +1,34 @@
+package e2e_test
+
+// eso_health_loop_test.go -- live cluster verification that ESOHealthLoop correctly
+// detects ExternalSecret sync failures and emits ExternalSecretSyncFailed DriftSignals.
+//
+// Pre-conditions:
+//   - MGMT_KUBECONFIG set; ccs-mgmt fully onboarded with Guardian operational.
+//   - External Secrets Operator PackDelivery deployed to seam-system (RECON-K1 closed).
+//   - At least one ExternalSecret CR present in seam-system pointing to a secret store.
+//   - Conductor agent role=management running with ESOHealthLoop enabled.
+//
+// What this test verifies (RECON-K3):
+//   - ESOHealthLoop creates an ExternalSecretSyncFailed DriftSignal in seam-system
+//     when an ExternalSecret has a Ready=False or Synced=False condition.
+//   - DriftSignal spec.signalKind == "ExternalSecretSyncFailed".
+//   - After the ExternalSecret recovers (Ready=True), the loop confirms the
+//     DriftSignal (state=confirmed) within the next poll interval.
+//   - Under AutonomyLevel=observe-only, no DriftSignal is created.
+
+import (
+	"testing"
+)
+
+func TestESOHealthLoop_SyncFailedEmitsDriftSignal(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and ESO PackDelivery deployed to seam-system with RECON-K1 and RECON-K3 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestESOHealthLoop_ConfirmsSignalOnRecovery(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and an existing ExternalSecretSyncFailed DriftSignal with RECON-K1 and RECON-K3 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestESOHealthLoop_ObserveOnly_NoSignalCreated(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and OperatorContext with autonomyLevel=observe-only with RECON-K3 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
diff --git a/test/e2e/pack_source_version_loop_test.go b/test/e2e/pack_source_version_loop_test.go
new file mode 100644
index 0000000..9e17bc8
--- /dev/null
+++ b/test/e2e/pack_source_version_loop_test.go
@@ -0,0 +1,36 @@
+package e2e_test
+
+// pack_source_version_loop_test.go -- live cluster verification that the
+// PackSourceVersionLoop correctly detects and signals upstream Helm chart
+// version availability for extension PackDeliveries on ccs-mgmt.
+//
+// Pre-conditions:
+//   - MGMT_KUBECONFIG set; ccs-mgmt fully onboarded with Guardian operational.
+//   - At least one Helm-backed PackDelivery deployed to seam-system (e.g., Dex).
+//   - Helm chart repository at 10.20.0.1:5000 serving index.yaml with a newer
+//     chart version than the one referenced by the PackDelivery.
+//   - Conductor agent role=management running with PackSourceVersionLoop enabled.
+//
+// What this test verifies (RECON-CMN1):
+//   - PackSourceVersionLoop detects the version gap within one poll interval.
+//   - UpstreamVersionAvailable DriftSignal created in seam-system for the pack.
+//   - DriftSignal spec.signalKind == "UpstreamVersionAvailable".
+//   - After updating the PackDelivery to the latest chart version, the loop
+//     confirms the DriftSignal (state=confirmed) within the next poll interval.
+
+import (
+	"testing"
+)
+
+// TestPackSourceVersionLoop_DetectsAndSignalsNewChartVersion is a placeholder for the
+// upstream version detection and DriftSignal lifecycle check; it always skips. RECON-CMN1.
+func TestPackSourceVersionLoop_DetectsAndSignalsNewChartVersion(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and a Helm-backed PackDelivery in seam-system with an available newer chart version and RECON-K1 closed") // NOTE(review): message cites RECON-K1 while this file tracks RECON-CMN1 — confirm the gating item
+}
+
+// TestPackSourceVersionLoop_ConfirmsSignalAfterVersionUpdate is a placeholder (it
+// always skips) for checking that an existing UpstreamVersionAvailable signal is confirmed
+// once the PackDelivery spec.chartVersion matches the latest index version. RECON-CMN1.
+func TestPackSourceVersionLoop_ConfirmsSignalAfterVersionUpdate(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and an existing UpstreamVersionAvailable DriftSignal on a Helm-backed PackDelivery and RECON-K1 closed") // NOTE(review): message cites RECON-K1 while this file tracks RECON-CMN1 — confirm the gating item
+}
diff --git a/test/e2e/policy_report_drift_loop_test.go b/test/e2e/policy_report_drift_loop_test.go
new file mode 100644
index 0000000..dea3970
--- /dev/null
+++ b/test/e2e/policy_report_drift_loop_test.go
@@ -0,0 +1,34 @@
+package e2e_test
+
+// policy_report_drift_loop_test.go -- live cluster verification that PolicyReportDriftLoop
+// correctly detects Kyverno policy violations and emits KyvernoPolicyViolation DriftSignals.
+//
+// Pre-conditions:
+//   - MGMT_KUBECONFIG set; ccs-mgmt fully onboarded with Guardian operational.
+//   - Kyverno PackDelivery deployed to seam-system (RECON-L1 closed).
+//   - At least one ClusterPolicy or Policy active; at least one ClusterPolicyReport present.
+//   - Conductor agent role=management running with PolicyReportDriftLoop enabled.
+//
+// What this test verifies (RECON-L2):
+//   - PolicyReportDriftLoop creates a KyvernoPolicyViolation DriftSignal in seam-system
+//     when a PolicyReport or ClusterPolicyReport contains at least one fail result.
+//   - DriftSignal spec.signalKind == "KyvernoPolicyViolation".
+//   - After the policy violation is remediated (fail result removed), the loop confirms
+//     the DriftSignal (state=confirmed).
+//   - Under AutonomyLevel=observe-only, no DriftSignal is created.
+
+import (
+	"testing"
+)
+
+func TestPolicyReportDriftLoop_ViolationEmitsDriftSignal(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and Kyverno deployed with a failing ClusterPolicyReport with RECON-L1 and RECON-L2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestPolicyReportDriftLoop_ConfirmsSignalAfterRemediation(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and an existing KyvernoPolicyViolation DriftSignal with RECON-L2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestPolicyReportDriftLoop_ObserveOnly_NoSignalCreated(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and OperatorContext with autonomyLevel=observe-only with RECON-L2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
diff --git a/test/e2e/vulnerability_drift_loop_test.go b/test/e2e/vulnerability_drift_loop_test.go
new file mode 100644
index 0000000..c6523b7
--- /dev/null
+++ b/test/e2e/vulnerability_drift_loop_test.go
@@ -0,0 +1,34 @@
+package e2e_test
+
+// vulnerability_drift_loop_test.go -- live cluster verification that VulnerabilityDriftLoop
+// correctly detects CRITICAL vulnerabilities and emits VulnerableImageDetected DriftSignals.
+//
+// Pre-conditions:
+//   - MGMT_KUBECONFIG set; ccs-mgmt fully onboarded with Guardian operational.
+//   - Trivy Operator PackDelivery deployed to seam-system (RECON-M1 closed).
+//   - At least one VulnerabilityReport present in seam-system with scan results.
+//   - Conductor agent role=management running with VulnerabilityDriftLoop enabled.
+//
+// What this test verifies (RECON-M2):
+//   - VulnerabilityDriftLoop creates a VulnerableImageDetected DriftSignal in seam-system
+//     when a VulnerabilityReport contains at least one CRITICAL severity vulnerability.
+//   - DriftSignal spec.signalKind == "VulnerableImageDetected".
+//   - After the image is updated to a patched version (criticalCount drops to zero),
+//     the loop confirms the DriftSignal (state=confirmed).
+//   - Under AutonomyLevel=observe-only, no DriftSignal is created.
+
+import (
+	"testing"
+)
+
+func TestVulnerabilityDriftLoop_CriticalVulnEmitsDriftSignal(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and Trivy Operator deployed with a VulnerabilityReport containing CRITICAL CVEs with RECON-M1 and RECON-M2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestVulnerabilityDriftLoop_ConfirmsSignalAfterImageUpdate(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and an existing VulnerableImageDetected DriftSignal with RECON-M2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
+
+func TestVulnerabilityDriftLoop_ObserveOnly_NoSignalCreated(t *testing.T) {
+	t.Skip("requires MGMT_KUBECONFIG and OperatorContext with autonomyLevel=observe-only with RECON-M2 closed") // stub: always skips, whether or not the stated prerequisites are met
+}
diff --git a/test/integration/federation/stream_integration_test.go b/test/integration/federation/stream_integration_test.go
index cb12e8b..4ffa890 100644
--- a/test/integration/federation/stream_integration_test.go
+++ b/test/integration/federation/stream_integration_test.go
@@ -209,7 +209,7 @@ func TestStream_HeartBeat_ServerRespondsWithACK(t *testing.T) {
 	if err != nil {
 		t.Fatalf("server TLS: %v", err)
 	}
-	srv := federation.NewFederationServerFromTLS(serverTLS, nil)
+	srv := federation.NewFederationServerFromTLS(serverTLS, nil, federation.FederationServerOptions{})
 	addr, _ := startStreamServer(t, srv)
 
 	clientTLS, err := federation.BuildClientTLSConfig(caPath, clientCertPath, clientKeyPath)
@@ -264,7 +264,7 @@ func TestStream_AuditEventBatch_ServerRespondsWithAck(t *testing.T) {
 		t.Fatalf("server TLS: %v", err)
 	}
 	// kubeClient is nil — server skips ConfigMap creation but still ACKs.
-	srv := federation.NewFederationServerFromTLS(serverTLS, nil)
+	srv := federation.NewFederationServerFromTLS(serverTLS, nil, federation.FederationServerOptions{})
 	addr, _ := startStreamServer(t, srv)
 
 	clientTLS, err := federation.BuildClientTLSConfig(caPath, clientCertPath, clientKeyPath)
@@ -327,7 +327,7 @@ func TestStream_ClusterID_ExtractedFromClientCert(t *testing.T) {
 	if err != nil {
 		t.Fatalf("server TLS: %v", err)
 	}
-	srv := federation.NewFederationServerFromTLS(serverTLS, nil)
+	srv := federation.NewFederationServerFromTLS(serverTLS, nil, federation.FederationServerOptions{})
 	addr, _ := startStreamServer(t, srv)
 
 	clientTLS, err := federation.BuildClientTLSConfig(caPath, clientCertPath, clientKeyPath)
@@ -374,7 +374,7 @@ func TestStream_WALReplay_OnReconnect(t *testing.T) {
 	if err != nil {
 		t.Fatalf("server TLS: %v", err)
 	}
-	srv := federation.NewFederationServerFromTLS(serverTLS, nil)
+	srv := federation.NewFederationServerFromTLS(serverTLS, nil, federation.FederationServerOptions{})
 	addr, _ := startStreamServer(t, srv)
 
 	// Pre-populate WAL with 3 entries; ACK sequence 1.
diff --git a/test/unit/agent/capability_publisher_test.go b/test/unit/agent/capability_publisher_test.go
index a4b2f55..accc6d6 100644
--- a/test/unit/agent/capability_publisher_test.go
+++ b/test/unit/agent/capability_publisher_test.go
@@ -18,9 +18,9 @@ import (
 )
 
 var runnerConfigGVR = schema.GroupVersionResource{
-	Group:   "infrastructure.ontai.dev",
-	Version: "v1alpha1",
-	Resource: "infrastructurerunnerconfigs",
+	Group:    "seam.ontai.dev",
+	Version:  "v1alpha1",
+	Resource: "runnerconfigs",
 }
 
 // makeRunnerConfig constructs an Unstructured RunnerConfig with optional capabilities
@@ -28,7 +28,7 @@ var runnerConfigGVR = schema.GroupVersionResource{
 func makeRunnerConfig(name, namespace string, hasCaps bool) *unstructured.Unstructured {
 	obj := &unstructured.Unstructured{
 		Object: map[string]interface{}{
-			"apiVersion": "infrastructure.ontai.dev/v1alpha1",
+			"apiVersion": "seam.ontai.dev/v1alpha1",
 			"kind":       "RunnerConfig",
 			"metadata": map[string]interface{}{
 				"name":      name,
@@ -55,15 +55,15 @@ func newFakeDynamicClient(scheme *runtime.Scheme) *dynamicfake.FakeDynamicClient
 	// Register the RunnerConfig GVR in the RESTMapper by adding it to the scheme.
 	// dynamicfake uses the scheme to resolve GVKs; we add a dummy unstructured type.
 	gvk := schema.GroupVersionKind{
-		Group:   "infrastructure.ontai.dev",
+		Group:   "seam.ontai.dev",
 		Version: "v1alpha1",
 		Kind:    "RunnerConfig",
 	}
 	scheme.AddKnownTypeWithName(gvk, &runtime.Unknown{})
 	gvkList := schema.GroupVersionKind{
-		Group:   "infrastructure.ontai.dev",
+		Group:   "seam.ontai.dev",
 		Version: "v1alpha1",
-		Kind:    "InfrastructureRunnerConfigList",
+		Kind:    "RunnerConfigList",
 	}
 	scheme.AddKnownTypeWithName(gvkList, &runtime.Unknown{})
 	_ = meta.NewDefaultRESTMapper(nil)
@@ -137,11 +137,11 @@ func TestCapabilityPublisher_ConstructsWithoutPanic(t *testing.T) {
 // fake tracker knows the list kind mapping.
 func newAllFakeDynamicClient(scheme *runtime.Scheme) *dynamicfake.FakeDynamicClient {
 	scheme.AddKnownTypeWithName(schema.GroupVersionKind{
-		Group: "infrastructure.ontai.dev", Version: "v1alpha1", Kind: "RunnerConfig",
+		Group: "seam.ontai.dev", Version: "v1alpha1", Kind: "RunnerConfig",
 	}, &unstructured.Unstructured{})
 	_ = meta.NewDefaultRESTMapper(nil)
 	return dynamicfake.NewSimpleDynamicClientWithCustomListKinds(scheme,
-		map[schema.GroupVersionResource]string{runnerConfigGVR: "InfrastructureRunnerConfigList"},
+		map[schema.GroupVersionResource]string{runnerConfigGVR: "RunnerConfigList"},
 	)
 }
 
diff --git a/test/unit/capability/platform_test.go b/test/unit/capability/platform_test.go
index 45dc17e..dca2dd7 100644
--- a/test/unit/capability/platform_test.go
+++ b/test/unit/capability/platform_test.go
@@ -36,6 +36,7 @@ var platformKindToResource = map[string]string{
 	"PKIRotation":      "pkirotations",
 	"ClusterReset":     "clusterresets",
 	"HardeningProfile": "hardeningprofiles",
+	"MachineConfig":    "machineconfigs",
 }
 
 // seamKindToResource maps seam.ontai.dev Kind names to GVR resources.
@@ -160,6 +161,10 @@ func (s *stubTalosClient) Reboot(_ context.Context) error {
 	s.rebootCalled = true
 	return s.rebootErr
 }
+func (s *stubTalosClient) RebootPowercycle(_ context.Context) error {
+	s.rebootCalled = true // same flag Reboot sets, so this stub does not distinguish the two reboot paths
+	return s.rebootErr
+}
 func (s *stubTalosClient) Reset(_ context.Context, _ bool) error {
 	s.resetCalled = true
 	return s.resetErr
diff --git a/test/unit/compiler/wrapper_runner_rbac_test.go b/test/unit/compiler/wrapper_runner_rbac_test.go
index 651298d..fc11c5b 100644
--- a/test/unit/compiler/wrapper_runner_rbac_test.go
+++ b/test/unit/compiler/wrapper_runner_rbac_test.go
@@ -203,3 +203,50 @@ func TestDispatcherRunnerRole_NotGeneratedWithoutClusterName(t *testing.T) {
 		t.Errorf("dispatcher-runner.yaml was generated without --cluster-name; must not be present")
 	}
 }
+
+// TestWatchdogQueueYAML_EmittedInPostBootstrap runs 'compiler enable' with
+// --cluster-name and checks that 05-post-bootstrap/watchdog-queue.yaml exists
+// and mentions the expected name, namespace, ClusterQueue and kind.
+func TestWatchdogQueueYAML_EmittedInPostBootstrap(t *testing.T) {
+	bin := buildCompiler(t)
+	outDir := t.TempDir()
+	enable := exec.Command(bin, "enable", "--cluster-name", "ccs-mgmt", "--output", outDir)
+	combined, runErr := enable.CombinedOutput()
+	if runErr != nil {
+		t.Fatalf("compiler enable failed: %v\n%s", runErr, combined)
+	}
+
+	data, readErr := os.ReadFile(filepath.Join(outDir, "05-post-bootstrap", "watchdog-queue.yaml"))
+	if readErr != nil {
+		t.Fatalf("watchdog-queue.yaml not generated: %v", readErr)
+	}
+	yamlText := string(data)
+
+	// Each entry pairs a substring that must appear with its failure message.
+	for _, check := range []struct{ needle, failure string }{
+		{"watchdog-queue", "watchdog-queue.yaml does not contain 'watchdog-queue' name"},
+		{"ont-system", "watchdog-queue.yaml is not scoped to ont-system namespace"},
+		{"seam-pack-deploy", "watchdog-queue.yaml does not reference ClusterQueue seam-pack-deploy"},
+		{"LocalQueue", "watchdog-queue.yaml is not a LocalQueue kind"},
+	} {
+		if !strings.Contains(yamlText, check.needle) {
+			t.Error(check.failure)
+		}
+	}
+}
+
+// TestWatchdogQueueYAML_NotGeneratedWithoutClusterName runs 'compiler enable'
+// without --cluster-name and expects no 05-post-bootstrap/watchdog-queue.yaml.
+func TestWatchdogQueueYAML_NotGeneratedWithoutClusterName(t *testing.T) {
+	bin := buildCompiler(t)
+	outDir := t.TempDir()
+	combined, runErr := exec.Command(bin, "enable", "--output", outDir).CombinedOutput()
+	if runErr != nil {
+		t.Fatalf("compiler enable failed: %v\n%s", runErr, combined)
+	}
+
+	_, statErr := os.Stat(filepath.Join(outDir, "05-post-bootstrap", "watchdog-queue.yaml"))
+	if statErr == nil { // a nil error means the file exists, which must not happen
+		t.Errorf("watchdog-queue.yaml was generated without --cluster-name; must not be present")
+	}
+}
diff --git a/test/unit/federation/federation_stream_test.go b/test/unit/federation/federation_stream_test.go
index 6ba1aec..2eac4f6 100644
--- a/test/unit/federation/federation_stream_test.go
+++ b/test/unit/federation/federation_stream_test.go
@@ -38,7 +38,7 @@ func setupStreamTest(t *testing.T) *streamTestEnv {
 	serverCertPath, serverKeyPath, caPath := writeTempCerts(t, serverCertPEM, serverKeyPEM, ca.caPEM())
 
 	// Use a fake kubeClient in tests (nil — server skips ConfigMap creation).
-	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil)
+	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil, federation.FederationServerOptions{})
 	if err != nil {
 		t.Fatalf("NewFederationServer: %v", err)
 	}
diff --git a/test/unit/federation/federation_tls_test.go b/test/unit/federation/federation_tls_test.go
index 03d347f..2def2ca 100644
--- a/test/unit/federation/federation_tls_test.go
+++ b/test/unit/federation/federation_tls_test.go
@@ -286,7 +286,7 @@ func TestFederationServer_gRPC_AcceptsValidCert(t *testing.T) {
 	serverCertPEM, serverKeyPEM := ca.issueServerCert(t, []string{"localhost"})
 	serverCertPath, serverKeyPath, caPath := writeTempCerts(t, serverCertPEM, serverKeyPEM, ca.caPEM())
 
-	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil)
+	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil, federation.FederationServerOptions{})
 	if err != nil {
 		t.Fatalf("NewFederationServer: %v", err)
 	}
@@ -334,7 +334,7 @@ func TestFederationServer_gRPC_RejectsNoCert(t *testing.T) {
 	serverCertPEM, serverKeyPEM := ca.issueServerCert(t, []string{"localhost"})
 	serverCertPath, serverKeyPath, caPath := writeTempCerts(t, serverCertPEM, serverKeyPEM, ca.caPEM())
 
-	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil)
+	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil, federation.FederationServerOptions{})
 	if err != nil {
 		t.Fatalf("NewFederationServer: %v", err)
 	}
@@ -389,7 +389,7 @@ func TestFederationServer_gRPC_RejectsWrongCA(t *testing.T) {
 	serverCertPEM, serverKeyPEM := serverCA.issueServerCert(t, []string{"localhost"})
 	serverCertPath, serverKeyPath, caPath := writeTempCerts(t, serverCertPEM, serverKeyPEM, serverCA.caPEM())
 
-	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil)
+	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil, federation.FederationServerOptions{})
 	if err != nil {
 		t.Fatalf("NewFederationServer: %v", err)
 	}
@@ -460,7 +460,7 @@ func TestFederationClient_ClusterIDExtraction(t *testing.T) {
 	serverCertPEM, serverKeyPEM := ca.issueServerCert(t, []string{"localhost"})
 	serverCertPath, serverKeyPath, caPath := writeTempCerts(t, serverCertPEM, serverKeyPEM, ca.caPEM())
 
-	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil)
+	srv, err := federation.NewFederationServer(caPath, serverCertPath, serverKeyPath, nil, federation.FederationServerOptions{})
 	if err != nil {
 		t.Fatalf("NewFederationServer: %v", err)
 	}