Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 210 additions & 0 deletions cmd/compiler/addnode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// addnode.go implements the compiler addnode subcommand for generating a new
// MachineConfig CR for a node being added to an existing cluster.
//
// Usage:
//
// compiler addnode --cluster <name> --hostname <hostname> --ip <ip> --role <role>
// [--order <n>] [--existing-cr <path>] --output <dir>
//
// When --existing-cr is given, the machine and cluster config sections are
// copied from the specified MachineConfig CR with identity fields overridden.
// When absent, a skeleton CR is emitted with empty machine and cluster stubs.
//
// conductor-schema.md §9. platform-schema.md §9.
package main

import (
	"flag"
	"fmt"
	"net"
	"os"
	"path/filepath"

	corev1 "k8s.io/api/core/v1"
	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/yaml"

	platformv1alpha1 "github.com/ontai-dev/platform/api/v1alpha1"
)

// addnodeHelp is the hand-authored help text for the 'compiler addnode'
// subcommand. runAddNodeSubcommand installs it as the FlagSet usage hook, where
// it is written to stderr ahead of the flag package's generated defaults.
//
// The text is a raw string literal: every byte between the backticks,
// including line breaks, is emitted verbatim, so edit it as user-facing output
// rather than as source formatting.
const addnodeHelp = `Usage: compiler addnode --cluster <name> --hostname <hostname> --ip <ip> --role <role> --output <dir>
[--order <n>] [--existing-cr <path>]

Generate a MachineConfig CR for a node being added to an existing cluster.
The output CR is placed in the --output directory as seam-mc-{cluster}-{hostname}.yaml.

Flags:
--cluster Cluster name (matches the TalosCluster CR name and seam-tenant-{cluster} namespace).
--hostname Node hostname. The cluster-name prefix is stripped automatically if present,
so both "cp4" and "ccs-dev-cp4" produce seam-mc-{cluster}-cp4.
--ip Node IP address reachable on Talos API port 50000.
--role Node role: controlplane or worker. (init is reserved for compiler bootstrap.)
--order Upgrade sequence order (default: 1). init=0, controlplane=1..N, worker=N+1..M.
--existing-cr Path to an existing MachineConfig CR YAML. When provided, spec.machine and
spec.cluster are copied from the existing CR and identity fields are overridden
with the flags above. Use to clone an existing node config for a new peer.
--output Output directory for the generated MachineConfig CR YAML (required).

When --existing-cr is absent, a skeleton CR is emitted with empty machine and cluster sections.
Populate those sections with the Talos v1alpha1 machineconfig content before applying.

Compile-only: compiler addnode never applies resources. Human review and GitOps apply required.
`

// compileAddNode generates a MachineConfig CR for a node being added to an
// existing cluster and writes it to {output}/seam-mc-{cluster}-{bareHostname}.yaml.
// It only produces the file; nothing is applied to a cluster.
//
// Parameters:
//   - clusterName: TalosCluster name. Also used for the seam-tenant-{cluster}
//     namespace, the ontai.dev/cluster label and spec.clusterRef. Required.
//   - hostname: node hostname. A leading "{clusterName}-" prefix is stripped
//     before building the CR name and spec.nodeHostname. Required.
//   - ip: node IP address. Must be a textual IPv4 or IPv6 address. Required.
//   - role: "controlplane" or "worker". "init" is rejected because it is
//     managed exclusively by compiler bootstrap.
//   - order: upgrade sequence position, stored verbatim in spec.order.
//   - existingCRPath: when non-empty, spec.machine and spec.cluster are copied
//     from this MachineConfig CR file. When empty, both sections are left unset
//     and an explanatory comment header is prepended to the output.
//   - output: directory that receives the generated file; created if missing.
//     Required.
//
// It returns an error when a required argument is missing or invalid, when the
// existing CR cannot be read or parsed, or when marshalling or writing the
// output fails.
func compileAddNode(clusterName, hostname, ip, role string, order int32, existingCRPath, output string) error {
	if clusterName == "" {
		return fmt.Errorf("--cluster is required")
	}
	if hostname == "" {
		return fmt.Errorf("--hostname is required")
	}
	if ip == "" {
		return fmt.Errorf("--ip is required")
	}
	// Fail fast on malformed addresses (typos, hostnames, host:port pairs)
	// instead of emitting a CR whose spec.nodeIP can never be reached.
	if net.ParseIP(ip) == nil {
		return fmt.Errorf("--ip must be a valid IPv4 or IPv6 address, got %q", ip)
	}
	switch role {
	case "controlplane", "worker":
		// Accepted roles; mapped to the typed constant below.
	case "init":
		return fmt.Errorf("role=init is reserved for compiler bootstrap; use controlplane or worker")
	default:
		return fmt.Errorf("--role must be controlplane or worker, got %q", role)
	}
	if output == "" {
		return fmt.Errorf("--output is required")
	}

	// Strip cluster-name prefix from hostname so seam-mc-{cluster}-{hostname} is not doubled.
	bareHostname := stripClusterPrefix(clusterName, hostname)

	// role has been validated above, so anything that is not "worker" is "controlplane".
	mcRole := platformv1alpha1.MachineConfigRoleControlPlane
	if role == "worker" {
		mcRole = platformv1alpha1.MachineConfigRoleWorker
	}

	// Both sections stay nil (skeleton output) unless an existing CR is cloned.
	var machineJSON, clusterJSON *apiextensionsv1.JSON
	if existingCRPath != "" {
		m, c, err := loadMachineClusterFromCR(existingCRPath)
		if err != nil {
			return fmt.Errorf("read existing CR %q: %w", existingCRPath, err)
		}
		machineJSON = m
		clusterJSON = c
	}

	crName := "seam-mc-" + clusterName + "-" + bareHostname
	mc := platformv1alpha1.MachineConfig{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "platform.ontai.dev/v1alpha1",
			Kind:       "MachineConfig",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      crName,
			Namespace: "seam-tenant-" + clusterName,
			Labels: map[string]string{
				"ontai.dev/cluster": clusterName,
				// NOTE(review): this label carries the hostname exactly as
				// given (prefix not stripped), while the CR name and
				// spec.nodeHostname use the stripped form -- confirm intended.
				"ontai.dev/node":       hostname,
				"ontai.dev/node-role":  role,
				"ontai.dev/managed-by": "compiler",
			},
		},
		Spec: platformv1alpha1.MachineConfigSpec{
			Role:  mcRole,
			Order: order,
			ClusterRef: corev1.LocalObjectReference{
				Name: clusterName,
			},
			NodeIP:       ip,
			NodeHostname: bareHostname,
			Machine:      machineJSON,
			Cluster:      clusterJSON,
		},
	}

	data, err := yaml.Marshal(mc)
	if err != nil {
		return fmt.Errorf("marshal MachineConfig CR: %w", err)
	}

	// Only the skeleton variant gets the "populate me" banner; a cloned CR is
	// written without a header.
	var header string
	if existingCRPath == "" {
		header = "# MachineConfig CR skeleton generated by compiler addnode.\n" +
			"# Populate spec.machine and spec.cluster with the Talos v1alpha1\n" +
			"# machineconfig sections for this node before applying.\n" +
			"# Refer to: https://www.talos.dev/latest/reference/configuration/\n"
	}

	if err := os.MkdirAll(output, 0755); err != nil {
		return fmt.Errorf("create output directory %q: %w", output, err)
	}
	outPath := filepath.Join(output, crName+".yaml")
	if err := os.WriteFile(outPath, []byte(header+string(data)), 0644); err != nil {
		return fmt.Errorf("write MachineConfig CR %q: %w", outPath, err)
	}
	return nil
}

// stripClusterPrefix returns hostname without a leading "{clusterName}-".
//
// The prefix is removed only when something remains after it, so a hostname
// that is exactly the prefix (or shorter) comes back unchanged:
//
//	stripClusterPrefix("ccs-dev", "ccs-dev-cp4") // "cp4"
//	stripClusterPrefix("ccs-dev", "cp4")         // "cp4"
//	stripClusterPrefix("ccs-dev", "ccs-dev-")    // "ccs-dev-"
func stripClusterPrefix(clusterName, hostname string) string {
	// cut is the length of clusterName plus the separating dash.
	cut := len(clusterName) + 1
	if len(hostname) <= cut {
		return hostname
	}
	if head := hostname[:cut]; head != clusterName+"-" {
		return hostname
	}
	return hostname[cut:]
}

// loadMachineClusterFromCR reads the MachineConfig CR YAML file at path and
// returns its spec.machine and spec.cluster sections, in that order. addnode
// uses it to clone the config body of an existing peer node.
//
// Only those two sections are decoded; every other field in the file is
// ignored. On a read or parse failure both sections are nil and the error
// describes which step failed.
func loadMachineClusterFromCR(path string) (*apiextensionsv1.JSON, *apiextensionsv1.JSON, error) {
	// Minimal view of the CR: just enough structure to reach the two
	// free-form sections under spec.
	type specSections struct {
		Machine *apiextensionsv1.JSON `json:"machine" yaml:"machine"`
		Cluster *apiextensionsv1.JSON `json:"cluster" yaml:"cluster"`
	}
	type crEnvelope struct {
		Spec specSections `json:"spec" yaml:"spec"`
	}

	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, nil, fmt.Errorf("read file: %w", readErr)
	}

	var doc crEnvelope
	if parseErr := yaml.Unmarshal(raw, &doc); parseErr != nil {
		return nil, nil, fmt.Errorf("parse MachineConfig CR: %w", parseErr)
	}

	sections := doc.Spec
	return sections.Machine, sections.Cluster, nil
}

// runAddNodeSubcommand is the CLI entry point for 'compiler addnode'. It parses
// the addnode-specific flags from args, hands them to compileAddNode, and on
// any failure prints a "compiler addnode: ..." message to stderr and exits
// with status 1.
//
// args holds the arguments that follow the subcommand name. Required-flag
// validation is done by compileAddNode, not here.
func runAddNodeSubcommand(args []string) {
	fs := flag.NewFlagSet("addnode", flag.ExitOnError)
	cluster := fs.String("cluster", "", "Cluster name (required)")
	hostname := fs.String("hostname", "", "Node hostname (required)")
	ip := fs.String("ip", "", "Node IP address (required)")
	role := fs.String("role", "", "Node role: controlplane or worker (required)")
	order := fs.Int("order", 1, "Upgrade sequence order (default: 1)")
	existingCR := fs.String("existing-cr", "", "Path to existing MachineConfig CR to clone machine/cluster sections from")
	output := fs.String("output", "", "Output directory (required)")

	// Show the authored help first, then the generated per-flag defaults.
	fs.Usage = func() {
		fmt.Fprint(os.Stderr, addnodeHelp)
		fs.PrintDefaults()
	}

	// With flag.ExitOnError the FlagSet exits on its own for bad flags; the
	// check is kept so the function stays correct if the policy changes.
	if err := fs.Parse(args); err != nil {
		fmt.Fprintf(os.Stderr, "compiler addnode: flag error: %v\n", err)
		os.Exit(1)
	}

	// compileAddNode takes an int32. A plain conversion would silently wrap a
	// value that does not fit (4294967297 would become 1), so verify that the
	// value survives the round trip before using it.
	order32 := int32(*order)
	if int(order32) != *order {
		fmt.Fprintf(os.Stderr, "compiler addnode: --order %d is out of range for a 32-bit integer\n", *order)
		os.Exit(1)
	}

	if err := compileAddNode(*cluster, *hostname, *ip, *role, order32, *existingCR, *output); err != nil {
		fmt.Fprintf(os.Stderr, "compiler addnode: %v\n", err)
		os.Exit(1)
	}
}
Loading
Loading